Groups in a Regular Expression

Documentation

  1. Grouping in the Regular Expressions HOWTO Tutorial
  2. parentheses and numbered group references in the re module

Two consecutive copies of the same character

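None of the programs in this section succeeds in finding the lines that contain two consecutive copies of the same character. For example, the following program merely prints every line of length 2 or more, because each . matches any character, whether or not it is the same as its neighbor.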

import sys
import re   #regular expressions

#The file to be searched, read one line at a time.
filename = "/usr/share/dict/words"

#Open the file by itself inside the try, so that only a failure to open it
#is reported with a friendly message and a non-zero exit status.
try:
    lines = open(filename)
except FileNotFoundError:
    print(f"Sorry, could not find file \"{filename}\".")
    sys.exit(1)
except PermissionError:
    print(f"Sorry, no permission to open file \"{filename}\".")
    sys.exit(1)

#The with statement closes the file when the loop is finished, and also
#if the loop is interrupted by an exception.
with lines:
    for line in lines:
        line = line.rstrip("\n")   #Remove the trailing newline.
        if re.search("..", line):   #any two characters, not necessarily equal
            print(line)

sys.exit(0)
    if re.search("aa", line):
    if re.search("bb", line):
    if re.search("cc", line):

etc.

Groups

    if re.search(r"(.)\1", line):   #lines that contain a double character
    if re.search(r"^(.)\1", line):   #lines that begin with a double character
    if re.search(r"^i(.)\1", line):   #illegal, immoral, fattening
    #lines that begin and end with the same character
    if re.search(r"^(.).*\1$", line, flags = re.IGNORECASE):
    if re.search(r"(.)\1\1", line):   #triple character
    if re.search(r"(...)\1", line):   #alfalfa, clinging, instantaneous, murmur
    #lines made of two identical non-empty halves (beriberi, murmur).
    if re.search(r"^(.+)\1$", line, flags = re.IGNORECASE):

Assign numbers to two or more groups

r"(.)\1(.)\2"          #raccoon
r"^(.)\1(.)\2"         #eellike
r"^(.)\1.*(.)\2$"      #eelgrass
r"(.)\1.*(.)\2.*(.)\3" #Mississippi, Tennessee

Palindromes

r"^(.).\1$"      #eye, gag
r"^(.)(.)\2\1$"  #peep, toot
r"^(.)(.).\2\1$" #madam (and Ababa, but only with flags = re.IGNORECASE)