1 # Regular expression subroutines:
2 # sub(pat, repl, str): replace first occurrence of pattern in string
3 # gsub(pat, repl, str): replace all occurrences of pattern in string
4 # split(str, pat): split string using pattern as delimiter
10 # Replace first occurrence of pattern pat in string str by replacement
11 # repl. If the pattern isn't found, the string is returned unchanged.
12 # The replacement may contain references \digit to subpatterns and
13 # escaped backslashes. The pattern may be a string or an already compiled regular expression.
16 def sub(pat
, repl
, str):
18 if prog
.search(str) >= 0:
21 str = str[:a
] + expand(repl
, regs
, str) + str[b
:]
25 # Replace all (non-overlapping) occurrences of pattern pat in string
26 # str by replacement repl. The same rules as for sub() apply.
27 # Empty matches for the pattern are replaced only when not adjacent to
28 # a previous match, so e.g. gsub('', '-', 'abc') returns '-a-b-c-'.
30 def gsub(pat
, repl
, str):
35 while prog
.search(str, start
) >= 0:
38 if a
== b
== start
and not first
:
39 if start
>= len(str) or prog
.search(str, start
+1) < 0:
43 new
= new
+ str[start
:a
] + expand(repl
, regs
, str)
46 new
= new
+ str[start
:]
50 # Split string str in fields separated by delimiters matching pattern
51 # pat. Only non-empty matches for the pattern are considered, so e.g.
52 # split('abc', '') returns ['abc'].
58 while prog
.search(str, next
) >= 0:
66 res
.append(str[start
:a
])
68 res
.append(str[start
:])
72 # Internal subroutines:
73 # compile(pat): compile a pattern, caching already compiled patterns
74 # expand(repl, regs, str): expand \digit escapes in replacement string
77 # Manage a cache of compiled regular expressions.
78 # If the pattern is a string a compiled version of it is returned.
79 # If the pattern has been used before we return an already compiled
80 # version from the cache; otherwise we compile it now and save the
81 # compiled version in the cache.
82 # Instead of a string, a compiled regular expression can also be passed.
84 # WARNING: if the pattern syntax is changed, the cache should be flushed!
90 if type(pat
) <> type(''):
91 return pat
# Assume it is a compiled regex
92 if cache
.has_key(pat
):
93 prog
= cache
[pat
] # Get it from the cache
95 prog
= cache
[pat
] = regex
.compile(pat
)
99 # Expand \digit in the replacement.
100 # Each occurrence of \digit is replaced by the substring of str
101 # indicated by regs[digit]. To include a literal \ in the
102 # replacement, double it; other \ escapes are left unchanged (i.e.
103 # the \ and the following character are both copied).
105 def expand(repl
, regs
, str):
112 if c
<> '\\' or i
>= len(repl
):
126 # Test program, reads sequences "pat repl str" from stdin.
127 # Optional argument specifies pattern used to split lines.
136 if sys
.stdin
.isatty(): sys
.stderr
.write('--> ')
137 line
= sys
.stdin
.readline()
139 if line
[-1] == '\n': line
= line
[:-1]
140 fields
= split(line
, delpat
)
142 print 'Sorry, not three fields'
143 print 'split:', `fields`
145 [pat
, repl
, str] = split(line
, delpat
)
146 print 'sub :', `
sub(pat
, repl
, str)`
147 print 'gsub:', `
gsub(pat
, repl
, str)`