# First, the public part of the interface:
# pcre.error and re.error should be the same, since exceptions can be
# raised from either module.
27 def _cachecompile(pattern
, flags
=0):
28 key
= (pattern
, flags
)
33 value
= compile(pattern
, flags
)
34 if len(_cache
) >= _MAXCACHE
:
def match(pattern, string, flags=0):
    """Try to apply pattern at the start of string.

    The pattern is compiled with the given flags through
    _cachecompile(), and the result of the compiled object's match()
    method on string is returned.
    """
    compiled = _cachecompile(pattern, flags)
    return compiled.match(string)
def search(pattern, string, flags=0):
    """Scan through string looking for a match to pattern.

    The pattern is compiled with the given flags through
    _cachecompile(), and the result of the compiled object's search()
    method on string is returned.
    """
    compiled = _cachecompile(pattern, flags)
    return compiled.search(string)
def sub(pattern, repl, string, count=0):
    """Replace occurrences of pattern in string by repl.

    pattern may be a pattern string, which is compiled through
    _cachecompile() with the default flags, or an object that already
    provides a sub() method (such as a compiled pattern object), which
    is used as is.  repl, string and count are passed unchanged to that
    object's sub() method, and its result is returned.
    """
    # isinstance() rather than an exact type comparison, so that
    # instances of string subclasses are compiled as well instead of
    # failing on the attribute lookup below.
    if isinstance(pattern, type('')):
        pattern = _cachecompile(pattern)
    return pattern.sub(repl, string, count)
def subn(pattern, repl, string, count=0):
    """Replace occurrences of pattern in string by repl, with a count.

    pattern may be a pattern string, which is compiled through
    _cachecompile() with the default flags, or an object that already
    provides a subn() method (such as a compiled pattern object), which
    is used as is.  repl, string and count are passed unchanged to that
    object's subn() method, and its result (a 2-tuple of the new string
    and the number of substitutions made) is returned.
    """
    # isinstance() rather than an exact type comparison, so that
    # instances of string subclasses are compiled as well instead of
    # failing on the attribute lookup below.
    if isinstance(pattern, type('')):
        pattern = _cachecompile(pattern)
    return pattern.subn(repl, string, count)
def split(pattern, string, maxsplit=0):
    """Split string by the occurrences of pattern.

    pattern may be a pattern string, which is compiled through
    _cachecompile() with the default flags, or an object that already
    provides a split() method (such as a compiled pattern object),
    which is used as is.  string and maxsplit are passed unchanged to
    that object's split() method, and its result (a list of the
    resulting substrings) is returned.
    """
    # isinstance() rather than an exact type comparison, so that
    # instances of string subclasses are compiled as well instead of
    # failing on the attribute lookup below.
    if isinstance(pattern, type('')):
        pattern = _cachecompile(pattern)
    return pattern.split(string, maxsplit)
def findall(pattern, string):
    """Return a list of all non-overlapping matches of pattern in string.

    pattern may be a pattern string, which is compiled through
    _cachecompile() with the default flags, or an object that already
    provides a findall() method (such as a compiled pattern object),
    which is used as is.  The result of that object's findall() method
    on string is returned.
    """
    # isinstance() rather than an exact type comparison, so that
    # instances of string subclasses are compiled as well instead of
    # failing on the attribute lookup below.
    if isinstance(pattern, type('')):
        pattern = _cachecompile(pattern)
    return pattern.findall(string)
66 "Escape all non-alphanumeric characters in pattern."
67 result
= list(pattern
)
68 alphanum
=string
.letters
+'_'+string
.digits
69 for i
in range(len(pattern
)):
71 if char
not in alphanum
:
72 if char
=='\000': result
[i
] = '\\000'
73 else: result
[i
] = '\\'+char
74 return string
.join(result
, '')
76 def compile(pattern
, flags
=0):
77 "Compile a regular expression pattern, returning a RegexObject."
79 code
=pcre_compile(pattern
, flags
, groupindex
)
80 return RegexObject(pattern
, flags
, code
, groupindex
)
89 def __init__(self
, pattern
, flags
, code
, groupindex
):
92 self
.pattern
= pattern
93 self
.groupindex
= groupindex
95 def search(self
, string
, pos
=0, endpos
=None):
96 """Scan through string looking for a match to the pattern, returning
97 a MatchObject instance, or None if no match was found."""
99 if endpos
is None or endpos
>len(string
):
101 if endpos
<pos
: endpos
=pos
102 regs
= self
.code
.match(string
, pos
, endpos
, 0)
105 self
._num
_regs
=len(regs
)
107 return MatchObject(self
,
112 def match(self
, string
, pos
=0, endpos
=None):
113 """Try to apply the pattern at the start of the string, returning
114 a MatchObject instance, or None if no match was found."""
116 if endpos
is None or endpos
>len(string
):
118 if endpos
<pos
: endpos
=pos
119 regs
= self
.code
.match(string
, pos
, endpos
, ANCHORED
)
122 self
._num
_regs
=len(regs
)
123 return MatchObject(self
,
128 def sub(self
, repl
, string
, count
=0):
129 """Return the string obtained by replacing the leftmost
130 non-overlapping occurrences of the pattern in string by the
133 return self
.subn(repl
, string
, count
)[0]
135 def subn(self
, repl
, source
, count
=0):
136 """Return a 2-tuple containing (new_string, number).
137 new_string is the string obtained by replacing the leftmost
138 non-overlapping occurrences of the pattern in the source
139 string by the replacement repl. number is the number of
140 substitutions that were made."""
143 raise error
, "negative substitution count"
146 n
= 0 # Number of matches
147 pos
= 0 # Where to start searching
148 lastmatch
= -1 # End of last match
149 results
= [] # Substrings making up the result
152 if type(repl
) is type(''):
153 # See if repl contains group references
155 repl
= pcre_expand(_Dummy
, repl
)
157 m
= MatchObject(self
, source
, 0, end
, [])
158 repl
= lambda m
, repl
=repl
, expand
=pcre_expand
: expand(m
, repl
)
162 m
= MatchObject(self
, source
, 0, end
, [])
164 match
= self
.code
.match
165 append
= results
.append
166 while n
< count
and pos
<= end
:
167 regs
= match(source
, pos
, end
, 0)
170 self
._num
_regs
= len(regs
)
172 if i
== j
== lastmatch
:
173 # Empty match adjacent to previous match
175 append(source
[lastmatch
:pos
])
178 append(source
[pos
:i
])
187 # Last match was empty; don't try here again
189 append(source
[lastmatch
:pos
])
192 return (string
.join(results
, ''), n
)
194 def split(self
, source
, maxsplit
=0):
195 """Split the source string by the occurrences of the pattern,
196 returning a list containing the resulting substrings."""
199 raise error
, "negative split count"
201 maxsplit
= sys
.maxint
207 match
= self
.code
.match
208 append
= results
.append
210 regs
= match(source
, pos
, end
, 0)
220 append(source
[lastmatch
:i
])
224 if a
== -1 or b
== -1:
231 append(source
[lastmatch
:])
234 def findall(self
, source
):
235 """Return a list of all non-overlapping matches in the string.
237 If one or more groups are present in the pattern, return a
238 list of groups; this will be a list of tuples if the pattern
239 has more than one group.
241 Empty matches are included in the result.
247 match
= self
.code
.match
248 append
= results
.append
250 regs
= match(source
, pos
, end
, 0)
263 gr
.append(source
[a
:b
])
# The following 3 functions were contributed by Mike Fletcher, and
# allow pickling and unpickling of RegexObject instances.
def __getinitargs__(self):
    """Return four placeholder arguments for __init__ when unpickling."""
    # Any 4 elements will do: this only works around the pickle/cPickle
    # modules not yet ignoring the __init__ function.
    placeholders = (None, None, None, None)
    return placeholders
def __getstate__(self):
    """Return the state to pickle as a (pattern, flags, groupindex) tuple.

    The compiled code object is not part of the state; __setstate__
    rebuilds it from these three values.
    """
    state = (self.pattern, self.flags, self.groupindex)
    return state
def __setstate__(self, statetuple):
    """Restore an unpickled instance from the tuple saved by __getstate__.

    statetuple holds (pattern, flags, groupindex).  The three values
    are stored back on the instance, and the compiled code object is
    then rebuilt by calling pcre_compile with the whole tuple as its
    argument list.
    """
    # Plain attributes first, in the order they appear in the tuple.
    self.pattern = statetuple[0]
    self.flags = statetuple[1]
    self.groupindex = statetuple[2]
    # The compiled code is not pickled, so recompile it from the state.
    self.code = apply(pcre_compile, statetuple)
# Dummy class used by _subn_string().  Has 'group' to avoid core dump.
290 def __init__(self
, re
, string
, pos
, endpos
, regs
):
297 def start(self
, g
= 0):
298 "Return the start of the substring matched by group g"
299 if type(g
) == type(''):
301 g
= self
.re
.groupindex
[g
]
302 except (KeyError, TypeError):
303 raise IndexError, 'group %s is undefined' % `g`
304 return self
.regs
[g
][0]
306 def end(self
, g
= 0):
307 "Return the end of the substring matched by group g"
308 if type(g
) == type(''):
310 g
= self
.re
.groupindex
[g
]
311 except (KeyError, TypeError):
312 raise IndexError, 'group %s is undefined' % `g`
313 return self
.regs
[g
][1]
315 def span(self
, g
= 0):
316 "Return (start, end) of the substring matched by group g"
317 if type(g
) == type(''):
319 g
= self
.re
.groupindex
[g
]
320 except (KeyError, TypeError):
321 raise IndexError, 'group %s is undefined' % `g`
324 def groups(self
, default
=None):
325 "Return a tuple containing all subgroups of the match object"
327 for g
in range(1, self
.re
._num
_regs
):
329 if a
== -1 or b
== -1:
330 result
.append(default
)
332 result
.append(self
.string
[a
:b
])
335 def group(self
, *groups
):
336 "Return one or more groups of the match"
341 if type(g
) == type(''):
343 g
= self
.re
.groupindex
[g
]
344 except (KeyError, TypeError):
345 raise IndexError, 'group %s is undefined' % `g`
346 if g
>= len(self
.regs
):
347 raise IndexError, 'group %s is undefined' % `g`
349 if a
== -1 or b
== -1:
352 result
.append(self
.string
[a
:b
])
355 elif len(result
) == 1:
360 def groupdict(self
, default
=None):
361 "Return a dictionary containing all named subgroups of the match"
363 for name
, index
in self
.re
.groupindex
.items():
364 a
, b
= self
.regs
[index
]
365 if a
== -1 or b
== -1:
368 dict[name
] = self
.string
[a
:b
]