2 # Secret Labs' Regular Expression Engine
4 # re-compatible interface for the sre matching engine
6 # Copyright (c) 1998-2001 by Secret Labs AB. All rights reserved.
8 # This version of the SRE library can be redistributed under CNRI's
9 # Python 1.6 license. For any other use, please contact Secret Labs
10 # AB (info@pythonware.com).
12 # Portions of this engine have been developed in cooperation with
13 # CNRI. Hewlett-Packard provided funding for 1.6 integration and
14 # other compatibility work.
21 __all__
= [ "match", "search", "sub", "subn", "split", "findall",
22 "compile", "purge", "template", "escape", "I", "L", "M", "S", "X",
23 "U", "IGNORECASE", "LOCALE", "MULTILINE", "DOTALL", "VERBOSE",
28 # this module works under 1.5.2 and later. don't use string methods
# Public flag constants.  Each long name is bound to the corresponding
# sre_compile.SRE_FLAG_* value, and the one-letter name is an alias for it.

# ignore case
IGNORECASE = sre_compile.SRE_FLAG_IGNORECASE
I = IGNORECASE

# assume current 8-bit locale
LOCALE = sre_compile.SRE_FLAG_LOCALE
L = LOCALE

# assume unicode locale
UNICODE = sre_compile.SRE_FLAG_UNICODE
U = UNICODE

# make anchors look for newline
MULTILINE = sre_compile.SRE_FLAG_MULTILINE
M = MULTILINE

# make dot match newline
DOTALL = sre_compile.SRE_FLAG_DOTALL
S = DOTALL

# ignore whitespace and comments
VERBOSE = sre_compile.SRE_FLAG_VERBOSE
X = VERBOSE

# sre extensions (experimental, don't rely on these)

# disable backtracking
TEMPLATE = sre_compile.SRE_FLAG_TEMPLATE
T = TEMPLATE

# dump pattern after compilation
DEBUG = sre_compile.SRE_FLAG_DEBUG

# exception type re-exported from sre_compile
error = sre_compile.error
46 # --------------------------------------------------------------------
def match(pattern, string, flags=0):
    """Apply pattern at the start of string.

    The pattern is compiled with the given flags (via _compile) and its
    match() method is called on string.  Returns a match object, or
    None if no match was found.
    """
    compiled = _compile(pattern, flags)
    return compiled.match(string)
def search(pattern, string, flags=0):
    """Scan string for a location where pattern matches.

    The pattern is compiled with the given flags (via _compile) and its
    search() method is called on string.  Returns a match object, or
    None if no match was found.
    """
    compiled = _compile(pattern, flags)
    return compiled.search(string)
def sub(pattern, repl, string, count=0):
    """Return string with the leftmost non-overlapping occurrences of
    pattern replaced by the replacement repl.

    The pattern is always compiled with flags 0; repl, string and count
    are handed unchanged to the compiled pattern's sub() method.
    """
    compiled = _compile(pattern, 0)
    return compiled.sub(repl, string, count)
def subn(pattern, repl, string, count=0):
    """Return the 2-tuple (new_string, number).

    new_string is the source string with the leftmost non-overlapping
    occurrences of pattern replaced by the replacement repl, and
    number is the number of substitutions that were made.

    The pattern is always compiled with flags 0; repl, string and count
    are handed unchanged to the compiled pattern's subn() method.
    """
    compiled = _compile(pattern, 0)
    return compiled.subn(repl, string, count)
def split(pattern, string, maxsplit=0):
    """Split string by the occurrences of pattern and return a list
    containing the resulting substrings.

    The pattern is always compiled with flags 0; string and maxsplit
    are handed unchanged to the compiled pattern's split() method.
    """
    compiled = _compile(pattern, 0)
    return compiled.split(string, maxsplit)
def findall(pattern, string, maxsplit=0):
    """Return a list of all non-overlapping matches in string.

    If one or more groups are present in the pattern, the result is a
    list of groups; it is a list of tuples if the pattern has more
    than one group.

    Empty matches are included in the result.
    """
    # NOTE(review): the third parameter is named maxsplit (the same name
    # split() uses) yet it is forwarded as the second positional argument
    # of the compiled pattern's findall().  Confirm what that argument
    # means to findall() before relying on it; the name and forwarding
    # are kept as-is so existing callers are unaffected.
    compiled = _compile(pattern, 0)
    return compiled.findall(string, maxsplit)
def compile(pattern, flags=0):
    """Compile a regular expression pattern with the given flags and
    return the resulting pattern object."""
    # public entry point; the work is delegated to the internal _compile
    compiled = _compile(pattern, flags)
    return compiled
93 "Clear the regular expression cache"
def template(pattern, flags=0):
    """Compile a template pattern and return the resulting pattern
    object."""
    # same as compile(), with the TEMPLATE flag (T) or'ed into flags
    template_flags = flags | T
    return _compile(pattern, template_flags)
102 "Escape all non-alphanumeric characters in pattern."
104 for i
in range(len(pattern
)):
106 if not ("a" <= c
<= "z" or "A" <= c
<= "Z" or "0" <= c
<= "9"):
111 return _join(s
, pattern
)
113 # --------------------------------------------------------------------
122 # internal: join into string having the same type as sep
123 return string
.join(seq
, sep
[:0])
126 # internal: compile pattern
131 if type(pattern
) not in sre_compile
.STRING_TYPES
:
134 p
= sre_compile
.compile(pattern
, flags
)
136 raise error
, v
# invalid expression
137 if len(_cache
) >= _MAXCACHE
:
142 def _compile_repl(*key
):
143 # internal: compile replacement pattern
144 p
= _cache_repl
.get(key
)
149 p
= sre_parse
.parse_template(repl
, pattern
)
151 raise error
, v
# invalid expression
152 if len(_cache_repl
) >= _MAXCACHE
:
def _expand(pattern, match, template):
    # internal: match.expand implementation hook
    #
    # The template is first parsed against the pattern with
    # sre_parse.parse_template, then the parsed form is expanded for
    # the given match with sre_parse.expand_template.
    parsed = sre_parse.parse_template(template, pattern)
    return sre_parse.expand_template(parsed, match)
def _sub(pattern, template, string, count=0):
    # internal: pattern.sub implementation hook
    #
    # Delegates to _subn and keeps only the first item of its result
    # (the substituted string), dropping the substitution count.
    outcome = _subn(pattern, template, string, count)
    return outcome[0]
166 def _subn(pattern
, template
, string
, count
=0):
167 # internal: pattern.subn implementation hook
168 if callable(template
):
171 template
= _compile_repl(template
, pattern
)
172 def filter(match
, template
=template
):
173 return sre_parse
.expand_template(template
, match
)
177 c
= pattern
.scanner(string
)
178 while not count
or n
< count
:
189 return _join(s
, string
[:0]), n
191 def _split(pattern
, string
, maxsplit
=0):
192 # internal: pattern.split implementation hook
197 c
= pattern
.scanner(string
)
199 while not maxsplit
or n
< maxsplit
:
210 extend(list(m
.groups()))
216 # register myself for pickling
221 return _compile
, (p
.pattern
, p
.flags
)
223 copy_reg
.pickle(type(_compile("", 0)), _pickle
, _compile
)
225 # --------------------------------------------------------------------
226 # experimental stuff (see python-dev discussions for details)
229 def __init__(self
, lexicon
):
230 from sre_constants
import BRANCH
, SUBPATTERN
231 self
.lexicon
= lexicon
232 # combine phrases into a compound pattern
234 s
= sre_parse
.Pattern()
235 for phrase
, action
in lexicon
:
236 p
.append(sre_parse
.SubPattern(s
, [
237 (SUBPATTERN
, (len(p
), sre_parse
.parse(phrase
))),
239 p
= sre_parse
.SubPattern(s
, [(BRANCH
, (None, p
))])
241 self
.scanner
= sre_compile
.compile(p
)
242 def scan(self
, string
):
244 append
= result
.append
245 match
= self
.scanner
.match
254 action
= self
.lexicon
[m
.lastindex
][1]
257 action
= action(self
, m
.group())
258 if action
is not None:
261 return result
, string
[i
:]