Apparently the code to forestall Tk eating events was too aggressive (Tk user input...
[python/dscho.git] / Lib / sre.py
blob6706fac8692e09cf723f76b744428d8046440665
2 # Secret Labs' Regular Expression Engine
4 # re-compatible interface for the sre matching engine
6 # Copyright (c) 1998-2001 by Secret Labs AB. All rights reserved.
8 # This version of the SRE library can be redistributed under CNRI's
9 # Python 1.6 license. For any other use, please contact Secret Labs
10 # AB (info@pythonware.com).
12 # Portions of this engine have been developed in cooperation with
13 # CNRI. Hewlett-Packard provided funding for 1.6 integration and
14 # other compatibility work.
17 import sre_compile
18 import sre_parse
20 # public symbols
# Names exported by "from sre import *".  The experimental extensions
# defined further down (T, TEMPLATE, DEBUG, Scanner) are not listed.
__all__ = [
    "match", "search", "sub", "subn", "split", "findall", "compile",
    "purge", "template", "escape",
    "I", "L", "M", "S", "X", "U",
    "IGNORECASE", "LOCALE", "MULTILINE", "DOTALL", "VERBOSE", "UNICODE",
    "error",
]

__version__ = "2.1b2"
28 # this module works under 1.5.2 and later. don't use string methods
29 import string
# flags (each has a one-letter alias and a long, descriptive name)
I = sre_compile.SRE_FLAG_IGNORECASE # ignore case
IGNORECASE = I
L = sre_compile.SRE_FLAG_LOCALE # assume current 8-bit locale
LOCALE = L
U = sre_compile.SRE_FLAG_UNICODE # assume unicode locale
UNICODE = U
M = sre_compile.SRE_FLAG_MULTILINE # make anchors look for newline
MULTILINE = M
S = sre_compile.SRE_FLAG_DOTALL # make dot match newline
DOTALL = S
X = sre_compile.SRE_FLAG_VERBOSE # ignore whitespace and comments
VERBOSE = X

# sre extensions (experimental, don't rely on these)
T = sre_compile.SRE_FLAG_TEMPLATE # disable backtracking
TEMPLATE = T
DEBUG = sre_compile.SRE_FLAG_DEBUG # dump pattern after compilation

# sre exception (the same class the compiler raises)
error = sre_compile.error
46 # --------------------------------------------------------------------
47 # public interface
def match(pattern, string, flags=0):
    """Apply pattern at the beginning of string.

    Returns a match object on success, or None when the pattern does
    not match there.  flags are passed on to the pattern compiler."""
    compiled = _compile(pattern, flags)
    return compiled.match(string)
def search(pattern, string, flags=0):
    """Look through string for a place where pattern matches.

    Returns a match object on success, or None when no match was
    found.  flags are passed on to the pattern compiler."""
    compiled = _compile(pattern, flags)
    return compiled.search(string)
def sub(pattern, repl, string, count=0, flags=0):
    """Return the string obtained by replacing the leftmost
    non-overlapping occurrences of the pattern in string by the
    replacement repl.

    count is handed to the pattern object's sub method unchanged.
    flags are compilation flags, accepted here for consistency with
    match() and search(); they default to 0 (the previous fixed value)
    and have no effect when pattern is already a compiled pattern
    object, which is used as-is."""
    return _compile(pattern, flags).sub(repl, string, count)
def subn(pattern, repl, string, count=0, flags=0):
    """Return a 2-tuple containing (new_string, number).

    new_string is the string obtained by replacing the leftmost
    non-overlapping occurrences of the pattern in the source
    string by the replacement repl.  number is the number of
    substitutions that were made.

    count is handed to the pattern object's subn method unchanged.
    flags are compilation flags, accepted here for consistency with
    match() and search(); they default to 0 (the previous fixed value)
    and have no effect when pattern is already a compiled pattern
    object, which is used as-is."""
    return _compile(pattern, flags).subn(repl, string, count)
def split(pattern, string, maxsplit=0, flags=0):
    """Split the source string by the occurrences of the pattern,
    returning a list containing the resulting substrings.

    maxsplit is handed to the pattern object's split method unchanged.
    flags are compilation flags, accepted here for consistency with
    match() and search(); they default to 0 (the previous fixed value)
    and have no effect when pattern is already a compiled pattern
    object, which is used as-is."""
    return _compile(pattern, flags).split(string, maxsplit)
def findall(pattern, string, maxsplit=0):
    """Collect every non-overlapping match of pattern in string.

    The result is a list.  When the pattern contains groups the list
    holds the groups instead of the whole matches, as tuples if there
    is more than one group.

    Empty matches are part of the result."""
    # NOTE(review): the third parameter is called maxsplit but is simply
    # forwarded as the second positional argument of the pattern
    # object's findall method -- confirm what that argument means there
    # before relying on it.  The name is kept for existing callers.
    compiled = _compile(pattern, 0)
    return compiled.findall(string, maxsplit)
def compile(pattern, flags=0):
    "Turn a regular expression pattern into a pattern object."
    # same cached code path as the module-level match/search helpers
    compiled = _compile(pattern, flags)
    return compiled
def purge():
    "Empty both the pattern cache and the replacement-template cache."
    for cache in (_cache, _cache_repl):
        cache.clear()
def template(pattern, flags=0):
    "Compile pattern with the TEMPLATE flag added; returns a pattern object."
    template_flags = flags | T
    return _compile(pattern, template_flags)
def escape(pattern):
    "Backslash-escape every character of pattern outside a-z, A-Z and 0-9."
    escaped = []
    add = escaped.append
    for ch in pattern:
        if "a" <= ch <= "z" or "A" <= ch <= "Z" or "0" <= ch <= "9":
            # alphanumerics are copied through untouched
            add(ch)
        elif ch == "\000":
            # a null character is written as the escape \000
            add("\\000")
        else:
            add("\\" + ch)
    # _join keeps the result the same string type as the input
    return _join(escaped, pattern)
113 # --------------------------------------------------------------------
114 # internals
# upper bound on the number of entries in each cache; a cache that has
# reached this size is emptied completely before the next insert
_MAXCACHE = 100

# parsed replacement templates, keyed by the (repl, pattern) tuple
_cache_repl = {}

# compiled patterns, keyed by the (pattern, flags) tuple
_cache = {}
def _join(seq, sep):
    # internal: concatenate the items of seq; the empty slice of sep is
    # used as the separator so the result has the same string type as sep
    empty = sep[:0]
    return string.join(seq, empty)
125 def _compile(*key):
126 # internal: compile pattern
127 p = _cache.get(key)
128 if p is not None:
129 return p
130 pattern, flags = key
131 if type(pattern) not in sre_compile.STRING_TYPES:
132 return pattern
133 try:
134 p = sre_compile.compile(pattern, flags)
135 except error, v:
136 raise error, v # invalid expression
137 if len(_cache) >= _MAXCACHE:
138 _cache.clear()
139 _cache[key] = p
140 return p
142 def _compile_repl(*key):
143 # internal: compile replacement pattern
144 p = _cache_repl.get(key)
145 if p is not None:
146 return p
147 repl, pattern = key
148 try:
149 p = sre_parse.parse_template(repl, pattern)
150 except error, v:
151 raise error, v # invalid expression
152 if len(_cache_repl) >= _MAXCACHE:
153 _cache_repl.clear()
154 _cache_repl[key] = p
155 return p
def _expand(pattern, match, template):
    # internal: match.expand implementation hook
    # the template is parsed on every call (the template cache is not used)
    parsed = sre_parse.parse_template(template, pattern)
    return sre_parse.expand_template(parsed, match)
def _sub(pattern, template, string, count=0):
    # internal: pattern.sub implementation hook
    # same work as _subn; only the new string is returned, not the count
    new_string, number = _subn(pattern, template, string, count)
    return new_string
def _subn(pattern, template, string, count=0):
    # internal: pattern.subn implementation hook
    # returns (new_string, number_of_replacements); a count of 0 means
    # "no limit"
    if callable(template):
        # a callable is invoked with each match object
        replace = template
    else:
        parsed = _compile_repl(template, pattern)
        # the parsed template is bound through a default argument
        def replace(match, template=parsed):
            return sre_parse.expand_template(template, match)
    pieces = []
    add = pieces.append
    done = 0 # replacements made so far
    last = 0 # end of the previous match
    scan = pattern.scanner(string)
    while not count or done < count:
        m = scan.search()
        if not m:
            break
        start, end = m.span()
        if last < start:
            # keep the text between the previous match and this one
            add(string[last:start])
        add(replace(m))
        last = end
        done = done + 1
    # whatever follows the final match is copied verbatim
    add(string[last:])
    return _join(pieces, string[:0]), done
def _split(pattern, string, maxsplit=0):
    # internal: pattern.split implementation hook
    # returns the list of pieces; a maxsplit of 0 means "no limit"
    pieces = []
    add = pieces.append
    add_all = pieces.extend
    done = 0 # splits made so far
    last = 0 # end of the previous separator
    scan = pattern.scanner(string)
    has_groups = pattern.groups
    while not maxsplit or done < maxsplit:
        m = scan.search()
        if not m:
            break
        start, end = m.span()
        if start == end:
            # an empty match never splits; give up once the previous
            # separator already reached the end of the string
            if last >= len(string):
                break
            continue
        add(string[last:start])
        if has_groups:
            # the separator's groups go into the result as well
            # (the match is known to be non-empty at this point)
            add_all(list(m.groups()))
        last = end
        done = done + 1
    # remainder after the last separator
    add(string[last:])
    return pieces
216 # register myself for pickling
218 import copy_reg
def _pickle(p):
    # internal: reduction hook for compiled patterns -- a pattern is
    # pickled as a call to _compile with its source pattern and flags
    args = (p.pattern, p.flags)
    return _compile, args

# the pattern type is obtained by compiling an empty pattern
copy_reg.pickle(type(_compile("", 0)),
                _pickle,
                _compile)
225 # --------------------------------------------------------------------
226 # experimental stuff (see python-dev discussions for details)
class Scanner:
    """Experimental scanner driven by a lexicon.

    lexicon is a sequence of (phrase, action) pairs, where phrase is a
    regular expression source string.  All phrases are combined into
    one compound pattern, which scan() applies repeatedly to its input.
    """
    def __init__(self, lexicon):
        from sre_constants import BRANCH, SUBPATTERN
        self.lexicon = lexicon
        # combine phrases into a compound pattern
        p = []
        s = sre_parse.Pattern()
        for phrase, action in lexicon:
            # each phrase becomes one subpattern, numbered by its
            # position in the lexicon (the first phrase gets 0)
            # NOTE(review): scan() indexes the lexicon with m.lastindex;
            # confirm the engine reports these 0-based numbers there.
            p.append(sre_parse.SubPattern(s, [
                (SUBPATTERN, (len(p), sre_parse.parse(phrase))),
                ]))
        # a single BRANCH node selects between the phrases
        p = sre_parse.SubPattern(s, [(BRANCH, (None, p))])
        # NOTE(review): p now holds just the BRANCH node, so len(p) is 1
        # regardless of the lexicon size -- confirm this is intended.
        s.groups = len(p)
        self.scanner = sre_compile.compile(p)
    def scan(self, string):
        """Scan string from the start; return (results, remainder).

        Scanning stops at the first position where nothing matches or
        where the match is empty; remainder is the unscanned tail of
        string.  For each match the action of the phrase selected via
        m.lastindex is looked up: a callable action is called as
        action(scanner, matched_text), with the match object available
        as self.match, and its return value is used in its place.  Any
        resulting value other than None is appended to results.
        """
        result = []
        append = result.append
        match = self.scanner.match
        i = 0
        while 1:
            m = match(string, i)
            if not m:
                break
            j = m.end()
            if i == j:
                # empty match: stop here, no progress can be made
                break
            action = self.lexicon[m.lastindex][1]
            if callable(action):
                self.match = m
                action = action(self, m.group())
            if action is not None:
                append(action)
            i = j
        return result, string[i:]