Add forgotten initialization. Fixes bug #120994, "Traceback with
[python/dscho.git] / Lib / sre.py
blob6dea5c40456f23a7dcd0e04eadfda16cad631630
2 # Secret Labs' Regular Expression Engine
4 # re-compatible interface for the sre matching engine
6 # Copyright (c) 1998-2000 by Secret Labs AB. All rights reserved.
8 # This version of the SRE library can be redistributed under CNRI's
9 # Python 1.6 license. For any other use, please contact Secret Labs
10 # AB (info@pythonware.com).
12 # Portions of this engine have been developed in cooperation with
13 # CNRI. Hewlett-Packard provided funding for 1.6 integration and
14 # other compatibility work.
17 # FIXME: change all FIXME's to XXX ;-)
19 import sre_compile
20 import sre_parse
22 import string
# flags (each long name also gets a one-letter alias)
IGNORECASE = sre_compile.SRE_FLAG_IGNORECASE
I = IGNORECASE
LOCALE = sre_compile.SRE_FLAG_LOCALE
L = LOCALE
MULTILINE = sre_compile.SRE_FLAG_MULTILINE
M = MULTILINE
DOTALL = sre_compile.SRE_FLAG_DOTALL
S = DOTALL
VERBOSE = sre_compile.SRE_FLAG_VERBOSE
X = VERBOSE

# sre extensions (may or may not be in 1.6/2.0 final)
TEMPLATE = sre_compile.SRE_FLAG_TEMPLATE
T = TEMPLATE
UNICODE = sre_compile.SRE_FLAG_UNICODE
U = UNICODE

# sre exception (re-exported from sre_compile so callers can catch it here)
error = sre_compile.error
38 # --------------------------------------------------------------------
39 # public interface
41 # FIXME: add docstrings
def match(pattern, string, flags=0):
    """Compile pattern with flags and apply its match() method to string.

    Returns whatever the compiled pattern's match() returns.
    """
    compiled = _compile(pattern, flags)
    return compiled.match(string)
def search(pattern, string, flags=0):
    """Compile pattern with flags and apply its search() method to string.

    Returns whatever the compiled pattern's search() returns.
    """
    compiled = _compile(pattern, flags)
    return compiled.search(string)
def sub(pattern, repl, string, count=0):
    """Compile pattern and return the result of its sub(repl, string, count).

    No flags argument is accepted here; the pattern is always compiled
    with flags 0.
    """
    compiled = _compile(pattern, 0)
    return compiled.sub(repl, string, count)
def subn(pattern, repl, string, count=0):
    """Compile pattern and return the result of its subn(repl, string, count).

    No flags argument is accepted here; the pattern is always compiled
    with flags 0.
    """
    compiled = _compile(pattern, 0)
    return compiled.subn(repl, string, count)
def split(pattern, string, maxsplit=0):
    """Compile pattern and return the result of its split(string, maxsplit).

    No flags argument is accepted here; the pattern is always compiled
    with flags 0.
    """
    compiled = _compile(pattern, 0)
    return compiled.split(string, maxsplit)
def findall(pattern, string, maxsplit=0):
    """Compile pattern and return the result of its findall(string, maxsplit).

    No flags argument is accepted here; the pattern is always compiled
    with flags 0.
    """
    # NOTE(review): the parameter name "maxsplit" looks copied from split();
    # it is forwarded as the second positional argument of the compiled
    # pattern's findall() -- confirm what that argument means there.
    compiled = _compile(pattern, 0)
    return compiled.findall(string, maxsplit)
def compile(pattern, flags=0):
    """Return the compiled form of pattern for the given flags.

    Public entry point for the internal _compile(), so the result goes
    through the module's pattern cache.
    """
    compiled = _compile(pattern, flags)
    return compiled
def purge():
    """Empty the module-level cache of compiled patterns."""
    _cache.clear()
def template(pattern, flags=0):
    """Compile pattern with the TEMPLATE flag (T) OR-ed into flags."""
    template_flags = flags | T
    return _compile(pattern, template_flags)
def escape(pattern):
    """Return pattern with every non-alphanumeric character backslashed.

    Characters in a-z, A-Z and 0-9 are copied through unchanged.  A NUL
    character becomes the four characters \\000; any other character is
    preceded by a single backslash.  The result has the same string type
    as pattern.
    """
    pieces = []
    for c in pattern:
        if "a" <= c <= "z" or "A" <= c <= "Z" or "0" <= c <= "9":
            pieces.append(c)
        elif c == "\000":
            pieces.append("\\000")
        else:
            pieces.append("\\" + c)
    # joining on an empty slice of pattern keeps the result's string type
    return pattern[:0].join(pieces)
81 # --------------------------------------------------------------------
82 # internals
# Compiled-pattern cache.  _compile() keys it by its (pattern, flags)
# argument tuple and empties it completely once it already holds
# _MAXCACHE entries when a new one is about to be stored.
_MAXCACHE = 100
_cache = {}
def _join(seq, sep):
    """internal: join seq into a string having the same type as sep.

    Only the type of sep matters: the items of seq are concatenated on an
    empty slice of sep, so sep's contents never appear in the result.
    """
    # Use the string method rather than the string-module function; the
    # two are equivalent, and the method does not depend on the module.
    empty = sep[:0]
    return empty.join(seq)
def _compile(*key):
    """internal: compile a pattern, memoizing the result in _cache.

    key is the (pattern, flags) argument tuple; it doubles as the cache
    key.  If pattern's type is not listed in sre_compile.STRING_TYPES it
    is returned unchanged and nothing is cached (presumably it is already
    a compiled pattern -- this is not checked here).  Exceptions raised by
    sre_compile.compile(), including error for an invalid expression,
    propagate to the caller.
    """
    cached = _cache.get(key)
    if cached is not None:
        return cached
    pattern, flags = key
    if type(pattern) not in sre_compile.STRING_TYPES:
        return pattern
    # No try/except around the compile call: catching error only to raise
    # the same class with the same value changed nothing for callers and
    # threw away the traceback pointing at the real failure.
    compiled = sre_compile.compile(pattern, flags)
    if len(_cache) >= _MAXCACHE:
        # crude eviction: once full, drop everything and start over
        _cache.clear()
    _cache[key] = compiled
    return compiled
def _expand(pattern, match, template):
    """internal: match.expand implementation hook.

    Parses template against pattern with sre_parse.parse_template() and
    returns the result of sre_parse.expand_template() for match.
    """
    parsed = sre_parse.parse_template(template, pattern)
    return sre_parse.expand_template(parsed, match)
def _sub(pattern, template, string, count=0):
    """internal: pattern.sub implementation hook.

    Same as _subn(), but returns only the first item of its result (the
    new string), not the substitution count.
    """
    result = _subn(pattern, template, string, count)
    return result[0]
def _subn(pattern, template, string, count=0):
    """internal: pattern.subn implementation hook.

    Walks the matches produced by pattern.scanner(string) and replaces
    each one.  If template is callable it is called with each match
    object and its return value is used as the replacement; otherwise
    template is parsed once with sre_parse.parse_template() and expanded
    per match with sre_parse.expand_template().  A count of 0 means no
    limit; otherwise at most count replacements are made.

    Returns a 2-tuple (new_string, number_of_replacements); new_string
    has the same string type as string.
    """
    if callable(template):
        replace = template
    else:
        parsed = sre_parse.parse_template(template, pattern)
        # bind parsed as a default argument so the helper does not depend
        # on nested scopes
        def replace(match, parsed=parsed):
            return sre_parse.expand_template(parsed, match)
    pieces = []
    done = 0    # replacements made so far
    last = 0    # end offset of the previous match
    scanner = pattern.scanner(string)
    while 1:
        if count and done >= count:
            break
        m = scanner.search()
        if not m:
            break
        start, end = m.span()
        if last < start:
            # copy the unmatched text between the two matches
            pieces.append(string[last:start])
        pieces.append(replace(m))
        last = end
        done = done + 1
    pieces.append(string[last:])
    # joining on an empty slice of string keeps the result's string type
    return string[:0].join(pieces), done
def _split(pattern, string, maxsplit=0):
    """internal: pattern.split implementation hook.

    Splits string at each non-empty match produced by
    pattern.scanner(string) and returns the pieces as a list.  If
    pattern.groups is non-zero, the groups of each separator match are
    inserted after the piece that precedes it.  A maxsplit of 0 means no
    limit; otherwise at most maxsplit splits are made and the rest of the
    string is returned as the final item.
    """
    pieces = []
    splits = 0  # splits made so far
    last = 0    # end offset of the previous separator
    scanner = pattern.scanner(string)
    has_groups = pattern.groups
    while 1:
        if maxsplit and splits >= maxsplit:
            break
        m = scanner.search()
        if not m:
            break
        start, end = m.span()
        if start == end:
            # zero-length match: never split on it
            if last >= len(string):
                break
            continue
        pieces.append(string[last:start])
        # the match is known to be non-empty here, so only the group
        # count needs checking
        if has_groups:
            pieces.extend(m.groups())
        last = end
        splits = splits + 1
    pieces.append(string[last:])
    return pieces
# register myself for pickling

import copy_reg

def _pickle(p):
    """internal: copy_reg reduce hook for compiled patterns.

    Returns (_compile, (p.pattern, p.flags)) so that the pattern is
    rebuilt by calling _compile() with its source pattern and flags.
    """
    args = (p.pattern, p.flags)
    return _compile, args

# the compiled-pattern type is obtained by compiling an empty pattern
copy_reg.pickle(type(_compile("", 0)), _pickle, _compile)
176 # --------------------------------------------------------------------
177 # experimental stuff (see python-dev discussions for details)
class Scanner:
    """experimental tokenizer built on top of the sre engine.

    lexicon is a sequence of (phrase, action) pairs.  All phrases are
    combined into one compiled pattern made of alternative branches;
    scan() then applies that pattern repeatedly and maps each match back
    to the action of the phrase that matched.
    """

    def __init__(self, lexicon):
        from sre_constants import BRANCH, SUBPATTERN
        self.lexicon = lexicon
        # combine phrases into a compound pattern
        p = []
        s = sre_parse.Pattern()
        for phrase, action in lexicon:
            # each phrase becomes a subpattern whose group id is its
            # position in lexicon (len(p) before the append)
            p.append(sre_parse.SubPattern(s, [
                (SUBPATTERN, (len(p), sre_parse.parse(phrase))),
                ]))
        p = sre_parse.SubPattern(s, [(BRANCH, (None, p))])
        # NOTE(review): p was rebound to the single BRANCH subpattern just
        # above, so len(p) here is no longer the number of phrases --
        # confirm this is the group count the compiler expects.
        s.groups = len(p)
        self.scanner = sre_compile.compile(p)

    def scan(self, string):
        """Tokenize string; return (results, remainder).

        Starting at offset 0, the compound pattern is matched at the
        current offset until it fails to match or matches the empty
        string.  For each match the action is looked up with
        self.lexicon[m.lastindex][1].  A callable action is called as
        action(self, matched_text), with the current match object
        available as self.match, and its return value replaces the
        action.  The resulting value is appended to results unless it is
        None.  remainder is the part of string that was not consumed.
        """
        result = []
        append = result.append
        match = self.scanner.match
        i = 0
        while 1:
            m = match(string, i)
            if not m:
                break
            j = m.end()
            if i == j:
                # zero-length match: stop rather than loop forever
                break
            action = self.lexicon[m.lastindex][1]
            if callable(action):
                # expose the match object (not the pattern's bound match
                # method) so the callback can inspect groups and offsets
                self.match = m
                action = action(self, m.group())
            if action is not None:
                append(action)
            i = j
        return result, string[i:]