Updated for 2.1a3
[python/dscho.git] / Lib / test / test_sre.py
blob88c0d62e8db8d1ee1ad3e408709df6fbbf3b9727
1 # SRE test harness for the Python regression suite
3 # this is based on test_re.py, but uses a test function instead
4 # of all those asserts
6 import sys
7 sys.path=['.']+sys.path
9 from test_support import verbose, TestFailed
10 import sre
11 import sys, os, string, traceback
14 # test support
def _expected_name(exception):
    """Return the label printed for an expected exception class."""
    # display name, not actual value
    if exception is sre.error:
        return "sre.error"
    return exception.__name__


def test(expression, result, exception=None):
    """Evaluate *expression* and print a report if the outcome is wrong.

    expression -- source text handed to eval(); with no explicit
                  namespaces it is evaluated against this module's
                  globals and this function's own locals
    result     -- value the expression must compare equal to; only
                  checked when no exception is expected
    exception  -- optional exception class the expression must raise

    Nothing is printed on success and nothing is returned; failures are
    reported on stdout only.  KeyboardInterrupt is always propagated so
    that a run can still be aborted from the keyboard.
    """
    try:
        r = eval(expression)
    except KeyboardInterrupt:
        raise
    except:
        # sys.exc_info() replaces the deprecated sys.exc_type and
        # sys.exc_value globals.
        exc_type, exc_value = sys.exc_info()[:2]
        if not exception:
            # nothing was supposed to be raised: show the full traceback
            print("%s FAILED" % expression)
            traceback.print_exc(file=sys.stdout)
        elif not isinstance(exc_value, exception):
            # something was raised, but not what the caller asked for
            print("%s FAILED" % expression)
            print("expected %s" % _expected_name(exception))
            print("got %s %s" % (exc_type.__name__, str(exc_value)))
        return

    if exception:
        # the expression was supposed to raise but returned a value
        print("%s FAILED" % expression)
        print("expected %s" % _expected_name(exception))
        print("got result %s" % repr(r))
    elif r != result:
        print("%s FAILED" % expression)
        print("expected %s" % repr(result))
        print("got result %s" % repr(r))
if verbose:
    print('Running tests on character literals')

# Each sample code point is written as a three-digit octal escape and as
# a two-digit hex escape, both alone and followed by one more character.
# The loop variable has to be the module-level name "i": the expression
# strings refer to it when test() evaluates them.
for i in (0, 8, 16, 32, 64, 127, 128, 255):
    for literal_check in (
            r"""sre.match(r"\%03o" % i, chr(i)) is not None""",
            r"""sre.match(r"\%03o0" % i, chr(i)+"0") is not None""",
            r"""sre.match(r"\%03o8" % i, chr(i)+"8") is not None""",
            r"""sre.match(r"\x%02x" % i, chr(i)) is not None""",
            r"""sre.match(r"\x%02x0" % i, chr(i)+"0") is not None""",
            r"""sre.match(r"\x%02xz" % i, chr(i)+"z") is not None""",
            ):
        test(literal_check, 1)

# "\911" is expected to be refused with sre.error
test(r"""sre.match("\911", "")""", None, sre.error)
# Misc tests from Tim Peters' re.doc

if verbose:
    print('Running tests on sre.search and sre.match')

# (expression, expected value) pairs, run in the order listed
for span_check, span_wanted in (
        (r"""sre.search(r'x*', 'axx').span(0)""", (0, 0)),
        (r"""sre.search(r'x*', 'axx').span()""", (0, 0)),
        (r"""sre.search(r'x+', 'axx').span(0)""", (1, 3)),
        (r"""sre.search(r'x+', 'axx').span()""", (1, 3)),
        (r"""sre.search(r'x', 'aaa')""", None),

        (r"""sre.match(r'a*', 'xxx').span(0)""", (0, 0)),
        (r"""sre.match(r'a*', 'xxx').span()""", (0, 0)),
        (r"""sre.match(r'x*', 'xxxa').span(0)""", (0, 3)),
        (r"""sre.match(r'x*', 'xxxa').span()""", (0, 3)),
        (r"""sre.match(r'a+', 'xxx')""", None),

        # bug 113254
        (r"""sre.match(r'(a)|(b)', 'b').start(1)""", -1),
        (r"""sre.match(r'(a)|(b)', 'b').end(1)""", -1),
        (r"""sre.match(r'(a)|(b)', 'b').span(1)""", (-1, -1)),
        ):
    test(span_check, span_wanted)
if verbose:
    print('Running tests on sre.sub')

# with (?i) both the lower-case and the upper-case run of b's become "x"
test(
    r"""sre.sub(r"(?i)b+", "x", "bbbb BBBB")""",
    'x x',
)
def bump_num(matchobj):
    """Return the integer matched by *matchobj*, plus one, as a string.

    Used as a callable replacement for sre.sub; group 0 of the match
    must be text that int() accepts.
    """
    return str(int(matchobj.group(0)) + 1)
# Replacement template shared by several checks below; it stays a
# module-level name because the expression strings refer to "s".
s = r"\1\1"

# (expression, expected value) pairs, run in the order listed
for sub_check, sub_wanted in (
        # callable replacement, without and with a count limit
        (r"""sre.sub(r'\d+', bump_num, '08.2 -2 23x99y')""", '9.3 -3 24x100y'),
        (r"""sre.sub(r'\d+', bump_num, '08.2 -2 23x99y', 3)""", '9.3 -3 23x99y'),

        # the value returned by a callable is inserted as-is, while a
        # template string has its escapes processed
        (r"""sre.sub(r'.', lambda m: r"\n", 'x')""", '\\n'),
        (r"""sre.sub(r'.', r"\n", 'x')""", '\n'),

        # numeric group references: expanded, escaped, or from a callable
        (r"""sre.sub(r'(.)', s, 'x')""", 'xx'),
        (r"""sre.sub(r'(.)', sre.escape(s), 'x')""", s),
        (r"""sre.sub(r'(.)', lambda m: s, 'x')""", s),

        # \g<...> references by name and by number
        (r"""sre.sub(r'(?P<a>x)', '\g<a>\g<a>', 'xx')""", 'xxxx'),
        (r"""sre.sub(r'(?P<a>x)', '\g<a>\g<1>', 'xx')""", 'xxxx'),
        (r"""sre.sub(r'(?P<unk>x)', '\g<unk>\g<unk>', 'xx')""", 'xxxx'),
        (r"""sre.sub(r'(?P<unk>x)', '\g<1>\g<1>', 'xx')""", 'xxxx'),

        # character escapes inside the replacement template
        (r"""sre.sub(r'a', r'\t\n\v\r\f\a\b\B\Z\a\A\w\W\s\S\d\D', 'a')""", '\t\n\v\r\f\a\b\\B\\Z\a\\A\\w\\W\\s\\S\\d\\D'),
        (r"""sre.sub(r'a', '\t\n\v\r\f\a', 'a')""", '\t\n\v\r\f\a'),
        (r"""sre.sub(r'a', '\t\n\v\r\f\a', 'a')""", (chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7))),

        (r"""sre.sub(r'^\s*', 'X', 'test')""", 'Xtest'),

        # qualified sub
        (r"""sre.sub(r'a', 'b', 'aaaaa')""", 'bbbbb'),
        (r"""sre.sub(r'a', 'b', 'aaaaa', 1)""", 'baaaa'),

        # bug 114660
        (r"""sre.sub(r'(\S)\s+(\S)', r'\1 \2', 'hello there')""", 'hello there'),
        ):
    test(sub_check, sub_wanted)
if verbose:
    print('Running tests on symbolic references')

# Every template below must be rejected; the second field is the
# exception class the substitution is expected to raise.
for template_check, template_error in (
        (r"""sre.sub(r'(?P<a>x)', '\g<a', 'xx')""", sre.error),
        (r"""sre.sub(r'(?P<a>x)', '\g<', 'xx')""", sre.error),
        (r"""sre.sub(r'(?P<a>x)', '\g', 'xx')""", sre.error),
        (r"""sre.sub(r'(?P<a>x)', '\g<a a>', 'xx')""", sre.error),
        (r"""sre.sub(r'(?P<a>x)', '\g<1a1>', 'xx')""", sre.error),
        (r"""sre.sub(r'(?P<a>x)', '\g<ab>', 'xx')""", IndexError),
        (r"""sre.sub(r'(?P<a>x)|(?P<b>y)', '\g<b>', 'xx')""", sre.error),
        (r"""sre.sub(r'(?P<a>x)|(?P<b>y)', '\\2', 'xx')""", sre.error),
        ):
    test(template_check, None, template_error)
if verbose:
    print('Running tests on sre.subn')

# subn is checked as a (new string, number of substitutions) pair
for subn_check, subn_wanted in (
        (r"""sre.subn(r"(?i)b+", "x", "bbbb BBBB")""", ('x x', 2)),
        (r"""sre.subn(r"b+", "x", "bbbb BBBB")""", ('x BBBB', 1)),
        (r"""sre.subn(r"b+", "x", "xyz")""", ('xyz', 0)),
        (r"""sre.subn(r"b*", "x", "xyz")""", ('xxxyxzx', 4)),
        (r"""sre.subn(r"b*", "x", "xyz", 2)""", ('xxxyz', 2)),
        ):
    test(subn_check, subn_wanted)
if verbose:
    print('Running tests on sre.split')

# (expression, expected list) pairs, run in the order listed
for split_check, split_wanted in (
        (r"""sre.split(r":", ":a:b::c")""", ['', 'a', 'b', '', 'c']),
        (r"""sre.split(r":*", ":a:b::c")""", ['', 'a', 'b', 'c']),
        (r"""sre.split(r"(:*)", ":a:b::c")""", ['', ':', 'a', ':', 'b', '::', 'c']),
        (r"""sre.split(r"(?::*)", ":a:b::c")""", ['', 'a', 'b', 'c']),
        (r"""sre.split(r"(:)*", ":a:b::c")""", ['', ':', 'a', ':', 'b', ':', 'c']),
        (r"""sre.split(r"([b:]+)", ":a:b::c")""", ['', ':', 'a', ':b::', 'c']),
        (r"""sre.split(r"(b)|(:+)", ":a:b::c")""",
         ['', None, ':', 'a', None, ':', '', 'b', None, '', None, '::', 'c']),
        (r"""sre.split(r"(?:b)|(?::+)", ":a:b::c")""", ['', 'a', '', '', 'c']),

        # with an explicit maximum number of splits
        (r"""sre.split(r":", ":a:b::c", 2)""", ['', 'a', 'b::c']),
        (r"""sre.split(r':', 'a:b:c:d', 2)""", ['a', 'b', 'c:d']),

        (r"""sre.split(r"(:)", ":a:b::c", 2)""", ['', ':', 'a', ':', 'b::c']),
        (r"""sre.split(r"(:*)", ":a:b::c", 2)""", ['', ':', 'a', ':', 'b::c']),
        ):
    test(split_check, split_wanted)
if verbose:
    print("Running tests on sre.findall")

# (expression, expected list) pairs, run in the order listed
for findall_check, findall_wanted in (
        (r"""sre.findall(r":+", "abc")""", []),
        (r"""sre.findall(r":+", "a:b::c:::d")""", [":", "::", ":::"]),
        (r"""sre.findall(r"(:+)", "a:b::c:::d")""", [":", "::", ":::"]),
        (r"""sre.findall(r"(:)(:*)", "a:b::c:::d")""",
         [(":", ""), (":", ":"), (":", "::")]),
        (r"""sre.findall(r"(a)|(b)", "abc")""", [("a", ""), ("", "b")]),

        # bug 117612
        (r"""sre.findall(r"(a|(b))", "aba")""", [("a", ""),("b", "b"),("a", "")]),
        ):
    test(findall_check, findall_wanted)
if verbose:
    print("Running tests on sre.match")

for group_check, group_wanted in (
        (r"""sre.match(r'a', 'a').groups()""", ()),
        (r"""sre.match(r'(a)', 'a').groups()""", ('a',)),
        (r"""sre.match(r'(a)', 'a').group(0)""", 'a'),
        (r"""sre.match(r'(a)', 'a').group(1)""", 'a'),
        (r"""sre.match(r'(a)', 'a').group(1, 1)""", ('a', 'a')),
        ):
    test(group_check, group_wanted)

# "pat" has to be a module-level name: the expressions below use it.
# Groups that did not take part in the match come back as None, or as
# the default passed to groups().
pat = sre.compile(r'((a)|(b))(c)?')
for group_check, group_wanted in (
        (r"""pat.match('a').groups()""", ('a', 'a', None, None)),
        (r"""pat.match('b').groups()""", ('b', None, 'b', None)),
        (r"""pat.match('ac').groups()""", ('a', 'a', None, 'c')),
        (r"""pat.match('bc').groups()""", ('b', None, 'b', 'c')),
        (r"""pat.match('bc').groups("")""", ('b', "", 'b', 'c')),
        ):
    test(group_check, group_wanted)

# the same idea with named groups, addressed by number and by name
pat = sre.compile(r'(?:(?P<a1>a)|(?P<b2>b))(?P<c3>c)?')
for group_check, group_wanted in (
        (r"""pat.match('a').group(1, 2, 3)""", ('a', None, None)),
        (r"""pat.match('b').group('a1', 'b2', 'c3')""", (None, 'b', None)),
        (r"""pat.match('ac').group(1, 'b2', 3)""", ('a', None, 'c')),
        ):
    test(group_check, group_wanted)
if verbose:
    print("Running tests on sre.escape")

# First escape each of the 256 character codes on its own, collecting
# them in "p" along the way ("i" and "p" are referenced by name from
# the expression strings, so they must stay module-level names).
p = ""
for i in range(256):
    p += chr(i)
    test(r"""sre.match(sre.escape(chr(i)), chr(i)) is not None""", 1)
    test(r"""sre.match(sre.escape(chr(i)), chr(i)).span()""", (0,1))

# Then escape all of them at once and match the whole 256-character string.
pat = sre.compile(sre.escape(p))
for escape_check, escape_wanted in (
        (r"""pat.match(p) is not None""", 1),
        (r"""pat.match(p).span()""", (0,256)),
        ):
    test(escape_check, escape_wanted)
if verbose:
    print('Pickling a SRE_Pattern instance')

# Round-trip a compiled pattern through pickle.  Any exception, including
# a failing import, is reported on stdout instead of aborting the run.
try:
    import pickle
    pat = sre.compile(r'a(?:b|(c|e){1,2}?|d)+?(.)')
    s = pickle.dumps(pat)
    pat = pickle.loads(s)
except:
    print('%s %s' % (TestFailed, 're module pickle')) # expected

# Same round trip through cPickle.
try:
    import cPickle
    pat = sre.compile(r'a(?:b|(c|e){1,2}?|d)+?(.)')
    s = cPickle.dumps(pat)
    pat = cPickle.loads(s)
except:
    print('%s %s' % (TestFailed, 're module cPickle')) # expected
# constants
# Each one-letter flag must compare equal to its spelled-out counterpart.
# The long name is looked up only when its own check runs.
for flag_check, flag_long_name in (
        (r"""sre.I""", "IGNORECASE"),
        (r"""sre.L""", "LOCALE"),
        (r"""sre.M""", "MULTILINE"),
        (r"""sre.S""", "DOTALL"),
        (r"""sre.X""", "VERBOSE"),
        (r"""sre.T""", "TEMPLATE"),
        (r"""sre.U""", "UNICODE"),
        ):
    test(flag_check, getattr(sre, flag_long_name))

# Compile a trivial pattern under each single flag and report any flag
# for which compilation raises.
for flags in (sre.I, sre.M, sre.X, sre.S, sre.L, sre.T, sre.U):
    try:
        r = sre.compile('^pattern$', flags)
    except:
        print('Exception raised on flag %s' % flags)
if verbose:
    print('Test engine limitations')

# Try nasty case that overflows the straightforward recursive
# implementation of repeated groups.
# Fields: expression, expected span, expected exception (None = none).
for limit_check, limit_span, limit_error in (
        (r"""sre.match(r'(x)*', 50000*'x').span()""",
         (0, 50000), RuntimeError),
        (r"""sre.match(r'(x)*y', 50000*'x'+'y').span()""",
         (0, 50001), RuntimeError),
        (r"""sre.match(r'(x)*?y', 50000*'x'+'y').span()""",
         (0, 50001), None), # this works in 2.1
        ):
    test(limit_check, limit_span, limit_error)
253 from re_tests import *
if verbose:
    print('Running re_tests test suite')
# A quiet run could save time by only running the first and last 10
# tests; that shortcut is currently disabled:
#tests = tests[:10] + tests[-10:]
def _group_text(match, key):
    """Return group *key* of *match* in a form safe for string concatenation.

    A group that did not participate comes back as the string "None" and
    a group that does not exist as the string "Error".
    """
    try:
        value = match.group(key)
    except IndexError:
        return "Error"
    # Special hack because else the string concat fails:
    if value is None:
        return "None"
    return value


# Extra compile flags under which a successful pattern must still match,
# paired with the wording used in the failure report.
_FLAG_VARIANTS = (
    (sre.IGNORECASE, 'case-insensitive'),
    (sre.LOCALE, 'locale-sensitive'),
    (sre.UNICODE, 'unicode-sensitive'),
)

# Run every entry of the re_tests table.  An entry is either
# (pattern, string, outcome) or
# (pattern, string, outcome, repl expression, expected repl value).
# Problems are reported on stdout; the loop itself only raises for a
# malformed entry or a KeyboardInterrupt.
for t in tests:
    sys.stdout.flush()
    pattern = s = outcome = repl = expected = None
    if len(t) == 5:
        pattern, s, outcome, repl, expected = t
    elif len(t) == 3:
        pattern, s, outcome = t
    else:
        raise ValueError('Test tuples should have 3 or 5 fields', t)

    try:
        obj = sre.compile(pattern)
    except sre.error:
        # A syntax error is only worth reporting when it was not expected.
        if outcome != SYNTAX_ERROR:
            print('=== Syntax error: %s' % (t,))
        continue
    except KeyboardInterrupt:
        raise
    except:
        print('*** Unexpected error *** %s' % (t,))
        if verbose:
            traceback.print_exc(file=sys.stdout)
        continue

    try:
        result = obj.search(s)
    except sre.error:
        msg = sys.exc_info()[1]
        print('=== Unexpected exception %s %s' % (t, repr(msg)))
        # There is no match object to judge: falling through would use an
        # unbound name or the previous entry's result.
        continue

    if outcome == SYNTAX_ERROR:
        # the pattern compiled although it should have been rejected
        print('=== Compiled incorrectly %s' % (t,))
        continue
    if outcome == FAIL:
        if result is not None:
            print('=== Succeeded incorrectly %s' % (t,))
        continue
    if outcome != SUCCEED:
        continue
    if result is None:
        print('=== Failed incorrectly %s' % (t,))
        continue

    # Matched, as expected, so now we compute the
    # result string and compare it to our expected result.
    # The span of this first match is kept for the range-limited check
    # below, so that check does not depend on the outcome of the unicode
    # checks in between.
    start, end = result.span(0)
    # NOTE(review): 'groups' is bound to result.group(), i.e. the whole
    # match (same as 'found'), not result.groups().  Left unchanged
    # because the repl expressions in re_tests are not visible here.
    vardict = {'found': result.group(0),
               'groups': result.group(),
               'flags': result.re.flags}
    for i in range(1, 100):
        vardict['g%d' % i] = _group_text(result, i)
    for name in result.re.groupindex.keys():
        vardict[name] = _group_text(result, name)
    repl = eval(repl, vardict)
    if repl != expected:
        print('=== grouping error %s %s'
              % (t, repr(repl)+' should be '+repr(expected)))

    # Try the match on a unicode string, and check that it
    # still succeeds.  The check is skipped when the interpreter has no
    # "unicode" builtin.
    try:
        u = unicode(s, "latin-1")
    except NameError:
        pass
    else:
        if obj.search(u) is None:
            print('=== Fails on unicode match %s' % (t,))

    # Try the match on a unicode pattern, and check that it
    # still succeeds.
    try:
        u = unicode(pattern, "latin-1")
    except NameError:
        pass
    else:
        if sre.compile(u).search(s) is None:
            print('=== Fails on unicode pattern match %s' % (t,))

    # Try the match with the search area limited to the extent
    # of the match and see if it still succeeds.  \B will
    # break (because it won't match at the end or start of a
    # string), so we'll ignore patterns that feature it.
    # The window ends one character past the match (end + 1), as in the
    # original code -- presumably so a trailing assertion still has a
    # character to look at; TODO confirm.
    if pattern[:2] != '\\B' and pattern[-2:] != '\\B':
        if obj.search(s, start, end + 1) is None:
            print('=== Failed on range-limited match %s' % (t,))

    # Try the match with IGNORECASE, LOCALE and UNICODE enabled in turn,
    # and check that it still succeeds each time.
    for flag, label in _FLAG_VARIANTS:
        if sre.compile(pattern, flag).search(s) is None:
            print('=== Fails on %s match %s' % (label, t))