Merged release21-maint changes.
[python/dscho.git] / Lib / test / test_sre.py
blobf673c339fe9943bd8a48768e535ec7c9f2b0d5bb
1 # SRE test harness for the Python regression suite
3 # this is based on test_re.py, but uses a test function instead
4 # of all those asserts
6 import sys
7 sys.path=['.']+sys.path
9 from test_support import verbose, TestFailed
10 import sre
11 import sys, os, string, traceback
14 # test support
16 def test(expression, result, exception=None):
17 try:
18 r = eval(expression)
19 except:
20 if exception:
21 if not isinstance(sys.exc_value, exception):
22 print expression, "FAILED"
23 # display name, not actual value
24 if exception is sre.error:
25 print "expected", "sre.error"
26 else:
27 print "expected", exception.__name__
28 print "got", sys.exc_type.__name__, str(sys.exc_value)
29 else:
30 print expression, "FAILED"
31 traceback.print_exc(file=sys.stdout)
32 else:
33 if exception:
34 print expression, "FAILED"
35 if exception is sre.error:
36 print "expected", "sre.error"
37 else:
38 print "expected", exception.__name__
39 print "got result", repr(r)
40 else:
41 if r != result:
42 print expression, "FAILED"
43 print "expected", repr(result)
44 print "got result", repr(r)
46 if verbose:
47 print 'Running tests on character literals'
49 for i in [0, 8, 16, 32, 64, 127, 128, 255]:
50 test(r"""sre.match(r"\%03o" % i, chr(i)) is not None""", 1)
51 test(r"""sre.match(r"\%03o0" % i, chr(i)+"0") is not None""", 1)
52 test(r"""sre.match(r"\%03o8" % i, chr(i)+"8") is not None""", 1)
53 test(r"""sre.match(r"\x%02x" % i, chr(i)) is not None""", 1)
54 test(r"""sre.match(r"\x%02x0" % i, chr(i)+"0") is not None""", 1)
55 test(r"""sre.match(r"\x%02xz" % i, chr(i)+"z") is not None""", 1)
56 test(r"""sre.match("\911", "")""", None, sre.error)
59 # Misc tests from Tim Peters' re.doc
61 if verbose:
62 print 'Running tests on sre.search and sre.match'
64 test(r"""sre.search(r'x*', 'axx').span(0)""", (0, 0))
65 test(r"""sre.search(r'x*', 'axx').span()""", (0, 0))
66 test(r"""sre.search(r'x+', 'axx').span(0)""", (1, 3))
67 test(r"""sre.search(r'x+', 'axx').span()""", (1, 3))
68 test(r"""sre.search(r'x', 'aaa')""", None)
70 test(r"""sre.match(r'a*', 'xxx').span(0)""", (0, 0))
71 test(r"""sre.match(r'a*', 'xxx').span()""", (0, 0))
72 test(r"""sre.match(r'x*', 'xxxa').span(0)""", (0, 3))
73 test(r"""sre.match(r'x*', 'xxxa').span()""", (0, 3))
74 test(r"""sre.match(r'a+', 'xxx')""", None)
76 # bug 113254
77 test(r"""sre.match(r'(a)|(b)', 'b').start(1)""", -1)
78 test(r"""sre.match(r'(a)|(b)', 'b').end(1)""", -1)
79 test(r"""sre.match(r'(a)|(b)', 'b').span(1)""", (-1, -1))
81 if verbose:
82 print 'Running tests on sre.sub'
84 test(r"""sre.sub(r"(?i)b+", "x", "bbbb BBBB")""", 'x x')
def bump_num(matchobj):
    """Return the integer matched by *matchobj*, plus one, as a string.

    Used as the replacement callback in the sre.sub tests.
    """
    return str(int(matchobj.group(0)) + 1)
# Replacement template referenced by name from several expressions below;
# it has to be a module-level name so that eval() inside test() sees it.
s = r"\1\1"

# (expression, expected value) pairs, checked in order.
for _expr, _want in [
    # callable replacement, without and with a count limit
    (r"""sre.sub(r'\d+', bump_num, '08.2 -2 23x99y')""", '9.3 -3 24x100y'),
    (r"""sre.sub(r'\d+', bump_num, '08.2 -2 23x99y', 3)""", '9.3 -3 23x99y'),

    # a callable's return value is used as is; a template is unescaped
    (r"""sre.sub(r'.', lambda m: r"\n", 'x')""", '\\n'),
    (r"""sre.sub(r'.', r"\n", 'x')""", '\n'),

    (r"""sre.sub(r'(.)', s, 'x')""", 'xx'),
    (r"""sre.sub(r'(.)', sre.escape(s), 'x')""", s),
    (r"""sre.sub(r'(.)', lambda m: s, 'x')""", s),

    (r"""sre.sub(r'(?P<a>x)', '\g<a>\g<a>', 'xx')""", 'xxxx'),
    (r"""sre.sub(r'(?P<a>x)', '\g<a>\g<1>', 'xx')""", 'xxxx'),
    (r"""sre.sub(r'(?P<unk>x)', '\g<unk>\g<unk>', 'xx')""", 'xxxx'),
    (r"""sre.sub(r'(?P<unk>x)', '\g<1>\g<1>', 'xx')""", 'xxxx'),

    (r"""sre.sub(r'a', r'\t\n\v\r\f\a\b\B\Z\a\A\w\W\s\S\d\D', 'a')""", '\t\n\v\r\f\a\b\\B\\Z\a\\A\\w\\W\\s\\S\\d\\D'),
    (r"""sre.sub(r'a', '\t\n\v\r\f\a', 'a')""", '\t\n\v\r\f\a'),
    (r"""sre.sub(r'a', '\t\n\v\r\f\a', 'a')""", (chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7))),

    (r"""sre.sub(r'^\s*', 'X', 'test')""", 'Xtest'),

    # qualified sub
    (r"""sre.sub(r'a', 'b', 'aaaaa')""", 'bbbbb'),
    (r"""sre.sub(r'a', 'b', 'aaaaa', 1)""", 'baaaa'),

    # bug 114660
    (r"""sre.sub(r'(\S)\s+(\S)', r'\1 \2', 'hello there')""", 'hello there'),

    # Test for sub() on escaped characters, see SF bug #449000
    (r"""sre.sub(r'\r\n', r'\n', 'abc\r\ndef\r\n')""", 'abc\ndef\n'),
    (r"""sre.sub('\r\n', r'\n', 'abc\r\ndef\r\n')""", 'abc\ndef\n'),
    (r"""sre.sub(r'\r\n', '\n', 'abc\r\ndef\r\n')""", 'abc\ndef\n'),
    (r"""sre.sub('\r\n', '\n', 'abc\r\ndef\r\n')""", 'abc\ndef\n'),
    ]:
    test(_expr, _want)
126 if verbose:
127 print 'Running tests on symbolic references'
129 test(r"""sre.sub(r'(?P<a>x)', '\g<a', 'xx')""", None, sre.error)
130 test(r"""sre.sub(r'(?P<a>x)', '\g<', 'xx')""", None, sre.error)
131 test(r"""sre.sub(r'(?P<a>x)', '\g', 'xx')""", None, sre.error)
132 test(r"""sre.sub(r'(?P<a>x)', '\g<a a>', 'xx')""", None, sre.error)
133 test(r"""sre.sub(r'(?P<a>x)', '\g<1a1>', 'xx')""", None, sre.error)
134 test(r"""sre.sub(r'(?P<a>x)', '\g<ab>', 'xx')""", None, IndexError)
135 test(r"""sre.sub(r'(?P<a>x)|(?P<b>y)', '\g<b>', 'xx')""", None, sre.error)
136 test(r"""sre.sub(r'(?P<a>x)|(?P<b>y)', '\\2', 'xx')""", None, sre.error)
138 if verbose:
139 print 'Running tests on sre.subn'
141 test(r"""sre.subn(r"(?i)b+", "x", "bbbb BBBB")""", ('x x', 2))
142 test(r"""sre.subn(r"b+", "x", "bbbb BBBB")""", ('x BBBB', 1))
143 test(r"""sre.subn(r"b+", "x", "xyz")""", ('xyz', 0))
144 test(r"""sre.subn(r"b*", "x", "xyz")""", ('xxxyxzx', 4))
145 test(r"""sre.subn(r"b*", "x", "xyz", 2)""", ('xxxyz', 2))
147 if verbose:
148 print 'Running tests on sre.split'
150 test(r"""sre.split(r":", ":a:b::c")""", ['', 'a', 'b', '', 'c'])
151 test(r"""sre.split(r":*", ":a:b::c")""", ['', 'a', 'b', 'c'])
152 test(r"""sre.split(r"(:*)", ":a:b::c")""", ['', ':', 'a', ':', 'b', '::', 'c'])
153 test(r"""sre.split(r"(?::*)", ":a:b::c")""", ['', 'a', 'b', 'c'])
154 test(r"""sre.split(r"(:)*", ":a:b::c")""", ['', ':', 'a', ':', 'b', ':', 'c'])
155 test(r"""sre.split(r"([b:]+)", ":a:b::c")""", ['', ':', 'a', ':b::', 'c'])
156 test(r"""sre.split(r"(b)|(:+)", ":a:b::c")""",
157 ['', None, ':', 'a', None, ':', '', 'b', None, '', None, '::', 'c'])
158 test(r"""sre.split(r"(?:b)|(?::+)", ":a:b::c")""", ['', 'a', '', '', 'c'])
160 test(r"""sre.split(r":", ":a:b::c", 2)""", ['', 'a', 'b::c'])
161 test(r"""sre.split(r':', 'a:b:c:d', 2)""", ['a', 'b', 'c:d'])
163 test(r"""sre.split(r"(:)", ":a:b::c", 2)""", ['', ':', 'a', ':', 'b::c'])
164 test(r"""sre.split(r"(:*)", ":a:b::c", 2)""", ['', ':', 'a', ':', 'b::c'])
166 if verbose:
167 print "Running tests on sre.findall"
169 test(r"""sre.findall(r":+", "abc")""", [])
170 test(r"""sre.findall(r":+", "a:b::c:::d")""", [":", "::", ":::"])
171 test(r"""sre.findall(r"(:+)", "a:b::c:::d")""", [":", "::", ":::"])
172 test(r"""sre.findall(r"(:)(:*)", "a:b::c:::d")""",
173 [(":", ""), (":", ":"), (":", "::")])
174 test(r"""sre.findall(r"(a)|(b)", "abc")""", [("a", ""), ("", "b")])
176 # bug 117612
177 test(r"""sre.findall(r"(a|(b))", "aba")""", [("a", ""),("b", "b"),("a", "")])
179 if verbose:
180 print "Running tests on sre.match"
182 test(r"""sre.match(r'a', 'a').groups()""", ())
183 test(r"""sre.match(r'(a)', 'a').groups()""", ('a',))
184 test(r"""sre.match(r'(a)', 'a').group(0)""", 'a')
185 test(r"""sre.match(r'(a)', 'a').group(1)""", 'a')
186 test(r"""sre.match(r'(a)', 'a').group(1, 1)""", ('a', 'a'))
188 pat = sre.compile(r'((a)|(b))(c)?')
189 test(r"""pat.match('a').groups()""", ('a', 'a', None, None))
190 test(r"""pat.match('b').groups()""", ('b', None, 'b', None))
191 test(r"""pat.match('ac').groups()""", ('a', 'a', None, 'c'))
192 test(r"""pat.match('bc').groups()""", ('b', None, 'b', 'c'))
193 test(r"""pat.match('bc').groups("")""", ('b', "", 'b', 'c'))
195 pat = sre.compile(r'(?:(?P<a1>a)|(?P<b2>b))(?P<c3>c)?')
196 test(r"""pat.match('a').group(1, 2, 3)""", ('a', None, None))
197 test(r"""pat.match('b').group('a1', 'b2', 'c3')""", (None, 'b', None))
198 test(r"""pat.match('ac').group(1, 'b2', 3)""", ('a', None, 'c'))
200 if verbose:
201 print "Running tests on sre.escape"
203 p = ""
204 for i in range(0, 256):
205 p = p + chr(i)
206 test(r"""sre.match(sre.escape(chr(i)), chr(i)) is not None""", 1)
207 test(r"""sre.match(sre.escape(chr(i)), chr(i)).span()""", (0,1))
209 pat = sre.compile(sre.escape(p))
210 test(r"""pat.match(p) is not None""", 1)
211 test(r"""pat.match(p).span()""", (0,256))
213 if verbose:
214 print 'Pickling a SRE_Pattern instance'
216 try:
217 import pickle
218 pat = sre.compile(r'a(?:b|(c|e){1,2}?|d)+?(.)')
219 s = pickle.dumps(pat)
220 pat = pickle.loads(s)
221 except:
222 print TestFailed, 're module pickle' # expected
224 try:
225 import cPickle
226 pat = sre.compile(r'a(?:b|(c|e){1,2}?|d)+?(.)')
227 s = cPickle.dumps(pat)
228 pat = cPickle.loads(s)
229 except:
230 print TestFailed, 're module cPickle' # expected
232 # constants
233 test(r"""sre.I""", sre.IGNORECASE)
234 test(r"""sre.L""", sre.LOCALE)
235 test(r"""sre.M""", sre.MULTILINE)
236 test(r"""sre.S""", sre.DOTALL)
237 test(r"""sre.X""", sre.VERBOSE)
238 test(r"""sre.T""", sre.TEMPLATE)
239 test(r"""sre.U""", sre.UNICODE)
241 for flags in [sre.I, sre.M, sre.X, sre.S, sre.L, sre.T, sre.U]:
242 try:
243 r = sre.compile('^pattern$', flags)
244 except:
245 print 'Exception raised on flag', flags
247 if verbose:
248 print 'Test engine limitations'
250 # Try nasty case that overflows the straightforward recursive
251 # implementation of repeated groups.
252 test("sre.match('(x)*', 50000*'x').span()", (0, 50000), RuntimeError)
253 test("sre.match(r'(x)*y', 50000*'x'+'y').span()", (0, 50001), RuntimeError)
254 test("sre.match(r'(x)*?y', 50000*'x'+'y').span()", (0, 50001))
256 from re_tests import *
258 if verbose:
259 print 'Running re_tests test suite'
260 else:
261 # To save time, only run the first and last 10 tests
262 #tests = tests[:10] + tests[-10:]
263 pass
265 for t in tests:
266 sys.stdout.flush()
267 pattern=s=outcome=repl=expected=None
268 if len(t)==5:
269 pattern, s, outcome, repl, expected = t
270 elif len(t)==3:
271 pattern, s, outcome = t
272 else:
273 raise ValueError, ('Test tuples should have 3 or 5 fields',t)
275 try:
276 obj=sre.compile(pattern)
277 except sre.error:
278 if outcome==SYNTAX_ERROR: pass # Expected a syntax error
279 else:
280 print '=== Syntax error:', t
281 except KeyboardInterrupt: raise KeyboardInterrupt
282 except:
283 print '*** Unexpected error ***', t
284 if verbose:
285 traceback.print_exc(file=sys.stdout)
286 else:
287 try:
288 result=obj.search(s)
289 except (sre.error), msg:
290 print '=== Unexpected exception', t, repr(msg)
291 if outcome==SYNTAX_ERROR:
292 print '=== Compiled incorrectly', t
293 elif outcome==FAIL:
294 if result is None: pass # No match, as expected
295 else: print '=== Succeeded incorrectly', t
296 elif outcome==SUCCEED:
297 if result is not None:
298 # Matched, as expected, so now we compute the
299 # result string and compare it to our expected result.
300 start, end = result.span(0)
301 vardict={'found': result.group(0),
302 'groups': result.group(),
303 'flags': result.re.flags}
304 for i in range(1, 100):
305 try:
306 gi = result.group(i)
307 # Special hack because else the string concat fails:
308 if gi is None:
309 gi = "None"
310 except IndexError:
311 gi = "Error"
312 vardict['g%d' % i] = gi
313 for i in result.re.groupindex.keys():
314 try:
315 gi = result.group(i)
316 if gi is None:
317 gi = "None"
318 except IndexError:
319 gi = "Error"
320 vardict[i] = gi
321 repl=eval(repl, vardict)
322 if repl!=expected:
323 print '=== grouping error', t,
324 print repr(repl)+' should be '+repr(expected)
325 else:
326 print '=== Failed incorrectly', t
327 continue
329 # Try the match on a unicode string, and check that it
330 # still succeeds.
331 try:
332 u = unicode(s, "latin-1")
333 except NameError:
334 pass
335 except TypeError:
336 continue # skip unicode test strings
337 else:
338 result=obj.search(u)
339 if result==None:
340 print '=== Fails on unicode match', t
342 # Try the match on a unicode pattern, and check that it
343 # still succeeds.
344 try:
345 u = unicode(pattern, "latin-1")
346 except NameError:
347 pass
348 else:
349 obj=sre.compile(u)
350 result=obj.search(s)
351 if result==None:
352 print '=== Fails on unicode pattern match', t
354 # Try the match with the search area limited to the extent
355 # of the match and see if it still succeeds. \B will
356 # break (because it won't match at the end or start of a
357 # string), so we'll ignore patterns that feature it.
359 if pattern[:2]!='\\B' and pattern[-2:]!='\\B':
360 obj=sre.compile(pattern)
361 result=obj.search(s, result.start(0), result.end(0)+1)
362 if result==None:
363 print '=== Failed on range-limited match', t
365 # Try the match with IGNORECASE enabled, and check that it
366 # still succeeds.
367 obj=sre.compile(pattern, sre.IGNORECASE)
368 result=obj.search(s)
369 if result==None:
370 print '=== Fails on case-insensitive match', t
372 # Try the match with LOCALE enabled, and check that it
373 # still succeeds.
374 obj=sre.compile(pattern, sre.LOCALE)
375 result=obj.search(s)
376 if result==None:
377 print '=== Fails on locale-sensitive match', t
379 # Try the match with UNICODE locale enabled, and check
380 # that it still succeeds.
381 obj=sre.compile(pattern, sre.UNICODE)
382 result=obj.search(s)
383 if result==None:
384 print '=== Fails on unicode-sensitive match', t