py-cvs-2001_07_13 (Rev 1.3) merge
[python/dscho.git] / Lib / test / test_sre.py
blobe266d14d5ef7420c8ff7fd290939a0478ec4ff97
1 # SRE test harness for the Python regression suite
3 # this is based on test_re.py, but uses a test function instead
4 # of all those asserts
6 import sys
7 sys.path=['.']+sys.path
9 from test_support import verbose, TestFailed
10 import sre
11 import sys, os, string, traceback
14 # test support
16 def test(expression, result, exception=None):
17 try:
18 r = eval(expression)
19 except:
20 if exception:
21 if not isinstance(sys.exc_value, exception):
22 print expression, "FAILED"
23 # display name, not actual value
24 if exception is sre.error:
25 print "expected", "sre.error"
26 else:
27 print "expected", exception.__name__
28 print "got", sys.exc_type.__name__, str(sys.exc_value)
29 else:
30 print expression, "FAILED"
31 traceback.print_exc(file=sys.stdout)
32 else:
33 if exception:
34 print expression, "FAILED"
35 if exception is sre.error:
36 print "expected", "sre.error"
37 else:
38 print "expected", exception.__name__
39 print "got result", repr(r)
40 else:
41 if r != result:
42 print expression, "FAILED"
43 print "expected", repr(result)
44 print "got result", repr(r)
46 if verbose:
47 print 'Running tests on character literals'
49 for i in [0, 8, 16, 32, 64, 127, 128, 255]:
50 test(r"""sre.match(r"\%03o" % i, chr(i)) is not None""", 1)
51 test(r"""sre.match(r"\%03o0" % i, chr(i)+"0") is not None""", 1)
52 test(r"""sre.match(r"\%03o8" % i, chr(i)+"8") is not None""", 1)
53 test(r"""sre.match(r"\x%02x" % i, chr(i)) is not None""", 1)
54 test(r"""sre.match(r"\x%02x0" % i, chr(i)+"0") is not None""", 1)
55 test(r"""sre.match(r"\x%02xz" % i, chr(i)+"z") is not None""", 1)
56 test(r"""sre.match("\911", "")""", None, sre.error)
59 # Misc tests from Tim Peters' re.doc
61 if verbose:
62 print 'Running tests on sre.search and sre.match'
64 test(r"""sre.search(r'x*', 'axx').span(0)""", (0, 0))
65 test(r"""sre.search(r'x*', 'axx').span()""", (0, 0))
66 test(r"""sre.search(r'x+', 'axx').span(0)""", (1, 3))
67 test(r"""sre.search(r'x+', 'axx').span()""", (1, 3))
68 test(r"""sre.search(r'x', 'aaa')""", None)
70 test(r"""sre.match(r'a*', 'xxx').span(0)""", (0, 0))
71 test(r"""sre.match(r'a*', 'xxx').span()""", (0, 0))
72 test(r"""sre.match(r'x*', 'xxxa').span(0)""", (0, 3))
73 test(r"""sre.match(r'x*', 'xxxa').span()""", (0, 3))
74 test(r"""sre.match(r'a+', 'xxx')""", None)
76 # bug 113254
77 test(r"""sre.match(r'(a)|(b)', 'b').start(1)""", -1)
78 test(r"""sre.match(r'(a)|(b)', 'b').end(1)""", -1)
79 test(r"""sre.match(r'(a)|(b)', 'b').span(1)""", (-1, -1))
81 if verbose:
82 print 'Running tests on sre.sub'
84 test(r"""sre.sub(r"(?i)b+", "x", "bbbb BBBB")""", 'x x')
def bump_num(matchobj):
    """Return the whole match, read as an integer, plus one, as a string.

    Used as a callable replacement argument for sre.sub().
    """
    return str(int(matchobj.group(0)) + 1)
# Each entry below is the argument list of one test() call:
# (expression, expected result).
for _case in (
    # callable replacement, without and with a substitution limit
    (r"""sre.sub(r'\d+', bump_num, '08.2 -2 23x99y')""", '9.3 -3 24x100y'),
    (r"""sre.sub(r'\d+', bump_num, '08.2 -2 23x99y', 3)""", '9.3 -3 23x99y'),

    # the value returned by a callable is inserted as is, while a
    # template string has its escapes processed
    (r"""sre.sub(r'.', lambda m: r"\n", 'x')""", '\\n'),
    (r"""sre.sub(r'.', r"\n", 'x')""", '\n'),
):
    test(*_case)

# Must be the global `s`: the expressions below are evaluated inside
# test() and look it up by that name.
s = r"\1\1"

for _case in (
    (r"""sre.sub(r'(.)', s, 'x')""", 'xx'),
    (r"""sre.sub(r'(.)', sre.escape(s), 'x')""", s),
    (r"""sre.sub(r'(.)', lambda m: s, 'x')""", s),

    # group references by name and by number
    (r"""sre.sub(r'(?P<a>x)', '\g<a>\g<a>', 'xx')""", 'xxxx'),
    (r"""sre.sub(r'(?P<a>x)', '\g<a>\g<1>', 'xx')""", 'xxxx'),
    (r"""sre.sub(r'(?P<unk>x)', '\g<unk>\g<unk>', 'xx')""", 'xxxx'),
    (r"""sre.sub(r'(?P<unk>x)', '\g<1>\g<1>', 'xx')""", 'xxxx'),

    # escapes in the replacement template
    (r"""sre.sub(r'a', r'\t\n\v\r\f\a\b\B\Z\a\A\w\W\s\S\d\D', 'a')""", '\t\n\v\r\f\a\b\\B\\Z\a\\A\\w\\W\\s\\S\\d\\D'),
    (r"""sre.sub(r'a', '\t\n\v\r\f\a', 'a')""", '\t\n\v\r\f\a'),
    (r"""sre.sub(r'a', '\t\n\v\r\f\a', 'a')""", (chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7))),

    (r"""sre.sub(r'^\s*', 'X', 'test')""", 'Xtest'),

    # qualified sub
    (r"""sre.sub(r'a', 'b', 'aaaaa')""", 'bbbbb'),
    (r"""sre.sub(r'a', 'b', 'aaaaa', 1)""", 'baaaa'),

    # bug 114660
    # NOTE(review): input and expected output are identical here, so a
    # substitution that does nothing would also pass; the input may have
    # lost a run of spaces in this copy -- confirm against upstream.
    (r"""sre.sub(r'(\S)\s+(\S)', r'\1 \2', 'hello there')""", 'hello there'),
):
    test(*_case)
120 if verbose:
121 print 'Running tests on symbolic references'
123 test(r"""sre.sub(r'(?P<a>x)', '\g<a', 'xx')""", None, sre.error)
124 test(r"""sre.sub(r'(?P<a>x)', '\g<', 'xx')""", None, sre.error)
125 test(r"""sre.sub(r'(?P<a>x)', '\g', 'xx')""", None, sre.error)
126 test(r"""sre.sub(r'(?P<a>x)', '\g<a a>', 'xx')""", None, sre.error)
127 test(r"""sre.sub(r'(?P<a>x)', '\g<1a1>', 'xx')""", None, sre.error)
128 test(r"""sre.sub(r'(?P<a>x)', '\g<ab>', 'xx')""", None, IndexError)
129 test(r"""sre.sub(r'(?P<a>x)|(?P<b>y)', '\g<b>', 'xx')""", None, sre.error)
130 test(r"""sre.sub(r'(?P<a>x)|(?P<b>y)', '\\2', 'xx')""", None, sre.error)
132 if verbose:
133 print 'Running tests on sre.subn'
135 test(r"""sre.subn(r"(?i)b+", "x", "bbbb BBBB")""", ('x x', 2))
136 test(r"""sre.subn(r"b+", "x", "bbbb BBBB")""", ('x BBBB', 1))
137 test(r"""sre.subn(r"b+", "x", "xyz")""", ('xyz', 0))
138 test(r"""sre.subn(r"b*", "x", "xyz")""", ('xxxyxzx', 4))
139 test(r"""sre.subn(r"b*", "x", "xyz", 2)""", ('xxxyz', 2))
141 if verbose:
142 print 'Running tests on sre.split'
144 test(r"""sre.split(r":", ":a:b::c")""", ['', 'a', 'b', '', 'c'])
145 test(r"""sre.split(r":*", ":a:b::c")""", ['', 'a', 'b', 'c'])
146 test(r"""sre.split(r"(:*)", ":a:b::c")""", ['', ':', 'a', ':', 'b', '::', 'c'])
147 test(r"""sre.split(r"(?::*)", ":a:b::c")""", ['', 'a', 'b', 'c'])
148 test(r"""sre.split(r"(:)*", ":a:b::c")""", ['', ':', 'a', ':', 'b', ':', 'c'])
149 test(r"""sre.split(r"([b:]+)", ":a:b::c")""", ['', ':', 'a', ':b::', 'c'])
150 test(r"""sre.split(r"(b)|(:+)", ":a:b::c")""",
151 ['', None, ':', 'a', None, ':', '', 'b', None, '', None, '::', 'c'])
152 test(r"""sre.split(r"(?:b)|(?::+)", ":a:b::c")""", ['', 'a', '', '', 'c'])
154 test(r"""sre.split(r":", ":a:b::c", 2)""", ['', 'a', 'b::c'])
155 test(r"""sre.split(r':', 'a:b:c:d', 2)""", ['a', 'b', 'c:d'])
157 test(r"""sre.split(r"(:)", ":a:b::c", 2)""", ['', ':', 'a', ':', 'b::c'])
158 test(r"""sre.split(r"(:*)", ":a:b::c", 2)""", ['', ':', 'a', ':', 'b::c'])
160 if verbose:
161 print "Running tests on sre.findall"
163 test(r"""sre.findall(r":+", "abc")""", [])
164 test(r"""sre.findall(r":+", "a:b::c:::d")""", [":", "::", ":::"])
165 test(r"""sre.findall(r"(:+)", "a:b::c:::d")""", [":", "::", ":::"])
166 test(r"""sre.findall(r"(:)(:*)", "a:b::c:::d")""",
167 [(":", ""), (":", ":"), (":", "::")])
168 test(r"""sre.findall(r"(a)|(b)", "abc")""", [("a", ""), ("", "b")])
170 # bug 117612
171 test(r"""sre.findall(r"(a|(b))", "aba")""", [("a", ""),("b", "b"),("a", "")])
173 if verbose:
174 print "Running tests on sre.match"
176 test(r"""sre.match(r'a', 'a').groups()""", ())
177 test(r"""sre.match(r'(a)', 'a').groups()""", ('a',))
178 test(r"""sre.match(r'(a)', 'a').group(0)""", 'a')
179 test(r"""sre.match(r'(a)', 'a').group(1)""", 'a')
180 test(r"""sre.match(r'(a)', 'a').group(1, 1)""", ('a', 'a'))
182 pat = sre.compile(r'((a)|(b))(c)?')
183 test(r"""pat.match('a').groups()""", ('a', 'a', None, None))
184 test(r"""pat.match('b').groups()""", ('b', None, 'b', None))
185 test(r"""pat.match('ac').groups()""", ('a', 'a', None, 'c'))
186 test(r"""pat.match('bc').groups()""", ('b', None, 'b', 'c'))
187 test(r"""pat.match('bc').groups("")""", ('b', "", 'b', 'c'))
189 pat = sre.compile(r'(?:(?P<a1>a)|(?P<b2>b))(?P<c3>c)?')
190 test(r"""pat.match('a').group(1, 2, 3)""", ('a', None, None))
191 test(r"""pat.match('b').group('a1', 'b2', 'c3')""", (None, 'b', None))
192 test(r"""pat.match('ac').group(1, 'b2', 3)""", ('a', None, 'c'))
194 if verbose:
195 print "Running tests on sre.escape"
197 p = ""
198 for i in range(0, 256):
199 p = p + chr(i)
200 test(r"""sre.match(sre.escape(chr(i)), chr(i)) is not None""", 1)
201 test(r"""sre.match(sre.escape(chr(i)), chr(i)).span()""", (0,1))
203 pat = sre.compile(sre.escape(p))
204 test(r"""pat.match(p) is not None""", 1)
205 test(r"""pat.match(p).span()""", (0,256))
207 if verbose:
208 print 'Pickling a SRE_Pattern instance'
210 try:
211 import pickle
212 pat = sre.compile(r'a(?:b|(c|e){1,2}?|d)+?(.)')
213 s = pickle.dumps(pat)
214 pat = pickle.loads(s)
215 except:
216 print TestFailed, 're module pickle' # expected
218 try:
219 import cPickle
220 pat = sre.compile(r'a(?:b|(c|e){1,2}?|d)+?(.)')
221 s = cPickle.dumps(pat)
222 pat = cPickle.loads(s)
223 except:
224 print TestFailed, 're module cPickle' # expected
226 # constants
227 test(r"""sre.I""", sre.IGNORECASE)
228 test(r"""sre.L""", sre.LOCALE)
229 test(r"""sre.M""", sre.MULTILINE)
230 test(r"""sre.S""", sre.DOTALL)
231 test(r"""sre.X""", sre.VERBOSE)
232 test(r"""sre.T""", sre.TEMPLATE)
233 test(r"""sre.U""", sre.UNICODE)
235 for flags in [sre.I, sre.M, sre.X, sre.S, sre.L, sre.T, sre.U]:
236 try:
237 r = sre.compile('^pattern$', flags)
238 except:
239 print 'Exception raised on flag', flags
241 if verbose:
242 print 'Test engine limitations'
244 # Try nasty case that overflows the straightforward recursive
245 # implementation of repeated groups.
246 test("sre.match('(x)*', 50000*'x').span()", (0, 50000), RuntimeError)
247 test("sre.match(r'(x)*y', 50000*'x'+'y').span()", (0, 50001), RuntimeError)
248 test("sre.match(r'(x)*?y', 50000*'x'+'y').span()", (0, 50001))
250 from re_tests import *
252 if verbose:
253 print 'Running re_tests test suite'
254 else:
255 # To save time, only run the first and last 10 tests
256 #tests = tests[:10] + tests[-10:]
257 pass
259 for t in tests:
260 sys.stdout.flush()
261 pattern=s=outcome=repl=expected=None
262 if len(t)==5:
263 pattern, s, outcome, repl, expected = t
264 elif len(t)==3:
265 pattern, s, outcome = t
266 else:
267 raise ValueError, ('Test tuples should have 3 or 5 fields',t)
269 try:
270 obj=sre.compile(pattern)
271 except sre.error:
272 if outcome==SYNTAX_ERROR: pass # Expected a syntax error
273 else:
274 print '=== Syntax error:', t
275 except KeyboardInterrupt: raise KeyboardInterrupt
276 except:
277 print '*** Unexpected error ***', t
278 if verbose:
279 traceback.print_exc(file=sys.stdout)
280 else:
281 try:
282 result=obj.search(s)
283 except (sre.error), msg:
284 print '=== Unexpected exception', t, repr(msg)
285 if outcome==SYNTAX_ERROR:
286 print '=== Compiled incorrectly', t
287 elif outcome==FAIL:
288 if result is None: pass # No match, as expected
289 else: print '=== Succeeded incorrectly', t
290 elif outcome==SUCCEED:
291 if result is not None:
292 # Matched, as expected, so now we compute the
293 # result string and compare it to our expected result.
294 start, end = result.span(0)
295 vardict={'found': result.group(0),
296 'groups': result.group(),
297 'flags': result.re.flags}
298 for i in range(1, 100):
299 try:
300 gi = result.group(i)
301 # Special hack because else the string concat fails:
302 if gi is None:
303 gi = "None"
304 except IndexError:
305 gi = "Error"
306 vardict['g%d' % i] = gi
307 for i in result.re.groupindex.keys():
308 try:
309 gi = result.group(i)
310 if gi is None:
311 gi = "None"
312 except IndexError:
313 gi = "Error"
314 vardict[i] = gi
315 repl=eval(repl, vardict)
316 if repl!=expected:
317 print '=== grouping error', t,
318 print repr(repl)+' should be '+repr(expected)
319 else:
320 print '=== Failed incorrectly', t
321 continue
323 # Try the match on a unicode string, and check that it
324 # still succeeds.
325 try:
326 u = unicode(s, "latin-1")
327 except NameError:
328 pass
329 except TypeError:
330 continue # skip unicode test strings
331 else:
332 result=obj.search(u)
333 if result==None:
334 print '=== Fails on unicode match', t
336 # Try the match on a unicode pattern, and check that it
337 # still succeeds.
338 try:
339 u = unicode(pattern, "latin-1")
340 except NameError:
341 pass
342 else:
343 obj=sre.compile(u)
344 result=obj.search(s)
345 if result==None:
346 print '=== Fails on unicode pattern match', t
348 # Try the match with the search area limited to the extent
349 # of the match and see if it still succeeds. \B will
350 # break (because it won't match at the end or start of a
351 # string), so we'll ignore patterns that feature it.
353 if pattern[:2]!='\\B' and pattern[-2:]!='\\B':
354 obj=sre.compile(pattern)
355 result=obj.search(s, result.start(0), result.end(0)+1)
356 if result==None:
357 print '=== Failed on range-limited match', t
359 # Try the match with IGNORECASE enabled, and check that it
360 # still succeeds.
361 obj=sre.compile(pattern, sre.IGNORECASE)
362 result=obj.search(s)
363 if result==None:
364 print '=== Fails on case-insensitive match', t
366 # Try the match with LOCALE enabled, and check that it
367 # still succeeds.
368 obj=sre.compile(pattern, sre.LOCALE)
369 result=obj.search(s)
370 if result==None:
371 print '=== Fails on locale-sensitive match', t
373 # Try the match with UNICODE locale enabled, and check
374 # that it still succeeds.
375 obj=sre.compile(pattern, sre.UNICODE)
376 result=obj.search(s)
377 if result==None:
378 print '=== Fails on unicode-sensitive match', t