1 # SRE test harness for the Python regression suite
# This is based on test_re.py, but drives every check through the test()
# helper defined below.
7 sys
.path
=['.']+sys
.path
9 from test_support
import verbose
, TestFailed
11 import sys
, os
, string
, traceback
16 def test(expression
, result
, exception
=None):
21 if not isinstance(sys
.exc_value
, exception
):
22 print expression
, "FAILED"
23 # display name, not actual value
24 if exception
is sre
.error
:
25 print "expected", "sre.error"
27 print "expected", exception
.__name
__
28 print "got", sys
.exc_type
.__name
__, str(sys
.exc_value
)
30 print expression
, "FAILED"
31 traceback
.print_exc(file=sys
.stdout
)
34 print expression
, "FAILED"
35 if exception
is sre
.error
:
36 print "expected", "sre.error"
38 print "expected", exception
.__name
__
39 print "got result", repr(r
)
42 print expression
, "FAILED"
43 print "expected", repr(result
)
44 print "got result", repr(r
)
47 print 'Running tests on character literals'
# Expression texts checked for every character code below: an octal escape
# and a hexadecimal escape, each tried on its own and followed by one
# literal character.  The texts refer to the loop variable by name, so it
# has to stay the module-level name `i`.
_CHAR_LITERAL_CHECKS = (
    r"""sre.match(r"\%03o" % i, chr(i)) is not None""",
    r"""sre.match(r"\%03o0" % i, chr(i)+"0") is not None""",
    r"""sre.match(r"\%03o8" % i, chr(i)+"8") is not None""",
    r"""sre.match(r"\x%02x" % i, chr(i)) is not None""",
    r"""sre.match(r"\x%02x0" % i, chr(i)+"0") is not None""",
    r"""sre.match(r"\x%02xz" % i, chr(i)+"z") is not None""",
)

for i in (0, 8, 16, 32, 64, 127, 128, 255):
    for _check in _CHAR_LITERAL_CHECKS:
        test(_check, 1)
56 test(r
"""sre.match("\911", "")""", None, sre
.error
)
59 # Misc tests from Tim Peters' re.doc
62 print 'Running tests on sre.search and sre.match'
64 test(r
"""sre.search(r'x*', 'axx').span(0)""", (0, 0))
65 test(r
"""sre.search(r'x*', 'axx').span()""", (0, 0))
66 test(r
"""sre.search(r'x+', 'axx').span(0)""", (1, 3))
67 test(r
"""sre.search(r'x+', 'axx').span()""", (1, 3))
68 test(r
"""sre.search(r'x', 'aaa')""", None)
70 test(r
"""sre.match(r'a*', 'xxx').span(0)""", (0, 0))
71 test(r
"""sre.match(r'a*', 'xxx').span()""", (0, 0))
72 test(r
"""sre.match(r'x*', 'xxxa').span(0)""", (0, 3))
73 test(r
"""sre.match(r'x*', 'xxxa').span()""", (0, 3))
74 test(r
"""sre.match(r'a+', 'xxx')""", None)
77 test(r
"""sre.match(r'(a)|(b)', 'b').start(1)""", -1)
78 test(r
"""sre.match(r'(a)|(b)', 'b').end(1)""", -1)
79 test(r
"""sre.match(r'(a)|(b)', 'b').span(1)""", (-1, -1))
82 print 'Running tests on sre.sub'
84 test(r
"""sre.sub(r"(?i)b+", "x", "bbbb BBBB")""", 'x x')
def bump_num(matchobj):
    """Replacement callback: return the matched number plus one, as text.

    matchobj -- match object whose whole match (group 0) is text that
                int() accepts.

    Returns the incremented value converted back to a string.
    """
    return str(1 + int(matchobj.group(0)))
90 test(r
"""sre.sub(r'\d+', bump_num, '08.2 -2 23x99y')""", '9.3 -3 24x100y')
91 test(r
"""sre.sub(r'\d+', bump_num, '08.2 -2 23x99y', 3)""", '9.3 -3 23x99y')
93 test(r
"""sre.sub(r'.', lambda m: r"\n", 'x')""", '\\n')
94 test(r
"""sre.sub(r'.', r"\n", 'x')""", '\n')
98 test(r
"""sre.sub(r'(.)', s, 'x')""", 'xx')
99 test(r
"""sre.sub(r'(.)', sre.escape(s), 'x')""", s
)
100 test(r
"""sre.sub(r'(.)', lambda m: s, 'x')""", s
)
102 test(r
"""sre.sub(r'(?P<a>x)', '\g<a>\g<a>', 'xx')""", 'xxxx')
103 test(r
"""sre.sub(r'(?P<a>x)', '\g<a>\g<1>', 'xx')""", 'xxxx')
104 test(r
"""sre.sub(r'(?P<unk>x)', '\g<unk>\g<unk>', 'xx')""", 'xxxx')
105 test(r
"""sre.sub(r'(?P<unk>x)', '\g<1>\g<1>', 'xx')""", 'xxxx')
107 test(r
"""sre.sub(r'a', r'\t\n\v\r\f\a\b\B\Z\a\A\w\W\s\S\d\D', 'a')""", '\t\n\v\r\f\a\b\\B\\Z\a\\A\\w\\W\\s\\S\\d\\D')
108 test(r
"""sre.sub(r'a', '\t\n\v\r\f\a', 'a')""", '\t\n\v\r\f\a')
109 test(r
"""sre.sub(r'a', '\t\n\v\r\f\a', 'a')""", (chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)))
111 test(r
"""sre.sub(r'^\s*', 'X', 'test')""", 'Xtest')
114 test(r
"""sre.sub(r'a', 'b', 'aaaaa')""", 'bbbbb')
115 test(r
"""sre.sub(r'a', 'b', 'aaaaa', 1)""", 'baaaa')
118 test(r
"""sre.sub(r'(\S)\s+(\S)', r'\1 \2', 'hello there')""", 'hello there')
120 # Test for sub() on escaped characters, see SF bug #449000
121 test(r
"""sre.sub(r'\r\n', r'\n', 'abc\r\ndef\r\n')""", 'abc\ndef\n')
122 test(r
"""sre.sub('\r\n', r'\n', 'abc\r\ndef\r\n')""", 'abc\ndef\n')
123 test(r
"""sre.sub(r'\r\n', '\n', 'abc\r\ndef\r\n')""", 'abc\ndef\n')
124 test(r
"""sre.sub('\r\n', '\n', 'abc\r\ndef\r\n')""", 'abc\ndef\n')
127 print 'Running tests on symbolic references'
129 test(r
"""sre.sub(r'(?P<a>x)', '\g<a', 'xx')""", None, sre
.error
)
130 test(r
"""sre.sub(r'(?P<a>x)', '\g<', 'xx')""", None, sre
.error
)
131 test(r
"""sre.sub(r'(?P<a>x)', '\g', 'xx')""", None, sre
.error
)
132 test(r
"""sre.sub(r'(?P<a>x)', '\g<a a>', 'xx')""", None, sre
.error
)
133 test(r
"""sre.sub(r'(?P<a>x)', '\g<1a1>', 'xx')""", None, sre
.error
)
134 test(r
"""sre.sub(r'(?P<a>x)', '\g<ab>', 'xx')""", None, IndexError)
135 test(r
"""sre.sub(r'(?P<a>x)|(?P<b>y)', '\g<b>', 'xx')""", None, sre
.error
)
136 test(r
"""sre.sub(r'(?P<a>x)|(?P<b>y)', '\\2', 'xx')""", None, sre
.error
)
139 print 'Running tests on sre.subn'
141 test(r
"""sre.subn(r"(?i)b+", "x", "bbbb BBBB")""", ('x x', 2))
142 test(r
"""sre.subn(r"b+", "x", "bbbb BBBB")""", ('x BBBB', 1))
143 test(r
"""sre.subn(r"b+", "x", "xyz")""", ('xyz', 0))
144 test(r
"""sre.subn(r"b*", "x", "xyz")""", ('xxxyxzx', 4))
145 test(r
"""sre.subn(r"b*", "x", "xyz", 2)""", ('xxxyz', 2))
148 print 'Running tests on sre.split'
150 test(r
"""sre.split(r":", ":a:b::c")""", ['', 'a', 'b', '', 'c'])
151 test(r
"""sre.split(r":*", ":a:b::c")""", ['', 'a', 'b', 'c'])
152 test(r
"""sre.split(r"(:*)", ":a:b::c")""", ['', ':', 'a', ':', 'b', '::', 'c'])
153 test(r
"""sre.split(r"(?::*)", ":a:b::c")""", ['', 'a', 'b', 'c'])
154 test(r
"""sre.split(r"(:)*", ":a:b::c")""", ['', ':', 'a', ':', 'b', ':', 'c'])
155 test(r
"""sre.split(r"([b:]+)", ":a:b::c")""", ['', ':', 'a', ':b::', 'c'])
156 test(r
"""sre.split(r"(b)|(:+)", ":a:b::c")""",
157 ['', None, ':', 'a', None, ':', '', 'b', None, '', None, '::', 'c'])
158 test(r
"""sre.split(r"(?:b)|(?::+)", ":a:b::c")""", ['', 'a', '', '', 'c'])
160 test(r
"""sre.split(r":", ":a:b::c", 2)""", ['', 'a', 'b::c'])
161 test(r
"""sre.split(r':', 'a:b:c:d', 2)""", ['a', 'b', 'c:d'])
163 test(r
"""sre.split(r"(:)", ":a:b::c", 2)""", ['', ':', 'a', ':', 'b::c'])
164 test(r
"""sre.split(r"(:*)", ":a:b::c", 2)""", ['', ':', 'a', ':', 'b::c'])
167 print "Running tests on sre.findall"
169 test(r
"""sre.findall(r":+", "abc")""", [])
170 test(r
"""sre.findall(r":+", "a:b::c:::d")""", [":", "::", ":::"])
171 test(r
"""sre.findall(r"(:+)", "a:b::c:::d")""", [":", "::", ":::"])
172 test(r
"""sre.findall(r"(:)(:*)", "a:b::c:::d")""",
173 [(":", ""), (":", ":"), (":", "::")])
174 test(r
"""sre.findall(r"(a)|(b)", "abc")""", [("a", ""), ("", "b")])
177 test(r
"""sre.findall(r"(a|(b))", "aba")""", [("a", ""),("b", "b"),("a", "")])
180 print "Running tests on sre.match"
182 test(r
"""sre.match(r'a', 'a').groups()""", ())
183 test(r
"""sre.match(r'(a)', 'a').groups()""", ('a',))
184 test(r
"""sre.match(r'(a)', 'a').group(0)""", 'a')
185 test(r
"""sre.match(r'(a)', 'a').group(1)""", 'a')
186 test(r
"""sre.match(r'(a)', 'a').group(1, 1)""", ('a', 'a'))
188 pat
= sre
.compile(r
'((a)|(b))(c)?')
189 test(r
"""pat.match('a').groups()""", ('a', 'a', None, None))
190 test(r
"""pat.match('b').groups()""", ('b', None, 'b', None))
191 test(r
"""pat.match('ac').groups()""", ('a', 'a', None, 'c'))
192 test(r
"""pat.match('bc').groups()""", ('b', None, 'b', 'c'))
193 test(r
"""pat.match('bc').groups("")""", ('b', "", 'b', 'c'))
195 pat
= sre
.compile(r
'(?:(?P<a1>a)|(?P<b2>b))(?P<c3>c)?')
196 test(r
"""pat.match('a').group(1, 2, 3)""", ('a', None, None))
197 test(r
"""pat.match('b').group('a1', 'b2', 'c3')""", (None, 'b', None))
198 test(r
"""pat.match('ac').group(1, 'b2', 3)""", ('a', None, 'c'))
201 print "Running tests on sre.escape"
204 for i
in range(0, 256):
206 test(r
"""sre.match(sre.escape(chr(i)), chr(i)) is not None""", 1)
207 test(r
"""sre.match(sre.escape(chr(i)), chr(i)).span()""", (0,1))
209 pat
= sre
.compile(sre
.escape(p
))
210 test(r
"""pat.match(p) is not None""", 1)
211 test(r
"""pat.match(p).span()""", (0,256))
214 print 'Pickling a SRE_Pattern instance'
218 pat
= sre
.compile(r
'a(?:b|(c|e){1,2}?|d)+?(.)')
219 s
= pickle
.dumps(pat
)
220 pat
= pickle
.loads(s
)
222 print TestFailed
, 're module pickle' # expected
226 pat
= sre
.compile(r
'a(?:b|(c|e){1,2}?|d)+?(.)')
227 s
= cPickle
.dumps(pat
)
228 pat
= cPickle
.loads(s
)
230 print TestFailed
, 're module cPickle' # expected
233 test(r
"""sre.I""", sre
.IGNORECASE
)
234 test(r
"""sre.L""", sre
.LOCALE
)
235 test(r
"""sre.M""", sre
.MULTILINE
)
236 test(r
"""sre.S""", sre
.DOTALL
)
237 test(r
"""sre.X""", sre
.VERBOSE
)
238 test(r
"""sre.T""", sre
.TEMPLATE
)
239 test(r
"""sre.U""", sre
.UNICODE
)
241 for flags
in [sre
.I
, sre
.M
, sre
.X
, sre
.S
, sre
.L
, sre
.T
, sre
.U
]:
243 r
= sre
.compile('^pattern$', flags
)
245 print 'Exception raised on flag', flags
248 print 'Test engine limitations'
250 # Try nasty case that overflows the straightforward recursive
251 # implementation of repeated groups.
252 test("sre.match('(x)*', 50000*'x').span()", (0, 50000), RuntimeError)
253 test("sre.match(r'(x)*y', 50000*'x'+'y').span()", (0, 50001), RuntimeError)
254 test("sre.match(r'(x)*?y', 50000*'x'+'y').span()", (0, 50001))
256 from re_tests
import *
259 print 'Running re_tests test suite'
# Shortcut to save time (currently disabled): uncomment the next line to
# run only the first and last 10 tests.
#tests = tests[:10] + tests[-10:]
267 pattern
=s
=outcome
=repl
=expected
=None
269 pattern
, s
, outcome
, repl
, expected
= t
271 pattern
, s
, outcome
= t
273 raise ValueError, ('Test tuples should have 3 or 5 fields',t
)
276 obj
=sre
.compile(pattern
)
278 if outcome
==SYNTAX_ERROR
: pass # Expected a syntax error
280 print '=== Syntax error:', t
281 except KeyboardInterrupt: raise KeyboardInterrupt
283 print '*** Unexpected error ***', t
285 traceback
.print_exc(file=sys
.stdout
)
289 except (sre
.error
), msg
:
290 print '=== Unexpected exception', t
, repr(msg
)
291 if outcome
==SYNTAX_ERROR
:
292 print '=== Compiled incorrectly', t
294 if result
is None: pass # No match, as expected
295 else: print '=== Succeeded incorrectly', t
296 elif outcome
==SUCCEED
:
297 if result
is not None:
298 # Matched, as expected, so now we compute the
299 # result string and compare it to our expected result.
300 start
, end
= result
.span(0)
301 vardict
={'found': result
.group(0),
302 'groups': result
.group(),
303 'flags': result
.re
.flags
}
304 for i
in range(1, 100):
# Special hack, because otherwise the string concat fails:
312 vardict
['g%d' % i
] = gi
313 for i
in result
.re
.groupindex
.keys():
321 repl
=eval(repl
, vardict
)
323 print '=== grouping error', t
,
324 print repr(repl
)+' should be '+repr(expected
)
326 print '=== Failed incorrectly', t
329 # Try the match on a unicode string, and check that it
332 u
= unicode(s
, "latin-1")
336 continue # skip unicode test strings
340 print '=== Fails on unicode match', t
342 # Try the match on a unicode pattern, and check that it
345 u
= unicode(pattern
, "latin-1")
352 print '=== Fails on unicode pattern match', t
354 # Try the match with the search area limited to the extent
355 # of the match and see if it still succeeds. \B will
356 # break (because it won't match at the end or start of a
357 # string), so we'll ignore patterns that feature it.
359 if pattern
[:2]!='\\B' and pattern
[-2:]!='\\B':
360 obj
=sre
.compile(pattern
)
361 result
=obj
.search(s
, result
.start(0), result
.end(0)+1)
363 print '=== Failed on range-limited match', t
365 # Try the match with IGNORECASE enabled, and check that it
367 obj
=sre
.compile(pattern
, sre
.IGNORECASE
)
370 print '=== Fails on case-insensitive match', t
372 # Try the match with LOCALE enabled, and check that it
374 obj
=sre
.compile(pattern
, sre
.LOCALE
)
377 print '=== Fails on locale-sensitive match', t
379 # Try the match with UNICODE locale enabled, and check
380 # that it still succeeds.
381 obj
=sre
.compile(pattern
, sre
.UNICODE
)
384 print '=== Fails on unicode-sensitive match', t