1 # SRE test harness for the Python regression suite
3 # this is based on test_re.py, but uses a test function instead
7 sys
.path
=['.']+sys
.path
9 from test_support
import verbose
, TestFailed
11 import sys
, os
, string
, traceback
# Check helper used throughout this file: callers pass an expression string,
# the expected result and, optionally, an exception class.  Every report
# visible here prints the expression followed by "FAILED" plus
# "expected ..." / "got ..." details; sre.error is reported under the fixed
# name "sre.error" rather than through its __name__.
# NOTE(review): the statement that binds `r` and the try/except/else/if lines
# that choose between the reports below are not present in the visible text
# (the embedded line numbers jump 16 -> 21, 25 -> 27, 28 -> 30, 31 -> 34,
# 36 -> 38, 39 -> 42) -- confirm against the original file.
16 def test(expression
, result
, exception
=None):
# Reads sys.exc_value, so this runs with an exception being handled: report
# when that exception is not an instance of the expected class.
21 if not isinstance(sys
.exc_value
, exception
):
22 print expression
, "FAILED"
23 # display name, not actual value
24 if exception
is sre
.error
:
25 print "expected", "sre.error"
27 print "expected", exception
.__name
__
28 print "got", sys
.exc_type
.__name
__, str(sys
.exc_value
)
# Prints the current traceback to stdout; presumably the path for an
# exception raised when none was expected -- TODO confirm.
30 print expression
, "FAILED"
31 traceback
.print_exc(file=sys
.stdout
)
# Reports a result obtained while `exception` names an expected class;
# presumably the "expected exception did not occur" path -- TODO confirm.
34 print expression
, "FAILED"
35 if exception
is sre
.error
:
36 print "expected", "sre.error"
38 print "expected", exception
.__name
__
39 print "got result", repr(r
)
# Reports expected versus actual result; presumably guarded by a comparison
# of `r` with `result` that is not visible here -- TODO confirm.
42 print expression
, "FAILED"
43 print "expected", repr(result
)
44 print "got result", repr(r
)
47 print 'Running tests on character literals'
49 for i
in [0, 8, 16, 32, 64, 127, 128, 255]:
50 test(r
"""sre.match(r"\%03o" % i, chr(i)) is not None""", 1)
51 test(r
"""sre.match(r"\%03o0" % i, chr(i)+"0") is not None""", 1)
52 test(r
"""sre.match(r"\%03o8" % i, chr(i)+"8") is not None""", 1)
53 test(r
"""sre.match(r"\x%02x" % i, chr(i)) is not None""", 1)
54 test(r
"""sre.match(r"\x%02x0" % i, chr(i)+"0") is not None""", 1)
55 test(r
"""sre.match(r"\x%02xz" % i, chr(i)+"z") is not None""", 1)
56 test(r
"""sre.match("\911", "")""", None, sre
.error
)
59 # Misc tests from Tim Peters' re.doc
62 print 'Running tests on sre.search and sre.match'
64 test(r
"""sre.search(r'x*', 'axx').span(0)""", (0, 0))
65 test(r
"""sre.search(r'x*', 'axx').span()""", (0, 0))
66 test(r
"""sre.search(r'x+', 'axx').span(0)""", (1, 3))
67 test(r
"""sre.search(r'x+', 'axx').span()""", (1, 3))
68 test(r
"""sre.search(r'x', 'aaa')""", None)
70 test(r
"""sre.match(r'a*', 'xxx').span(0)""", (0, 0))
71 test(r
"""sre.match(r'a*', 'xxx').span()""", (0, 0))
72 test(r
"""sre.match(r'x*', 'xxxa').span(0)""", (0, 3))
73 test(r
"""sre.match(r'x*', 'xxxa').span()""", (0, 3))
74 test(r
"""sre.match(r'a+', 'xxx')""", None)
77 test(r
"""sre.match(r'(a)|(b)', 'b').start(1)""", -1)
78 test(r
"""sre.match(r'(a)|(b)', 'b').end(1)""", -1)
79 test(r
"""sre.match(r'(a)|(b)', 'b').span(1)""", (-1, -1))
82 print 'Running tests on sre.sub'
84 test(r
"""sre.sub(r"(?i)b+", "x", "bbbb BBBB")""", 'x x')
def bump_num(matchobj):
    """Return the integer matched by *matchobj*, incremented by one, as a string.

    Used as a callable replacement in the sre.sub checks that follow.

    matchobj -- a match object whose group(0) is text accepted by int()
    """
    int_value = int(matchobj.group(0))
    return str(int_value + 1)
# A callable replacement is applied to each match; per the expected value,
# the optional fourth argument limits how many matches are replaced.
test(r"""sre.sub(r'\d+', bump_num, '08.2 -2 23x99y')""", '9.3 -3 24x100y')
test(r"""sre.sub(r'\d+', bump_num, '08.2 -2 23x99y', 3)""", '9.3 -3 23x99y')

# Per the expected values, "\n" is expanded in a template string but left
# untouched when it is returned by a callable.
test(r"""sre.sub(r'.', lambda m: r"\n", 'x')""", '\\n')
test(r"""sre.sub(r'.', r"\n", 'x')""", '\n')

# NOTE(review): `s` is not assigned anywhere in the visible text -- confirm
# that it is defined before these three calls run.
test(r"""sre.sub(r'(.)', s, 'x')""", 'xx')
test(r"""sre.sub(r'(.)', sre.escape(s), 'x')""", s)
test(r"""sre.sub(r'(.)', lambda m: s, 'x')""", s)

# \g<name> and \g<number> group references inside the replacement template.
test(r"""sre.sub(r'(?P<a>x)', '\g<a>\g<a>', 'xx')""", 'xxxx')
test(r"""sre.sub(r'(?P<a>x)', '\g<a>\g<1>', 'xx')""", 'xxxx')
test(r"""sre.sub(r'(?P<unk>x)', '\g<unk>\g<unk>', 'xx')""", 'xxxx')
test(r"""sre.sub(r'(?P<unk>x)', '\g<1>\g<1>', 'xx')""", 'xxxx')

# Escape handling in templates: control-character escapes are expected to be
# expanded, while the other backslash sequences are expected to pass through.
test(r"""sre.sub(r'a', r'\t\n\v\r\f\a\b\B\Z\a\A\w\W\s\S\d\D', 'a')""", '\t\n\v\r\f\a\b\\B\\Z\a\\A\\w\\W\\s\\S\\d\\D')
test(r"""sre.sub(r'a', '\t\n\v\r\f\a', 'a')""", '\t\n\v\r\f\a')
test(r"""sre.sub(r'a', '\t\n\v\r\f\a', 'a')""", (chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)))

# A pattern that matches the empty string at the start of the input.
test(r"""sre.sub(r'^\s*', 'X', 'test')""", 'Xtest')

# Replace every occurrence, then only the first one.
test(r"""sre.sub(r'a', 'b', 'aaaaa')""", 'bbbbb')
test(r"""sre.sub(r'a', 'b', 'aaaaa', 1)""", 'baaaa')

# Numbered back-references (\1, \2) in the template.
test(r"""sre.sub(r'(\S)\s+(\S)', r'\1 \2', 'hello there')""", 'hello there')
121 print 'Running tests on symbolic references'
123 test(r
"""sre.sub(r'(?P<a>x)', '\g<a', 'xx')""", None, sre
.error
)
124 test(r
"""sre.sub(r'(?P<a>x)', '\g<', 'xx')""", None, sre
.error
)
125 test(r
"""sre.sub(r'(?P<a>x)', '\g', 'xx')""", None, sre
.error
)
126 test(r
"""sre.sub(r'(?P<a>x)', '\g<a a>', 'xx')""", None, sre
.error
)
127 test(r
"""sre.sub(r'(?P<a>x)', '\g<1a1>', 'xx')""", None, sre
.error
)
128 test(r
"""sre.sub(r'(?P<a>x)', '\g<ab>', 'xx')""", None, IndexError)
129 test(r
"""sre.sub(r'(?P<a>x)|(?P<b>y)', '\g<b>', 'xx')""", None, sre
.error
)
130 test(r
"""sre.sub(r'(?P<a>x)|(?P<b>y)', '\\2', 'xx')""", None, sre
.error
)
133 print 'Running tests on sre.subn'
135 test(r
"""sre.subn(r"(?i)b+", "x", "bbbb BBBB")""", ('x x', 2))
136 test(r
"""sre.subn(r"b+", "x", "bbbb BBBB")""", ('x BBBB', 1))
137 test(r
"""sre.subn(r"b+", "x", "xyz")""", ('xyz', 0))
138 test(r
"""sre.subn(r"b*", "x", "xyz")""", ('xxxyxzx', 4))
139 test(r
"""sre.subn(r"b*", "x", "xyz", 2)""", ('xxxyz', 2))
142 print 'Running tests on sre.split'
144 test(r
"""sre.split(r":", ":a:b::c")""", ['', 'a', 'b', '', 'c'])
145 test(r
"""sre.split(r":*", ":a:b::c")""", ['', 'a', 'b', 'c'])
146 test(r
"""sre.split(r"(:*)", ":a:b::c")""", ['', ':', 'a', ':', 'b', '::', 'c'])
147 test(r
"""sre.split(r"(?::*)", ":a:b::c")""", ['', 'a', 'b', 'c'])
148 test(r
"""sre.split(r"(:)*", ":a:b::c")""", ['', ':', 'a', ':', 'b', ':', 'c'])
149 test(r
"""sre.split(r"([b:]+)", ":a:b::c")""", ['', ':', 'a', ':b::', 'c'])
150 test(r
"""sre.split(r"(b)|(:+)", ":a:b::c")""",
151 ['', None, ':', 'a', None, ':', '', 'b', None, '', None, '::', 'c'])
152 test(r
"""sre.split(r"(?:b)|(?::+)", ":a:b::c")""", ['', 'a', '', '', 'c'])
154 test(r
"""sre.split(r":", ":a:b::c", 2)""", ['', 'a', 'b::c'])
155 test(r
"""sre.split(r':', 'a:b:c:d', 2)""", ['a', 'b', 'c:d'])
157 test(r
"""sre.split(r"(:)", ":a:b::c", 2)""", ['', ':', 'a', ':', 'b::c'])
158 test(r
"""sre.split(r"(:*)", ":a:b::c", 2)""", ['', ':', 'a', ':', 'b::c'])
161 print "Running tests on sre.findall"
163 test(r
"""sre.findall(r":+", "abc")""", [])
164 test(r
"""sre.findall(r":+", "a:b::c:::d")""", [":", "::", ":::"])
165 test(r
"""sre.findall(r"(:+)", "a:b::c:::d")""", [":", "::", ":::"])
166 test(r
"""sre.findall(r"(:)(:*)", "a:b::c:::d")""",
167 [(":", ""), (":", ":"), (":", "::")])
168 test(r
"""sre.findall(r"(a)|(b)", "abc")""", [("a", ""), ("", "b")])
171 test(r
"""sre.findall(r"(a|(b))", "aba")""", [("a", ""),("b", "b"),("a", "")])
174 print "Running tests on sre.match"
176 test(r
"""sre.match(r'a', 'a').groups()""", ())
177 test(r
"""sre.match(r'(a)', 'a').groups()""", ('a',))
178 test(r
"""sre.match(r'(a)', 'a').group(0)""", 'a')
179 test(r
"""sre.match(r'(a)', 'a').group(1)""", 'a')
180 test(r
"""sre.match(r'(a)', 'a').group(1, 1)""", ('a', 'a'))
182 pat
= sre
.compile(r
'((a)|(b))(c)?')
183 test(r
"""pat.match('a').groups()""", ('a', 'a', None, None))
184 test(r
"""pat.match('b').groups()""", ('b', None, 'b', None))
185 test(r
"""pat.match('ac').groups()""", ('a', 'a', None, 'c'))
186 test(r
"""pat.match('bc').groups()""", ('b', None, 'b', 'c'))
187 test(r
"""pat.match('bc').groups("")""", ('b', "", 'b', 'c'))
189 pat
= sre
.compile(r
'(?:(?P<a1>a)|(?P<b2>b))(?P<c3>c)?')
190 test(r
"""pat.match('a').group(1, 2, 3)""", ('a', None, None))
191 test(r
"""pat.match('b').group('a1', 'b2', 'c3')""", (None, 'b', None))
192 test(r
"""pat.match('ac').group(1, 'b2', 3)""", ('a', None, 'c'))
# sre.escape checks: for each i in range(0, 256), the escaped form of chr(i)
# must match chr(i) itself with span (0,1); a pattern compiled from
# sre.escape(p) must then match p with span (0,256).
# NOTE(review): `p` is not assigned in the visible text (the embedded line
# numbers skip 196-197, 199 and 202); the expected span suggests it holds
# all 256 characters -- confirm.  Which statements belong to the loop body
# also cannot be read off this text.
195 print "Running tests on sre.escape"
198 for i
in range(0, 256):
200 test(r
"""sre.match(sre.escape(chr(i)), chr(i)) is not None""", 1)
201 test(r
"""sre.match(sre.escape(chr(i)), chr(i)).span()""", (0,1))
203 pat
= sre
.compile(sre
.escape(p
))
204 test(r
"""pat.match(p) is not None""", 1)
205 test(r
"""pat.match(p).span()""", (0,256))
# Round-trips a compiled pattern through pickle.dumps/pickle.loads and then
# through cPickle.dumps/cPickle.loads, rebinding `pat` to the loaded object.
# NOTE(review): the imports of pickle and cPickle and the try/except lines
# that presumably guard the two `print TestFailed, ...` statements are not
# present in the visible text (embedded numbers skip 209-211, 215, 217-219
# and 223) -- confirm.
208 print 'Pickling a SRE_Pattern instance'
212 pat
= sre
.compile(r
'a(?:b|(c|e){1,2}?|d)+?(.)')
213 s
= pickle
.dumps(pat
)
214 pat
= pickle
.loads(s
)
216 print TestFailed
, 're module pickle' # expected
220 pat
= sre
.compile(r
'a(?:b|(c|e){1,2}?|d)+?(.)')
221 s
= cPickle
.dumps(pat
)
222 pat
= cPickle
.loads(s
)
224 print TestFailed
, 're module cPickle' # expected
# Each one-letter flag attribute is expected to equal its long-named constant.
test(r"""sre.I""", sre.IGNORECASE)
test(r"""sre.L""", sre.LOCALE)
test(r"""sre.M""", sre.MULTILINE)
test(r"""sre.S""", sre.DOTALL)
test(r"""sre.X""", sre.VERBOSE)
test(r"""sre.T""", sre.TEMPLATE)
test(r"""sre.U""", sre.UNICODE)
# Compiles '^pattern$' once with each single flag in the list.
# NOTE(review): the try/except lines that presumably guard the compile call
# and the 'Exception raised on flag' report are not present in the visible
# text (embedded numbers skip 236 and 238) -- confirm.
235 for flags
in [sre
.I
, sre
.M
, sre
.X
, sre
.S
, sre
.L
, sre
.T
, sre
.U
]:
237 r
= sre
.compile('^pattern$', flags
)
239 print 'Exception raised on flag', flags
242 print 'Test engine limitations'
244 # Try nasty case that overflows the straightforward recursive
245 # implementation of repeated groups.
246 test("sre.match('(x)*', 50000*'x').span()", (0, 50000), RuntimeError)
247 test("sre.match(r'(x)*y', 50000*'x'+'y').span()", (0, 50001), RuntimeError)
248 test("sre.match(r'(x)*?y', 50000*'x'+'y').span()", (0, 50001))
250 from re_tests
import *
# Table-driven run over tuples `t` that unpack either to
# (pattern, s, outcome) or to (pattern, s, outcome, repl, expected); any
# other shape raises ValueError.  The pattern is compiled with sre.compile
# and disagreements with `outcome` are printed as "=== ..." lines.  The
# later sections repeat the check on a unicode string, with a unicode
# pattern, with a range-limited search, and with the IGNORECASE, LOCALE and
# UNICODE flags.
# NOTE(review): the loop header, the try: lines, the calls that bind
# `result`, and several else/except branches are not present in the visible
# text (the embedded line numbers skip, e.g. 256 -> 261, 279 -> 283,
# 307 -> 315), so statement nesting cannot be read off this block -- confirm
# against the original file.  SYNTAX_ERROR and SUCCEED are not defined in
# the visible text; presumably they come from the re_tests star import.
253 print 'Running re_tests test suite'
255 # To save time, only run the first and last 10 tests
256 #tests = tests[:10] + tests[-10:]
261 pattern
=s
=outcome
=repl
=expected
=None
263 pattern
, s
, outcome
, repl
, expected
= t
265 pattern
, s
, outcome
= t
267 raise ValueError, ('Test tuples should have 3 or 5 fields',t
)
270 obj
=sre
.compile(pattern
)
272 if outcome
==SYNTAX_ERROR
: pass # Expected a syntax error
274 print '=== Syntax error:', t
275 except KeyboardInterrupt: raise KeyboardInterrupt
277 print '*** Unexpected error ***', t
279 traceback
.print_exc(file=sys
.stdout
)
283 except (sre
.error
), msg
:
284 print '=== Unexpected exception', t
, repr(msg
)
285 if outcome
==SYNTAX_ERROR
:
286 print '=== Compiled incorrectly', t
288 if result
is None: pass # No match, as expected
289 else: print '=== Succeeded incorrectly', t
290 elif outcome
==SUCCEED
:
291 if result
is not None:
292 # Matched, as expected, so now we compute the
293 # result string and compare it to our expected result.
294 start
, end
= result
.span(0)
# Names made available to the eval() of `repl` further down.
295 vardict
={'found': result
.group(0),
296 'groups': result
.group(),
297 'flags': result
.re
.flags
}
298 for i
in range(1, 100):
301 # Special hack because else the string concat fails:
306 vardict
['g%d' % i
] = gi
307 for i
in result
.re
.groupindex
.keys():
# `repl` is evaluated as an expression with vardict as its globals; it comes
# from the test tuple, so this is only appropriate for trusted test data.
315 repl
=eval(repl
, vardict
)
317 print '=== grouping error', t
,
318 print repr(repl
)+' should be '+repr(expected
)
320 print '=== Failed incorrectly', t
323 # Try the match on a unicode string, and check that it
326 u
= unicode(s
, "latin-1")
330 continue # skip unicode test strings
334 print '=== Fails on unicode match', t
336 # Try the match on a unicode pattern, and check that it
339 u
= unicode(pattern
, "latin-1")
346 print '=== Fails on unicode pattern match', t
348 # Try the match with the search area limited to the extent
349 # of the match and see if it still succeeds. \B will
350 # break (because it won't match at the end or start of a
351 # string), so we'll ignore patterns that feature it.
353 if pattern
[:2]!='\\B' and pattern
[-2:]!='\\B':
354 obj
=sre
.compile(pattern
)
355 result
=obj
.search(s
, result
.start(0), result
.end(0)+1)
357 print '=== Failed on range-limited match', t
359 # Try the match with IGNORECASE enabled, and check that it
361 obj
=sre
.compile(pattern
, sre
.IGNORECASE
)
364 print '=== Fails on case-insensitive match', t
366 # Try the match with LOCALE enabled, and check that it
368 obj
=sre
.compile(pattern
, sre
.LOCALE
)
371 print '=== Fails on locale-sensitive match', t
373 # Try the match with UNICODE locale enabled, and check
374 # that it still succeeds.
375 obj
=sre
.compile(pattern
, sre
.UNICODE
)
378 print '=== Fails on unicode-sensitive match', t