1 # SRE test harness for the Python regression suite
3 # this is based on test_re.py, but uses a test function instead
7 sys
.path
=['.']+sys
.path
9 from test_support
import verbose
, TestFailed
11 import sys
, os
, string
, traceback
# test(expression, result, exception=None): reporting helper used by the checks
# in this script.  Callers pass the check as a source string, the value it is
# expected to produce and, optionally, the exception class it is expected to raise.
# NOTE(review): the statement that evaluates `expression` and binds `r`, and the
# try/except/else lines that select between the branches below, are not present
# in this view -- confirm against the full file before relying on these notes.
16 def test(expression
, result
, exception
=None):
# Reported when the captured exception (sys.exc_value) is not an instance of
# the expected `exception` class.
21 if not isinstance(sys
.exc_value
, exception
):
22 print expression
, "FAILED"
23 # display name, not actual value
24 if exception
is sre
.error
:
25 print "expected", "sre.error"
27 print "expected", exception
.__name
__
28 print "got", sys
.exc_type
.__name
__, str(sys
.exc_value
)
# presumably the branch for an exception raised when none was expected: the
# traceback goes to stdout -- TODO confirm
30 print expression
, "FAILED"
31 traceback
.print_exc(file=sys
.stdout
)
# presumably the branch where an exception was expected but a result `r` was
# produced instead -- TODO confirm
34 print expression
, "FAILED"
35 if exception
is sre
.error
:
36 print "expected", "sre.error"
38 print "expected", exception
.__name
__
39 print "got result", repr(r
)
# presumably the branch where `r` differs from the expected `result` -- TODO confirm
42 print expression
, "FAILED"
43 print "expected", repr(result
)
44 print "got result", repr(r
)
47 print 'Running tests on character literals'
49 for i
in [0, 8, 16, 32, 64, 127, 128, 255]:
50 test(r
"""sre.match(r"\%03o" % i, chr(i)) is not None""", 1)
51 test(r
"""sre.match(r"\%03o0" % i, chr(i)+"0") is not None""", 1)
52 test(r
"""sre.match(r"\%03o8" % i, chr(i)+"8") is not None""", 1)
53 test(r
"""sre.match(r"\x%02x" % i, chr(i)) is not None""", 1)
54 test(r
"""sre.match(r"\x%02x0" % i, chr(i)+"0") is not None""", 1)
55 test(r
"""sre.match(r"\x%02xz" % i, chr(i)+"z") is not None""", 1)
56 test(r
"""sre.match("\911", "")""", None, sre
.error
)
59 # Misc tests from Tim Peters' re.doc
62 print 'Running tests on sre.search and sre.match'
64 test(r
"""sre.search(r'x*', 'axx').span(0)""", (0, 0))
65 test(r
"""sre.search(r'x*', 'axx').span()""", (0, 0))
66 test(r
"""sre.search(r'x+', 'axx').span(0)""", (1, 3))
67 test(r
"""sre.search(r'x+', 'axx').span()""", (1, 3))
68 test(r
"""sre.search(r'x', 'aaa')""", None)
70 test(r
"""sre.match(r'a*', 'xxx').span(0)""", (0, 0))
71 test(r
"""sre.match(r'a*', 'xxx').span()""", (0, 0))
72 test(r
"""sre.match(r'x*', 'xxxa').span(0)""", (0, 3))
73 test(r
"""sre.match(r'x*', 'xxxa').span()""", (0, 3))
74 test(r
"""sre.match(r'a+', 'xxx')""", None)
77 test(r
"""sre.match(r'(a)|(b)', 'b').start(1)""", -1)
78 test(r
"""sre.match(r'(a)|(b)', 'b').end(1)""", -1)
79 test(r
"""sre.match(r'(a)|(b)', 'b').span(1)""", (-1, -1))
82 print 'Running tests on sre.sub'
84 test(r
"""sre.sub(r"(?i)b+", "x", "bbbb BBBB")""", 'x x')
def bump_num(matchobj):
    """Return the number matched by *matchobj*, incremented by one, as a string.

    The whole match (group 0) is parsed as a base-10 integer, so a leading
    zero such as in '08' is dropped before the value is incremented.
    """
    int_value = int(matchobj.group(0))
    return str(int_value + 1)
# callable replacement: every number is bumped; the count argument (3) limits
# how many replacements are made
test(r"""sre.sub(r'\d+', bump_num, '08.2 -2 23x99y')""", '9.3 -3 24x100y')
test(r"""sre.sub(r'\d+', bump_num, '08.2 -2 23x99y', 3)""", '9.3 -3 23x99y')

# a string returned by a callable is expected verbatim, while a template
# string has its escapes processed
test(r"""sre.sub(r'.', lambda m: r"\n", 'x')""", '\\n')
test(r"""sre.sub(r'.', r"\n", 'x')""", '\n')

# NOTE(review): `s` is not assigned anywhere in the visible source -- confirm
# it is defined before these three checks run.
test(r"""sre.sub(r'(.)', s, 'x')""", 'xx')
test(r"""sre.sub(r'(.)', sre.escape(s), 'x')""", s)
test(r"""sre.sub(r'(.)', lambda m: s, 'x')""", s)

# named and numbered group references in the template
test(r"""sre.sub(r'(?P<a>x)', '\g<a>\g<a>', 'xx')""", 'xxxx')
test(r"""sre.sub(r'(?P<a>x)', '\g<a>\g<1>', 'xx')""", 'xxxx')
test(r"""sre.sub(r'(?P<unk>x)', '\g<unk>\g<unk>', 'xx')""", 'xxxx')
test(r"""sre.sub(r'(?P<unk>x)', '\g<1>\g<1>', 'xx')""", 'xxxx')

# escape handling inside the replacement template
test(r"""sre.sub(r'a', r'\t\n\v\r\f\a\b\B\Z\a\A\w\W\s\S\d\D', 'a')""", '\t\n\v\r\f\a\b\\B\\Z\a\\A\\w\\W\\s\\S\\d\\D')
test(r"""sre.sub(r'a', '\t\n\v\r\f\a', 'a')""", '\t\n\v\r\f\a')
test(r"""sre.sub(r'a', '\t\n\v\r\f\a', 'a')""", (chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)))

test(r"""sre.sub(r'^\s*', 'X', 'test')""", 'Xtest')

# with and without an explicit replacement count
test(r"""sre.sub(r'a', 'b', 'aaaaa')""", 'bbbbb')
test(r"""sre.sub(r'a', 'b', 'aaaaa', 1)""", 'baaaa')

test(r"""sre.sub(r'(\S)\s+(\S)', r'\1 \2', 'hello there')""", 'hello there')
121 print 'Running tests on symbolic references'
123 test(r
"""sre.sub(r'(?P<a>x)', '\g<a', 'xx')""", None, sre
.error
)
124 test(r
"""sre.sub(r'(?P<a>x)', '\g<', 'xx')""", None, sre
.error
)
125 test(r
"""sre.sub(r'(?P<a>x)', '\g', 'xx')""", None, sre
.error
)
126 test(r
"""sre.sub(r'(?P<a>x)', '\g<a a>', 'xx')""", None, sre
.error
)
127 test(r
"""sre.sub(r'(?P<a>x)', '\g<1a1>', 'xx')""", None, sre
.error
)
128 test(r
"""sre.sub(r'(?P<a>x)', '\g<ab>', 'xx')""", None, IndexError)
129 test(r
"""sre.sub(r'(?P<a>x)|(?P<b>y)', '\g<b>', 'xx')""", None, sre
.error
)
130 test(r
"""sre.sub(r'(?P<a>x)|(?P<b>y)', '\\2', 'xx')""", None, sre
.error
)
133 print 'Running tests on sre.subn'
135 test(r
"""sre.subn(r"(?i)b+", "x", "bbbb BBBB")""", ('x x', 2))
136 test(r
"""sre.subn(r"b+", "x", "bbbb BBBB")""", ('x BBBB', 1))
137 test(r
"""sre.subn(r"b+", "x", "xyz")""", ('xyz', 0))
138 test(r
"""sre.subn(r"b*", "x", "xyz")""", ('xxxyxzx', 4))
139 test(r
"""sre.subn(r"b*", "x", "xyz", 2)""", ('xxxyz', 2))
142 print 'Running tests on sre.split'
144 test(r
"""sre.split(r":", ":a:b::c")""", ['', 'a', 'b', '', 'c'])
145 test(r
"""sre.split(r":*", ":a:b::c")""", ['', 'a', 'b', 'c'])
146 test(r
"""sre.split(r"(:*)", ":a:b::c")""", ['', ':', 'a', ':', 'b', '::', 'c'])
147 test(r
"""sre.split(r"(?::*)", ":a:b::c")""", ['', 'a', 'b', 'c'])
148 test(r
"""sre.split(r"(:)*", ":a:b::c")""", ['', ':', 'a', ':', 'b', ':', 'c'])
149 test(r
"""sre.split(r"([b:]+)", ":a:b::c")""", ['', ':', 'a', ':b::', 'c'])
150 test(r
"""sre.split(r"(b)|(:+)", ":a:b::c")""",
151 ['', None, ':', 'a', None, ':', '', 'b', None, '', None, '::', 'c'])
152 test(r
"""sre.split(r"(?:b)|(?::+)", ":a:b::c")""", ['', 'a', '', '', 'c'])
154 test(r
"""sre.split(r":", ":a:b::c", 2)""", ['', 'a', 'b::c'])
155 test(r
"""sre.split(r':', 'a:b:c:d', 2)""", ['a', 'b', 'c:d'])
157 test(r
"""sre.split(r"(:)", ":a:b::c", 2)""", ['', ':', 'a', ':', 'b::c'])
158 test(r
"""sre.split(r"(:*)", ":a:b::c", 2)""", ['', ':', 'a', ':', 'b::c'])
161 print "Running tests on sre.findall"
163 test(r
"""sre.findall(r":+", "abc")""", [])
164 test(r
"""sre.findall(r":+", "a:b::c:::d")""", [":", "::", ":::"])
165 test(r
"""sre.findall(r"(:+)", "a:b::c:::d")""", [":", "::", ":::"])
166 test(r
"""sre.findall(r"(:)(:*)", "a:b::c:::d")""",
167 [(":", ""), (":", ":"), (":", "::")])
168 test(r
"""sre.findall(r"(a)|(b)", "abc")""", [("a", ""), ("", "b")])
171 test(r
"""sre.findall(r"(a|(b))", "aba")""", [("a", ""),("b", "b"),("a", "")])
174 print "Running tests on sre.match"
176 test(r
"""sre.match(r'a', 'a').groups()""", ())
177 test(r
"""sre.match(r'(a)', 'a').groups()""", ('a',))
178 test(r
"""sre.match(r'(a)', 'a').group(0)""", 'a')
179 test(r
"""sre.match(r'(a)', 'a').group(1)""", 'a')
180 test(r
"""sre.match(r'(a)', 'a').group(1, 1)""", ('a', 'a'))
182 pat
= sre
.compile(r
'((a)|(b))(c)?')
183 test(r
"""pat.match('a').groups()""", ('a', 'a', None, None))
184 test(r
"""pat.match('b').groups()""", ('b', None, 'b', None))
185 test(r
"""pat.match('ac').groups()""", ('a', 'a', None, 'c'))
186 test(r
"""pat.match('bc').groups()""", ('b', None, 'b', 'c'))
187 test(r
"""pat.match('bc').groups("")""", ('b', "", 'b', 'c'))
189 pat
= sre
.compile(r
'(?:(?P<a1>a)|(?P<b2>b))(?P<c3>c)?')
190 test(r
"""pat.match('a').group(1, 2, 3)""", ('a', None, None))
191 test(r
"""pat.match('b').group('a1', 'b2', 'c3')""", (None, 'b', None))
192 test(r
"""pat.match('ac').group(1, 'b2', 3)""", ('a', None, 'c'))
# sre.escape checks: an escaped single character must match that character,
# with span (0,1), for every i in range(0, 256).
# NOTE(review): `p` is never assigned in the visible source and the extent of
# the loop body is not recoverable here -- confirm against the full file.
195 print "Running tests on sre.escape"
198 for i
in range(0, 256):
200 test(r
"""sre.match(sre.escape(chr(i)), chr(i)) is not None""", 1)
201 test(r
"""sre.match(sre.escape(chr(i)), chr(i)).span()""", (0,1))
# the escaped form of the whole string p must match p itself; the expected
# span (0,256) implies p is 256 characters long
203 pat
= sre
.compile(sre
.escape(p
))
204 test(r
"""pat.match(p) is not None""", 1)
205 test(r
"""pat.match(p).span()""", (0,256))
# Round-trip a compiled pattern through pickle.dumps/loads and then through
# cPickle.dumps/loads.
# NOTE(review): the imports of pickle/cPickle and whatever guards the two
# TestFailed prints (presumably try/except) are not present in this view --
# confirm against the full file.
208 print 'Pickling a SRE_Pattern instance'
212 pat
= sre
.compile(r
'a(?:b|(c|e){1,2}?|d)+?(.)')
213 s
= pickle
.dumps(pat
)
214 pat
= pickle
.loads(s
)
216 print TestFailed
, 're module pickle' # expected
# same round trip, using cPickle
220 pat
= sre
.compile(r
'a(?:b|(c|e){1,2}?|d)+?(.)')
221 s
= cPickle
.dumps(pat
)
222 pat
= cPickle
.loads(s
)
224 print TestFailed
, 're module cPickle' # expected
# each single-letter flag must equal its long-named counterpart
test(r"""sre.I""", sre.IGNORECASE)
test(r"""sre.L""", sre.LOCALE)
test(r"""sre.M""", sre.MULTILINE)
test(r"""sre.S""", sre.DOTALL)
test(r"""sre.X""", sre.VERBOSE)
test(r"""sre.T""", sre.TEMPLATE)
test(r"""sre.U""", sre.UNICODE)
# Compile the same trivial pattern once with each individual flag.
# NOTE(review): whatever guards the print below (presumably try/except around
# the compile) is not present in this view -- confirm against the full file.
235 for flags
in [sre
.I
, sre
.M
, sre
.X
, sre
.S
, sre
.L
, sre
.T
, sre
.U
]:
237 r
= sre
.compile('^pattern$', flags
)
239 print 'Exception raised on flag', flags
242 print 'Test engine limitations'
244 # Try nasty case that overflows the straightforward recursive
245 # implementation of repeated groups.
246 test(r
"""sre.match(r'(x)*', 50000*'x').span()""",
247 (0, 50000), RuntimeError)
248 test(r
"""sre.match(r'(x)*y', 50000*'x'+'y').span()""",
249 (0, 50001), RuntimeError)
250 test(r
"""sre.match(r'(x)*?y', 50000*'x'+'y').span()""",
251 (0, 50001)) # this works in 2.1
253 from re_tests
import *
# Data-driven pass over test tuples; each tuple is bound to `t` below.
# NOTE(review): the loop header, the try/else lines and a number of statements
# of this section are not present in this view (presumably the loop runs over
# `tests` from re_tests) -- only the visible statements are annotated; confirm
# the control flow against the full file.
256 print 'Running re_tests test suite'
258 # To save time, only run the first and last 10 tests
259 #tests = tests[:10] + tests[-10:]
# reset all five fields, then unpack t as either a 5-field or a 3-field tuple;
# any other shape raises ValueError
264 pattern
=s
=outcome
=repl
=expected
=None
266 pattern
, s
, outcome
, repl
, expected
= t
268 pattern
, s
, outcome
= t
270 raise ValueError, ('Test tuples should have 3 or 5 fields',t
)
# compile the pattern; the visible messages below report compile-time outcomes
273 obj
=sre
.compile(pattern
)
275 if outcome
==SYNTAX_ERROR
: pass # Expected a syntax error
277 print '=== Syntax error:', t
278 except KeyboardInterrupt: raise KeyboardInterrupt
280 print '*** Unexpected error ***', t
282 traceback
.print_exc(file=sys
.stdout
)
286 except (sre
.error
), msg
:
287 print '=== Unexpected exception', t
, repr(msg
)
288 if outcome
==SYNTAX_ERROR
:
289 print '=== Compiled incorrectly', t
291 if result
is None: pass # No match, as expected
292 else: print '=== Succeeded incorrectly', t
293 elif outcome
==SUCCEED
:
294 if result
is not None:
295 # Matched, as expected, so now we compute the
296 # result string and compare it to our expected result.
297 start
, end
= result
.span(0)
# namespace in which the repl expression is evaluated further down
298 vardict
={'found': result
.group(0),
299 'groups': result
.group(),
300 'flags': result
.re
.flags
}
301 for i
in range(1, 100):
304 # Special hack, because otherwise the string concatenation fails:
309 vardict
['g%d' % i
] = gi
310 for i
in result
.re
.groupindex
.keys():
# NOTE(review): repl is evaluated with eval(); it presumably comes from the
# re_tests tuples -- do not feed untrusted data through this path
318 repl
=eval(repl
, vardict
)
320 print '=== grouping error', t
,
321 print repr(repl
)+' should be '+repr(expected
)
323 print '=== Failed incorrectly', t
326 # Try the match on a unicode string, and check that it still succeeds.
329 u
= unicode(s
, "latin-1")
335 print '=== Fails on unicode match', t
337 # Try the match on a unicode pattern, and check that it still succeeds.
340 u
= unicode(pattern
, "latin-1")
347 print '=== Fails on unicode pattern match', t
349 # Try the match with the search area limited to the extent
350 # of the match and see if it still succeeds. \B will
351 # break (because it won't match at the end or start of a
352 # string), so we'll ignore patterns that feature it.
354 if pattern
[:2]!='\\B' and pattern
[-2:]!='\\B':
355 obj
=sre
.compile(pattern
)
356 result
=obj
.search(s
, result
.start(0), result
.end(0)+1)
358 print '=== Failed on range-limited match', t
360 # Try the match with IGNORECASE enabled, and check that it still succeeds.
362 obj
=sre
.compile(pattern
, sre
.IGNORECASE
)
365 print '=== Fails on case-insensitive match', t
367 # Try the match with LOCALE enabled, and check that it still succeeds.
369 obj
=sre
.compile(pattern
, sre
.LOCALE
)
372 print '=== Fails on locale-sensitive match', t
374 # Try the match with UNICODE locale enabled, and check
375 # that it still succeeds.
376 obj
=sre
.compile(pattern
, sre
.UNICODE
)
379 print '=== Fails on unicode-sensitive match', t