2 sys
.path
= ['.'] + sys
.path
4 from test
.test_support
import verbose
, run_unittest
6 from sre
import Scanner
7 import sys
, os
, traceback
9 # Misc tests from Tim Peters' re.doc
11 # WARNING: Don't change details in these tests if you don't know
12 # what you're doing. Some of these tests were carefully modeled to
13 # cover most of the code.
17 class ReTests(unittest
.TestCase
):
18 def test_search_star_plus(self
):
19 self
.assertEqual(re
.search('x*', 'axx').span(0), (0, 0))
20 self
.assertEqual(re
.search('x*', 'axx').span(), (0, 0))
21 self
.assertEqual(re
.search('x+', 'axx').span(0), (1, 3))
22 self
.assertEqual(re
.search('x+', 'axx').span(), (1, 3))
23 self
.assertEqual(re
.search('x', 'aaa'), None)
24 self
.assertEqual(re
.match('a*', 'xxx').span(0), (0, 0))
25 self
.assertEqual(re
.match('a*', 'xxx').span(), (0, 0))
26 self
.assertEqual(re
.match('x*', 'xxxa').span(0), (0, 3))
27 self
.assertEqual(re
.match('x*', 'xxxa').span(), (0, 3))
28 self
.assertEqual(re
.match('a+', 'xxx'), None)
def bump_num(self, matchobj):
    """Return, as a string, the integer matched by *matchobj* plus one.

    The whole match (group 0) is parsed with int(), so it is used as a
    replacement callback for patterns that match digits only.
    """
    return str(int(matchobj.group(0)) + 1)
34 def test_basic_re_sub(self
):
35 self
.assertEqual(re
.sub("(?i)b+", "x", "bbbb BBBB"), 'x x')
36 self
.assertEqual(re
.sub(r
'\d+', self
.bump_num
, '08.2 -2 23x99y'),
38 self
.assertEqual(re
.sub(r
'\d+', self
.bump_num
, '08.2 -2 23x99y', 3),
41 self
.assertEqual(re
.sub('.', lambda m
: r
"\n", 'x'), '\\n')
42 self
.assertEqual(re
.sub('.', r
"\n", 'x'), '\n')
45 self
.assertEqual(re
.sub('(.)', s
, 'x'), 'xx')
46 self
.assertEqual(re
.sub('(.)', re
.escape(s
), 'x'), s
)
47 self
.assertEqual(re
.sub('(.)', lambda m
: s
, 'x'), s
)
49 self
.assertEqual(re
.sub('(?P<a>x)', '\g<a>\g<a>', 'xx'), 'xxxx')
50 self
.assertEqual(re
.sub('(?P<a>x)', '\g<a>\g<1>', 'xx'), 'xxxx')
51 self
.assertEqual(re
.sub('(?P<unk>x)', '\g<unk>\g<unk>', 'xx'), 'xxxx')
52 self
.assertEqual(re
.sub('(?P<unk>x)', '\g<1>\g<1>', 'xx'), 'xxxx')
54 self
.assertEqual(re
.sub('a',r
'\t\n\v\r\f\a\b\B\Z\a\A\w\W\s\S\d\D','a'),
55 '\t\n\v\r\f\a\b\\B\\Z\a\\A\\w\\W\\s\\S\\d\\D')
56 self
.assertEqual(re
.sub('a', '\t\n\v\r\f\a', 'a'), '\t\n\v\r\f\a')
57 self
.assertEqual(re
.sub('a', '\t\n\v\r\f\a', 'a'),
58 (chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)))
60 self
.assertEqual(re
.sub('^\s*', 'X', 'test'), 'Xtest')
62 def test_bug_449964(self
):
63 # fails for group followed by other escape
64 self
.assertEqual(re
.sub(r
'(?P<unk>x)', '\g<1>\g<1>\\b', 'xx'),
67 def test_bug_449000(self
):
68 # Test for sub() on escaped characters
69 self
.assertEqual(re
.sub(r
'\r\n', r
'\n', 'abc\r\ndef\r\n'),
71 self
.assertEqual(re
.sub('\r\n', r
'\n', 'abc\r\ndef\r\n'),
73 self
.assertEqual(re
.sub(r
'\r\n', '\n', 'abc\r\ndef\r\n'),
75 self
.assertEqual(re
.sub('\r\n', '\n', 'abc\r\ndef\r\n'),
78 def test_qualified_re_sub(self
):
79 self
.assertEqual(re
.sub('a', 'b', 'aaaaa'), 'bbbbb')
80 self
.assertEqual(re
.sub('a', 'b', 'aaaaa', 1), 'baaaa')
82 def test_bug_114660(self
):
83 self
.assertEqual(re
.sub(r
'(\S)\s+(\S)', r
'\1 \2', 'hello there'),
86 def test_bug_462270(self
):
87 # Test for empty sub() behaviour, see SF bug #462270
88 self
.assertEqual(re
.sub('x*', '-', 'abxd'), '-a-b-d-')
89 self
.assertEqual(re
.sub('x+', '-', 'abxd'), 'ab-d')
91 def test_symbolic_refs(self
):
92 self
.assertRaises(re
.error
, re
.sub
, '(?P<a>x)', '\g<a', 'xx')
93 self
.assertRaises(re
.error
, re
.sub
, '(?P<a>x)', '\g<', 'xx')
94 self
.assertRaises(re
.error
, re
.sub
, '(?P<a>x)', '\g', 'xx')
95 self
.assertRaises(re
.error
, re
.sub
, '(?P<a>x)', '\g<a a>', 'xx')
96 self
.assertRaises(re
.error
, re
.sub
, '(?P<a>x)', '\g<1a1>', 'xx')
97 self
.assertRaises(IndexError, re
.sub
, '(?P<a>x)', '\g<ab>', 'xx')
98 self
.assertRaises(re
.error
, re
.sub
, '(?P<a>x)|(?P<b>y)', '\g<b>', 'xx')
99 self
.assertRaises(re
.error
, re
.sub
, '(?P<a>x)|(?P<b>y)', '\\2', 'xx')
101 def test_re_subn(self
):
102 self
.assertEqual(re
.subn("(?i)b+", "x", "bbbb BBBB"), ('x x', 2))
103 self
.assertEqual(re
.subn("b+", "x", "bbbb BBBB"), ('x BBBB', 1))
104 self
.assertEqual(re
.subn("b+", "x", "xyz"), ('xyz', 0))
105 self
.assertEqual(re
.subn("b*", "x", "xyz"), ('xxxyxzx', 4))
106 self
.assertEqual(re
.subn("b*", "x", "xyz", 2), ('xxxyz', 2))
108 def test_re_split(self
):
109 self
.assertEqual(re
.split(":", ":a:b::c"), ['', 'a', 'b', '', 'c'])
110 self
.assertEqual(re
.split(":*", ":a:b::c"), ['', 'a', 'b', 'c'])
111 self
.assertEqual(re
.split("(:*)", ":a:b::c"),
112 ['', ':', 'a', ':', 'b', '::', 'c'])
113 self
.assertEqual(re
.split("(?::*)", ":a:b::c"), ['', 'a', 'b', 'c'])
114 self
.assertEqual(re
.split("(:)*", ":a:b::c"),
115 ['', ':', 'a', ':', 'b', ':', 'c'])
116 self
.assertEqual(re
.split("([b:]+)", ":a:b::c"),
117 ['', ':', 'a', ':b::', 'c'])
118 self
.assertEqual(re
.split("(b)|(:+)", ":a:b::c"),
119 ['', None, ':', 'a', None, ':', '', 'b', None, '',
121 self
.assertEqual(re
.split("(?:b)|(?::+)", ":a:b::c"),
122 ['', 'a', '', '', 'c'])
124 def test_qualified_re_split(self
):
125 self
.assertEqual(re
.split(":", ":a:b::c", 2), ['', 'a', 'b::c'])
126 self
.assertEqual(re
.split(':', 'a:b:c:d', 2), ['a', 'b', 'c:d'])
127 self
.assertEqual(re
.split("(:)", ":a:b::c", 2),
128 ['', ':', 'a', ':', 'b::c'])
129 self
.assertEqual(re
.split("(:*)", ":a:b::c", 2),
130 ['', ':', 'a', ':', 'b::c'])
132 def test_re_findall(self
):
133 self
.assertEqual(re
.findall(":+", "abc"), [])
134 self
.assertEqual(re
.findall(":+", "a:b::c:::d"), [":", "::", ":::"])
135 self
.assertEqual(re
.findall("(:+)", "a:b::c:::d"), [":", "::", ":::"])
136 self
.assertEqual(re
.findall("(:)(:*)", "a:b::c:::d"), [(":", ""),
140 def test_bug_117612(self
):
141 self
.assertEqual(re
.findall(r
"(a|(b))", "aba"),
142 [("a", ""),("b", "b"),("a", "")])
144 def test_re_match(self
):
145 self
.assertEqual(re
.match('a', 'a').groups(), ())
146 self
.assertEqual(re
.match('(a)', 'a').groups(), ('a',))
147 self
.assertEqual(re
.match(r
'(a)', 'a').group(0), 'a')
148 self
.assertEqual(re
.match(r
'(a)', 'a').group(1), 'a')
149 self
.assertEqual(re
.match(r
'(a)', 'a').group(1, 1), ('a', 'a'))
151 pat
= re
.compile('((a)|(b))(c)?')
152 self
.assertEqual(pat
.match('a').groups(), ('a', 'a', None, None))
153 self
.assertEqual(pat
.match('b').groups(), ('b', None, 'b', None))
154 self
.assertEqual(pat
.match('ac').groups(), ('a', 'a', None, 'c'))
155 self
.assertEqual(pat
.match('bc').groups(), ('b', None, 'b', 'c'))
156 self
.assertEqual(pat
.match('bc').groups(""), ('b', "", 'b', 'c'))
159 m
= re
.match('(a)', 'a')
160 self
.assertEqual(m
.group(0), 'a')
161 self
.assertEqual(m
.group(0), 'a')
162 self
.assertEqual(m
.group(1), 'a')
163 self
.assertEqual(m
.group(1, 1), ('a', 'a'))
165 pat
= re
.compile('(?:(?P<a1>a)|(?P<b2>b))(?P<c3>c)?')
166 self
.assertEqual(pat
.match('a').group(1, 2, 3), ('a', None, None))
167 self
.assertEqual(pat
.match('b').group('a1', 'b2', 'c3'),
169 self
.assertEqual(pat
.match('ac').group(1, 'b2', 3), ('a', None, 'c'))
171 def test_re_groupref_exists(self
):
173 self
.assertEqual(re
.match('^(\()?([^()]+)(?(1)\))$', '(a)').groups(),
175 self
.assertEqual(re
.match('^(\()?([^()]+)(?(1)\))$', 'a').groups(),
177 self
.assertEqual(re
.match('^(\()?([^()]+)(?(1)\))$', 'a)'), None)
178 self
.assertEqual(re
.match('^(\()?([^()]+)(?(1)\))$', '(a'), None)
179 self
.assertEqual(re
.match('^(?:(a)|c)((?(1)b|d))$', 'ab').groups(),
181 self
.assertEqual(re
.match('^(?:(a)|c)((?(1)b|d))$', 'cd').groups(),
183 self
.assertEqual(re
.match('^(?:(a)|c)((?(1)|d))$', 'cd').groups(),
185 self
.assertEqual(re
.match('^(?:(a)|c)((?(1)|d))$', 'a').groups(),
188 def test_re_groupref(self
):
189 self
.assertEqual(re
.match(r
'^(\|)?([^()]+)\1$', '|a|').groups(),
191 self
.assertEqual(re
.match(r
'^(\|)?([^()]+)\1?$', 'a').groups(),
193 self
.assertEqual(re
.match(r
'^(\|)?([^()]+)\1$', 'a|'), None)
194 self
.assertEqual(re
.match(r
'^(\|)?([^()]+)\1$', '|a'), None)
195 self
.assertEqual(re
.match(r
'^(?:(a)|c)(\1)$', 'aa').groups(),
197 self
.assertEqual(re
.match(r
'^(?:(a)|c)(\1)?$', 'c').groups(),
200 def test_groupdict(self
):
201 self
.assertEqual(re
.match('(?P<first>first) (?P<second>second)',
202 'first second').groupdict(),
203 {'first':'first', 'second':'second'})
205 def test_expand(self
):
206 self
.assertEqual(re
.match("(?P<first>first) (?P<second>second)",
208 .expand(r
"\2 \1 \g<second> \g<first>"),
209 "second first second first")
211 def test_repeat_minmax(self
):
212 self
.assertEqual(re
.match("^(\w){1}$", "abc"), None)
213 self
.assertEqual(re
.match("^(\w){1}?$", "abc"), None)
214 self
.assertEqual(re
.match("^(\w){1,2}$", "abc"), None)
215 self
.assertEqual(re
.match("^(\w){1,2}?$", "abc"), None)
217 self
.assertEqual(re
.match("^(\w){3}$", "abc").group(1), "c")
218 self
.assertEqual(re
.match("^(\w){1,3}$", "abc").group(1), "c")
219 self
.assertEqual(re
.match("^(\w){1,4}$", "abc").group(1), "c")
220 self
.assertEqual(re
.match("^(\w){3,4}?$", "abc").group(1), "c")
221 self
.assertEqual(re
.match("^(\w){3}?$", "abc").group(1), "c")
222 self
.assertEqual(re
.match("^(\w){1,3}?$", "abc").group(1), "c")
223 self
.assertEqual(re
.match("^(\w){1,4}?$", "abc").group(1), "c")
224 self
.assertEqual(re
.match("^(\w){3,4}?$", "abc").group(1), "c")
226 self
.assertEqual(re
.match("^x{1}$", "xxx"), None)
227 self
.assertEqual(re
.match("^x{1}?$", "xxx"), None)
228 self
.assertEqual(re
.match("^x{1,2}$", "xxx"), None)
229 self
.assertEqual(re
.match("^x{1,2}?$", "xxx"), None)
231 self
.assertNotEqual(re
.match("^x{3}$", "xxx"), None)
232 self
.assertNotEqual(re
.match("^x{1,3}$", "xxx"), None)
233 self
.assertNotEqual(re
.match("^x{1,4}$", "xxx"), None)
234 self
.assertNotEqual(re
.match("^x{3,4}?$", "xxx"), None)
235 self
.assertNotEqual(re
.match("^x{3}?$", "xxx"), None)
236 self
.assertNotEqual(re
.match("^x{1,3}?$", "xxx"), None)
237 self
.assertNotEqual(re
.match("^x{1,4}?$", "xxx"), None)
238 self
.assertNotEqual(re
.match("^x{3,4}?$", "xxx"), None)
240 def test_getattr(self
):
241 self
.assertEqual(re
.match("(a)", "a").pos
, 0)
242 self
.assertEqual(re
.match("(a)", "a").endpos
, 1)
243 self
.assertEqual(re
.match("(a)", "a").string
, "a")
244 self
.assertEqual(re
.match("(a)", "a").regs
, ((0, 1), (0, 1)))
245 self
.assertNotEqual(re
.match("(a)", "a").re
, None)
247 def test_special_escapes(self
):
248 self
.assertEqual(re
.search(r
"\b(b.)\b",
249 "abcd abc bcd bx").group(1), "bx")
250 self
.assertEqual(re
.search(r
"\B(b.)\B",
251 "abc bcd bc abxd").group(1), "bx")
252 self
.assertEqual(re
.search(r
"\b(b.)\b",
253 "abcd abc bcd bx", re
.LOCALE
).group(1), "bx")
254 self
.assertEqual(re
.search(r
"\B(b.)\B",
255 "abc bcd bc abxd", re
.LOCALE
).group(1), "bx")
256 self
.assertEqual(re
.search(r
"\b(b.)\b",
257 "abcd abc bcd bx", re
.UNICODE
).group(1), "bx")
258 self
.assertEqual(re
.search(r
"\B(b.)\B",
259 "abc bcd bc abxd", re
.UNICODE
).group(1), "bx")
260 self
.assertEqual(re
.search(r
"^abc$", "\nabc\n", re
.M
).group(0), "abc")
261 self
.assertEqual(re
.search(r
"^\Aabc\Z$", "abc", re
.M
).group(0), "abc")
262 self
.assertEqual(re
.search(r
"^\Aabc\Z$", "\nabc\n", re
.M
), None)
263 self
.assertEqual(re
.search(r
"\b(b.)\b",
264 u
"abcd abc bcd bx").group(1), "bx")
265 self
.assertEqual(re
.search(r
"\B(b.)\B",
266 u
"abc bcd bc abxd").group(1), "bx")
267 self
.assertEqual(re
.search(r
"^abc$", u
"\nabc\n", re
.M
).group(0), "abc")
268 self
.assertEqual(re
.search(r
"^\Aabc\Z$", u
"abc", re
.M
).group(0), "abc")
269 self
.assertEqual(re
.search(r
"^\Aabc\Z$", u
"\nabc\n", re
.M
), None)
270 self
.assertEqual(re
.search(r
"\d\D\w\W\s\S",
271 "1aa! a").group(0), "1aa! a")
272 self
.assertEqual(re
.search(r
"\d\D\w\W\s\S",
273 "1aa! a", re
.LOCALE
).group(0), "1aa! a")
274 self
.assertEqual(re
.search(r
"\d\D\w\W\s\S",
275 "1aa! a", re
.UNICODE
).group(0), "1aa! a")
277 def test_ignore_case(self
):
278 self
.assertEqual(re
.match("abc", "ABC", re
.I
).group(0), "ABC")
279 self
.assertEqual(re
.match("abc", u
"ABC", re
.I
).group(0), "ABC")
281 def test_bigcharset(self
):
282 self
.assertEqual(re
.match(u
"([\u2222\u2223])",
283 u
"\u2222").group(1), u
"\u2222")
284 self
.assertEqual(re
.match(u
"([\u2222\u2223])",
285 u
"\u2222", re
.UNICODE
).group(1), u
"\u2222")
287 def test_anyall(self
):
288 self
.assertEqual(re
.match("a.b", "a\nb", re
.DOTALL
).group(0),
290 self
.assertEqual(re
.match("a.*b", "a\n\nb", re
.DOTALL
).group(0),
293 def test_non_consuming(self
):
294 self
.assertEqual(re
.match("(a(?=\s[^a]))", "a b").group(1), "a")
295 self
.assertEqual(re
.match("(a(?=\s[^a]*))", "a b").group(1), "a")
296 self
.assertEqual(re
.match("(a(?=\s[abc]))", "a b").group(1), "a")
297 self
.assertEqual(re
.match("(a(?=\s[abc]*))", "a bc").group(1), "a")
298 self
.assertEqual(re
.match(r
"(a)(?=\s\1)", "a a").group(1), "a")
299 self
.assertEqual(re
.match(r
"(a)(?=\s\1*)", "a aa").group(1), "a")
300 self
.assertEqual(re
.match(r
"(a)(?=\s(abc|a))", "a a").group(1), "a")
302 self
.assertEqual(re
.match(r
"(a(?!\s[^a]))", "a a").group(1), "a")
303 self
.assertEqual(re
.match(r
"(a(?!\s[abc]))", "a d").group(1), "a")
304 self
.assertEqual(re
.match(r
"(a)(?!\s\1)", "a b").group(1), "a")
305 self
.assertEqual(re
.match(r
"(a)(?!\s(abc|a))", "a b").group(1), "a")
307 def test_ignore_case(self
):
308 self
.assertEqual(re
.match(r
"(a\s[^a])", "a b", re
.I
).group(1), "a b")
309 self
.assertEqual(re
.match(r
"(a\s[^a]*)", "a bb", re
.I
).group(1), "a bb")
310 self
.assertEqual(re
.match(r
"(a\s[abc])", "a b", re
.I
).group(1), "a b")
311 self
.assertEqual(re
.match(r
"(a\s[abc]*)", "a bb", re
.I
).group(1), "a bb")
312 self
.assertEqual(re
.match(r
"((a)\s\2)", "a a", re
.I
).group(1), "a a")
313 self
.assertEqual(re
.match(r
"((a)\s\2*)", "a aa", re
.I
).group(1), "a aa")
314 self
.assertEqual(re
.match(r
"((a)\s(abc|a))", "a a", re
.I
).group(1), "a a")
315 self
.assertEqual(re
.match(r
"((a)\s(abc|a)*)", "a aa", re
.I
).group(1), "a aa")
317 def test_category(self
):
318 self
.assertEqual(re
.match(r
"(\s)", " ").group(1), " ")
320 def test_getlower(self
):
322 self
.assertEqual(_sre
.getlower(ord('A'), 0), ord('a'))
323 self
.assertEqual(_sre
.getlower(ord('A'), re
.LOCALE
), ord('a'))
324 self
.assertEqual(_sre
.getlower(ord('A'), re
.UNICODE
), ord('a'))
326 self
.assertEqual(re
.match("abc", "ABC", re
.I
).group(0), "ABC")
327 self
.assertEqual(re
.match("abc", u
"ABC", re
.I
).group(0), "ABC")
329 def test_not_literal(self
):
330 self
.assertEqual(re
.search("\s([^a])", " b").group(1), "b")
331 self
.assertEqual(re
.search("\s([^a]*)", " bb").group(1), "bb")
333 def test_search_coverage(self
):
334 self
.assertEqual(re
.search("\s(b)", " b").group(1), "b")
335 self
.assertEqual(re
.search("a\s", "a ").group(0), "a ")
337 def test_re_escape(self
):
339 for i
in range(0, 256):
341 self
.assertEqual(re
.match(re
.escape(chr(i
)), chr(i
)) is not None,
343 self
.assertEqual(re
.match(re
.escape(chr(i
)), chr(i
)).span(), (0,1))
345 pat
=re
.compile(re
.escape(p
))
346 self
.assertEqual(pat
.match(p
) is not None, True)
347 self
.assertEqual(pat
.match(p
).span(), (0,256))
349 def test_pickling(self
):
351 self
.pickle_test(pickle
)
353 self
.pickle_test(cPickle
)
def pickle_test(self, pickle):
    """Round-trip a compiled pattern through *pickle* and compare.

    *pickle* is any object providing dumps() and loads(); the pattern
    loaded back must compare equal to the one that was dumped.
    """
    original = re.compile('a(?:b|(c|e){1,2}?|d)+?(.)')
    restored = pickle.loads(pickle.dumps(original))
    self.assertEqual(original, restored)
361 def test_constants(self
):
362 self
.assertEqual(re
.I
, re
.IGNORECASE
)
363 self
.assertEqual(re
.L
, re
.LOCALE
)
364 self
.assertEqual(re
.M
, re
.MULTILINE
)
365 self
.assertEqual(re
.S
, re
.DOTALL
)
366 self
.assertEqual(re
.X
, re
.VERBOSE
)
368 def test_flags(self
):
369 for flag
in [re
.I
, re
.M
, re
.X
, re
.S
, re
.L
]:
370 self
.assertNotEqual(re
.compile('^pattern$', flag
), None)
372 def test_sre_character_literals(self
):
373 for i
in [0, 8, 16, 32, 64, 127, 128, 255]:
374 self
.assertNotEqual(re
.match(r
"\%03o" % i
, chr(i
)), None)
375 self
.assertNotEqual(re
.match(r
"\%03o0" % i
, chr(i
)+"0"), None)
376 self
.assertNotEqual(re
.match(r
"\%03o8" % i
, chr(i
)+"8"), None)
377 self
.assertNotEqual(re
.match(r
"\x%02x" % i
, chr(i
)), None)
378 self
.assertNotEqual(re
.match(r
"\x%02x0" % i
, chr(i
)+"0"), None)
379 self
.assertNotEqual(re
.match(r
"\x%02xz" % i
, chr(i
)+"z"), None)
380 self
.assertRaises(re
.error
, re
.match
, "\911", "")
382 def test_bug_113254(self
):
383 self
.assertEqual(re
.match(r
'(a)|(b)', 'b').start(1), -1)
384 self
.assertEqual(re
.match(r
'(a)|(b)', 'b').end(1), -1)
385 self
.assertEqual(re
.match(r
'(a)|(b)', 'b').span(1), (-1, -1))
387 def test_bug_527371(self
):
388 # bug described in patches 527371/672491
389 self
.assertEqual(re
.match(r
'(a)?a','a').lastindex
, None)
390 self
.assertEqual(re
.match(r
'(a)(b)?b','ab').lastindex
, 1)
391 self
.assertEqual(re
.match(r
'(?P<a>a)(?P<b>b)?b','ab').lastgroup
, 'a')
392 self
.assertEqual(re
.match("(?P<a>a(b))", "ab").lastgroup
, 'a')
393 self
.assertEqual(re
.match("((a))", "a").lastindex
, 1)
395 def test_bug_545855(self
):
396 # bug 545855 -- This pattern failed to cause a compile error as it
397 # should, instead provoking a TypeError.
398 self
.assertRaises(re
.error
, re
.compile, 'foo[a-')
400 def test_bug_418626(self
):
401 # bugs 418626 et al. -- Testing Greg Chapman's addition of op code
402 # SRE_OP_MIN_REPEAT_ONE for eliminating recursion on simple uses of
403 # pattern '*?' on a long string.
404 self
.assertEqual(re
.match('.*?c', 10000*'ab'+'cd').end(0), 20001)
405 self
.assertEqual(re
.match('.*?cd', 5000*'ab'+'c'+5000*'ab'+'cde').end(0),
407 self
.assertEqual(re
.match('.*?cd', 20000*'abc'+'de').end(0), 60001)
408 # non-simple '*?' still recurses and hits the recursion limit
409 self
.assertRaises(RuntimeError, re
.search
, '(a|b)*?c', 10000*'ab'+'cd')
411 def test_bug_612074(self
):
412 pat
=u
"["+re
.escape(u
"\u2039")+u
"]"
413 self
.assertEqual(re
.compile(pat
) and 1, 1)
415 def test_stack_overflow(self
):
416 # nasty case that overflows the straightforward recursive
417 # implementation of repeated groups.
418 self
.assertRaises(RuntimeError, re
.match
, '(x)*', 50000*'x')
419 self
.assertRaises(RuntimeError, re
.match
, '(x)*y', 50000*'x'+'y')
420 self
.assertRaises(RuntimeError, re
.match
, '(x)*?y', 50000*'x'+'y')
422 def test_scanner(self
):
423 def s_ident(scanner
, token
): return token
424 def s_operator(scanner
, token
): return "op%s" % token
425 def s_float(scanner
, token
): return float(token
)
426 def s_int(scanner
, token
): return int(token
)
429 (r
"[a-zA-Z_]\w*", s_ident
),
430 (r
"\d+\.\d*", s_float
),
432 (r
"=|\+|-|\*|/", s_operator
),
436 self
.assertNotEqual(scanner
.scanner
.scanner("").pattern
, None)
438 self
.assertEqual(scanner
.scan("sum = 3*foo + 312.50 + bar"),
439 (['sum', 'op=', 3, 'op*', 'foo', 'op+', 312.5,
442 def test_bug_448951(self
):
443 # bug 448951 (similar to 429357, but with single char match)
444 # (Also test greedy matches.)
445 for op
in '','?','*':
446 self
.assertEqual(re
.match(r
'((.%s):)?z'%op
, 'z').groups(),
448 self
.assertEqual(re
.match(r
'((.%s):)?z'%op
, 'a:z').groups(),
451 def test_bug_725106(self
):
452 # capturing groups in alternatives in repeats
453 self
.assertEqual(re
.match('^((a)|b)*', 'abc').groups(),
455 self
.assertEqual(re
.match('^(([ab])|c)*', 'abc').groups(),
457 self
.assertEqual(re
.match('^((d)|[ab])*', 'abc').groups(),
459 self
.assertEqual(re
.match('^((a)c|[ab])*', 'abc').groups(),
461 self
.assertEqual(re
.match('^((a)|b)*?c', 'abc').groups(),
463 self
.assertEqual(re
.match('^(([ab])|c)*?d', 'abcd').groups(),
465 self
.assertEqual(re
.match('^((d)|[ab])*?c', 'abc').groups(),
467 self
.assertEqual(re
.match('^((a)c|[ab])*?c', 'abc').groups(),
470 def test_bug_725149(self
):
471 # mark_stack_base restoring before restoring marks
472 self
.assertEqual(re
.match('(a)(?:(?=(b)*)c)*', 'abb').groups(),
474 self
.assertEqual(re
.match('(a)((?!(b)*))*', 'abb').groups(),
477 def test_bug_764548(self
):
478 # bug 764548, re.compile() barfs on str/unicode subclasses
482 return # no problem if we have no unicode
483 class my_unicode(unicode): pass
484 pat
= re
.compile(my_unicode("abc"))
485 self
.assertEqual(pat
.match("xyz"), None)
487 def test_finditer(self
):
488 iter = re
.finditer(r
":+", "a:b::c:::d")
489 self
.assertEqual([item
.group(0) for item
in iter],
492 def test_bug_926075(self
):
496 return # no problem if we have no unicode
497 self
.assert_(re
.compile('bug_926075') is not
498 re
.compile(eval("u'bug_926075'")))
501 from test
.re_tests
import benchmarks
, tests
, SUCCEED
, FAIL
, SYNTAX_ERROR
503 print 'Running re_tests test suite'
505 # To save time, only run the first and last 10 tests
506 #tests = tests[:10] + tests[-10:]
511 pattern
= s
= outcome
= repl
= expected
= None
513 pattern
, s
, outcome
, repl
, expected
= t
515 pattern
, s
, outcome
= t
517 raise ValueError, ('Test tuples should have 3 or 5 fields', t
)
520 obj
= re
.compile(pattern
)
522 if outcome
== SYNTAX_ERROR
: pass # Expected a syntax error
524 print '=== Syntax error:', t
525 except KeyboardInterrupt: raise KeyboardInterrupt
527 print '*** Unexpected error ***', t
529 traceback
.print_exc(file=sys
.stdout
)
532 result
= obj
.search(s
)
533 except re
.error
, msg
:
534 print '=== Unexpected exception', t
, repr(msg
)
535 if outcome
== SYNTAX_ERROR
:
536 # This should have been a syntax error; forget it.
538 elif outcome
== FAIL
:
539 if result
is None: pass # No match, as expected
540 else: print '=== Succeeded incorrectly', t
541 elif outcome
== SUCCEED
:
542 if result
is not None:
543 # Matched, as expected, so now we compute the
544 # result string and compare it to our expected result.
545 start
, end
= result
.span(0)
546 vardict
={'found': result
.group(0),
547 'groups': result
.group(),
548 'flags': result
.re
.flags
}
549 for i
in range(1, 100):
552 # Special hack because else the string concat fails:
557 vardict
['g%d' % i
] = gi
558 for i
in result
.re
.groupindex
.keys():
566 repl
= eval(repl
, vardict
)
568 print '=== grouping error', t
,
569 print repr(repl
) + ' should be ' + repr(expected
)
571 print '=== Failed incorrectly', t
573 # Try the match on a unicode string, and check that it
576 result
= obj
.search(unicode(s
, "latin-1"))
578 print '=== Fails on unicode match', t
582 continue # unicode test case
584 # Try the match on a unicode pattern, and check that it
586 obj
=re
.compile(unicode(pattern
, "latin-1"))
587 result
= obj
.search(s
)
589 print '=== Fails on unicode pattern match', t
591 # Try the match with the search area limited to the extent
592 # of the match and see if it still succeeds. \B will
593 # break (because it won't match at the end or start of a
594 # string), so we'll ignore patterns that feature it.
596 if pattern
[:2] != '\\B' and pattern
[-2:] != '\\B' \
597 and result
is not None:
598 obj
= re
.compile(pattern
)
599 result
= obj
.search(s
, result
.start(0), result
.end(0) + 1)
601 print '=== Failed on range-limited match', t
603 # Try the match with IGNORECASE enabled, and check that it
605 obj
= re
.compile(pattern
, re
.IGNORECASE
)
606 result
= obj
.search(s
)
608 print '=== Fails on case-insensitive match', t
610 # Try the match with LOCALE enabled, and check that it
612 obj
= re
.compile(pattern
, re
.LOCALE
)
613 result
= obj
.search(s
)
615 print '=== Fails on locale-sensitive match', t
617 # Try the match with UNICODE locale enabled, and check
618 # that it still succeeds.
619 obj
= re
.compile(pattern
, re
.UNICODE
)
620 result
= obj
.search(s
)
622 print '=== Fails on unicode-sensitive match', t
625 run_unittest(ReTests
)
628 if __name__
== "__main__":