This commit was manufactured by cvs2svn to create tag 'r234c1'.
[python/dscho.git] / Lib / test / test_re.py
blobfb8b175df8edf9d953ae3067b75e4e2e4a26eb17
1 import sys
2 sys.path = ['.'] + sys.path
4 from test.test_support import verbose, run_unittest
5 import re
6 from sre import Scanner
7 import sys, os, traceback
9 # Misc tests from Tim Peters' re.doc
11 # WARNING: Don't change details in these tests if you don't know
12 # what you're doing. Some of these tests were carefully modeled to
13 # cover most of the code.
15 import unittest
17 class ReTests(unittest.TestCase):
18 def test_search_star_plus(self):
19 self.assertEqual(re.search('x*', 'axx').span(0), (0, 0))
20 self.assertEqual(re.search('x*', 'axx').span(), (0, 0))
21 self.assertEqual(re.search('x+', 'axx').span(0), (1, 3))
22 self.assertEqual(re.search('x+', 'axx').span(), (1, 3))
23 self.assertEqual(re.search('x', 'aaa'), None)
24 self.assertEqual(re.match('a*', 'xxx').span(0), (0, 0))
25 self.assertEqual(re.match('a*', 'xxx').span(), (0, 0))
26 self.assertEqual(re.match('x*', 'xxxa').span(0), (0, 3))
27 self.assertEqual(re.match('x*', 'xxxa').span(), (0, 3))
28 self.assertEqual(re.match('a+', 'xxx'), None)
30 def bump_num(self, matchobj):
31 int_value = int(matchobj.group(0))
32 return str(int_value + 1)
34 def test_basic_re_sub(self):
35 self.assertEqual(re.sub("(?i)b+", "x", "bbbb BBBB"), 'x x')
36 self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y'),
37 '9.3 -3 24x100y')
38 self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y', 3),
39 '9.3 -3 23x99y')
41 self.assertEqual(re.sub('.', lambda m: r"\n", 'x'), '\\n')
42 self.assertEqual(re.sub('.', r"\n", 'x'), '\n')
44 s = r"\1\1"
45 self.assertEqual(re.sub('(.)', s, 'x'), 'xx')
46 self.assertEqual(re.sub('(.)', re.escape(s), 'x'), s)
47 self.assertEqual(re.sub('(.)', lambda m: s, 'x'), s)
49 self.assertEqual(re.sub('(?P<a>x)', '\g<a>\g<a>', 'xx'), 'xxxx')
50 self.assertEqual(re.sub('(?P<a>x)', '\g<a>\g<1>', 'xx'), 'xxxx')
51 self.assertEqual(re.sub('(?P<unk>x)', '\g<unk>\g<unk>', 'xx'), 'xxxx')
52 self.assertEqual(re.sub('(?P<unk>x)', '\g<1>\g<1>', 'xx'), 'xxxx')
54 self.assertEqual(re.sub('a',r'\t\n\v\r\f\a\b\B\Z\a\A\w\W\s\S\d\D','a'),
55 '\t\n\v\r\f\a\b\\B\\Z\a\\A\\w\\W\\s\\S\\d\\D')
56 self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'), '\t\n\v\r\f\a')
57 self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'),
58 (chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)))
60 self.assertEqual(re.sub('^\s*', 'X', 'test'), 'Xtest')
62 def test_bug_449964(self):
63 # fails for group followed by other escape
64 self.assertEqual(re.sub(r'(?P<unk>x)', '\g<1>\g<1>\\b', 'xx'),
65 'xx\bxx\b')
67 def test_bug_449000(self):
68 # Test for sub() on escaped characters
69 self.assertEqual(re.sub(r'\r\n', r'\n', 'abc\r\ndef\r\n'),
70 'abc\ndef\n')
71 self.assertEqual(re.sub('\r\n', r'\n', 'abc\r\ndef\r\n'),
72 'abc\ndef\n')
73 self.assertEqual(re.sub(r'\r\n', '\n', 'abc\r\ndef\r\n'),
74 'abc\ndef\n')
75 self.assertEqual(re.sub('\r\n', '\n', 'abc\r\ndef\r\n'),
76 'abc\ndef\n')
78 def test_qualified_re_sub(self):
79 self.assertEqual(re.sub('a', 'b', 'aaaaa'), 'bbbbb')
80 self.assertEqual(re.sub('a', 'b', 'aaaaa', 1), 'baaaa')
82 def test_bug_114660(self):
83 self.assertEqual(re.sub(r'(\S)\s+(\S)', r'\1 \2', 'hello there'),
84 'hello there')
86 def test_bug_462270(self):
87 # Test for empty sub() behaviour, see SF bug #462270
88 self.assertEqual(re.sub('x*', '-', 'abxd'), '-a-b-d-')
89 self.assertEqual(re.sub('x+', '-', 'abxd'), 'ab-d')
91 def test_symbolic_refs(self):
92 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a', 'xx')
93 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<', 'xx')
94 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g', 'xx')
95 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a a>', 'xx')
96 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<1a1>', 'xx')
97 self.assertRaises(IndexError, re.sub, '(?P<a>x)', '\g<ab>', 'xx')
98 self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\g<b>', 'xx')
99 self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\\2', 'xx')
101 def test_re_subn(self):
102 self.assertEqual(re.subn("(?i)b+", "x", "bbbb BBBB"), ('x x', 2))
103 self.assertEqual(re.subn("b+", "x", "bbbb BBBB"), ('x BBBB', 1))
104 self.assertEqual(re.subn("b+", "x", "xyz"), ('xyz', 0))
105 self.assertEqual(re.subn("b*", "x", "xyz"), ('xxxyxzx', 4))
106 self.assertEqual(re.subn("b*", "x", "xyz", 2), ('xxxyz', 2))
108 def test_re_split(self):
109 self.assertEqual(re.split(":", ":a:b::c"), ['', 'a', 'b', '', 'c'])
110 self.assertEqual(re.split(":*", ":a:b::c"), ['', 'a', 'b', 'c'])
111 self.assertEqual(re.split("(:*)", ":a:b::c"),
112 ['', ':', 'a', ':', 'b', '::', 'c'])
113 self.assertEqual(re.split("(?::*)", ":a:b::c"), ['', 'a', 'b', 'c'])
114 self.assertEqual(re.split("(:)*", ":a:b::c"),
115 ['', ':', 'a', ':', 'b', ':', 'c'])
116 self.assertEqual(re.split("([b:]+)", ":a:b::c"),
117 ['', ':', 'a', ':b::', 'c'])
118 self.assertEqual(re.split("(b)|(:+)", ":a:b::c"),
119 ['', None, ':', 'a', None, ':', '', 'b', None, '',
120 None, '::', 'c'])
121 self.assertEqual(re.split("(?:b)|(?::+)", ":a:b::c"),
122 ['', 'a', '', '', 'c'])
124 def test_qualified_re_split(self):
125 self.assertEqual(re.split(":", ":a:b::c", 2), ['', 'a', 'b::c'])
126 self.assertEqual(re.split(':', 'a:b:c:d', 2), ['a', 'b', 'c:d'])
127 self.assertEqual(re.split("(:)", ":a:b::c", 2),
128 ['', ':', 'a', ':', 'b::c'])
129 self.assertEqual(re.split("(:*)", ":a:b::c", 2),
130 ['', ':', 'a', ':', 'b::c'])
132 def test_re_findall(self):
133 self.assertEqual(re.findall(":+", "abc"), [])
134 self.assertEqual(re.findall(":+", "a:b::c:::d"), [":", "::", ":::"])
135 self.assertEqual(re.findall("(:+)", "a:b::c:::d"), [":", "::", ":::"])
136 self.assertEqual(re.findall("(:)(:*)", "a:b::c:::d"), [(":", ""),
137 (":", ":"),
138 (":", "::")])
140 def test_bug_117612(self):
141 self.assertEqual(re.findall(r"(a|(b))", "aba"),
142 [("a", ""),("b", "b"),("a", "")])
144 def test_re_match(self):
145 self.assertEqual(re.match('a', 'a').groups(), ())
146 self.assertEqual(re.match('(a)', 'a').groups(), ('a',))
147 self.assertEqual(re.match(r'(a)', 'a').group(0), 'a')
148 self.assertEqual(re.match(r'(a)', 'a').group(1), 'a')
149 self.assertEqual(re.match(r'(a)', 'a').group(1, 1), ('a', 'a'))
151 pat = re.compile('((a)|(b))(c)?')
152 self.assertEqual(pat.match('a').groups(), ('a', 'a', None, None))
153 self.assertEqual(pat.match('b').groups(), ('b', None, 'b', None))
154 self.assertEqual(pat.match('ac').groups(), ('a', 'a', None, 'c'))
155 self.assertEqual(pat.match('bc').groups(), ('b', None, 'b', 'c'))
156 self.assertEqual(pat.match('bc').groups(""), ('b', "", 'b', 'c'))
158 # A single group
159 m = re.match('(a)', 'a')
160 self.assertEqual(m.group(0), 'a')
161 self.assertEqual(m.group(0), 'a')
162 self.assertEqual(m.group(1), 'a')
163 self.assertEqual(m.group(1, 1), ('a', 'a'))
165 pat = re.compile('(?:(?P<a1>a)|(?P<b2>b))(?P<c3>c)?')
166 self.assertEqual(pat.match('a').group(1, 2, 3), ('a', None, None))
167 self.assertEqual(pat.match('b').group('a1', 'b2', 'c3'),
168 (None, 'b', None))
169 self.assertEqual(pat.match('ac').group(1, 'b2', 3), ('a', None, 'c'))
171 def test_re_groupref_exists(self):
172 return # not yet
173 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a)').groups(),
174 ('(', 'a'))
175 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a').groups(),
176 (None, 'a'))
177 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a)'), None)
178 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a'), None)
179 self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'ab').groups(),
180 ('a', 'b'))
181 self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'cd').groups(),
182 (None, 'd'))
183 self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'cd').groups(),
184 (None, 'd'))
185 self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'a').groups(),
186 ('a', ''))
188 def test_re_groupref(self):
189 self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a|').groups(),
190 ('|', 'a'))
191 self.assertEqual(re.match(r'^(\|)?([^()]+)\1?$', 'a').groups(),
192 (None, 'a'))
193 self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', 'a|'), None)
194 self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a'), None)
195 self.assertEqual(re.match(r'^(?:(a)|c)(\1)$', 'aa').groups(),
196 ('a', 'a'))
197 self.assertEqual(re.match(r'^(?:(a)|c)(\1)?$', 'c').groups(),
198 (None, None))
200 def test_groupdict(self):
201 self.assertEqual(re.match('(?P<first>first) (?P<second>second)',
202 'first second').groupdict(),
203 {'first':'first', 'second':'second'})
205 def test_expand(self):
206 self.assertEqual(re.match("(?P<first>first) (?P<second>second)",
207 "first second")
208 .expand(r"\2 \1 \g<second> \g<first>"),
209 "second first second first")
211 def test_repeat_minmax(self):
212 self.assertEqual(re.match("^(\w){1}$", "abc"), None)
213 self.assertEqual(re.match("^(\w){1}?$", "abc"), None)
214 self.assertEqual(re.match("^(\w){1,2}$", "abc"), None)
215 self.assertEqual(re.match("^(\w){1,2}?$", "abc"), None)
217 self.assertEqual(re.match("^(\w){3}$", "abc").group(1), "c")
218 self.assertEqual(re.match("^(\w){1,3}$", "abc").group(1), "c")
219 self.assertEqual(re.match("^(\w){1,4}$", "abc").group(1), "c")
220 self.assertEqual(re.match("^(\w){3,4}?$", "abc").group(1), "c")
221 self.assertEqual(re.match("^(\w){3}?$", "abc").group(1), "c")
222 self.assertEqual(re.match("^(\w){1,3}?$", "abc").group(1), "c")
223 self.assertEqual(re.match("^(\w){1,4}?$", "abc").group(1), "c")
224 self.assertEqual(re.match("^(\w){3,4}?$", "abc").group(1), "c")
226 self.assertEqual(re.match("^x{1}$", "xxx"), None)
227 self.assertEqual(re.match("^x{1}?$", "xxx"), None)
228 self.assertEqual(re.match("^x{1,2}$", "xxx"), None)
229 self.assertEqual(re.match("^x{1,2}?$", "xxx"), None)
231 self.assertNotEqual(re.match("^x{3}$", "xxx"), None)
232 self.assertNotEqual(re.match("^x{1,3}$", "xxx"), None)
233 self.assertNotEqual(re.match("^x{1,4}$", "xxx"), None)
234 self.assertNotEqual(re.match("^x{3,4}?$", "xxx"), None)
235 self.assertNotEqual(re.match("^x{3}?$", "xxx"), None)
236 self.assertNotEqual(re.match("^x{1,3}?$", "xxx"), None)
237 self.assertNotEqual(re.match("^x{1,4}?$", "xxx"), None)
238 self.assertNotEqual(re.match("^x{3,4}?$", "xxx"), None)
240 def test_getattr(self):
241 self.assertEqual(re.match("(a)", "a").pos, 0)
242 self.assertEqual(re.match("(a)", "a").endpos, 1)
243 self.assertEqual(re.match("(a)", "a").string, "a")
244 self.assertEqual(re.match("(a)", "a").regs, ((0, 1), (0, 1)))
245 self.assertNotEqual(re.match("(a)", "a").re, None)
247 def test_special_escapes(self):
248 self.assertEqual(re.search(r"\b(b.)\b",
249 "abcd abc bcd bx").group(1), "bx")
250 self.assertEqual(re.search(r"\B(b.)\B",
251 "abc bcd bc abxd").group(1), "bx")
252 self.assertEqual(re.search(r"\b(b.)\b",
253 "abcd abc bcd bx", re.LOCALE).group(1), "bx")
254 self.assertEqual(re.search(r"\B(b.)\B",
255 "abc bcd bc abxd", re.LOCALE).group(1), "bx")
256 self.assertEqual(re.search(r"\b(b.)\b",
257 "abcd abc bcd bx", re.UNICODE).group(1), "bx")
258 self.assertEqual(re.search(r"\B(b.)\B",
259 "abc bcd bc abxd", re.UNICODE).group(1), "bx")
260 self.assertEqual(re.search(r"^abc$", "\nabc\n", re.M).group(0), "abc")
261 self.assertEqual(re.search(r"^\Aabc\Z$", "abc", re.M).group(0), "abc")
262 self.assertEqual(re.search(r"^\Aabc\Z$", "\nabc\n", re.M), None)
263 self.assertEqual(re.search(r"\b(b.)\b",
264 u"abcd abc bcd bx").group(1), "bx")
265 self.assertEqual(re.search(r"\B(b.)\B",
266 u"abc bcd bc abxd").group(1), "bx")
267 self.assertEqual(re.search(r"^abc$", u"\nabc\n", re.M).group(0), "abc")
268 self.assertEqual(re.search(r"^\Aabc\Z$", u"abc", re.M).group(0), "abc")
269 self.assertEqual(re.search(r"^\Aabc\Z$", u"\nabc\n", re.M), None)
270 self.assertEqual(re.search(r"\d\D\w\W\s\S",
271 "1aa! a").group(0), "1aa! a")
272 self.assertEqual(re.search(r"\d\D\w\W\s\S",
273 "1aa! a", re.LOCALE).group(0), "1aa! a")
274 self.assertEqual(re.search(r"\d\D\w\W\s\S",
275 "1aa! a", re.UNICODE).group(0), "1aa! a")
277 def test_ignore_case(self):
278 self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
279 self.assertEqual(re.match("abc", u"ABC", re.I).group(0), "ABC")
281 def test_bigcharset(self):
282 self.assertEqual(re.match(u"([\u2222\u2223])",
283 u"\u2222").group(1), u"\u2222")
284 self.assertEqual(re.match(u"([\u2222\u2223])",
285 u"\u2222", re.UNICODE).group(1), u"\u2222")
287 def test_anyall(self):
288 self.assertEqual(re.match("a.b", "a\nb", re.DOTALL).group(0),
289 "a\nb")
290 self.assertEqual(re.match("a.*b", "a\n\nb", re.DOTALL).group(0),
291 "a\n\nb")
293 def test_non_consuming(self):
294 self.assertEqual(re.match("(a(?=\s[^a]))", "a b").group(1), "a")
295 self.assertEqual(re.match("(a(?=\s[^a]*))", "a b").group(1), "a")
296 self.assertEqual(re.match("(a(?=\s[abc]))", "a b").group(1), "a")
297 self.assertEqual(re.match("(a(?=\s[abc]*))", "a bc").group(1), "a")
298 self.assertEqual(re.match(r"(a)(?=\s\1)", "a a").group(1), "a")
299 self.assertEqual(re.match(r"(a)(?=\s\1*)", "a aa").group(1), "a")
300 self.assertEqual(re.match(r"(a)(?=\s(abc|a))", "a a").group(1), "a")
302 self.assertEqual(re.match(r"(a(?!\s[^a]))", "a a").group(1), "a")
303 self.assertEqual(re.match(r"(a(?!\s[abc]))", "a d").group(1), "a")
304 self.assertEqual(re.match(r"(a)(?!\s\1)", "a b").group(1), "a")
305 self.assertEqual(re.match(r"(a)(?!\s(abc|a))", "a b").group(1), "a")
307 def test_ignore_case(self):
308 self.assertEqual(re.match(r"(a\s[^a])", "a b", re.I).group(1), "a b")
309 self.assertEqual(re.match(r"(a\s[^a]*)", "a bb", re.I).group(1), "a bb")
310 self.assertEqual(re.match(r"(a\s[abc])", "a b", re.I).group(1), "a b")
311 self.assertEqual(re.match(r"(a\s[abc]*)", "a bb", re.I).group(1), "a bb")
312 self.assertEqual(re.match(r"((a)\s\2)", "a a", re.I).group(1), "a a")
313 self.assertEqual(re.match(r"((a)\s\2*)", "a aa", re.I).group(1), "a aa")
314 self.assertEqual(re.match(r"((a)\s(abc|a))", "a a", re.I).group(1), "a a")
315 self.assertEqual(re.match(r"((a)\s(abc|a)*)", "a aa", re.I).group(1), "a aa")
317 def test_category(self):
318 self.assertEqual(re.match(r"(\s)", " ").group(1), " ")
320 def test_getlower(self):
321 import _sre
322 self.assertEqual(_sre.getlower(ord('A'), 0), ord('a'))
323 self.assertEqual(_sre.getlower(ord('A'), re.LOCALE), ord('a'))
324 self.assertEqual(_sre.getlower(ord('A'), re.UNICODE), ord('a'))
326 self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
327 self.assertEqual(re.match("abc", u"ABC", re.I).group(0), "ABC")
329 def test_not_literal(self):
330 self.assertEqual(re.search("\s([^a])", " b").group(1), "b")
331 self.assertEqual(re.search("\s([^a]*)", " bb").group(1), "bb")
333 def test_search_coverage(self):
334 self.assertEqual(re.search("\s(b)", " b").group(1), "b")
335 self.assertEqual(re.search("a\s", "a ").group(0), "a ")
337 def test_re_escape(self):
338 p=""
339 for i in range(0, 256):
340 p = p + chr(i)
341 self.assertEqual(re.match(re.escape(chr(i)), chr(i)) is not None,
342 True)
343 self.assertEqual(re.match(re.escape(chr(i)), chr(i)).span(), (0,1))
345 pat=re.compile(re.escape(p))
346 self.assertEqual(pat.match(p) is not None, True)
347 self.assertEqual(pat.match(p).span(), (0,256))
349 def test_pickling(self):
350 import pickle
351 self.pickle_test(pickle)
352 import cPickle
353 self.pickle_test(cPickle)
355 def pickle_test(self, pickle):
356 oldpat = re.compile('a(?:b|(c|e){1,2}?|d)+?(.)')
357 s = pickle.dumps(oldpat)
358 newpat = pickle.loads(s)
359 self.assertEqual(oldpat, newpat)
361 def test_constants(self):
362 self.assertEqual(re.I, re.IGNORECASE)
363 self.assertEqual(re.L, re.LOCALE)
364 self.assertEqual(re.M, re.MULTILINE)
365 self.assertEqual(re.S, re.DOTALL)
366 self.assertEqual(re.X, re.VERBOSE)
368 def test_flags(self):
369 for flag in [re.I, re.M, re.X, re.S, re.L]:
370 self.assertNotEqual(re.compile('^pattern$', flag), None)
372 def test_sre_character_literals(self):
373 for i in [0, 8, 16, 32, 64, 127, 128, 255]:
374 self.assertNotEqual(re.match(r"\%03o" % i, chr(i)), None)
375 self.assertNotEqual(re.match(r"\%03o0" % i, chr(i)+"0"), None)
376 self.assertNotEqual(re.match(r"\%03o8" % i, chr(i)+"8"), None)
377 self.assertNotEqual(re.match(r"\x%02x" % i, chr(i)), None)
378 self.assertNotEqual(re.match(r"\x%02x0" % i, chr(i)+"0"), None)
379 self.assertNotEqual(re.match(r"\x%02xz" % i, chr(i)+"z"), None)
380 self.assertRaises(re.error, re.match, "\911", "")
382 def test_bug_113254(self):
383 self.assertEqual(re.match(r'(a)|(b)', 'b').start(1), -1)
384 self.assertEqual(re.match(r'(a)|(b)', 'b').end(1), -1)
385 self.assertEqual(re.match(r'(a)|(b)', 'b').span(1), (-1, -1))
387 def test_bug_527371(self):
388 # bug described in patches 527371/672491
389 self.assertEqual(re.match(r'(a)?a','a').lastindex, None)
390 self.assertEqual(re.match(r'(a)(b)?b','ab').lastindex, 1)
391 self.assertEqual(re.match(r'(?P<a>a)(?P<b>b)?b','ab').lastgroup, 'a')
392 self.assertEqual(re.match("(?P<a>a(b))", "ab").lastgroup, 'a')
393 self.assertEqual(re.match("((a))", "a").lastindex, 1)
395 def test_bug_545855(self):
396 # bug 545855 -- This pattern failed to cause a compile error as it
397 # should, instead provoking a TypeError.
398 self.assertRaises(re.error, re.compile, 'foo[a-')
400 def test_bug_418626(self):
401 # bugs 418626 at al. -- Testing Greg Chapman's addition of op code
402 # SRE_OP_MIN_REPEAT_ONE for eliminating recursion on simple uses of
403 # pattern '*?' on a long string.
404 self.assertEqual(re.match('.*?c', 10000*'ab'+'cd').end(0), 20001)
405 self.assertEqual(re.match('.*?cd', 5000*'ab'+'c'+5000*'ab'+'cde').end(0),
406 20003)
407 self.assertEqual(re.match('.*?cd', 20000*'abc'+'de').end(0), 60001)
408 # non-simple '*?' still recurses and hits the recursion limit
409 self.assertRaises(RuntimeError, re.search, '(a|b)*?c', 10000*'ab'+'cd')
411 def test_bug_612074(self):
412 pat=u"["+re.escape(u"\u2039")+u"]"
413 self.assertEqual(re.compile(pat) and 1, 1)
415 def test_stack_overflow(self):
416 # nasty case that overflows the straightforward recursive
417 # implementation of repeated groups.
418 self.assertRaises(RuntimeError, re.match, '(x)*', 50000*'x')
419 self.assertRaises(RuntimeError, re.match, '(x)*y', 50000*'x'+'y')
420 self.assertRaises(RuntimeError, re.match, '(x)*?y', 50000*'x'+'y')
422 def test_scanner(self):
423 def s_ident(scanner, token): return token
424 def s_operator(scanner, token): return "op%s" % token
425 def s_float(scanner, token): return float(token)
426 def s_int(scanner, token): return int(token)
428 scanner = Scanner([
429 (r"[a-zA-Z_]\w*", s_ident),
430 (r"\d+\.\d*", s_float),
431 (r"\d+", s_int),
432 (r"=|\+|-|\*|/", s_operator),
433 (r"\s+", None),
436 self.assertNotEqual(scanner.scanner.scanner("").pattern, None)
438 self.assertEqual(scanner.scan("sum = 3*foo + 312.50 + bar"),
439 (['sum', 'op=', 3, 'op*', 'foo', 'op+', 312.5,
440 'op+', 'bar'], ''))
442 def test_bug_448951(self):
443 # bug 448951 (similar to 429357, but with single char match)
444 # (Also test greedy matches.)
445 for op in '','?','*':
446 self.assertEqual(re.match(r'((.%s):)?z'%op, 'z').groups(),
447 (None, None))
448 self.assertEqual(re.match(r'((.%s):)?z'%op, 'a:z').groups(),
449 ('a:', 'a'))
451 def test_bug_725106(self):
452 # capturing groups in alternatives in repeats
453 self.assertEqual(re.match('^((a)|b)*', 'abc').groups(),
454 ('b', 'a'))
455 self.assertEqual(re.match('^(([ab])|c)*', 'abc').groups(),
456 ('c', 'b'))
457 self.assertEqual(re.match('^((d)|[ab])*', 'abc').groups(),
458 ('b', None))
459 self.assertEqual(re.match('^((a)c|[ab])*', 'abc').groups(),
460 ('b', None))
461 self.assertEqual(re.match('^((a)|b)*?c', 'abc').groups(),
462 ('b', 'a'))
463 self.assertEqual(re.match('^(([ab])|c)*?d', 'abcd').groups(),
464 ('c', 'b'))
465 self.assertEqual(re.match('^((d)|[ab])*?c', 'abc').groups(),
466 ('b', None))
467 self.assertEqual(re.match('^((a)c|[ab])*?c', 'abc').groups(),
468 ('b', None))
470 def test_bug_725149(self):
471 # mark_stack_base restoring before restoring marks
472 self.assertEqual(re.match('(a)(?:(?=(b)*)c)*', 'abb').groups(),
473 ('a', None))
474 self.assertEqual(re.match('(a)((?!(b)*))*', 'abb').groups(),
475 ('a', None, None))
477 def test_bug_764548(self):
478 # bug 764548, re.compile() barfs on str/unicode subclasses
479 try:
480 unicode
481 except NameError:
482 return # no problem if we have no unicode
483 class my_unicode(unicode): pass
484 pat = re.compile(my_unicode("abc"))
485 self.assertEqual(pat.match("xyz"), None)
487 def test_finditer(self):
488 iter = re.finditer(r":+", "a:b::c:::d")
489 self.assertEqual([item.group(0) for item in iter],
490 [":", "::", ":::"])
492 def test_bug_926075(self):
493 try:
494 unicode
495 except NameError:
496 return # no problem if we have no unicode
497 self.assert_(re.compile('bug_926075') is not
498 re.compile(eval("u'bug_926075'")))
500 def run_re_tests():
501 from test.re_tests import benchmarks, tests, SUCCEED, FAIL, SYNTAX_ERROR
502 if verbose:
503 print 'Running re_tests test suite'
504 else:
505 # To save time, only run the first and last 10 tests
506 #tests = tests[:10] + tests[-10:]
507 pass
509 for t in tests:
510 sys.stdout.flush()
511 pattern = s = outcome = repl = expected = None
512 if len(t) == 5:
513 pattern, s, outcome, repl, expected = t
514 elif len(t) == 3:
515 pattern, s, outcome = t
516 else:
517 raise ValueError, ('Test tuples should have 3 or 5 fields', t)
519 try:
520 obj = re.compile(pattern)
521 except re.error:
522 if outcome == SYNTAX_ERROR: pass # Expected a syntax error
523 else:
524 print '=== Syntax error:', t
525 except KeyboardInterrupt: raise KeyboardInterrupt
526 except:
527 print '*** Unexpected error ***', t
528 if verbose:
529 traceback.print_exc(file=sys.stdout)
530 else:
531 try:
532 result = obj.search(s)
533 except re.error, msg:
534 print '=== Unexpected exception', t, repr(msg)
535 if outcome == SYNTAX_ERROR:
536 # This should have been a syntax error; forget it.
537 pass
538 elif outcome == FAIL:
539 if result is None: pass # No match, as expected
540 else: print '=== Succeeded incorrectly', t
541 elif outcome == SUCCEED:
542 if result is not None:
543 # Matched, as expected, so now we compute the
544 # result string and compare it to our expected result.
545 start, end = result.span(0)
546 vardict={'found': result.group(0),
547 'groups': result.group(),
548 'flags': result.re.flags}
549 for i in range(1, 100):
550 try:
551 gi = result.group(i)
552 # Special hack because else the string concat fails:
553 if gi is None:
554 gi = "None"
555 except IndexError:
556 gi = "Error"
557 vardict['g%d' % i] = gi
558 for i in result.re.groupindex.keys():
559 try:
560 gi = result.group(i)
561 if gi is None:
562 gi = "None"
563 except IndexError:
564 gi = "Error"
565 vardict[i] = gi
566 repl = eval(repl, vardict)
567 if repl != expected:
568 print '=== grouping error', t,
569 print repr(repl) + ' should be ' + repr(expected)
570 else:
571 print '=== Failed incorrectly', t
573 # Try the match on a unicode string, and check that it
574 # still succeeds.
575 try:
576 result = obj.search(unicode(s, "latin-1"))
577 if result is None:
578 print '=== Fails on unicode match', t
579 except NameError:
580 continue # 1.5.2
581 except TypeError:
582 continue # unicode test case
584 # Try the match on a unicode pattern, and check that it
585 # still succeeds.
586 obj=re.compile(unicode(pattern, "latin-1"))
587 result = obj.search(s)
588 if result is None:
589 print '=== Fails on unicode pattern match', t
591 # Try the match with the search area limited to the extent
592 # of the match and see if it still succeeds. \B will
593 # break (because it won't match at the end or start of a
594 # string), so we'll ignore patterns that feature it.
596 if pattern[:2] != '\\B' and pattern[-2:] != '\\B' \
597 and result is not None:
598 obj = re.compile(pattern)
599 result = obj.search(s, result.start(0), result.end(0) + 1)
600 if result is None:
601 print '=== Failed on range-limited match', t
603 # Try the match with IGNORECASE enabled, and check that it
604 # still succeeds.
605 obj = re.compile(pattern, re.IGNORECASE)
606 result = obj.search(s)
607 if result is None:
608 print '=== Fails on case-insensitive match', t
610 # Try the match with LOCALE enabled, and check that it
611 # still succeeds.
612 obj = re.compile(pattern, re.LOCALE)
613 result = obj.search(s)
614 if result is None:
615 print '=== Fails on locale-sensitive match', t
617 # Try the match with UNICODE locale enabled, and check
618 # that it still succeeds.
619 obj = re.compile(pattern, re.UNICODE)
620 result = obj.search(s)
621 if result is None:
622 print '=== Fails on unicode-sensitive match', t
def test_main():
    """Run the whole suite: the unittest-based ReTests class first, then
    the table-driven checks in run_re_tests()."""
    run_unittest(ReTests)
    run_re_tests()
# Allow running this file directly as a script.
if "__main__" == __name__:
    test_main()