This commit was manufactured by cvs2svn to create tag 'r211c1'.
[python/dscho.git] / Lib / test / re_tests.py
blob7c5dc890d919bb106d3c2b939459b94db0fc37b6
1 #!/usr/bin/env python
2 # -*- mode: python -*-
4 # Re test suite and benchmark suite v1.5
# The three possible expected outcomes for a test pattern.  They are
# bound to the consecutive integers 0, 1 and 2, and one of them appears
# as element 2 of every entry in the test table.
SUCCEED, FAIL, SYNTAX_ERROR = range(3)
# Benchmark suite (needs expansion)
#
# The benchmark suite does not test correctness, just speed.  Each entry
# is a 2-tuple: the first element is the regex pattern; the second is a
# string to match it against.  The benchmarking code will embed the
# second string inside several sizes of padding, to test how regex
# matching performs on large strings.

benchmarks = [

    # test common prefix
    ('Python|Perl', 'Perl'),                # Alternation
    ('(Python|Perl)', 'Perl'),              # Grouped alternation

    ('Python|Perl|Tcl', 'Perl'),            # Alternation
    ('(Python|Perl|Tcl)', 'Perl'),          # Grouped alternation

    ('(Python)\\1', 'PythonPython'),        # Backreference
    ('([0a-z][a-z0-9]*,)+', 'a5,b7,c9,'),   # Disable the fastmap optimization
    ('([a-z][a-z0-9]*,)+', 'a5,b7,c9,'),    # A few sets

    ('Python', 'Python'),                   # Simple text literal
    ('.*Python', 'Python'),                 # Bad text literal
    ('.*Python.*', 'Python'),               # Worse text literal
    ('.*(Python)', 'Python'),               # Bad text literal with grouping

]
37 # Test suite (for verifying correctness)
39 # The test suite is a list of 5- or 3-tuples. The 5 parts of a
40 # complete tuple are:
41 # element 0: a string containing the pattern
42 # 1: the string to match against the pattern
43 # 2: the expected result (SUCCEED, FAIL, SYNTAX_ERROR)
44 # 3: a string that will be eval()'ed to produce a test string.
45 # This is an arbitrary Python expression; the available
46 # variables are "found" (the whole match), and "g1", "g2", ...
47 # up to "g99" contain the contents of each group, or the
48 # string 'None' if the group wasn't given a value, or the
49 # string 'Error' if the group index was out of range;
50 # also "groups", the return value of m.groups() (a tuple).
51 # 4: The expected result of evaluating the expression.
52 # If the two don't match, an error is reported.
54 # If the regex isn't expected to work, the latter two elements can be omitted.
56 tests = [
57 # Test ?P< and ?P= extensions
58 ('(?P<foo_123', '', SYNTAX_ERROR), # Unterminated group identifier
59 ('(?P<1>a)', '', SYNTAX_ERROR), # Begins with a digit
60 ('(?P<!>a)', '', SYNTAX_ERROR), # Begins with an illegal char
61 ('(?P<foo!>a)', '', SYNTAX_ERROR), # Contains an illegal char
63 # Same tests, for the ?P= form
64 ('(?P<foo_123>a)(?P=foo_123', 'aa', SYNTAX_ERROR),
65 ('(?P<foo_123>a)(?P=1)', 'aa', SYNTAX_ERROR),
66 ('(?P<foo_123>a)(?P=!)', 'aa', SYNTAX_ERROR),
67 ('(?P<foo_123>a)(?P=foo_124', 'aa', SYNTAX_ERROR), # Backref to undefined group
69 ('(?P<foo_123>a)', 'a', SUCCEED, 'g1', 'a'),
70 ('(?P<foo_123>a)(?P=foo_123)', 'aa', SUCCEED, 'g1', 'a'),
72 # Test octal escapes
73 ('\\1', 'a', SYNTAX_ERROR), # Backreference
74 ('[\\1]', '\1', SUCCEED, 'found', '\1'), # Character
75 ('\\09', chr(0) + '9', SUCCEED, 'found', chr(0) + '9'),
76 ('\\141', 'a', SUCCEED, 'found', 'a'),
77 ('(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)(l)\\119', 'abcdefghijklk9', SUCCEED, 'found+"-"+g11', 'abcdefghijklk9-k'),
79 # Test \0 is handled everywhere
80 (r'\0', '\0', SUCCEED, 'found', '\0'),
81 (r'[\0a]', '\0', SUCCEED, 'found', '\0'),
82 (r'[a\0]', '\0', SUCCEED, 'found', '\0'),
83 (r'[^a\0]', '\0', FAIL),
85 # Test various letter escapes
86 (r'\a[\b]\f\n\r\t\v', '\a\b\f\n\r\t\v', SUCCEED, 'found', '\a\b\f\n\r\t\v'),
87 (r'[\a][\b][\f][\n][\r][\t][\v]', '\a\b\f\n\r\t\v', SUCCEED, 'found', '\a\b\f\n\r\t\v'),
88 # NOTE: not an error under PCRE/PRE:
89 # (r'\u', '', SYNTAX_ERROR), # A Perl escape
90 (r'\c\e\g\h\i\j\k\m\o\p\q\y\z', 'ceghijkmopqyz', SUCCEED, 'found', 'ceghijkmopqyz'),
91 (r'\xff', '\377', SUCCEED, 'found', chr(255)),
92 # new \x semantics
93 (r'\x00ffffffffffffff', '\377', FAIL, 'found', chr(255)),
94 (r'\x00f', '\017', FAIL, 'found', chr(15)),
95 (r'\x00fe', '\376', FAIL, 'found', chr(254)),
96 # (r'\x00ffffffffffffff', '\377', SUCCEED, 'found', chr(255)),
97 # (r'\x00f', '\017', SUCCEED, 'found', chr(15)),
98 # (r'\x00fe', '\376', SUCCEED, 'found', chr(254)),
100 (r"^\w+=(\\[\000-\277]|[^\n\\])*", "SRC=eval.c g.c blah blah blah \\\\\n\tapes.c",
101 SUCCEED, 'found', "SRC=eval.c g.c blah blah blah \\\\"),
103 # Test that . only matches \n in DOTALL mode
104 ('a.b', 'acb', SUCCEED, 'found', 'acb'),
105 ('a.b', 'a\nb', FAIL),
106 ('a.*b', 'acc\nccb', FAIL),
107 ('a.{4,5}b', 'acc\nccb', FAIL),
108 ('a.b', 'a\rb', SUCCEED, 'found', 'a\rb'),
109 ('a.b(?s)', 'a\nb', SUCCEED, 'found', 'a\nb'),
110 ('a.*(?s)b', 'acc\nccb', SUCCEED, 'found', 'acc\nccb'),
111 ('(?s)a.{4,5}b', 'acc\nccb', SUCCEED, 'found', 'acc\nccb'),
112 ('(?s)a.b', 'a\nb', SUCCEED, 'found', 'a\nb'),
114 (')', '', SYNTAX_ERROR), # Unmatched right bracket
115 ('', '', SUCCEED, 'found', ''), # Empty pattern
116 ('abc', 'abc', SUCCEED, 'found', 'abc'),
117 ('abc', 'xbc', FAIL),
118 ('abc', 'axc', FAIL),
119 ('abc', 'abx', FAIL),
120 ('abc', 'xabcy', SUCCEED, 'found', 'abc'),
121 ('abc', 'ababc', SUCCEED, 'found', 'abc'),
122 ('ab*c', 'abc', SUCCEED, 'found', 'abc'),
123 ('ab*bc', 'abc', SUCCEED, 'found', 'abc'),
124 ('ab*bc', 'abbc', SUCCEED, 'found', 'abbc'),
125 ('ab*bc', 'abbbbc', SUCCEED, 'found', 'abbbbc'),
126 ('ab+bc', 'abbc', SUCCEED, 'found', 'abbc'),
127 ('ab+bc', 'abc', FAIL),
128 ('ab+bc', 'abq', FAIL),
129 ('ab+bc', 'abbbbc', SUCCEED, 'found', 'abbbbc'),
130 ('ab?bc', 'abbc', SUCCEED, 'found', 'abbc'),
131 ('ab?bc', 'abc', SUCCEED, 'found', 'abc'),
132 ('ab?bc', 'abbbbc', FAIL),
133 ('ab?c', 'abc', SUCCEED, 'found', 'abc'),
134 ('^abc$', 'abc', SUCCEED, 'found', 'abc'),
135 ('^abc$', 'abcc', FAIL),
136 ('^abc', 'abcc', SUCCEED, 'found', 'abc'),
137 ('^abc$', 'aabc', FAIL),
138 ('abc$', 'aabc', SUCCEED, 'found', 'abc'),
139 ('^', 'abc', SUCCEED, 'found+"-"', '-'),
140 ('$', 'abc', SUCCEED, 'found+"-"', '-'),
141 ('a.c', 'abc', SUCCEED, 'found', 'abc'),
142 ('a.c', 'axc', SUCCEED, 'found', 'axc'),
143 ('a.*c', 'axyzc', SUCCEED, 'found', 'axyzc'),
144 ('a.*c', 'axyzd', FAIL),
145 ('a[bc]d', 'abc', FAIL),
146 ('a[bc]d', 'abd', SUCCEED, 'found', 'abd'),
147 ('a[b-d]e', 'abd', FAIL),
148 ('a[b-d]e', 'ace', SUCCEED, 'found', 'ace'),
149 ('a[b-d]', 'aac', SUCCEED, 'found', 'ac'),
150 ('a[-b]', 'a-', SUCCEED, 'found', 'a-'),
151 ('a[\\-b]', 'a-', SUCCEED, 'found', 'a-'),
152 # NOTE: not an error under PCRE/PRE:
153 # ('a[b-]', 'a-', SYNTAX_ERROR),
154 ('a[]b', '-', SYNTAX_ERROR),
155 ('a[', '-', SYNTAX_ERROR),
156 ('a\\', '-', SYNTAX_ERROR),
157 ('abc)', '-', SYNTAX_ERROR),
158 ('(abc', '-', SYNTAX_ERROR),
159 ('a]', 'a]', SUCCEED, 'found', 'a]'),
160 ('a[]]b', 'a]b', SUCCEED, 'found', 'a]b'),
161 ('a[\]]b', 'a]b', SUCCEED, 'found', 'a]b'),
162 ('a[^bc]d', 'aed', SUCCEED, 'found', 'aed'),
163 ('a[^bc]d', 'abd', FAIL),
164 ('a[^-b]c', 'adc', SUCCEED, 'found', 'adc'),
165 ('a[^-b]c', 'a-c', FAIL),
166 ('a[^]b]c', 'a]c', FAIL),
167 ('a[^]b]c', 'adc', SUCCEED, 'found', 'adc'),
168 ('\\ba\\b', 'a-', SUCCEED, '"-"', '-'),
169 ('\\ba\\b', '-a', SUCCEED, '"-"', '-'),
170 ('\\ba\\b', '-a-', SUCCEED, '"-"', '-'),
171 ('\\by\\b', 'xy', FAIL),
172 ('\\by\\b', 'yz', FAIL),
173 ('\\by\\b', 'xyz', FAIL),
174 ('x\\b', 'xyz', FAIL),
175 ('x\\B', 'xyz', SUCCEED, '"-"', '-'),
176 ('\\Bz', 'xyz', SUCCEED, '"-"', '-'),
177 ('z\\B', 'xyz', FAIL),
178 ('\\Bx', 'xyz', FAIL),
179 ('\\Ba\\B', 'a-', FAIL, '"-"', '-'),
180 ('\\Ba\\B', '-a', FAIL, '"-"', '-'),
181 ('\\Ba\\B', '-a-', FAIL, '"-"', '-'),
182 ('\\By\\B', 'xy', FAIL),
183 ('\\By\\B', 'yz', FAIL),
184 ('\\By\\b', 'xy', SUCCEED, '"-"', '-'),
185 ('\\by\\B', 'yz', SUCCEED, '"-"', '-'),
186 ('\\By\\B', 'xyz', SUCCEED, '"-"', '-'),
187 ('ab|cd', 'abc', SUCCEED, 'found', 'ab'),
188 ('ab|cd', 'abcd', SUCCEED, 'found', 'ab'),
189 ('()ef', 'def', SUCCEED, 'found+"-"+g1', 'ef-'),
190 ('$b', 'b', FAIL),
191 ('a\\(b', 'a(b', SUCCEED, 'found+"-"+g1', 'a(b-Error'),
192 ('a\\(*b', 'ab', SUCCEED, 'found', 'ab'),
193 ('a\\(*b', 'a((b', SUCCEED, 'found', 'a((b'),
194 ('a\\\\b', 'a\\b', SUCCEED, 'found', 'a\\b'),
195 ('((a))', 'abc', SUCCEED, 'found+"-"+g1+"-"+g2', 'a-a-a'),
196 ('(a)b(c)', 'abc', SUCCEED, 'found+"-"+g1+"-"+g2', 'abc-a-c'),
197 ('a+b+c', 'aabbabc', SUCCEED, 'found', 'abc'),
198 ('(a+|b)*', 'ab', SUCCEED, 'found+"-"+g1', 'ab-b'),
199 ('(a+|b)+', 'ab', SUCCEED, 'found+"-"+g1', 'ab-b'),
200 ('(a+|b)?', 'ab', SUCCEED, 'found+"-"+g1', 'a-a'),
201 (')(', '-', SYNTAX_ERROR),
202 ('[^ab]*', 'cde', SUCCEED, 'found', 'cde'),
203 ('abc', '', FAIL),
204 ('a*', '', SUCCEED, 'found', ''),
205 ('a|b|c|d|e', 'e', SUCCEED, 'found', 'e'),
206 ('(a|b|c|d|e)f', 'ef', SUCCEED, 'found+"-"+g1', 'ef-e'),
207 ('abcd*efg', 'abcdefg', SUCCEED, 'found', 'abcdefg'),
208 ('ab*', 'xabyabbbz', SUCCEED, 'found', 'ab'),
209 ('ab*', 'xayabbbz', SUCCEED, 'found', 'a'),
210 ('(ab|cd)e', 'abcde', SUCCEED, 'found+"-"+g1', 'cde-cd'),
211 ('[abhgefdc]ij', 'hij', SUCCEED, 'found', 'hij'),
212 ('^(ab|cd)e', 'abcde', FAIL, 'xg1y', 'xy'),
213 ('(abc|)ef', 'abcdef', SUCCEED, 'found+"-"+g1', 'ef-'),
214 ('(a|b)c*d', 'abcd', SUCCEED, 'found+"-"+g1', 'bcd-b'),
215 ('(ab|ab*)bc', 'abc', SUCCEED, 'found+"-"+g1', 'abc-a'),
216 ('a([bc]*)c*', 'abc', SUCCEED, 'found+"-"+g1', 'abc-bc'),
217 ('a([bc]*)(c*d)', 'abcd', SUCCEED, 'found+"-"+g1+"-"+g2', 'abcd-bc-d'),
218 ('a([bc]+)(c*d)', 'abcd', SUCCEED, 'found+"-"+g1+"-"+g2', 'abcd-bc-d'),
219 ('a([bc]*)(c+d)', 'abcd', SUCCEED, 'found+"-"+g1+"-"+g2', 'abcd-b-cd'),
220 ('a[bcd]*dcdcde', 'adcdcde', SUCCEED, 'found', 'adcdcde'),
221 ('a[bcd]+dcdcde', 'adcdcde', FAIL),
222 ('(ab|a)b*c', 'abc', SUCCEED, 'found+"-"+g1', 'abc-ab'),
223 ('((a)(b)c)(d)', 'abcd', SUCCEED, 'g1+"-"+g2+"-"+g3+"-"+g4', 'abc-a-b-d'),
224 ('[a-zA-Z_][a-zA-Z0-9_]*', 'alpha', SUCCEED, 'found', 'alpha'),
225 ('^a(bc+|b[eh])g|.h$', 'abh', SUCCEED, 'found+"-"+g1', 'bh-None'),
226 ('(bc+d$|ef*g.|h?i(j|k))', 'effgz', SUCCEED, 'found+"-"+g1+"-"+g2', 'effgz-effgz-None'),
227 ('(bc+d$|ef*g.|h?i(j|k))', 'ij', SUCCEED, 'found+"-"+g1+"-"+g2', 'ij-ij-j'),
228 ('(bc+d$|ef*g.|h?i(j|k))', 'effg', FAIL),
229 ('(bc+d$|ef*g.|h?i(j|k))', 'bcdd', FAIL),
230 ('(bc+d$|ef*g.|h?i(j|k))', 'reffgz', SUCCEED, 'found+"-"+g1+"-"+g2', 'effgz-effgz-None'),
231 ('(((((((((a)))))))))', 'a', SUCCEED, 'found', 'a'),
232 ('multiple words of text', 'uh-uh', FAIL),
233 ('multiple words', 'multiple words, yeah', SUCCEED, 'found', 'multiple words'),
234 ('(.*)c(.*)', 'abcde', SUCCEED, 'found+"-"+g1+"-"+g2', 'abcde-ab-de'),
235 ('\\((.*), (.*)\\)', '(a, b)', SUCCEED, 'g2+"-"+g1', 'b-a'),
236 ('[k]', 'ab', FAIL),
237 ('a[-]?c', 'ac', SUCCEED, 'found', 'ac'),
238 ('(abc)\\1', 'abcabc', SUCCEED, 'g1', 'abc'),
239 ('([a-c]*)\\1', 'abcabc', SUCCEED, 'g1', 'abc'),
240 ('^(.+)?B', 'AB', SUCCEED, 'g1', 'A'),
241 ('(a+).\\1$', 'aaaaa', SUCCEED, 'found+"-"+g1', 'aaaaa-aa'),
242 ('^(a+).\\1$', 'aaaa', FAIL),
243 ('(abc)\\1', 'abcabc', SUCCEED, 'found+"-"+g1', 'abcabc-abc'),
244 ('([a-c]+)\\1', 'abcabc', SUCCEED, 'found+"-"+g1', 'abcabc-abc'),
245 ('(a)\\1', 'aa', SUCCEED, 'found+"-"+g1', 'aa-a'),
246 ('(a+)\\1', 'aa', SUCCEED, 'found+"-"+g1', 'aa-a'),
247 ('(a+)+\\1', 'aa', SUCCEED, 'found+"-"+g1', 'aa-a'),
248 ('(a).+\\1', 'aba', SUCCEED, 'found+"-"+g1', 'aba-a'),
249 ('(a)ba*\\1', 'aba', SUCCEED, 'found+"-"+g1', 'aba-a'),
250 ('(aa|a)a\\1$', 'aaa', SUCCEED, 'found+"-"+g1', 'aaa-a'),
251 ('(a|aa)a\\1$', 'aaa', SUCCEED, 'found+"-"+g1', 'aaa-a'),
252 ('(a+)a\\1$', 'aaa', SUCCEED, 'found+"-"+g1', 'aaa-a'),
253 ('([abc]*)\\1', 'abcabc', SUCCEED, 'found+"-"+g1', 'abcabc-abc'),
254 ('(a)(b)c|ab', 'ab', SUCCEED, 'found+"-"+g1+"-"+g2', 'ab-None-None'),
255 ('(a)+x', 'aaax', SUCCEED, 'found+"-"+g1', 'aaax-a'),
256 ('([ac])+x', 'aacx', SUCCEED, 'found+"-"+g1', 'aacx-c'),
257 ('([^/]*/)*sub1/', 'd:msgs/tdir/sub1/trial/away.cpp', SUCCEED, 'found+"-"+g1', 'd:msgs/tdir/sub1/-tdir/'),
258 ('([^.]*)\\.([^:]*):[T ]+(.*)', 'track1.title:TBlah blah blah', SUCCEED, 'found+"-"+g1+"-"+g2+"-"+g3', 'track1.title:TBlah blah blah-track1-title-Blah blah blah'),
259 ('([^N]*N)+', 'abNNxyzN', SUCCEED, 'found+"-"+g1', 'abNNxyzN-xyzN'),
260 ('([^N]*N)+', 'abNNxyz', SUCCEED, 'found+"-"+g1', 'abNN-N'),
261 ('([abc]*)x', 'abcx', SUCCEED, 'found+"-"+g1', 'abcx-abc'),
262 ('([abc]*)x', 'abc', FAIL),
263 ('([xyz]*)x', 'abcx', SUCCEED, 'found+"-"+g1', 'x-'),
264 ('(a)+b|aac', 'aac', SUCCEED, 'found+"-"+g1', 'aac-None'),
266 # Test symbolic groups
268 ('(?P<i d>aaa)a', 'aaaa', SYNTAX_ERROR),
269 ('(?P<id>aaa)a', 'aaaa', SUCCEED, 'found+"-"+id', 'aaaa-aaa'),
270 ('(?P<id>aa)(?P=id)', 'aaaa', SUCCEED, 'found+"-"+id', 'aaaa-aa'),
271 ('(?P<id>aa)(?P=xd)', 'aaaa', SYNTAX_ERROR),
273 # Test octal escapes/memory references
275 ('\\1', 'a', SYNTAX_ERROR),
276 ('\\09', chr(0) + '9', SUCCEED, 'found', chr(0) + '9'),
277 ('\\141', 'a', SUCCEED, 'found', 'a'),
278 ('(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)(l)\\119', 'abcdefghijklk9', SUCCEED, 'found+"-"+g11', 'abcdefghijklk9-k'),
280 # All tests from Perl
282 ('abc', 'abc', SUCCEED, 'found', 'abc'),
283 ('abc', 'xbc', FAIL),
284 ('abc', 'axc', FAIL),
285 ('abc', 'abx', FAIL),
286 ('abc', 'xabcy', SUCCEED, 'found', 'abc'),
287 ('abc', 'ababc', SUCCEED, 'found', 'abc'),
288 ('ab*c', 'abc', SUCCEED, 'found', 'abc'),
289 ('ab*bc', 'abc', SUCCEED, 'found', 'abc'),
290 ('ab*bc', 'abbc', SUCCEED, 'found', 'abbc'),
291 ('ab*bc', 'abbbbc', SUCCEED, 'found', 'abbbbc'),
292 ('ab{0,}bc', 'abbbbc', SUCCEED, 'found', 'abbbbc'),
293 ('ab+bc', 'abbc', SUCCEED, 'found', 'abbc'),
294 ('ab+bc', 'abc', FAIL),
295 ('ab+bc', 'abq', FAIL),
296 ('ab{1,}bc', 'abq', FAIL),
297 ('ab+bc', 'abbbbc', SUCCEED, 'found', 'abbbbc'),
298 ('ab{1,}bc', 'abbbbc', SUCCEED, 'found', 'abbbbc'),
299 ('ab{1,3}bc', 'abbbbc', SUCCEED, 'found', 'abbbbc'),
300 ('ab{3,4}bc', 'abbbbc', SUCCEED, 'found', 'abbbbc'),
301 ('ab{4,5}bc', 'abbbbc', FAIL),
302 ('ab?bc', 'abbc', SUCCEED, 'found', 'abbc'),
303 ('ab?bc', 'abc', SUCCEED, 'found', 'abc'),
304 ('ab{0,1}bc', 'abc', SUCCEED, 'found', 'abc'),
305 ('ab?bc', 'abbbbc', FAIL),
306 ('ab?c', 'abc', SUCCEED, 'found', 'abc'),
307 ('ab{0,1}c', 'abc', SUCCEED, 'found', 'abc'),
308 ('^abc$', 'abc', SUCCEED, 'found', 'abc'),
309 ('^abc$', 'abcc', FAIL),
310 ('^abc', 'abcc', SUCCEED, 'found', 'abc'),
311 ('^abc$', 'aabc', FAIL),
312 ('abc$', 'aabc', SUCCEED, 'found', 'abc'),
313 ('^', 'abc', SUCCEED, 'found', ''),
314 ('$', 'abc', SUCCEED, 'found', ''),
315 ('a.c', 'abc', SUCCEED, 'found', 'abc'),
316 ('a.c', 'axc', SUCCEED, 'found', 'axc'),
317 ('a.*c', 'axyzc', SUCCEED, 'found', 'axyzc'),
318 ('a.*c', 'axyzd', FAIL),
319 ('a[bc]d', 'abc', FAIL),
320 ('a[bc]d', 'abd', SUCCEED, 'found', 'abd'),
321 ('a[b-d]e', 'abd', FAIL),
322 ('a[b-d]e', 'ace', SUCCEED, 'found', 'ace'),
323 ('a[b-d]', 'aac', SUCCEED, 'found', 'ac'),
324 ('a[-b]', 'a-', SUCCEED, 'found', 'a-'),
325 ('a[b-]', 'a-', SUCCEED, 'found', 'a-'),
326 ('a[b-a]', '-', SYNTAX_ERROR),
327 ('a[]b', '-', SYNTAX_ERROR),
328 ('a[', '-', SYNTAX_ERROR),
329 ('a]', 'a]', SUCCEED, 'found', 'a]'),
330 ('a[]]b', 'a]b', SUCCEED, 'found', 'a]b'),
331 ('a[^bc]d', 'aed', SUCCEED, 'found', 'aed'),
332 ('a[^bc]d', 'abd', FAIL),
333 ('a[^-b]c', 'adc', SUCCEED, 'found', 'adc'),
334 ('a[^-b]c', 'a-c', FAIL),
335 ('a[^]b]c', 'a]c', FAIL),
336 ('a[^]b]c', 'adc', SUCCEED, 'found', 'adc'),
337 ('ab|cd', 'abc', SUCCEED, 'found', 'ab'),
338 ('ab|cd', 'abcd', SUCCEED, 'found', 'ab'),
339 ('()ef', 'def', SUCCEED, 'found+"-"+g1', 'ef-'),
340 ('*a', '-', SYNTAX_ERROR),
341 ('(*)b', '-', SYNTAX_ERROR),
342 ('$b', 'b', FAIL),
343 ('a\\', '-', SYNTAX_ERROR),
344 ('a\\(b', 'a(b', SUCCEED, 'found+"-"+g1', 'a(b-Error'),
345 ('a\\(*b', 'ab', SUCCEED, 'found', 'ab'),
346 ('a\\(*b', 'a((b', SUCCEED, 'found', 'a((b'),
347 ('a\\\\b', 'a\\b', SUCCEED, 'found', 'a\\b'),
348 ('abc)', '-', SYNTAX_ERROR),
349 ('(abc', '-', SYNTAX_ERROR),
350 ('((a))', 'abc', SUCCEED, 'found+"-"+g1+"-"+g2', 'a-a-a'),
351 ('(a)b(c)', 'abc', SUCCEED, 'found+"-"+g1+"-"+g2', 'abc-a-c'),
352 ('a+b+c', 'aabbabc', SUCCEED, 'found', 'abc'),
353 ('a{1,}b{1,}c', 'aabbabc', SUCCEED, 'found', 'abc'),
354 ('a**', '-', SYNTAX_ERROR),
355 ('a.+?c', 'abcabc', SUCCEED, 'found', 'abc'),
356 ('(a+|b)*', 'ab', SUCCEED, 'found+"-"+g1', 'ab-b'),
357 ('(a+|b){0,}', 'ab', SUCCEED, 'found+"-"+g1', 'ab-b'),
358 ('(a+|b)+', 'ab', SUCCEED, 'found+"-"+g1', 'ab-b'),
359 ('(a+|b){1,}', 'ab', SUCCEED, 'found+"-"+g1', 'ab-b'),
360 ('(a+|b)?', 'ab', SUCCEED, 'found+"-"+g1', 'a-a'),
361 ('(a+|b){0,1}', 'ab', SUCCEED, 'found+"-"+g1', 'a-a'),
362 (')(', '-', SYNTAX_ERROR),
363 ('[^ab]*', 'cde', SUCCEED, 'found', 'cde'),
364 ('abc', '', FAIL),
365 ('a*', '', SUCCEED, 'found', ''),
366 ('([abc])*d', 'abbbcd', SUCCEED, 'found+"-"+g1', 'abbbcd-c'),
367 ('([abc])*bcd', 'abcd', SUCCEED, 'found+"-"+g1', 'abcd-a'),
368 ('a|b|c|d|e', 'e', SUCCEED, 'found', 'e'),
369 ('(a|b|c|d|e)f', 'ef', SUCCEED, 'found+"-"+g1', 'ef-e'),
370 ('abcd*efg', 'abcdefg', SUCCEED, 'found', 'abcdefg'),
371 ('ab*', 'xabyabbbz', SUCCEED, 'found', 'ab'),
372 ('ab*', 'xayabbbz', SUCCEED, 'found', 'a'),
373 ('(ab|cd)e', 'abcde', SUCCEED, 'found+"-"+g1', 'cde-cd'),
374 ('[abhgefdc]ij', 'hij', SUCCEED, 'found', 'hij'),
375 ('^(ab|cd)e', 'abcde', FAIL),
376 ('(abc|)ef', 'abcdef', SUCCEED, 'found+"-"+g1', 'ef-'),
377 ('(a|b)c*d', 'abcd', SUCCEED, 'found+"-"+g1', 'bcd-b'),
378 ('(ab|ab*)bc', 'abc', SUCCEED, 'found+"-"+g1', 'abc-a'),
379 ('a([bc]*)c*', 'abc', SUCCEED, 'found+"-"+g1', 'abc-bc'),
380 ('a([bc]*)(c*d)', 'abcd', SUCCEED, 'found+"-"+g1+"-"+g2', 'abcd-bc-d'),
381 ('a([bc]+)(c*d)', 'abcd', SUCCEED, 'found+"-"+g1+"-"+g2', 'abcd-bc-d'),
382 ('a([bc]*)(c+d)', 'abcd', SUCCEED, 'found+"-"+g1+"-"+g2', 'abcd-b-cd'),
383 ('a[bcd]*dcdcde', 'adcdcde', SUCCEED, 'found', 'adcdcde'),
384 ('a[bcd]+dcdcde', 'adcdcde', FAIL),
385 ('(ab|a)b*c', 'abc', SUCCEED, 'found+"-"+g1', 'abc-ab'),
386 ('((a)(b)c)(d)', 'abcd', SUCCEED, 'g1+"-"+g2+"-"+g3+"-"+g4', 'abc-a-b-d'),
387 ('[a-zA-Z_][a-zA-Z0-9_]*', 'alpha', SUCCEED, 'found', 'alpha'),
388 ('^a(bc+|b[eh])g|.h$', 'abh', SUCCEED, 'found+"-"+g1', 'bh-None'),
389 ('(bc+d$|ef*g.|h?i(j|k))', 'effgz', SUCCEED, 'found+"-"+g1+"-"+g2', 'effgz-effgz-None'),
390 ('(bc+d$|ef*g.|h?i(j|k))', 'ij', SUCCEED, 'found+"-"+g1+"-"+g2', 'ij-ij-j'),
391 ('(bc+d$|ef*g.|h?i(j|k))', 'effg', FAIL),
392 ('(bc+d$|ef*g.|h?i(j|k))', 'bcdd', FAIL),
393 ('(bc+d$|ef*g.|h?i(j|k))', 'reffgz', SUCCEED, 'found+"-"+g1+"-"+g2', 'effgz-effgz-None'),
394 ('((((((((((a))))))))))', 'a', SUCCEED, 'g10', 'a'),
395 ('((((((((((a))))))))))\\10', 'aa', SUCCEED, 'found', 'aa'),
396 # Python does not have the same rules for \\41 so this is a syntax error
397 # ('((((((((((a))))))))))\\41', 'aa', FAIL),
398 # ('((((((((((a))))))))))\\41', 'a!', SUCCEED, 'found', 'a!'),
399 ('((((((((((a))))))))))\\41', '', SYNTAX_ERROR),
400 ('(?i)((((((((((a))))))))))\\41', '', SYNTAX_ERROR),
401 ('(((((((((a)))))))))', 'a', SUCCEED, 'found', 'a'),
402 ('multiple words of text', 'uh-uh', FAIL),
403 ('multiple words', 'multiple words, yeah', SUCCEED, 'found', 'multiple words'),
404 ('(.*)c(.*)', 'abcde', SUCCEED, 'found+"-"+g1+"-"+g2', 'abcde-ab-de'),
405 ('\\((.*), (.*)\\)', '(a, b)', SUCCEED, 'g2+"-"+g1', 'b-a'),
406 ('[k]', 'ab', FAIL),
407 ('a[-]?c', 'ac', SUCCEED, 'found', 'ac'),
408 ('(abc)\\1', 'abcabc', SUCCEED, 'g1', 'abc'),
409 ('([a-c]*)\\1', 'abcabc', SUCCEED, 'g1', 'abc'),
410 ('(?i)abc', 'ABC', SUCCEED, 'found', 'ABC'),
411 ('(?i)abc', 'XBC', FAIL),
412 ('(?i)abc', 'AXC', FAIL),
413 ('(?i)abc', 'ABX', FAIL),
414 ('(?i)abc', 'XABCY', SUCCEED, 'found', 'ABC'),
415 ('(?i)abc', 'ABABC', SUCCEED, 'found', 'ABC'),
416 ('(?i)ab*c', 'ABC', SUCCEED, 'found', 'ABC'),
417 ('(?i)ab*bc', 'ABC', SUCCEED, 'found', 'ABC'),
418 ('(?i)ab*bc', 'ABBC', SUCCEED, 'found', 'ABBC'),
419 ('(?i)ab*?bc', 'ABBBBC', SUCCEED, 'found', 'ABBBBC'),
420 ('(?i)ab{0,}?bc', 'ABBBBC', SUCCEED, 'found', 'ABBBBC'),
421 ('(?i)ab+?bc', 'ABBC', SUCCEED, 'found', 'ABBC'),
422 ('(?i)ab+bc', 'ABC', FAIL),
423 ('(?i)ab+bc', 'ABQ', FAIL),
424 ('(?i)ab{1,}bc', 'ABQ', FAIL),
425 ('(?i)ab+bc', 'ABBBBC', SUCCEED, 'found', 'ABBBBC'),
426 ('(?i)ab{1,}?bc', 'ABBBBC', SUCCEED, 'found', 'ABBBBC'),
427 ('(?i)ab{1,3}?bc', 'ABBBBC', SUCCEED, 'found', 'ABBBBC'),
428 ('(?i)ab{3,4}?bc', 'ABBBBC', SUCCEED, 'found', 'ABBBBC'),
429 ('(?i)ab{4,5}?bc', 'ABBBBC', FAIL),
430 ('(?i)ab??bc', 'ABBC', SUCCEED, 'found', 'ABBC'),
431 ('(?i)ab??bc', 'ABC', SUCCEED, 'found', 'ABC'),
432 ('(?i)ab{0,1}?bc', 'ABC', SUCCEED, 'found', 'ABC'),
433 ('(?i)ab??bc', 'ABBBBC', FAIL),
434 ('(?i)ab??c', 'ABC', SUCCEED, 'found', 'ABC'),
435 ('(?i)ab{0,1}?c', 'ABC', SUCCEED, 'found', 'ABC'),
436 ('(?i)^abc$', 'ABC', SUCCEED, 'found', 'ABC'),
437 ('(?i)^abc$', 'ABCC', FAIL),
438 ('(?i)^abc', 'ABCC', SUCCEED, 'found', 'ABC'),
439 ('(?i)^abc$', 'AABC', FAIL),
440 ('(?i)abc$', 'AABC', SUCCEED, 'found', 'ABC'),
441 ('(?i)^', 'ABC', SUCCEED, 'found', ''),
442 ('(?i)$', 'ABC', SUCCEED, 'found', ''),
443 ('(?i)a.c', 'ABC', SUCCEED, 'found', 'ABC'),
444 ('(?i)a.c', 'AXC', SUCCEED, 'found', 'AXC'),
445 ('(?i)a.*?c', 'AXYZC', SUCCEED, 'found', 'AXYZC'),
446 ('(?i)a.*c', 'AXYZD', FAIL),
447 ('(?i)a[bc]d', 'ABC', FAIL),
448 ('(?i)a[bc]d', 'ABD', SUCCEED, 'found', 'ABD'),
449 ('(?i)a[b-d]e', 'ABD', FAIL),
450 ('(?i)a[b-d]e', 'ACE', SUCCEED, 'found', 'ACE'),
451 ('(?i)a[b-d]', 'AAC', SUCCEED, 'found', 'AC'),
452 ('(?i)a[-b]', 'A-', SUCCEED, 'found', 'A-'),
453 ('(?i)a[b-]', 'A-', SUCCEED, 'found', 'A-'),
454 ('(?i)a[b-a]', '-', SYNTAX_ERROR),
455 ('(?i)a[]b', '-', SYNTAX_ERROR),
456 ('(?i)a[', '-', SYNTAX_ERROR),
457 ('(?i)a]', 'A]', SUCCEED, 'found', 'A]'),
458 ('(?i)a[]]b', 'A]B', SUCCEED, 'found', 'A]B'),
459 ('(?i)a[^bc]d', 'AED', SUCCEED, 'found', 'AED'),
460 ('(?i)a[^bc]d', 'ABD', FAIL),
461 ('(?i)a[^-b]c', 'ADC', SUCCEED, 'found', 'ADC'),
462 ('(?i)a[^-b]c', 'A-C', FAIL),
463 ('(?i)a[^]b]c', 'A]C', FAIL),
464 ('(?i)a[^]b]c', 'ADC', SUCCEED, 'found', 'ADC'),
465 ('(?i)ab|cd', 'ABC', SUCCEED, 'found', 'AB'),
466 ('(?i)ab|cd', 'ABCD', SUCCEED, 'found', 'AB'),
467 ('(?i)()ef', 'DEF', SUCCEED, 'found+"-"+g1', 'EF-'),
468 ('(?i)*a', '-', SYNTAX_ERROR),
469 ('(?i)(*)b', '-', SYNTAX_ERROR),
470 ('(?i)$b', 'B', FAIL),
471 ('(?i)a\\', '-', SYNTAX_ERROR),
472 ('(?i)a\\(b', 'A(B', SUCCEED, 'found+"-"+g1', 'A(B-Error'),
473 ('(?i)a\\(*b', 'AB', SUCCEED, 'found', 'AB'),
474 ('(?i)a\\(*b', 'A((B', SUCCEED, 'found', 'A((B'),
475 ('(?i)a\\\\b', 'A\\B', SUCCEED, 'found', 'A\\B'),
476 ('(?i)abc)', '-', SYNTAX_ERROR),
477 ('(?i)(abc', '-', SYNTAX_ERROR),
478 ('(?i)((a))', 'ABC', SUCCEED, 'found+"-"+g1+"-"+g2', 'A-A-A'),
479 ('(?i)(a)b(c)', 'ABC', SUCCEED, 'found+"-"+g1+"-"+g2', 'ABC-A-C'),
480 ('(?i)a+b+c', 'AABBABC', SUCCEED, 'found', 'ABC'),
481 ('(?i)a{1,}b{1,}c', 'AABBABC', SUCCEED, 'found', 'ABC'),
482 ('(?i)a**', '-', SYNTAX_ERROR),
483 ('(?i)a.+?c', 'ABCABC', SUCCEED, 'found', 'ABC'),
484 ('(?i)a.*?c', 'ABCABC', SUCCEED, 'found', 'ABC'),
485 ('(?i)a.{0,5}?c', 'ABCABC', SUCCEED, 'found', 'ABC'),
486 ('(?i)(a+|b)*', 'AB', SUCCEED, 'found+"-"+g1', 'AB-B'),
487 ('(?i)(a+|b){0,}', 'AB', SUCCEED, 'found+"-"+g1', 'AB-B'),
488 ('(?i)(a+|b)+', 'AB', SUCCEED, 'found+"-"+g1', 'AB-B'),
489 ('(?i)(a+|b){1,}', 'AB', SUCCEED, 'found+"-"+g1', 'AB-B'),
490 ('(?i)(a+|b)?', 'AB', SUCCEED, 'found+"-"+g1', 'A-A'),
491 ('(?i)(a+|b){0,1}', 'AB', SUCCEED, 'found+"-"+g1', 'A-A'),
492 ('(?i)(a+|b){0,1}?', 'AB', SUCCEED, 'found+"-"+g1', '-None'),
493 ('(?i))(', '-', SYNTAX_ERROR),
494 ('(?i)[^ab]*', 'CDE', SUCCEED, 'found', 'CDE'),
495 ('(?i)abc', '', FAIL),
496 ('(?i)a*', '', SUCCEED, 'found', ''),
497 ('(?i)([abc])*d', 'ABBBCD', SUCCEED, 'found+"-"+g1', 'ABBBCD-C'),
498 ('(?i)([abc])*bcd', 'ABCD', SUCCEED, 'found+"-"+g1', 'ABCD-A'),
499 ('(?i)a|b|c|d|e', 'E', SUCCEED, 'found', 'E'),
500 ('(?i)(a|b|c|d|e)f', 'EF', SUCCEED, 'found+"-"+g1', 'EF-E'),
501 ('(?i)abcd*efg', 'ABCDEFG', SUCCEED, 'found', 'ABCDEFG'),
502 ('(?i)ab*', 'XABYABBBZ', SUCCEED, 'found', 'AB'),
503 ('(?i)ab*', 'XAYABBBZ', SUCCEED, 'found', 'A'),
504 ('(?i)(ab|cd)e', 'ABCDE', SUCCEED, 'found+"-"+g1', 'CDE-CD'),
505 ('(?i)[abhgefdc]ij', 'HIJ', SUCCEED, 'found', 'HIJ'),
506 ('(?i)^(ab|cd)e', 'ABCDE', FAIL),
507 ('(?i)(abc|)ef', 'ABCDEF', SUCCEED, 'found+"-"+g1', 'EF-'),
508 ('(?i)(a|b)c*d', 'ABCD', SUCCEED, 'found+"-"+g1', 'BCD-B'),
509 ('(?i)(ab|ab*)bc', 'ABC', SUCCEED, 'found+"-"+g1', 'ABC-A'),
510 ('(?i)a([bc]*)c*', 'ABC', SUCCEED, 'found+"-"+g1', 'ABC-BC'),
511 ('(?i)a([bc]*)(c*d)', 'ABCD', SUCCEED, 'found+"-"+g1+"-"+g2', 'ABCD-BC-D'),
512 ('(?i)a([bc]+)(c*d)', 'ABCD', SUCCEED, 'found+"-"+g1+"-"+g2', 'ABCD-BC-D'),
513 ('(?i)a([bc]*)(c+d)', 'ABCD', SUCCEED, 'found+"-"+g1+"-"+g2', 'ABCD-B-CD'),
514 ('(?i)a[bcd]*dcdcde', 'ADCDCDE', SUCCEED, 'found', 'ADCDCDE'),
515 ('(?i)a[bcd]+dcdcde', 'ADCDCDE', FAIL),
516 ('(?i)(ab|a)b*c', 'ABC', SUCCEED, 'found+"-"+g1', 'ABC-AB'),
517 ('(?i)((a)(b)c)(d)', 'ABCD', SUCCEED, 'g1+"-"+g2+"-"+g3+"-"+g4', 'ABC-A-B-D'),
518 ('(?i)[a-zA-Z_][a-zA-Z0-9_]*', 'ALPHA', SUCCEED, 'found', 'ALPHA'),
519 ('(?i)^a(bc+|b[eh])g|.h$', 'ABH', SUCCEED, 'found+"-"+g1', 'BH-None'),
520 ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'EFFGZ', SUCCEED, 'found+"-"+g1+"-"+g2', 'EFFGZ-EFFGZ-None'),
521 ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'IJ', SUCCEED, 'found+"-"+g1+"-"+g2', 'IJ-IJ-J'),
522 ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'EFFG', FAIL),
523 ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'BCDD', FAIL),
524 ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'REFFGZ', SUCCEED, 'found+"-"+g1+"-"+g2', 'EFFGZ-EFFGZ-None'),
525 ('(?i)((((((((((a))))))))))', 'A', SUCCEED, 'g10', 'A'),
526 ('(?i)((((((((((a))))))))))\\10', 'AA', SUCCEED, 'found', 'AA'),
527 #('(?i)((((((((((a))))))))))\\41', 'AA', FAIL),
528 #('(?i)((((((((((a))))))))))\\41', 'A!', SUCCEED, 'found', 'A!'),
529 ('(?i)(((((((((a)))))))))', 'A', SUCCEED, 'found', 'A'),
530 ('(?i)(?:(?:(?:(?:(?:(?:(?:(?:(?:(a))))))))))', 'A', SUCCEED, 'g1', 'A'),
531 ('(?i)(?:(?:(?:(?:(?:(?:(?:(?:(?:(a|b|c))))))))))', 'C', SUCCEED, 'g1', 'C'),
532 ('(?i)multiple words of text', 'UH-UH', FAIL),
533 ('(?i)multiple words', 'MULTIPLE WORDS, YEAH', SUCCEED, 'found', 'MULTIPLE WORDS'),
534 ('(?i)(.*)c(.*)', 'ABCDE', SUCCEED, 'found+"-"+g1+"-"+g2', 'ABCDE-AB-DE'),
535 ('(?i)\\((.*), (.*)\\)', '(A, B)', SUCCEED, 'g2+"-"+g1', 'B-A'),
536 ('(?i)[k]', 'AB', FAIL),
537 # ('(?i)abcd', 'ABCD', SUCCEED, 'found+"-"+\\found+"-"+\\\\found', 'ABCD-$&-\\ABCD'),
538 # ('(?i)a(bc)d', 'ABCD', SUCCEED, 'g1+"-"+\\g1+"-"+\\\\g1', 'BC-$1-\\BC'),
539 ('(?i)a[-]?c', 'AC', SUCCEED, 'found', 'AC'),
540 ('(?i)(abc)\\1', 'ABCABC', SUCCEED, 'g1', 'ABC'),
541 ('(?i)([a-c]*)\\1', 'ABCABC', SUCCEED, 'g1', 'ABC'),
542 ('a(?!b).', 'abad', SUCCEED, 'found', 'ad'),
543 ('a(?=d).', 'abad', SUCCEED, 'found', 'ad'),
544 ('a(?=c|d).', 'abad', SUCCEED, 'found', 'ad'),
545 ('a(?:b|c|d)(.)', 'ace', SUCCEED, 'g1', 'e'),
546 ('a(?:b|c|d)*(.)', 'ace', SUCCEED, 'g1', 'e'),
547 ('a(?:b|c|d)+?(.)', 'ace', SUCCEED, 'g1', 'e'),
548 ('a(?:b|(c|e){1,2}?|d)+?(.)', 'ace', SUCCEED, 'g1 + g2', 'ce'),
549 ('^(.+)?B', 'AB', SUCCEED, 'g1', 'A'),
551 # Comments using the (?#...) syntax
553 ('w(?# comment', 'w', SYNTAX_ERROR),
554 ('w(?# comment 1)xy(?# comment 2)z', 'wxyz', SUCCEED, 'found', 'wxyz'),
556 # Check odd placement of embedded pattern modifiers
558 # not an error under PCRE/PRE:
559 ('w(?i)', 'W', SUCCEED, 'found', 'W'),
560 # ('w(?i)', 'W', SYNTAX_ERROR),
562 # Comments using the x embedded pattern modifier
564 ("""(?x)w# comment 1
566 # comment 2
567 z""", 'wxyz', SUCCEED, 'found', 'wxyz'),
569 # using the m embedded pattern modifier
571 ('^abc', """jkl
573 xyz""", FAIL),
574 ('(?m)^abc', """jkl
576 xyz""", SUCCEED, 'found', 'abc'),
578 ('(?m)abc$', """jkl
579 xyzabc
580 123""", SUCCEED, 'found', 'abc'),
582 # using the s embedded pattern modifier
584 ('a.b', 'a\nb', FAIL),
585 ('(?s)a.b', 'a\nb', SUCCEED, 'found', 'a\nb'),
587 # test \w, etc. both inside and outside character classes
589 ('\\w+', '--ab_cd0123--', SUCCEED, 'found', 'ab_cd0123'),
590 ('[\\w]+', '--ab_cd0123--', SUCCEED, 'found', 'ab_cd0123'),
591 ('\\D+', '1234abc5678', SUCCEED, 'found', 'abc'),
592 ('[\\D]+', '1234abc5678', SUCCEED, 'found', 'abc'),
593 ('[\\da-fA-F]+', '123abc', SUCCEED, 'found', '123abc'),
594 # not an error under PCRE/PRE:
595 # ('[\\d-x]', '-', SYNTAX_ERROR),
596 (r'([\s]*)([\S]*)([\s]*)', ' testing!1972', SUCCEED, 'g3+g2+g1', 'testing!1972 '),
597 (r'(\s*)(\S*)(\s*)', ' testing!1972', SUCCEED, 'g3+g2+g1', 'testing!1972 '),
599 (r'\xff', '\377', SUCCEED, 'found', chr(255)),
600 # new \x semantics
601 (r'\x00ff', '\377', FAIL),
602 # (r'\x00ff', '\377', SUCCEED, 'found', chr(255)),
603 (r'\t\n\v\r\f\a\g', '\t\n\v\r\f\ag', SUCCEED, 'found', '\t\n\v\r\f\ag'),
604 ('\t\n\v\r\f\a\g', '\t\n\v\r\f\ag', SUCCEED, 'found', '\t\n\v\r\f\ag'),
605 (r'\t\n\v\r\f\a', '\t\n\v\r\f\a', SUCCEED, 'found', chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)),
606 (r'[\t][\n][\v][\r][\f][\b]', '\t\n\v\r\f\b', SUCCEED, 'found', '\t\n\v\r\f\b'),
609 # post-1.5.2 additions
611 # xmllib problem
612 (r'(([a-z]+):)?([a-z]+)$', 'smil', SUCCEED, 'g1+"-"+g2+"-"+g3', 'None-None-smil'),
613 # bug 110866: reference to undefined group
614 (r'((.)\1+)', '', SYNTAX_ERROR),
615 # bug 111869: search (PRE/PCRE fails on this one, SRE doesn't)
616 (r'.*d', 'abc\nabd', SUCCEED, 'found', 'abd'),
617 # bug 112468: various expected syntax errors
618 (r'(', '', SYNTAX_ERROR),
619 (r'[\41]', '!', SUCCEED, 'found', '!'),
620 # bug 114033: nothing to repeat
621 (r'(x?)?', 'x', SUCCEED, 'found', 'x'),
622 # bug 115040: rescan if flags are modified inside pattern
623 (r' (?x)foo ', 'foo', SUCCEED, 'found', 'foo'),
624 # bug 115618: negative lookbehind
625 (r'(?<!abc)(d.f)', 'abcdefdof', SUCCEED, 'found', 'dof'),
626 # bug 116251: character class bug
627 (r'[\w-]+', 'laser_beam', SUCCEED, 'found', 'laser_beam'),
628 # bug 123769+127259: non-greedy backtracking bug
629 (r'.*?\S *:', 'xx:', SUCCEED, 'found', 'xx:'),
630 (r'a[ ]*?\ (\d+).*', 'a 10', SUCCEED, 'found', 'a 10'),
631 (r'a[ ]*?\ (\d+).*', 'a 10', SUCCEED, 'found', 'a 10'),
632 # bug 127259: \Z shouldn't depend on multiline mode
633 (r'(?ms).*?x\s*\Z(.*)','xx\nx\n', SUCCEED, 'g1', ''),
634 # bug 128899: uppercase literals under the ignorecase flag
635 (r'(?i)M+', 'MMM', SUCCEED, 'found', 'MMM'),
636 (r'(?i)m+', 'MMM', SUCCEED, 'found', 'MMM'),
637 (r'(?i)[M]+', 'MMM', SUCCEED, 'found', 'MMM'),
638 (r'(?i)[m]+', 'MMM', SUCCEED, 'found', 'MMM'),
639 # bug 130748: ^* should be an error (nothing to repeat)
640 (r'^*', '', SYNTAX_ERROR),
643 try:
644 u = eval("u'\N{LATIN CAPITAL LETTER A WITH DIAERESIS}'")
645 except SyntaxError:
646 pass
647 else:
648 tests.extend([
649 # bug 410271: \b broken under locales
650 (r'\b.\b', 'a', SUCCEED, 'found', 'a'),
651 (r'(?u)\b.\b', u, SUCCEED, 'found', u),