2 -- Regular expression tests
4 -- Don't want to have to double backslashes in regexes
-- With standard_conforming_strings on, a backslash inside an ordinary '...'
-- literal is taken literally, so escapes such as \1 or \w reach the regex
-- engine exactly as written in the statements of this script.
5 set standard_conforming_strings = on;
6 -- Test simple quantified backrefs
-- The pattern ^([bc])\1*$ accepts only a non-empty run of one repeated
-- character, which must be b or c.  The column alias (t or f) presumably
-- names the expected boolean result -- confirm against the expected output.
7 select 'bbbbb' ~ '^([bc])\1*$' as t;
13 select 'ccc' ~ '^([bc])\1*$' as t;
19 select 'xxx' ~ '^([bc])\1*$' as f;
25 select 'bbc' ~ '^([bc])\1*$' as f;
31 select 'b' ~ '^([bc])\1*$' as t;
37 -- Test quantified backref within a larger expression
-- Both patterns require the first captured text to recur one or more times,
-- each time preceded by a single space.  The first three queries capture
-- with (\w+), the last three repeat the same inputs with (.+).
38 select 'abc abc abc' ~ '^(\w+)( \1)+$' as t;
44 select 'abc abd abc' ~ '^(\w+)( \1)+$' as f;
50 select 'abc abc abd' ~ '^(\w+)( \1)+$' as f;
56 select 'abc abc abc' ~ '^(.+)( \1)+$' as t;
62 select 'abc abd abc' ~ '^(.+)( \1)+$' as f;
68 select 'abc abc abd' ~ '^(.+)( \1)+$' as f;
74 -- Test some cases that crashed in 9.2beta1 due to pmatch[] array overrun
-- Each pattern nests capturing groups inside one another, with a quantifier
-- on the inner or outer group.  substring(string from pattern) returns the
-- text matched by the first parenthesized subexpression when the pattern
-- contains parentheses.
75 select substring('asd TO foo' from ' TO (([a-z0-9._]+|"([^"]+|"")+")+)');
81 select substring('a' from '((a))+');
87 select substring('a' from '((a)+)');
93 -- Test regexp_match()
-- Covers: an empty pattern, a pattern with no capture groups, a non-matching
-- pattern (checked via IS NULL), two capture groups with the
-- case-insensitive 'i' flag, and the 'g' flag, which regexp_match() rejects
-- (see the ERROR/HINT lines that follow the last statement).
94 select regexp_match('abc', '');
100 select regexp_match('abc', 'bc');
106 select regexp_match('abc', 'd') is null;
112 select regexp_match('abc', '(B)(c)', 'i');
118 select regexp_match('abc', 'Bd', 'ig'); -- error
119 ERROR: regexp_match() does not support the "global" option
120 HINT: Use the regexp_matches function instead.
121 -- Test lookahead constraints
-- (?=re) is a positive lookahead and (?!re) a negative lookahead; neither
-- consumes input.  The inputs are chosen in pairs so that each constraint
-- is exercised both where the looked-for character is present and where it
-- is absent.
122 select regexp_matches('ab', 'a(?=b)b*');
128 select regexp_matches('a', 'a(?=b)b*');
133 select regexp_matches('abc', 'a(?=b)b*(?=c)c*');
139 select regexp_matches('ab', 'a(?=b)b*(?=c)c*');
144 select regexp_matches('ab', 'a(?!b)b*');
149 select regexp_matches('a', 'a(?!b)b*');
155 select regexp_matches('b', '(?=b)b');
161 select regexp_matches('a', '(?=b)b');
166 -- Test lookbehind constraints
-- (?<=re) is a positive lookbehind and (?<!re) a negative lookbehind.  The
-- final three queries run the same 'foobar' input against lookbehinds of
-- different lengths ('f', 'foo', 'oo') placed before b+.
167 select regexp_matches('abb', '(?<=a)b*');
173 select regexp_matches('a', 'a(?<=a)b*');
179 select regexp_matches('abc', 'a(?<=a)b*(?<=b)c*');
185 select regexp_matches('ab', 'a(?<=a)b*(?<=b)c*');
191 select regexp_matches('ab', 'a*(?<!a)b*');
197 select regexp_matches('ab', 'a*(?<!a)b+');
202 select regexp_matches('b', 'a*(?<!a)b+');
208 select regexp_matches('a', 'a(?<!a)b*');
213 select regexp_matches('b', '(?<=b)b');
218 select regexp_matches('foobar', '(?<=f)b+');
223 select regexp_matches('foobar', '(?<=foo)b+');
229 select regexp_matches('foobar', '(?<=oo)b+');
235 -- Test optimization of single-chr-or-bracket-expression lookaround constraints
-- Every constraint here contains only the bracket expression [xy].  All four
-- flavors are covered: (?=...), (?!...), (?<=...) and (?<!...), each with an
-- input where the adjacent character is in [xy] and one where it is not
-- (plus end-of-string for the negative lookahead).
236 select 'xz' ~ 'x(?=[xy])';
242 select 'xy' ~ 'x(?=[xy])';
248 select 'xz' ~ 'x(?![xy])';
254 select 'xy' ~ 'x(?![xy])';
260 select 'x' ~ 'x(?![xy])';
266 select 'xyy' ~ '(?<=[xy])yy+';
272 select 'zyy' ~ '(?<=[xy])yy+';
278 select 'xyy' ~ '(?<![xy])yy+';
284 select 'zyy' ~ '(?<![xy])yy+';
290 -- Test conversion of regex patterns to indexable conditions
-- In the plan fragments shown below, each left-anchored pattern with a fixed
-- literal prefix gets an Index Cond on proname derived from that prefix: a
-- >=/< range, or an equality when the pattern is also anchored with $.  The
-- original regex is still kept as a Filter.  For 'abc' (unanchored) and
-- '^(abc)?d' (optional prefix) only a Filter line appears.
291 explain (costs off) select * from pg_proc where proname ~ 'abc';
293 -----------------------------------
295 Filter: (proname ~ 'abc'::text)
298 explain (costs off) select * from pg_proc where proname ~ '^abc';
300 ----------------------------------------------------------------------
301 Index Scan using pg_proc_proname_args_nsp_index on pg_proc
302 Index Cond: ((proname >= 'abc'::text) AND (proname < 'abd'::text))
303 Filter: (proname ~ '^abc'::text)
306 explain (costs off) select * from pg_proc where proname ~ '^abc$';
308 ------------------------------------------------------------
309 Index Scan using pg_proc_proname_args_nsp_index on pg_proc
310 Index Cond: (proname = 'abc'::text)
311 Filter: (proname ~ '^abc$'::text)
314 explain (costs off) select * from pg_proc where proname ~ '^abcd*e';
316 ----------------------------------------------------------------------
317 Index Scan using pg_proc_proname_args_nsp_index on pg_proc
318 Index Cond: ((proname >= 'abc'::text) AND (proname < 'abd'::text))
319 Filter: (proname ~ '^abcd*e'::text)
322 explain (costs off) select * from pg_proc where proname ~ '^abc+d';
324 ----------------------------------------------------------------------
325 Index Scan using pg_proc_proname_args_nsp_index on pg_proc
326 Index Cond: ((proname >= 'abc'::text) AND (proname < 'abd'::text))
327 Filter: (proname ~ '^abc+d'::text)
330 explain (costs off) select * from pg_proc where proname ~ '^(abc)(def)';
332 ----------------------------------------------------------------------------
333 Index Scan using pg_proc_proname_args_nsp_index on pg_proc
334 Index Cond: ((proname >= 'abcdef'::text) AND (proname < 'abcdeg'::text))
335 Filter: (proname ~ '^(abc)(def)'::text)
338 explain (costs off) select * from pg_proc where proname ~ '^(abc)$';
340 ------------------------------------------------------------
341 Index Scan using pg_proc_proname_args_nsp_index on pg_proc
342 Index Cond: (proname = 'abc'::text)
343 Filter: (proname ~ '^(abc)$'::text)
346 explain (costs off) select * from pg_proc where proname ~ '^(abc)?d';
348 ----------------------------------------
350 Filter: (proname ~ '^(abc)?d'::text)
353 explain (costs off) select * from pg_proc where proname ~ '^abcd(x|(?=\w\w)q)';
355 ------------------------------------------------------------------------
356 Index Scan using pg_proc_proname_args_nsp_index on pg_proc
357 Index Cond: ((proname >= 'abcd'::text) AND (proname < 'abce'::text))
358 Filter: (proname ~ '^abcd(x|(?=\w\w)q)'::text)
361 -- Test for infinite loop in pullback() (CVE-2007-4772)
-- Every pattern in this section puts a quantifier on a group that contains
-- only zero-width constraints: the anchors ^ and $ and, in the later
-- queries, negative lookaheads.
362 select 'a' ~ '($|^)*';
368 -- These cases expose a bug in the original fix for CVE-2007-4772
369 select 'a' ~ '(^)+^';
375 select 'a' ~ '$($$)+';
381 -- More cases of infinite loop in pullback(), not fixed by CVE-2007-4772 fix
382 select 'a' ~ '($^)+';
388 select 'a' ~ '(^$)*';
394 select 'aa bb cc' ~ '(^(?!aa))+';
400 select 'aa x' ~ '(^(?!aa)(?!bb)(?!cc))+';
406 select 'bb x' ~ '(^(?!aa)(?!bb)(?!cc))+';
412 select 'cc x' ~ '(^(?!aa)(?!bb)(?!cc))+';
418 select 'dd x' ~ '(^(?!aa)(?!bb)(?!cc))+';
424 -- Test for infinite loop in fixempties() (Tcl bugs 3604074, 3606683)
-- Both patterns nest six quantified groups, each of which can match the
-- empty string: via * in the first, via an empty alternative in the second.
425 select 'a' ~ '((((((a)*)*)*)*)*)*';
431 select 'a' ~ '((((((a+|)+|)+|)+|)+|)+|)';
437 -- These cases used to give too-many-states failures
-- \m and \M match only at the beginning and end of a word respectively, and
-- \Y matches only at a point that is neither; here those zero-width escapes
-- (and ^ / $) appear inside quantified groups.
438 select 'x' ~ 'abcd(\m)+xyz';
444 select 'a' ~ '^abcd*(((((^(a c(e?d)a+|)+|)+|)+|)+|a)+|)';
450 select 'x' ~ 'a^(^)bcd*xy(((((($a+|)+|)+|)+$|)+|)+|)^$';
456 select 'x' ~ 'xyz(\Y\Y)+';
462 select 'x' ~ 'x|(?:\M)+';
468 -- This generates O(N) states but O(N^2) arcs, so it causes problems
469 -- if arc count is not constrained
-- The pattern is 'x*y*z*' repeated 1000 times; per the ERROR line below it
-- is rejected as too complex.
470 select 'x' ~ repeat('x*y*z*', 1000);
471 ERROR: invalid regular expression: regular expression is too complex
472 -- Test backref in combination with non-greedy quantifier
473 -- https://core.tcl.tk/tcl/tktview/6585b21ca8fa6f3678d442b97241fdd43dba2ec0
-- (\w).*?\1 looks for a word character that recurs later, with a non-greedy
-- .*? in between.  The second query adds the 'g' flag so every match is
-- returned, not just the first.
474 select 'Programmer' ~ '(\w).*?\1' as t;
480 select regexp_matches('Programmer', '(\w)(.*?\1)', 'g');
487 -- Test for proper matching of non-greedy iteration (bug #11478)
-- Three slash-separated fields, each captured with non-greedy [^/]+?; the
-- third field is optional.  The flags argument is the empty string.
488 select regexp_matches('foo/bar/baz',
489 '^([^/]+?)(?:/([^/]+?))(?:/([^/]+?))?$', '');
495 -- Test that greediness can be overridden by outer quantifier
-- The same input is matched with every combination of a greedy (l*) or
-- non-greedy (l*?) first group, optionally wrapped in a one-iteration outer
-- quantifier that is greedy ({1,1}) or non-greedy ({1,1}?).  The last query
-- of each set of four also wraps (.*) in {1,1}?.
496 select regexp_matches('llmmmfff', '^(l*)(.*)(f*)$');
502 select regexp_matches('llmmmfff', '^(l*){1,1}(.*)(f*)$');
508 select regexp_matches('llmmmfff', '^(l*){1,1}?(.*)(f*)$');
514 select regexp_matches('llmmmfff', '^(l*){1,1}?(.*){1,1}?(f*)$');
520 select regexp_matches('llmmmfff', '^(l*?)(.*)(f*)$');
526 select regexp_matches('llmmmfff', '^(l*?){1,1}(.*)(f*)$');
532 select regexp_matches('llmmmfff', '^(l*?){1,1}?(.*)(f*)$');
538 select regexp_matches('llmmmfff', '^(l*?){1,1}?(.*){1,1}?(f*)$');
544 -- Test for infinite loop in cfindloop with zero-length possible match
545 -- but no actual match (can only happen in the presence of backrefs)
-- Each pattern pairs an empty capture group () with a \1 backref to it.
546 select 'a' ~ '$()|^\1';
552 select 'a' ~ '.. ()|\1';
558 select 'a' ~ '()*\1';
564 select 'a' ~ '()+\1';
570 -- Test incorrect removal of capture groups within {0}
-- The referenced group sits under a {0} quantifier, so it never captures
-- anything; the backref to it appears outside the {0} in the first two
-- queries and under its own {0} in the third.
571 select 'xxx' ~ '(.){0}(\1)' as f;
577 select 'xxx' ~ '((.)){0}(\2)' as f;
583 select 'xyz' ~ '((.)){0}(\2){0}' as t;
589 -- Test ancient oversight in when to apply zaptreesubs
-- In both patterns a backref can be reached through an alternation branch
-- that does not contain the group it refers to.
590 select 'abcdef' ~ '^(.)\1|\1.' as f;
596 select 'abadef' ~ '^((.)\2|..)\2' as f;
602 -- Add coverage for some cases in checkmatchall
-- All patterns are built solely from '.' combined with alternation, groups
-- and quantifiers, so they constrain only the length of the match, not its
-- characters.  repeat('.', 260) is a run of 260 dots, matched against a
-- three-character input.
603 select regexp_match('xy', '.|...');
609 select regexp_match('xyz', '.|...');
615 select regexp_match('xy', '.*');
621 select regexp_match('fooba', '(?:..)*');
627 select regexp_match('xyz', repeat('.', 260));
633 select regexp_match('foo', '(?:.|){99}');
-- The remaining statements are expected to fail at regex compile time; each
-- one is followed by the ERROR line it produces.  The first two use a
-- backref inside a lookahead constraint, the third a \x escape whose value
-- is not a valid character code.
640 select 'xyz' ~ 'x(\w)(?=\1)'; -- no backrefs in LACONs
641 ERROR: invalid regular expression: invalid backreference number
642 select 'xyz' ~ 'x(\w)(?=(\1))';
643 ERROR: invalid regular expression: invalid backreference number
644 select 'a' ~ '\x7fffffff'; -- invalid chr code
645 ERROR: invalid regular expression: invalid escape \ sequence