Fortran: Fix PR 47485.
[gcc.git] / libphobos / src / std / regex / internal / tests.d
blob8a0fec16a1b6b8378c49e6767606728a6c29288e
1 /*
2 Regular expressions package test suite.
3 */
4 module std.regex.internal.tests;
6 package(std.regex):
8 import std.conv, std.exception, std.meta, std.range,
9 std.typecons, std.regex;
11 import std.uni : Escapables; // characters that need escaping
13 debug(std_regex_test) import std.stdio;
@safe unittest
{
    // Sanity checks.

    // These patterns only have to compile; the resulting objects are discarded.
    regex("(a|b)*");
    regex(`(?:([0-9A-F]+)\.\.([0-9A-F]+)|([0-9A-F]+))\s*;\s*(.*)\s*#`);
    regex("abc|edf|ighrg");

    auto literalRe = regex("abc");
    auto groupRe = regex("(gylba)");

    // match: one hit, one miss.
    assert(match("abcdef", literalRe).hit == "abc");
    assert(match("wida", groupRe).empty);

    // bmatch: same expectations.
    assert(bmatch("abcdef", literalRe).hit == "abc");
    assert(bmatch("wida", groupRe).empty);

    // A mutable char[] is accepted as the pattern argument as well.
    assert(!match("abc", "abc".dup).empty);
    assert(!bmatch("abc", "abc".dup).empty);

    // A default-initialised Regex reports empty until a pattern is assigned.
    Regex!char lateInit;
    assert(lateInit.empty);
    lateInit = regex("test");
    assert(!lateInit.empty);
}
34 /* The test vectors in this file are altered from Henry Spencer's regexp
35 test code. His copyright notice is:
37 Copyright (c) 1986 by University of Toronto.
38 Written by Henry Spencer. Not derived from licensed software.
40 Permission is granted to anyone to use this software for any
41 purpose on any computer system, and to redistribute it freely,
42 subject to the following restrictions:
44 1. The author is not responsible for the consequences of use of
45 this software, no matter how awful, even if they arise
46 from defects in it.
48 2. The origin of this software must not be misrepresented, either
49 by explicit claim or by omission.
51 3. Altered versions must be plainly marked as such, and must not
52 be misrepresented as being the original software.
57 @safe unittest
/// One row of the regex test table. Rows are constructed positionally,
/// so the field order below is part of the contract.
struct TestVectors
{
    string pattern, input;   // regex source text and the subject it is run against
    string result;           // first character selects the expectation: 'y', 'n' or 'c'
    string format, replace;  // replaceFmt format string and the output it must produce
    string flags;            // flags handed to regex(); most rows leave this unset
}
/*
 Table of test cases shared by run_tests and ct_tests.
 Columns, in order: pattern, input, result, format, replace and (optionally) flags.
 The first character of `result` selects the expectation:
   "y" - the pattern compiles and matches; `format` expanded over the captures
         must equal `replace`
   "n" - the pattern compiles but must not match
   "c" - the pattern must be rejected at compile time
 `format` and `replace` are only examined for "y" rows.
 NOTE: ct_tests selects rows by numeric index (iota ranges such as 0 .. 155 or
 235 .. tv.length-5), so inserting, removing or reordering rows changes which
 patterns each ctRegex partition covers.
*/
69 static immutable TestVectors[] tv = [
70 TestVectors( "a\\b", "a", "y", "$&", "a" ),
71 TestVectors( "(a)b\\1", "abaab","y", "$&", "aba" ),
72 TestVectors( "()b\\1", "aaab", "y", "$&", "b" ),
73 TestVectors( "abc", "abc", "y", "$&", "abc" ),
74 TestVectors( "abc", "xbc", "n", "-", "-" ),
75 TestVectors( "abc", "axc", "n", "-", "-" ),
76 TestVectors( "abc", "abx", "n", "-", "-" ),
77 TestVectors( "abc", "xabcy","y", "$&", "abc" ),
78 TestVectors( "abc", "ababc","y", "$&", "abc" ),
79 TestVectors( "ab*c", "abc", "y", "$&", "abc" ),
80 TestVectors( "ab*bc", "abc", "y", "$&", "abc" ),
81 TestVectors( "ab*bc", "abbc", "y", "$&", "abbc" ),
82 TestVectors( "ab*bc", "abbbbc","y", "$&", "abbbbc" ),
83 TestVectors( "ab+bc", "abbc", "y", "$&", "abbc" ),
84 TestVectors( "ab+bc", "abc", "n", "-", "-" ),
85 TestVectors( "ab+bc", "abq", "n", "-", "-" ),
86 TestVectors( "ab+bc", "abbbbc","y", "$&", "abbbbc" ),
87 TestVectors( "ab?bc", "abbc", "y", "$&", "abbc" ),
88 TestVectors( "ab?bc", "abc", "y", "$&", "abc" ),
89 TestVectors( "ab?bc", "abbbbc","n", "-", "-" ),
90 TestVectors( "ab?c", "abc", "y", "$&", "abc" ),
91 TestVectors( "^abc$", "abc", "y", "$&", "abc" ),
92 TestVectors( "^abc$", "abcc", "n", "-", "-" ),
93 TestVectors( "^abc", "abcc", "y", "$&", "abc" ),
94 TestVectors( "^abc$", "aabc", "n", "-", "-" ),
95 TestVectors( "abc$", "aabc", "y", "$&", "abc" ),
96 TestVectors( "^", "abc", "y", "$&", "" ),
97 TestVectors( "$", "abc", "y", "$&", "" ),
98 TestVectors( "a.c", "abc", "y", "$&", "abc" ),
99 TestVectors( "a.c", "axc", "y", "$&", "axc" ),
100 TestVectors( "a.*c", "axyzc","y", "$&", "axyzc" ),
101 TestVectors( "a.*c", "axyzd","n", "-", "-" ),
102 TestVectors( "a[bc]d", "abc", "n", "-", "-" ),
103 TestVectors( "a[bc]d", "abd", "y", "$&", "abd" ),
104 TestVectors( "a[b-d]e", "abd", "n", "-", "-" ),
105 TestVectors( "a[b-d]e", "ace", "y", "$&", "ace" ),
106 TestVectors( "a[b-d]", "aac", "y", "$&", "ac" ),
107 TestVectors( "a[-b]", "a-", "y", "$&", "a-" ),
108 TestVectors( "a[b-]", "a-", "y", "$&", "a-" ),
109 TestVectors( "a[b-a]", "-", "c", "-", "-" ),
110 TestVectors( "a[]b", "-", "c", "-", "-" ),
111 TestVectors( "a[", "-", "c", "-", "-" ),
112 TestVectors( "a]", "a]", "y", "$&", "a]" ),
113 TestVectors( "a[\\]]b", "a]b", "y", "$&", "a]b" ),
114 TestVectors( "a[^bc]d", "aed", "y", "$&", "aed" ),
115 TestVectors( "a[^bc]d", "abd", "n", "-", "-" ),
116 TestVectors( "a[^-b]c", "adc", "y", "$&", "adc" ),
117 TestVectors( "a[^-b]c", "a-c", "n", "-", "-" ),
118 TestVectors( "a[^\\]b]c", "adc", "y", "$&", "adc" ),
119 TestVectors( "ab|cd", "abc", "y", "$&", "ab" ),
120 TestVectors( "ab|cd", "abcd", "y", "$&", "ab" ),
121 TestVectors( "()ef", "def", "y", "$&-$1", "ef-" ),
122 TestVectors( "()*", "-", "y", "-", "-" ),
123 TestVectors( "*a", "-", "c", "-", "-" ),
124 TestVectors( "^*", "-", "y", "-", "-" ),
125 TestVectors( "$*", "-", "y", "-", "-" ),
126 TestVectors( "(*)b", "-", "c", "-", "-" ),
127 TestVectors( "$b", "b", "n", "-", "-" ),
128 TestVectors( "a\\", "-", "c", "-", "-" ),
129 TestVectors( "a\\(b", "a(b", "y", "$&-$1", "a(b-" ),
130 TestVectors( "a\\(*b", "ab", "y", "$&", "ab" ),
131 TestVectors( "a\\(*b", "a((b", "y", "$&", "a((b" ),
132 TestVectors( "a\\\\b", "a\\b", "y", "$&", "a\\b" ),
133 TestVectors( "abc)", "-", "c", "-", "-" ),
134 TestVectors( "(abc", "-", "c", "-", "-" ),
135 TestVectors( "((a))", "abc", "y", "$&-$1-$2", "a-a-a" ),
136 TestVectors( "(a)b(c)", "abc", "y", "$&-$1-$2", "abc-a-c" ),
137 TestVectors( "a+b+c", "aabbabc","y", "$&", "abc" ),
138 TestVectors( "a**", "-", "c", "-", "-" ),
139 TestVectors( "a*?a", "aa", "y", "$&", "a" ),
140 TestVectors( "(a*)*", "aaa", "y", "-", "-" ),
141 TestVectors( "(a*)+", "aaa", "y", "-", "-" ),
142 TestVectors( "(a|)*", "-", "y", "-", "-" ),
143 TestVectors( "(a*|b)*", "aabb", "y", "-", "-" ),
144 TestVectors( "(a|b)*", "ab", "y", "$&-$1", "ab-b" ),
145 TestVectors( "(a+|b)*", "ab", "y", "$&-$1", "ab-b" ),
146 TestVectors( "(a+|b)+", "ab", "y", "$&-$1", "ab-b" ),
147 TestVectors( "(a+|b)?", "ab", "y", "$&-$1", "a-a" ),
148 TestVectors( "[^ab]*", "cde", "y", "$&", "cde" ),
149 TestVectors( "(^)*", "-", "y", "-", "-" ),
150 TestVectors( "(ab|)*", "-", "y", "-", "-" ),
151 TestVectors( ")(", "-", "c", "-", "-" ),
152 TestVectors( "", "abc", "y", "$&", "" ),
153 TestVectors( "abc", "", "n", "-", "-" ),
154 TestVectors( "a*", "", "y", "$&", "" ),
155 TestVectors( "([abc])*d", "abbbcd", "y", "$&-$1", "abbbcd-c" ),
156 TestVectors( "([abc])*bcd", "abcd", "y", "$&-$1", "abcd-a" ),
157 TestVectors( "a|b|c|d|e", "e", "y", "$&", "e" ),
158 TestVectors( "(a|b|c|d|e)f", "ef", "y", "$&-$1", "ef-e" ),
159 TestVectors( "((a*|b))*", "aabb", "y", "-", "-" ),
160 TestVectors( "abcd*efg", "abcdefg", "y", "$&", "abcdefg" ),
161 TestVectors( "ab*", "xabyabbbz", "y", "$&", "ab" ),
162 TestVectors( "ab*", "xayabbbz", "y", "$&", "a" ),
163 TestVectors( "(ab|cd)e", "abcde", "y", "$&-$1", "cde-cd" ),
164 TestVectors( "[abhgefdc]ij", "hij", "y", "$&", "hij" ),
165 TestVectors( "^(ab|cd)e", "abcde", "n", "x$1y", "xy" ),
166 TestVectors( "(abc|)ef", "abcdef", "y", "$&-$1", "ef-" ),
167 TestVectors( "(a|b)c*d", "abcd", "y", "$&-$1", "bcd-b" ),
168 TestVectors( "(ab|ab*)bc", "abc", "y", "$&-$1", "abc-a" ),
169 TestVectors( "a([bc]*)c*", "abc", "y", "$&-$1", "abc-bc" ),
170 TestVectors( "a([bc]*)(c*d)", "abcd", "y", "$&-$1-$2", "abcd-bc-d" ),
171 TestVectors( "a([bc]+)(c*d)", "abcd", "y", "$&-$1-$2", "abcd-bc-d" ),
172 TestVectors( "a([bc]*)(c+d)", "abcd", "y", "$&-$1-$2", "abcd-b-cd" ),
173 TestVectors( "a[bcd]*dcdcde", "adcdcde", "y", "$&", "adcdcde" ),
174 TestVectors( "a[bcd]+dcdcde", "adcdcde", "n", "-", "-" ),
175 TestVectors( "(ab|a)b*c", "abc", "y", "$&-$1", "abc-ab" ),
176 TestVectors( "((a)(b)c)(d)", "abcd", "y", "$1-$2-$3-$4", "abc-a-b-d" ),
177 TestVectors( "[a-zA-Z_][a-zA-Z0-9_]*", "alpha", "y", "$&", "alpha" ),
178 TestVectors( "^a(bc+|b[eh])g|.h$", "abh", "y", "$&-$1", "bh-" ),
179 TestVectors( "(bc+d$|ef*g.|h?i(j|k))", "effgz", "y", "$&-$1-$2", "effgz-effgz-" ),
180 TestVectors( "(bc+d$|ef*g.|h?i(j|k))", "ij", "y", "$&-$1-$2", "ij-ij-j" ),
181 TestVectors( "(bc+d$|ef*g.|h?i(j|k))", "effg", "n", "-", "-" ),
182 TestVectors( "(bc+d$|ef*g.|h?i(j|k))", "bcdd", "n", "-", "-" ),
183 TestVectors( "(bc+d$|ef*g.|h?i(j|k))", "reffgz", "y", "$&-$1-$2", "effgz-effgz-" ),
184 TestVectors( "(((((((((a)))))))))", "a", "y", "$&", "a" ),
185 TestVectors( "multiple words of text", "uh-uh", "n", "-", "-" ),
186 TestVectors( "multiple words", "multiple words, yeah", "y", "$&", "multiple words" ),
187 TestVectors( "(.*)c(.*)", "abcde", "y", "$&-$1-$2", "abcde-ab-de" ),
188 TestVectors( "\\((.*), (.*)\\)", "(a, b)", "y", "($2, $1)", "(b, a)" ),
189 TestVectors( "abcd", "abcd", "y", "$&-&-$$$&", "abcd-&-$abcd" ),
190 TestVectors( "a(bc)d", "abcd", "y", "$1-$$1-$$$1", "bc-$1-$bc" ),
191 TestVectors( "[k]", "ab", "n", "-", "-" ),
192 TestVectors( "[ -~]*", "abc", "y", "$&", "abc" ),
193 TestVectors( "[ -~ -~]*", "abc", "y", "$&", "abc" ),
194 TestVectors( "[ -~ -~ -~]*", "abc", "y", "$&", "abc" ),
195 TestVectors( "[ -~ -~ -~ -~]*", "abc", "y", "$&", "abc" ),
196 TestVectors( "[ -~ -~ -~ -~ -~]*", "abc", "y", "$&", "abc" ),
197 TestVectors( "[ -~ -~ -~ -~ -~ -~]*", "abc", "y", "$&", "abc" ),
198 TestVectors( "[ -~ -~ -~ -~ -~ -~ -~]*", "abc", "y", "$&", "abc" ),
199 TestVectors( "a{2}", "candy", "n", "", "" ),
200 TestVectors( "a{2}", "caandy", "y", "$&", "aa" ),
201 TestVectors( "a{2}", "caaandy", "y", "$&", "aa" ),
202 TestVectors( "a{2,}", "candy", "n", "", "" ),
203 TestVectors( "a{2,}", "caandy", "y", "$&", "aa" ),
204 TestVectors( "a{2,}", "caaaaaandy", "y", "$&", "aaaaaa" ),
205 TestVectors( "a{1,3}", "cndy", "n", "", "" ),
206 TestVectors( "a{1,3}", "candy", "y", "$&", "a" ),
207 TestVectors( "a{1,3}", "caandy", "y", "$&", "aa" ),
208 TestVectors( "a{1,3}", "caaaaaandy", "y", "$&", "aaa" ),
209 TestVectors( "e?le?", "angel", "y", "$&", "el" ),
210 TestVectors( "e?le?", "angle", "y", "$&", "le" ),
211 TestVectors( "\\bn\\w", "noonday", "y", "$&", "no" ),
212 TestVectors( "\\wy\\b", "possibly yesterday", "y", "$&", "ly" ),
213 TestVectors( "\\w\\Bn", "noonday", "y", "$&", "on" ),
214 TestVectors( "y\\B\\w", "possibly yesterday", "y", "$&", "ye" ),
215 TestVectors( "\\cJ", "abc\ndef", "y", "$&", "\n" ),
216 TestVectors( "\\d", "B2 is", "y", "$&", "2" ),
217 TestVectors( "\\D", "B2 is", "y", "$&", "B" ),
218 TestVectors( "\\s\\w*", "foo bar", "y", "$&", " bar" ),
219 TestVectors( "\\S\\w*", "foo bar", "y", "$&", "foo" ),
220 TestVectors( "abc", "ababc", "y", "$&", "abc" ),
221 TestVectors( "apple(,)\\sorange\\1", "apple, orange, cherry, peach", "y", "$&", "apple, orange," ),
222 TestVectors( "(\\w+)\\s(\\w+)", "John Smith", "y", "$2, $1", "Smith, John" ),
223 TestVectors( "\\n\\f\\r\\t\\v", "abc\n\f\r\t\vdef", "y", "$&", "\n\f\r\t\v" ),
224 TestVectors( ".*c", "abcde", "y", "$&", "abc" ),
225 TestVectors( "^\\w+((;|=)\\w+)+$", "some=host=tld", "y", "$&-$1-$2", "some=host=tld-=tld-=" ),
226 TestVectors( "^\\w+((\\.|-)\\w+)+$", "some.host.tld", "y", "$&-$1-$2", "some.host.tld-.tld-." ),
227 TestVectors( "q(a|b)*q", "xxqababqyy", "y", "$&-$1", "qababq-b" ),
228 TestVectors( "^(a)(b){0,1}(c*)", "abcc", "y", "$1 $2 $3", "a b cc" ),
229 TestVectors( "^(a)((b){0,1})(c*)", "abcc", "y", "$1 $2 $3", "a b b" ),
230 TestVectors( "^(a)(b)?(c*)", "abcc", "y", "$1 $2 $3", "a b cc" ),
231 TestVectors( "^(a)((b)?)(c*)", "abcc", "y", "$1 $2 $3", "a b b" ),
232 TestVectors( "^(a)(b){0,1}(c*)", "acc", "y", "$1 $2 $3", "a cc" ),
233 TestVectors( "^(a)((b){0,1})(c*)", "acc", "y", "$1 $2 $3", "a " ),
234 TestVectors( "^(a)(b)?(c*)", "acc", "y", "$1 $2 $3", "a cc" ),
235 TestVectors( "^(a)((b)?)(c*)", "acc", "y", "$1 $2 $3", "a " ),
236 TestVectors( "(?:ab){3}", "_abababc","y", "$&-$1", "ababab-" ),
237 TestVectors( "(?:a(?:x)?)+", "aaxaxx", "y", "$&-$1-$2", "aaxax--" ),
238 TestVectors( `\W\w\W`, "aa b!ca", "y", "$&", " b!"),
239 //more repetitions:
240 TestVectors( "(?:a{2,4}b{1,3}){1,2}", "aaabaaaabbb", "y", "$&", "aaabaaaabbb" ),
241 TestVectors( "(?:a{2,4}b{1,3}){1,2}?", "aaabaaaabbb", "y", "$&", "aaab" ),
242 //groups:
243 TestVectors( "(abc)|(edf)|(xyz)", "xyz", "y", "$1-$2-$3","--xyz"),
244 TestVectors( "(?P<q>\\d+)/(?P<d>\\d+)", "2/3", "y", "${d}/${q}", "3/2"),
245 //set operations:
246 TestVectors( "[a-z--d-f]", " dfa", "y", "$&", "a"),
247 TestVectors( "[abc[pq--acq]]{2}", "bqpaca", "y", "$&", "pa"),
248 TestVectors( "[a-z9&&abc0-9]{3}", "z90a0abc", "y", "$&", "abc"),
249 TestVectors( "[0-9a-f~~0-5a-z]{2}", "g0a58x", "y", "$&", "8x"),
250 TestVectors( "[abc[pq]xyz[rs]]{4}", "cqxr", "y", "$&", "cqxr"),
251 TestVectors( "[abcdf--[ab&&[bcd]][acd]]", "abcdefgh", "y", "$&", "f"),
252 TestVectors( "[a-c||d-f]+", "abcdef", "y", "$&", "abcdef"),
253 TestVectors( "[a-f--a-c]+", "abcdef", "y", "$&", "def"),
254 TestVectors( "[a-c&&b-f]+", "abcdef", "y", "$&", "bc"),
255 TestVectors( "[a-c~~b-f]+", "abcdef", "y", "$&", "a"),
256 //unicode blocks & properties:
257 TestVectors( `\P{Inlatin1suppl ement}`, "\u00c2!", "y", "$&", "!"),
258 TestVectors( `\p{InLatin-1 Supplement}\p{in-mathematical-operators}\P{Inlatin1suppl ement}`,
259 "\u00c2\u2200\u00c3\u2203.", "y", "$&", "\u00c3\u2203."),
260 TestVectors( `[-+*/\p{in-mathematical-operators}]{2}`, "a+\u2212", "y", "$&", "+\u2212"),
261 TestVectors( `\p{Ll}+`, "XabcD", "y", "$&", "abc"),
262 TestVectors( `\p{Lu}+`, "абвГДЕ", "y", "$&", "ГДЕ"),
263 TestVectors( `^\p{Currency Symbol}\p{Sc}`, "$₤", "y", "$&", "$₤"),
264 TestVectors( `\p{Common}\p{Thai}`, "!ฆ", "y", "$&", "!ฆ"),
265 TestVectors( `[\d\s]*\D`, "12 \t3\U00001680\u0F20_2", "y", "$&", "12 \t3\U00001680\u0F20_"),
266 TestVectors( `[c-wф]фф`, "ффф", "y", "$&", "ффф"),
267 //case insensitive:
268 TestVectors( `^abcdEf$`, "AbCdEF", "y", "$&", "AbCdEF", "i"),
269 TestVectors( `Русский язык`, "рУсскИй ЯзЫк", "y", "$&", "рУсскИй ЯзЫк", "i"),
270 TestVectors( `ⒶⒷⓒ` , "ⓐⓑⒸ", "y", "$&", "ⓐⓑⒸ", "i"),
271 TestVectors( "\U00010400{2}", "\U00010428\U00010400 ", "y", "$&", "\U00010428\U00010400", "i"),
272 TestVectors( `[adzУ-Я]{4}`, "DzюЯ", "y", "$&", "DzюЯ", "i"),
273 TestVectors( `\p{L}\p{Lu}{10}`, "абвгдеЖЗИКЛ", "y", "$&", "абвгдеЖЗИКЛ", "i"),
274 TestVectors( `(?:Dåb){3}`, "DåbDÅBdÅb", "y", "$&", "DåbDÅBdÅb", "i"),
275 //escapes:
276 TestVectors( `\u0041\u005a\U00000065\u0001`, "AZe\u0001", "y", "$&", "AZe\u0001"),
277 TestVectors( `\u`, "", "c", "-", "-"),
278 TestVectors( `\U`, "", "c", "-", "-"),
279 TestVectors( `\u003`, "", "c", "-", "-"),
280 TestVectors( `[\x00-\x7f]{4}`, "\x00\x09ab", "y", "$&", "\x00\x09ab"),
281 TestVectors( `[\cJ\cK\cA-\cD]{3}\cQ`, "\x01\x0B\x0A\x11", "y", "$&", "\x01\x0B\x0A\x11"),
282 TestVectors( `\r\n\v\t\f\\`, "\r\n\v\t\f\\", "y", "$&", "\r\n\v\t\f\\"),
283 TestVectors( `[\u0003\u0001]{2}`, "\u0001\u0003", "y", "$&", "\u0001\u0003"),
284 TestVectors( `^[\u0020-\u0080\u0001\n-\r]{8}`, "abc\u0001\v\f\r\n", "y", "$&", "abc\u0001\v\f\r\n"),
285 TestVectors( `\w+\S\w+`, "ab7!44c", "y", "$&", "ab7!44c"),
286 TestVectors( `\b\w+\b`, " abde4 ", "y", "$&", "abde4"),
287 TestVectors( `\b\w+\b`, " abde4", "y", "$&", "abde4"),
288 TestVectors( `\b\w+\b`, "abde4 ", "y", "$&", "abde4"),
289 TestVectors( `\pL\pS`, "a\u02DA", "y", "$&", "a\u02DA"),
290 TestVectors( `\pX`, "", "c", "-", "-"),
291 // ^, $, \b, \B, multiline :
292 TestVectors( `\r.*?$`, "abc\r\nxy", "y", "$&", "\r\nxy", "sm"),
293 TestVectors( `^a$^b$`, "a\r\nb\n", "n", "$&", "-", "m"),
294 TestVectors( `^a$\r\n^b$`,"a\r\nb\n", "y", "$&", "a\r\nb", "m"),
295 TestVectors( `^$`, "\r\n", "y", "$&", "", "m"),
296 TestVectors( `^a$\nx$`, "a\nx\u2028","y", "$&", "a\nx", "m"),
297 TestVectors( `^a$\nx$`, "a\nx\u2029","y", "$&", "a\nx", "m"),
298 TestVectors( `^a$\nx$`, "a\nx\u0085","y", "$&", "a\nx","m"),
299 TestVectors( `^x$`, "\u2028x", "y", "$&", "x", "m"),
300 TestVectors( `^x$`, "\u2029x", "y", "$&", "x", "m"),
301 TestVectors( `^x$`, "\u0085x", "y", "$&", "x", "m"),
302 TestVectors( `\b^.`, "ab", "y", "$&", "a"),
303 TestVectors( `\B^.`, "ab", "n", "-", "-"),
304 TestVectors( `^ab\Bc\B`, "\r\nabcd", "y", "$&", "abc", "m"),
305 TestVectors( `^.*$`, "12345678", "y", "$&", "12345678"),
307 // luckily obtained regression on incremental matching in backtracker
308 TestVectors( `^(?:(?:([0-9A-F]+)\.\.([0-9A-F]+)|([0-9A-F]+))\s*;\s*([^ ]*)\s*#|# (?:\w|_)+=((?:\w|_)+))`,
309 "0020 ; White_Space # ", "y", "$1-$2-$3", "--0020"),
310 //lookahead
311 TestVectors( "(foo.)(?=(bar))", "foobar foodbar", "y", "$&-$1-$2", "food-food-bar" ),
312 TestVectors( `\b(\d+)[a-z](?=\1)`, "123a123", "y", "$&-$1", "123a-123" ),
313 TestVectors( `\$(?!\d{3})\w+`, "$123 $abc", "y", "$&", "$abc"),
314 TestVectors( `(abc)(?=(ed(f))\3)`, "abcedff", "y", "-", "-"),
315 TestVectors( `\b[A-Za-z0-9.]+(?=(@(?!gmail)))`, "a@gmail,x@com", "y", "$&-$1", "x-@"),
316 TestVectors( `x()(abc)(?=(d)(e)(f)\2)`, "xabcdefabc", "y", "$&", "xabc"),
317 TestVectors( `x()(abc)(?=(d)(e)(f)()\3\4\5)`, "xabcdefdef", "y", "$&", "xabc"),
318 //lookback
319 TestVectors( `(?<=(ab))\d`, "12ba3ab4", "y", "$&-$1", "4-ab", "i"),
320 TestVectors( `\w(?<!\d)\w`, "123ab24", "y", "$&", "ab"),
321 TestVectors( `(?<=Dåb)x\w`, "DåbDÅBxdÅb", "y", "$&", "xd", "i"),
322 TestVectors( `(?<=(ab*c))x`, "abbbbcxac", "y", "$&-$1", "x-abbbbc"),
323 TestVectors( `(?<=(ab*?c))x`, "abbbbcxac", "y", "$&-$1", "x-abbbbc"),
324 TestVectors( `(?<=(a.*?c))x`, "ababbcxac", "y", "$&-$1", "x-abbc"),
325 TestVectors( `(?<=(a{2,4}b{1,3}))x`, "yyaaaabx", "y", "$&-$1", "x-aaaab"),
326 TestVectors( `(?<=((?:a{2,4}b{1,3}){1,2}))x`, "aabbbaaaabx", "y", "$&-$1", "x-aabbbaaaab"),
327 TestVectors( `(?<=((?:a{2,4}b{1,3}){1,2}?))x`, "aabbbaaaabx", "y", "$&-$1", "x-aaaab"),
328 TestVectors( `(?<=(abc|def|aef))x`, "abcx", "y", "$&-$1", "x-abc"),
329 TestVectors( `(?<=(abc|def|aef))x`, "aefx", "y", "$&-$1", "x-aef"),
330 TestVectors( `(?<=(abc|dabc))(x)`, "dabcx", "y", "$&-$1-$2", "x-abc-x"),
331 TestVectors( `(?<=(|abc))x`, "dabcx", "y", "$&-$1", "x-"),
332 TestVectors( `(?<=((ab|da)*))x`, "abdaabx", "y", "$&-$2-$1", "x-ab-abdaab"),
333 TestVectors( `a(?<=(ba(?<=(aba)(?<=aaba))))`, "aabaa", "y", "$&-$1-$2", "a-ba-aba"),
334 TestVectors( `.(?<!b).`, "bax", "y", "$&", "ax"),
335 TestVectors( `(?<=b(?<!ab)).`, "abbx", "y", "$&", "x"),
336 TestVectors( `(?<=\.|[!?]+)X`, "Hey?!X", "y", "$&", "X"),
337 TestVectors( `(?<=\.|[!?]+)a{3}`, ".Nope.aaaX", "y", "$&", "aaa"),
338 //mixed lookaround
339 TestVectors( `a(?<=a(?=b))b`, "ab", "y", "$&", "ab"),
340 TestVectors( `a(?<=a(?!b))c`, "ac", "y", "$&", "ac"),
341 TestVectors( `a(?i)bc`, "aBc", "y", "$&", "aBc"),
342 TestVectors( `a(?i)bc`, "Abc", "n", "$&", "-"),
343 TestVectors( `(?i)a(?-i)bc`, "aBcAbc", "y", "$&", "Abc"),
344 TestVectors( `(?s).(?-s).`, "\n\n\na", "y", "$&", "\na"),
345 TestVectors( `(?m)^a(?-m)$`, "\na", "y", "$&", "a")
/// Expands the format string `fmt` over the captures of match `m` and
/// returns the text that replaceFmt wrote.
string produceExpected(M,String)(auto ref M m, String fmt)
{
    auto sink = appender!String();
    // The trailing `true` is passed through to replaceFmt unchanged.
    fmt.replaceFmt(m.captures, sink, true);
    return sink.data;
}
/**
 * Runs every row of `tv` through the run-time matcher `matchFn`, once for each
 * of the character types char, wchar and dchar.
 *
 * For each row the pattern is compiled with the row's flags. Rows whose result
 * starts with 'c' must throw RegexException; all others must compile. Rows
 * marked 'y' must match and their `format`, expanded over the captures, must
 * equal `replace`; rows marked 'n' must not match.
 *
 * Params:
 *     matchFn = matching function under test, called as matchFn(input, regex)
 */
void run_tests(alias matchFn)()
{
    static foreach (Char; AliasSeq!( char, wchar, dchar))
    {{
        alias String = immutable(Char)[];

        // Local formatter returning the current String type.
        String produceExpected(M,Range)(auto ref M m, Range fmt)
        {
            auto app = appender!(String)();
            replaceFmt(fmt, m.captures, app, true);
            return app.data;
        }

        Regex!(Char) r;
        foreach (a, tvd; tv)
        {
            immutable uint c = tvd.result[0];
            debug(std_regex_test) writeln(" Test #", a, " pattern: ", tvd.pattern, " with Char = ", Char.stringof);

            // Compilation may legitimately fail, so it has to sit inside a
            // try block for the RegexException handler below to be valid.
            bool compiled = true;
            try
            {
                r = regex(to!(String)(tvd.pattern), tvd.flags);
            }
            catch (RegexException e)
            {
                compiled = false;
                debug(std_regex_test) writeln(e.msg);
            }

            // 'c' rows must be rejected; every other row must compile.
            assert((c == 'c') ? !compiled : compiled, "failed to compile pattern "~tvd.pattern);

            if (c != 'c')
            {
                auto m = matchFn(to!(String)(tvd.input), r);
                immutable bool matched = !m.empty;
                assert(
                    (c == 'y') ? matched : !matched,
                    text(matchFn.stringof ~": failed to match pattern #", a ,": ", tvd.pattern)
                );
                if (c == 'y')
                {
                    auto result = produceExpected(m, to!(String)(tvd.format));
                    assert(result == to!String(tvd.replace),
                        text(matchFn.stringof ~": mismatch pattern #", a, ": ", tvd.pattern," expected: ",
                                tvd.replace, " vs ", result));
                }
            }
        }
    }}
    debug(std_regex_test) writeln("!!! FReD bulk test done "~matchFn.stringof~" !!!");
}
/**
 * Checks compile-time regexes (ctRegex) against the rows of `tv`.
 *
 * A version identifier (std_regex_ct1 .. std_regex_ct4) selects which index
 * range of the table is exercised; with none set, rows 0 .. 30 and
 * 235 .. tv.length-5 are used. For each selected row marked "c" the pattern
 * must fail to compile at compile time. For every other row the ctRegex
 * program must equal the one produced by regex() at run time, and the match
 * result and formatted captures must agree with the row.
 */
void ct_tests()
{
    import std.algorithm.comparison : equal;

    version (std_regex_ct1)
    {
        pragma(msg, "Testing 1st part of ctRegex");
        enum Tests = iota(0, 155);
    }
    else version (std_regex_ct2)
    {
        pragma(msg, "Testing 2nd part of ctRegex");
        enum Tests = iota(155, 174);
    }
    //FIXME: #174-178 contains CTFE parser bug
    else version (std_regex_ct3)
    {
        pragma(msg, "Testing 3rd part of ctRegex");
        enum Tests = iota(178, 220);
    }
    else version (std_regex_ct4)
    {
        pragma(msg, "Testing 4th part of ctRegex");
        enum Tests = iota(220, tv.length);
    }
    else
    {
        enum Tests = chain(iota(0, 30), iota(235, tv.length-5));
    }

    // Double braces give every unrolled iteration its own scope.
    static foreach (v; Tests)
    {{
        enum tvd = tv[v];
        static if (tvd.result == "c")
        {
            // The pattern must be rejected when evaluated at compile time.
            static assert(!__traits(compiles, (){
                enum rejected = regex(tvd.pattern, tvd.flags);
            }), "errornously compiles regex pattern: " ~ tvd.pattern);
        }
        else
        {
            //BUG: tv[v] is fine but tvd is not known at compile time?!
            auto staticRe = ctRegex!(tv[v].pattern, tv[v].flags);
            auto dynamicRe = regex(tvd.pattern, tvd.flags);
            // Both construction paths must yield the same program.
            assert(equal(staticRe.ir, dynamicRe.ir),
                text("!C-T regex! failed to compile pattern #", v ,": ", tvd.pattern));

            auto found = match(tvd.input, staticRe);
            auto verdict = tvd.result[0];
            // A 'y' row needs a non-empty match; any other row needs an empty one.
            bool ok = (verdict == 'y') != found.empty;
            assert(ok, text("ctRegex: failed to match pattern #",
                v ,": ", tvd.pattern));

            if (verdict == 'y')
            {
                auto result = produceExpected(found, tvd.format);
                assert(result == tvd.replace, text("ctRegex mismatch pattern #", v,
                    ": ", tvd.pattern," expected: ", tvd.replace, " vs ", result));
            }
        }
    }}
    debug(std_regex_test) writeln("!!! FReD C-T test done !!!");
}
// Drive the suite: the compile-time regex checks first, then the whole table
// through each of the two run-time matching functions.
463 ct_tests();
464 run_tests!bmatch(); //backtracker
465 run_tests!match(); //thompson VM