1 /* $NetBSD: t_regex_att.c,v 1.1 2012/08/24 20:24:40 jmmv Exp $ */
4 * Copyright (c) 2011 The NetBSD Foundation, Inc.
7 * This code is derived from software contributed to The NetBSD Foundation
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed by the NetBSD
21 * Foundation, Inc. and its contributors.
22 * 4. Neither the name of The NetBSD Foundation nor the names of its
23 * contributors may be used to endorse or promote products derived
24 * from this software without specific prior written permission.
26 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
27 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
30 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 * POSSIBILITY OF SUCH DAMAGE.
39 #include <sys/cdefs.h>
40 __RCSID("$NetBSD: t_regex_att.c,v 1.1 2012/08/24 20:24:40 jmmv Exp $");
42 #include <sys/param.h>
52 static const char sep
[] = "\r\n\t";
53 static const char delim
[3] = "\\\\\0";
57 fail(const char *pattern
, const char *input
, size_t lineno
) {
59 "skipping failed test at line %zu (pattern=%s, input=%s)\n",
60 lineno
, pattern
, input
);
64 bug(const char *pattern
, const char *input
, size_t lineno
) {
69 #if defined(REGEX_SPENCER)
71 * The default libc implementation by Henry Spencer
73 { "a[-]?c", "ac" }, // basic.dat
74 { "(a*)*", "a" }, // categorization.dat
75 { "(aba|a*b)*", "ababa" }, // categorization.dat
76 { "\\(a\\(b\\)*\\)*\\2", "abab" }, // categorization.dat
77 { "(a*)*", "aaaaaa" }, // nullsubexpression.dat
78 { "(a*)*", "aaaaaax" }, // nullsubexpression.dat
79 { "(a*)+", "a" }, // nullsubexpression.dat
80 { "(a*)+", "aaaaaa" }, // nullsubexpression.dat
81 { "(a*)+", "aaaaaax" }, // nullsubexpression.dat
82 { "([a]*)*", "a" }, // nullsubexpression.dat
83 { "([a]*)*", "aaaaaa" }, // nullsubexpression.dat
84 { "([a]*)*", "aaaaaax" }, // nullsubexpression.dat
85 { "([a]*)+", "a" }, // nullsubexpression.dat
86 { "([a]*)+", "aaaaaa" }, // nullsubexpression.dat
87 { "([a]*)+", "aaaaaax" }, // nullsubexpression.dat
88 { "([^b]*)*", "a" }, // nullsubexpression.dat
89 { "([^b]*)*", "aaaaaa" }, // nullsubexpression.dat
90 { "([^b]*)*", "aaaaaab" }, // nullsubexpression.dat
91 { "([ab]*)*", "a" }, // nullsubexpression.dat
92 { "([ab]*)*", "aaaaaa" }, // nullsubexpression.dat
93 { "([ab]*)*", "ababab" }, // nullsubexpression.dat
94 { "([ab]*)*", "bababa" }, // nullsubexpression.dat
95 { "([ab]*)*", "b" }, // nullsubexpression.dat
96 { "([ab]*)*", "bbbbbb" }, // nullsubexpression.dat
97 { "([ab]*)*", "aaaabcde" }, // nullsubexpression.dat
98 { "([^a]*)*", "b" }, // nullsubexpression.dat
99 { "([^a]*)*", "bbbbbb" }, // nullsubexpression.dat
100 { "([^ab]*)*", "ccccxx" }, // nullsubexpression.dat
101 { "\\(a*\\)*\\(x\\)", "ax" }, // nullsubexpression.dat
102 { "\\(a*\\)*\\(x\\)", "axa" }, // nullsubexpression.dat
103 { "\\(a*\\)*\\(x\\)\\(\\1\\)", "x" }, // nullsubexpression.dat
104 /* crash! */ { "\\(a*\\)*\\(x\\)\\(\\1\\)", "ax" }, // nullsubexpression.dat
105 /* crash! */ { "\\(a*\\)*\\(x\\)\\(\\1\\)\\(x\\)", "axxa" }, // ""
106 { "(a*)*(x)", "ax" }, // nullsubexpression.dat
107 { "(a*)*(x)", "axa" }, // nullsubexpression.dat
108 { "(a*)+(x)", "ax" }, // nullsubexpression.dat
109 { "(a*)+(x)", "axa" }, // nullsubexpression.dat
110 { "((a|ab)(c|bcd))(d*)", "abcd" }, // forcedassoc.dat
111 { "((a|ab)(bcd|c))(d*)", "abcd" }, // forcedassoc.dat
112 { "((ab|a)(c|bcd))(d*)", "abcd" }, // forcedassoc.dat
113 { "((ab|a)(bcd|c))(d*)", "abcd" }, // forcedassoc.dat
114 { "((a*)(b|abc))(c*)", "abc" }, // forcedassoc.dat
115 { "((a*)(abc|b))(c*)", "abc" }, // forcedassoc.dat
116 { "((..)|(.)){2}", "aaa" }, // repetition.dat
117 { "((..)|(.)){3}", "aaa" }, // repetition.dat
118 { "((..)|(.)){3}", "aaaa" }, // repetition.dat
119 { "((..)|(.)){3}", "aaaaa" }, // repetition.dat
120 { "X(.?){0,}Y", "X1234567Y" }, // repetition.dat
121 { "X(.?){1,}Y", "X1234567Y" }, // repetition.dat
122 { "X(.?){2,}Y", "X1234567Y" }, // repetition.dat
123 { "X(.?){3,}Y", "X1234567Y" }, // repetition.dat
124 { "X(.?){4,}Y", "X1234567Y" }, // repetition.dat
125 { "X(.?){5,}Y", "X1234567Y" }, // repetition.dat
126 { "X(.?){6,}Y", "X1234567Y" }, // repetition.dat
127 { "X(.?){7,}Y", "X1234567Y" }, // repetition.dat
128 { "X(.?){0,8}Y", "X1234567Y" }, // repetition.dat
129 { "X(.?){1,8}Y", "X1234567Y" }, // repetition.dat
130 { "X(.?){2,8}Y", "X1234567Y" }, // repetition.dat
131 { "X(.?){3,8}Y", "X1234567Y" }, // repetition.dat
132 { "X(.?){4,8}Y", "X1234567Y" }, // repetition.dat
133 { "X(.?){5,8}Y", "X1234567Y" }, // repetition.dat
134 { "X(.?){6,8}Y", "X1234567Y" }, // repetition.dat
135 { "X(.?){7,8}Y", "X1234567Y" }, // repetition.dat
136 { "(a|ab|c|bcd){0,}(d*)", "ababcd" }, // repetition.dat
137 { "(a|ab|c|bcd){1,}(d*)", "ababcd" }, // repetition.dat
138 { "(a|ab|c|bcd){2,}(d*)", "ababcd" }, // repetition.dat
139 { "(a|ab|c|bcd){3,}(d*)", "ababcd" }, // repetition.dat
140 { "(a|ab|c|bcd){1,10}(d*)", "ababcd" }, // repetition.dat
141 { "(a|ab|c|bcd){2,10}(d*)", "ababcd" }, // repetition.dat
142 { "(a|ab|c|bcd){3,10}(d*)", "ababcd" }, // repetition.dat
143 { "(a|ab|c|bcd)*(d*)", "ababcd" }, // repetition.dat
144 { "(a|ab|c|bcd)+(d*)", "ababcd" }, // repetition.dat
145 { "(ab|a|c|bcd){0,}(d*)", "ababcd" }, // repetition.dat
146 { "(ab|a|c|bcd){1,}(d*)", "ababcd" }, // repetition.dat
147 { "(ab|a|c|bcd){2,}(d*)", "ababcd" }, // repetition.dat
148 { "(ab|a|c|bcd){3,}(d*)", "ababcd" }, // repetition.dat
149 { "(ab|a|c|bcd){1,10}(d*)", "ababcd" }, // repetition.dat
150 { "(ab|a|c|bcd){2,10}(d*)", "ababcd" }, // repetition.dat
151 { "(ab|a|c|bcd){3,10}(d*)", "ababcd" }, // repetition.dat
152 { "(ab|a|c|bcd)*(d*)", "ababcd" }, // repetition.dat
153 { "(ab|a|c|bcd)+(d*)", "ababcd" }, // repetition.dat
154 #elif defined(REGEX_TRE)
155 { "a[-]?c", "ac" }, // basic.dat
156 { "a\\(b\\)*\\1", "a" }, // categorization.dat
157 { "a\\(b\\)*\\1", "abab" }, // categorization.dat
158 { "\\(a\\(b\\)*\\)*\\2", "abab" }, // categorization.dat
159 { "\\(a*\\)*\\(x\\)\\(\\1\\)", "ax" }, // categorization.dat
160 { "\\(a*\\)*\\(x\\)\\(\\1\\)\\(x\\)", "axxa" }, // ""
161 { "((..)|(.))*", "aa" }, // repetition.dat
162 { "((..)|(.))*", "aaa" }, // repetition.dat
163 { "((..)|(.))*", "aaaaa" }, // repetition.dat
164 { "X(.?){7,}Y", "X1234567Y" }, // repetition.dat
170 for (size_t i
= 0; i
< __arraycount(b
); i
++) {
171 if (strcmp(pattern
, b
[i
].p
) == 0 &&
172 strcmp(input
, b
[i
].i
) == 0) {
173 fail(pattern
, input
, lineno
);
181 #define HAVE_BRACES 1
182 #define HAVE_MINIMAL 0
185 #define HAVE_BRACES 1
188 #define HAVE_MINIMAL 1
192 optional(const char *s
)
198 { "[[<element>]] not supported", HAVE_BRACES
},
199 { "no *? +? mimimal match ops", HAVE_MINIMAL
},
202 for (size_t i
= 0; i
< __arraycount(nv
); i
++)
203 if (strcmp(nv
[i
].n
, s
) == 0) {
206 fprintf(stderr
, "skipping unsupported [%s] tests\n", s
);
210 ATF_REQUIRE_MSG(0, "Unknown feature: %s", s
);
215 unsupported(const char *s
)
217 static const char *we
[] = {
218 #if defined(REGEX_SPENCER)
219 "ASSOCIATIVITY=left", // have right associativity
220 "SUBEXPRESSION=precedence", // have grouping subexpression
221 "REPEAT_LONGEST=last", // have first repeat longest
222 "BUG=alternation-order", // don't have it
223 "BUG=first-match", // don't have it
224 "BUG=nomatch-match", // don't have it
225 "BUG=repeat-any", // don't have it
226 "BUG=range-null", // don't have it
227 "BUG=repeat-null-unknown", // don't have it
228 "BUG=repeat-null", // don't have it
229 "BUG=repeat-artifact", // don't have it
230 "BUG=subexpression-first", // don't have it
231 #elif defined(REGEX_TRE)
232 "ASSOCIATIVITY=right", // have left associativity
233 "SUBEXPRESSION=grouping", // have precedence subexpression
234 "REPEAT_LONGEST=first", // have last repeat longest
235 "LENGTH=first", // have last length
236 "BUG=alternation-order", // don't have it
237 "BUG=first-match", // don't have it
238 "BUG=range-null", // don't have it
239 "BUG=repeat-null", // don't have it
240 "BUG=repeat-artifact", // don't have it
241 "BUG=subexpression-first", // don't have it
242 "BUG=repeat-short", // don't have it
249 while (*s
== '#' || isspace((unsigned char)*s
))
252 for (size_t i
= 0; i
< __arraycount(we
); i
++)
253 if (strcmp(we
[i
], s
) == 0)
259 geterror(const char *s
, int *comp
, int *exec
)
261 static const struct {
268 { "OK", 0, COMP
|EXEC
},
269 #define _DO(a, b) { # a, REG_ ## a, b },
291 for (size_t i
= 0; i
< __arraycount(nv
); i
++)
292 if (strcmp(s
, nv
[i
].n
) == 0) {
299 ATF_REQUIRE_MSG(0, "Unknown error %s", s
);
310 case '0': case '1': case '2': case '3': case '4':
311 case '5': case '6': case '7': case '8': case '9':
332 ATF_REQUIRE_MSG(0, "Unknown char %c", *s
);
338 getmatches(const char *s
)
342 for (i
= 0; (q
= strchr(s
, '(')) != NULL
; i
++, s
= q
+ 1)
344 ATF_REQUIRE_MSG(i
!= 0, "No parentheses found");
349 checkcomment(const char *s
, size_t lineno
)
351 if (s
&& strstr(s
, "BUG") != NULL
)
352 fprintf(stderr
, "Expected %s at line %zu\n", s
, lineno
);
356 checkmatches(const char *matches
, size_t nm
, const regmatch_t
*pm
,
363 size_t len
= strlen(matches
) + 1, off
= 0;
365 ATF_REQUIRE((res
= strdup(matches
)) != NULL
);
366 for (size_t i
= 0; i
< nm
; i
++) {
368 if (pm
[i
].rm_so
== -1 && pm
[i
].rm_eo
== -1)
369 l
= snprintf(res
+ off
, len
- off
, "(?,?)");
371 l
= snprintf(res
+ off
, len
- off
, "(%lld,%lld)",
372 (long long)pm
[i
].rm_so
, (long long)pm
[i
].rm_eo
);
373 ATF_REQUIRE_MSG((size_t) l
< len
- off
, "String too long %s"
374 " cur=%d, max=%zu", res
, l
, len
- off
);
377 ATF_REQUIRE_STREQ_MSG(res
, matches
, " at line %zu", lineno
);
382 att_test(const struct atf_tc
*tc
, const char *data_name
)
385 char *line
, *lastpattern
= NULL
, data_path
[MAXPATHLEN
];
386 size_t len
, lineno
= 0;
390 snprintf(data_path
, sizeof(data_path
), "%s/data/%s.dat",
391 atf_tc_get_config_var(tc
, "srcdir"), data_name
);
393 input_file
= fopen(data_path
, "r");
394 if (input_file
== NULL
)
395 atf_tc_fail("Failed to open input file %s", data_path
);
397 for (; (line
= fparseln(input_file
, &len
, &lineno
, delim
, 0))
398 != NULL
; free(line
)) {
399 char *name
, *pattern
, *input
, *matches
, *comment
;
403 fprintf(stderr
, "[%s]\n", line
);
405 if ((name
= strtok(line
, sep
)) == NULL
)
409 * We check these early so that we skip the lines quickly
410 * in order to do more strict testing on the other arguments.
411 * The same characters are also tested in the switch below
419 if (*name
== ';' || *name
== '#' || strcmp(name
, "NOTE") == 0)
422 /* Skip ":HA#???:" prefix */
423 while (*++name
&& *name
!= ':')
429 ATF_REQUIRE_MSG((pattern
= strtok(NULL
, sep
)) != NULL
,
430 "Missing pattern at line %zu", lineno
);
431 ATF_REQUIRE_MSG((input
= strtok(NULL
, sep
)) != NULL
,
432 "Missing input at line %zu", lineno
);
434 if (strchr(name
, '$')) {
435 ATF_REQUIRE(strunvis(pattern
, pattern
) != -1);
436 ATF_REQUIRE(strunvis(input
, input
) != -1);
440 if (strcmp(input
, "NULL") == 0)
443 if (strcmp(pattern
, "SAME") == 0) {
444 ATF_REQUIRE(lastpattern
!= NULL
);
445 pattern
= lastpattern
;
448 ATF_REQUIRE((lastpattern
= strdup(pattern
)) != NULL
);
451 ATF_REQUIRE_MSG((matches
= strtok(NULL
, sep
)) != NULL
,
452 "Missing matches at line %zu", lineno
);
454 comment
= strtok(NULL
, sep
);
456 case '{': /* Begin optional implementation */
457 if (optional(comment
)) {
461 name
++; /* We have it, so ignore */
463 case '}': /* End optional implementation */
466 case '?': /* Optional */
467 case '|': /* Alternative */
468 if (unsupported(comment
))
470 name
++; /* We have it, so ignore */
472 case '#': /* Comment */
480 if (bug(pattern
, input
, lineno
))
484 if (*matches
!= '(') {
485 geterror(matches
, &comp
, &exec
);
490 nm
= getmatches(matches
);
491 ATF_REQUIRE((pm
= calloc(nm
, sizeof(*pm
))) != NULL
);
496 int iflags
= getflags(name
);
497 for (; *name
; name
++) {
504 flags
= REG_EXTENDED
;
510 ATF_REQUIRE_MSG(0, "Bad name %c", *name
);
513 int c
= regcomp(&re
, pattern
, flags
| iflags
);
514 ATF_REQUIRE_MSG(c
== comp
,
515 "regcomp returned %d for pattern %s at line %zu",
519 int e
= regexec(&re
, input
, nm
, pm
, 0);
520 ATF_REQUIRE_MSG(e
== exec
, "Expected error %d,"
521 " got %d at line %zu", exec
, e
, lineno
);
522 checkmatches(matches
, nm
, pm
, lineno
);
523 checkcomment(comment
, lineno
);
533 ATF_TC_HEAD(basic
, tc
)
535 atf_tc_set_md_var(tc
, "descr", "Tests basic functionality");
537 ATF_TC_BODY(basic
, tc
)
539 att_test(tc
, "basic");
542 ATF_TC(categorization
);
543 ATF_TC_HEAD(categorization
, tc
)
545 atf_tc_set_md_var(tc
, "descr", "Tests implementation categorization");
547 ATF_TC_BODY(categorization
, tc
)
549 att_test(tc
, "categorization");
553 ATF_TC_HEAD(nullsubexpr
, tc
)
555 atf_tc_set_md_var(tc
, "descr", "Tests (...)*");
557 ATF_TC_BODY(nullsubexpr
, tc
)
559 att_test(tc
, "nullsubexpr");
563 ATF_TC_HEAD(leftassoc
, tc
)
565 atf_tc_set_md_var(tc
, "descr", "Tests left-associative "
568 ATF_TC_BODY(leftassoc
, tc
)
571 /* jmmv: I converted the original shell-based tests to C and they
572 * disabled this test in a very unconventional way without giving
573 * any explanation. Mark as broken here, but I don't know why. */
574 atf_tc_expect_fail("Reason for breakage unknown");
576 att_test(tc
, "leftassoc");
580 ATF_TC_HEAD(rightassoc
, tc
)
582 atf_tc_set_md_var(tc
, "descr", "Tests right-associative "
585 ATF_TC_BODY(rightassoc
, tc
)
588 /* jmmv: I converted the original shell-based tests to C and they
589 * disabled this test in a very unconventional way without giving
590 * any explanation. Mark as broken here, but I don't know why. */
591 atf_tc_expect_fail("Reason for breakage unknown");
593 att_test(tc
, "rightassoc");
597 ATF_TC_HEAD(forcedassoc
, tc
)
599 atf_tc_set_md_var(tc
, "descr", "Tests subexpression grouping to "
600 "force association");
602 ATF_TC_BODY(forcedassoc
, tc
)
604 att_test(tc
, "forcedassoc");
608 ATF_TC_HEAD(repetition
, tc
)
610 atf_tc_set_md_var(tc
, "descr", "Tests implicit vs. explicit "
613 ATF_TC_BODY(repetition
, tc
)
615 att_test(tc
, "repetition");
621 ATF_TP_ADD_TC(tp
, basic
);
622 ATF_TP_ADD_TC(tp
, categorization
);
623 ATF_TP_ADD_TC(tp
, nullsubexpr
);
624 ATF_TP_ADD_TC(tp
, leftassoc
);
625 ATF_TP_ADD_TC(tp
, rightassoc
);
626 ATF_TP_ADD_TC(tp
, forcedassoc
);
627 ATF_TP_ADD_TC(tp
, repetition
);
628 return atf_no_error();