1 /* $NetBSD: regex.c,v 1.3 2014/12/10 04:37:59 christos Exp $ */
4 * Copyright (C) 2013, 2014 Internet Systems Consortium, Inc. ("ISC")
6 * Permission to use, copy, modify, and/or distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
10 * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
11 * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
12 * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
13 * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
14 * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
15 * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
16 * PERFORMANCE OF THIS SOFTWARE.
22 #include <isc/regex.h>
23 #include <isc/string.h>
25 #if VALREGEX_REPORT_REASON
26 #define FAIL(x) do { reason = (x); goto error; } while(/*CONSTCOND*/0)
28 #define FAIL(x) goto error
32 * Validate the regular expression in the 'C' locale.
35 isc_regex_validate(const char *c
) {
37 none
, parse_bracket
, parse_bound
,
38 parse_ce
, parse_ec
, parse_cc
40 /* Well known character classes. */
42 ":alnum:", ":digit:", ":punct:", ":alpha:", ":graph:",
43 ":space:", ":blank:", ":lower:", ":upper:", ":cntrl:",
46 isc_boolean_t seen_comma
= ISC_FALSE
;
47 isc_boolean_t seen_high
= ISC_FALSE
;
48 isc_boolean_t seen_char
= ISC_FALSE
;
49 isc_boolean_t seen_ec
= ISC_FALSE
;
50 isc_boolean_t seen_ce
= ISC_FALSE
;
51 isc_boolean_t have_atom
= ISC_FALSE
;
55 isc_boolean_t empty_ok
= ISC_FALSE
;
56 isc_boolean_t neg
= ISC_FALSE
;
57 isc_boolean_t was_multiple
= ISC_FALSE
;
59 unsigned int high
= 0;
60 const char *ccname
= NULL
;
62 #if VALREGEX_REPORT_REASON
63 const char *reason
= "";
66 if (c
== NULL
|| *c
== 0)
69 while (c
!= NULL
&& *c
!= 0) {
73 case '\\': /* make literal */
76 case '1': case '2': case '3':
77 case '4': case '5': case '6':
78 case '7': case '8': case '9':
80 FAIL("bad back reference");
82 was_multiple
= ISC_FALSE
;
85 FAIL("escaped end-of-string");
91 case '[': /* bracket start */
94 was_multiple
= ISC_FALSE
;
95 seen_char
= ISC_FALSE
;
96 state
= parse_bracket
;
98 case '{': /* bound start */
100 case '0': case '1': case '2': case '3':
101 case '4': case '5': case '6': case '7':
106 FAIL("was multiple");
107 seen_comma
= ISC_FALSE
;
108 seen_high
= ISC_FALSE
;
116 have_atom
= ISC_TRUE
;
117 was_multiple
= ISC_TRUE
;
121 case '(': /* group start */
122 have_atom
= ISC_FALSE
;
123 was_multiple
= ISC_FALSE
;
129 case ')': /* group end */
130 if (group
&& !have_atom
&& !empty_ok
)
131 FAIL("empty alternative");
132 have_atom
= ISC_TRUE
;
133 was_multiple
= ISC_FALSE
;
138 case '|': /* alternative separator */
141 have_atom
= ISC_FALSE
;
142 empty_ok
= ISC_FALSE
;
143 was_multiple
= ISC_FALSE
;
148 have_atom
= ISC_TRUE
;
149 was_multiple
= ISC_TRUE
;
156 FAIL("was multiple");
159 have_atom
= ISC_TRUE
;
160 was_multiple
= ISC_TRUE
;
166 have_atom
= ISC_TRUE
;
167 was_multiple
= ISC_FALSE
;
174 case '0': case '1': case '2': case '3': case '4':
175 case '5': case '6': case '7': case '8': case '9':
177 low
= low
* 10 + *c
- '0';
179 FAIL("lower bound too big");
181 seen_high
= ISC_TRUE
;
182 high
= high
* 10 + *c
- '0';
184 FAIL("upper bound too big");
190 FAIL("multiple commas");
191 seen_comma
= ISC_TRUE
;
196 FAIL("non digit/comma");
198 if (seen_high
&& low
> high
)
199 FAIL("bad parse bound");
200 seen_comma
= ISC_FALSE
;
209 if (seen_char
|| neg
) goto inside
;
214 if (range
== 2) goto inside
;
215 if (!seen_char
) goto inside
;
224 case '.': /* collating element */
225 if (range
!= 0) --range
;
230 case '=': /* equivalence class */
232 FAIL("equivalence class in range");
237 case ':': /* character class */
239 FAIL("character class in range");
245 seen_char
= ISC_TRUE
;
248 if (!c
[1] && !seen_char
)
249 FAIL("unfinished brace");
254 have_atom
= ISC_TRUE
;
259 seen_char
= ISC_TRUE
;
260 if (range
== 2 && (*c
& 0xff) < range_start
)
261 FAIL("out of order range");
264 range_start
= *c
& 0xff;
278 state
= parse_bracket
;
308 state
= parse_bracket
;
328 isc_boolean_t found
= ISC_FALSE
;
330 i
< sizeof(cc
)/sizeof(*cc
);
336 (unsigned int)(c
- ccname
))
338 if (strncmp(cc
[i
], ccname
, len
))
345 state
= parse_bracket
;
368 #if VALREGEX_REPORT_REASON
369 fprintf(stderr
, "%s\n", reason
);