Remove building with NOCRYPTO option
[minix.git] / external / bsd / bind / dist / lib / isc / regex.c
blob3597fb81b4f9a7d6667b7131d9daa77a6e75bad1
1 /* $NetBSD: regex.c,v 1.3 2014/12/10 04:37:59 christos Exp $ */
3 /*
4 * Copyright (C) 2013, 2014 Internet Systems Consortium, Inc. ("ISC")
6 * Permission to use, copy, modify, and/or distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
10 * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
11 * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
12 * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
13 * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
14 * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
15 * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
16 * PERFORMANCE OF THIS SOFTWARE.
19 #include <config.h>
21 #include <isc/file.h>
22 #include <isc/regex.h>
23 #include <isc/string.h>
25 #if VALREGEX_REPORT_REASON
26 #define FAIL(x) do { reason = (x); goto error; } while(/*CONSTCOND*/0)
27 #else
28 #define FAIL(x) goto error
29 #endif
32 * Validate the regular expression 'C' locale.
34 int
35 isc_regex_validate(const char *c) {
36 enum {
37 none, parse_bracket, parse_bound,
38 parse_ce, parse_ec, parse_cc
39 } state = none;
40 /* Well known character classes. */
41 const char *cc[] = {
42 ":alnum:", ":digit:", ":punct:", ":alpha:", ":graph:",
43 ":space:", ":blank:", ":lower:", ":upper:", ":cntrl:",
44 ":print:", ":xdigit:"
46 isc_boolean_t seen_comma = ISC_FALSE;
47 isc_boolean_t seen_high = ISC_FALSE;
48 isc_boolean_t seen_char = ISC_FALSE;
49 isc_boolean_t seen_ec = ISC_FALSE;
50 isc_boolean_t seen_ce = ISC_FALSE;
51 isc_boolean_t have_atom = ISC_FALSE;
52 int group = 0;
53 int range = 0;
54 int sub = 0;
55 isc_boolean_t empty_ok = ISC_FALSE;
56 isc_boolean_t neg = ISC_FALSE;
57 isc_boolean_t was_multiple = ISC_FALSE;
58 unsigned int low = 0;
59 unsigned int high = 0;
60 const char *ccname = NULL;
61 int range_start = 0;
62 #if VALREGEX_REPORT_REASON
63 const char *reason = "";
64 #endif
66 if (c == NULL || *c == 0)
67 FAIL("empty string");
69 while (c != NULL && *c != 0) {
70 switch (state) {
71 case none:
72 switch (*c) {
73 case '\\': /* make literal */
74 ++c;
75 switch (*c) {
76 case '1': case '2': case '3':
77 case '4': case '5': case '6':
78 case '7': case '8': case '9':
79 if ((*c - '0') > sub)
80 FAIL("bad back reference");
81 have_atom = ISC_TRUE;
82 was_multiple = ISC_FALSE;
83 break;
84 case 0:
85 FAIL("escaped end-of-string");
86 default:
87 goto literal;
89 ++c;
90 break;
91 case '[': /* bracket start */
92 ++c;
93 neg = ISC_FALSE;
94 was_multiple = ISC_FALSE;
95 seen_char = ISC_FALSE;
96 state = parse_bracket;
97 break;
98 case '{': /* bound start */
99 switch (c[1]) {
100 case '0': case '1': case '2': case '3':
101 case '4': case '5': case '6': case '7':
102 case '8': case '9':
103 if (!have_atom)
104 FAIL("no atom");
105 if (was_multiple)
106 FAIL("was multiple");
107 seen_comma = ISC_FALSE;
108 seen_high = ISC_FALSE;
109 low = high = 0;
110 state = parse_bound;
111 break;
112 default:
113 goto literal;
115 ++c;
116 have_atom = ISC_TRUE;
117 was_multiple = ISC_TRUE;
118 break;
119 case '}':
120 goto literal;
121 case '(': /* group start */
122 have_atom = ISC_FALSE;
123 was_multiple = ISC_FALSE;
124 empty_ok = ISC_TRUE;
125 ++group;
126 ++sub;
127 ++c;
128 break;
129 case ')': /* group end */
130 if (group && !have_atom && !empty_ok)
131 FAIL("empty alternative");
132 have_atom = ISC_TRUE;
133 was_multiple = ISC_FALSE;
134 if (group != 0)
135 --group;
136 ++c;
137 break;
138 case '|': /* alternative seperator */
139 if (!have_atom)
140 FAIL("no atom");
141 have_atom = ISC_FALSE;
142 empty_ok = ISC_FALSE;
143 was_multiple = ISC_FALSE;
144 ++c;
145 break;
146 case '^':
147 case '$':
148 have_atom = ISC_TRUE;
149 was_multiple = ISC_TRUE;
150 ++c;
151 break;
152 case '+':
153 case '*':
154 case '?':
155 if (was_multiple)
156 FAIL("was multiple");
157 if (!have_atom)
158 FAIL("no atom");
159 have_atom = ISC_TRUE;
160 was_multiple = ISC_TRUE;
161 ++c;
162 break;
163 case '.':
164 default:
165 literal:
166 have_atom = ISC_TRUE;
167 was_multiple = ISC_FALSE;
168 ++c;
169 break;
171 break;
172 case parse_bound:
173 switch (*c) {
174 case '0': case '1': case '2': case '3': case '4':
175 case '5': case '6': case '7': case '8': case '9':
176 if (!seen_comma) {
177 low = low * 10 + *c - '0';
178 if (low > 255)
179 FAIL("lower bound too big");
180 } else {
181 seen_high = ISC_TRUE;
182 high = high * 10 + *c - '0';
183 if (high > 255)
184 FAIL("upper bound too big");
186 ++c;
187 break;
188 case ',':
189 if (seen_comma)
190 FAIL("multiple commas");
191 seen_comma = ISC_TRUE;
192 ++c;
193 break;
194 default:
195 case '{':
196 FAIL("non digit/comma");
197 case '}':
198 if (seen_high && low > high)
199 FAIL("bad parse bound");
200 seen_comma = ISC_FALSE;
201 state = none;
202 ++c;
203 break;
205 break;
206 case parse_bracket:
207 switch (*c) {
208 case '^':
209 if (seen_char || neg) goto inside;
210 neg = ISC_TRUE;
211 ++c;
212 break;
213 case '-':
214 if (range == 2) goto inside;
215 if (!seen_char) goto inside;
216 if (range == 1)
217 FAIL("bad range");
218 range = 2;
219 ++c;
220 break;
221 case '[':
222 ++c;
223 switch (*c) {
224 case '.': /* collating element */
225 if (range != 0) --range;
226 ++c;
227 state = parse_ce;
228 seen_ce = ISC_FALSE;
229 break;
230 case '=': /* equivalence class */
231 if (range == 2)
232 FAIL("equivalence class in range");
233 ++c;
234 state = parse_ec;
235 seen_ec = ISC_FALSE;
236 break;
237 case ':': /* character class */
238 if (range == 2)
239 FAIL("character class in range");
240 ccname = c;
241 ++c;
242 state = parse_cc;
243 break;
245 seen_char = ISC_TRUE;
246 break;
247 case ']':
248 if (!c[1] && !seen_char)
249 FAIL("unfinished brace");
250 if (!seen_char)
251 goto inside;
252 ++c;
253 range = 0;
254 have_atom = ISC_TRUE;
255 state = none;
256 break;
257 default:
258 inside:
259 seen_char = ISC_TRUE;
260 if (range == 2 && (*c & 0xff) < range_start)
261 FAIL("out of order range");
262 if (range != 0)
263 --range;
264 range_start = *c & 0xff;
265 ++c;
266 break;
268 break;
269 case parse_ce:
270 switch (*c) {
271 case '.':
272 ++c;
273 switch (*c) {
274 case ']':
275 if (!seen_ce)
276 FAIL("empty ce");
277 ++c;
278 state = parse_bracket;
279 break;
280 default:
281 if (seen_ce)
282 range_start = 256;
283 else
284 range_start = '.';
285 seen_ce = ISC_TRUE;
286 break;
288 break;
289 default:
290 if (seen_ce)
291 range_start = 256;
292 else
293 range_start = *c;
294 seen_ce = ISC_TRUE;
295 ++c;
296 break;
298 break;
299 case parse_ec:
300 switch (*c) {
301 case '=':
302 ++c;
303 switch (*c) {
304 case ']':
305 if (!seen_ec)
306 FAIL("no ec");
307 ++c;
308 state = parse_bracket;
309 break;
310 default:
311 seen_ec = ISC_TRUE;
312 break;
314 break;
315 default:
316 seen_ec = ISC_TRUE;
317 ++c;
318 break;
320 break;
321 case parse_cc:
322 switch (*c) {
323 case ':':
324 ++c;
325 switch (*c) {
326 case ']': {
327 unsigned int i;
328 isc_boolean_t found = ISC_FALSE;
329 for (i = 0;
330 i < sizeof(cc)/sizeof(*cc);
331 i++)
333 unsigned int len;
334 len = strlen(cc[i]);
335 if (len !=
336 (unsigned int)(c - ccname))
337 continue;
338 if (strncmp(cc[i], ccname, len))
339 continue;
340 found = ISC_TRUE;
342 if (!found)
343 FAIL("unknown cc");
344 ++c;
345 state = parse_bracket;
346 break;
348 default:
349 break;
351 break;
352 default:
353 ++c;
354 break;
356 break;
359 if (group != 0)
360 FAIL("group open");
361 if (state != none)
362 FAIL("incomplete");
363 if (!have_atom)
364 FAIL("no atom");
365 return (sub);
367 error:
368 #if VALREGEX_REPORT_REASON
369 fprintf(stderr, "%s\n", reason);
370 #endif
371 return (-1);