Ignore machine-check MSRs
[freebsd-src/fkvm-freebsd.git] / usr.bin / vgrind / regexp.c
blob199f3c6c119d767191b84dcff34fc730e56fa45a
1 /*
2 * Copyright (c) 1980, 1993
3 * The Regents of the University of California. All rights reserved.
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * 3. All advertising materials mentioning features or use of this software
15 * must display the following acknowledgement:
16 * This product includes software developed by the University of
17 * California, Berkeley and its contributors.
18 * 4. Neither the name of the University nor the names of its contributors
19 * may be used to endorse or promote products derived from this software
20 * without specific prior written permission.
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
35 #include <sys/cdefs.h>
37 __FBSDID("$FreeBSD$");
39 #ifndef lint
40 static const char copyright[] =
41 "@(#) Copyright (c) 1980, 1993\n\
42 The Regents of the University of California. All rights reserved.\n";
43 #endif
45 #ifndef lint
46 static const char sccsid[] = "@(#)regexp.c 8.1 (Berkeley) 6/6/93";
47 #endif
49 #include <ctype.h>
50 #include <stdlib.h>
51 #include <string.h>
53 #include "extern.h"
55 #define FALSE 0
56 #define TRUE !(FALSE)
57 #define NIL 0
59 static void expconv(void);
61 boolean _escaped; /* true if we are currently _escaped */
62 char *s_start; /* start of string */
63 boolean l_onecase; /* true if upper and lower equivalent */
65 #define makelower(c) (isupper((c)) ? tolower((c)) : (c))
67 /* STRNCMP - like strncmp except that we convert the
68 * first string to lower case before comparing
69 * if l_onecase is set.
72 int
73 STRNCMP(s1, s2, len)
74 register char *s1,*s2;
75 register int len;
77 if (l_onecase) {
79 if (*s2 - makelower(*s1))
80 return (*s2 - makelower(*s1));
81 else {
82 s2++;
83 s1++;
85 while (--len);
86 } else {
88 if (*s2 - *s1)
89 return (*s2 - *s1);
90 else {
91 s2++;
92 s1++;
94 while (--len);
96 return(0);
99 /* The following routine converts an irregular expression to
100 * internal format.
102 * Either meta symbols (\a \d or \p) or character strings or
103 * operations ( alternation or perenthesizing ) can be
104 * specified. Each starts with a descriptor byte. The descriptor
105 * byte has STR set for strings, META set for meta symbols
106 * and OPER set for operations.
107 * The descriptor byte can also have the OPT bit set if the object
108 * defined is optional. Also ALT can be set to indicate an alternation.
110 * For metasymbols the byte following the descriptor byte identities
111 * the meta symbol (containing an ascii 'a', 'd', 'p', '|', or '('). For
112 * strings the byte after the descriptor is a character count for
113 * the string:
115 * meta symbols := descriptor
116 * symbol
118 * strings := descriptor
119 * character count
120 * the string
122 * operatins := descriptor
123 * symbol
124 * character count
128 * handy macros for accessing parts of match blocks
130 #define MSYM(A) (*(A+1)) /* symbol in a meta symbol block */
131 #define MNEXT(A) (A+2) /* character following a metasymbol block */
133 #define OSYM(A) (*(A+1)) /* symbol in an operation block */
134 #define OCNT(A) (*(A+2)) /* character count */
135 #define ONEXT(A) (A+3) /* next character after the operation */
136 #define OPTR(A) (A+*(A+2)) /* place pointed to by the operator */
138 #define SCNT(A) (*(A+1)) /* byte count of a string */
139 #define SSTR(A) (A+2) /* address of the string */
140 #define SNEXT(A) (A+2+*(A+1)) /* character following the string */
143 * bit flags in the descriptor
145 #define OPT 1
146 #define STR 2
147 #define META 4
148 #define ALT 8
149 #define OPER 16
151 static char *ccre; /* pointer to current position in converted exp*/
152 static char *ure; /* pointer current position in unconverted exp */
154 char *
155 convexp(re)
156 char *re; /* unconverted irregular expression */
158 register char *cre; /* pointer to converted regular expression */
160 /* allocate room for the converted expression */
161 if (re == NIL)
162 return (NIL);
163 if (*re == '\0')
164 return (NIL);
165 cre = malloc (4 * strlen(re) + 3);
166 ccre = cre;
167 ure = re;
169 /* start the conversion with a \a */
170 *cre = META | OPT;
171 MSYM(cre) = 'a';
172 ccre = MNEXT(cre);
174 /* start the conversion (its recursive) */
175 expconv ();
176 *ccre = 0;
177 return (cre);
180 static void
181 expconv()
183 register char *cs; /* pointer to current symbol in converted exp */
184 register char c; /* character being processed */
185 register char *acs; /* pinter to last alternate */
186 register int temp;
188 /* let the conversion begin */
189 acs = NIL;
190 cs = NIL;
191 while (*ure != NIL) {
192 switch (c = *ure++) {
194 case '\\':
195 switch (c = *ure++) {
197 /* escaped characters are just characters */
198 default:
199 if (cs == NIL || (*cs & STR) == 0) {
200 cs = ccre;
201 *cs = STR;
202 SCNT(cs) = 1;
203 ccre += 2;
204 } else
205 SCNT(cs)++;
206 *ccre++ = c;
207 break;
209 /* normal(?) metacharacters */
210 case 'a':
211 case 'd':
212 case 'e':
213 case 'p':
214 if (acs != NIL && acs != cs) {
215 do {
216 temp = OCNT(acs);
217 OCNT(acs) = ccre - acs;
218 acs -= temp;
219 } while (temp != 0);
220 acs = NIL;
222 cs = ccre;
223 *cs = META;
224 MSYM(cs) = c;
225 ccre = MNEXT(cs);
226 break;
228 break;
230 /* just put the symbol in */
231 case '^':
232 case '$':
233 if (acs != NIL && acs != cs) {
234 do {
235 temp = OCNT(acs);
236 OCNT(acs) = ccre - acs;
237 acs -= temp;
238 } while (temp != 0);
239 acs = NIL;
241 cs = ccre;
242 *cs = META;
243 MSYM(cs) = c;
244 ccre = MNEXT(cs);
245 break;
247 /* mark the last match sequence as optional */
248 case '?':
249 if (cs)
250 *cs = *cs | OPT;
251 break;
253 /* recurse and define a subexpression */
254 case '(':
255 if (acs != NIL && acs != cs) {
256 do {
257 temp = OCNT(acs);
258 OCNT(acs) = ccre - acs;
259 acs -= temp;
260 } while (temp != 0);
261 acs = NIL;
263 cs = ccre;
264 *cs = OPER;
265 OSYM(cs) = '(';
266 ccre = ONEXT(cs);
267 expconv ();
268 OCNT(cs) = ccre - cs; /* offset to next symbol */
269 break;
271 /* return from a recursion */
272 case ')':
273 if (acs != NIL) {
274 do {
275 temp = OCNT(acs);
276 OCNT(acs) = ccre - acs;
277 acs -= temp;
278 } while (temp != 0);
279 acs = NIL;
281 cs = ccre;
282 *cs = META;
283 MSYM(cs) = c;
284 ccre = MNEXT(cs);
285 return;
287 /* mark the last match sequence as having an alternate */
288 /* the third byte will contain an offset to jump over the */
289 /* alternate match in case the first did not fail */
290 case '|':
291 if (acs != NIL && acs != cs)
292 OCNT(ccre) = ccre - acs; /* make a back pointer */
293 else
294 OCNT(ccre) = 0;
295 *cs |= ALT;
296 cs = ccre;
297 *cs = OPER;
298 OSYM(cs) = '|';
299 ccre = ONEXT(cs);
300 acs = cs; /* remember that the pointer is to be filles */
301 break;
303 /* if its not a metasymbol just build a scharacter string */
304 default:
305 if (cs == NIL || (*cs & STR) == 0) {
306 cs = ccre;
307 *cs = STR;
308 SCNT(cs) = 1;
309 ccre = SSTR(cs);
310 } else
311 SCNT(cs)++;
312 *ccre++ = c;
313 break;
316 if (acs != NIL) {
317 do {
318 temp = OCNT(acs);
319 OCNT(acs) = ccre - acs;
320 acs -= temp;
321 } while (temp != 0);
322 acs = NIL;
324 return;
326 /* end of convertre */
330 * The following routine recognises an irregular expresion
331 * with the following special characters:
333 * \? - means last match was optional
334 * \a - matches any number of characters
335 * \d - matches any number of spaces and tabs
336 * \p - matches any number of alphanumeric
337 * characters. The
338 * characters matched will be copied into
339 * the area pointed to by 'name'.
340 * \| - alternation
341 * \( \) - grouping used mostly for alternation and
342 * optionality
344 * The irregular expression must be translated to internal form
345 * prior to calling this routine
347 * The value returned is the pointer to the first non \a
348 * character matched.
351 char *
352 expmatch (s, re, mstring)
353 register char *s; /* string to check for a match in */
354 register char *re; /* a converted irregular expression */
355 register char *mstring; /* where to put whatever matches a \p */
357 register char *cs; /* the current symbol */
358 register char *ptr,*s1; /* temporary pointer */
359 boolean matched; /* a temporary boolean */
361 /* initial conditions */
362 if (re == NIL)
363 return (NIL);
364 cs = re;
365 matched = FALSE;
367 /* loop till expression string is exhausted (or at least pretty tired) */
368 while (*cs) {
369 switch (*cs & (OPER | STR | META)) {
371 /* try to match a string */
372 case STR:
373 matched = !STRNCMP (s, SSTR(cs), SCNT(cs));
374 if (matched) {
376 /* hoorah it matches */
377 s += SCNT(cs);
378 cs = SNEXT(cs);
379 } else if (*cs & ALT) {
381 /* alternation, skip to next expression */
382 cs = SNEXT(cs);
383 } else if (*cs & OPT) {
385 /* the match is optional */
386 cs = SNEXT(cs);
387 matched = 1; /* indicate a successful match */
388 } else {
390 /* no match, error return */
391 return (NIL);
393 break;
395 /* an operator, do something fancy */
396 case OPER:
397 switch (OSYM(cs)) {
399 /* this is an alternation */
400 case '|':
401 if (matched)
403 /* last thing in the alternation was a match, skip ahead */
404 cs = OPTR(cs);
405 else
407 /* no match, keep trying */
408 cs = ONEXT(cs);
409 break;
411 /* this is a grouping, recurse */
412 case '(':
413 ptr = expmatch (s, ONEXT(cs), mstring);
414 if (ptr != NIL) {
416 /* the subexpression matched */
417 matched = 1;
418 s = ptr;
419 } else if (*cs & ALT) {
421 /* alternation, skip to next expression */
422 matched = 0;
423 } else if (*cs & OPT) {
425 /* the match is optional */
426 matched = 1; /* indicate a successful match */
427 } else {
429 /* no match, error return */
430 return (NIL);
432 cs = OPTR(cs);
433 break;
435 break;
437 /* try to match a metasymbol */
438 case META:
439 switch (MSYM(cs)) {
441 /* try to match anything and remember what was matched */
442 case 'p':
444 * This is really the same as trying the match the
445 * remaining parts of the expression to any subset
446 * of the string.
448 s1 = s;
449 do {
450 ptr = expmatch (s1, MNEXT(cs), mstring);
451 if (ptr != NIL && s1 != s) {
453 /* we have a match, remember the match */
454 strncpy (mstring, s, s1 - s);
455 mstring[s1 - s] = '\0';
456 return (ptr);
457 } else if (ptr != NIL && (*cs & OPT)) {
459 /* it was aoptional so no match is ok */
460 return (ptr);
461 } else if (ptr != NIL) {
463 /* not optional and we still matched */
464 return (NIL);
466 if (!(isalnum(*s1) || *s1 == '_' ||
467 /* C++ destructor */
468 *s1 == '~' ||
469 /* C++ scope operator */
470 (strlen(s1) > 1 && *s1 == ':' && s1[1] == ':' &&
471 (s1++, TRUE))))
472 return (NIL);
473 if (*s1 == '\\')
474 _escaped = _escaped ? FALSE : TRUE;
475 else
476 _escaped = FALSE;
477 } while (*s1++);
478 return (NIL);
480 /* try to match anything */
481 case 'a':
483 * This is really the same as trying the match the
484 * remaining parts of the expression to any subset
485 * of the string.
487 s1 = s;
488 do {
489 ptr = expmatch (s1, MNEXT(cs), mstring);
490 if (ptr != NIL && s1 != s) {
492 /* we have a match */
493 return (ptr);
494 } else if (ptr != NIL && (*cs & OPT)) {
496 /* it was aoptional so no match is ok */
497 return (ptr);
498 } else if (ptr != NIL) {
500 /* not optional and we still matched */
501 return (NIL);
503 if (*s1 == '\\')
504 _escaped = _escaped ? FALSE : TRUE;
505 else
506 _escaped = FALSE;
507 } while (*s1++);
508 return (NIL);
510 /* fail if we are currently _escaped */
511 case 'e':
512 if (_escaped)
513 return(NIL);
514 cs = MNEXT(cs);
515 break;
517 /* match any number of tabs and spaces */
518 case 'd':
519 ptr = s;
520 while (*s == ' ' || *s == '\t')
521 s++;
522 if (s != ptr || s == s_start) {
524 /* match, be happy */
525 matched = 1;
526 cs = MNEXT(cs);
527 } else if (*s == '\n' || *s == '\0') {
529 /* match, be happy */
530 matched = 1;
531 cs = MNEXT(cs);
532 } else if (*cs & ALT) {
534 /* try the next part */
535 matched = 0;
536 cs = MNEXT(cs);
537 } else if (*cs & OPT) {
539 /* doesn't matter */
540 matched = 1;
541 cs = MNEXT(cs);
542 } else
544 /* no match, error return */
545 return (NIL);
546 break;
548 /* check for end of line */
549 case '$':
550 if (*s == '\0' || *s == '\n') {
552 /* match, be happy */
553 s++;
554 matched = 1;
555 cs = MNEXT(cs);
556 } else if (*cs & ALT) {
558 /* try the next part */
559 matched = 0;
560 cs = MNEXT(cs);
561 } else if (*cs & OPT) {
563 /* doesn't matter */
564 matched = 1;
565 cs = MNEXT(cs);
566 } else
568 /* no match, error return */
569 return (NIL);
570 break;
572 /* check for start of line */
573 case '^':
574 if (s == s_start) {
576 /* match, be happy */
577 matched = 1;
578 cs = MNEXT(cs);
579 } else if (*cs & ALT) {
581 /* try the next part */
582 matched = 0;
583 cs = MNEXT(cs);
584 } else if (*cs & OPT) {
586 /* doesn't matter */
587 matched = 1;
588 cs = MNEXT(cs);
589 } else
591 /* no match, error return */
592 return (NIL);
593 break;
595 /* end of a subexpression, return success */
596 case ')':
597 return (s);
599 break;
602 return (s);