No empty .Rs/.Re
[netbsd-mini2440.git] / usr.bin / vgrind / regexp.c
blob243688e6b095478fe801b156476fad17445daa4f
1 /* $NetBSD: regexp.c,v 1.10 2006/05/01 05:14:22 christos Exp $ */
3 /*
4 * Copyright (c) 1980, 1993
5 * The Regents of the University of California. All rights reserved.
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. Neither the name of the University nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
33 #include <sys/cdefs.h>
34 #ifndef lint
35 __COPYRIGHT("@(#) Copyright (c) 1980, 1993\
36 The Regents of the University of California. All rights reserved.");
37 #endif /* not lint */
39 #ifndef lint
40 #if 0
41 static char sccsid[] = "@(#)regexp.c 8.1 (Berkeley) 6/6/93";
42 #endif
43 __RCSID("$NetBSD: regexp.c,v 1.10 2006/05/01 05:14:22 christos Exp $");
44 #endif /* not lint */
46 #include <assert.h>
47 #include <ctype.h>
48 #include <stdlib.h>
49 #include <string.h>
50 #include "extern.h"
52 #define FALSE 0
53 #define TRUE !(FALSE)
54 #define NIL 0
56 static void expconv __P((void));
58 boolean x_escaped; /* true if we are currently x_escaped */
59 char *x_start; /* start of string */
60 boolean l_onecase; /* true if upper and lower equivalent */
62 #define makelower(c) (isupper((unsigned char)(c)) ? tolower((unsigned char)(c)) : (c))
64 /* STRNCMP - like strncmp except that we convert the
65 * first string to lower case before comparing
66 * if l_onecase is set.
69 int
70 STRNCMP(s1, s2, len)
71 char *s1,*s2;
72 int len;
74 if (l_onecase) {
76 if (*s2 - makelower(*s1))
77 return (*s2 - makelower(*s1));
78 else {
79 s2++;
80 s1++;
82 while (--len);
83 } else {
85 if (*s2 - *s1)
86 return (*s2 - *s1);
87 else {
88 s2++;
89 s1++;
91 while (--len);
93 return(0);
96 /* The following routine converts an irregular expression to
97 * internal format.
99 * Either meta symbols (\a \d or \p) or character strings or
100 * operations ( alternation or perenthesizing ) can be
101 * specified. Each starts with a descriptor byte. The descriptor
102 * byte has STR set for strings, META set for meta symbols
103 * and OPER set for operations.
104 * The descriptor byte can also have the OPT bit set if the object
105 * defined is optional. Also ALT can be set to indicate an alternation.
107 * For metasymbols the byte following the descriptor byte identities
108 * the meta symbol (containing an ascii 'a', 'd', 'p', '|', or '('). For
109 * strings the byte after the descriptor is a character count for
110 * the string:
112 * meta symbols := descriptor
113 * symbol
115 * strings := descriptor
116 * character count
117 * the string
119 * operatins := descriptor
120 * symbol
121 * character count
125 * handy macros for accessing parts of match blocks
127 #define MSYM(A) (*(A+1)) /* symbol in a meta symbol block */
128 #define MNEXT(A) (A+2) /* character following a metasymbol block */
130 #define OSYM(A) (*(A+1)) /* symbol in an operation block */
131 #define OCNT(A) (*(A+2)) /* character count */
132 #define ONEXT(A) (A+3) /* next character after the operation */
133 #define OPTR(A) (A+*(A+2)) /* place pointed to by the operator */
135 #define SCNT(A) (*(A+1)) /* byte count of a string */
136 #define SSTR(A) (A+2) /* address of the string */
137 #define SNEXT(A) (A+2+*(A+1)) /* character following the string */
140 * bit flags in the descriptor
142 #define OPT 1
143 #define STR 2
144 #define META 4
145 #define ALT 8
146 #define OPER 16
148 static char *ccre; /* pointer to current position in converted exp*/
149 static char *ure; /* pointer current position in unconverted exp */
151 char *
152 convexp(re)
153 char *re; /* unconverted irregular expression */
155 char *cre; /* pointer to converted regular expression */
157 /* allocate room for the converted expression */
158 if (re == NIL)
159 return (NIL);
160 if (*re == '\0')
161 return (NIL);
162 cre = malloc(4 * strlen(re) + 3);
163 ccre = cre;
164 ure = re;
166 /* start the conversion with a \a */
167 *cre = META | OPT;
168 MSYM(cre) = 'a';
169 ccre = MNEXT(cre);
171 /* start the conversion (its recursive) */
172 expconv();
173 *ccre = 0;
174 return (cre);
177 static void
178 expconv()
180 char *cs; /* pointer to current symbol in converted exp */
181 char c; /* character being processed */
182 char *acs; /* pinter to last alternate */
183 int temp;
185 /* let the conversion begin */
186 acs = NIL;
187 cs = NIL;
188 while (*ure != NIL) {
189 switch (c = *ure++) {
191 case '\\':
192 switch (c = *ure++) {
194 /* escaped characters are just characters */
195 default:
196 if (cs == NIL || (*cs & STR) == 0) {
197 cs = ccre;
198 *cs = STR;
199 SCNT(cs) = 1;
200 ccre += 2;
201 } else
202 SCNT(cs)++;
203 *ccre++ = c;
204 break;
206 /* normal(?) metacharacters */
207 case 'a':
208 case 'd':
209 case 'e':
210 case 'p':
211 if (acs != NIL && acs != cs) {
212 do {
213 temp = OCNT(acs);
214 OCNT(acs) = ccre - acs;
215 acs -= temp;
216 } while (temp != 0);
217 acs = NIL;
219 cs = ccre;
220 *cs = META;
221 MSYM(cs) = c;
222 ccre = MNEXT(cs);
223 break;
225 break;
227 /* just put the symbol in */
228 case '^':
229 case '$':
230 if (acs != NIL && acs != cs) {
231 do {
232 temp = OCNT(acs);
233 OCNT(acs) = ccre - acs;
234 acs -= temp;
235 } while (temp != 0);
236 acs = NIL;
238 cs = ccre;
239 *cs = META;
240 MSYM(cs) = c;
241 ccre = MNEXT(cs);
242 break;
244 /* mark the last match sequence as optional */
245 case '?':
246 if (cs)
247 *cs = *cs | OPT;
248 break;
250 /* recurse and define a subexpression */
251 case '(':
252 if (acs != NIL && acs != cs) {
253 do {
254 temp = OCNT(acs);
255 OCNT(acs) = ccre - acs;
256 acs -= temp;
257 } while (temp != 0);
258 acs = NIL;
260 cs = ccre;
261 *cs = OPER;
262 OSYM(cs) = '(';
263 ccre = ONEXT(cs);
264 expconv();
265 OCNT(cs) = ccre - cs; /* offset to next symbol */
266 break;
268 /* reurn from a recursion */
269 case ')':
270 if (acs != NIL) {
271 do {
272 temp = OCNT(acs);
273 OCNT(acs) = ccre - acs;
274 acs -= temp;
275 } while (temp != 0);
276 acs = NIL;
278 cs = ccre;
279 *cs = META;
280 MSYM(cs) = c;
281 ccre = MNEXT(cs);
282 return;
284 /* mark the last match sequence as having an alternate */
285 /* the third byte will contain an offset to jump over the */
286 /* alternate match in case the first did not fail */
287 case '|':
288 if (acs != NIL && acs != cs)
289 OCNT(ccre) = ccre - acs; /* make a back pointer */
290 else
291 OCNT(ccre) = 0;
292 assert(cs != NULL);
293 *cs |= ALT;
294 cs = ccre;
295 *cs = OPER;
296 OSYM(cs) = '|';
297 ccre = ONEXT(cs);
298 acs = cs; /* remember that the pointer is to be filles */
299 break;
301 /* if its not a metasymbol just build a scharacter string */
302 default:
303 if (cs == NIL || (*cs & STR) == 0) {
304 cs = ccre;
305 *cs = STR;
306 SCNT(cs) = 1;
307 ccre = SSTR(cs);
308 } else
309 SCNT(cs)++;
310 *ccre++ = c;
311 break;
314 if (acs != NIL) {
315 do {
316 temp = OCNT(acs);
317 OCNT(acs) = ccre - acs;
318 acs -= temp;
319 } while (temp != 0);
320 acs = NIL;
322 return;
324 /* end of convertre */
328 * The following routine recognises an irregular expresion
329 * with the following special characters:
331 * \? - means last match was optional
332 * \a - matches any number of characters
333 * \d - matches any number of spaces and tabs
334 * \p - matches any number of alphanumeric
335 * characters. The
336 * characters matched will be copied into
337 * the area pointed to by 'name'.
338 * \| - alternation
339 * \( \) - grouping used mostly for alternation and
340 * optionality
342 * The irregular expression must be translated to internal form
343 * prior to calling this routine
345 * The value returned is the pointer to the first non \a
346 * character matched.
349 char *
350 expmatch(s, re, mstring)
351 char *s; /* string to check for a match in */
352 char *re; /* a converted irregular expression */
353 char *mstring; /* where to put whatever matches a \p */
355 char *cs; /* the current symbol */
356 char *ptr,*s1; /* temporary pointer */
357 boolean matched; /* a temporary boolean */
359 /* initial conditions */
360 if (re == NIL)
361 return (NIL);
362 cs = re;
363 matched = FALSE;
365 /* loop till expression string is exhausted (or at least pretty tired) */
366 while (*cs) {
367 switch (*cs & (OPER | STR | META)) {
369 /* try to match a string */
370 case STR:
371 matched = !STRNCMP (s, SSTR(cs), SCNT(cs));
372 if (matched) {
374 /* hoorah it matches */
375 s += SCNT(cs);
376 cs = SNEXT(cs);
377 } else if (*cs & ALT) {
379 /* alternation, skip to next expression */
380 cs = SNEXT(cs);
381 } else if (*cs & OPT) {
383 /* the match is optional */
384 cs = SNEXT(cs);
385 matched = 1; /* indicate a successful match */
386 } else {
388 /* no match, error return */
389 return (NIL);
391 break;
393 /* an operator, do something fancy */
394 case OPER:
395 switch (OSYM(cs)) {
397 /* this is an alternation */
398 case '|':
399 if (matched)
401 /* last thing in the alternation was a match, skip ahead */
402 cs = OPTR(cs);
403 else
405 /* no match, keep trying */
406 cs = ONEXT(cs);
407 break;
409 /* this is a grouping, recurse */
410 case '(':
411 ptr = expmatch(s, ONEXT(cs), mstring);
412 if (ptr != NIL) {
414 /* the subexpression matched */
415 matched = 1;
416 s = ptr;
417 } else if (*cs & ALT) {
419 /* alternation, skip to next expression */
420 matched = 0;
421 } else if (*cs & OPT) {
423 /* the match is optional */
424 matched = 1; /* indicate a successful match */
425 } else {
427 /* no match, error return */
428 return (NIL);
430 cs = OPTR(cs);
431 break;
433 break;
435 /* try to match a metasymbol */
436 case META:
437 switch (MSYM(cs)) {
439 /* try to match anything and remember what was matched */
440 case 'p':
442 * This is really the same as trying the match the
443 * remaining parts of the expression to any subset
444 * of the string.
446 s1 = s;
447 do {
448 ptr = expmatch(s1, MNEXT(cs), mstring);
449 if (ptr != NIL && s1 != s) {
451 /* we have a match, remember the match */
452 strncpy(mstring, s, s1 - s);
453 mstring[s1 - s] = '\0';
454 return (ptr);
455 } else if (ptr != NIL && (*cs & OPT)) {
457 /* it was aoptional so no match is ok */
458 return (ptr);
459 } else if (ptr != NIL) {
461 /* not optional and we still matched */
462 return (NIL);
464 if (!isalnum((unsigned char)*s1) && *s1 != '_')
465 return (NIL);
466 if (*s1 == '\\')
467 x_escaped = x_escaped ? FALSE : TRUE;
468 else
469 x_escaped = FALSE;
470 } while (*s1++);
471 return (NIL);
473 /* try to match anything */
474 case 'a':
476 * This is really the same as trying the match the
477 * remaining parts of the expression to any subset
478 * of the string.
480 s1 = s;
481 do {
482 ptr = expmatch(s1, MNEXT(cs), mstring);
483 if (ptr != NIL && s1 != s) {
485 /* we have a match */
486 return (ptr);
487 } else if (ptr != NIL && (*cs & OPT)) {
489 /* it was aoptional so no match is ok */
490 return (ptr);
491 } else if (ptr != NIL) {
493 /* not optional and we still matched */
494 return (NIL);
496 if (*s1 == '\\')
497 x_escaped = x_escaped ? FALSE : TRUE;
498 else
499 x_escaped = FALSE;
500 } while (*s1++);
501 return (NIL);
503 /* fail if we are currently x_escaped */
504 case 'e':
505 if (x_escaped)
506 return(NIL);
507 cs = MNEXT(cs);
508 break;
510 /* match any number of tabs and spaces */
511 case 'd':
512 ptr = s;
513 while (*s == ' ' || *s == '\t')
514 s++;
515 if (s != ptr || s == x_start) {
517 /* match, be happy */
518 matched = 1;
519 cs = MNEXT(cs);
520 } else if (*s == '\n' || *s == '\0') {
522 /* match, be happy */
523 matched = 1;
524 cs = MNEXT(cs);
525 } else if (*cs & ALT) {
527 /* try the next part */
528 matched = 0;
529 cs = MNEXT(cs);
530 } else if (*cs & OPT) {
532 /* doesn't matter */
533 matched = 1;
534 cs = MNEXT(cs);
535 } else
537 /* no match, error return */
538 return (NIL);
539 break;
541 /* check for end of line */
542 case '$':
543 if (*s == '\0' || *s == '\n') {
545 /* match, be happy */
546 s++;
547 matched = 1;
548 cs = MNEXT(cs);
549 } else if (*cs & ALT) {
551 /* try the next part */
552 matched = 0;
553 cs = MNEXT(cs);
554 } else if (*cs & OPT) {
556 /* doesn't matter */
557 matched = 1;
558 cs = MNEXT(cs);
559 } else
561 /* no match, error return */
562 return (NIL);
563 break;
565 /* check for start of line */
566 case '^':
567 if (s == x_start) {
569 /* match, be happy */
570 matched = 1;
571 cs = MNEXT(cs);
572 } else if (*cs & ALT) {
574 /* try the next part */
575 matched = 0;
576 cs = MNEXT(cs);
577 } else if (*cs & OPT) {
579 /* doesn't matter */
580 matched = 1;
581 cs = MNEXT(cs);
582 } else
584 /* no match, error return */
585 return (NIL);
586 break;
588 /* end of a subexpression, return success */
589 case ')':
590 return (s);
592 break;
595 return (s);