2 * Copyright (c) 1980, 1993
3 * The Regents of the University of California. All rights reserved.
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * 3. All advertising materials mentioning features or use of this software
15 * must display the following acknowledgement:
16 * This product includes software developed by the University of
17 * California, Berkeley and its contributors.
18 * 4. Neither the name of the University nor the names of its contributors
19 * may be used to endorse or promote products derived from this software
20 * without specific prior written permission.
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 #include <sys/cdefs.h>
37 __FBSDID("$FreeBSD$");
40 static const char copyright
[] =
41 "@(#) Copyright (c) 1980, 1993\n\
42 The Regents of the University of California. All rights reserved.\n";
46 static const char sccsid
[] = "@(#)regexp.c 8.1 (Berkeley) 6/6/93";
59 static void expconv(void);
61 boolean _escaped
; /* true if we are currently _escaped */
62 char *s_start
; /* start of string */
63 boolean l_onecase
; /* true if upper and lower equivalent */
65 #define makelower(c) (isupper((c)) ? tolower((c)) : (c))
67 /* STRNCMP - like strncmp except that we convert the
68 * first string to lower case before comparing
69 * if l_onecase is set.
74 register char *s1
,*s2
;
79 if (*s2
- makelower(*s1
))
80 return (*s2
- makelower(*s1
));
99 /* The following routine converts an irregular expression to
102 * Either meta symbols (\a \d or \p) or character strings or
103 * operations ( alternation or perenthesizing ) can be
104 * specified. Each starts with a descriptor byte. The descriptor
105 * byte has STR set for strings, META set for meta symbols
106 * and OPER set for operations.
107 * The descriptor byte can also have the OPT bit set if the object
108 * defined is optional. Also ALT can be set to indicate an alternation.
110 * For metasymbols the byte following the descriptor byte identities
111 * the meta symbol (containing an ascii 'a', 'd', 'p', '|', or '('). For
112 * strings the byte after the descriptor is a character count for
115 * meta symbols := descriptor
118 * strings := descriptor
122 * operatins := descriptor
128 * handy macros for accessing parts of match blocks
130 #define MSYM(A) (*(A+1)) /* symbol in a meta symbol block */
131 #define MNEXT(A) (A+2) /* character following a metasymbol block */
133 #define OSYM(A) (*(A+1)) /* symbol in an operation block */
134 #define OCNT(A) (*(A+2)) /* character count */
135 #define ONEXT(A) (A+3) /* next character after the operation */
136 #define OPTR(A) (A+*(A+2)) /* place pointed to by the operator */
138 #define SCNT(A) (*(A+1)) /* byte count of a string */
139 #define SSTR(A) (A+2) /* address of the string */
140 #define SNEXT(A) (A+2+*(A+1)) /* character following the string */
143 * bit flags in the descriptor
151 static char *ccre
; /* pointer to current position in converted exp*/
152 static char *ure
; /* pointer current position in unconverted exp */
156 char *re
; /* unconverted irregular expression */
158 register char *cre
; /* pointer to converted regular expression */
160 /* allocate room for the converted expression */
165 cre
= malloc (4 * strlen(re
) + 3);
169 /* start the conversion with a \a */
174 /* start the conversion (its recursive) */
183 register char *cs
; /* pointer to current symbol in converted exp */
184 register char c
; /* character being processed */
185 register char *acs
; /* pinter to last alternate */
188 /* let the conversion begin */
191 while (*ure
!= NIL
) {
192 switch (c
= *ure
++) {
195 switch (c
= *ure
++) {
197 /* escaped characters are just characters */
199 if (cs
== NIL
|| (*cs
& STR
) == 0) {
209 /* normal(?) metacharacters */
214 if (acs
!= NIL
&& acs
!= cs
) {
217 OCNT(acs
) = ccre
- acs
;
230 /* just put the symbol in */
233 if (acs
!= NIL
&& acs
!= cs
) {
236 OCNT(acs
) = ccre
- acs
;
247 /* mark the last match sequence as optional */
253 /* recurse and define a subexpression */
255 if (acs
!= NIL
&& acs
!= cs
) {
258 OCNT(acs
) = ccre
- acs
;
268 OCNT(cs
) = ccre
- cs
; /* offset to next symbol */
271 /* return from a recursion */
276 OCNT(acs
) = ccre
- acs
;
287 /* mark the last match sequence as having an alternate */
288 /* the third byte will contain an offset to jump over the */
289 /* alternate match in case the first did not fail */
291 if (acs
!= NIL
&& acs
!= cs
)
292 OCNT(ccre
) = ccre
- acs
; /* make a back pointer */
300 acs
= cs
; /* remember that the pointer is to be filles */
303 /* if its not a metasymbol just build a scharacter string */
305 if (cs
== NIL
|| (*cs
& STR
) == 0) {
319 OCNT(acs
) = ccre
- acs
;
326 /* end of convertre */
330 * The following routine recognises an irregular expresion
331 * with the following special characters:
333 * \? - means last match was optional
334 * \a - matches any number of characters
335 * \d - matches any number of spaces and tabs
336 * \p - matches any number of alphanumeric
338 * characters matched will be copied into
339 * the area pointed to by 'name'.
341 * \( \) - grouping used mostly for alternation and
344 * The irregular expression must be translated to internal form
345 * prior to calling this routine
347 * The value returned is the pointer to the first non \a
352 expmatch (s
, re
, mstring
)
353 register char *s
; /* string to check for a match in */
354 register char *re
; /* a converted irregular expression */
355 register char *mstring
; /* where to put whatever matches a \p */
357 register char *cs
; /* the current symbol */
358 register char *ptr
,*s1
; /* temporary pointer */
359 boolean matched
; /* a temporary boolean */
361 /* initial conditions */
367 /* loop till expression string is exhausted (or at least pretty tired) */
369 switch (*cs
& (OPER
| STR
| META
)) {
371 /* try to match a string */
373 matched
= !STRNCMP (s
, SSTR(cs
), SCNT(cs
));
376 /* hoorah it matches */
379 } else if (*cs
& ALT
) {
381 /* alternation, skip to next expression */
383 } else if (*cs
& OPT
) {
385 /* the match is optional */
387 matched
= 1; /* indicate a successful match */
390 /* no match, error return */
395 /* an operator, do something fancy */
399 /* this is an alternation */
403 /* last thing in the alternation was a match, skip ahead */
407 /* no match, keep trying */
411 /* this is a grouping, recurse */
413 ptr
= expmatch (s
, ONEXT(cs
), mstring
);
416 /* the subexpression matched */
419 } else if (*cs
& ALT
) {
421 /* alternation, skip to next expression */
423 } else if (*cs
& OPT
) {
425 /* the match is optional */
426 matched
= 1; /* indicate a successful match */
429 /* no match, error return */
437 /* try to match a metasymbol */
441 /* try to match anything and remember what was matched */
444 * This is really the same as trying the match the
445 * remaining parts of the expression to any subset
450 ptr
= expmatch (s1
, MNEXT(cs
), mstring
);
451 if (ptr
!= NIL
&& s1
!= s
) {
453 /* we have a match, remember the match */
454 strncpy (mstring
, s
, s1
- s
);
455 mstring
[s1
- s
] = '\0';
457 } else if (ptr
!= NIL
&& (*cs
& OPT
)) {
459 /* it was aoptional so no match is ok */
461 } else if (ptr
!= NIL
) {
463 /* not optional and we still matched */
466 if (!(isalnum(*s1
) || *s1
== '_' ||
469 /* C++ scope operator */
470 (strlen(s1
) > 1 && *s1
== ':' && s1
[1] == ':' &&
474 _escaped
= _escaped
? FALSE
: TRUE
;
480 /* try to match anything */
483 * This is really the same as trying the match the
484 * remaining parts of the expression to any subset
489 ptr
= expmatch (s1
, MNEXT(cs
), mstring
);
490 if (ptr
!= NIL
&& s1
!= s
) {
492 /* we have a match */
494 } else if (ptr
!= NIL
&& (*cs
& OPT
)) {
496 /* it was aoptional so no match is ok */
498 } else if (ptr
!= NIL
) {
500 /* not optional and we still matched */
504 _escaped
= _escaped
? FALSE
: TRUE
;
510 /* fail if we are currently _escaped */
517 /* match any number of tabs and spaces */
520 while (*s
== ' ' || *s
== '\t')
522 if (s
!= ptr
|| s
== s_start
) {
524 /* match, be happy */
527 } else if (*s
== '\n' || *s
== '\0') {
529 /* match, be happy */
532 } else if (*cs
& ALT
) {
534 /* try the next part */
537 } else if (*cs
& OPT
) {
544 /* no match, error return */
548 /* check for end of line */
550 if (*s
== '\0' || *s
== '\n') {
552 /* match, be happy */
556 } else if (*cs
& ALT
) {
558 /* try the next part */
561 } else if (*cs
& OPT
) {
568 /* no match, error return */
572 /* check for start of line */
576 /* match, be happy */
579 } else if (*cs
& ALT
) {
581 /* try the next part */
584 } else if (*cs
& OPT
) {
591 /* no match, error return */
595 /* end of a subexpression, return success */