2 * Copyright (c) 1980 Regents of the University of California.
3 * All rights reserved. The Berkeley software License Agreement
4 * specifies the terms and conditions for redistribution.
7 #pragma ident "%Z%%M% %I% %E% SMI"
16 extern boolean l_onecase
; /* true if upper and lower equivalent */
17 extern char *l_idchars
; /* set of characters legal in identifiers
18 in addition to letters and digits */
20 extern char *strchr();
21 static void expconv(void);
24 (isalnum(c) || ((c) != NIL && strchr(l_idchars, (c)) != NIL))
/*
 * makelower - yield the lower-case form of c when c is an upper-case
 * letter, otherwise yield c unchanged.
 *
 * The argument is converted to unsigned char before it is handed to the
 * <ctype.h> routines: passing a negative plain char to them is undefined
 * behavior.  c is evaluated more than once, so it must not have side
 * effects.
 */
#define makelower(c) \
	(isupper((unsigned char)(c)) ? tolower((unsigned char)(c)) : (c))
27 /* STRNCMP - like strncmp except that we convert the
28 * first string to lower case before comparing
29 * if l_onecase is set.
33 STRNCMP(char *s1
, char *s2
, int len
)
37 if (*s2
- makelower(*s1
))
38 return (*s2
- makelower(*s1
));
57 /* The following routine converts an irregular expression to
60 * Either meta symbols (\a \d or \p) or character strings or
61 * operations ( alternation or parenthesizing ) can be
62 * specified. Each starts with a descriptor byte. The descriptor
63 * byte has STR set for strings, META set for meta symbols
64 * and OPER set for operations.
65 * The descriptor byte can also have the OPT bit set if the object
66 * defined is optional. Also ALT can be set to indicate an alternation.
68 * For metasymbols the byte following the descriptor byte identifies
69 * the meta symbol (containing an ascii 'a', 'd', 'p', '|', or '('). For
70 * strings the byte after the descriptor is a character count for
73 * meta symbols := descriptor
76 * strings := descriptor
80 * operations := descriptor
86 * handy macros for accessing parts of match blocks
/*
 * Accessors for the blocks of a converted expression.  A is a pointer to
 * the first byte of a block.  Every use of A is parenthesized so
 * that any pointer-valued expression (for example a conditional
 * expression) can be passed safely.  A is evaluated more than once in
 * OPTR and SNEXT, so it must not have side effects.
 */
#define MSYM(A) (*((A) + 1))	/* symbol in a meta symbol block */
#define MNEXT(A) ((A) + 2)	/* character following a metasymbol block */

#define OSYM(A) (*((A) + 1))	/* symbol in an operation block */
#define OCNT(A) (*((A) + 2))	/* character count */
#define ONEXT(A) ((A) + 3)	/* next character after the operation */
#define OPTR(A) ((A) + *((A) + 2))	/* place pointed to by the operator */

#define SCNT(A) (*((A) + 1))	/* byte count of a string */
#define SSTR(A) ((A) + 2)	/* address of the string */
#define SNEXT(A) ((A) + 2 + *((A) + 1))	/* character following the string */
101 * bit flags in the descriptor
109 char *ure
; /* pointer to current position in unconverted exp */
110 char *ccre
; /* pointer to current position in converted exp*/
115 /* re - unconverted irregular expression */
117 char *cre
; /* pointer to converted regular expression */
119 /* allocate room for the converted expression */
124 cre
= malloc (4 * strlen(re
) + 3);
128 /* start the conversion with a \a */
133 /* start the conversion (it's recursive) */
142 char *cs
; /* pointer to current symbol in converted exp */
143 char c
; /* character being processed */
144 char *acs
; /* pointer to last alternate */
147 /* let the conversion begin */
150 while (*ure
!= NIL
) {
151 switch (c
= *ure
++) {
154 switch (c
= *ure
++) {
156 /* escaped characters are just characters */
158 if (cs
== NIL
|| (*cs
& STR
) == 0) {
168 /* normal(?) metacharacters */
173 if (acs
!= NIL
&& acs
!= cs
) {
176 OCNT(acs
) = ccre
- acs
;
189 /* just put the symbol in */
192 if (acs
!= NIL
&& acs
!= cs
) {
195 OCNT(acs
) = ccre
- acs
;
206 /* mark the last match sequence as optional */
212 /* recurse and define a subexpression */
214 if (acs
!= NIL
&& acs
!= cs
) {
217 OCNT(acs
) = ccre
- acs
;
227 OCNT(cs
) = ccre
- cs
; /* offset to next symbol */
230 /* return from a recursion */
235 OCNT(acs
) = ccre
- acs
;
246 /* mark the last match sequence as having an alternate */
247 /* the third byte will contain an offset to jump over the */
248 /* alternate match in case the first did not fail */
250 if (acs
!= NIL
&& acs
!= cs
)
251 OCNT(ccre
) = ccre
- acs
; /* make a back pointer */
259 acs
= cs
; /* remember that the pointer is to be filled */
262 /* if it's not a metasymbol just build a character string */
264 if (cs
== NIL
|| (*cs
& STR
) == 0) {
278 OCNT(acs
) = ccre
- acs
;
284 /* end of convertre */
288 * The following routine recognises an irregular expression
289 * with the following special characters:
291 * \? - means last match was optional
292 * \a - matches any number of characters
293 * \d - matches any number of spaces and tabs
294 * \p - matches any number of alphanumeric
296 * characters matched will be copied into
297 * the area pointed to by 'name'.
299 * \( \) - grouping used mostly for alternation and
302 * The irregular expression must be translated to internal form
303 * prior to calling this routine
305 * The value returned is the pointer to the first non \a
309 boolean _escaped
; /* true if we are currently _escaped */
310 char *Start
; /* start of string */
313 expmatch(char *s
, char *re
, char *mstring
)
314 /* s - string to check for a match in */
315 /* re - a converted irregular expression */
316 /* mstring - where to put whatever matches a \p */
318 char *cs
; /* the current symbol */
319 char *ptr
, *s1
; /* temporary pointer */
320 boolean matched
; /* a temporary boolean */
322 /* initial conditions */
328 /* loop till expression string is exhausted (or at least pretty tired) */
330 switch (*cs
& (OPER
| STR
| META
)) {
332 /* try to match a string */
334 matched
= !STRNCMP (s
, SSTR(cs
), SCNT(cs
));
337 /* hoorah it matches */
340 } else if (*cs
& ALT
) {
342 /* alternation, skip to next expression */
344 } else if (*cs
& OPT
) {
346 /* the match is optional */
348 matched
= 1; /* indicate a successful match */
351 /* no match, error return */
356 /* an operator, do something fancy */
360 /* this is an alternation */
364 /* last thing in the alternation was a match, skip ahead */
368 /* no match, keep trying */
372 /* this is a grouping, recurse */
374 ptr
= expmatch (s
, ONEXT(cs
), mstring
);
377 /* the subexpression matched */
380 } else if (*cs
& ALT
) {
382 /* alternation, skip to next expression */
384 } else if (*cs
& OPT
) {
386 /* the match is optional */
387 matched
= 1; /* indicate a successful match */
390 /* no match, error return */
398 /* try to match a metasymbol */
402 /* try to match anything and remember what was matched */
405 * This is really the same as trying to match the
406 * remaining parts of the expression to any subset
411 ptr
= expmatch (s1
, MNEXT(cs
), mstring
);
412 if (ptr
!= NIL
&& s1
!= s
) {
414 /* we have a match, remember the match */
415 strncpy (mstring
, s
, s1
- s
);
416 mstring
[s1
- s
] = '\0';
418 } else if (ptr
!= NIL
&& (*cs
& OPT
)) {
420 /* it was optional so no match is ok */
422 } else if (ptr
!= NIL
) {
424 /* not optional and we still matched */
430 _escaped
= _escaped
? FALSE
: TRUE
;
436 /* try to match anything */
439 * This is really the same as trying to match the
440 * remaining parts of the expression to any subset
445 ptr
= expmatch (s1
, MNEXT(cs
), mstring
);
446 if (ptr
!= NIL
&& s1
!= s
) {
448 /* we have a match */
450 } else if (ptr
!= NIL
&& (*cs
& OPT
)) {
452 /* it was optional so no match is ok */
454 } else if (ptr
!= NIL
) {
456 /* not optional and we still matched */
460 _escaped
= _escaped
? FALSE
: TRUE
;
466 /* fail if we are currently _escaped */
473 /* match any number of tabs and spaces */
476 while (*s
== ' ' || *s
== '\t')
478 if (s
!= ptr
|| s
== Start
) {
480 /* match, be happy */
483 } else if (*s
== '\n' || *s
== '\0') {
485 /* match, be happy */
488 } else if (*cs
& ALT
) {
490 /* try the next part */
493 } else if (*cs
& OPT
) {
500 /* no match, error return */
504 /* check for end of line */
506 if (*s
== '\0' || *s
== '\n') {
508 /* match, be happy */
512 } else if (*cs
& ALT
) {
514 /* try the next part */
517 } else if (*cs
& OPT
) {
524 /* no match, error return */
528 /* check for start of line */
532 /* match, be happy */
535 } else if (*cs
& ALT
) {
537 /* try the next part */
540 } else if (*cs
& OPT
) {
547 /* no match, error return */
551 /* end of a subexpression, return success */