1 /* $NetBSD: rxp.c,v 1.12 2004/01/27 20:30:30 jsm Exp $ */
4 * Copyright (c) 1991, 1993
5 * The Regents of the University of California. All rights reserved.
7 * This code is derived from software contributed to Berkeley by
8 * Jim R. Oldroyd at The Instruction Set and Keith Gabryelski at
9 * Commodore Business Machines.
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. Neither the name of the University nor the names of its contributors
20 * may be used to endorse or promote products derived from this software
21 * without specific prior written permission.
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 #include <sys/cdefs.h>
39 static char sccsid
[] = "@(#)rxp.c 8.1 (Berkeley) 5/31/93";
41 __RCSID("$NetBSD: rxp.c,v 1.12 2004/01/27 20:30:30 jsm Exp $");
46 * regular expression parser
48 * external functions and return values are:
51 * FALSE parse failure; error message will be in char rxperr[]
53 * {...} optional pattern, equivalent to [...|]
55 * [...] pattern delimiters
58 * TRUE string s matches compiled pattern
59 * FALSE match failure or regexp error
62 * char * reverse-engineered regular expression string
70 /* regexp tokens, arg */
71 #define LIT (-1) /* literal character, char */
72 #define SOT (-2) /* start text anchor, - */
73 #define EOT (-3) /* end text anchor, - */
74 #define GRP_S (-4) /* start alternate grp, ptr_to_end */
75 #define GRP_E (-5) /* end group, - */
76 #define ALT_S (-6) /* alternate starts, ptr_to_next */
77 #define ALT_E (-7) /* alternate ends, - */
78 #define END (-8) /* end of regexp, - */
80 typedef short Rxp_t
; /* type for regexp tokens */
82 static Rxp_t rxpbuf
[RXP_LINE_SZ
]; /* compiled regular expression buffer */
83 char rxperr
[128]; /* parser error message */
85 static int rxp__compile(const char *, int);
86 static char *rxp__expand(int);
87 static int rxp__match(const char *, int, Rxp_t
*, Rxp_t
*, const char *);
90 rxp_compile(const char *s
)
92 return (rxp__compile(s
, TRUE
));
96 rxp__compile(const char *s
, int first
)
99 static const char *sp
;
108 *rp
++ = SOT
; /* auto-anchor: pat is really ^pat$ */
109 *rp
++ = GRP_S
; /* auto-group: ^pat$ is really ^[pat]$ */
116 if (rp
- rxpbuf
>= RXP_LINE_SZ
- 4) {
117 (void)snprintf(rxperr
, sizeof(rxperr
),
118 "regular expression too long %s", s
);
121 if (*sp
== ':' && !esc
)
138 if ((err
= rxp__compile(s
, FALSE
)) != TRUE
)
141 *grp_ptr
= rp
- rxpbuf
;
147 *alt_ptr
= rp
- rxpbuf
;
156 *alt_ptr
= rp
- rxpbuf
;
159 (void)snprintf(rxperr
, sizeof(rxperr
),
160 "unmatched alternator in regexp %s",
175 (void)snprintf(rxperr
, sizeof(rxperr
),
176 "unmatched alternator in regexp %s", s
);
180 *alt_ptr
= rp
- rxpbuf
;
182 *(rxpbuf
+ 2) = rp
- rxpbuf
;
189 * match string against compiled regular expression
192 rxp_match(const char *s
)
194 return (rxp__match(s
, TRUE
, NULL
, NULL
, NULL
));
198 rxp__match(const char *s
,
200 Rxp_t
*j_succ
, /* jump here on successful alt match */
201 Rxp_t
*j_fail
, /* jump here on failed match */
202 const char *sp_fail
) /* reset sp to here on failed match */
205 static const char *sp
;
207 Rxp_t
*grp_end
= NULL
;
213 while (rp
< rxpbuf
+ RXP_LINE_SZ
&& *rp
!= END
)
217 ch
= isascii(*rp
) && isupper(*rp
) ? tolower(*rp
) : *rp
;
237 grp_end
= rxpbuf
+ *rp
++;
241 rxp__match(sp
, FALSE
, grp_end
, rxpbuf
+ *rp
++, sp
);
253 return (*rp
!= END
? FALSE
: TRUE
);
257 * Reverse-engineer the regular expression by picking the first of all alternates.
262 return (rxp__expand(TRUE
));
266 rxp__expand(int first
)
268 static char buf
[RXP_LINE_SZ
/2];
278 while (rp
< rxpbuf
+ RXP_LINE_SZ
&& *rp
!= END
)
286 grp_ptr
= rxpbuf
+ *rp
;
288 if ((err
= rxp__expand(FALSE
)) == NULL
)