4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
27 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
28 /* All Rights Reserved */
30 #pragma ident "%Z%%M% %I% %E% SMI"
33 * routines to do regular expression matching
39 * ... returns 0 if the string s was compiled successfully,
40 * a pointer to an error message otherwise.
41 * If passed 0 or a null string returns without changing
42 * the currently compiled re (see note 11 below).
46 * ... returns 1 if the string s matches the last compiled regular
48 * 0 if the string s failed to match the last compiled
49 * regular expression, and
50 * -1 if the compiled regular expression was invalid
51 * (indicating an internal error).
53 * The strings passed to both re_comp and re_exec may have trailing or
54 * embedded newline characters; they are terminated by nulls.
56 * The identity of the author of these routines is lost in antiquity;
57 * this is essentially the same as the re code in the original V6 ed.
59 * The regular expressions recognized are described below. This description
60 * is essentially the same as that for ed.
62 * A regular expression specifies a set of strings of characters.
63 * A member of this set of strings is said to be matched by
64 * the regular expression. In the following specification for
65 * regular expressions the word `character' means any character but NUL.
67 * 1. Any character except a special character matches itself.
68 * Special characters are the regular expression delimiter plus
69 * \ [ . and sometimes ^ * $.
70 * 2. A . matches any character.
71 * 3. A \ followed by any character except a digit or ( )
72 * matches that character.
73 * 4. A nonempty string s bracketed [s] (or [^s]) matches any
74 * character in (or not in) s. In s, \ has no special meaning,
75 * and ] may only appear as the first letter. A substring
76 * a-b, with a and b in ascending ASCII order, stands for
77 * the inclusive range of ASCII characters.
78 * 5. A regular expression of form 1-4 followed by * matches a
79 * sequence of 0 or more matches of the regular expression.
80 * 6. A regular expression, x, of form 1-8, bracketed \(x\)
81 * matches what x matches.
82 * 7. A \ followed by a digit n matches a copy of the string that the
83 * bracketed regular expression beginning with the nth \( matched.
84 * 8. A regular expression of form 1-8, x, followed by a regular
85 * expression of form 1-7, y matches a match for x followed by
86 * a match for y, with the x match being as long as possible
87 * while still permitting a y match.
88 * 9. A regular expression of form 1-8 preceded by ^ (or followed
89 * by $), is constrained to matches that begin at the left
90 * (or end at the right) end of a line.
91 * 10. A regular expression of form 1-9 picks out the longest among
92 * the leftmost matches in a line.
93 * 11. An empty regular expression stands for a copy of the last
94 * regular expression encountered.
102 #include <sys/types.h>
122 static struct re_globals
{
124 char *_braslist
[NBRA
], *_braelist
[NBRA
];
127 #define expbuf (_re->_expbuf)
128 #define braslist (_re->_braslist)
129 #define braelist (_re->_braelist)
130 #define circf (_re->_circf)
132 static int advance(const char *, char *);
133 static int backref(int, const char *);
134 static int cclass(char *, char, int);
137 * compile the regular expression argument into a dfa
140 re_comp(const char *sp
)
143 struct re_globals
*_re
= re_globals
;
145 char cclcnt
, numbra
= 0;
148 char *bracketp
= &bracket
[0];
149 char *retoolong
= "Regular expression too long";
152 _re
= (struct re_globals
*)calloc(1, sizeof (*_re
));
154 return ("Out of memory");
159 #define comerr(msg) {expbuf[0] = 0; return (msg); }
161 if (sp
== NULL
|| *sp
== '\0') {
163 return ("No previous regular expression");
173 if (ep
>= &expbuf
[ESIZE
])
175 if ((c
= *sp
++) == '\0') {
176 if (bracketp
!= bracket
)
177 comerr("unmatched \\(");
191 if (lastep
== NULL
|| *lastep
== CBRA
||
207 if ((c
= *sp
++) == '^') {
214 if (c
== '-' && ep
[-1] != 0) {
215 if ((c
= *sp
++) == ']') {
224 if (ep
>= &expbuf
[ESIZE
])
230 if (ep
>= &expbuf
[ESIZE
])
232 } while ((c
= *sp
++) != ']');
237 if ((c
= *sp
++) == '(') {
239 comerr("too many \\(\\) pairs");
240 *bracketp
++ = numbra
;
246 if (bracketp
<= bracket
)
247 comerr("unmatched \\)");
252 if (c
>= '1' && c
< ('1' + NBRA
)) {
270 * match the argument string against the compiled re
273 re_exec(const char *p1
)
275 struct re_globals
*_re
= re_globals
;
283 for (c
= 0; c
< NBRA
; c
++) {
288 return ((advance(p1
, p2
)));
290 * fast check for first character
297 if (rv
= advance(p1
, p2
))
306 if (rv
= advance(p1
, p2
))
313 * try to match the next thing in the dfa
316 advance(const char *lp
, char *ep
)
322 struct re_globals
*_re
= re_globals
;
346 if (cclass(ep
, *lp
++, 1)) {
353 if (cclass(ep
, *lp
++, 0)) {
360 braslist
[*ep
++] = (char *)lp
;
364 braelist
[*ep
++] = (char *)lp
;
368 if (braelist
[i
= *ep
++] == NULL
)
370 if (backref(i
, lp
)) {
371 lp
+= braelist
[i
] - braslist
[i
];
377 if (braelist
[i
= *ep
++] == NULL
)
380 ct
= braelist
[i
] - braslist
[i
];
381 while (backref(i
, lp
))
383 while (lp
>= curlp
) {
384 if (rv
= advance(lp
, ep
))
406 while (cclass(ep
, *lp
++, ep
[-1] == (CCL
|CSTAR
)))
414 if (rv
= advance(lp
, ep
))
416 } while (lp
> curlp
);
425 backref(int i
, const char *lp
)
428 struct re_globals
*_re
= re_globals
;
431 while (*bp
++ == *lp
++)
432 if (bp
>= braelist
[i
])
438 cclass(char *set
, char c
, int af
)