unstack - fix ipcvecs
[minix.git] / commands / ash / bltin / regexp.c
blob7ef68ba1fa0e95db6109636116ad7fa1ef4abacf
1 /*
2 * Regular expression matching for expr(1). Bugs: The upper bound of
3 * a range specified by the \{ feature cannot be zero.
5 * Copyright (C) 1989 by Kenneth Almquist. All rights reserved.
6 * This file is part of ash, which is distributed under the terms specified
7 * by the Ash General Public License. See the file named LICENSE.
8 */
10 #include "bltin.h"
11 #include "myregexp.h"
13 #include <stdlib.h>
15 #define RE_END 0 /* end of regular expression */
16 #define RE_LITERAL 1 /* normal character follows */
17 #define RE_DOT 2 /* "." */
18 #define RE_CCL 3 /* "[...]" */
19 #define RE_NCCL 4 /* "[^...]" */
20 #define RE_LP 5 /* "\(" */
21 #define RE_RP 6 /* "\)" */
22 #define RE_MATCHED 7 /* "\digit" */
23 #define RE_EOS 8 /* "$" matches end of string */
24 #define RE_STAR 9 /* "*" */
25 #define RE_RANGE 10 /* "\{num,num\}" */
29 char *match_begin[10];
30 short match_length[10];
31 short number_parens;
32 static int match(char *pattern, char *string);
36 char *
37 re_compile(pattern)
38 char *pattern;
40 register char *p;
41 register char c;
42 char *comp;
43 register char *q;
44 char *begin;
45 char *endp;
46 register int len;
47 int first;
48 int type;
49 char *stackp;
50 char stack[10];
51 int paren_num;
52 int i;
54 p = pattern;
55 if (*p == '^')
56 p++;
57 comp = q = malloc(2 * strlen(p) + 1);
58 begin = q;
59 stackp = stack;
60 paren_num = 0;
61 for (;;) {
62 switch (c = *p++) {
63 case '\0':
64 *q = '\0';
65 goto out;
66 case '.':
67 *q++ = RE_DOT;
68 len = 1;
69 break;
70 case '[':
71 begin = q;
72 *q = RE_CCL;
73 if (*p == '^') {
74 *q = RE_NCCL;
75 p++;
77 q++;
78 first = 1;
79 while (*p != ']' || first == 1) {
80 if (p[1] == '-' && p[2] != ']') {
81 *q++ = '-';
82 *q++ = p[0];
83 *q++ = p[2];
84 p += 3;
85 } else if (*p == '-') {
86 *q++ = '-';
87 *q++ = '-';
88 *q++ = '-';
89 p++;
90 } else {
91 *q++ = *p++;
93 first = 0;
95 p++;
96 *q++ = '\0';
97 len = q - begin;
98 break;
99 case '$':
100 if (*p != '\0')
101 goto dft;
102 *q++ = RE_EOS;
103 break;
104 case '*':
105 if (len == 0)
106 goto dft;
107 type = RE_STAR;
108 range:
109 i = (type == RE_RANGE)? 3 : 1;
110 endp = q + i;
111 begin = q - len;
112 do {
113 --q;
114 *(q + i) = *q;
115 } while (--len > 0);
116 q = begin;
117 *q++ = type;
118 if (type == RE_RANGE) {
119 i = 0;
120 while ((unsigned)(*p - '0') <= 9)
121 i = 10 * i + (*p++ - '0');
122 *q++ = i;
123 if (*p != ',') {
124 *q++ = i;
125 } else {
126 p++;
127 i = 0;
128 while ((unsigned)(*p - '0') <= 9)
129 i = 10 * i + (*p++ - '0');
130 *q++ = i;
132 if (*p != '\\' || *++p != '}')
133 error("RE error");
134 p++;
136 q = endp;
137 break;
138 case '\\':
139 if ((c = *p++) == '(') {
140 if (++paren_num > 9)
141 error("RE error");
142 *q++ = RE_LP;
143 *q++ = paren_num;
144 *stackp++ = paren_num;
145 len = 0;
146 } else if (c == ')') {
147 if (stackp == stack)
148 error("RE error");
149 *q++ = RE_RP;
150 *q++ = *--stackp;
151 len = 0;
152 } else if (c == '{') {
153 type = RE_RANGE;
154 goto range;
155 } else if ((unsigned)(c - '1') < 9) {
156 /* should check validity here */
157 *q++ = RE_MATCHED;
158 *q++ = c - '0';
159 len = 2;
160 } else {
161 goto dft;
163 break;
164 default:
165 dft: *q++ = RE_LITERAL;
166 *q++ = c;
167 len = 2;
168 break;
171 out:
172 if (stackp != stack)
173 error("RE error");
174 number_parens = paren_num;
175 return comp;
181 re_match(pattern, string)
182 char *pattern;
183 char *string;
185 char **pp;
187 match_begin[0] = string;
188 for (pp = &match_begin[1] ; pp <= &match_begin[9] ; pp++)
189 *pp = 0;
190 return match(pattern, string);
195 static
196 match(pattern, string)
197 char *pattern;
198 char *string;
200 register char *p, *q;
201 int counting;
202 int low, high, count;
203 char *curpat;
204 char *start_count;
205 int negate;
206 int found;
207 char *r;
208 int len;
209 char c;
211 p = pattern;
212 q = string;
213 counting = 0;
214 for (;;) {
215 if (counting) {
216 if (++count > high)
217 goto bad;
218 p = curpat;
220 switch (*p++) {
221 case RE_END:
222 match_length[0] = q - match_begin[0];
223 return 1;
224 case RE_LITERAL:
225 if (*q++ != *p++)
226 goto bad;
227 break;
228 case RE_DOT:
229 if (*q++ == '\0')
230 goto bad;
231 break;
232 case RE_CCL:
233 negate = 0;
234 goto ccl;
235 case RE_NCCL:
236 negate = 1;
237 ccl:
238 found = 0;
239 c = *q++;
240 while (*p) {
241 if (*p == '-') {
242 if (c >= *++p && c <= *++p)
243 found = 1;
244 } else {
245 if (c == *p)
246 found = 1;
248 p++;
250 p++;
251 if (found == negate)
252 goto bad;
253 break;
254 case RE_LP:
255 match_begin[*p++] = q;
256 break;
257 case RE_RP:
258 match_length[*p] = q - match_begin[*p];
259 p++;
260 break;
261 case RE_MATCHED:
262 r = match_begin[*p];
263 len = match_length[*p++];
264 while (--len >= 0) {
265 if (*q++ != *r++)
266 goto bad;
268 break;
269 case RE_EOS:
270 if (*q != '\0')
271 goto bad;
272 break;
273 case RE_STAR:
274 low = 0;
275 high = 32767;
276 goto range;
277 case RE_RANGE:
278 low = *p++;
279 high = *p++;
280 if (high == 0)
281 high = 32767;
282 range:
283 curpat = p;
284 start_count = q;
285 count = 0;
286 counting++;
287 break;
290 bad:
291 if (! counting)
292 return 0;
293 len = 1;
294 if (*curpat == RE_MATCHED)
295 len = match_length[curpat[1]];
296 while (--count >= low) {
297 if (match(p, start_count + count * len))
298 return 1;
300 return 0;