tools/llvm: Do not build with symbols
[minix3.git] / external / bsd / nvi / dist / regex / regcomp.c
blobbe7094b815b9264aa462bc513310053a63679077
1 /* $NetBSD: regcomp.c,v 1.2 2013/11/22 15:52:06 christos Exp $ */
2 /*-
3 * Copyright (c) 1992, 1993, 1994 Henry Spencer.
4 * Copyright (c) 1992, 1993, 1994
5 * The Regents of the University of California. All rights reserved.
7 * This code is derived from software contributed to Berkeley by
8 * Henry Spencer of the University of Toronto.
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed by the University of
21 * California, Berkeley and its contributors.
22 * 4. Neither the name of the University nor the names of its contributors
23 * may be used to endorse or promote products derived from this software
24 * without specific prior written permission.
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
38 * @(#)regcomp.c 8.4 (Berkeley) 3/19/94
41 #if defined(LIBC_SCCS) && !defined(lint)
42 static char sccsid[] = "@(#)regcomp.c 8.4 (Berkeley) 3/19/94";
43 #endif /* LIBC_SCCS and not lint */
45 #include <sys/types.h>
46 #include <stdio.h>
47 #include <string.h>
48 #include <ctype.h>
49 #include <limits.h>
50 #include <stdlib.h>
51 #include <regex.h>
53 #include "utils.h"
54 #include "regex2.h"
56 #include "cclass.h"
57 #include "cname.h"
60 * parse structure, passed up and down to avoid global variables and
61 * other clumsinesses
63 struct parse {
64 RCHAR_T *next; /* next character in RE */
65 RCHAR_T *end; /* end of string (-> NUL normally) */
66 int error; /* has an error been seen? */
67 sop *strip; /* malloced strip */
68 RCHAR_T *stripdata; /* malloced stripdata */
69 sopno ssize; /* malloced strip size (allocated) */
70 sopno slen; /* malloced strip length (used) */
71 int ncsalloc; /* number of csets allocated */
72 struct re_guts *g;
73 # define NPAREN 10 /* we need to remember () 1-9 for back refs */
74 sopno pbegin[NPAREN]; /* -> ( ([0] unused) */
75 sopno pend[NPAREN]; /* -> ) ([0] unused) */
78 /* ========= begin header generated by ./mkh ========= */
79 #ifdef __cplusplus
80 extern "C" {
81 #endif
83 /* === regcomp.c === */
84 static void p_ere __P((struct parse *p, int stop, size_t reclimit));
85 static void p_ere_exp __P((struct parse *p, size_t reclimit));
86 static void p_str __P((struct parse *p));
87 static void p_bre __P((struct parse *p, int end1, int end2, size_t reclimit));
88 static int p_simp_re __P((struct parse *p, int starordinary, size_t reclimit));
89 static int p_count __P((struct parse *p));
90 static void p_bracket __P((struct parse *p));
91 static void p_b_term __P((struct parse *p, cset *cs));
92 static void p_b_cclass __P((struct parse *p, cset *cs));
93 static void p_b_eclass __P((struct parse *p, cset *cs));
94 static char p_b_symbol __P((struct parse *p));
95 static char p_b_coll_elem __P((struct parse *p, int endc));
96 static char othercase __P((int ch));
97 static void bothcases __P((struct parse *p, int ch));
98 static void ordinary __P((struct parse *p, int ch));
99 static void nonnewline __P((struct parse *p));
100 static void repeat __P((struct parse *p, sopno start, int from, int to, size_t reclimit));
101 static int seterr __P((struct parse *p, int e));
102 static cset *allocset __P((struct parse *p));
103 static void freeset __P((struct parse *p, cset *cs));
104 static int freezeset __P((struct parse *p, cset *cs));
105 static int firstch __P((struct parse *p, cset *cs));
106 static int nch __P((struct parse *p, cset *cs));
107 static void mcadd __P((struct parse *p, cset *cs, const char *cp));
108 #ifdef notdef
109 static void mcsub __P((cset *cs, char *cp));
110 static int mcin __P((cset *cs, char *cp));
111 static char *mcfind __P((cset *cs, char *cp));
112 #endif
113 static void mcinvert __P((struct parse *p, cset *cs));
114 static void mccase __P((struct parse *p, cset *cs));
115 #ifdef notdef
116 static int isinsets __P((struct re_guts *g, int c));
117 static int samesets __P((struct re_guts *g, int c1, int c2));
118 #endif
119 static void categorize __P((struct parse *p, struct re_guts *g));
120 static sopno dupl __P((struct parse *p, sopno start, sopno finish));
121 static void doemit __P((struct parse *p, sop op, size_t opnd));
122 static void doinsert __P((struct parse *p, sop op, size_t opnd, sopno pos));
123 static void dofwd __P((struct parse *p, sopno pos, sop value));
124 static int enlarge __P((struct parse *p, sopno size));
125 static void stripsnug __P((struct parse *p, struct re_guts *g));
126 static void findmust __P((struct parse *p, struct re_guts *g));
127 static sopno pluscount __P((struct parse *p, struct re_guts *g));
129 #ifdef __cplusplus
131 #endif
132 /* ========= end header generated by ./mkh ========= */
134 static RCHAR_T nuls[10]; /* place to point scanner in event of error */
137 * macros for use with parse structure
138 * BEWARE: these know that the parse structure is named `p' !!!
140 #define PEEK() (*p->next)
141 #define PEEK2() (*(p->next+1))
142 #define MORE() (p->next < p->end)
143 #define MORE2() (p->next+1 < p->end)
144 #define SEE(c) (MORE() && PEEK() == (c))
145 #define SEETWO(a, b) (MORE() && MORE2() && PEEK() == (a) && PEEK2() == (b))
146 #define EAT(c) ((SEE(c)) ? (NEXT(), 1) : 0)
147 #define EATTWO(a, b) ((SEETWO(a, b)) ? (NEXT2(), 1) : 0)
148 #define NEXT() (p->next++)
149 #define NEXT2() (p->next += 2)
150 #define NEXTn(n) (p->next += (n))
151 #define GETNEXT() (*p->next++)
152 #define SETERROR(e) seterr(p, (e))
153 #define REQUIRE(co, e) ((co) || SETERROR(e))
154 #define MUSTSEE(c, e) (REQUIRE(MORE() && PEEK() == (c), e))
155 #define MUSTEAT(c, e) (REQUIRE(MORE() && GETNEXT() == (c), e))
156 #define MUSTNOTSEE(c, e) (REQUIRE(!MORE() || PEEK() != (c), e))
157 #define EMIT(op, sopnd) doemit(p, (sop)(op), (size_t)(sopnd))
158 #define INSERT(op, pos) doinsert(p, (sop)(op), HERE()-(pos)+1, pos)
159 #define AHEAD(pos) dofwd(p, pos, HERE()-(pos))
160 #define ASTERN(sop, pos) EMIT(sop, HERE()-pos)
161 #define HERE() (p->slen)
162 #define THERE() (p->slen - 1)
163 #define THERETHERE() (p->slen - 2)
164 #define DROP(n) (p->slen -= (n))
166 #ifndef NDEBUG
167 static int never = 0; /* for use in asserts; shuts lint up */
168 #else
169 #define never 0 /* some <assert.h>s have bugs too */
170 #endif
172 #define MEMLIMIT 0x8000000
173 #define MEMSIZE(p) \
174 ((p)->ncsalloc / CHAR_BIT * (p)->g->csetsize + \
175 (p)->ncsalloc * sizeof(cset) + \
176 (p)->ssize * sizeof(sop))
177 #define RECLIMIT 256
180 - regcomp - interface for parser and compilation
181 = extern int regcomp(regex_t *, const RCHAR_T *, int);
182 = #define REG_BASIC 0000
183 = #define REG_EXTENDED 0001
184 = #define REG_ICASE 0002
185 = #define REG_NOSUB 0004
186 = #define REG_NEWLINE 0010
187 = #define REG_NOSPEC 0020
188 = #define REG_PEND 0040
189 = #define REG_DUMP 0200
191 int /* 0 success, otherwise REG_something */
192 regcomp(regex_t *preg, const RCHAR_T *pattern, int cflags)
194 struct parse pa;
195 register struct re_guts *g;
196 register struct parse *p = &pa;
197 register int i;
198 register size_t len;
199 #ifdef REDEBUG
200 # define GOODFLAGS(f) (f)
201 #else
202 # define GOODFLAGS(f) ((f)&~REG_DUMP)
203 #endif
205 cflags = GOODFLAGS(cflags);
206 if ((cflags&REG_EXTENDED) && (cflags&REG_NOSPEC))
207 return(REG_INVARG);
209 if (cflags&REG_PEND) {
210 if (preg->re_endp < pattern)
211 return(REG_INVARG);
212 len = preg->re_endp - pattern;
213 } else
214 len = STRLEN(pattern);
216 /* do the mallocs early so failure handling is easy */
217 g = (struct re_guts *)malloc(sizeof(struct re_guts) +
218 (NC-1)*sizeof(cat_t));
219 if (g == NULL)
220 return(REG_ESPACE);
221 p->ssize = len/(size_t)2*(size_t)3 + (size_t)1; /* ugh */
222 p->strip = (sop *)malloc(p->ssize * sizeof(sop));
223 if (p->strip == NULL) {
224 free((char *)g);
225 return(REG_ESPACE);
227 p->stripdata = (RCHAR_T *)malloc(p->ssize * sizeof(RCHAR_T));
228 if (p->stripdata == NULL) {
229 free((char *)p->strip);
230 free((char *)g);
231 return(REG_ESPACE);
233 p->slen = 0;
235 /* set things up */
236 p->g = g;
237 p->next = (RCHAR_T *)__UNCONST(pattern); /* convenience; we do not modify it */
238 p->end = p->next + len;
239 p->error = 0;
240 p->ncsalloc = 0;
241 for (i = 0; i < NPAREN; i++) {
242 p->pbegin[i] = 0;
243 p->pend[i] = 0;
245 g->csetsize = NC;
246 g->sets = NULL;
247 g->setbits = NULL;
248 g->ncsets = 0;
249 g->cflags = cflags;
250 g->iflags = 0;
251 g->nbol = 0;
252 g->neol = 0;
253 g->must = NULL;
254 g->mlen = 0;
255 g->nsub = 0;
256 #if 0
257 g->ncategories = 1; /* category 0 is "everything else" */
258 g->categories = &g->catspace[-(CHAR_MIN)];
259 (void) memset((char *)g->catspace, 0, NC*sizeof(cat_t));
260 #endif
261 g->backrefs = 0;
263 /* do it */
264 EMIT(OEND, 0);
265 g->firststate = THERE();
266 if (cflags&REG_EXTENDED)
267 p_ere(p, OUT, 0);
268 else if (cflags&REG_NOSPEC)
269 p_str(p);
270 else
271 p_bre(p, OUT, OUT, 0);
272 EMIT(OEND, 0);
273 g->laststate = THERE();
275 /* tidy up loose ends and fill things in */
276 categorize(p, g);
277 stripsnug(p, g);
278 findmust(p, g);
279 g->nplus = pluscount(p, g);
280 g->magic = MAGIC2;
281 preg->re_nsub = g->nsub;
282 preg->re_g = g;
283 preg->re_magic = MAGIC1;
284 #ifndef REDEBUG
285 /* not debugging, so can't rely on the assert() in regexec() */
286 if (g->iflags&BAD)
287 SETERROR(REG_ASSERT);
288 #endif
290 /* win or lose, we're done */
291 if (p->error != 0) /* lose */
292 regfree(preg);
293 return(p->error);
297 - p_ere - ERE parser top level, concatenation and alternation
298 == static void p_ere(register struct parse *p, int stop, size_t reclimit);
300 static void
301 p_ere(register struct parse *p, int stop, size_t reclimit)
303 /* character this ERE should end at */
305 register char c;
306 register sopno prevback = 0;
307 register sopno prevfwd = 0;
308 register sopno conc;
309 register int first = 1; /* is this the first alternative? */
311 if (reclimit++ > RECLIMIT || p->error == REG_ESPACE) {
312 p->error = REG_ESPACE;
313 return;
316 for (;;) {
317 /* do a bunch of concatenated expressions */
318 conc = HERE();
319 while (MORE() && (c = PEEK()) != '|' && c != stop)
320 p_ere_exp(p, reclimit);
321 (void)REQUIRE(HERE() != conc, REG_EMPTY); /* require nonempty */
323 if (!EAT('|'))
324 break; /* NOTE BREAK OUT */
326 if (first) {
327 INSERT(OCH_, conc); /* offset is wrong */
328 prevfwd = conc;
329 prevback = conc;
330 first = 0;
332 ASTERN(OOR1, prevback);
333 prevback = THERE();
334 AHEAD(prevfwd); /* fix previous offset */
335 prevfwd = HERE();
336 EMIT(OOR2, 0); /* offset is very wrong */
339 if (!first) { /* tail-end fixups */
340 AHEAD(prevfwd);
341 ASTERN(O_CH, prevback);
344 assert(!MORE() || SEE(stop));
348 - p_ere_exp - parse one subERE, an atom possibly followed by a repetition op
349 == static void p_ere_exp(register struct parse *p);
351 static void
352 p_ere_exp(register struct parse *p, size_t reclimit)
354 register char c;
355 register sopno pos;
356 register int count;
357 register int count2;
358 register sopno subno;
359 int wascaret = 0;
361 assert(MORE()); /* caller should have ensured this */
362 c = GETNEXT();
364 pos = HERE();
365 switch (c) {
366 case '(':
367 (void)REQUIRE(MORE(), REG_EPAREN);
368 p->g->nsub++;
369 subno = p->g->nsub;
370 if (subno < NPAREN)
371 p->pbegin[subno] = HERE();
372 EMIT(OLPAREN, subno);
373 if (!SEE(')'))
374 p_ere(p, ')', reclimit);
375 if (subno < NPAREN) {
376 p->pend[subno] = HERE();
377 assert(p->pend[subno] != 0);
379 EMIT(ORPAREN, subno);
380 (void)MUSTEAT(')', REG_EPAREN);
381 break;
382 #ifndef POSIX_MISTAKE
383 case ')': /* happens only if no current unmatched ( */
385 * You may ask, why the ifndef? Because I didn't notice
386 * this until slightly too late for 1003.2, and none of the
387 * other 1003.2 regular-expression reviewers noticed it at
388 * all. So an unmatched ) is legal POSIX, at least until
389 * we can get it fixed.
391 SETERROR(REG_EPAREN);
392 break;
393 #endif
394 case '^':
395 EMIT(OBOL, 0);
396 p->g->iflags |= USEBOL;
397 p->g->nbol++;
398 wascaret = 1;
399 break;
400 case '$':
401 EMIT(OEOL, 0);
402 p->g->iflags |= USEEOL;
403 p->g->neol++;
404 break;
405 case '|':
406 SETERROR(REG_EMPTY);
407 break;
408 case '*':
409 case '+':
410 case '?':
411 SETERROR(REG_BADRPT);
412 break;
413 case '.':
414 if (p->g->cflags&REG_NEWLINE)
415 nonnewline(p);
416 else
417 EMIT(OANY, 0);
418 break;
419 case '[':
420 p_bracket(p);
421 break;
422 case '\\':
423 (void)REQUIRE(MORE(), REG_EESCAPE);
424 c = GETNEXT();
425 ordinary(p, c);
426 break;
427 case '{': /* okay as ordinary except if digit follows */
428 (void)REQUIRE(!MORE() || !ISDIGIT((UCHAR_T)PEEK()), REG_BADRPT);
429 /* FALLTHROUGH */
430 default:
431 ordinary(p, c);
432 break;
435 if (!MORE())
436 return;
437 c = PEEK();
438 /* we call { a repetition if followed by a digit */
439 if (!( c == '*' || c == '+' || c == '?' ||
440 (c == '{' && MORE2() && ISDIGIT((UCHAR_T)PEEK2())) ))
441 return; /* no repetition, we're done */
442 NEXT();
444 (void)REQUIRE(!wascaret, REG_BADRPT);
445 switch (c) {
446 case '*': /* implemented as +? */
447 /* this case does not require the (y|) trick, noKLUDGE */
448 INSERT(OPLUS_, pos);
449 ASTERN(O_PLUS, pos);
450 INSERT(OQUEST_, pos);
451 ASTERN(O_QUEST, pos);
452 break;
453 case '+':
454 INSERT(OPLUS_, pos);
455 ASTERN(O_PLUS, pos);
456 break;
457 case '?':
458 /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
459 INSERT(OCH_, pos); /* offset slightly wrong */
460 ASTERN(OOR1, pos); /* this one's right */
461 AHEAD(pos); /* fix the OCH_ */
462 EMIT(OOR2, 0); /* offset very wrong... */
463 AHEAD(THERE()); /* ...so fix it */
464 ASTERN(O_CH, THERETHERE());
465 break;
466 case '{':
467 count = p_count(p);
468 if (EAT(',')) {
469 if (ISDIGIT((UCHAR_T)PEEK())) {
470 count2 = p_count(p);
471 (void)REQUIRE(count <= count2, REG_BADBR);
472 } else /* single number with comma */
473 count2 = INFINITY;
474 } else /* just a single number */
475 count2 = count;
476 repeat(p, pos, count, count2, 0);
477 if (!EAT('}')) { /* error heuristics */
478 while (MORE() && PEEK() != '}')
479 NEXT();
480 (void)REQUIRE(MORE(), REG_EBRACE);
481 SETERROR(REG_BADBR);
483 break;
486 if (!MORE())
487 return;
488 c = PEEK();
489 if (!( c == '*' || c == '+' || c == '?' ||
490 (c == '{' && MORE2() && ISDIGIT((UCHAR_T)PEEK2())) ) )
491 return;
492 SETERROR(REG_BADRPT);
496 - p_str - string (no metacharacters) "parser"
497 == static void p_str(register struct parse *p);
499 static void
500 p_str(register struct parse *p)
502 (void)REQUIRE(MORE(), REG_EMPTY);
503 while (MORE())
504 ordinary(p, GETNEXT());
508 - p_bre - BRE parser top level, anchoring and concatenation
509 == static void p_bre(register struct parse *p, register int end1, \
510 == register int end2, size_t reclimit);
511 * Giving end1 as OUT essentially eliminates the end1/end2 check.
513 * This implementation is a bit of a kludge, in that a trailing $ is first
514 * taken as an ordinary character and then revised to be an anchor. The
515 * only undesirable side effect is that '$' gets included as a character
516 * category in such cases. This is fairly harmless; not worth fixing.
517 * The amount of lookahead needed to avoid this kludge is excessive.
519 static void
520 p_bre(register struct parse *p, register int end1, register int end2, size_t reclimit)
522 /* first terminating character */
523 /* second terminating character */
525 register sopno start;
526 register int first = 1; /* first subexpression? */
527 register int wasdollar = 0;
529 if (reclimit++ > RECLIMIT || p->error == REG_ESPACE) {
530 p->error = REG_ESPACE;
531 return;
534 start = HERE();
536 if (EAT('^')) {
537 EMIT(OBOL, 0);
538 p->g->iflags |= USEBOL;
539 p->g->nbol++;
541 while (MORE() && !SEETWO(end1, end2)) {
542 wasdollar = p_simp_re(p, first, reclimit);
543 first = 0;
545 if (wasdollar) { /* oops, that was a trailing anchor */
546 DROP(1);
547 EMIT(OEOL, 0);
548 p->g->iflags |= USEEOL;
549 p->g->neol++;
552 (void)REQUIRE(HERE() != start, REG_EMPTY); /* require nonempty */
556 - p_simp_re - parse a simple RE, an atom possibly followed by a repetition
557 == static int p_simp_re(register struct parse *p, int starordinary, size_t reclimit);
559 static int /* was the simple RE an unbackslashed $? */
560 p_simp_re(register struct parse *p, int starordinary, size_t reclimit)
562 /* is a leading * an ordinary character? */
564 register int c;
565 register int count;
566 register int count2;
567 register sopno pos;
568 register int i;
569 register sopno subno;
570 int backsl;
572 pos = HERE(); /* repetion op, if any, covers from here */
574 assert(MORE()); /* caller should have ensured this */
575 c = GETNEXT();
576 backsl = c == '\\';
577 if (backsl) {
578 (void)REQUIRE(MORE(), REG_EESCAPE);
579 c = (unsigned char)GETNEXT();
580 switch (c) {
581 case '{':
582 SETERROR(REG_BADRPT);
583 break;
584 case '(':
585 p->g->nsub++;
586 subno = p->g->nsub;
587 if (subno < NPAREN)
588 p->pbegin[subno] = HERE();
589 EMIT(OLPAREN, subno);
590 /* the MORE here is an error heuristic */
591 if (MORE() && !SEETWO('\\', ')'))
592 p_bre(p, '\\', ')', reclimit);
593 if (subno < NPAREN) {
594 p->pend[subno] = HERE();
595 assert(p->pend[subno] != 0);
597 EMIT(ORPAREN, subno);
598 (void)REQUIRE(EATTWO('\\', ')'), REG_EPAREN);
599 break;
600 case ')': /* should not get here -- must be user */
601 case '}':
602 SETERROR(REG_EPAREN);
603 break;
604 case '1':
605 case '2':
606 case '3':
607 case '4':
608 case '5':
609 case '6':
610 case '7':
611 case '8':
612 case '9':
613 i = c - '0';
614 assert(i < NPAREN);
615 if (p->pend[i] != 0) {
616 assert(i <= p->g->nsub);
617 EMIT(OBACK_, i);
618 assert(p->pbegin[i] != 0);
619 assert(p->strip[p->pbegin[i]] == OLPAREN);
620 assert(p->strip[p->pend[i]] == ORPAREN);
621 (void) dupl(p, p->pbegin[i]+1, p->pend[i]);
622 EMIT(O_BACK, i);
623 } else
624 SETERROR(REG_ESUBREG);
625 p->g->backrefs = 1;
626 break;
627 default:
628 ordinary(p, c);
629 break;
631 } else {
632 switch (c) {
633 case '.':
634 if (p->g->cflags&REG_NEWLINE)
635 nonnewline(p);
636 else
637 EMIT(OANY, 0);
638 break;
639 case '[':
640 p_bracket(p);
641 break;
642 case '*':
643 (void)REQUIRE(starordinary, REG_BADRPT);
644 /* FALLTHROUGH */
645 default:
646 ordinary(p, c);
647 break;
651 if (EAT('*')) { /* implemented as +? */
652 /* this case does not require the (y|) trick, noKLUDGE */
653 INSERT(OPLUS_, pos);
654 ASTERN(O_PLUS, pos);
655 INSERT(OQUEST_, pos);
656 ASTERN(O_QUEST, pos);
657 } else if (EATTWO('\\', '{')) {
658 count = p_count(p);
659 if (EAT(',')) {
660 if (MORE() && ISDIGIT((UCHAR_T)PEEK())) {
661 count2 = p_count(p);
662 (void)REQUIRE(count <= count2, REG_BADBR);
663 } else /* single number with comma */
664 count2 = INFINITY;
665 } else /* just a single number */
666 count2 = count;
667 repeat(p, pos, count, count2, reclimit);
668 if (!EATTWO('\\', '}')) { /* error heuristics */
669 while (MORE() && !SEETWO('\\', '}'))
670 NEXT();
671 (void)REQUIRE(MORE(), REG_EBRACE);
672 SETERROR(REG_BADBR);
674 } else if (!backsl && c == (unsigned char)'$') /* $ (but not \$) ends it */
675 return(1);
677 return(0);
681 - p_count - parse a repetition count
682 == static int p_count(register struct parse *p);
684 static int /* the value */
685 p_count(register struct parse *p)
687 register int count = 0;
688 register int ndigits = 0;
690 while (MORE() && ISDIGIT((UCHAR_T)PEEK()) && count <= DUPMAX) {
691 count = count*10 + (GETNEXT() - '0');
692 ndigits++;
695 (void)REQUIRE(ndigits > 0 && count <= DUPMAX, REG_BADBR);
696 return(count);
700 - p_bracket - parse a bracketed character list
701 == static void p_bracket(register struct parse *p);
703 * Note a significant property of this code: if the allocset() did SETERROR,
704 * no set operations are done.
706 static void
707 p_bracket(register struct parse *p)
709 register cset *cs;
710 register int invert = 0;
711 static RCHAR_T bow[] = { '[', ':', '<', ':', ']', ']' };
712 static RCHAR_T eow[] = { '[', ':', '>', ':', ']', ']' };
714 cs = allocset(p);
715 if (cs == NULL)
716 return;
718 /* Dept of Truly Sickening Special-Case Kludges */
719 if (p->next + 5 < p->end && MEMCMP(p->next, bow, 6) == 0) {
720 EMIT(OBOW, 0);
721 NEXTn(6);
722 return;
724 if (p->next + 5 < p->end && MEMCMP(p->next, eow, 6) == 0) {
725 EMIT(OEOW, 0);
726 NEXTn(6);
727 return;
730 if (EAT('^'))
731 invert++; /* make note to invert set at end */
732 if (EAT(']'))
733 CHadd(cs, ']');
734 else if (EAT('-'))
735 CHadd(cs, '-');
736 while (MORE() && PEEK() != ']' && !SEETWO('-', ']'))
737 p_b_term(p, cs);
738 if (EAT('-'))
739 CHadd(cs, '-');
740 (void)MUSTEAT(']', REG_EBRACK);
742 if (p->error != 0) /* don't mess things up further */
743 return;
745 if (p->g->cflags&REG_ICASE) {
746 register int i;
747 register int ci;
749 for (i = p->g->csetsize - 1; i >= 0; i--)
750 if (CHIN(cs, i) && isalpha(i)) {
751 ci = othercase(i);
752 if (ci != i)
753 CHadd(cs, ci);
755 if (cs->multis != NULL)
756 mccase(p, cs);
758 if (invert) {
759 register int i;
761 for (i = p->g->csetsize - 1; i >= 0; i--)
762 if (CHIN(cs, i))
763 CHsub(cs, i);
764 else
765 CHadd(cs, i);
766 if (p->g->cflags&REG_NEWLINE)
767 CHsub(cs, '\n');
768 if (cs->multis != NULL)
769 mcinvert(p, cs);
772 assert(cs->multis == NULL); /* xxx */
774 if (nch(p, cs) == 1) { /* optimize singleton sets */
775 ordinary(p, firstch(p, cs));
776 freeset(p, cs);
777 } else
778 EMIT(OANYOF, freezeset(p, cs));
782 - p_b_term - parse one term of a bracketed character list
783 == static void p_b_term(register struct parse *p, register cset *cs);
785 static void
786 p_b_term(register struct parse *p, register cset *cs)
788 register char c;
789 register char start, finish;
790 register int i;
792 /* classify what we've got */
793 switch ((MORE()) ? PEEK() : '\0') {
794 case '[':
795 c = (MORE2()) ? PEEK2() : '\0';
796 break;
797 case '-':
798 SETERROR(REG_ERANGE);
799 return; /* NOTE RETURN */
800 break;
801 default:
802 c = '\0';
803 break;
806 switch (c) {
807 case ':': /* character class */
808 NEXT2();
809 (void)REQUIRE(MORE(), REG_EBRACK);
810 c = PEEK();
811 (void)REQUIRE(c != '-' && c != ']', REG_ECTYPE);
812 p_b_cclass(p, cs);
813 (void)REQUIRE(MORE(), REG_EBRACK);
814 (void)REQUIRE(EATTWO(':', ']'), REG_ECTYPE);
815 break;
816 case '=': /* equivalence class */
817 NEXT2();
818 (void)REQUIRE(MORE(), REG_EBRACK);
819 c = PEEK();
820 (void)REQUIRE(c != '-' && c != ']', REG_ECOLLATE);
821 p_b_eclass(p, cs);
822 (void)REQUIRE(MORE(), REG_EBRACK);
823 (void)REQUIRE(EATTWO('=', ']'), REG_ECOLLATE);
824 break;
825 default: /* symbol, ordinary character, or range */
826 /* xxx revision needed for multichar stuff */
827 start = p_b_symbol(p);
828 if (SEE('-') && MORE2() && PEEK2() != ']') {
829 /* range */
830 NEXT();
831 if (EAT('-'))
832 finish = '-';
833 else
834 finish = p_b_symbol(p);
835 } else
836 finish = start;
837 /* xxx what about signed chars here... */
838 (void)REQUIRE(start <= finish, REG_ERANGE);
839 for (i = start; i <= finish; i++)
840 CHadd(cs, i);
841 break;
846 - p_b_cclass - parse a character-class name and deal with it
847 == static void p_b_cclass(register struct parse *p, register cset *cs);
849 static void
850 p_b_cclass(register struct parse *p, register cset *cs)
852 register RCHAR_T *sp = p->next;
853 register struct cclass *cp;
854 register size_t len;
855 register const char *u;
856 register char c;
858 while (MORE() && isalpha(PEEK()))
859 NEXT();
860 len = p->next - sp;
861 for (cp = cclasses; cp->name != NULL; cp++)
862 if (STRLEN(cp->name) == len && !MEMCMP(cp->name, sp, len))
863 break;
864 if (cp->name == NULL) {
865 /* oops, didn't find it */
866 SETERROR(REG_ECTYPE);
867 return;
870 u = cp->chars;
871 while ((c = *u++) != '\0')
872 CHadd(cs, c);
873 for (u = cp->multis; *u != '\0'; u += strlen(u) + 1)
874 MCadd(p, cs, u);
878 - p_b_eclass - parse an equivalence-class name and deal with it
879 == static void p_b_eclass(register struct parse *p, register cset *cs);
881 * This implementation is incomplete. xxx
883 static void
884 p_b_eclass(register struct parse *p, register cset *cs)
886 register char c;
888 c = p_b_coll_elem(p, '=');
889 CHadd(cs, c);
893 - p_b_symbol - parse a character or [..]ed multicharacter collating symbol
894 == static char p_b_symbol(register struct parse *p);
896 static char /* value of symbol */
897 p_b_symbol(register struct parse *p)
899 register char value;
901 (void)REQUIRE(MORE(), REG_EBRACK);
902 if (!EATTWO('[', '.'))
903 return(GETNEXT());
905 /* collating symbol */
906 value = p_b_coll_elem(p, '.');
907 (void)REQUIRE(EATTWO('.', ']'), REG_ECOLLATE);
908 return(value);
912 - p_b_coll_elem - parse a collating-element name and look it up
913 == static char p_b_coll_elem(register struct parse *p, int endc);
915 static char /* value of collating element */
916 p_b_coll_elem(register struct parse *p, int endc)
918 /* name ended by endc,']' */
920 register RCHAR_T *sp = p->next;
921 register struct cname *cp;
922 register size_t len;
924 while (MORE() && !SEETWO(endc, ']'))
925 NEXT();
926 if (!MORE()) {
927 SETERROR(REG_EBRACK);
928 return(0);
930 len = p->next - sp;
931 for (cp = cnames; cp->name != NULL; cp++)
932 if (STRLEN(cp->name) == len && MEMCMP(cp->name, sp, len))
933 return(cp->code); /* known name */
934 if (len == 1)
935 return(*sp); /* single character */
936 SETERROR(REG_ECOLLATE); /* neither */
937 return(0);
941 - othercase - return the case counterpart of an alphabetic
942 == static char othercase(int ch);
944 static char /* if no counterpart, return ch */
945 othercase(int ch)
947 assert(isalpha(ch));
948 if (isupper(ch))
949 return(tolower(ch));
950 else if (islower(ch))
951 return(toupper(ch));
952 else /* peculiar, but could happen */
953 return(ch);
957 - bothcases - emit a dualcase version of a two-case character
958 == static void bothcases(register struct parse *p, int ch);
960 * Boy, is this implementation ever a kludge...
962 static void
963 bothcases(register struct parse *p, int ch)
965 register RCHAR_T *oldnext = p->next;
966 register RCHAR_T *oldend = p->end;
967 RCHAR_T bracket[3];
969 assert(othercase(ch) != ch); /* p_bracket() would recurse */
970 p->next = bracket;
971 p->end = bracket+2;
972 bracket[0] = ch;
973 bracket[1] = ']';
974 bracket[2] = '\0';
975 p_bracket(p);
976 assert(p->next == bracket+2);
977 p->next = oldnext;
978 p->end = oldend;
982 - ordinary - emit an ordinary character
983 == static void ordinary(register struct parse *p, register int ch);
985 static void
986 ordinary(register struct parse *p, register int ch)
989 register cat_t *cap = p->g->categories;
992 if ((p->g->cflags&REG_ICASE) && isalpha(ch) && othercase(ch) != ch)
993 bothcases(p, ch);
994 else {
995 EMIT(OCHAR, (UCHAR_T)ch);
997 if (cap[ch] == 0)
998 cap[ch] = p->g->ncategories++;
1004 - nonnewline - emit REG_NEWLINE version of OANY
1005 == static void nonnewline(register struct parse *p);
1007 * Boy, is this implementation ever a kludge...
1009 static void
1010 nonnewline(register struct parse *p)
1012 register RCHAR_T *oldnext = p->next;
1013 register RCHAR_T *oldend = p->end;
1014 RCHAR_T bracket[4];
1016 p->next = bracket;
1017 p->end = bracket+3;
1018 bracket[0] = '^';
1019 bracket[1] = '\n';
1020 bracket[2] = ']';
1021 bracket[3] = '\0';
1022 p_bracket(p);
1023 assert(p->next == bracket+3);
1024 p->next = oldnext;
1025 p->end = oldend;
1029 - repeat - generate code for a bounded repetition, recursively if needed
1030 == static void repeat(register struct parse *p, sopno start, int from, int to, size_t reclimit);
1032 static void
1033 repeat(register struct parse *p, sopno start, int from, int to, size_t reclimit)
1035 /* operand from here to end of strip */
1036 /* repeated from this number */
1037 /* to this number of times (maybe INFINITY) */
1039 register sopno finish;
1040 # define N 2
1041 # define INF 3
1042 # define REP(f, t) ((f)*8 + (t))
1043 # define MAP(n) (((n) <= 1) ? (n) : ((n) == INFINITY) ? INF : N)
1044 register sopno copy;
1046 if (reclimit++ > RECLIMIT)
1047 p->error = REG_ESPACE;
1048 if (p->error)
1049 return;
1051 finish = HERE();
1053 assert(from <= to);
1055 switch (REP(MAP(from), MAP(to))) {
1056 case REP(0, 0): /* must be user doing this */
1057 DROP(finish-start); /* drop the operand */
1058 break;
1059 case REP(0, 1): /* as x{1,1}? */
1060 case REP(0, N): /* as x{1,n}? */
1061 case REP(0, INF): /* as x{1,}? */
1062 /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
1063 INSERT(OCH_, start); /* offset is wrong... */
1064 repeat(p, start+1, 1, to, reclimit);
1065 ASTERN(OOR1, start);
1066 AHEAD(start); /* ... fix it */
1067 EMIT(OOR2, 0);
1068 AHEAD(THERE());
1069 ASTERN(O_CH, THERETHERE());
1070 break;
1071 case REP(1, 1): /* trivial case */
1072 /* done */
1073 break;
1074 case REP(1, N): /* as x?x{1,n-1} */
1075 /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
1076 INSERT(OCH_, start);
1077 ASTERN(OOR1, start);
1078 AHEAD(start);
1079 EMIT(OOR2, 0); /* offset very wrong... */
1080 AHEAD(THERE()); /* ...so fix it */
1081 ASTERN(O_CH, THERETHERE());
1082 copy = dupl(p, start+1, finish+1);
1083 assert(copy == finish+4);
1084 repeat(p, copy, 1, to-1, reclimit);
1085 break;
1086 case REP(1, INF): /* as x+ */
1087 INSERT(OPLUS_, start);
1088 ASTERN(O_PLUS, start);
1089 break;
1090 case REP(N, N): /* as xx{m-1,n-1} */
1091 copy = dupl(p, start, finish);
1092 repeat(p, copy, from-1, to-1, reclimit);
1093 break;
1094 case REP(N, INF): /* as xx{n-1,INF} */
1095 copy = dupl(p, start, finish);
1096 repeat(p, copy, from-1, to, reclimit);
1097 break;
1098 default: /* "can't happen" */
1099 SETERROR(REG_ASSERT); /* just in case */
1100 break;
1105 - seterr - set an error condition
1106 == static int seterr(register struct parse *p, int e);
1108 static int /* useless but makes type checking happy */
1109 seterr(register struct parse *p, int e)
1111 if (p->error == 0) /* keep earliest error condition */
1112 p->error = e;
1113 p->next = nuls; /* try to bring things to a halt */
1114 p->end = nuls;
1115 return(0); /* make the return value well-defined */
1119 - allocset - allocate a set of characters for []
1120 == static cset *allocset(register struct parse *p);
1122 static cset *
1123 allocset(register struct parse *p)
1125 register int no = p->g->ncsets++;
1126 register size_t nc;
1127 register size_t nbytes;
1128 register cset *cs;
1129 register size_t css = (size_t)p->g->csetsize;
1130 register int i;
1132 if (no >= p->ncsalloc) { /* need another column of space */
1133 p->ncsalloc += CHAR_BIT;
1134 nc = p->ncsalloc;
1135 assert(nc % CHAR_BIT == 0);
1136 nbytes = nc / CHAR_BIT * css;
1137 if (MEMSIZE(p) > MEMLIMIT)
1138 goto oomem;
1139 if (p->g->sets == NULL)
1140 p->g->sets = (cset *)malloc(nc * sizeof(cset));
1141 else
1142 p->g->sets = (cset *)realloc((char *)p->g->sets,
1143 nc * sizeof(cset));
1144 if (p->g->setbits == NULL)
1145 p->g->setbits = (uch *)malloc(nbytes);
1146 else {
1147 p->g->setbits = (uch *)realloc((char *)p->g->setbits,
1148 nbytes);
1149 /* xxx this isn't right if setbits is now NULL */
1150 for (i = 0; i < no; i++)
1151 p->g->sets[i].ptr = p->g->setbits + css*(i/CHAR_BIT);
1153 if (p->g->sets != NULL && p->g->setbits != NULL)
1154 (void) memset((char *)p->g->setbits + (nbytes - css),
1155 0, css);
1156 else {
1157 oomem:
1158 no = 0;
1159 SETERROR(REG_ESPACE);
1160 /* caller's responsibility not to do set ops */
1161 return NULL;
1165 cs = &p->g->sets[no];
1166 cs->ptr = p->g->setbits + css*((no)/CHAR_BIT);
1167 cs->mask = 1 << ((no) % CHAR_BIT);
1168 cs->hash = 0;
1169 cs->smultis = 0;
1170 cs->multis = NULL;
1172 return(cs);
1176 - freeset - free a now-unused set
1177 == static void freeset(register struct parse *p, register cset *cs);
1179 static void
1180 freeset(register struct parse *p, register cset *cs)
1182 register size_t i;
1183 register cset *top = &p->g->sets[p->g->ncsets];
1184 register size_t css = (size_t)p->g->csetsize;
1186 for (i = 0; i < css; i++)
1187 CHsub(cs, i);
1188 if (cs == top-1) /* recover only the easy case */
1189 p->g->ncsets--;
1193 - freezeset - final processing on a set of characters
1194 == static int freezeset(register struct parse *p, register cset *cs);
1196 * The main task here is merging identical sets. This is usually a waste
1197 * of time (although the hash code minimizes the overhead), but can win
1198 * big if REG_ICASE is being used. REG_ICASE, by the way, is why the hash
1199 * is done using addition rather than xor -- all ASCII [aA] sets xor to
1200 * the same value!
1202 static int /* set number */
1203 freezeset(register struct parse *p, register cset *cs)
1205 register uch h = cs->hash;
1206 register size_t i;
1207 register cset *top = &p->g->sets[p->g->ncsets];
1208 register cset *cs2;
1209 register size_t css = (size_t)p->g->csetsize;
1211 /* look for an earlier one which is the same */
1212 for (cs2 = &p->g->sets[0]; cs2 < top; cs2++)
1213 if (cs2->hash == h && cs2 != cs) {
1214 /* maybe */
1215 for (i = 0; i < css; i++)
1216 if (!!CHIN(cs2, i) != !!CHIN(cs, i))
1217 break; /* no */
1218 if (i == css)
1219 break; /* yes */
1222 if (cs2 < top) { /* found one */
1223 freeset(p, cs);
1224 cs = cs2;
1227 return((int)(cs - p->g->sets));
1231 - firstch - return first character in a set (which must have at least one)
1232 == static int firstch(register struct parse *p, register cset *cs);
1234 static int /* character; there is no "none" value */
1235 firstch(register struct parse *p, register cset *cs)
1237 register size_t i;
1238 register size_t css = (size_t)p->g->csetsize;
1240 for (i = 0; i < css; i++)
1241 if (CHIN(cs, i))
1242 return((char)i);
1243 assert(never);
1244 return(0); /* arbitrary */
1248 - nch - number of characters in a set
1249 == static int nch(register struct parse *p, register cset *cs);
1251 static int
1252 nch(register struct parse *p, register cset *cs)
1254 register size_t i;
1255 register size_t css = (size_t)p->g->csetsize;
1256 register int n = 0;
1258 for (i = 0; i < css; i++)
1259 if (CHIN(cs, i))
1260 n++;
1261 return(n);
1265 - mcadd - add a collating element to a cset
1266 == static void mcadd(register struct parse *p, register cset *cs, \
1267 == register char *cp);
1269 static void
1270 mcadd(register struct parse *p, register cset *cs, register const char *cp)
1272 register size_t oldend = cs->smultis;
1274 cs->smultis += strlen(cp) + 1;
1275 if (cs->multis == NULL)
1276 cs->multis = malloc(cs->smultis);
1277 else
1278 cs->multis = realloc(cs->multis, cs->smultis);
1279 if (cs->multis == NULL) {
1280 SETERROR(REG_ESPACE);
1281 return;
1284 (void) strcpy(cs->multis + oldend - 1, cp);
1285 cs->multis[cs->smultis - 1] = '\0';
1288 #ifdef notdef
1290 - mcsub - subtract a collating element from a cset
1291 == static void mcsub(register cset *cs, register char *cp);
1293 static void
1294 mcsub(register cset *cs, register char *cp)
1296 register char *fp = mcfind(cs, cp);
1297 register size_t len = strlen(fp);
1299 assert(fp != NULL);
1300 (void) memmove(fp, fp + len + 1,
1301 cs->smultis - (fp + len + 1 - cs->multis));
1302 cs->smultis -= len;
1304 if (cs->smultis == 0) {
1305 free(cs->multis);
1306 cs->multis = NULL;
1307 return;
1310 cs->multis = realloc(cs->multis, cs->smultis);
1311 assert(cs->multis != NULL);
1315 - mcin - is a collating element in a cset?
1316 == static int mcin(register cset *cs, register char *cp);
1318 static int
1319 mcin(register cset *cs, register char *cp)
1321 return(mcfind(cs, cp) != NULL);
1325 - mcfind - find a collating element in a cset
1326 == static char *mcfind(register cset *cs, register char *cp);
1328 static char *
1329 mcfind(register cset *cs, register char *cp)
1331 register char *p;
1333 if (cs->multis == NULL)
1334 return(NULL);
1335 for (p = cs->multis; *p != '\0'; p += strlen(p) + 1)
1336 if (strcmp(cp, p) == 0)
1337 return(p);
1338 return(NULL);
1340 #endif
1343 - mcinvert - invert the list of collating elements in a cset
1344 == static void mcinvert(register struct parse *p, register cset *cs);
1346 * This would have to know the set of possibilities. Implementation
1347 * is deferred.
1349 static void
1350 mcinvert(register struct parse *p, register cset *cs)
1352 assert(cs->multis == NULL); /* xxx */
1356 - mccase - add case counterparts of the list of collating elements in a cset
1357 == static void mccase(register struct parse *p, register cset *cs);
1359 * This would have to know the set of possibilities. Implementation
1360 * is deferred.
1362 static void
1363 mccase(register struct parse *p, register cset *cs)
1365 assert(cs->multis == NULL); /* xxx */
1368 #ifdef notdef
1370 - isinsets - is this character in any sets?
1371 == static int isinsets(register struct re_guts *g, int c);
1373 static int /* predicate */
1374 isinsets(register struct re_guts *g, int c)
1376 register uch *col;
1377 register int i;
1378 register int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT;
1379 register unsigned uc = (unsigned char)c;
1381 for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize)
1382 if (col[uc] != 0)
1383 return(1);
1384 return(0);
1388 - samesets - are these two characters in exactly the same sets?
1389 == static int samesets(register struct re_guts *g, int c1, int c2);
1391 static int /* predicate */
1392 samesets(register struct re_guts *g, int c1, int c2)
1394 register uch *col;
1395 register int i;
1396 register int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT;
1397 register unsigned uc1 = (unsigned char)c1;
1398 register unsigned uc2 = (unsigned char)c2;
1400 for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize)
1401 if (col[uc1] != col[uc2])
1402 return(0);
1403 return(1);
1405 #endif
1408 - categorize - sort out character categories
1409 == static void categorize(struct parse *p, register struct re_guts *g);
1411 static void
1412 categorize(struct parse *p, register struct re_guts *g)
1414 #ifdef notdef
1415 register cat_t *cats = g->categories;
1416 register int c;
1417 register int c2;
1418 register cat_t cat;
1420 /* avoid making error situations worse */
1421 if (p->error != 0)
1422 return;
1424 for (c = CHAR_MIN; c <= CHAR_MAX; c++)
1425 if (cats[c] == 0 && isinsets(g, c)) {
1426 cat = g->ncategories++;
1427 cats[c] = cat;
1428 for (c2 = c+1; c2 <= CHAR_MAX; c2++)
1429 if (cats[c2] == 0 && samesets(g, c, c2))
1430 cats[c2] = cat;
1432 #endif
1436 - dupl - emit a duplicate of a bunch of sops
1437 == static sopno dupl(register struct parse *p, sopno start, sopno finish);
1439 static sopno /* start of duplicate */
1440 dupl(register struct parse *p, sopno start, sopno finish)
1442 /* from here */
1443 /* to this less one */
1445 register sopno ret = HERE();
1446 register sopno len = finish - start;
1448 assert(finish >= start);
1449 if (len == 0)
1450 return(ret);
1451 if (!enlarge(p, p->ssize + len)) /* this many unexpected additions */
1452 return ret;
1453 assert(p->ssize >= p->slen + len);
1454 (void) memcpy((char *)(p->strip + p->slen),
1455 (char *)(p->strip + start), (size_t)len*sizeof(sop));
1456 (void) memcpy((char *)(p->stripdata + p->slen),
1457 (char *)(p->stripdata + start), (size_t)len*sizeof(RCHAR_T));
1458 p->slen += len;
1459 return(ret);
1463 - doemit - emit a strip operator
1464 == static void doemit(register struct parse *p, sop op, size_t opnd);
1466 * It might seem better to implement this as a macro with a function as
1467 * hard-case backup, but it's just too big and messy unless there are
1468 * some changes to the data structures. Maybe later.
1470 static void
1471 doemit(register struct parse *p, sop op, size_t opnd)
1473 /* avoid making error situations worse */
1474 if (p->error != 0)
1475 return;
1477 /* deal with oversize operands ("can't happen", more or less) */
1478 assert(opnd < 1);
1480 /* deal with undersized strip */
1481 if (p->slen >= p->ssize)
1482 if (!enlarge(p, (p->ssize+1) / 2 * 3)) /* +50% */
1483 return;
1485 /* finally, it's all reduced to the easy case */
1486 p->strip[p->slen] = op;
1487 p->stripdata[p->slen] = opnd;
1488 p->slen++;
1492 - doinsert - insert a sop into the strip
1493 == static void doinsert(register struct parse *p, sop op, size_t opnd, sopno pos);
1495 static void
1496 doinsert(register struct parse *p, sop op, size_t opnd, sopno pos)
1498 register sopno sn;
1499 register sop s;
1500 register RCHAR_T d;
1501 register int i;
1503 /* avoid making error situations worse */
1504 if (p->error != 0)
1505 return;
1507 sn = HERE();
1508 EMIT(op, opnd); /* do checks, ensure space */
1509 assert(HERE() == sn+1);
1510 s = p->strip[sn];
1511 d = p->stripdata[sn];
1513 /* adjust paren pointers */
1514 assert(pos > 0);
1515 for (i = 1; i < NPAREN; i++) {
1516 if (p->pbegin[i] >= pos) {
1517 p->pbegin[i]++;
1519 if (p->pend[i] >= pos) {
1520 p->pend[i]++;
1524 memmove((char *)&p->strip[pos+1], (char *)&p->strip[pos],
1525 (HERE()-pos-1)*sizeof(sop));
1526 memmove((char *)&p->stripdata[pos+1], (char *)&p->stripdata[pos],
1527 (HERE()-pos-1)*sizeof(RCHAR_T));
1528 p->strip[pos] = s;
1529 p->stripdata[pos] = d;
1533 - dofwd - complete a forward reference
1534 == static void dofwd(register struct parse *p, sopno pos, sop value);
1536 static void
1537 dofwd(register struct parse *p, register sopno pos, sop value)
1539 /* avoid making error situations worse */
1540 if (p->error != 0)
1541 return;
1543 assert(value < 1);
1544 p->stripdata[pos] = value;
1548 - enlarge - enlarge the strip
1549 == static int enlarge(register struct parse *p, sopno size);
1551 static int
1552 enlarge(register struct parse *p, register sopno size)
1554 register sop *sp;
1555 register RCHAR_T *dp;
1556 sopno osize;
1558 if (p->ssize >= size)
1559 return 1;
1561 osize = p->ssize;
1562 p->ssize = size;
1563 if (MEMSIZE(p) > MEMLIMIT)
1564 goto oomem;
1565 sp = realloc(p->strip, p->ssize * sizeof(sop));
1566 if (sp == NULL)
1567 goto oomem;
1568 p->strip = sp;
1569 dp = realloc(p->stripdata, p->ssize * sizeof(RCHAR_T));
1570 if (dp == NULL) {
1571 oomem:
1572 p->ssize = osize;
1573 SETERROR(REG_ESPACE);
1574 return 0;
1576 p->stripdata = dp;
1577 return 1;
1581 - stripsnug - compact the strip
1582 == static void stripsnug(register struct parse *p, register struct re_guts *g);
1584 static void
1585 stripsnug(register struct parse *p, register struct re_guts *g)
1587 g->nstates = p->slen;
1588 g->strip = (sop *)realloc((char *)p->strip,
1589 p->slen * sizeof(sop));
1590 if (g->strip == NULL) {
1591 SETERROR(REG_ESPACE);
1592 g->strip = p->strip;
1594 g->stripdata = (RCHAR_T *)realloc((char *)p->stripdata,
1595 p->slen * sizeof(RCHAR_T));
1596 if (g->stripdata == NULL) {
1597 SETERROR(REG_ESPACE);
1598 g->stripdata = p->stripdata;
1603 - findmust - fill in must and mlen with longest mandatory literal string
1604 == static void findmust(register struct parse *p, register struct re_guts *g);
1606 * This algorithm could do fancy things like analyzing the operands of |
1607 * for common subsequences. Someday. This code is simple and finds most
1608 * of the interesting cases.
1610 * Note that must and mlen got initialized during setup.
1612 static void
1613 findmust(struct parse *p, register struct re_guts *g)
1615 register sop *scans;
1616 register RCHAR_T *scand;
1617 sop *starts = 0;
1618 RCHAR_T *startd = NULL;
1619 register sop *newstarts = 0;
1620 register RCHAR_T *newstartd = NULL;
1621 register sopno newlen;
1622 register sop s;
1623 register RCHAR_T d;
1624 register RCHAR_T *cp;
1625 register sopno i;
1627 /* avoid making error situations worse */
1628 if (p->error != 0)
1629 return;
1631 /* find the longest OCHAR sequence in strip */
1632 newlen = 0;
1633 scans = g->strip + 1;
1634 scand = g->stripdata + 1;
1635 do {
1636 s = *scans++;
1637 d = *scand++;
1638 switch (s) {
1639 case OCHAR: /* sequence member */
1640 if (newlen == 0) { /* new sequence */
1641 newstarts = scans - 1;
1642 newstartd = scand - 1;
1644 newlen++;
1645 break;
1646 case OPLUS_: /* things that don't break one */
1647 case OLPAREN:
1648 case ORPAREN:
1649 break;
1650 case OQUEST_: /* things that must be skipped */
1651 case OCH_:
1652 scans--;
1653 scand--;
1654 do {
1655 scans += d;
1656 scand += d;
1657 s = *scans;
1658 d = *scand;
1659 /* assert() interferes w debug printouts */
1660 if (s != O_QUEST && s != O_CH && s != OOR2) {
1661 g->iflags |= BAD;
1662 return;
1664 } while (s != O_QUEST && s != O_CH);
1665 /* fallthrough */
1666 default: /* things that break a sequence */
1667 if (newlen > g->mlen) { /* ends one */
1668 starts = newstarts;
1669 startd = newstartd;
1670 g->mlen = newlen;
1672 newlen = 0;
1673 break;
1675 } while (s != OEND);
1677 if (g->mlen == 0) /* there isn't one */
1678 return;
1680 /* turn it into a character string */
1681 g->must = malloc(((size_t)g->mlen + 1) * sizeof(RCHAR_T));
1682 if (g->must == NULL) { /* argh; just forget it */
1683 g->mlen = 0;
1684 return;
1686 cp = g->must;
1687 scans = starts;
1688 scand = startd;
1689 for (i = g->mlen; i > 0; i--) {
1690 for (;;) {
1691 s = *scans++;
1692 d = *scand++;
1693 if (s == OCHAR)
1694 break;
1696 assert(cp < g->must + g->mlen);
1697 *cp++ = d;
1699 assert(cp == g->must + g->mlen);
1700 *cp++ = '\0'; /* just on general principles */
1704 - pluscount - count + nesting
1705 == static sopno pluscount(register struct parse *p, register struct re_guts *g);
1707 static sopno /* nesting depth */
1708 pluscount(struct parse *p, register struct re_guts *g)
1710 register sop *scan;
1711 register sop s;
1712 register sopno plusnest = 0;
1713 register sopno maxnest = 0;
1715 if (p->error != 0)
1716 return(0); /* there may not be an OEND */
1718 scan = g->strip + 1;
1719 do {
1720 s = *scan++;
1721 switch (s) {
1722 case OPLUS_:
1723 plusnest++;
1724 break;
1725 case O_PLUS:
1726 if (plusnest > maxnest)
1727 maxnest = plusnest;
1728 plusnest--;
1729 break;
1731 } while (s != OEND);
1732 if (plusnest != 0)
1733 g->iflags |= BAD;
1734 return(maxnest);