Expand PMF_FN_* macros.
[netbsd-mini2440.git] / dist / nawk / b.c
blob3652bf225b4a283377a47f8a04c0dbacbc87510b
1 /****************************************************************
2 Copyright (C) Lucent Technologies 1997
3 All Rights Reserved
5 Permission to use, copy, modify, and distribute this software and
6 its documentation for any purpose and without fee is hereby
7 granted, provided that the above copyright notice appear in all
8 copies and that both that the copyright notice and this
9 permission notice and warranty disclaimer appear in supporting
10 documentation, and that the name Lucent Technologies or any of
11 its entities not be used in advertising or publicity pertaining
12 to distribution of the software without specific, written prior
13 permission.
15 LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
16 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
17 IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
18 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
20 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
21 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
22 THIS SOFTWARE.
23 ****************************************************************/
25 /* lasciate ogne speranza, voi ch'intrate. */
27 #if HAVE_NBTOOL_CONFIG_H
28 #include "nbtool_config.h"
29 #endif
31 #define DEBUG
33 #include <ctype.h>
34 #include <stdio.h>
35 #include <string.h>
36 #include <stdlib.h>
37 #include <assert.h>
38 #include "awk.h"
39 #include "awkgram.h"
41 #define HAT (NCHARS+2) /* matches ^ in regular expr */
42 /* NCHARS is 2**n */
43 #define MAXLIN 22
45 #define type(v) (v)->nobj /* badly overloaded here */
46 #define info(v) (v)->ntype /* badly overloaded here */
47 #define left(v) (v)->narg[0]
48 #define right(v) (v)->narg[1]
49 #define parent(v) (v)->nnext
51 #define LEAF case CCL: case NCCL: case CHAR: case DOT: case FINAL: case ALL:
52 #define ELEAF case EMPTYRE: /* empty string in regexp */
53 #define UNARY case STAR: case PLUS: case QUEST:
55 /* encoding in tree Nodes:
56 leaf (CCL, NCCL, CHAR, DOT, FINAL, ALL, EMPTYRE):
57 left is index, right contains value or pointer to value
58 unary (STAR, PLUS, QUEST): left is child, right is null
59 binary (CAT, OR): left and right are children
60 parent contains pointer to parent
64 int *setvec;
65 int *tmpset;
66 int maxsetvec = 0;
68 int rtok; /* next token in current re */
69 int rlxval;
70 static uschar *rlxstr;
71 static uschar *prestr; /* current position in current re */
72 static uschar *lastre; /* origin of last re */
74 static int setcnt;
75 static int poscnt;
77 uschar *patbeg;
78 int patlen;
80 #define NFA 128 /* cache this many dynamic fa's */
81 fa *fatab[NFA];
82 int nfatab = 0; /* entries in fatab */
84 static void
85 resize_state(fa *fa, int state)
87 void *p;
88 int i, new_count;
90 if (++state < fa->state_count)
91 return;
93 new_count = state + 10; /* needs to be tuned */
95 p = realloc(fa->gototab, new_count * sizeof(fa->gototab[0]));
96 if (p == NULL)
97 goto out;
98 fa->gototab = p;
100 p = realloc(fa->out, new_count * sizeof(fa->out[0]));
101 if (p == NULL)
102 goto out;
103 fa->out = p;
105 p = realloc(fa->posns, new_count * sizeof(fa->posns[0]));
106 if (p == NULL)
107 goto out;
108 fa->posns = p;
110 for (i = fa->state_count; i < new_count; ++i) {
111 fa->gototab[i] = calloc(1, NCHARS * sizeof (**fa->gototab));
112 if (fa->gototab[i] == NULL)
113 goto out;
114 fa->out[i] = 0;
115 fa->posns[i] = NULL;
117 fa->state_count = new_count;
118 return;
119 out:
120 overflo("out of memory in resize_state");
123 fa *makedfa(const char *s, int anchor) /* returns dfa for reg expr s */
125 int i, use, nuse;
126 fa *pfa;
127 static int now = 1;
129 if (setvec == 0) { /* first time through any RE */
130 maxsetvec = MAXLIN;
131 setvec = (int *) malloc(maxsetvec * sizeof(int));
132 tmpset = (int *) malloc(maxsetvec * sizeof(int));
133 if (setvec == 0 || tmpset == 0)
134 overflo("out of space initializing makedfa");
137 if (compile_time) /* a constant for sure */
138 return mkdfa(s, anchor);
139 for (i = 0; i < nfatab; i++) /* is it there already? */
140 if (fatab[i]->anchor == anchor
141 && strcmp((const char *) fatab[i]->restr, s) == 0) {
142 fatab[i]->use = now++;
143 return fatab[i];
145 pfa = mkdfa(s, anchor);
146 if (nfatab < NFA) { /* room for another */
147 fatab[nfatab] = pfa;
148 fatab[nfatab]->use = now++;
149 nfatab++;
150 return pfa;
152 use = fatab[0]->use; /* replace least-recently used */
153 nuse = 0;
154 for (i = 1; i < nfatab; i++)
155 if (fatab[i]->use < use) {
156 use = fatab[i]->use;
157 nuse = i;
159 freefa(fatab[nuse]);
160 fatab[nuse] = pfa;
161 pfa->use = now++;
162 return pfa;
165 fa *mkdfa(const char *s, int anchor) /* does the real work of making a dfa */
166 /* anchor = 1 for anchored matches, else 0 */
168 Node *p, *p1;
169 fa *f;
171 p = reparse(s);
172 p1 = op2(CAT, op2(STAR, op2(ALL, NIL, NIL), NIL), p);
173 /* put ALL STAR in front of reg. exp. */
174 p1 = op2(CAT, p1, op2(FINAL, NIL, NIL));
175 /* put FINAL after reg. exp. */
177 poscnt = 0;
178 penter(p1); /* enter parent pointers and leaf indices */
179 if ((f = (fa *) calloc(1, sizeof(fa) + poscnt*sizeof(rrow))) == NULL)
180 overflo("out of space for fa");
181 f->accept = poscnt-1; /* penter has computed number of positions in re */
182 cfoll(f, p1); /* set up follow sets */
183 freetr(p1);
184 resize_state(f, 1);
185 if ((f->posns[0] = (int *) calloc(1, *(f->re[0].lfollow)*sizeof(int))) == NULL)
186 overflo("out of space in makedfa");
187 if ((f->posns[1] = (int *) calloc(1, sizeof(int))) == NULL)
188 overflo("out of space in makedfa");
189 *f->posns[1] = 0;
190 f->initstat = makeinit(f, anchor);
191 f->anchor = anchor;
192 f->restr = (uschar *) tostring(s);
193 return f;
196 int makeinit(fa *f, int anchor)
198 int i, k;
200 resize_state(f, 2);
201 f->curstat = 2;
202 f->out[2] = 0;
203 k = *(f->re[0].lfollow);
204 xfree(f->posns[2]);
205 if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL)
206 overflo("out of space in makeinit");
207 for (i=0; i <= k; i++) {
208 (f->posns[2])[i] = (f->re[0].lfollow)[i];
210 if ((f->posns[2])[1] == f->accept)
211 f->out[2] = 1;
212 for (i=0; i < NCHARS; i++)
213 f->gototab[2][i] = 0;
214 f->curstat = cgoto(f, 2, HAT);
215 if (anchor) {
216 *f->posns[2] = k-1; /* leave out position 0 */
217 for (i=0; i < k; i++) {
218 (f->posns[0])[i] = (f->posns[2])[i];
221 f->out[0] = f->out[2];
222 if (f->curstat != 2) {
223 resize_state(f, f->curstat);
224 --(*f->posns[f->curstat]);
227 return f->curstat;
230 void penter(Node *p) /* set up parent pointers and leaf indices */
232 switch (type(p)) {
233 ELEAF
234 LEAF
235 info(p) = poscnt;
236 poscnt++;
237 break;
238 UNARY
239 penter(left(p));
240 parent(left(p)) = p;
241 break;
242 case CAT:
243 case OR:
244 penter(left(p));
245 penter(right(p));
246 parent(left(p)) = p;
247 parent(right(p)) = p;
248 break;
249 default: /* can't happen */
250 FATAL("can't happen: unknown type %d in penter", type(p));
251 break;
255 void freetr(Node *p) /* free parse tree */
257 switch (type(p)) {
258 ELEAF
259 LEAF
260 xfree(p);
261 break;
262 UNARY
263 freetr(left(p));
264 xfree(p);
265 break;
266 case CAT:
267 case OR:
268 freetr(left(p));
269 freetr(right(p));
270 xfree(p);
271 break;
272 default: /* can't happen */
273 FATAL("can't happen: unknown type %d in freetr", type(p));
274 break;
278 /* in the parsing of regular expressions, metacharacters like . have */
279 /* to be seen literally; \056 is not a metacharacter. */
281 int hexstr(uschar **pp) /* find and eval hex string at pp, return new p */
282 { /* only pick up one 8-bit byte (2 chars) */
283 uschar *p;
284 int n = 0;
285 int i;
287 for (i = 0, p = (uschar *) *pp; i < 2 && isxdigit(*p); i++, p++) {
288 if (isdigit(*p))
289 n = 16 * n + *p - '0';
290 else if (*p >= 'a' && *p <= 'f')
291 n = 16 * n + *p - 'a' + 10;
292 else if (*p >= 'A' && *p <= 'F')
293 n = 16 * n + *p - 'A' + 10;
295 *pp = (char *) p;
296 return n;
299 #define isoctdigit(c) ((c) >= '0' && (c) <= '7') /* multiple use of arg */
301 int quoted(uschar **pp) /* pick up next thing after a \\ */
302 /* and increment *pp */
304 uschar *p = *pp;
305 int c;
307 if ((c = *p++) == 't')
308 c = '\t';
309 else if (c == 'n')
310 c = '\n';
311 else if (c == 'f')
312 c = '\f';
313 else if (c == 'r')
314 c = '\r';
315 else if (c == 'b')
316 c = '\b';
317 else if (c == '\\')
318 c = '\\';
319 else if (c == 'x') { /* hexadecimal goo follows */
320 c = hexstr(&p); /* this adds a null if number is invalid */
321 } else if (isoctdigit(c)) { /* \d \dd \ddd */
322 int n = c - '0';
323 if (isoctdigit(*p)) {
324 n = 8 * n + *p++ - '0';
325 if (isoctdigit(*p))
326 n = 8 * n + *p++ - '0';
328 c = n;
329 } /* else */
330 /* c = c; */
331 *pp = p;
332 return c;
335 char *cclenter(const char *argp) /* add a character class */
337 int i, c, c2;
338 uschar *p = (uschar *) argp;
339 uschar *op, *bp;
340 static uschar *buf = 0;
341 static int bufsz = 100;
343 op = p;
344 if (buf == 0 && (buf = (uschar *) malloc(bufsz)) == NULL)
345 FATAL("out of space for character class [%.10s...] 1", p);
346 bp = buf;
347 for (i = 0; (c = *p++) != 0; ) {
348 if (c == '\\') {
349 c = quoted(&p);
350 } else if (c == '-' && i > 0 && bp[-1] != 0) {
351 if (*p != 0) {
352 c = bp[-1];
353 c2 = *p++;
354 if (c2 == '\\')
355 c2 = quoted(&p);
356 if (c > c2) { /* empty; ignore */
357 bp--;
358 i--;
359 continue;
361 while (c < c2) {
362 if (!adjbuf(&buf, &bufsz, bp-buf+2, 100, &bp, "cclenter1"))
363 FATAL("out of space for character class [%.10s...] 2", p);
364 *bp++ = ++c;
365 i++;
367 continue;
370 if (!adjbuf(&buf, &bufsz, bp-buf+2, 100, &bp, "cclenter2"))
371 FATAL("out of space for character class [%.10s...] 3", p);
372 *bp++ = c;
373 i++;
375 *bp = 0;
376 dprintf( ("cclenter: in = |%s|, out = |%s|\n", op, buf) );
377 xfree(op);
378 return (char *) tostring((char *) buf);
381 void overflo(const char *s)
383 FATAL("regular expression too big: %.30s...", s);
386 void cfoll(fa *f, Node *v) /* enter follow set of each leaf of vertex v into lfollow[leaf] */
388 int i;
389 int *p;
391 switch (type(v)) {
392 ELEAF
393 LEAF
394 f->re[info(v)].ltype = type(v);
395 f->re[info(v)].lval.np = right(v);
396 while (f->accept >= maxsetvec) { /* guessing here! */
397 maxsetvec *= 4;
398 setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
399 tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
400 if (setvec == 0 || tmpset == 0)
401 overflo("out of space in cfoll()");
403 for (i = 0; i <= f->accept; i++)
404 setvec[i] = 0;
405 setcnt = 0;
406 follow(v); /* computes setvec and setcnt */
407 if ((p = (int *) calloc(1, (setcnt+1)*sizeof(int))) == NULL)
408 overflo("out of space building follow set");
409 f->re[info(v)].lfollow = p;
410 *p = setcnt;
411 for (i = f->accept; i >= 0; i--)
412 if (setvec[i] == 1)
413 *++p = i;
414 break;
415 UNARY
416 cfoll(f,left(v));
417 break;
418 case CAT:
419 case OR:
420 cfoll(f,left(v));
421 cfoll(f,right(v));
422 break;
423 default: /* can't happen */
424 FATAL("can't happen: unknown type %d in cfoll", type(v));
428 int first(Node *p) /* collects initially active leaves of p into setvec */
429 /* returns 0 if p matches empty string */
431 int b, lp;
433 switch (type(p)) {
434 ELEAF
435 LEAF
436 lp = info(p); /* look for high-water mark of subscripts */
437 while (setcnt >= maxsetvec || lp >= maxsetvec) { /* guessing here! */
438 maxsetvec *= 4;
439 setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
440 tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
441 if (setvec == 0 || tmpset == 0)
442 overflo("out of space in first()");
444 if (type(p) == EMPTYRE) {
445 setvec[lp] = 0;
446 return(0);
448 if (setvec[lp] != 1) {
449 setvec[lp] = 1;
450 setcnt++;
452 if (type(p) == CCL && (*(char *) right(p)) == '\0')
453 return(0); /* empty CCL */
454 else return(1);
455 case PLUS:
456 if (first(left(p)) == 0) return(0);
457 return(1);
458 case STAR:
459 case QUEST:
460 first(left(p));
461 return(0);
462 case CAT:
463 if (first(left(p)) == 0 && first(right(p)) == 0) return(0);
464 return(1);
465 case OR:
466 b = first(right(p));
467 if (first(left(p)) == 0 || b == 0) return(0);
468 return(1);
470 FATAL("can't happen: unknown type %d in first", type(p)); /* can't happen */
471 return(-1);
474 void follow(Node *v) /* collects leaves that can follow v into setvec */
476 Node *p;
478 if (type(v) == FINAL)
479 return;
480 p = parent(v);
481 switch (type(p)) {
482 case STAR:
483 case PLUS:
484 first(v);
485 follow(p);
486 return;
488 case OR:
489 case QUEST:
490 follow(p);
491 return;
493 case CAT:
494 if (v == left(p)) { /* v is left child of p */
495 if (first(right(p)) == 0) {
496 follow(p);
497 return;
499 } else /* v is right child */
500 follow(p);
501 return;
505 int member(int c, const char *sarg) /* is c in s? */
507 uschar *s = (uschar *) sarg;
509 while (*s)
510 if (c == *s++)
511 return(1);
512 return(0);
515 int match(fa *f, const char *p0) /* shortest match ? */
517 int s, ns;
518 uschar *p = (uschar *) p0;
520 s = f->initstat;
521 assert (s < f->state_count);
523 if (f->out[s])
524 return(1);
525 do {
526 /* assert(*p < NCHARS); */
527 if ((ns = f->gototab[s][*p]) != 0)
528 s = ns;
529 else
530 s = cgoto(f, s, *p);
532 assert (s < f->state_count);
534 if (f->out[s])
535 return(1);
536 } while (*p++ != 0);
537 return(0);
540 int pmatch(fa *f, const char *p0) /* longest match, for sub */
542 int s, ns;
543 uschar *p = (uschar *) p0;
544 uschar *q;
546 s = f->initstat;
547 assert(s < f->state_count);
548 patbeg = p;
549 patlen = -1;
550 do {
551 q = p;
552 do {
553 if (f->out[s]) /* final state */
554 patlen = q-p;
555 /* assert(*q < NCHARS); */
556 if ((ns = f->gototab[s][*q]) != 0)
557 s = ns;
558 else
559 s = cgoto(f, s, *q);
561 assert(s < f->state_count);
563 if (s == 1) { /* no transition */
564 if (patlen >= 0) {
565 patbeg = p;
566 return(1);
568 else
569 goto nextin; /* no match */
571 } while (*q++ != 0);
572 if (f->out[s])
573 patlen = q-p-1; /* don't count $ */
574 if (patlen >= 0) {
575 patbeg = p;
576 return(1);
578 nextin:
579 s = 2;
580 } while (*p++ != 0);
581 return (0);
584 int nematch(fa *f, const char *p0) /* non-empty match, for sub */
586 int s, ns;
587 uschar *p = (uschar *) p0;
588 uschar *q;
590 s = f->initstat;
591 assert(s < f->state_count);
593 patlen = -1;
594 while (*p) {
595 q = p;
596 do {
597 if (f->out[s]) /* final state */
598 patlen = q-p;
599 /* assert(*q < NCHARS); */
600 if ((ns = f->gototab[s][*q]) != 0)
601 s = ns;
602 else
603 s = cgoto(f, s, *q);
605 assert(s < f->state_count);
607 if (s == 1) { /* no transition */
608 if (patlen > 0) {
609 patbeg = p;
610 return(1);
611 } else
612 goto nnextin; /* no nonempty match */
614 } while (*q++ != 0);
615 if (f->out[s])
616 patlen = q-p-1; /* don't count $ */
617 if (patlen > 0 ) {
618 patbeg = p;
619 return(1);
621 nnextin:
622 s = 2;
623 p++;
625 return (0);
628 Node *reparse(const char *p) /* parses regular expression pointed to by p */
629 { /* uses relex() to scan regular expression */
630 Node *np;
632 dprintf( ("reparse <%s>\n", p) );
633 lastre = prestr = (uschar *) p; /* prestr points to string to be parsed */
634 rtok = relex();
635 /* GNU compatibility: an empty regexp matches anything */
636 if (rtok == '\0') {
637 /* FATAL("empty regular expression"); previous */
638 return(op2(EMPTYRE, NIL, NIL));
640 np = regexp();
641 if (rtok != '\0')
642 FATAL("syntax error in regular expression %s at %s", lastre, prestr);
643 return(np);
646 Node *regexp(void) /* top-level parse of reg expr */
648 return (alt(concat(primary())));
651 Node *primary(void)
653 Node *np;
655 switch (rtok) {
656 case CHAR:
657 np = op2(CHAR, NIL, itonp(rlxval));
658 rtok = relex();
659 return (unary(np));
660 case ALL:
661 rtok = relex();
662 return (unary(op2(ALL, NIL, NIL)));
663 case EMPTYRE:
664 rtok = relex();
665 return (unary(op2(ALL, NIL, NIL)));
666 case DOT:
667 rtok = relex();
668 return (unary(op2(DOT, NIL, NIL)));
669 case CCL:
670 np = op2(CCL, NIL, (Node*) cclenter((char *) rlxstr));
671 rtok = relex();
672 return (unary(np));
673 case NCCL:
674 np = op2(NCCL, NIL, (Node *) cclenter((char *) rlxstr));
675 rtok = relex();
676 return (unary(np));
677 case '^':
678 rtok = relex();
679 return (unary(op2(CHAR, NIL, itonp(HAT))));
680 case '$':
681 rtok = relex();
682 return (unary(op2(CHAR, NIL, NIL)));
683 case '(':
684 rtok = relex();
685 if (rtok == ')') { /* special pleading for () */
686 rtok = relex();
687 return unary(op2(CCL, NIL, (Node *) tostring("")));
689 np = regexp();
690 if (rtok == ')') {
691 rtok = relex();
692 return (unary(np));
694 else
695 FATAL("syntax error in regular expression %s at %s", lastre, prestr);
696 default:
697 FATAL("illegal primary in regular expression %s at %s", lastre, prestr);
699 return 0; /*NOTREACHED*/
702 Node *concat(Node *np)
704 switch (rtok) {
705 case CHAR: case DOT: case ALL: case EMPTYRE: case CCL: case NCCL: case '$': case '(':
706 return (concat(op2(CAT, np, primary())));
708 return (np);
711 Node *alt(Node *np)
713 if (rtok == OR) {
714 rtok = relex();
715 return (alt(op2(OR, np, concat(primary()))));
717 return (np);
720 Node *unary(Node *np)
722 switch (rtok) {
723 case STAR:
724 rtok = relex();
725 return (unary(op2(STAR, np, NIL)));
726 case PLUS:
727 rtok = relex();
728 return (unary(op2(PLUS, np, NIL)));
729 case QUEST:
730 rtok = relex();
731 return (unary(op2(QUEST, np, NIL)));
732 default:
733 return (np);
738 * Character class definitions conformant to the POSIX locale as
739 * defined in IEEE P1003.1 draft 7 of June 2001, assuming the source
740 * and operating character sets are both ASCII (ISO646) or supersets
741 * thereof.
743 * Note that to avoid overflowing the temporary buffer used in
744 * relex(), the expanded character class (prior to range expansion)
745 * must be less than twice the size of their full name.
748 /* Because isblank doesn't show up in any of the header files on any
749 * system i use, it's defined here. if some other locale has a richer
750 * definition of "blank", define HAS_ISBLANK and provide your own
751 * version.
752 * the parentheses here are an attempt to find a path through the maze
753 * of macro definition and/or function and/or version provided. thanks
754 * to nelson beebe for the suggestion; let's see if it works everywhere.
757 #ifndef HAS_ISBLANK
759 int (isblank)(int c)
761 return c==' ' || c=='\t';
764 #endif
766 static const struct charclass {
767 const char *cc_name;
768 int cc_namelen;
769 int (*cc_func)(int);
770 } charclasses[] = {
771 { "alnum", 5, isalnum },
772 { "alpha", 5, isalpha },
773 { "blank", 5, isblank },
774 { "cntrl", 5, iscntrl },
775 { "digit", 5, isdigit },
776 { "graph", 5, isgraph },
777 { "lower", 5, islower },
778 { "print", 5, isprint },
779 { "punct", 5, ispunct },
780 { "space", 5, isspace },
781 { "upper", 5, isupper },
782 { "xdigit", 6, isxdigit },
783 { NULL, 0, NULL },
787 int relex(void) /* lexical analyzer for reparse */
789 int c, n;
790 int cflag;
791 static uschar *buf = 0;
792 static int bufsz = 100;
793 uschar *bp;
794 const struct charclass *cc;
795 int i;
797 switch (c = *prestr++) {
798 case '|': return OR;
799 case '*': return STAR;
800 case '+': return PLUS;
801 case '?': return QUEST;
802 case '.': return DOT;
803 case '\0': prestr--; return '\0';
804 case '^':
805 case '$':
806 case '(':
807 case ')':
808 return c;
809 case '\\':
810 rlxval = quoted(&prestr);
811 return CHAR;
812 default:
813 rlxval = c;
814 return CHAR;
815 case '[':
816 if (buf == 0 && (buf = (uschar *) malloc(bufsz)) == NULL)
817 FATAL("out of space in reg expr %.10s..", lastre);
818 bp = buf;
819 if (*prestr == '^') {
820 cflag = 1;
821 prestr++;
823 else
824 cflag = 0;
825 n = 2 * strlen((const char *) prestr)+1;
826 if (!adjbuf(&buf, &bufsz, n, n, &bp, "relex1"))
827 FATAL("out of space for reg expr %.10s...", lastre);
828 for (; ; ) {
829 if ((c = *prestr++) == '\\') {
830 *bp++ = '\\';
831 if ((c = *prestr++) == '\0')
832 FATAL("nonterminated character class %.20s...", lastre);
833 *bp++ = c;
834 /* } else if (c == '\n') { */
835 /* FATAL("newline in character class %.20s...", lastre); */
836 } else if (c == '[' && *prestr == ':') {
837 /* POSIX char class names, Dag-Erling Smorgrav, des@ofug.org */
838 for (cc = charclasses; cc->cc_name; cc++)
839 if (strncmp((const char *) prestr + 1, (const char *) cc->cc_name, cc->cc_namelen) == 0)
840 break;
841 if (cc->cc_name != NULL && prestr[1 + cc->cc_namelen] == ':' &&
842 prestr[2 + cc->cc_namelen] == ']') {
843 prestr += cc->cc_namelen + 3;
844 for (i = 1; i < NCHARS; i++) {
845 if (!adjbuf(&buf, &bufsz, bp-buf+1, 100, &bp, "relex2"))
846 FATAL("out of space for reg expr %.10s...", lastre);
847 if (cc->cc_func(i)) {
848 *bp++ = i;
849 n++;
852 } else
853 *bp++ = c;
854 } else if (c == '\0') {
855 FATAL("nonterminated character class %.20s", lastre);
856 } else if (bp == buf) { /* 1st char is special */
857 *bp++ = c;
858 } else if (c == ']') {
859 *bp++ = 0;
860 rlxstr = (uschar *) tostring((char *) buf);
861 if (cflag == 0)
862 return CCL;
863 else
864 return NCCL;
865 } else
866 *bp++ = c;
871 int cgoto(fa *f, int s, int c)
873 int i, j, k;
874 int *p, *q;
876 assert(c == HAT || c < NCHARS);
877 while (f->accept >= maxsetvec) { /* guessing here! */
878 maxsetvec *= 4;
879 setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
880 tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
881 if (setvec == 0 || tmpset == 0)
882 overflo("out of space in cgoto()");
884 for (i = 0; i <= f->accept; i++)
885 setvec[i] = 0;
886 setcnt = 0;
887 resize_state(f, s);
888 /* compute positions of gototab[s,c] into setvec */
889 p = f->posns[s];
890 for (i = 1; i <= *p; i++) {
891 if ((k = f->re[p[i]].ltype) != FINAL) {
892 if ((k == CHAR && c == ptoi(f->re[p[i]].lval.np))
893 || (k == DOT && c != 0 && c != HAT)
894 || (k == ALL && c != 0)
895 || (k == EMPTYRE && c != 0)
896 || (k == CCL && member(c, (char *) f->re[p[i]].lval.up))
897 || (k == NCCL && !member(c, (char *) f->re[p[i]].lval.up) && c != 0 && c != HAT)) {
898 q = f->re[p[i]].lfollow;
899 for (j = 1; j <= *q; j++) {
900 if (q[j] >= maxsetvec) {
901 maxsetvec *= 4;
902 setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
903 tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
904 if (setvec == 0 || tmpset == 0)
905 overflo("cgoto overflow");
907 if (setvec[q[j]] == 0) {
908 setcnt++;
909 setvec[q[j]] = 1;
915 /* determine if setvec is a previous state */
916 tmpset[0] = setcnt;
917 j = 1;
918 for (i = f->accept; i >= 0; i--)
919 if (setvec[i]) {
920 tmpset[j++] = i;
923 resize_state(f, f->curstat > s ? f->curstat : s);
924 /* tmpset == previous state? */
925 for (i = 1; i <= f->curstat; i++) {
926 p = f->posns[i];
927 if ((k = tmpset[0]) != p[0])
928 goto different;
929 for (j = 1; j <= k; j++)
930 if (tmpset[j] != p[j])
931 goto different;
932 /* setvec is state i */
933 if (c != HAT)
934 f->gototab[s][c] = i;
935 return i;
936 different:;
939 /* add tmpset to current set of states */
940 ++(f->curstat);
941 resize_state(f, f->curstat);
942 for (i = 0; i < NCHARS; i++)
943 f->gototab[f->curstat][i] = 0;
944 xfree(f->posns[f->curstat]);
945 if ((p = (int *) calloc(1, (setcnt+1)*sizeof(int))) == NULL)
946 overflo("out of space in cgoto");
948 f->posns[f->curstat] = p;
949 if (c != HAT)
950 f->gototab[s][c] = f->curstat;
951 for (i = 0; i <= setcnt; i++)
952 p[i] = tmpset[i];
953 if (setvec[f->accept])
954 f->out[f->curstat] = 1;
955 else
956 f->out[f->curstat] = 0;
957 return f->curstat;
961 void freefa(fa *f) /* free a finite automaton */
963 int i;
965 if (f == NULL)
966 return;
967 for (i = 0; i < f->state_count; i++) {
968 xfree(f->gototab[i])
969 xfree(f->posns[i]);
971 for (i = 0; i <= f->accept; i++) {
972 xfree(f->re[i].lfollow);
973 if (f->re[i].ltype == CCL || f->re[i].ltype == NCCL)
974 xfree((f->re[i].lval.np));
976 xfree(f->restr);
977 xfree(f->out);
978 xfree(f->posns);
979 xfree(f->gototab);
980 xfree(f);