Fixup fromcvs/togit conversion
[minix-pkgsrc.git] / lang / nawk / files / b.c
blob2be1ee6d52034320d6297d6a33fdc0ef673feaef
1 /* $NetBSD$ */
3 /****************************************************************
4 Copyright (C) Lucent Technologies 1997
5 All Rights Reserved
7 Permission to use, copy, modify, and distribute this software and
8 its documentation for any purpose and without fee is hereby
9 granted, provided that the above copyright notice appear in all
10 copies and that both that the copyright notice and this
11 permission notice and warranty disclaimer appear in supporting
12 documentation, and that the name Lucent Technologies or any of
13 its entities not be used in advertising or publicity pertaining
14 to distribution of the software without specific, written prior
15 permission.
17 LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
18 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
19 IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
20 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
21 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
22 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
23 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
24 THIS SOFTWARE.
25 ****************************************************************/
27 /* lasciate ogne speranza, voi ch'entrate. */
29 #define DEBUG
31 #include <ctype.h>
32 #include <stdio.h>
33 #include <string.h>
34 #include <stdlib.h>
35 #include "awk.h"
36 #include "ytab.h"
38 #define HAT (NCHARS+2) /* matches ^ in regular expr */
39 /* NCHARS is 2**n */
40 #define MAXLIN 22
42 #define type(v) (v)->nobj /* badly overloaded here */
43 #define info(v) (v)->ntype /* badly overloaded here */
44 #define left(v) (v)->narg[0]
45 #define right(v) (v)->narg[1]
46 #define parent(v) (v)->nnext
48 #define LEAF case CCL: case NCCL: case CHAR: case DOT: case FINAL: case ALL:
49 #define UNARY case STAR: case PLUS: case QUEST:
51 /* encoding in tree Nodes:
52 leaf (CCL, NCCL, CHAR, DOT, FINAL, ALL):
53 left is index, right contains value or pointer to value
54 unary (STAR, PLUS, QUEST): left is child, right is null
55 binary (CAT, OR): left and right are children
56 parent contains pointer to parent
60 int *setvec;
61 int *tmpset;
62 int maxsetvec = 0;
64 int rtok; /* next token in current re */
65 int rlxval;
66 static uschar *rlxstr;
67 static uschar *prestr; /* current position in current re */
68 static uschar *lastre; /* origin of last re */
70 static int setcnt;
71 static int poscnt;
73 char *patbeg;
74 int patlen;
76 #define NFA 20 /* cache this many dynamic fa's */
77 fa *fatab[NFA];
78 int nfatab = 0; /* entries in fatab */
80 fa *makedfa(const char *s, int anchor) /* returns dfa for reg expr s */
82 int i, use, nuse;
83 fa *pfa;
84 static int now = 1;
86 if (setvec == 0) { /* first time through any RE */
87 maxsetvec = MAXLIN;
88 setvec = (int *) malloc(maxsetvec * sizeof(int));
89 tmpset = (int *) malloc(maxsetvec * sizeof(int));
90 if (setvec == 0 || tmpset == 0)
91 overflo("out of space initializing makedfa");
94 if (compile_time) /* a constant for sure */
95 return mkdfa(s, anchor);
96 for (i = 0; i < nfatab; i++) /* is it there already? */
97 if (fatab[i]->anchor == anchor
98 && strcmp((const char *) fatab[i]->restr, s) == 0) {
99 fatab[i]->use = now++;
100 return fatab[i];
102 pfa = mkdfa(s, anchor);
103 if (nfatab < NFA) { /* room for another */
104 fatab[nfatab] = pfa;
105 fatab[nfatab]->use = now++;
106 nfatab++;
107 return pfa;
109 use = fatab[0]->use; /* replace least-recently used */
110 nuse = 0;
111 for (i = 1; i < nfatab; i++)
112 if (fatab[i]->use < use) {
113 use = fatab[i]->use;
114 nuse = i;
116 freefa(fatab[nuse]);
117 fatab[nuse] = pfa;
118 pfa->use = now++;
119 return pfa;
122 fa *mkdfa(const char *s, int anchor) /* does the real work of making a dfa */
123 /* anchor = 1 for anchored matches, else 0 */
125 Node *p, *p1;
126 fa *f;
128 p = reparse(s);
129 p1 = op2(CAT, op2(STAR, op2(ALL, NIL, NIL), NIL), p);
130 /* put ALL STAR in front of reg. exp. */
131 p1 = op2(CAT, p1, op2(FINAL, NIL, NIL));
132 /* put FINAL after reg. exp. */
134 poscnt = 0;
135 penter(p1); /* enter parent pointers and leaf indices */
136 if ((f = (fa *) calloc(1, sizeof(fa) + poscnt*sizeof(rrow))) == NULL)
137 overflo("out of space for fa");
138 f->accept = poscnt-1; /* penter has computed number of positions in re */
139 cfoll(f, p1); /* set up follow sets */
140 freetr(p1);
141 if ((f->posns[0] = (int *) calloc(1, *(f->re[0].lfollow)*sizeof(int))) == NULL)
142 overflo("out of space in makedfa");
143 if ((f->posns[1] = (int *) calloc(1, sizeof(int))) == NULL)
144 overflo("out of space in makedfa");
145 *f->posns[1] = 0;
146 f->initstat = makeinit(f, anchor);
147 f->anchor = anchor;
148 f->restr = (uschar *) tostring(s);
149 return f;
152 int makeinit(fa *f, int anchor)
154 int i, k;
156 f->curstat = 2;
157 f->out[2] = 0;
158 f->reset = 0;
159 k = *(f->re[0].lfollow);
160 xfree(f->posns[2]);
161 if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL)
162 overflo("out of space in makeinit");
163 for (i=0; i <= k; i++) {
164 (f->posns[2])[i] = (f->re[0].lfollow)[i];
166 if ((f->posns[2])[1] == f->accept)
167 f->out[2] = 1;
168 for (i=0; i < NCHARS; i++)
169 f->gototab[2][i] = 0;
170 f->curstat = cgoto(f, 2, HAT);
171 if (anchor) {
172 *f->posns[2] = k-1; /* leave out position 0 */
173 for (i=0; i < k; i++) {
174 (f->posns[0])[i] = (f->posns[2])[i];
177 f->out[0] = f->out[2];
178 if (f->curstat != 2)
179 --(*f->posns[f->curstat]);
181 return f->curstat;
184 void penter(Node *p) /* set up parent pointers and leaf indices */
186 switch (type(p)) {
187 LEAF
188 info(p) = poscnt;
189 poscnt++;
190 break;
191 UNARY
192 penter(left(p));
193 parent(left(p)) = p;
194 break;
195 case CAT:
196 case OR:
197 penter(left(p));
198 penter(right(p));
199 parent(left(p)) = p;
200 parent(right(p)) = p;
201 break;
202 default: /* can't happen */
203 FATAL("can't happen: unknown type %d in penter", type(p));
204 break;
208 void freetr(Node *p) /* free parse tree */
210 switch (type(p)) {
211 LEAF
212 xfree(p);
213 break;
214 UNARY
215 freetr(left(p));
216 xfree(p);
217 break;
218 case CAT:
219 case OR:
220 freetr(left(p));
221 freetr(right(p));
222 xfree(p);
223 break;
224 default: /* can't happen */
225 FATAL("can't happen: unknown type %d in freetr", type(p));
226 break;
230 /* in the parsing of regular expressions, metacharacters like . have */
231 /* to be seen literally; \056 is not a metacharacter. */
233 int hexstr(char **pp) /* find and eval hex string at pp, return new p */
234 { /* only pick up one 8-bit byte (2 chars) */
235 uschar *p;
236 int n = 0;
237 int i;
239 for (i = 0, p = (uschar *) *pp; i < 2 && isxdigit(*p); i++, p++) {
240 if (isdigit(*p))
241 n = 16 * n + *p - '0';
242 else if (*p >= 'a' && *p <= 'f')
243 n = 16 * n + *p - 'a' + 10;
244 else if (*p >= 'A' && *p <= 'F')
245 n = 16 * n + *p - 'A' + 10;
247 *pp = (char *) p;
248 return n;
251 #define isoctdigit(c) ((c) >= '0' && (c) <= '7') /* multiple use of arg */
253 int quoted(char **pp) /* pick up next thing after a \\ */
254 /* and increment *pp */
256 char *p = *pp;
257 int c;
259 if ((c = *p++) == 't')
260 c = '\t';
261 else if (c == 'n')
262 c = '\n';
263 else if (c == 'f')
264 c = '\f';
265 else if (c == 'r')
266 c = '\r';
267 else if (c == 'b')
268 c = '\b';
269 else if (c == '\\')
270 c = '\\';
271 else if (c == 'x') { /* hexadecimal goo follows */
272 c = hexstr(&p); /* this adds a null if number is invalid */
273 } else if (isoctdigit(c)) { /* \d \dd \ddd */
274 int n = c - '0';
275 if (isoctdigit(*p)) {
276 n = 8 * n + *p++ - '0';
277 if (isoctdigit(*p))
278 n = 8 * n + *p++ - '0';
280 c = n;
281 } /* else */
282 /* c = c; */
283 *pp = p;
284 return c;
287 char *cclenter(const char *argp) /* add a character class */
289 int i, c, c2;
290 uschar *p = (uschar *) argp;
291 uschar *op, *bp;
292 static uschar *buf = 0;
293 static int bufsz = 100;
295 op = p;
296 if (buf == 0 && (buf = (uschar *) malloc(bufsz)) == NULL)
297 FATAL("out of space for character class [%.10s...] 1", p);
298 bp = buf;
299 for (i = 0; (c = *p++) != 0; ) {
300 if (c == '\\') {
301 c = quoted((char **) &p);
302 } else if (c == '-' && i > 0 && bp[-1] != 0) {
303 if (*p != 0) {
304 c = bp[-1];
305 c2 = *p++;
306 if (c2 == '\\')
307 c2 = quoted((char **) &p);
308 if (c > c2) { /* empty; ignore */
309 bp--;
310 i--;
311 continue;
313 while (c < c2) {
314 if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, 0))
315 FATAL("out of space for character class [%.10s...] 2", p);
316 *bp++ = ++c;
317 i++;
319 continue;
322 if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, 0))
323 FATAL("out of space for character class [%.10s...] 3", p);
324 *bp++ = c;
325 i++;
327 *bp = 0;
328 dprintf( ("cclenter: in = |%s|, out = |%s|\n", op, buf) );
329 xfree(op);
330 return (char *) tostring((char *) buf);
333 void overflo(const char *s)
335 FATAL("regular expression too big: %.30s...", s);
338 void cfoll(fa *f, Node *v) /* enter follow set of each leaf of vertex v into lfollow[leaf] */
340 int i;
341 int *p;
343 switch (type(v)) {
344 LEAF
345 f->re[info(v)].ltype = type(v);
346 f->re[info(v)].lval.np = right(v);
347 while (f->accept >= maxsetvec) { /* guessing here! */
348 maxsetvec *= 4;
349 setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
350 tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
351 if (setvec == 0 || tmpset == 0)
352 overflo("out of space in cfoll()");
354 for (i = 0; i <= f->accept; i++)
355 setvec[i] = 0;
356 setcnt = 0;
357 follow(v); /* computes setvec and setcnt */
358 if ((p = (int *) calloc(1, (setcnt+1)*sizeof(int))) == NULL)
359 overflo("out of space building follow set");
360 f->re[info(v)].lfollow = p;
361 *p = setcnt;
362 for (i = f->accept; i >= 0; i--)
363 if (setvec[i] == 1)
364 *++p = i;
365 break;
366 UNARY
367 cfoll(f,left(v));
368 break;
369 case CAT:
370 case OR:
371 cfoll(f,left(v));
372 cfoll(f,right(v));
373 break;
374 default: /* can't happen */
375 FATAL("can't happen: unknown type %d in cfoll", type(v));
379 int first(Node *p) /* collects initially active leaves of p into setvec */
380 /* returns 1 if p matches empty string */
382 int b, lp;
384 switch (type(p)) {
385 LEAF
386 lp = info(p); /* look for high-water mark of subscripts */
387 while (setcnt >= maxsetvec || lp >= maxsetvec) { /* guessing here! */
388 maxsetvec *= 4;
389 setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
390 tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
391 if (setvec == 0 || tmpset == 0)
392 overflo("out of space in first()");
394 if (setvec[lp] != 1) {
395 setvec[lp] = 1;
396 setcnt++;
398 if (type(p) == CCL && (*(char *) right(p)) == '\0')
399 return(0); /* empty CCL */
400 else return(1);
401 case PLUS:
402 if (first(left(p)) == 0) return(0);
403 return(1);
404 case STAR:
405 case QUEST:
406 first(left(p));
407 return(0);
408 case CAT:
409 if (first(left(p)) == 0 && first(right(p)) == 0) return(0);
410 return(1);
411 case OR:
412 b = first(right(p));
413 if (first(left(p)) == 0 || b == 0) return(0);
414 return(1);
416 FATAL("can't happen: unknown type %d in first", type(p)); /* can't happen */
417 return(-1);
420 void follow(Node *v) /* collects leaves that can follow v into setvec */
422 Node *p;
424 if (type(v) == FINAL)
425 return;
426 p = parent(v);
427 switch (type(p)) {
428 case STAR:
429 case PLUS:
430 first(v);
431 follow(p);
432 return;
434 case OR:
435 case QUEST:
436 follow(p);
437 return;
439 case CAT:
440 if (v == left(p)) { /* v is left child of p */
441 if (first(right(p)) == 0) {
442 follow(p);
443 return;
445 } else /* v is right child */
446 follow(p);
447 return;
451 int member(int c, const char *sarg) /* is c in s? */
453 uschar *s = (uschar *) sarg;
455 while (*s)
456 if (c == *s++)
457 return(1);
458 return(0);
461 int match(fa *f, const char *p0) /* shortest match ? */
463 int s, ns;
464 uschar *p = (uschar *) p0;
466 s = f->reset ? makeinit(f,0) : f->initstat;
467 if (f->out[s])
468 return(1);
469 do {
470 assert(*p < NCHARS);
471 if ((ns = f->gototab[s][*p]) != 0)
472 s = ns;
473 else
474 s = cgoto(f, s, *p);
475 if (f->out[s])
476 return(1);
477 } while (*p++ != 0);
478 return(0);
481 int pmatch(fa *f, const char *p0) /* longest match, for sub */
483 int s, ns;
484 uschar *p = (uschar *) p0;
485 uschar *q;
486 int i, k;
488 /* s = f->reset ? makeinit(f,1) : f->initstat; */
489 if (f->reset) {
490 f->initstat = s = makeinit(f,1);
491 } else {
492 s = f->initstat;
494 patbeg = (char *) p;
495 patlen = -1;
496 do {
497 q = p;
498 do {
499 if (f->out[s]) /* final state */
500 patlen = q-p;
501 assert(*q < NCHARS);
502 if ((ns = f->gototab[s][*q]) != 0)
503 s = ns;
504 else
505 s = cgoto(f, s, *q);
506 if (s == 1) { /* no transition */
507 if (patlen >= 0) {
508 patbeg = (char *) p;
509 return(1);
511 else
512 goto nextin; /* no match */
514 } while (*q++ != 0);
515 if (f->out[s])
516 patlen = q-p-1; /* don't count $ */
517 if (patlen >= 0) {
518 patbeg = (char *) p;
519 return(1);
521 nextin:
522 s = 2;
523 if (f->reset) {
524 for (i = 2; i <= f->curstat; i++)
525 xfree(f->posns[i]);
526 k = *f->posns[0];
527 if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL)
528 overflo("out of space in pmatch");
529 for (i = 0; i <= k; i++)
530 (f->posns[2])[i] = (f->posns[0])[i];
531 f->initstat = f->curstat = 2;
532 f->out[2] = f->out[0];
533 for (i = 0; i < NCHARS; i++)
534 f->gototab[2][i] = 0;
536 } while (*p++ != 0);
537 return (0);
540 int nematch(fa *f, const char *p0) /* non-empty match, for sub */
542 int s, ns;
543 uschar *p = (uschar *) p0;
544 uschar *q;
545 int i, k;
547 /* s = f->reset ? makeinit(f,1) : f->initstat; */
548 if (f->reset) {
549 f->initstat = s = makeinit(f,1);
550 } else {
551 s = f->initstat;
553 patlen = -1;
554 while (*p) {
555 q = p;
556 do {
557 if (f->out[s]) /* final state */
558 patlen = q-p;
559 assert(*q < NCHARS);
560 if ((ns = f->gototab[s][*q]) != 0)
561 s = ns;
562 else
563 s = cgoto(f, s, *q);
564 if (s == 1) { /* no transition */
565 if (patlen > 0) {
566 patbeg = (char *) p;
567 return(1);
568 } else
569 goto nnextin; /* no nonempty match */
571 } while (*q++ != 0);
572 if (f->out[s])
573 patlen = q-p-1; /* don't count $ */
574 if (patlen > 0 ) {
575 patbeg = (char *) p;
576 return(1);
578 nnextin:
579 s = 2;
580 if (f->reset) {
581 for (i = 2; i <= f->curstat; i++)
582 xfree(f->posns[i]);
583 k = *f->posns[0];
584 if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL)
585 overflo("out of state space");
586 for (i = 0; i <= k; i++)
587 (f->posns[2])[i] = (f->posns[0])[i];
588 f->initstat = f->curstat = 2;
589 f->out[2] = f->out[0];
590 for (i = 0; i < NCHARS; i++)
591 f->gototab[2][i] = 0;
593 p++;
595 return (0);
598 Node *reparse(const char *p) /* parses regular expression pointed to by p */
599 { /* uses relex() to scan regular expression */
600 Node *np;
602 dprintf( ("reparse <%s>\n", p) );
603 lastre = prestr = (uschar *) p; /* prestr points to string to be parsed */
604 rtok = relex();
605 /* GNU compatibility: an empty regexp matches anything */
606 if (rtok == '\0')
607 /* FATAL("empty regular expression"); previous */
608 return(op2(ALL, NIL, NIL));
609 np = regexp();
610 if (rtok != '\0')
611 FATAL("syntax error in regular expression %s at %s", lastre, prestr);
612 return(np);
615 Node *regexp(void) /* top-level parse of reg expr */
617 return (alt(concat(primary())));
620 Node *primary(void)
622 Node *np;
624 switch (rtok) {
625 case CHAR:
626 np = op2(CHAR, NIL, itonp(rlxval));
627 rtok = relex();
628 return (unary(np));
629 case ALL:
630 rtok = relex();
631 return (unary(op2(ALL, NIL, NIL)));
632 case DOT:
633 rtok = relex();
634 return (unary(op2(DOT, NIL, NIL)));
635 case CCL:
636 np = op2(CCL, NIL, (Node*) cclenter((char *) rlxstr));
637 rtok = relex();
638 return (unary(np));
639 case NCCL:
640 np = op2(NCCL, NIL, (Node *) cclenter((char *) rlxstr));
641 rtok = relex();
642 return (unary(np));
643 case '^':
644 rtok = relex();
645 return (unary(op2(CHAR, NIL, itonp(HAT))));
646 case '$':
647 rtok = relex();
648 return (unary(op2(CHAR, NIL, NIL)));
649 case '(':
650 rtok = relex();
651 if (rtok == ')') { /* special pleading for () */
652 rtok = relex();
653 return unary(op2(CCL, NIL, (Node *) tostring("")));
655 np = regexp();
656 if (rtok == ')') {
657 rtok = relex();
658 return (unary(np));
660 else
661 FATAL("syntax error in regular expression %s at %s", lastre, prestr);
662 default:
663 FATAL("illegal primary in regular expression %s at %s", lastre, prestr);
665 return 0; /*NOTREACHED*/
668 Node *concat(Node *np)
670 switch (rtok) {
671 case CHAR: case DOT: case ALL: case CCL: case NCCL: case '$': case '(':
672 return (concat(op2(CAT, np, primary())));
674 return (np);
677 Node *alt(Node *np)
679 if (rtok == OR) {
680 rtok = relex();
681 return (alt(op2(OR, np, concat(primary()))));
683 return (np);
686 Node *unary(Node *np)
688 switch (rtok) {
689 case STAR:
690 rtok = relex();
691 return (unary(op2(STAR, np, NIL)));
692 case PLUS:
693 rtok = relex();
694 return (unary(op2(PLUS, np, NIL)));
695 case QUEST:
696 rtok = relex();
697 return (unary(op2(QUEST, np, NIL)));
698 default:
699 return (np);
704 * Character class definitions conformant to the POSIX locale as
705 * defined in IEEE P1003.1 draft 7 of June 2001, assuming the source
706 * and operating character sets are both ASCII (ISO646) or supersets
707 * thereof.
709 * Note that to avoid overflowing the temporary buffer used in
710 * relex(), the expanded character class (prior to range expansion)
711 * must be less than twice the size of their full name.
714 /* Because isblank doesn't show up in any of the header files on any
715 * system i use, it's defined here. if some other locale has a richer
716 * definition of "blank", define HAS_ISBLANK and provide your own
717 * version.
718 * the parentheses here are an attempt to find a path through the maze
719 * of macro definition and/or function and/or version provided. thanks
720 * to nelson beebe for the suggestion; let's see if it works everywhere.
723 #ifndef HAS_ISBLANK
725 int (isblank)(int c)
727 return c==' ' || c=='\t';
730 #endif
732 struct charclass {
733 const char *cc_name;
734 int cc_namelen;
735 int (*cc_func)(int);
736 } charclasses[] = {
737 { "alnum", 5, isalnum },
738 { "alpha", 5, isalpha },
739 { "blank", 5, isblank },
740 { "cntrl", 5, iscntrl },
741 { "digit", 5, isdigit },
742 { "graph", 5, isgraph },
743 { "lower", 5, islower },
744 { "print", 5, isprint },
745 { "punct", 5, ispunct },
746 { "space", 5, isspace },
747 { "upper", 5, isupper },
748 { "xdigit", 6, isxdigit },
749 { NULL, 0, NULL },
753 int relex(void) /* lexical analyzer for reparse */
755 int c, n;
756 int cflag;
757 static uschar *buf = 0;
758 static int bufsz = 100;
759 uschar *bp;
760 struct charclass *cc;
761 int i;
763 switch (c = *prestr++) {
764 case '|': return OR;
765 case '*': return STAR;
766 case '+': return PLUS;
767 case '?': return QUEST;
768 case '.': return DOT;
769 case '\0': prestr--; return '\0';
770 case '^':
771 case '$':
772 case '(':
773 case ')':
774 return c;
775 case '\\':
776 rlxval = quoted((char **) &prestr);
777 return CHAR;
778 default:
779 rlxval = c;
780 return CHAR;
781 case '[':
782 if (buf == 0 && (buf = (uschar *) malloc(bufsz)) == NULL)
783 FATAL("out of space in reg expr %.10s..", lastre);
784 bp = buf;
785 if (*prestr == '^') {
786 cflag = 1;
787 prestr++;
789 else
790 cflag = 0;
791 n = 2 * strlen((const char *) prestr)+1;
792 if (!adjbuf((char **) &buf, &bufsz, n, n, (char **) &bp, 0))
793 FATAL("out of space for reg expr %.10s...", lastre);
794 for (; ; ) {
795 if ((c = *prestr++) == '\\') {
796 *bp++ = '\\';
797 if ((c = *prestr++) == '\0')
798 FATAL("nonterminated character class %.20s...", lastre);
799 *bp++ = c;
800 /* } else if (c == '\n') { */
801 /* FATAL("newline in character class %.20s...", lastre); */
802 } else if (c == '[' && *prestr == ':') {
803 /* POSIX char class names, Dag-Erling Smorgrav, des@ofug.org */
804 for (cc = charclasses; cc->cc_name; cc++)
805 if (strncmp((const char *) prestr + 1, (const char *) cc->cc_name, cc->cc_namelen) == 0)
806 break;
807 if (cc->cc_name != NULL && prestr[1 + cc->cc_namelen] == ':' &&
808 prestr[2 + cc->cc_namelen] == ']') {
809 prestr += cc->cc_namelen + 3;
810 for (i = 0; i < NCHARS; i++) {
811 if (!adjbuf((char **) &buf, &bufsz, bp-buf+1, 100, (char **) &bp, 0))
812 FATAL("out of space for reg expr %.10s...", lastre);
813 if (cc->cc_func(i)) {
814 *bp++ = i;
815 n++;
818 } else
819 *bp++ = c;
820 } else if (c == '\0') {
821 FATAL("nonterminated character class %.20s", lastre);
822 } else if (bp == buf) { /* 1st char is special */
823 *bp++ = c;
824 } else if (c == ']') {
825 *bp++ = 0;
826 rlxstr = (uschar *) tostring((char *) buf);
827 if (cflag == 0)
828 return CCL;
829 else
830 return NCCL;
831 } else
832 *bp++ = c;
837 int cgoto(fa *f, int s, int c)
839 int i, j, k;
840 int *p, *q;
842 assert(c == HAT || c < NCHARS);
843 while (f->accept >= maxsetvec) { /* guessing here! */
844 maxsetvec *= 4;
845 setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
846 tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
847 if (setvec == 0 || tmpset == 0)
848 overflo("out of space in cgoto()");
850 for (i = 0; i <= f->accept; i++)
851 setvec[i] = 0;
852 setcnt = 0;
853 /* compute positions of gototab[s,c] into setvec */
854 p = f->posns[s];
855 for (i = 1; i <= *p; i++) {
856 if ((k = f->re[p[i]].ltype) != FINAL) {
857 if ((k == CHAR && c == ptoi(f->re[p[i]].lval.np))
858 || (k == DOT && c != 0 && c != HAT)
859 || (k == ALL && c != 0)
860 || (k == CCL && member(c, (char *) f->re[p[i]].lval.up))
861 || (k == NCCL && !member(c, (char *) f->re[p[i]].lval.up) && c != 0 && c != HAT)) {
862 q = f->re[p[i]].lfollow;
863 for (j = 1; j <= *q; j++) {
864 if (q[j] >= maxsetvec) {
865 maxsetvec *= 4;
866 setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
867 tmpset = (int *) realloc(setvec, maxsetvec * sizeof(int));
868 if (setvec == 0 || tmpset == 0)
869 overflo("cgoto overflow");
871 if (setvec[q[j]] == 0) {
872 setcnt++;
873 setvec[q[j]] = 1;
879 /* determine if setvec is a previous state */
880 tmpset[0] = setcnt;
881 j = 1;
882 for (i = f->accept; i >= 0; i--)
883 if (setvec[i]) {
884 tmpset[j++] = i;
886 /* tmpset == previous state? */
887 for (i = 1; i <= f->curstat; i++) {
888 p = f->posns[i];
889 if ((k = tmpset[0]) != p[0])
890 goto different;
891 for (j = 1; j <= k; j++)
892 if (tmpset[j] != p[j])
893 goto different;
894 /* setvec is state i */
895 f->gototab[s][c] = i;
896 return i;
897 different:;
900 /* add tmpset to current set of states */
901 if (f->curstat >= NSTATES-1) {
902 f->curstat = 2;
903 f->reset = 1;
904 for (i = 2; i < NSTATES; i++)
905 xfree(f->posns[i]);
906 } else
907 ++(f->curstat);
908 for (i = 0; i < NCHARS; i++)
909 f->gototab[f->curstat][i] = 0;
910 xfree(f->posns[f->curstat]);
911 if ((p = (int *) calloc(1, (setcnt+1)*sizeof(int))) == NULL)
912 overflo("out of space in cgoto");
914 f->posns[f->curstat] = p;
915 f->gototab[s][c] = f->curstat;
916 for (i = 0; i <= setcnt; i++)
917 p[i] = tmpset[i];
918 if (setvec[f->accept])
919 f->out[f->curstat] = 1;
920 else
921 f->out[f->curstat] = 0;
922 return f->curstat;
926 void freefa(fa *f) /* free a finite automaton */
928 int i;
930 if (f == NULL)
931 return;
932 for (i = 0; i <= f->curstat; i++)
933 xfree(f->posns[i]);
934 for (i = 0; i <= f->accept; i++) {
935 xfree(f->re[i].lfollow);
936 if (f->re[i].ltype == CCL || f->re[i].ltype == NCCL)
937 xfree((f->re[i].lval.np));
939 xfree(f->restr);
940 xfree(f);