8322 nl: misleading-indentation
[unleashed/tickless.git] / usr / src / cmd / awk / b.c
blobe1bed57ab1f6c8b7bc548d32e68081f697c3c949
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License"). You may not use this file except in compliance
7 * with the License.
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
20 * CDDL HEADER END
24 * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
25 * Use is subject to license terms.
28 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
29 /* All Rights Reserved */
31 #pragma ident "%Z%%M% %I% %E% SMI"
33 #define DEBUG
35 #include "awk.h"
36 #include "y.tab.h"
38 #define HAT (NCHARS-1) /* matches ^ in regular expr */
39 /* NCHARS is 2**n */
40 #define MAXLIN (3 * LINE_MAX)
42 #define type(v) (v)->nobj
43 #define left(v) (v)->narg[0]
44 #define right(v) (v)->narg[1]
45 #define parent(v) (v)->nnext
47 #define LEAF case CCL: case NCCL: case CHAR: case DOT: case FINAL: case ALL:
48 #define UNARY case STAR: case PLUS: case QUEST:
51 * encoding in tree Nodes:
52 * leaf (CCL, NCCL, CHAR, DOT, FINAL, ALL):
53 * left is index, right contains value or pointer to value
54 * unary (STAR, PLUS, QUEST): left is child, right is null
55 * binary (CAT, OR): left and right are children
56 * parent contains pointer to parent
59 int setvec[MAXLIN];
60 int tmpset[MAXLIN];
61 Node *point[MAXLIN];
63 int rtok; /* next token in current re */
64 int rlxval;
65 uchar *rlxstr;
66 uchar *prestr; /* current position in current re */
67 uchar *lastre; /* origin of last re */
69 static int setcnt;
70 static int poscnt;
72 uchar *patbeg;
73 int patlen;
75 #define NFA 20 /* cache this many dynamic fa's */
76 fa *fatab[NFA];
77 int nfatab = 0; /* entries in fatab */
79 static fa *mkdfa(uchar *, int);
80 static int makeinit(fa *, int);
81 static void penter(Node *);
82 static void freetr(Node *);
83 static void overflo(char *);
84 static void cfoll(fa *, Node *);
85 static void follow(Node *);
86 static Node *reparse(uchar *);
87 static int relex(void);
88 static void freefa(fa *);
89 static int cgoto(fa *, int, int);
91 fa *
92 makedfa(uchar *s, int anchor) /* returns dfa for reg expr s */
94 int i, use, nuse;
95 fa *pfa;
97 if (compile_time) /* a constant for sure */
98 return (mkdfa(s, anchor));
99 for (i = 0; i < nfatab; i++) { /* is it there already? */
100 if (fatab[i]->anchor == anchor &&
101 strcmp((char *)fatab[i]->restr, (char *)s) == 0) {
102 fatab[i]->use++;
103 return (fatab[i]);
106 pfa = mkdfa(s, anchor);
107 if (nfatab < NFA) { /* room for another */
108 fatab[nfatab] = pfa;
109 fatab[nfatab]->use = 1;
110 nfatab++;
111 return (pfa);
113 use = fatab[0]->use; /* replace least-recently used */
114 nuse = 0;
115 for (i = 1; i < nfatab; i++)
116 if (fatab[i]->use < use) {
117 use = fatab[i]->use;
118 nuse = i;
120 freefa(fatab[nuse]);
121 fatab[nuse] = pfa;
122 pfa->use = 1;
123 return (pfa);
126 fa *
127 mkdfa(uchar *s, int anchor) /* does the real work of making a dfa */
128 /* anchor = 1 for anchored matches, else 0 */
130 Node *p, *p1;
131 fa *f;
133 p = reparse(s);
134 p1 = op2(CAT, op2(STAR, op2(ALL, NIL, NIL), NIL), p);
135 /* put ALL STAR in front of reg. exp. */
136 p1 = op2(CAT, p1, op2(FINAL, NIL, NIL));
137 /* put FINAL after reg. exp. */
139 poscnt = 0;
140 penter(p1); /* enter parent pointers and leaf indices */
141 if ((f = (fa *)calloc(1, sizeof (fa) + poscnt * sizeof (rrow))) == NULL)
142 overflo("no room for fa");
143 /* penter has computed number of positions in re */
144 f->accept = poscnt-1;
145 cfoll(f, p1); /* set up follow sets */
146 freetr(p1);
147 if ((f->posns[0] =
148 (int *)calloc(1, *(f->re[0].lfollow) * sizeof (int))) == NULL) {
149 overflo("out of space in makedfa");
151 if ((f->posns[1] = (int *)calloc(1, sizeof (int))) == NULL)
152 overflo("out of space in makedfa");
153 *f->posns[1] = 0;
154 f->initstat = makeinit(f, anchor);
155 f->anchor = anchor;
156 f->restr = tostring(s);
157 return (f);
160 static int
161 makeinit(fa *f, int anchor)
163 register int i, k;
165 f->curstat = 2;
166 f->out[2] = 0;
167 f->reset = 0;
168 k = *(f->re[0].lfollow);
169 xfree(f->posns[2]);
170 if ((f->posns[2] = (int *)calloc(1, (k+1) * sizeof (int))) == NULL)
171 overflo("out of space in makeinit");
172 for (i = 0; i <= k; i++) {
173 (f->posns[2])[i] = (f->re[0].lfollow)[i];
175 if ((f->posns[2])[1] == f->accept)
176 f->out[2] = 1;
177 for (i = 0; i < NCHARS; i++)
178 f->gototab[2][i] = 0;
179 f->curstat = cgoto(f, 2, HAT);
180 if (anchor) {
181 *f->posns[2] = k-1; /* leave out position 0 */
182 for (i = 0; i < k; i++) {
183 (f->posns[0])[i] = (f->posns[2])[i];
186 f->out[0] = f->out[2];
187 if (f->curstat != 2)
188 --(*f->posns[f->curstat]);
190 return (f->curstat);
193 void
194 penter(Node *p) /* set up parent pointers and leaf indices */
196 switch (type(p)) {
197 LEAF
198 left(p) = (Node *) poscnt;
199 point[poscnt++] = p;
200 break;
201 UNARY
202 penter(left(p));
203 parent(left(p)) = p;
204 break;
205 case CAT:
206 case OR:
207 penter(left(p));
208 penter(right(p));
209 parent(left(p)) = p;
210 parent(right(p)) = p;
211 break;
212 default:
213 ERROR "unknown type %d in penter", type(p) FATAL;
214 break;
218 static void
219 freetr(Node *p) /* free parse tree */
221 switch (type(p)) {
222 LEAF
223 xfree(p);
224 break;
225 UNARY
226 freetr(left(p));
227 xfree(p);
228 break;
229 case CAT:
230 case OR:
231 freetr(left(p));
232 freetr(right(p));
233 xfree(p);
234 break;
235 default:
236 ERROR "unknown type %d in freetr", type(p) FATAL;
237 break;
241 uchar *
242 cclenter(uchar *p)
244 register int i, c;
245 uchar *op, *chars, *ret;
246 size_t bsize;
248 init_buf(&chars, &bsize, LINE_INCR);
249 op = p;
250 i = 0;
251 while ((c = *p++) != 0) {
252 if (c == '\\') {
253 if ((c = *p++) == 't')
254 c = '\t';
255 else if (c == 'n')
256 c = '\n';
257 else if (c == 'f')
258 c = '\f';
259 else if (c == 'r')
260 c = '\r';
261 else if (c == 'b')
262 c = '\b';
263 else if (c == '\\')
264 c = '\\';
265 else if (isdigit(c)) {
266 int n = c - '0';
267 if (isdigit(*p)) {
268 n = 8 * n + *p++ - '0';
269 if (isdigit(*p))
270 n = 8 * n + *p++ - '0';
272 c = n;
273 } /* else */
274 /* c = c; */
275 } else if (c == '-' && i > 0 && chars[i-1] != 0) {
276 if (*p != 0) {
277 c = chars[i-1];
278 while ((uchar)c < *p) { /* fails if *p is \\ */
279 expand_buf(&chars, &bsize, i);
280 chars[i++] = ++c;
282 p++;
283 continue;
286 expand_buf(&chars, &bsize, i);
287 chars[i++] = c;
289 chars[i++] = '\0';
290 dprintf(("cclenter: in = |%s|, out = |%s|\n", op, chars));
291 xfree(op);
292 ret = tostring(chars);
293 free(chars);
294 return (ret);
297 static void
298 overflo(char *s)
300 ERROR "regular expression too big: %s", gettext((char *)s) FATAL;
303 /* enter follow set of each leaf of vertex v into lfollow[leaf] */
304 static void
305 cfoll(fa *f, Node *v)
307 register int i;
308 register int *p;
310 switch (type(v)) {
311 LEAF
312 f->re[(int)left(v)].ltype = type(v);
313 f->re[(int)left(v)].lval = (int)right(v);
314 for (i = 0; i <= f->accept; i++)
315 setvec[i] = 0;
316 setcnt = 0;
317 follow(v); /* computes setvec and setcnt */
318 if ((p = (int *)calloc(1, (setcnt+1) * sizeof (int))) == NULL)
319 overflo("follow set overflow");
320 f->re[(int)left(v)].lfollow = p;
321 *p = setcnt;
322 for (i = f->accept; i >= 0; i--) {
323 if (setvec[i] == 1)
324 *++p = i;
326 break;
327 UNARY
328 cfoll(f, left(v));
329 break;
330 case CAT:
331 case OR:
332 cfoll(f, left(v));
333 cfoll(f, right(v));
334 break;
335 default:
336 ERROR "unknown type %d in cfoll", type(v) FATAL;
341 * collects initially active leaves of p into setvec
342 * returns 0 or 1 depending on whether p matches empty string
344 static int
345 first(Node *p)
347 register int b;
349 switch (type(p)) {
350 LEAF
351 if (setvec[(int)left(p)] != 1) {
352 setvec[(int)left(p)] = 1;
353 setcnt++;
355 if (type(p) == CCL && (*(uchar *)right(p)) == '\0')
356 return (0); /* empty CCL */
357 else
358 return (1);
359 case PLUS:
360 if (first(left(p)) == 0)
361 return (0);
362 return (1);
363 case STAR:
364 case QUEST:
365 (void) first(left(p));
366 return (0);
367 case CAT:
368 if (first(left(p)) == 0 && first(right(p)) == 0)
369 return (0);
370 return (1);
371 case OR:
372 b = first(right(p));
373 if (first(left(p)) == 0 || b == 0)
374 return (0);
375 return (1);
377 ERROR "unknown type %d in first", type(p) FATAL;
378 return (-1);
381 /* collects leaves that can follow v into setvec */
382 static void
383 follow(Node *v)
385 Node *p;
387 if (type(v) == FINAL)
388 return;
389 p = parent(v);
390 switch (type(p)) {
391 case STAR:
392 case PLUS:
393 (void) first(v);
394 follow(p);
395 return;
397 case OR:
398 case QUEST:
399 follow(p);
400 return;
402 case CAT:
403 if (v == left(p)) { /* v is left child of p */
404 if (first(right(p)) == 0) {
405 follow(p);
406 return;
408 } else /* v is right child */
409 follow(p);
410 return;
411 default:
412 ERROR "unknown type %d in follow", type(p) FATAL;
413 break;
417 static int
418 member(uchar c, uchar *s) /* is c in s? */
420 while (*s)
421 if (c == *s++)
422 return (1);
423 return (0);
428 match(fa *f, uchar *p)
430 register int s, ns;
432 s = f->reset ? makeinit(f, 0) : f->initstat;
433 if (f->out[s])
434 return (1);
435 do {
436 if ((ns = f->gototab[s][*p]) != 0)
437 s = ns;
438 else
439 s = cgoto(f, s, *p);
440 if (f->out[s])
441 return (1);
442 } while (*p++ != 0);
443 return (0);
447 pmatch(fa *f, uchar *p)
449 register int s, ns;
450 register uchar *q;
451 int i, k;
453 if (f->reset) {
454 f->initstat = s = makeinit(f, 1);
455 } else {
456 s = f->initstat;
458 patbeg = p;
459 patlen = -1;
460 do {
461 q = p;
462 do {
463 if (f->out[s]) /* final state */
464 patlen = q-p;
465 if ((ns = f->gototab[s][*q]) != 0)
466 s = ns;
467 else
468 s = cgoto(f, s, *q);
469 if (s == 1) { /* no transition */
470 if (patlen >= 0) {
471 patbeg = p;
472 return (1);
473 } else
474 goto nextin; /* no match */
476 } while (*q++ != 0);
477 if (f->out[s])
478 patlen = q - p - 1; /* don't count $ */
479 if (patlen >= 0) {
480 patbeg = p;
481 return (1);
483 nextin:
484 s = 2;
485 if (f->reset) {
486 for (i = 2; i <= f->curstat; i++)
487 xfree(f->posns[i]);
488 k = *f->posns[0];
489 if ((f->posns[2] =
490 (int *)calloc(1, (k + 1) * sizeof (int))) == NULL) {
491 overflo("out of space in pmatch");
493 for (i = 0; i <= k; i++)
494 (f->posns[2])[i] = (f->posns[0])[i];
495 f->initstat = f->curstat = 2;
496 f->out[2] = f->out[0];
497 for (i = 0; i < NCHARS; i++)
498 f->gototab[2][i] = 0;
500 } while (*p++ != 0);
501 return (0);
505 nematch(fa *f, uchar *p)
507 register int s, ns;
508 register uchar *q;
509 int i, k;
511 if (f->reset) {
512 f->initstat = s = makeinit(f, 1);
513 } else {
514 s = f->initstat;
516 patlen = -1;
517 while (*p) {
518 q = p;
519 do {
520 if (f->out[s]) /* final state */
521 patlen = q-p;
522 if ((ns = f->gototab[s][*q]) != 0)
523 s = ns;
524 else
525 s = cgoto(f, s, *q);
526 if (s == 1) { /* no transition */
527 if (patlen > 0) {
528 patbeg = p;
529 return (1);
530 } else
531 goto nnextin; /* no nonempty match */
533 } while (*q++ != 0);
534 if (f->out[s])
535 patlen = q - p - 1; /* don't count $ */
536 if (patlen > 0) {
537 patbeg = p;
538 return (1);
540 nnextin:
541 s = 2;
542 if (f->reset) {
543 for (i = 2; i <= f->curstat; i++)
544 xfree(f->posns[i]);
545 k = *f->posns[0];
546 if ((f->posns[2] =
547 (int *)calloc(1, (k + 1) * sizeof (int))) == NULL) {
548 overflo("out of state space");
550 for (i = 0; i <= k; i++)
551 (f->posns[2])[i] = (f->posns[0])[i];
552 f->initstat = f->curstat = 2;
553 f->out[2] = f->out[0];
554 for (i = 0; i < NCHARS; i++)
555 f->gototab[2][i] = 0;
557 p++;
559 return (0);
562 static Node *regexp(void), *primary(void), *concat(Node *);
563 static Node *alt(Node *), *unary(Node *);
565 static Node *
566 reparse(uchar *p)
568 /* parses regular expression pointed to by p */
569 /* uses relex() to scan regular expression */
570 Node *np;
572 dprintf(("reparse <%s>\n", p));
573 lastre = prestr = p; /* prestr points to string to be parsed */
574 rtok = relex();
575 if (rtok == '\0')
576 ERROR "empty regular expression" FATAL;
577 np = regexp();
578 if (rtok == '\0') {
579 return (np);
580 } else {
581 ERROR "syntax error in regular expression %s at %s",
582 lastre, prestr FATAL;
584 /*NOTREACHED*/
585 return (NULL);
588 static Node *
589 regexp(void)
591 return (alt(concat(primary())));
594 static Node *
595 primary(void)
597 Node *np;
599 switch (rtok) {
600 case CHAR:
601 np = op2(CHAR, NIL, (Node *)rlxval);
602 rtok = relex();
603 return (unary(np));
604 case ALL:
605 rtok = relex();
606 return (unary(op2(ALL, NIL, NIL)));
607 case DOT:
608 rtok = relex();
609 return (unary(op2(DOT, NIL, NIL)));
610 case CCL:
611 /*LINTED align*/
612 np = op2(CCL, NIL, (Node *)cclenter(rlxstr));
613 rtok = relex();
614 return (unary(np));
615 case NCCL:
616 /*LINTED align*/
617 np = op2(NCCL, NIL, (Node *)cclenter(rlxstr));
618 rtok = relex();
619 return (unary(np));
620 case '^':
621 rtok = relex();
622 return (unary(op2(CHAR, NIL, (Node *)HAT)));
623 case '$':
624 rtok = relex();
625 return (unary(op2(CHAR, NIL, NIL)));
626 case '(':
627 rtok = relex();
628 if (rtok == ')') { /* special pleading for () */
629 rtok = relex();
630 return (unary(op2(CCL, NIL,
631 /*LINTED align*/
632 (Node *)tostring((uchar *)""))));
634 np = regexp();
635 if (rtok == ')') {
636 rtok = relex();
637 return (unary(np));
638 } else {
639 ERROR "syntax error in regular expression %s at %s",
640 lastre, prestr FATAL;
642 default:
643 ERROR "illegal primary in regular expression %s at %s",
644 lastre, prestr FATAL;
646 /*NOTREACHED*/
647 return (NULL);
650 static Node *
651 concat(Node *np)
653 switch (rtok) {
654 case CHAR: case DOT: case ALL: case CCL: case NCCL: case '$': case '(':
655 return (concat(op2(CAT, np, primary())));
656 default:
657 return (np);
661 static Node *
662 alt(Node *np)
664 if (rtok == OR) {
665 rtok = relex();
666 return (alt(op2(OR, np, concat(primary()))));
668 return (np);
671 static Node *
672 unary(Node *np)
674 switch (rtok) {
675 case STAR:
676 rtok = relex();
677 return (unary(op2(STAR, np, NIL)));
678 case PLUS:
679 rtok = relex();
680 return (unary(op2(PLUS, np, NIL)));
681 case QUEST:
682 rtok = relex();
683 return (unary(op2(QUEST, np, NIL)));
684 default:
685 return (np);
689 static int
690 relex(void) /* lexical analyzer for reparse */
692 register int c;
693 uchar *cbuf;
694 int clen, cflag;
696 switch (c = *prestr++) {
697 case '|': return OR;
698 case '*': return STAR;
699 case '+': return PLUS;
700 case '?': return QUEST;
701 case '.': return DOT;
702 case '\0': prestr--; return '\0';
703 case '^':
704 case '$':
705 case '(':
706 case ')':
707 return (c);
708 case '\\':
709 if ((c = *prestr++) == 't')
710 c = '\t';
711 else if (c == 'n')
712 c = '\n';
713 else if (c == 'f')
714 c = '\f';
715 else if (c == 'r')
716 c = '\r';
717 else if (c == 'b')
718 c = '\b';
719 else if (c == '\\')
720 c = '\\';
721 else if (isdigit(c)) {
722 int n = c - '0';
723 if (isdigit(*prestr)) {
724 n = 8 * n + *prestr++ - '0';
725 if (isdigit(*prestr))
726 n = 8 * n + *prestr++ - '0';
728 c = n;
729 } /* else it's now in c */
730 rlxval = c;
731 return (CHAR);
732 default:
733 rlxval = c;
734 return (CHAR);
735 case '[':
736 clen = 0;
737 if (*prestr == '^') {
738 cflag = 1;
739 prestr++;
740 } else
741 cflag = 0;
742 init_buf(&cbuf, NULL, strlen((char *)prestr) * 2 + 1);
743 for (;;) {
744 if ((c = *prestr++) == '\\') {
745 cbuf[clen++] = '\\';
746 if ((c = *prestr++) == '\0') {
747 ERROR
748 "nonterminated character class %s", lastre FATAL;
750 cbuf[clen++] = c;
751 } else if (c == ']') {
752 cbuf[clen] = 0;
753 rlxstr = tostring(cbuf);
754 free(cbuf);
755 if (cflag == 0)
756 return (CCL);
757 else
758 return (NCCL);
759 } else if (c == '\n') {
760 ERROR "newline in character class %s...",
761 lastre FATAL;
762 } else if (c == '\0') {
763 ERROR "nonterminated character class %s",
764 lastre FATAL;
765 } else
766 cbuf[clen++] = c;
768 /*NOTREACHED*/
770 return (0);
773 static int
774 cgoto(fa *f, int s, int c)
776 register int i, j, k;
777 register int *p, *q;
779 for (i = 0; i <= f->accept; i++)
780 setvec[i] = 0;
781 setcnt = 0;
782 /* compute positions of gototab[s,c] into setvec */
783 p = f->posns[s];
784 for (i = 1; i <= *p; i++) {
785 if ((k = f->re[p[i]].ltype) != FINAL) {
786 if (k == CHAR && c == f->re[p[i]].lval ||
787 k == DOT && c != 0 && c != HAT ||
788 k == ALL && c != 0 ||
789 k == CCL &&
790 member(c, (uchar *)f->re[p[i]].lval) ||
791 k == NCCL &&
792 !member(c, (uchar *)f->re[p[i]].lval) &&
793 c != 0 && c != HAT) {
794 q = f->re[p[i]].lfollow;
795 for (j = 1; j <= *q; j++) {
796 if (setvec[q[j]] == 0) {
797 setcnt++;
798 setvec[q[j]] = 1;
804 /* determine if setvec is a previous state */
805 tmpset[0] = setcnt;
806 j = 1;
807 for (i = f->accept; i >= 0; i--)
808 if (setvec[i]) {
809 tmpset[j++] = i;
811 /* tmpset == previous state? */
812 for (i = 1; i <= f->curstat; i++) {
813 p = f->posns[i];
814 if ((k = tmpset[0]) != p[0])
815 goto different;
816 for (j = 1; j <= k; j++)
817 if (tmpset[j] != p[j])
818 goto different;
819 /* setvec is state i */
820 f->gototab[s][c] = i;
821 return (i);
822 different:;
825 /* add tmpset to current set of states */
826 if (f->curstat >= NSTATES-1) {
827 f->curstat = 2;
828 f->reset = 1;
829 for (i = 2; i < NSTATES; i++)
830 xfree(f->posns[i]);
831 } else
832 ++(f->curstat);
833 for (i = 0; i < NCHARS; i++)
834 f->gototab[f->curstat][i] = 0;
835 xfree(f->posns[f->curstat]);
836 if ((p = (int *)calloc(1, (setcnt + 1) * sizeof (int))) == NULL)
837 overflo("out of space in cgoto");
839 f->posns[f->curstat] = p;
840 f->gototab[s][c] = f->curstat;
841 for (i = 0; i <= setcnt; i++)
842 p[i] = tmpset[i];
843 if (setvec[f->accept])
844 f->out[f->curstat] = 1;
845 else
846 f->out[f->curstat] = 0;
847 return (f->curstat);
850 static void
851 freefa(fa *f)
854 register int i;
856 if (f == NULL)
857 return;
858 for (i = 0; i <= f->curstat; i++)
859 xfree(f->posns[i]);
860 for (i = 0; i <= f->accept; i++)
861 xfree(f->re[i].lfollow);
862 xfree(f->restr);
863 xfree(f);