4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License"). You may not use this file except in compliance
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
24 * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
25 * Use is subject to license terms.
28 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
29 /* All Rights Reserved */
31 #pragma ident "%Z%%M% %I% %E% SMI"
38 #define HAT (NCHARS-1) /* matches ^ in regular expr */
40 #define MAXLIN (3 * LINE_MAX)
42 #define type(v) (v)->nobj
43 #define left(v) (v)->narg[0]
44 #define right(v) (v)->narg[1]
45 #define parent(v) (v)->nnext
47 #define LEAF case CCL: case NCCL: case CHAR: case DOT: case FINAL: case ALL:
48 #define UNARY case STAR: case PLUS: case QUEST:
51 * encoding in tree Nodes:
52 * leaf (CCL, NCCL, CHAR, DOT, FINAL, ALL):
53 * left is index, right contains value or pointer to value
54 * unary (STAR, PLUS, QUEST): left is child, right is null
55 * binary (CAT, OR): left and right are children
56 * parent contains pointer to parent
63 int rtok
; /* next token in current re */
66 uchar
*prestr
; /* current position in current re */
67 uchar
*lastre
; /* origin of last re */
75 #define NFA 20 /* cache this many dynamic fa's */
77 int nfatab
= 0; /* entries in fatab */
79 static fa
*mkdfa(uchar
*, int);
80 static int makeinit(fa
*, int);
81 static void penter(Node
*);
82 static void freetr(Node
*);
83 static void overflo(char *);
84 static void cfoll(fa
*, Node
*);
85 static void follow(Node
*);
86 static Node
*reparse(uchar
*);
87 static int relex(void);
88 static void freefa(fa
*);
89 static int cgoto(fa
*, int, int);
92 makedfa(uchar
*s
, int anchor
) /* returns dfa for reg expr s */
97 if (compile_time
) /* a constant for sure */
98 return (mkdfa(s
, anchor
));
99 for (i
= 0; i
< nfatab
; i
++) { /* is it there already? */
100 if (fatab
[i
]->anchor
== anchor
&&
101 strcmp((char *)fatab
[i
]->restr
, (char *)s
) == 0) {
106 pfa
= mkdfa(s
, anchor
);
107 if (nfatab
< NFA
) { /* room for another */
109 fatab
[nfatab
]->use
= 1;
113 use
= fatab
[0]->use
; /* replace least-recently used */
115 for (i
= 1; i
< nfatab
; i
++)
116 if (fatab
[i
]->use
< use
) {
127 mkdfa(uchar
*s
, int anchor
) /* does the real work of making a dfa */
128 /* anchor = 1 for anchored matches, else 0 */
134 p1
= op2(CAT
, op2(STAR
, op2(ALL
, NIL
, NIL
), NIL
), p
);
135 /* put ALL STAR in front of reg. exp. */
136 p1
= op2(CAT
, p1
, op2(FINAL
, NIL
, NIL
));
137 /* put FINAL after reg. exp. */
140 penter(p1
); /* enter parent pointers and leaf indices */
141 if ((f
= (fa
*)calloc(1, sizeof (fa
) + poscnt
* sizeof (rrow
))) == NULL
)
142 overflo("no room for fa");
143 /* penter has computed number of positions in re */
144 f
->accept
= poscnt
-1;
145 cfoll(f
, p1
); /* set up follow sets */
148 (int *)calloc(1, *(f
->re
[0].lfollow
) * sizeof (int))) == NULL
) {
149 overflo("out of space in makedfa");
151 if ((f
->posns
[1] = (int *)calloc(1, sizeof (int))) == NULL
)
152 overflo("out of space in makedfa");
154 f
->initstat
= makeinit(f
, anchor
);
156 f
->restr
= tostring(s
);
161 makeinit(fa
*f
, int anchor
)
168 k
= *(f
->re
[0].lfollow
);
170 if ((f
->posns
[2] = (int *)calloc(1, (k
+1) * sizeof (int))) == NULL
)
171 overflo("out of space in makeinit");
172 for (i
= 0; i
<= k
; i
++) {
173 (f
->posns
[2])[i
] = (f
->re
[0].lfollow
)[i
];
175 if ((f
->posns
[2])[1] == f
->accept
)
177 for (i
= 0; i
< NCHARS
; i
++)
178 f
->gototab
[2][i
] = 0;
179 f
->curstat
= cgoto(f
, 2, HAT
);
181 *f
->posns
[2] = k
-1; /* leave out position 0 */
182 for (i
= 0; i
< k
; i
++) {
183 (f
->posns
[0])[i
] = (f
->posns
[2])[i
];
186 f
->out
[0] = f
->out
[2];
188 --(*f
->posns
[f
->curstat
]);
194 penter(Node
*p
) /* set up parent pointers and leaf indices */
198 left(p
) = (Node
*) poscnt
;
210 parent(right(p
)) = p
;
213 ERROR
"unknown type %d in penter", type(p
) FATAL
;
219 freetr(Node
*p
) /* free parse tree */
236 ERROR
"unknown type %d in freetr", type(p
) FATAL
;
245 uchar
*op
, *chars
, *ret
;
248 init_buf(&chars
, &bsize
, LINE_INCR
);
251 while ((c
= *p
++) != 0) {
253 if ((c
= *p
++) == 't')
265 else if (isdigit(c
)) {
268 n
= 8 * n
+ *p
++ - '0';
270 n
= 8 * n
+ *p
++ - '0';
275 } else if (c
== '-' && i
> 0 && chars
[i
-1] != 0) {
278 while ((uchar
)c
< *p
) { /* fails if *p is \\ */
279 expand_buf(&chars
, &bsize
, i
);
286 expand_buf(&chars
, &bsize
, i
);
290 dprintf(("cclenter: in = |%s|, out = |%s|\n", op
, chars
));
292 ret
= tostring(chars
);
300 ERROR
"regular expression too big: %s", gettext((char *)s
) FATAL
;
303 /* enter follow set of each leaf of vertex v into lfollow[leaf] */
305 cfoll(fa
*f
, Node
*v
)
312 f
->re
[(int)left(v
)].ltype
= type(v
);
313 f
->re
[(int)left(v
)].lval
= (int)right(v
);
314 for (i
= 0; i
<= f
->accept
; i
++)
317 follow(v
); /* computes setvec and setcnt */
318 if ((p
= (int *)calloc(1, (setcnt
+1) * sizeof (int))) == NULL
)
319 overflo("follow set overflow");
320 f
->re
[(int)left(v
)].lfollow
= p
;
322 for (i
= f
->accept
; i
>= 0; i
--) {
336 ERROR
"unknown type %d in cfoll", type(v
) FATAL
;
341 * collects initially active leaves of p into setvec
342 * returns 0 or 1 depending on whether p matches empty string
351 if (setvec
[(int)left(p
)] != 1) {
352 setvec
[(int)left(p
)] = 1;
355 if (type(p
) == CCL
&& (*(uchar
*)right(p
)) == '\0')
356 return (0); /* empty CCL */
360 if (first(left(p
)) == 0)
365 (void) first(left(p
));
368 if (first(left(p
)) == 0 && first(right(p
)) == 0)
373 if (first(left(p
)) == 0 || b
== 0)
377 ERROR
"unknown type %d in first", type(p
) FATAL
;
381 /* collects leaves that can follow v into setvec */
387 if (type(v
) == FINAL
)
403 if (v
== left(p
)) { /* v is left child of p */
404 if (first(right(p
)) == 0) {
408 } else /* v is right child */
412 ERROR
"unknown type %d in follow", type(p
) FATAL
;
418 member(uchar c
, uchar
*s
) /* is c in s? */
428 match(fa
*f
, uchar
*p
)
432 s
= f
->reset
? makeinit(f
, 0) : f
->initstat
;
436 if ((ns
= f
->gototab
[s
][*p
]) != 0)
447 pmatch(fa
*f
, uchar
*p
)
454 f
->initstat
= s
= makeinit(f
, 1);
463 if (f
->out
[s
]) /* final state */
465 if ((ns
= f
->gototab
[s
][*q
]) != 0)
469 if (s
== 1) { /* no transition */
474 goto nextin
; /* no match */
478 patlen
= q
- p
- 1; /* don't count $ */
486 for (i
= 2; i
<= f
->curstat
; i
++)
490 (int *)calloc(1, (k
+ 1) * sizeof (int))) == NULL
) {
491 overflo("out of space in pmatch");
493 for (i
= 0; i
<= k
; i
++)
494 (f
->posns
[2])[i
] = (f
->posns
[0])[i
];
495 f
->initstat
= f
->curstat
= 2;
496 f
->out
[2] = f
->out
[0];
497 for (i
= 0; i
< NCHARS
; i
++)
498 f
->gototab
[2][i
] = 0;
505 nematch(fa
*f
, uchar
*p
)
512 f
->initstat
= s
= makeinit(f
, 1);
520 if (f
->out
[s
]) /* final state */
522 if ((ns
= f
->gototab
[s
][*q
]) != 0)
526 if (s
== 1) { /* no transition */
531 goto nnextin
; /* no nonempty match */
535 patlen
= q
- p
- 1; /* don't count $ */
543 for (i
= 2; i
<= f
->curstat
; i
++)
547 (int *)calloc(1, (k
+ 1) * sizeof (int))) == NULL
) {
548 overflo("out of state space");
550 for (i
= 0; i
<= k
; i
++)
551 (f
->posns
[2])[i
] = (f
->posns
[0])[i
];
552 f
->initstat
= f
->curstat
= 2;
553 f
->out
[2] = f
->out
[0];
554 for (i
= 0; i
< NCHARS
; i
++)
555 f
->gototab
[2][i
] = 0;
562 static Node
*regexp(void), *primary(void), *concat(Node
*);
563 static Node
*alt(Node
*), *unary(Node
*);
568 /* parses regular expression pointed to by p */
569 /* uses relex() to scan regular expression */
572 dprintf(("reparse <%s>\n", p
));
573 lastre
= prestr
= p
; /* prestr points to string to be parsed */
576 ERROR
"empty regular expression" FATAL
;
581 ERROR
"syntax error in regular expression %s at %s",
582 lastre
, prestr FATAL
;
591 return (alt(concat(primary())));
601 np
= op2(CHAR
, NIL
, (Node
*)rlxval
);
606 return (unary(op2(ALL
, NIL
, NIL
)));
609 return (unary(op2(DOT
, NIL
, NIL
)));
612 np
= op2(CCL
, NIL
, (Node
*)cclenter(rlxstr
));
617 np
= op2(NCCL
, NIL
, (Node
*)cclenter(rlxstr
));
622 return (unary(op2(CHAR
, NIL
, (Node
*)HAT
)));
625 return (unary(op2(CHAR
, NIL
, NIL
)));
628 if (rtok
== ')') { /* special pleading for () */
630 return (unary(op2(CCL
, NIL
,
632 (Node
*)tostring((uchar
*)""))));
639 ERROR
"syntax error in regular expression %s at %s",
640 lastre
, prestr FATAL
;
643 ERROR
"illegal primary in regular expression %s at %s",
644 lastre
, prestr FATAL
;
654 case CHAR
: case DOT
: case ALL
: case CCL
: case NCCL
: case '$': case '(':
655 return (concat(op2(CAT
, np
, primary())));
666 return (alt(op2(OR
, np
, concat(primary()))));
677 return (unary(op2(STAR
, np
, NIL
)));
680 return (unary(op2(PLUS
, np
, NIL
)));
683 return (unary(op2(QUEST
, np
, NIL
)));
690 relex(void) /* lexical analyzer for reparse */
696 switch (c
= *prestr
++) {
698 case '*': return STAR
;
699 case '+': return PLUS
;
700 case '?': return QUEST
;
701 case '.': return DOT
;
702 case '\0': prestr
--; return '\0';
709 if ((c
= *prestr
++) == 't')
721 else if (isdigit(c
)) {
723 if (isdigit(*prestr
)) {
724 n
= 8 * n
+ *prestr
++ - '0';
725 if (isdigit(*prestr
))
726 n
= 8 * n
+ *prestr
++ - '0';
729 } /* else it's now in c */
737 if (*prestr
== '^') {
742 init_buf(&cbuf
, NULL
, strlen((char *)prestr
) * 2 + 1);
744 if ((c
= *prestr
++) == '\\') {
746 if ((c
= *prestr
++) == '\0') {
748 "nonterminated character class %s", lastre FATAL
;
751 } else if (c
== ']') {
753 rlxstr
= tostring(cbuf
);
759 } else if (c
== '\n') {
760 ERROR
"newline in character class %s...",
762 } else if (c
== '\0') {
763 ERROR
"nonterminated character class %s",
774 cgoto(fa
*f
, int s
, int c
)
776 register int i
, j
, k
;
779 for (i
= 0; i
<= f
->accept
; i
++)
782 /* compute positions of gototab[s,c] into setvec */
784 for (i
= 1; i
<= *p
; i
++) {
785 if ((k
= f
->re
[p
[i
]].ltype
) != FINAL
) {
786 if (k
== CHAR
&& c
== f
->re
[p
[i
]].lval
||
787 k
== DOT
&& c
!= 0 && c
!= HAT
||
788 k
== ALL
&& c
!= 0 ||
790 member(c
, (uchar
*)f
->re
[p
[i
]].lval
) ||
792 !member(c
, (uchar
*)f
->re
[p
[i
]].lval
) &&
793 c
!= 0 && c
!= HAT
) {
794 q
= f
->re
[p
[i
]].lfollow
;
795 for (j
= 1; j
<= *q
; j
++) {
796 if (setvec
[q
[j
]] == 0) {
804 /* determine if setvec is a previous state */
807 for (i
= f
->accept
; i
>= 0; i
--)
811 /* tmpset == previous state? */
812 for (i
= 1; i
<= f
->curstat
; i
++) {
814 if ((k
= tmpset
[0]) != p
[0])
816 for (j
= 1; j
<= k
; j
++)
817 if (tmpset
[j
] != p
[j
])
819 /* setvec is state i */
820 f
->gototab
[s
][c
] = i
;
825 /* add tmpset to current set of states */
826 if (f
->curstat
>= NSTATES
-1) {
829 for (i
= 2; i
< NSTATES
; i
++)
833 for (i
= 0; i
< NCHARS
; i
++)
834 f
->gototab
[f
->curstat
][i
] = 0;
835 xfree(f
->posns
[f
->curstat
]);
836 if ((p
= (int *)calloc(1, (setcnt
+ 1) * sizeof (int))) == NULL
)
837 overflo("out of space in cgoto");
839 f
->posns
[f
->curstat
] = p
;
840 f
->gototab
[s
][c
] = f
->curstat
;
841 for (i
= 0; i
<= setcnt
; i
++)
843 if (setvec
[f
->accept
])
844 f
->out
[f
->curstat
] = 1;
846 f
->out
[f
->curstat
] = 0;
858 for (i
= 0; i
<= f
->curstat
; i
++)
860 for (i
= 0; i
<= f
->accept
; i
++)
861 xfree(f
->re
[i
].lfollow
);