4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License"). You may not use this file except in compliance
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
22 /* Copyright (c) 1990 AT&T */
23 /* All Rights Reserved */
27 * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
28 * Use is subject to license terms.
31 #pragma ident "%Z%%M% %I% %E% SMI"
34 * cscope - interactive C symbol or text cross-reference
36 * text searching functions
53 typedef struct re_bm
{
78 typedef struct re_cw
{
79 int maxdepth
, mindepth
;
87 /* lit expression types */
88 Literal
, Dot
, Charclass
, EOP
,
90 /* non-lit expression types */
91 Cat
, Alternate
, Star
, Plus
, Quest
,
93 /* not really expression types, just helping */
100 typedef struct Expr
{
101 Exprtype type
; /* Type of expression (in grammar) */
102 ID id
; /* unique ID of lit expression */
103 int lit
; /* Literal character or tag */
104 int flen
; /* Number of following lit expressions */
105 ID
*follow
; /* Array of IDs of following lit expressions */
106 struct Expr
*l
; /* pointer to Left child (or ccl count) */
107 struct Expr
*r
; /* pointer to Right child (or ccl mask) */
108 struct Expr
*parent
; /* pointer to Parent */
111 typedef struct State
{
112 struct State
*tab
[256];
113 int cnt
; /* number of matched chars on full match, -1 otherwise */
114 int npos
; /* number of IDs in position set for this state. */
115 int pos
; /* index into posbase for beginning of IDs */
119 ID id
; /* Lit Expression id */
120 int fcount
; /* Number of Lit exp matches before this one. */
123 typedef struct Positionset
{
124 int count
; /* Number of lit exps in position set */
125 ID last
; /* ID of last lit exp in position set */
126 FID
*base
; /* array of MAXID FIDS */
127 /* 0 means not in position set */
128 /* -1 means first in position set */
129 /* n (>0) is ID of prev member of position set. */
132 typedef struct re_re
{
133 FID
*posbase
; /* Array of IDs from all states */
134 int nposalloc
; /* Allocated size of posbase */
135 int posnext
; /* Index into free space in posbase */
136 int posreset
; /* Index into end of IDs for initial state in posbase */
137 int maxid
; /* Number of (also maximum ID of) lit expressions */
138 Expr
*root
; /* Pointer to root (EOP) expression */
139 Expr
**ptr
; /* Pointer to array of ptrs to lit expressions. */
140 uchar_t
*cmap
; /* Character mapping array */
141 Positionset firstpos
;
143 int nstates
; /* Number of current states defined */
144 int statelim
; /* Limit on number of states before flushing */
145 State
*states
; /* Array of states */
146 State istate
; /* Initial state */
163 BEGIN
, /* File is not yet in buffer at all */
164 MORE
, /* File is partly in buffer */
165 NO_MORE
/* File has been completely read into buffer */
169 uchar_t
*prntbuf
; /* current line of input from data file */
170 uchar_t
*newline
; /* end of line (real or sentinel \n) */
171 long ln
; /* line number */
177 #define CCL_SET(a, c) ((a)[(c) >> 3] |= bittab[(c) & 07])
178 #define CCL_CLR(a, c) ((a)[(c) >> 3] &= ~bittab[(c) & 07])
179 #define CCL_CHK(a, c) ((a)[(c) >> 3] & bittab[(c) & 07])
181 #define ESIZE (BUFSIZ)
182 #define MAXBUFSIZE (64*BUFSIZ)
184 #define MAXMALLOCS 1024
185 #define MAXLIT 256 /* is plenty big enough */
186 #define LARGE MAXBUFSIZE+ESIZE+2
188 #define CLEAR(r, rps) (void) memset((char *)(rps)->base, 0, \
189 (int)((r)->maxid * sizeof (FID))), \
190 (rps)->count = 0, (rps)->last = -1
191 #define SET(rps, n, cnt) { \
192 if ((rps)->base[n].id == 0) {\
194 (rps)->base[n].fcount = (cnt);\
195 (rps)->base[n].id = (rps)->last;\
197 } else if ((cnt) > (rps)->base[n].fcount) {\
198 (rps)->base[n].fcount = (cnt);\
201 #define START { _p = tmp; }
202 #define ADDL(c) { if (_p >= &tmp[MAXLIT]) _p--; *_p++ = c; }
203 #define FINISH { ADDL(0) if ((_p-tmp) > bestlen) \
204 (void) memmove(best, tmp, bestlen = _p-tmp); }
207 #define NEW(N) (froot?(t = froot, froot = froot->next, t->next = NULL, \
208 t->node = N, t): newlink(0, N))
209 #define ADD(N) if (qtail) qtail = qtail->next = NEW(N); \
210 else qtail = qhead = NEW(N)
211 #define DEL() { Link *_l = qhead; if ((qhead = qhead->next) == NULL) \
212 qtail = NULL; _l->next = froot; froot = _l; }
214 static uchar_t
*buffer
;
215 static uchar_t
*bufend
;
219 static int file_desc
;
220 static FILE_STAT file_stat
;
221 static PATTERN match_pattern
;
222 static uchar_t char_map
[2][256];
224 static Exprtype toktype
;
225 static int toklit
, parno
, maxid
;
226 static uchar_t tmp
[MAXLIT
], best
[MAXLIT
];
229 static Node
*next_node
;
230 static Link
*froot
, *next_link
;
234 static uchar_t
*mallocs
[MAXMALLOCS
];
236 static uchar_t bittab
[] = { 1, 2, 4, 8, 16, 32, 64, 128 };
239 #define TRACE(n) (n < debug)
240 #define EPRINTSIZE 50000
241 static void spr(int c
, int *p
, uchar_t
*buf
);
242 void epr(Expr
*e
, uchar_t
*res
);
243 static int debug
= 12;
246 static void init_file(LINE
*cur_ptr
);
247 static void get_line(LINE
*cur_ptr
, uchar_t
*s
);
248 static void get_ncount(LINE
*cur_ptr
, uchar_t
*s
);
249 static int execute(void);
250 static State
*startstate(re_re
*r
);
251 static State
*stateof(re_re
*r
, Positionset
*ps
);
252 static State
*nextstate(re_re
*r
, State
*s
, int a
);
253 static State
*addstate(re_re
*r
, Positionset
*ps
, int cnt
);
254 static BOOL
match(Expr
*e
, int a
);
255 static BOOL
first_lit(Positionset
*fpos
, Expr
*e
);
256 static void eptr(re_re
*r
, Expr
*e
);
257 static void efollow(re_re
*r
, Positionset
*fpos
, Expr
*e
);
258 static void follow(Positionset
*fpos
, Expr
*e
);
259 static void followstate(re_re
*r
, State
*s
, int a
, Positionset
*fpos
);
260 static Expr
*eall(re_re
*r
, PATTERN
*pat
);
261 static Expr
*d0(re_re
*r
, PATTERN
*pat
);
262 static Expr
*d1(re_re
*r
, PATTERN
*pat
);
263 static Expr
*d2(re_re
*r
, PATTERN
*pat
);
264 static Expr
*d3(re_re
*r
, PATTERN
*pat
);
265 static Expr
*newexpr(Exprtype t
, int lit
, Expr
*left
, Expr
*right
);
266 static void lex(re_re
*r
, PATTERN
*pat
);
267 static int re_lit(PATTERN
*pat
, uchar_t
**b
, uchar_t
**e
);
268 static void traverse(PATTERN
*pat
, Expr
*e
);
269 static int ccl(PATTERN
*pat
, uchar_t
*tab
);
270 static BOOL
altlist(), word();
271 static BOOL
altlist(Expr
*e
, uchar_t
*buf
, re_cw
*pat
);
272 static Node
*newnode(re_cw
*c
, int d
);
273 static Link
*newlink(uchar_t lit
, Node
*n
);
274 static void fail(Node
*root
);
275 static void zeroroot(Node
*root
, Node
*n
);
276 static void shift(re_cw
*c
);
277 static void shifttab(Node
*n
);
278 static void shiftprop(re_cw
*c
, Node
*n
);
279 static void delta_2(re_bm
*b
);
280 static int getstate(re_re
*r
, Positionset
*ps
);
281 static void savestate(re_re
*r
);
282 static void stateinit(re_re
*r
);
283 static re_bm
*re_bmcomp(uchar_t
*pb
, uchar_t
*pe
, uchar_t
*cmap
);
284 static re_cw
*re_cwinit(uchar_t
*cmap
);
285 static re_cw
*re_recw(re_re
*r
, uchar_t
*map
);
286 static re_re
*egprep(PATTERN
*pat
);
287 static void re_cwadd(re_cw
*c
, uchar_t
*s
, uchar_t
*e
);
288 static void re_cwcomp(re_cw
*c
);
289 static void eginit(re_re
*r
);
290 static BOOL
re_bmexec(PATTERN
*pat
, uchar_t
*s
, uchar_t
*e
, uchar_t
**mb
,
292 static BOOL
re_cwexec(PATTERN
*pat
, uchar_t
*rs
, uchar_t
*re
, uchar_t
**mb
,
294 static BOOL
re_reexec(PATTERN
*pat
, uchar_t
*b
, uchar_t
*e
, uchar_t
**mb
,
296 static uchar_t
*egmalloc(size_t n
);
297 static void fgetfile(LINE
*cur_ptr
);
298 static void dogre(PATTERN
*pat
);
299 static BOOL
pattern_match(PATTERN
*pat
, LINE
*lptr
);
300 static BOOL
fixloc(uchar_t
**mb
, uchar_t
**me
);
301 static BOOL
grepmatch(PATTERN
*pat
, uchar_t
**mb
, uchar_t
**me
);
302 static void err(char *s
);
304 static char *message
;
309 iflag
= i
; /* simulate "egrep -i" */
313 egrepinit(char *expression
)
315 static int firsttime
= 1;
320 for (i
= 0; i
< 256; i
++) {
321 char_map
[0][i
] = (uchar_t
)i
;
322 char_map
[1][i
] = tolower(i
);
325 for (i
= 0; i
< nmalloc
; i
++)
330 match_pattern
.expression
= (uchar_t
*)expression
;
331 match_pattern
.cmap
= char_map
[iflag
];
332 if (setjmp(env
) == 0) {
333 dogre(&match_pattern
);
336 PATTERN
*p
= match_pattern
;
337 if (p
->procfn
== re_bmexec
)
339 if (p
->succfn
== re_reexec
)
340 printf("PARTIAL BOYER_MOORE\n");
342 printf("PARTIAL B_M with GREP\n");
344 printf("FULL BOYER_MOORE\n");
345 else if (p
->procfn
== re_cwexec
)
361 printf("PATTERN %s\n", pat
->expression
);
363 pat
->re_ptr
= egprep(pat
);
364 bestlen
= re_lit(pat
, &lb
, &le
);
366 if (bestlen
&& pat
->fullmatch
) { /* Full Boyer Moore */
368 printf("BESTLEN %d\n", bestlen
);
371 for (p
= lb
; p
< le
; p
++) printf("%c", *p
);
375 pat
->bm_ptr
= re_bmcomp(lb
, le
, pat
->cmap
);
376 pat
->procfn
= re_bmexec
;
380 /* Partial Boyer Moore */
381 pat
->bm_ptr
= re_bmcomp(lb
, le
, pat
->cmap
);
382 pat
->procfn
= re_bmexec
;
385 pat
->fullmatch
= YES
;
386 if ((pat
->cw_ptr
= re_recw(pat
->re_ptr
, pat
->cmap
)) != NULL
) {
387 pat
->procfn
= re_cwexec
; /* CW */
391 /* general egrep regular expression */
392 pat
->succfn
= re_reexec
;
394 if (pat
->fullmatch
) {
395 pat
->procfn
= pat
->succfn
;
401 fixloc(uchar_t
**mb
, uchar_t
**me
)
403 /* Handle match to null string */
408 if (*(*me
- 1) != NL
)
412 /* Handle match to new-line only */
414 if (*mb
== *me
- 1 && **mb
== NL
) {
418 /* Handle match including beginning or ending new-line */
422 if (*(*me
- 1) == NL
)
428 grepmatch(PATTERN
*pat
, uchar_t
**mb
, uchar_t
**me
)
433 return (fixloc(mb
, me
));
435 for (f
= *me
- 1; *f
!= NL
; f
++) {
438 for (s
= *mb
; *s
!= NL
; s
--) {
441 if ((*pat
->succfn
)(pat
, s
, f
, mb
, me
)) {
454 r
->ptr
= (Expr
**)egmalloc(r
->maxid
* sizeof (Expr
*));
456 n
= r
->maxid
* sizeof (FID
);
457 r
->firstpos
.base
= (FID
*)egmalloc(n
);
458 r
->tmp
.base
= (FID
*)egmalloc(n
);
459 CLEAR(r
, &r
->firstpos
);
460 if (!first_lit(&r
->firstpos
, r
->root
->l
)) {
462 * This expression matches the null string!!!!
463 * Add EOP to beginning position set.
465 SET(&r
->firstpos
, r
->root
->id
, 0)
466 /* (void) printf("first of root->l == 0, b=%s\n", b); */
469 (void) addstate(r
, &r
->firstpos
, 0);
474 eptr(re_re
*r
, Expr
*e
)
476 if ((e
->id
< 0) || (e
->id
>= r
->maxid
)) {
477 err("internal error");
480 if (e
->type
!= Charclass
) {
481 if (e
->l
) eptr(r
, e
->l
);
482 if (e
->r
) eptr(r
, e
->r
);
487 re_reexec(PATTERN
*pat
, uchar_t
*b
, uchar_t
*e
, uchar_t
**mb
, uchar_t
**me
)
489 re_re
*r
= pat
->re_ptr
;
494 uchar_t buf
[EPRINTSIZE
];
496 (void) printf("expr='%s'\n", buf
);
507 (void) printf("match at input '%s'\n", b
);
519 (void) printf("state %d: char '%c'\n", s
-r
->states
, *b
);
521 if ((t
= s
->tab
[c
]) != NULL
) s
= t
;
522 else s
= nextstate(r
, s
, (int)c
);
527 uchar_t buf
[EPRINTSIZE
];
530 (void) printf("pat = %s\n", buf
);
537 match(Expr
*e
, int a
)
540 case Dot
: return ((BOOL
)(a
!= NL
));
541 case Literal
: return ((BOOL
)(a
== e
->lit
));
542 case Charclass
: return ((BOOL
)(CCL_CHK((uchar_t
*)e
->r
, a
)));
543 default: return (NO
);
548 * generates the followset for a node in fpos
551 follow(Positionset
*fpos
, Expr
*e
)
565 (void) first_lit(fpos
, e
);
573 if (e
== p
->r
|| !first_lit(fpos
, p
->r
))
582 * first_lit returns NO if e is nullable and in the process,
586 first_lit(Positionset
*fpos
, Expr
*e
)
599 (void) first_lit(fpos
, e
->l
);
602 return (first_lit(fpos
, e
->l
));
604 return ((BOOL
)(first_lit(fpos
, e
->l
) || first_lit(fpos
, e
->r
)));
606 k
= first_lit(fpos
, e
->r
);
607 return ((BOOL
)(first_lit(fpos
, e
->l
) && k
));
609 err("internal error");
615 efollow(re_re
*r
, Positionset
*fpos
, Expr
*e
)
621 e
->flen
= fpos
->count
;
622 e
->follow
= (ID
*)egmalloc(e
->flen
* sizeof (ID
));
625 printf("ID = %d LIT %c FLEN = %d\n", e
->id
, e
->lit
, e
->flen
);
627 for (i
= fpos
->last
; i
> 0; i
= fpos
->base
[i
].id
) {
630 printf("FOLLOW ID = %d LIT %c\n", r
->ptr
[i
]->id
, r
->ptr
[i
]->lit
);
633 if (p
!= e
->follow
+ e
->flen
) {
634 err("internal error");
639 addstate(re_re
*r
, Positionset
*ps
, int cnt
)
646 s
= r
->states
+ getstate(r
, ps
);
647 (void) memset((char *)s
->tab
, 0, sizeof (s
->tab
));
648 s
->cnt
= r
->istate
.cnt
;
654 r
->posnext
+= ps
->count
;
656 p
= r
->posbase
+ s
->pos
;
657 for (j
= ps
->last
; j
> 0; p
++, j
= q
->id
) {
660 p
->fcount
= q
->fcount
;
661 if (p
->id
== r
->root
->id
&& s
->cnt
< p
->fcount
)
667 spr(s
->npos
, s
->pos
+r
->posbase
, buf
);
668 (void) printf("new state[%d] %s%s\n", s
-r
->states
, buf
,
676 nextstate(re_re
*r
, State
*s
, int a
)
681 followstate(r
, s
, a
, &r
->tmp
);
682 if (s
!= &r
->istate
) followstate(r
, &r
->istate
, a
, &r
->tmp
);
688 (void) printf("nextstate(%d, '%c'): found %s\n", s
-r
->states
,
692 if (r
->tmp
.count
== 0)
694 else if ((news
= stateof(r
, &r
->tmp
)) == NULL
)
695 news
= addstate(r
, &r
->tmp
, 1);
699 (void) printf("nextstate(%d, '%c'): returning %ld\n",
700 s
-r
->states
, a
, news
);
707 followstate(re_re
*r
, State
*s
, int a
, Positionset
*fpos
)
713 for (j
= s
->pos
; j
< (s
->pos
+ s
->npos
); j
++) {
714 e
= r
->ptr
[r
->posbase
[j
].id
];
715 if (e
->type
== EOP
) continue;
717 if (e
->follow
== NULL
) efollow(r
, &r
->firstpos
, e
);
718 for (q
= e
->follow
, eq
= q
+ e
->flen
; q
< eq
; q
++) {
719 SET(fpos
, *q
, r
->posbase
[j
].fcount
+ 1)
721 printf("CHAR %c FC %c COUNT %d\n",
724 r
->posbase
[j
].fcount
+1);
736 x
= (uchar_t
*)mymalloc(n
);
737 mallocs
[nmalloc
++] = x
;
738 if (nmalloc
>= MAXMALLOCS
)
739 nmalloc
= MAXMALLOCS
- 1;
745 ppr(Positionse
*ps
, char *p
)
750 (void) sprintf(p
, "{}");
754 for (n
= ps
->last
; n
> 0; n
= ps
->base
[n
].id
) {
755 (void) sprintf(p
, "%d,", n
);
762 epr(Expr
*e
, uchar_t
*res
)
764 uchar_t r1
[EPRINTSIZE
], r2
[EPRINTSIZE
];
768 (void) sprintf(res
, "!0!");
774 spr(e
->flen
, e
->follow
, r1
);
775 (void) sprintf(res
, "%c%s", e
->lit
, r1
);
779 for (i
= 0; i
< 256; i
++)
780 if (CCL_CHK((uchar_t
*)e
->r
, i
)) {
789 (void) sprintf(res
, "%s%s", r1
, r2
);
794 (void) sprintf(res
, "(%s|%s)", r1
, r2
);
798 (void) sprintf(res
, "(%s)*", r1
);
802 (void) sprintf(res
, "(%s)+", r1
);
806 (void) sprintf(res
, "(%s)?", r1
);
810 (void) sprintf(res
, "%s<EOP>", r1
);
813 (void) sprintf(res
, "<undef type %d>", e
->type
);
820 spr(int c
, int *p
, uchar_t
*buf
)
826 (void) sprintf(buf
, "%d,", *p
++);
827 buf
= strchr(buf
, 0);
829 (void) sprintf(buf
, "%d}", *p
);
831 (void) sprintf(buf
, "{}");
838 /* CONSTANTCONDITION */
839 r
->statelim
= (sizeof (int) < 4 ? 32 : 128);
840 r
->states
= (State
*)egmalloc(r
->statelim
* sizeof (State
));
842 /* CONSTANTCONDITION */
843 r
->nposalloc
= (sizeof (int) < 4 ? 2048 : 8192);
844 r
->posbase
= (FID
*)egmalloc(r
->nposalloc
* sizeof (FID
));
845 r
->nstates
= r
->posnext
= 0;
851 r
->nstates
= 0; /* reclaim space for states and positions */
852 r
->posnext
= r
->posreset
;
853 (void) memset((char *)r
->istate
.tab
, 0, sizeof (r
->istate
.tab
));
859 r
->posreset
= r
->posnext
; /* save for reset */
869 getstate(re_re
*r
, Positionset
*ps
)
871 if (r
->nstates
>= r
->statelim
||
872 r
->posnext
+ ps
->count
>= r
->nposalloc
) {
875 printf("%d STATES FLUSHED\n", r
->statelim
);
878 return (r
->nstates
++);
882 stateof(re_re
*r
, Positionset
*ps
)
888 for (i
= 0, s
= r
->states
; i
< r
->nstates
; i
++, s
++) {
889 if (s
->npos
== ps
->count
) {
890 for (p
= s
->pos
+r
->posbase
, e
= p
+s
->npos
; p
< e
; p
++)
891 if (ps
->base
[p
->id
].id
== 0 ||
892 ps
->base
[p
->id
].fcount
!= p
->fcount
)
906 r
= (re_re
*)egmalloc(sizeof (re_re
));
907 (void) memset((char *)r
, 0, sizeof (re_re
));
909 pat
->loc1
= pat
->expression
;
910 pat
->loc2
= pat
->expression
+ strlen((char *)pat
->expression
);
916 r
->root
= newexpr(EOP
, '#', eall(r
, pat
), (Expr
*)NULL
);
924 newexpr(Exprtype t
, int lit
, Expr
*left
, Expr
*right
)
926 Expr
*e
= (Expr
*)egmalloc(sizeof (Expr
));
932 if (e
->lit
) e
->id
= maxid
++;
935 if ((e
->l
= left
) != NULL
) {
938 if ((e
->r
= right
) != NULL
) {
947 lex(re_re
*r
, PATTERN
*pat
)
949 if (pat
->loc1
== pat
->loc2
) {
952 } else switch (toklit
= *pat
->loc1
++) {
953 case '.': toktype
= Dot
; break;
954 case '*': toktype
= Star
; break;
955 case '+': toktype
= Plus
; break;
956 case '?': toktype
= Quest
; break;
957 case '[': toktype
= Charclass
; break;
958 case '|': toktype
= Alternate
; break;
959 case '(': toktype
= Lpar
; break;
960 case ')': toktype
= Rpar
; break;
961 case '\\': toktype
= Backslash
;
962 if (pat
->loc1
== pat
->loc2
) {
963 err("syntax error - missing character "
966 toklit
= r
->cmap
[*pat
->loc1
++];
968 case '^': case '$': toktype
= Literal
; toklit
= NL
; break;
969 default: toktype
= Literal
; toklit
= r
->cmap
[toklit
]; break;
974 ccl(PATTERN
*pat
, uchar_t
*tab
)
982 (void) memset((char *)tab
, 0, CCL_SIZ
* sizeof (uchar_t
));
983 if (*pat
->loc1
== '^') {
987 if (*pat
->loc1
== ']') {
988 uchar_t c
= pat
->cmap
[*pat
->loc1
];
990 lastc
= *pat
->loc1
++;
993 for (; (pat
->loc1
< pat
->loc2
) && (*pat
->loc1
!= ']');
995 if (*pat
->loc1
== '-') {
996 if (lastc
< 0) CCL_SET(tab
, pat
->cmap
['-']);
1001 for (i
= *pat
->loc1
; i
>= lastc
; i
--) {
1002 CCL_SET(tab
, pat
->cmap
[i
]);
1005 uchar_t c
= pat
->cmap
[*pat
->loc1
];
1013 if (range
) CCL_SET(tab
, pat
->cmap
['-']);
1015 if (pat
->loc1
< pat
->loc2
) pat
->loc1
++;
1016 else err("syntax error - missing ]");
1019 CCL_SET(tab
, pat
->cmap
[NL
]);
1020 for (i
= 0; i
< CCL_SIZ
; i
++) tab
[i
] ^= 0xff;
1022 for (i
= 0; i
< 256; i
++) {
1023 if (pat
->cmap
[i
] != i
) CCL_CLR(tab
, i
);
1024 if (CCL_CHK(tab
, i
)) {
1037 * Alternation: d0: d1 { '|' d1 }*
1038 * Concatenation: d1: d2 { d2 }*
1039 * Repetition: d2: d3 { '*' | '?' | '+' }
1040 * Literal: d3: lit | '.' | '[]' | '(' d0 ')'
1044 d3(re_re
*r
, PATTERN
*pat
)
1053 e
= newexpr(Literal
, toklit
, (Expr
*)NULL
, (Expr
*)NULL
);
1057 e
= newexpr(Dot
, '.', (Expr
*)NULL
, (Expr
*)NULL
);
1061 tab
= egmalloc(CCL_SIZ
* sizeof (uchar_t
));
1062 count
= ccl(pat
, tab
);
1065 e
= newexpr(Literal
, toklit
, (Expr
*)NULL
,
1068 e
= newexpr(Charclass
, '[', (Expr
*)NULL
, (Expr
*)NULL
);
1069 e
->l
= (Expr
*)count
; /* number of chars */
1070 e
->r
= (Expr
*)tab
; /* bitmap of chars */
1078 if (toktype
== Rpar
)
1081 err("syntax error - missing )");
1084 err("syntax error");
1091 d2(re_re
*r
, PATTERN
*pat
)
1097 while ((toktype
== Star
) || (toktype
== Plus
) || (toktype
== Quest
)) {
1100 e
= newexpr(t
, 0, e
, (Expr
*)NULL
);
1106 d1(re_re
*r
, PATTERN
*pat
)
1111 while ((toktype
== Literal
) || (toktype
== Dot
) || (toktype
== Lpar
) ||
1112 (toktype
== Backslash
) || (toktype
== Charclass
)) {
1114 e
= newexpr(Cat
, 0, e
, f
);
1120 d0(re_re
*r
, PATTERN
*pat
)
1125 while (toktype
== Alternate
) {
1130 e
= newexpr(Alternate
, 0, e
, f
);
1136 eall(re_re
*r
, PATTERN
*pat
)
1140 while (toktype
== Alternate
) /* bogus but user-friendly */
1143 while (toktype
== Alternate
) /* bogus but user-friendly */
1146 err("syntax error");
1158 re_lit(PATTERN
*pat
, uchar_t
**b
, uchar_t
**e
)
1161 pat
->fullmatch
= YES
;
1163 traverse(pat
, pat
->re_ptr
->root
->l
);
1167 *b
= egmalloc(bestlen
* sizeof (uchar_t
));
1168 (void) memmove(*b
, best
, bestlen
);
1169 *e
= *b
+ bestlen
- 1;
1170 return (bestlen
- 1);
1174 traverse(PATTERN
*pat
, Expr
*e
)
1181 traverse(pat
, e
->l
);
1182 traverse(pat
, e
->r
);
1185 traverse(pat
, e
->l
);
1186 FINISH
/* can't go on past a + */
1187 pat
->fullmatch
= NO
;
1188 START
/* but we can start with one! */
1189 traverse(pat
, e
->l
);
1193 pat
->fullmatch
= NO
;
1200 re_bmcomp(uchar_t
*pb
, uchar_t
*pe
, uchar_t
*cmap
)
1206 b
= (re_bm
*)egmalloc(sizeof (*b
));
1208 b
->patlen
= pe
- pb
;
1214 for (j
= 0; j
< 256; j
++)
1215 delta
[j
] = b
->patlen
;
1217 for (pe
--; pb
< pe
; pb
++)
1218 delta
[b
->cmap
[*pb
]] = pe
- pb
;
1220 delta
[b
->cmap
[*pb
]] = LARGE
;
1222 for (j
= 0; j
< 256; j
++)
1223 b
->delta0
[j
] = delta
[b
->cmap
[j
]];
1233 b
->delta2
= (int *)egmalloc(m
* sizeof (int));
1235 for (j
= 0; j
< m
; j
++) {
1238 while (k
<= j
&& b
->bmpat
[j
-k
] == b
->bmpat
[j
]) k
++;
1240 for (i
= j
+ 1; i
< m
; i
++) {
1241 if (k
<= i
&& b
->bmpat
[i
-k
] != b
->bmpat
[i
]) {
1246 b
->delta2
[j
] = k
+ m
- j
- 1;
1251 re_bmexec(PATTERN
*pat
, uchar_t
*s
, uchar_t
*e
, uchar_t
**mb
, uchar_t
**me
)
1253 re_bm
*b
= pat
->bm_ptr
;
1258 while ((unsigned long)s
< (unsigned long)e
) {
1259 while ((unsigned long)(s
+= b
->delta0
[*s
]) < (unsigned long)e
)
1261 if ((unsigned long)s
< (unsigned long)(e
+ b
->patlen
))
1262 return (NO
); /* no match */
1264 for (k
= b
->patlen
-2, sp
= s
-1; k
>= 0; k
--) {
1265 if (b
->cmap
[*sp
--] != b
->bmpat
[k
]) break;
1270 if (grepmatch(pat
, mb
, me
))
1278 k
= b
->delta0
[*++sp
];
1279 if ((j
> k
) || (k
== (int)LARGE
))
1288 re_recw(re_re
*r
, uchar_t
*map
)
1290 Expr
*e
, *root
= r
->root
;
1294 if (root
->type
!= EOP
)
1297 pat
= re_cwinit(map
);
1298 buf
= (uchar_t
*)egmalloc(20000 * sizeof (uchar_t
));
1299 if (!altlist(e
, buf
, pat
)) {
1307 altlist(Expr
*e
, uchar_t
*buf
, re_cw
*pat
)
1309 if (e
->type
== Alternate
)
1310 return ((BOOL
)(altlist(e
->l
, buf
, pat
) &&
1311 altlist(e
->r
, buf
, pat
)));
1312 return (word(e
, buf
, pat
));
1316 word(Expr
*e
, uchar_t
*buf
, re_cw
*pat
)
1322 if (e
->type
== Cat
) {
1323 if (!word(e
->l
, (uchar_t
*)NULL
, pat
))
1325 if (!word(e
->r
, (uchar_t
*)NULL
, pat
))
1327 } else if (e
->type
== Literal
)
1333 re_cwadd(pat
, buf
, p
);
1338 re_cwinit(uchar_t
*cmap
)
1345 c
= (re_cw
*)egmalloc(sizeof (*c
));
1348 c
->mindepth
= 10000;
1349 c
->root
= newnode(c
, 0);
1355 re_cwadd(re_cw
*c
, uchar_t
*s
, uchar_t
*e
)
1363 for (l
= state
->alts
; l
; l
= l
->next
)
1373 l
= newlink(*e
--, p
= newnode(c
, depth
++));
1374 l
->next
= state
->alts
;
1378 state
->alts
= newlink(*e
--, p
= newnode(c
, depth
++));
1381 if (c
->mindepth
>= depth
) c
->mindepth
= depth
-1;
1392 printf("%d[%d,%d]: ", n
->id
, n
->shift1
, n
->shift2
);
1393 for (l
= n
->alts
; l
; l
= l
->next
) {
1394 printf("edge from \"%d\" to \"%d\" label {\"%c\"};",
1395 n
->id
, l
->node
->id
, l
->lit
);
1397 printf(" draw \"%d\" as Doublecircle;", l
->node
->id
);
1399 if (l
->node
->fail
) {
1400 printf(" edge from \"%d\" to \"%d\" dashed;",
1401 l
->node
->id
, l
->node
->fail
->id
);
1412 Link
*qhead
= NULL
, *qtail
= NULL
;
1415 Node
*state
, *r
, *s
;
1418 for (l
= root
->alts
; l
; l
= l
->next
) {
1420 l
->node
->fail
= root
;
1425 for (l
= r
->alts
; l
; l
= l
->next
) {
1431 for (ll
= state
->alts
; ll
; ll
= ll
->next
)
1434 if (ll
|| (state
== root
)) {
1438 * do it here as only other exit is
1446 state
= state
->fail
;
1451 zeroroot(root
, root
);
1455 zeroroot(Node
*root
, Node
*n
)
1459 if (n
->fail
== root
)
1461 for (l
= n
->alts
; l
; l
= l
->next
)
1462 zeroroot(root
, l
->node
);
1468 Link
*qhead
= NULL
, *qtail
= NULL
;
1471 Node
*state
, *r
, *s
;
1474 for (k
= 0; k
< 256; k
++)
1475 c
->step
[k
] = c
->mindepth
+1;
1476 c
->root
->shift1
= 1;
1477 c
->root
->shift2
= c
->mindepth
;
1478 for (l
= c
->root
->alts
; l
; l
= l
->next
) {
1479 l
->node
->shift2
= c
->root
->shift2
;
1481 l
->node
->fail
= NULL
;
1486 r
->shift1
= c
->mindepth
;
1487 r
->shift2
= c
->mindepth
;
1488 if ((state
= r
->fail
) != NULL
) {
1490 k
= r
->d
- state
->d
;
1491 if (k
< state
->shift1
) {
1494 if (r
->out
&& (k
< state
->shift2
)) {
1497 } while ((state
= state
->fail
) != NULL
);
1499 for (l
= r
->alts
; l
; l
= l
->next
) {
1504 shiftprop(c
, c
->root
);
1515 n
->tab
= nn
= (Node
**)egmalloc(256 * sizeof (Node
*));
1516 (void) memset((char *)n
->tab
, 0, 256 * sizeof (Node
*));
1517 for (l
= n
->alts
; l
; l
= l
->next
)
1518 nn
[l
->lit
] = l
->node
;
1522 shiftprop(re_cw
*c
, Node
*n
)
1527 for (l
= n
->alts
; l
; l
= l
->next
) {
1528 if (c
->step
[l
->lit
] > l
->node
->d
)
1529 c
->step
[l
->lit
] = l
->node
->d
;
1531 if (n
->shift2
< nn
->shift2
)
1532 nn
->shift2
= n
->shift2
;
1545 re_cwexec(PATTERN
*pat
, uchar_t
*rs
, uchar_t
*re
, uchar_t
**mb
, uchar_t
**me
)
1554 re_cw
*c
= pat
->cw_ptr
;
1561 s
= rs
+c
->mindepth
-1;
1565 for (sp
= s
; (ostate
= state
) != NULL
; ) {
1566 mappedsp
= c
->cmap
[*sp
];
1568 if ((state
= state
->tab
[mappedsp
]) == NULL
)
1571 for (l
= state
->alts
; ; l
= l
->next
) {
1574 if (l
->lit
== mappedsp
) {
1592 k
= c
->step
[c
->cmap
[*sp
]] - ostate
->d
- 1;
1593 if (k
< ostate
->shift1
)
1595 if (k
> ostate
->shift2
)
1604 newnode(re_cw
*c
, int d
)
1606 static Node
*lim
= NULL
;
1607 static int incr
= 256;
1609 if (!next_node
) lim
= NULL
;
1610 if (next_node
== lim
) {
1611 next_node
= (Node
*)egmalloc(incr
* sizeof (Node
));
1612 lim
= next_node
+ incr
;
1615 if (d
> c
->maxdepth
) c
->maxdepth
= d
;
1616 next_node
->id
= c
->nodeid
++;
1617 next_node
->alts
= NULL
;
1618 next_node
->tab
= NULL
;
1620 return (next_node
++);
1624 newlink(uchar_t lit
, Node
*n
)
1626 static Link
*lim
= NULL
;
1627 static int incr
= 256;
1629 if (!next_link
) lim
= NULL
;
1630 if (next_link
== lim
) {
1631 next_link
= (Link
*)egmalloc(incr
* sizeof (Link
));
1632 lim
= next_link
+ incr
;
1634 next_link
->lit
= lit
;
1635 next_link
->node
= n
;
1636 next_link
->next
= NULL
;
1637 return (next_link
++);
1641 egrep(char *f
, FILE *o
, char *fo
)
1643 uchar_t buff
[MAXBUFSIZE
+ ESIZE
];
1646 *buffer
++ = NL
; /* New line precedes buffer to prevent runover */
1658 BOOL all_searched
; /* file being matched until end */
1660 if ((file_desc
= open(file
, O_RDONLY
)) < 0) {
1663 init_file(¤t
);
1664 /* while there is more get more text from the data stream */
1669 * Find next new-line in buffer.
1670 * Begin after previous new line character
1673 current
.prntbuf
= current
.newline
+ 1;
1674 current
.ln
++; /* increment line number */
1676 if (current
.prntbuf
< bufend
) {
1677 /* There is more text in buffer */
1681 * "line" as the entire remaining buffer.
1682 * However, if there is more of the file yet
1683 * to be read in, exclude any incomplete
1686 if (file_stat
== NO_MORE
) {
1688 current
.newline
= bufend
- 1;
1689 if (*current
.newline
!= NL
) {
1690 current
.newline
= bufend
;
1694 * Find end of the last
1695 * complete line in the buffer.
1697 current
.newline
= bufend
;
1698 while (*--current
.newline
!= NL
) {
1700 if (current
.newline
< current
.prntbuf
) {
1702 current
.newline
= bufend
;
1706 /* There is no more text in the buffer. */
1707 current
.newline
= bufend
;
1709 if (current
.newline
>= bufend
) {
1711 * There is no more text in the buffer,
1712 * or no new line was found.
1714 switch (file_stat
) {
1715 case MORE
: /* file partly unread */
1720 continue; /* with while loop */
1724 /* Nothing more to read in for this file. */
1725 if (current
.newline
<= current
.prntbuf
) {
1726 /* Nothing in the buffer, either */
1727 /* We are done with the file. */
1729 break; /* out of while loop */
1731 /* There is no NL at the end of the file */
1734 matched
= pattern_match(&match_pattern
, ¤t
);
1738 get_ncount(¤t
, match_pattern
.loc1
);
1739 get_line(¤t
, match_pattern
.loc2
);
1741 nc
= current
.newline
+ 1 - current
.prntbuf
;
1742 (void) fprintf(output
, format
, file
, current
.ln
);
1743 (void) fwrite((char *)current
.prntbuf
, 1, nc
, output
);
1746 break; /* out of while loop */
1748 get_ncount(¤t
, current
.newline
+ 1);
1752 (void) close(file_desc
);
1757 init_file(LINE
*cur_ptr
)
1762 cur_ptr
->newline
= buffer
- 1;
1766 pattern_match(PATTERN
*pat
, LINE
*lptr
)
1768 if ((*pat
->procfn
)(pat
, lptr
->prntbuf
- 1, lptr
->newline
+ 1,
1769 &pat
->loc1
, &pat
->loc2
)) {
1772 pat
->loc1
= lptr
->prntbuf
;
1773 pat
->loc2
= lptr
->newline
- 1;
1776 } /* end of function pattern_match() */
1779 fgetfile(LINE
*cur_ptr
)
1782 * This function reads as much of the current file into the buffer
1785 int bytes
; /* bytes read in current buffer */
1786 int bufsize
= MAXBUFSIZE
; /* free space in data buffer */
1788 uchar_t
*begin
= cur_ptr
->prntbuf
;
1791 * Bytes of current incomplete line, if any, save_current in buffer.
1792 * These must be saved.
1794 save_current
= (int)(bufend
- begin
);
1796 if (file_stat
== MORE
) {
1798 * A portion of the file fills the buffer. We must clear
1799 * out the dead wood to make room for more of the file.
1807 * We have one humungous current line,
1808 * which fills the whole buffer.
1817 bufend
= begin
+ save_current
;
1819 bufsize
= MAXBUFSIZE
- save_current
;
1821 if (save_current
> 0) {
1823 * Must save portion of current line.
1824 * Copy to beginning of buffer.
1826 (void) memmove(buffer
, buffer
+ k
, save_current
);
1830 /* Now read in the file. */
1833 bytes
= read(file_desc
, (char *)bufend
, (unsigned int)bufsize
);
1835 /* can't read any more of file */
1840 } while (bytes
> 0 && bufsize
> 0);
1843 if (begin
>= bufend
) {
1844 /* No new lines or incomplete line in buffer */
1845 file_stat
= NO_MORE
;
1846 } else if (bufsize
) {
1847 /* Still space in the buffer, so we have read entire file */
1848 file_stat
= NO_MORE
;
1850 /* We filled entire buffer, so there may be more yet to read */
1853 /* Note: bufend is 1 past last good char */
1854 *bufend
= NL
; /* Sentinel new-line character */
1855 /* Set newline to character preceding the current line */
1856 cur_ptr
->newline
= begin
- 1;
1860 get_line(LINE
*cur_ptr
, uchar_t
*s
)
1862 while (*s
++ != NL
) {
1864 cur_ptr
->newline
= --s
;
1869 get_ncount(LINE
*cur_ptr
, uchar_t
*s
)
1871 uchar_t
*p
= cur_ptr
->prntbuf
;
1873 while (*--s
!= NL
) {
1875 cur_ptr
->newline
= s
++;
1877 (p
= (uchar_t
*)memchr((char *)p
, NL
, s
- p
)) != NULL
) {
1882 cur_ptr
->prntbuf
= s
;