1 /* $NetBSD: engine.c,v 1.24 2012/03/13 21:13:42 christos Exp $ */
4 * Copyright (c) 1992, 1993, 1994
5 * The Regents of the University of California. All rights reserved.
7 * This code is derived from software contributed to Berkeley by
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. Neither the name of the University nor the names of its contributors
19 * may be used to endorse or promote products derived from this software
20 * without specific prior written permission.
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * @(#)engine.c 8.5 (Berkeley) 3/20/94
38 * Copyright (c) 1992, 1993, 1994 Henry Spencer.
40 * This code is derived from software contributed to Berkeley by
43 * Redistribution and use in source and binary forms, with or without
44 * modification, are permitted provided that the following conditions
46 * 1. Redistributions of source code must retain the above copyright
47 * notice, this list of conditions and the following disclaimer.
48 * 2. Redistributions in binary form must reproduce the above copyright
49 * notice, this list of conditions and the following disclaimer in the
50 * documentation and/or other materials provided with the distribution.
51 * 3. All advertising materials mentioning features or use of this software
52 * must display the following acknowledgement:
53 * This product includes software developed by the University of
54 * California, Berkeley and its contributors.
55 * 4. Neither the name of the University nor the names of its contributors
56 * may be used to endorse or promote products derived from this software
57 * without specific prior written permission.
59 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
60 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
61 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
62 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
63 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
64 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
65 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
66 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
67 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
68 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
71 * @(#)engine.c 8.5 (Berkeley) 3/20/94
75 * The matching engine and friends. This file is #included by regexec.c
76 * after suitable #defines of a variety of macros used herein, so that
77 * different state representations can be used without duplicating masses
82 #define matcher smatcher
85 #define dissect sdissect
86 #define backref sbackref
94 #define matcher lmatcher
97 #define dissect ldissect
98 #define backref lbackref
106 /* another structure passed up and down to avoid zillions of parameters */
110 regmatch_t
*pmatch
; /* [nsub+1] (0 element unused) */
111 const char *offp
; /* offsets work from here */
112 const char *beginp
; /* start of string -- virtual NUL precedes */
113 const char *endp
; /* end of string -- virtual NUL here */
114 const char *coldp
; /* can be no match starting before here */
115 const char **lastpos
; /* [nplus+1] */
117 states st
; /* current states */
118 states fresh
; /* states for a fresh start */
119 states tmp
; /* temporary */
120 states empty
; /* empty set of states */
123 /* ========= begin header generated by ./mkh ========= */
128 /* === engine.c === */
129 static int matcher(struct re_guts
*g
, const char *string
, size_t nmatch
, regmatch_t pmatch
[], int eflags
);
130 static const char *dissect(struct match
*m
, const char *start
, const char *stop
, sopno startst
, sopno stopst
);
131 static const char *backref(struct match
*m
, const char *start
, const char *stop
, sopno startst
, sopno stopst
, sopno lev
);
132 static const char *fast(struct match
*m
, const char *start
, const char *stop
, sopno startst
, sopno stopst
);
133 static const char *slow(struct match
*m
, const char *start
, const char *stop
, sopno startst
, sopno stopst
);
134 static states
step(struct re_guts
*g
, sopno start
, sopno stop
, states bef
, int ch
, states aft
);
137 #define BOLEOL (BOL+2)
138 #define NOTHING (BOL+3)
141 #define CODEMAX (BOL+5) /* highest code used */
142 #define NONCHAR(c) ((c) > CHAR_MAX)
143 #define NNONCHAR (CODEMAX-CHAR_MAX)
145 static void print(struct match
*m
, char *caption
, states st
, int ch
, FILE *d
);
148 static void at(struct match
*m
, char *title
, char *start
, char *stop
, sopno startst
, sopno stopst
);
151 static char *pchar(int ch
);
157 /* ========= end header generated by ./mkh ========= */
160 #define SP(t, s, c) print(m, t, s, c, stdout)
161 #define AT(t, p1, p2, s1, s2) at(m, t, p1, p2, s1, s2)
162 #define NOTE(str) { if (m->eflags®_TRACE) printf("=%s\n", (str)); }
165 #define SP(t, s, c) /* nothing */
166 #define AT(t, p1, p2, s1, s2) /* nothing */
167 #define NOTE(s) /* nothing */
171 - matcher - the actual matching engine
172 == static int matcher(struct re_guts *g, char *string, \
173 == size_t nmatch, regmatch_t pmatch[], int eflags);
175 static int /* 0 success, REG_NOMATCH failure */
186 struct match
*m
= &mv
;
188 const sopno gf
= g
->firststate
+1; /* +1 for OEND */
189 const sopno gl
= g
->laststate
;
194 _DIAGASSERT(g
!= NULL
);
195 _DIAGASSERT(string
!= NULL
);
196 /* pmatch checked below */
198 /* simplify the situation where possible */
199 if (g
->cflags
®_NOSUB
)
201 if (eflags
®_STARTEND
) {
202 _DIAGASSERT(pmatch
!= NULL
);
203 start
= string
+ (size_t)pmatch
[0].rm_so
;
204 stop
= string
+ (size_t)pmatch
[0].rm_eo
;
207 stop
= start
+ strlen(start
);
212 /* prescreening; this does wonders for this rather slow code */
213 if (g
->must
!= NULL
) {
214 for (dp
= start
; dp
< stop
; dp
++)
215 if (*dp
== g
->must
[0] && (size_t)(stop
- dp
) >= g
->mlen
&&
216 memcmp(dp
, g
->must
, g
->mlen
) == 0)
218 if (dp
== stop
) /* we didn't find g->must */
222 /* match struct setup */
237 /* this loop does only one repetition except for backrefs */
239 endp
= fast(m
, start
, stop
, gf
, gl
);
240 if (endp
== NULL
) { /* a miss */
244 if (nmatch
== 0 && !g
->backrefs
)
245 break; /* no further info needed */
248 assert(m
->coldp
!= NULL
);
250 NOTE("finding start");
251 endp
= slow(m
, m
->coldp
, stop
, gf
, gl
);
254 assert(m
->coldp
< m
->endp
);
257 if (nmatch
== 1 && !g
->backrefs
)
258 break; /* no further info needed */
260 /* oh my, he wants the subexpressions... */
261 if (m
->pmatch
== NULL
)
262 m
->pmatch
= (regmatch_t
*)malloc((m
->g
->nsub
+ 1) *
264 if (m
->pmatch
== NULL
) {
268 for (i
= 1; i
<= m
->g
->nsub
; i
++)
269 m
->pmatch
[i
].rm_so
= m
->pmatch
[i
].rm_eo
= (regoff_t
)-1;
270 if (!g
->backrefs
&& !(m
->eflags
®_BACKR
)) {
272 dp
= dissect(m
, m
->coldp
, endp
, gf
, gl
);
274 if (g
->nplus
> 0 && m
->lastpos
== NULL
)
275 m
->lastpos
= malloc((g
->nplus
+1) *
276 sizeof(const char *));
277 if (g
->nplus
> 0 && m
->lastpos
== NULL
) {
281 NOTE("backref dissect");
282 dp
= backref(m
, m
->coldp
, endp
, gf
, gl
, (sopno
)0);
287 /* uh-oh... we couldn't find a subexpression-level match */
288 assert(g
->backrefs
); /* must be back references doing it */
289 assert(g
->nplus
== 0 || m
->lastpos
!= NULL
);
291 if (dp
!= NULL
|| endp
<= m
->coldp
)
294 endp
= slow(m
, m
->coldp
, endp
-1, gf
, gl
);
297 /* try it on a shorter possibility */
299 for (i
= 1; i
<= m
->g
->nsub
; i
++) {
300 assert(m
->pmatch
[i
].rm_so
== (regoff_t
)-1);
301 assert(m
->pmatch
[i
].rm_eo
== (regoff_t
)-1);
304 NOTE("backoff dissect");
305 dp
= backref(m
, m
->coldp
, endp
, gf
, gl
, (sopno
)0);
307 assert(dp
== NULL
|| dp
== endp
);
308 if (dp
!= NULL
) /* found a shorter one */
311 /* despite initial appearances, there is no match here */
313 start
= m
->coldp
+ 1; /* recycle starting later */
314 assert(start
<= stop
);
317 /* fill in the details if requested */
319 _DIAGASSERT(pmatch
!= NULL
);
320 pmatch
[0].rm_so
= m
->coldp
- m
->offp
;
321 pmatch
[0].rm_eo
= endp
- m
->offp
;
324 assert(m
->pmatch
!= NULL
);
325 for (i
= 1; i
< nmatch
; i
++)
327 pmatch
[i
] = m
->pmatch
[i
];
329 pmatch
[i
].rm_so
= (regoff_t
)-1;
330 pmatch
[i
].rm_eo
= (regoff_t
)-1;
335 if (m
->pmatch
!= NULL
) {
339 if (m
->lastpos
!= NULL
) {
348 - dissect - figure out what matched what, no back references
349 == static const char *dissect(struct match *m, const char *start, \
350 == const char *stop, sopno startst, sopno stopst);
352 static const char * /* == stop (success) always */
361 sopno ss
; /* start sop of current subRE */
362 sopno es
; /* end sop of current subRE */
363 const char *sp
; /* start of string matched by it */
364 const char *stp
; /* string matched by it cannot pass here */
365 const char *rest
; /* start of rest of string */
366 const char *tail
; /* string unmatched by rest of RE */
367 sopno ssub
; /* start sop of subsubRE */
368 sopno esub
; /* end sop of subsubRE */
369 const char *ssp
; /* start of string matched by subsubRE */
370 const char *sep
; /* end of string matched by subsubRE */
371 const char *oldssp
; /* previous ssp */
376 _DIAGASSERT(m
!= NULL
);
377 _DIAGASSERT(start
!= NULL
);
378 _DIAGASSERT(stop
!= NULL
);
380 AT("diss", start
, stop
, startst
, stopst
);
382 for (ss
= startst
; ss
< stopst
; ss
= es
) {
383 /* identify end of subRE */
385 switch (OP(m
->g
->strip
[es
])) {
388 es
+= OPND(m
->g
->strip
[es
]);
391 while (OP(m
->g
->strip
[es
]) != O_CH
)
392 es
+= OPND(m
->g
->strip
[es
]);
397 /* figure out what it matched */
398 switch (OP(m
->g
->strip
[ss
])) {
418 /* cases where length of match is hard to find */
422 /* how long could this one be? */
423 rest
= slow(m
, sp
, stp
, ss
, es
);
424 assert(rest
!= NULL
); /* it did match */
425 /* could the rest match the rest? */
426 tail
= slow(m
, rest
, stop
, es
, stopst
);
429 /* no -- try a shorter match for this one */
431 assert(stp
>= sp
); /* it did work */
435 /* did innards match? */
436 if (slow(m
, sp
, rest
, ssub
, esub
) != NULL
) {
442 dissect(m
, sp
, rest
, ssub
, esub
);
451 /* how long could this one be? */
452 rest
= slow(m
, sp
, stp
, ss
, es
);
453 assert(rest
!= NULL
); /* it did match */
454 /* could the rest match the rest? */
455 tail
= slow(m
, rest
, stop
, es
, stopst
);
458 /* no -- try a shorter match for this one */
460 assert(stp
>= sp
); /* it did work */
466 for (;;) { /* find last match of innards */
467 sep
= slow(m
, ssp
, rest
, ssub
, esub
);
468 if (sep
== NULL
|| sep
== ssp
)
469 break; /* failed or matched null */
470 oldssp
= ssp
; /* on to next try */
474 /* last successful match */
478 assert(sep
== rest
); /* must exhaust substring */
479 assert(slow(m
, ssp
, sep
, ssub
, esub
) == rest
);
485 dissect(m
, ssp
, sep
, ssub
, esub
);
492 /* how long could this one be? */
493 rest
= slow(m
, sp
, stp
, ss
, es
);
494 assert(rest
!= NULL
); /* it did match */
495 /* could the rest match the rest? */
496 tail
= slow(m
, rest
, stop
, es
, stopst
);
499 /* no -- try a shorter match for this one */
501 assert(stp
>= sp
); /* it did work */
504 esub
= ss
+ OPND(m
->g
->strip
[ss
]) - 1;
505 assert(OP(m
->g
->strip
[esub
]) == OOR1
);
506 for (;;) { /* find first matching branch */
507 if (slow(m
, sp
, rest
, ssub
, esub
) == rest
)
508 break; /* it matched all of it */
509 /* that one missed, try next one */
510 assert(OP(m
->g
->strip
[esub
]) == OOR1
);
512 assert(OP(m
->g
->strip
[esub
]) == OOR2
);
514 esub
+= OPND(m
->g
->strip
[esub
]);
515 if (OP(m
->g
->strip
[esub
]) == OOR2
)
518 assert(OP(m
->g
->strip
[esub
]) == O_CH
);
525 dissect(m
, sp
, rest
, ssub
, esub
);
537 i
= OPND(m
->g
->strip
[ss
]);
538 assert(0 < i
&& i
<= m
->g
->nsub
);
539 m
->pmatch
[i
].rm_so
= sp
- m
->offp
;
542 i
= OPND(m
->g
->strip
[ss
]);
543 assert(0 < i
&& i
<= m
->g
->nsub
);
544 m
->pmatch
[i
].rm_eo
= sp
- m
->offp
;
557 - backref - figure out what matched what, figuring in back references
558 == static const char *backref(struct match *m, const char *start, \
559 == const char *stop, sopno startst, sopno stopst, sopno lev);
561 static const char * /* == stop (success) or NULL (failure) */
568 sopno lev
) /* PLUS nesting level */
571 sopno ss
; /* start sop of current subRE */
572 const char *sp
; /* start of string matched by it */
573 sopno ssub
; /* start sop of subsubRE */
574 sopno esub
; /* end sop of subsubRE */
575 const char *ssp
; /* start of string matched by subsubRE */
583 _DIAGASSERT(m
!= NULL
);
584 _DIAGASSERT(start
!= NULL
);
585 _DIAGASSERT(stop
!= NULL
);
587 AT("back", start
, stop
, startst
, stopst
);
590 /* get as far as we can with easy stuff */
592 for (ss
= startst
; !hard
&& ss
< stopst
; ss
++)
593 switch (OP(s
= m
->g
->strip
[ss
])) {
595 if (sp
== stop
|| *sp
++ != (char)OPND(s
))
604 cs
= &m
->g
->sets
[OPND(s
)];
605 if (sp
== stop
|| !CHIN(cs
, *sp
++))
609 if ( (sp
== m
->beginp
&& !(m
->eflags
®_NOTBOL
)) ||
610 (sp
< m
->endp
&& *(sp
-1) == '\n' &&
611 (m
->g
->cflags
®_NEWLINE
)) )
617 if ( (sp
== m
->endp
&& !(m
->eflags
®_NOTEOL
)) ||
618 (sp
< m
->endp
&& *sp
== '\n' &&
619 (m
->g
->cflags
®_NEWLINE
)) )
625 if (( (sp
== m
->beginp
&& !(m
->eflags
®_NOTBOL
)) ||
626 (sp
< m
->endp
&& *(sp
-1) == '\n' &&
627 (m
->g
->cflags
®_NEWLINE
)) ||
629 !ISWORD(*(sp
-1))) ) &&
630 (sp
< m
->endp
&& ISWORD(*sp
)) )
636 if (( (sp
== m
->endp
&& !(m
->eflags
®_NOTEOL
)) ||
637 (sp
< m
->endp
&& *sp
== '\n' &&
638 (m
->g
->cflags
®_NEWLINE
)) ||
639 (sp
< m
->endp
&& !ISWORD(*sp
)) ) &&
640 (sp
> m
->beginp
&& ISWORD(*(sp
-1))) )
647 case OOR1
: /* matches null but needs to skip */
651 assert(OP(s
) == OOR2
);
653 } while (OP(s
= m
->g
->strip
[ss
]) != O_CH
);
654 /* note that the ss++ gets us past the O_CH */
656 default: /* have to make a choice */
660 if (!hard
) { /* that was it! */
665 ss
--; /* adjust for the for's final increment */
668 AT("hard", sp
, stop
, ss
, stopst
);
671 case OBACK_
: /* the vilest depths */
673 assert(0 < i
&& i
<= m
->g
->nsub
);
674 if (m
->pmatch
[i
].rm_eo
== (regoff_t
)-1)
676 assert(m
->pmatch
[i
].rm_so
!= (regoff_t
)-1);
677 len
= (size_t)(m
->pmatch
[i
].rm_eo
- m
->pmatch
[i
].rm_so
);
680 assert(stop
- m
->beginp
>= len
);
682 return(NULL
); /* not enough left to match */
683 ssp
= m
->offp
+ (size_t)m
->pmatch
[i
].rm_so
;
684 if (memcmp(sp
, ssp
, len
) != 0)
686 while (m
->g
->strip
[ss
] != SOP(O_BACK
, i
))
688 return(backref(m
, sp
+len
, stop
, ss
+1, stopst
, lev
));
690 case OQUEST_
: /* to null or not */
691 dp
= backref(m
, sp
, stop
, ss
+1, stopst
, lev
);
693 return(dp
); /* not */
694 return(backref(m
, sp
, stop
, ss
+OPND(s
)+1, stopst
, lev
));
697 assert(m
->lastpos
!= NULL
);
698 assert(lev
+1 <= m
->g
->nplus
);
699 m
->lastpos
[lev
+1] = sp
;
700 return(backref(m
, sp
, stop
, ss
+1, stopst
, lev
+1));
703 if (sp
== m
->lastpos
[lev
]) /* last pass matched null */
704 return(backref(m
, sp
, stop
, ss
+1, stopst
, lev
-1));
705 /* try another pass */
706 m
->lastpos
[lev
] = sp
;
707 dp
= backref(m
, sp
, stop
, ss
-OPND(s
)+1, stopst
, lev
);
709 dp
= backref(m
, sp
, stop
, ss
+1, stopst
, lev
-1);
712 case OCH_
: /* find the right one, if any */
714 esub
= ss
+ OPND(s
) - 1;
715 assert(OP(m
->g
->strip
[esub
]) == OOR1
);
716 for (;;) { /* find first matching branch */
717 dp
= backref(m
, sp
, stop
, ssub
, esub
, lev
);
720 /* that one missed, try next one */
721 if (OP(m
->g
->strip
[esub
]) == O_CH
)
722 return(NULL
); /* there is none */
724 assert(OP(m
->g
->strip
[esub
]) == OOR2
);
726 esub
+= OPND(m
->g
->strip
[esub
]);
727 if (OP(m
->g
->strip
[esub
]) == OOR2
)
730 assert(OP(m
->g
->strip
[esub
]) == O_CH
);
733 case OLPAREN
: /* must undo assignment if rest fails */
735 assert(0 < i
&& i
<= m
->g
->nsub
);
736 offsave
= m
->pmatch
[i
].rm_so
;
737 m
->pmatch
[i
].rm_so
= sp
- m
->offp
;
738 dp
= backref(m
, sp
, stop
, ss
+1, stopst
, lev
);
741 m
->pmatch
[i
].rm_so
= offsave
;
744 case ORPAREN
: /* must undo assignment if rest fails */
746 assert(0 < i
&& i
<= m
->g
->nsub
);
747 offsave
= m
->pmatch
[i
].rm_eo
;
748 m
->pmatch
[i
].rm_eo
= sp
- m
->offp
;
749 dp
= backref(m
, sp
, stop
, ss
+1, stopst
, lev
);
752 m
->pmatch
[i
].rm_eo
= offsave
;
767 - fast - step through the string at top speed
768 == static const char *fast(struct match *m, const char *start, \
769 == const char *stop, sopno startst, sopno stopst);
771 static const char * /* where tentative match ended, or NULL */
780 states fresh
= m
->fresh
;
782 const char *p
= start
;
783 int c
= (start
== m
->beginp
) ? OUT
: *(start
-1);
784 int lastc
; /* previous c */
787 const char *coldp
; /* last p after which no match was underway */
789 _DIAGASSERT(m
!= NULL
);
790 _DIAGASSERT(start
!= NULL
);
791 _DIAGASSERT(stop
!= NULL
);
795 st
= step(m
->g
, startst
, stopst
, st
, NOTHING
, st
);
802 c
= (p
== m
->endp
) ? OUT
: *p
;
806 /* is there an EOL and/or BOL between lastc and c? */
809 if ( (lastc
== '\n' && m
->g
->cflags
®_NEWLINE
) ||
810 (lastc
== OUT
&& !(m
->eflags
®_NOTBOL
)) ) {
814 if ( (c
== '\n' && m
->g
->cflags
®_NEWLINE
) ||
815 (c
== OUT
&& !(m
->eflags
®_NOTEOL
)) ) {
816 flagch
= (flagch
== BOL
) ? BOLEOL
: EOL
;
821 st
= step(m
->g
, startst
, stopst
, st
, flagch
, st
);
825 /* how about a word boundary? */
826 if ( (flagch
== BOL
|| (lastc
!= OUT
&& !ISWORD(lastc
))) &&
827 (c
!= OUT
&& ISWORD(c
)) ) {
830 if ( (lastc
!= OUT
&& ISWORD(lastc
)) &&
831 (flagch
== EOL
|| (c
!= OUT
&& !ISWORD(c
))) ) {
834 if (flagch
== BOW
|| flagch
== EOW
) {
835 st
= step(m
->g
, startst
, stopst
, st
, flagch
, st
);
840 if (ISSET(st
, stopst
) || p
== stop
)
841 break; /* NOTE BREAK OUT */
843 /* no, we must deal with this character */
847 st
= step(m
->g
, startst
, stopst
, tmp
, c
, st
);
849 assert(EQ(step(m
->g
, startst
, stopst
, st
, NOTHING
, st
), st
));
853 assert(coldp
!= NULL
);
855 if (ISSET(st
, stopst
))
862 - slow - step through the string more deliberately
863 == static const char *slow(struct match *m, const char *start, \
864 == const char *stop, sopno startst, sopno stopst);
866 static const char * /* where it ended */
875 states empty
= m
->empty
;
877 const char *p
= start
;
878 int c
= (start
== m
->beginp
) ? OUT
: *(start
-1);
879 int lastc
; /* previous c */
882 const char *matchp
; /* last p at which a match ended */
884 _DIAGASSERT(m
!= NULL
);
885 _DIAGASSERT(start
!= NULL
);
886 _DIAGASSERT(stop
!= NULL
);
888 AT("slow", start
, stop
, startst
, stopst
);
891 SP("sstart", st
, *p
);
892 st
= step(m
->g
, startst
, stopst
, st
, NOTHING
, st
);
897 c
= (p
== m
->endp
) ? OUT
: *p
;
899 /* is there an EOL and/or BOL between lastc and c? */
902 if ( (lastc
== '\n' && m
->g
->cflags
®_NEWLINE
) ||
903 (lastc
== OUT
&& !(m
->eflags
®_NOTBOL
)) ) {
907 if ( (c
== '\n' && m
->g
->cflags
®_NEWLINE
) ||
908 (c
== OUT
&& !(m
->eflags
®_NOTEOL
)) ) {
909 flagch
= (flagch
== BOL
) ? BOLEOL
: EOL
;
914 st
= step(m
->g
, startst
, stopst
, st
, flagch
, st
);
915 SP("sboleol", st
, c
);
918 /* how about a word boundary? */
919 if ( (flagch
== BOL
|| (lastc
!= OUT
&& !ISWORD(lastc
))) &&
920 (c
!= OUT
&& ISWORD(c
)) ) {
923 if ( (lastc
!= OUT
&& ISWORD(lastc
)) &&
924 (flagch
== EOL
|| (c
!= OUT
&& !ISWORD(c
))) ) {
927 if (flagch
== BOW
|| flagch
== EOW
) {
928 st
= step(m
->g
, startst
, stopst
, st
, flagch
, st
);
929 SP("sboweow", st
, c
);
933 if (ISSET(st
, stopst
))
935 if (EQ(st
, empty
) || p
== stop
)
936 break; /* NOTE BREAK OUT */
938 /* no, we must deal with this character */
942 st
= step(m
->g
, startst
, stopst
, tmp
, c
, st
);
944 assert(EQ(step(m
->g
, startst
, stopst
, st
, NOTHING
, st
), st
));
953 - step - map set of states reachable before char to set reachable after
954 == static states step(struct re_guts *g, sopno start, sopno stop, \
955 == states bef, int ch, states aft);
956 == #define BOL (OUT+1)
957 == #define EOL (BOL+1)
958 == #define BOLEOL (BOL+2)
959 == #define NOTHING (BOL+3)
960 == #define BOW (BOL+4)
961 == #define EOW (BOL+5)
962 == #define CODEMAX (BOL+5) // highest code used
963 == #define NONCHAR(c) ((c) > CHAR_MAX)
964 == #define NNONCHAR (CODEMAX-CHAR_MAX)
969 sopno start
, /* start state within strip */
970 sopno stop
, /* state after stop state within strip */
971 states bef
, /* states reachable before */
972 int ch
, /* character or NONCHAR code */
973 states aft
) /* states already known reachable after */
978 onestate here
; /* note, macros know this name */
982 _DIAGASSERT(g
!= NULL
);
984 for (pc
= start
, INIT(here
, pc
); pc
!= stop
; pc
++, INC(here
)) {
988 assert(pc
== stop
-1);
991 /* only characters can match */
992 assert(!NONCHAR(ch
) || ch
!= (char)OPND(s
));
993 if (ch
== (char)OPND(s
))
997 if (ch
== BOL
|| ch
== BOLEOL
)
1001 if (ch
== EOL
|| ch
== BOLEOL
)
1017 cs
= &g
->sets
[OPND(s
)];
1018 if (!NONCHAR(ch
) && CHIN(cs
, ch
))
1021 case OBACK_
: /* ignored here */
1025 case OPLUS_
: /* forward, this is just an empty */
1028 case O_PLUS
: /* both forward and back */
1030 i
= ISSETBACK(aft
, OPND(s
));
1031 BACK(aft
, aft
, OPND(s
));
1032 if (!i
&& ISSETBACK(aft
, OPND(s
))) {
1033 /* oho, must reconsider loop body */
1038 case OQUEST_
: /* two branches, both forward */
1040 FWD(aft
, aft
, OPND(s
));
1042 case O_QUEST
: /* just an empty */
1045 case OLPAREN
: /* not significant here */
1049 case OCH_
: /* mark the first two branches */
1051 assert(OP(g
->strip
[pc
+OPND(s
)]) == OOR2
);
1052 FWD(aft
, aft
, OPND(s
));
1054 case OOR1
: /* done a branch, find the O_CH */
1055 if (ISSTATEIN(aft
, here
)) {
1057 OP(s
= g
->strip
[pc
+look
]) != O_CH
;
1059 assert(OP(s
) == OOR2
);
1060 FWD(aft
, aft
, look
);
1063 case OOR2
: /* propagate OCH_'s marking */
1065 if (OP(g
->strip
[pc
+OPND(s
)]) != O_CH
) {
1066 assert(OP(g
->strip
[pc
+OPND(s
)]) == OOR2
);
1067 FWD(aft
, aft
, OPND(s
));
1070 case O_CH
: /* just empty */
1073 default: /* ooooops... */
1084 - print - print a set of states
1086 == static void print(struct match *m, char *caption, states st, \
1087 == int ch, FILE *d);
1098 struct re_guts
*g
= m
->g
;
1102 _DIAGASSERT(m
!= NULL
);
1103 _DIAGASSERT(caption
!= NULL
);
1105 if (!(m
->eflags
®_TRACE
))
1108 _DIAGASSERT(d
!= NULL
);
1110 fprintf(d
, "%s", caption
);
1112 fprintf(d
, " %s", pchar(ch
));
1113 for (i
= 0; i
< g
->nstates
; i
++)
1115 fprintf(d
, "%s%d", (first
) ? "\t" : ", ", i
);
1122 - at - print current situation
1124 == static void at(struct match *m, char *title, char *start, char *stop, \
1125 == sopno startst, sopno stopst);
1138 _DIAGASSERT(m
!= NULL
);
1139 _DIAGASSERT(title
!= NULL
);
1140 _DIAGASSERT(start
!= NULL
);
1141 _DIAGASSERT(stop
!= NULL
);
1143 if (!(m
->eflags
®_TRACE
))
1146 printf("%s %s-", title
, pchar(*start
));
1147 printf("%s ", pchar(*stop
));
1148 printf("%ld-%ld\n", (long)startst
, (long)stopst
);
1152 #define PCHARDONE /* never again */
1154 - pchar - make a character printable
1156 == static char *pchar(int ch);
1159 * Is this identical to regchar() over in debug.c? Well, yes. But a
1160 * duplicate here avoids having a debugging-capable regexec.o tied to
1161 * a matching debug.o, and this is convenient. It all disappears in
1162 * the non-debug compilation anyway, so it doesn't matter much.
1164 static char * /* -> representation */
1168 static char pbuf
[10];
1170 if (isprint(ch
) || ch
== ' ')
1171 (void)snprintf(pbuf
, sizeof pbuf
, "%c", ch
);
1173 (void)snprintf(pbuf
, sizeof pbuf
, "\\%o", ch
);