Fix up mix of man(7)/mdoc(7).
[netbsd-mini2440.git] / dist / nvi / regex / engine.c
blob35405fdb80a3bdefb65e2533fd96c51a767ecea9
1 /* $NetBSD: engine.c,v 1.5 2009/02/22 11:34:53 tnozaki Exp $ */
3 /*-
4 * Copyright (c) 1992, 1993, 1994 Henry Spencer.
5 * Copyright (c) 1992, 1993, 1994
6 * The Regents of the University of California. All rights reserved.
8 * This code is derived from software contributed to Berkeley by
9 * Henry Spencer of the University of Toronto.
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. All advertising materials mentioning features or use of this software
20 * must display the following acknowledgement:
21 * This product includes software developed by the University of
22 * California, Berkeley and its contributors.
23 * 4. Neither the name of the University nor the names of its contributors
24 * may be used to endorse or promote products derived from this software
25 * without specific prior written permission.
27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37 * SUCH DAMAGE.
39 * @(#)engine.c 8.4 (Berkeley) 3/19/94
43 * The matching engine and friends. This file is #included by regexec.c
44 * after suitable #defines of a variety of macros used herein, so that
45 * different state representations can be used without duplicating masses
46 * of code.
49 #ifdef SNAMES
50 #define matcher smatcher
51 #define fast sfast
52 #define slow sslow
53 #define dissect sdissect
54 #define backref sbackref
55 #define step sstep
56 #define print sprint
57 #define at sat
58 #define match smat
59 #endif
60 #ifdef LNAMES
61 #define matcher lmatcher
62 #define fast lfast
63 #define slow lslow
64 #define dissect ldissect
65 #define backref lbackref
66 #define step lstep
67 #define print lprint
68 #define at lat
69 #define match lmat
70 #endif
72 /* another structure passed up and down to avoid zillions of parameters */
73 struct match {
74 struct re_guts *g;
75 int eflags;
76 regmatch_t *pmatch; /* [nsub+1] (0 element unused) */
77 RCHAR_T *offp; /* offsets work from here */
78 RCHAR_T *beginp; /* start of string -- virtual NUL precedes */
79 RCHAR_T *endp; /* end of string -- virtual NUL here */
80 RCHAR_T *coldp; /* can be no match starting before here */
81 RCHAR_T **lastpos; /* [nplus+1] */
82 STATEVARS;
83 states st; /* current states */
84 states fresh; /* states for a fresh start */
85 states tmp; /* temporary */
86 states empty; /* empty set of states */
89 /* ========= begin header generated by ./mkh ========= */
90 #ifdef __cplusplus
91 extern "C" {
92 #endif
94 /* === engine.c === */
95 static int matcher __P((struct re_guts *g, RCHAR_T *string, size_t nmatch, regmatch_t pmatch[], int eflags));
96 static RCHAR_T *dissect __P((struct match *m, RCHAR_T *start, RCHAR_T *stop, sopno startst, sopno stopst));
97 static RCHAR_T *backref __P((struct match *m, RCHAR_T *start, RCHAR_T *stop, sopno startst, sopno stopst, sopno lev));
98 static RCHAR_T *fast __P((struct match *m, RCHAR_T *start, RCHAR_T *stop, sopno startst, sopno stopst));
99 static RCHAR_T *slow __P((struct match *m, RCHAR_T *start, RCHAR_T *stop, sopno startst, sopno stopst));
100 static states step __P((struct re_guts *g, sopno start, sopno stop, states bef, int flag, RCHAR_T ch, states aft));
101 #define BOL (1)
102 #define EOL (BOL+1)
103 #define BOLEOL (BOL+2)
104 #define NOTHING (BOL+3)
105 #define BOW (BOL+4)
106 #define EOW (BOL+5)
107 #ifdef REDEBUG
108 static void print __P((struct match *m, char *caption, states st, int ch, FILE *d));
109 #endif
110 #ifdef REDEBUG
111 static void at __P((struct match *m, char *title, char *start, char *stop, sopno startst, sopno stopst));
112 #endif
113 #ifdef REDEBUG
114 static char *pchar __P((int ch));
115 #endif
117 #ifdef __cplusplus
119 #endif
120 /* ========= end header generated by ./mkh ========= */
122 #ifdef REDEBUG
123 #define SP(t, s, c) print(m, t, s, c, stdout)
124 #define AT(t, p1, p2, s1, s2) at(m, t, p1, p2, s1, s2)
125 #define NOTE(str) { if (m->eflags&REG_TRACE) printf("=%s\n", (str)); }
126 #else
127 #define SP(t, s, c) /* nothing */
128 #define AT(t, p1, p2, s1, s2) /* nothing */
129 #define NOTE(s) /* nothing */
130 #endif
133 - matcher - the actual matching engine
134 == static int matcher(register struct re_guts *g, RCHAR_T *string, \
135 == size_t nmatch, regmatch_t pmatch[], int eflags);
137 static int /* 0 success, REG_NOMATCH failure */
138 matcher(g, string, nmatch, pmatch, eflags)
139 register struct re_guts *g;
140 RCHAR_T *string;
141 size_t nmatch;
142 regmatch_t pmatch[];
143 int eflags;
145 register RCHAR_T *endp;
146 register size_t i;
147 struct match mv;
148 register struct match *m = &mv;
149 register RCHAR_T *dp;
150 register const sopno gf = g->firststate+1; /* +1 for OEND */
151 register const sopno gl = g->laststate;
152 RCHAR_T *start;
153 RCHAR_T *stop;
155 /* simplify the situation where possible */
156 if (g->cflags&REG_NOSUB)
157 nmatch = 0;
158 if (eflags&REG_STARTEND) {
159 start = string + pmatch[0].rm_so;
160 stop = string + pmatch[0].rm_eo;
161 } else {
162 start = string;
163 stop = start + STRLEN(start);
165 if (stop < start)
166 return(REG_INVARG);
168 /* prescreening; this does wonders for this rather slow code */
169 if (g->must != NULL) {
170 for (dp = start; dp < stop; dp++)
171 if (*dp == g->must[0] && stop - dp >= g->mlen &&
172 MEMCMP(dp, g->must, (size_t)g->mlen) == 0)
173 break;
174 if (dp == stop) /* we didn't find g->must */
175 return(REG_NOMATCH);
178 /* match struct setup */
179 m->g = g;
180 m->eflags = eflags;
181 m->pmatch = NULL;
182 m->lastpos = NULL;
183 m->offp = string;
184 m->beginp = start;
185 m->endp = stop;
186 STATESETUP(m, 4);
187 SETUP(m->st);
188 SETUP(m->fresh);
189 SETUP(m->tmp);
190 SETUP(m->empty);
191 CLEAR(m->empty);
193 /* this loop does only one repetition except for backrefs */
194 for (;;) {
195 endp = fast(m, start, stop, gf, gl);
196 if (endp == NULL) { /* a miss */
197 STATETEARDOWN(m);
198 return(REG_NOMATCH);
200 if (nmatch == 0 && !g->backrefs)
201 break; /* no further info needed */
203 /* where? */
204 assert(m->coldp != NULL);
205 for (;;) {
206 NOTE("finding start");
207 endp = slow(m, m->coldp, stop, gf, gl);
208 if (endp != NULL)
209 break;
210 assert(m->coldp < m->endp);
211 m->coldp++;
213 if (nmatch == 1 && !g->backrefs)
214 break; /* no further info needed */
216 /* oh my, he wants the subexpressions... */
217 if (m->pmatch == NULL)
218 m->pmatch = (regmatch_t *)malloc((m->g->nsub + 1) *
219 sizeof(regmatch_t));
220 if (m->pmatch == NULL) {
221 STATETEARDOWN(m);
222 return(REG_ESPACE);
224 for (i = 1; i <= m->g->nsub; i++)
225 m->pmatch[i].rm_so = m->pmatch[i].rm_eo = -1;
226 if (!g->backrefs && !(m->eflags&REG_BACKR)) {
227 NOTE("dissecting");
228 dp = dissect(m, m->coldp, endp, gf, gl);
229 } else {
230 if (g->nplus > 0 && m->lastpos == NULL)
231 m->lastpos = (RCHAR_T **)malloc((g->nplus+1) *
232 sizeof(RCHAR_T *));
233 if (g->nplus > 0 && m->lastpos == NULL) {
234 free(m->pmatch);
235 STATETEARDOWN(m);
236 return(REG_ESPACE);
238 NOTE("backref dissect");
239 dp = backref(m, m->coldp, endp, gf, gl, (sopno)0);
241 if (dp != NULL)
242 break;
244 /* uh-oh... we couldn't find a subexpression-level match */
245 assert(g->backrefs); /* must be back references doing it */
246 assert(g->nplus == 0 || m->lastpos != NULL);
247 for (;;) {
248 if (dp != NULL || endp <= m->coldp)
249 break; /* defeat */
250 NOTE("backoff");
251 endp = slow(m, m->coldp, endp-1, gf, gl);
252 if (endp == NULL)
253 break; /* defeat */
254 /* try it on a shorter possibility */
255 #ifndef NDEBUG
256 for (i = 1; i <= m->g->nsub; i++) {
257 assert(m->pmatch[i].rm_so == -1);
258 assert(m->pmatch[i].rm_eo == -1);
260 #endif
261 NOTE("backoff dissect");
262 dp = backref(m, m->coldp, endp, gf, gl, (sopno)0);
264 assert(dp == NULL || dp == endp);
265 if (dp != NULL) /* found a shorter one */
266 break;
268 /* despite initial appearances, there is no match here */
269 NOTE("false alarm");
270 start = m->coldp + 1; /* recycle starting later */
271 assert(start <= stop);
274 /* fill in the details if requested */
275 if (nmatch > 0) {
276 pmatch[0].rm_so = m->coldp - m->offp;
277 pmatch[0].rm_eo = endp - m->offp;
279 if (nmatch > 1) {
280 assert(m->pmatch != NULL);
281 for (i = 1; i < nmatch; i++)
282 if (i <= m->g->nsub)
283 pmatch[i] = m->pmatch[i];
284 else {
285 pmatch[i].rm_so = -1;
286 pmatch[i].rm_eo = -1;
290 if (m->pmatch != NULL)
291 free((char *)m->pmatch);
292 if (m->lastpos != NULL)
293 free((char *)m->lastpos);
294 STATETEARDOWN(m);
295 return(0);
299 - dissect - figure out what matched what, no back references
300 == static RCHAR_T *dissect(register struct match *m, RCHAR_T *start, \
301 == RCHAR_T *stop, sopno startst, sopno stopst);
303 static RCHAR_T * /* == stop (success) always */
304 dissect(m, start, stop, startst, stopst)
305 register struct match *m;
306 RCHAR_T *start;
307 RCHAR_T *stop;
308 sopno startst;
309 sopno stopst;
311 register int i;
312 register sopno ss; /* start sop of current subRE */
313 register sopno es; /* end sop of current subRE */
314 register RCHAR_T *sp; /* start of string matched by it */
315 register RCHAR_T *stp; /* string matched by it cannot pass here */
316 register RCHAR_T *rest; /* start of rest of string */
317 register RCHAR_T *tail; /* string unmatched by rest of RE */
318 register sopno ssub; /* start sop of subsubRE */
319 register sopno esub; /* end sop of subsubRE */
320 register RCHAR_T *ssp; /* start of string matched by subsubRE */
321 register RCHAR_T *sep; /* end of string matched by subsubRE */
322 register RCHAR_T *oldssp; /* previous ssp */
323 register RCHAR_T *dp;
325 AT("diss", start, stop, startst, stopst);
326 sp = start;
327 for (ss = startst; ss < stopst; ss = es) {
328 /* identify end of subRE */
329 es = ss;
330 switch (m->g->strip[es]) {
331 case OPLUS_:
332 case OQUEST_:
333 es += m->g->stripdata[es];
334 break;
335 case OCH_:
336 while (m->g->strip[es] != O_CH)
337 es += m->g->stripdata[es];
338 break;
340 es++;
342 /* figure out what it matched */
343 switch (m->g->strip[ss]) {
344 case OEND:
345 assert(nope);
346 break;
347 case OCHAR:
348 sp++;
349 break;
350 case OBOL:
351 case OEOL:
352 case OBOW:
353 case OEOW:
354 break;
355 case OANY:
356 case OANYOF:
357 sp++;
358 break;
359 case OBACK_:
360 case O_BACK:
361 assert(nope);
362 break;
363 /* cases where length of match is hard to find */
364 case OQUEST_:
365 stp = stop;
366 for (;;) {
367 /* how long could this one be? */
368 rest = slow(m, sp, stp, ss, es);
369 assert(rest != NULL); /* it did match */
370 /* could the rest match the rest? */
371 tail = slow(m, rest, stop, es, stopst);
372 if (tail == stop)
373 break; /* yes! */
374 /* no -- try a shorter match for this one */
375 stp = rest - 1;
376 assert(stp >= sp); /* it did work */
378 ssub = ss + 1;
379 esub = es - 1;
380 /* did innards match? */
381 if (slow(m, sp, rest, ssub, esub) != NULL) {
382 dp = dissect(m, sp, rest, ssub, esub);
383 assert(dp == rest);
384 } else /* no */
385 assert(sp == rest);
386 sp = rest;
387 break;
388 case OPLUS_:
389 stp = stop;
390 for (;;) {
391 /* how long could this one be? */
392 rest = slow(m, sp, stp, ss, es);
393 assert(rest != NULL); /* it did match */
394 /* could the rest match the rest? */
395 tail = slow(m, rest, stop, es, stopst);
396 if (tail == stop)
397 break; /* yes! */
398 /* no -- try a shorter match for this one */
399 stp = rest - 1;
400 assert(stp >= sp); /* it did work */
402 ssub = ss + 1;
403 esub = es - 1;
404 ssp = sp;
405 oldssp = ssp;
406 for (;;) { /* find last match of innards */
407 sep = slow(m, ssp, rest, ssub, esub);
408 if (sep == NULL || sep == ssp)
409 break; /* failed or matched null */
410 oldssp = ssp; /* on to next try */
411 ssp = sep;
413 if (sep == NULL) {
414 /* last successful match */
415 sep = ssp;
416 ssp = oldssp;
418 assert(sep == rest); /* must exhaust substring */
419 assert(slow(m, ssp, sep, ssub, esub) == rest);
420 dp = dissect(m, ssp, sep, ssub, esub);
421 assert(dp == sep);
422 sp = rest;
423 break;
424 case OCH_:
425 stp = stop;
426 for (;;) {
427 /* how long could this one be? */
428 rest = slow(m, sp, stp, ss, es);
429 assert(rest != NULL); /* it did match */
430 /* could the rest match the rest? */
431 tail = slow(m, rest, stop, es, stopst);
432 if (tail == stop)
433 break; /* yes! */
434 /* no -- try a shorter match for this one */
435 stp = rest - 1;
436 assert(stp >= sp); /* it did work */
438 ssub = ss + 1;
439 esub = ss + m->g->stripdata[ss] - 1;
440 assert(m->g->strip[esub] == OOR1);
441 for (;;) { /* find first matching branch */
442 if (slow(m, sp, rest, ssub, esub) == rest)
443 break; /* it matched all of it */
444 /* that one missed, try next one */
445 assert(m->g->strip[esub] == OOR1);
446 esub++;
447 assert(m->g->strip[esub] == OOR2);
448 ssub = esub + 1;
449 esub += m->g->stripdata[esub];
450 if (m->g->strip[esub] == OOR2)
451 esub--;
452 else
453 assert(m->g->strip[esub] == O_CH);
455 dp = dissect(m, sp, rest, ssub, esub);
456 assert(dp == rest);
457 sp = rest;
458 break;
459 case O_PLUS:
460 case O_QUEST:
461 case OOR1:
462 case OOR2:
463 case O_CH:
464 assert(nope);
465 break;
466 case OLPAREN:
467 i = m->g->stripdata[ss];
468 assert(0 < i && i <= m->g->nsub);
469 m->pmatch[i].rm_so = sp - m->offp;
470 break;
471 case ORPAREN:
472 i = m->g->stripdata[ss];
473 assert(0 < i && i <= m->g->nsub);
474 m->pmatch[i].rm_eo = sp - m->offp;
475 break;
476 default: /* uh oh */
477 assert(nope);
478 break;
482 assert(sp == stop);
483 return(sp);
487 - backref - figure out what matched what, figuring in back references
488 == static RCHAR_T *backref(register struct match *m, RCHAR_T *start, \
489 == RCHAR_T *stop, sopno startst, sopno stopst, sopno lev);
491 static RCHAR_T * /* == stop (success) or NULL (failure) */
492 backref(m, start, stop, startst, stopst, lev)
493 register struct match *m;
494 RCHAR_T *start;
495 RCHAR_T *stop;
496 sopno startst;
497 sopno stopst;
498 sopno lev; /* PLUS nesting level */
500 register int i;
501 register sopno ss; /* start sop of current subRE */
502 register RCHAR_T *sp; /* start of string matched by it */
503 register sopno ssub; /* start sop of subsubRE */
504 register sopno esub; /* end sop of subsubRE */
505 register RCHAR_T *ssp; /* start of string matched by subsubRE */
506 register RCHAR_T *dp;
507 register size_t len;
508 register int hard;
509 register sop s;
510 register RCHAR_T d;
511 register regoff_t offsave;
512 register cset *cs;
514 AT("back", start, stop, startst, stopst);
515 sp = start;
517 /* get as far as we can with easy stuff */
518 hard = 0;
519 for (ss = startst; !hard && ss < stopst; ss++) {
520 s = m->g->strip[ss];
521 d = m->g->stripdata[ss];
522 switch (s) {
523 case OCHAR:
524 if (sp == stop || *sp++ != d)
525 return(NULL);
526 break;
527 case OANY:
528 if (sp == stop)
529 return(NULL);
530 sp++;
531 break;
532 case OANYOF:
533 cs = &m->g->sets[d];
534 if (sp == stop || !CHIN(cs, *sp++))
535 return(NULL);
536 break;
537 case OBOL:
538 if ( (sp == m->beginp && !(m->eflags&REG_NOTBOL)) ||
539 (sp < m->endp && *(sp-1) == '\n' &&
540 (m->g->cflags&REG_NEWLINE)) )
541 { /* yes */ }
542 else
543 return(NULL);
544 break;
545 case OEOL:
546 if ( (sp == m->endp && !(m->eflags&REG_NOTEOL)) ||
547 (sp < m->endp && *sp == '\n' &&
548 (m->g->cflags&REG_NEWLINE)) )
549 { /* yes */ }
550 else
551 return(NULL);
552 break;
553 case OBOW:
554 if (( (sp == m->beginp && !(m->eflags&REG_NOTBOL)) ||
555 (sp < m->endp && *(sp-1) == '\n' &&
556 (m->g->cflags&REG_NEWLINE)) ||
557 (sp > m->beginp &&
558 !ISWORD(*(sp-1))) ) &&
559 (sp < m->endp && ISWORD(*sp)) )
560 { /* yes */ }
561 else
562 return(NULL);
563 break;
564 case OEOW:
565 if (( (sp == m->endp && !(m->eflags&REG_NOTEOL)) ||
566 (sp < m->endp && *sp == '\n' &&
567 (m->g->cflags&REG_NEWLINE)) ||
568 (sp < m->endp && !ISWORD(*sp)) ) &&
569 (sp > m->beginp && ISWORD(*(sp-1))) )
570 { /* yes */ }
571 else
572 return(NULL);
573 break;
574 case O_QUEST:
575 break;
576 case OOR1: /* matches null but needs to skip */
577 ss++;
578 s = m->g->strip[ss];
579 d = m->g->stripdata[ss];
580 do {
581 assert(s == OOR2);
582 ss += d;
583 s = m->g->strip[ss];
584 d = m->g->stripdata[ss];
585 } while (s != O_CH);
586 /* note that the ss++ gets us past the O_CH */
587 break;
588 default: /* have to make a choice */
589 hard = 1;
590 break;
593 if (!hard) { /* that was it! */
594 if (sp != stop)
595 return(NULL);
596 return(sp);
598 ss--; /* adjust for the for's final increment */
600 /* the hard stuff */
601 AT("hard", sp, stop, ss, stopst);
602 s = m->g->strip[ss];
603 d = m->g->stripdata[ss];
604 switch (s) {
605 case OBACK_: /* the vilest depths */
606 i = d;
607 assert(0 < i && i <= m->g->nsub);
608 if (m->pmatch[i].rm_eo == -1)
609 return(NULL);
610 assert(m->pmatch[i].rm_so != -1);
611 len = m->pmatch[i].rm_eo - m->pmatch[i].rm_so;
612 assert(stop - m->beginp >= len);
613 if (sp > stop - len)
614 return(NULL); /* not enough left to match */
615 ssp = m->offp + m->pmatch[i].rm_so;
616 if (memcmp(sp, ssp, len) != 0)
617 return(NULL);
618 while (m->g->strip[ss] != O_BACK || m->g->stripdata[ss] != i)
619 ss++;
620 return(backref(m, sp+len, stop, ss+1, stopst, lev));
621 break;
622 case OQUEST_: /* to null or not */
623 dp = backref(m, sp, stop, ss+1, stopst, lev);
624 if (dp != NULL)
625 return(dp); /* not */
626 return(backref(m, sp, stop, ss+d+1, stopst, lev));
627 break;
628 case OPLUS_:
629 assert(m->lastpos != NULL);
630 assert(lev+1 <= m->g->nplus);
631 m->lastpos[lev+1] = sp;
632 return(backref(m, sp, stop, ss+1, stopst, lev+1));
633 break;
634 case O_PLUS:
635 if (sp == m->lastpos[lev]) /* last pass matched null */
636 return(backref(m, sp, stop, ss+1, stopst, lev-1));
637 /* try another pass */
638 m->lastpos[lev] = sp;
639 dp = backref(m, sp, stop, ss-d+1, stopst, lev);
640 if (dp == NULL)
641 return(backref(m, sp, stop, ss+1, stopst, lev-1));
642 else
643 return(dp);
644 break;
645 case OCH_: /* find the right one, if any */
646 ssub = ss + 1;
647 esub = ss + d - 1;
648 assert(m->g->strip[esub] == OOR1);
649 for (;;) { /* find first matching branch */
650 dp = backref(m, sp, stop, ssub, esub, lev);
651 if (dp != NULL)
652 return(dp);
653 /* that one missed, try next one */
654 if (m->g->strip[esub] == O_CH)
655 return(NULL); /* there is none */
656 esub++;
657 assert(m->g->strip[esub] == OOR2);
658 ssub = esub + 1;
659 esub += m->g->stripdata[esub];
660 if (m->g->strip[esub] == OOR2)
661 esub--;
662 else
663 assert(m->g->strip[esub] == O_CH);
665 break;
666 case OLPAREN: /* must undo assignment if rest fails */
667 i = d;
668 assert(0 < i && i <= m->g->nsub);
669 offsave = m->pmatch[i].rm_so;
670 m->pmatch[i].rm_so = sp - m->offp;
671 dp = backref(m, sp, stop, ss+1, stopst, lev);
672 if (dp != NULL)
673 return(dp);
674 m->pmatch[i].rm_so = offsave;
675 return(NULL);
676 break;
677 case ORPAREN: /* must undo assignment if rest fails */
678 i = d;
679 assert(0 < i && i <= m->g->nsub);
680 offsave = m->pmatch[i].rm_eo;
681 m->pmatch[i].rm_eo = sp - m->offp;
682 dp = backref(m, sp, stop, ss+1, stopst, lev);
683 if (dp != NULL)
684 return(dp);
685 m->pmatch[i].rm_eo = offsave;
686 return(NULL);
687 break;
688 default: /* uh oh */
689 assert(nope);
690 break;
693 /* "can't happen" */
694 assert(nope);
695 /* NOTREACHED */
696 return NULL;
700 - fast - step through the string at top speed
701 == static RCHAR_T *fast(register struct match *m, RCHAR_T *start, \
702 == RCHAR_T *stop, sopno startst, sopno stopst);
704 static RCHAR_T * /* where tentative match ended, or NULL */
705 fast(m, start, stop, startst, stopst)
706 register struct match *m;
707 RCHAR_T *start;
708 RCHAR_T *stop;
709 sopno startst;
710 sopno stopst;
712 register states st = m->st;
713 register states fresh = m->fresh;
714 register states tmp = m->tmp;
715 register RCHAR_T *p = start;
716 register RCHAR_T c = (start == m->beginp) ? OUT : *(start-1);
717 register RCHAR_T lastc; /* previous c */
718 register int flag;
719 register int i;
720 register RCHAR_T *coldp; /* last p after which no match was underway */
722 CLEAR(st);
723 SET1(st, startst);
724 st = step(m->g, startst, stopst, st, NOTHING, OUT, st);
725 ASSIGN(fresh, st);
726 SP("start", st, *p);
727 coldp = NULL;
728 for (;;) {
729 /* next character */
730 lastc = c;
731 c = (p == m->endp) ? OUT : *p;
732 if (EQ(st, fresh))
733 coldp = p;
735 /* is there an EOL and/or BOL between lastc and c? */
736 flag = 0;
737 i = 0;
738 if ( (lastc == '\n' && m->g->cflags&REG_NEWLINE) ||
739 (lastc == OUT && !(m->eflags&REG_NOTBOL)) ) {
740 flag = BOL;
741 i = m->g->nbol;
743 if ( (c == '\n' && m->g->cflags&REG_NEWLINE) ||
744 (c == OUT && !(m->eflags&REG_NOTEOL)) ) {
745 flag = (flag == BOL) ? BOLEOL : EOL;
746 i += m->g->neol;
748 if (i != 0) {
749 for (; i > 0; i--)
750 st = step(m->g, startst, stopst, st, flag, OUT, st);
751 SP("boleol", st, c);
754 /* how about a word boundary? */
755 if ( (flag == BOL || (lastc != OUT && !ISWORD(lastc))) &&
756 (c != OUT && ISWORD(c)) ) {
757 flag = BOW;
759 if ( (lastc != OUT && ISWORD(lastc)) &&
760 (flag == EOL || (c != OUT && !ISWORD(c))) ) {
761 flag = EOW;
763 if (flag == BOW || flag == EOW) {
764 st = step(m->g, startst, stopst, st, flag, OUT, st);
765 SP("boweow", st, c);
768 /* are we done? */
769 if (ISSET(st, stopst) || p == stop)
770 break; /* NOTE BREAK OUT */
772 /* no, we must deal with this character */
773 ASSIGN(tmp, st);
774 ASSIGN(st, fresh);
775 assert(c != OUT);
776 st = step(m->g, startst, stopst, tmp, 0, c, st);
777 SP("aft", st, c);
778 assert(EQ(step(m->g, startst, stopst, st, NOTHING, OUT, st), st));
779 p++;
782 assert(coldp != NULL);
783 m->coldp = coldp;
784 if (ISSET(st, stopst))
785 return(p+1);
786 else
787 return(NULL);
791 - slow - step through the string more deliberately
792 == static RCHAR_T *slow(register struct match *m, RCHAR_T *start, \
793 == RCHAR_T *stop, sopno startst, sopno stopst);
795 static RCHAR_T * /* where it ended */
796 slow(m, start, stop, startst, stopst)
797 register struct match *m;
798 RCHAR_T *start;
799 RCHAR_T *stop;
800 sopno startst;
801 sopno stopst;
803 register states st = m->st;
804 register states empty = m->empty;
805 register states tmp = m->tmp;
806 register RCHAR_T *p = start;
807 register RCHAR_T c = (start == m->beginp) ? OUT : *(start-1);
808 register RCHAR_T lastc; /* previous c */
809 register int flag;
810 register int i;
811 register RCHAR_T *matchp; /* last p at which a match ended */
813 AT("slow", start, stop, startst, stopst);
814 CLEAR(st);
815 SET1(st, startst);
816 SP("sstart", st, *p);
817 st = step(m->g, startst, stopst, st, NOTHING, OUT, st);
818 matchp = NULL;
819 for (;;) {
820 /* next character */
821 lastc = c;
822 c = (p == m->endp) ? OUT : *p;
824 /* is there an EOL and/or BOL between lastc and c? */
825 flag = 0;
826 i = 0;
827 if ( (lastc == '\n' && m->g->cflags&REG_NEWLINE) ||
828 (lastc == OUT && !(m->eflags&REG_NOTBOL)) ) {
829 flag = BOL;
830 i = m->g->nbol;
832 if ( (c == '\n' && m->g->cflags&REG_NEWLINE) ||
833 (c == OUT && !(m->eflags&REG_NOTEOL)) ) {
834 flag = (flag == BOL) ? BOLEOL : EOL;
835 i += m->g->neol;
837 if (i != 0) {
838 for (; i > 0; i--)
839 st = step(m->g, startst, stopst, st, flag, OUT, st);
840 SP("sboleol", st, c);
843 /* how about a word boundary? */
844 if ( (flag == BOL || (lastc != OUT && !ISWORD(lastc))) &&
845 (c != OUT && ISWORD(c)) ) {
846 flag = BOW;
848 if ( (lastc != OUT && ISWORD(lastc)) &&
849 (flag == EOL || (c != OUT && !ISWORD(c))) ) {
850 flag = EOW;
852 if (flag == BOW || flag == EOW) {
853 st = step(m->g, startst, stopst, st, flag, OUT, st);
854 SP("sboweow", st, c);
857 /* are we done? */
858 if (ISSET(st, stopst))
859 matchp = p;
860 if (EQ(st, empty) || p == stop)
861 break; /* NOTE BREAK OUT */
863 /* no, we must deal with this character */
864 ASSIGN(tmp, st);
865 ASSIGN(st, empty);
866 assert(c != OUT);
867 st = step(m->g, startst, stopst, tmp, 0, c, st);
868 SP("saft", st, c);
869 assert(EQ(step(m->g, startst, stopst, st, NOTHING, OUT, st), st));
870 p++;
873 return(matchp);
878 - step - map set of states reachable before char to set reachable after
879 == static states step(register struct re_guts *g, sopno start, sopno stop, \
880 == register states bef, int flag, RCHAR_T ch, register states aft);
881 == #define BOL (1)
882 == #define EOL (BOL+1)
883 == #define BOLEOL (BOL+2)
884 == #define NOTHING (BOL+3)
885 == #define BOW (BOL+4)
886 == #define EOW (BOL+5)
888 static states
889 step(g, start, stop, bef, flag, ch, aft)
890 register struct re_guts *g;
891 sopno start; /* start state within strip */
892 sopno stop; /* state after stop state within strip */
893 register states bef; /* states reachable before */
894 int flag; /* NONCHAR flag */
895 RCHAR_T ch; /* character code */
896 register states aft; /* states already known reachable after */
898 register cset *cs;
899 register sop s;
900 register RCHAR_T d;
901 register sopno pc;
902 register onestate here; /* note, macros know this name */
903 register sopno look;
904 register int i;
906 for (pc = start, INIT(here, pc); pc != stop; pc++, INC(here)) {
907 s = g->strip[pc];
908 d = g->stripdata[pc];
909 switch (s) {
910 case OEND:
911 assert(pc == stop-1);
912 break;
913 case OCHAR:
914 /* only characters can match */
915 assert(!flag || ch != d);
916 if (ch == d)
917 FWD(aft, bef, 1);
918 break;
919 case OBOL:
920 if (flag == BOL || flag == BOLEOL)
921 FWD(aft, bef, 1);
922 break;
923 case OEOL:
924 if (flag == EOL || flag == BOLEOL)
925 FWD(aft, bef, 1);
926 break;
927 case OBOW:
928 if (flag == BOW)
929 FWD(aft, bef, 1);
930 break;
931 case OEOW:
932 if (flag == EOW)
933 FWD(aft, bef, 1);
934 break;
935 case OANY:
936 if (!flag)
937 FWD(aft, bef, 1);
938 break;
939 case OANYOF:
940 cs = &g->sets[d];
941 if (!flag && CHIN(cs, ch))
942 FWD(aft, bef, 1);
943 break;
944 case OBACK_: /* ignored here */
945 case O_BACK:
946 FWD(aft, aft, 1);
947 break;
948 case OPLUS_: /* forward, this is just an empty */
949 FWD(aft, aft, 1);
950 break;
951 case O_PLUS: /* both forward and back */
952 FWD(aft, aft, 1);
953 i = ISSETBACK(aft, d);
954 BACK(aft, aft, d);
955 if (!i && ISSETBACK(aft, d)) {
956 /* oho, must reconsider loop body */
957 pc -= d + 1;
958 INIT(here, pc);
960 break;
961 case OQUEST_: /* two branches, both forward */
962 FWD(aft, aft, 1);
963 FWD(aft, aft, d);
964 break;
965 case O_QUEST: /* just an empty */
966 FWD(aft, aft, 1);
967 break;
968 case OLPAREN: /* not significant here */
969 case ORPAREN:
970 FWD(aft, aft, 1);
971 break;
972 case OCH_: /* mark the first two branches */
973 FWD(aft, aft, 1);
974 assert(OP(g->strip[pc+d]) == OOR2);
975 FWD(aft, aft, d);
976 break;
977 case OOR1: /* done a branch, find the O_CH */
978 if (ISSTATEIN(aft, here)) {
979 for (look = 1; /**/; look += d) {
980 s = g->strip[pc+look];
981 d = g->stripdata[pc+look];
982 if (s == O_CH)
983 break;
984 assert(s == OOR2);
986 FWD(aft, aft, look);
988 break;
989 case OOR2: /* propagate OCH_'s marking */
990 FWD(aft, aft, 1);
991 if (g->strip[pc+d] != O_CH) {
992 assert(g->strip[pc+d] == OOR2);
993 FWD(aft, aft, d);
995 break;
996 case O_CH: /* just empty */
997 FWD(aft, aft, 1);
998 break;
999 default: /* ooooops... */
1000 assert(nope);
1001 break;
1005 return(aft);
1008 #ifdef REDEBUG
1010 - print - print a set of states
1011 == #ifdef REDEBUG
1012 == static void print(struct match *m, char *caption, states st, \
1013 == int ch, FILE *d);
1014 == #endif
1016 static void
1017 print(m, caption, st, ch, d)
1018 struct match *m;
1019 char *caption;
1020 states st;
1021 int ch;
1022 FILE *d;
1024 register struct re_guts *g = m->g;
1025 register int i;
1026 register int first = 1;
1028 if (!(m->eflags&REG_TRACE))
1029 return;
1031 fprintf(d, "%s", caption);
1032 if (ch != '\0')
1033 fprintf(d, " %s", pchar(ch));
1034 for (i = 0; i < g->nstates; i++)
1035 if (ISSET(st, i)) {
1036 fprintf(d, "%s%d", (first) ? "\t" : ", ", i);
1037 first = 0;
1039 fprintf(d, "\n");
1043 - at - print current situation
1044 == #ifdef REDEBUG
1045 == static void at(struct match *m, char *title, char *start, char *stop, \
1046 == sopno startst, sopno stopst);
1047 == #endif
1049 static void
1050 at(m, title, start, stop, startst, stopst)
1051 struct match *m;
1052 char *title;
1053 char *start;
1054 char *stop;
1055 sopno startst;
1056 sopno stopst;
1058 if (!(m->eflags&REG_TRACE))
1059 return;
1061 printf("%s %s-", title, pchar(*start));
1062 printf("%s ", pchar(*stop));
1063 printf("%ld-%ld\n", (long)startst, (long)stopst);
1066 #ifndef PCHARDONE
1067 #define PCHARDONE /* never again */
1069 - pchar - make a character printable
1070 == #ifdef REDEBUG
1071 == static char *pchar(int ch);
1072 == #endif
1074 * Is this identical to regchar() over in debug.c? Well, yes. But a
1075 * duplicate here avoids having a debugging-capable regexec.o tied to
1076 * a matching debug.o, and this is convenient. It all disappears in
1077 * the non-debug compilation anyway, so it doesn't matter much.
1079 static char * /* -> representation */
1080 pchar(ch)
1081 int ch;
1083 static char pbuf[10];
1085 if (isprint(ch) || ch == ' ')
1086 sprintf(pbuf, "%c", ch);
1087 else
1088 sprintf(pbuf, "\\%o", ch);
1089 return(pbuf);
1091 #endif
1092 #endif
1094 #undef matcher
1095 #undef fast
1096 #undef slow
1097 #undef dissect
1098 #undef backref
1099 #undef step
1100 #undef print
1101 #undef at
1102 #undef match