Remove building with NOCRYPTO option
[minix3.git] / external / bsd / nvi / dist / regex / engine.c
blob1245dd1a0261ee283871b9c9b944b9adcb0a5dff
1 /* $NetBSD: engine.c,v 1.3 2014/01/07 21:48:12 christos Exp $ */
2 /*-
3 * Copyright (c) 1992, 1993, 1994 Henry Spencer.
4 * Copyright (c) 1992, 1993, 1994
5 * The Regents of the University of California. All rights reserved.
7 * This code is derived from software contributed to Berkeley by
8 * Henry Spencer of the University of Toronto.
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed by the University of
21 * California, Berkeley and its contributors.
22 * 4. Neither the name of the University nor the names of its contributors
23 * may be used to endorse or promote products derived from this software
24 * without specific prior written permission.
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
38 * @(#)engine.c 8.4 (Berkeley) 3/19/94
42 * The matching engine and friends. This file is #included by regexec.c
43 * after suitable #defines of a variety of macros used herein, so that
44 * different state representations can be used without duplicating masses
45 * of code.
48 #ifdef SNAMES
49 #define matcher smatcher
50 #define fast sfast
51 #define slow sslow
52 #define dissect sdissect
53 #define backref sbackref
54 #define step sstep
55 #define print sprint
56 #define at sat
57 #define match smat
58 #endif
59 #ifdef LNAMES
60 #define matcher lmatcher
61 #define fast lfast
62 #define slow lslow
63 #define dissect ldissect
64 #define backref lbackref
65 #define step lstep
66 #define print lprint
67 #define at lat
68 #define match lmat
69 #endif
71 /* another structure passed up and down to avoid zillions of parameters */
72 struct match {
73 struct re_guts *g;
74 int eflags;
75 regmatch_t *pmatch; /* [nsub+1] (0 element unused) */
76 RCHAR_T *offp; /* offsets work from here */
77 RCHAR_T *beginp; /* start of string -- virtual NUL precedes */
78 RCHAR_T *endp; /* end of string -- virtual NUL here */
79 RCHAR_T *coldp; /* can be no match starting before here */
80 RCHAR_T **lastpos; /* [nplus+1] */
81 STATEVARS;
82 states st; /* current states */
83 states fresh; /* states for a fresh start */
84 states tmp; /* temporary */
85 states empty; /* empty set of states */
88 /* ========= begin header generated by ./mkh ========= */
89 #ifdef __cplusplus
90 extern "C" {
91 #endif
93 /* === engine.c === */
94 static int matcher __P((struct re_guts *g, RCHAR_T *string, size_t nmatch, regmatch_t pmatch[], int eflags));
95 static RCHAR_T *dissect __P((struct match *m, RCHAR_T *start, RCHAR_T *stop, sopno startst, sopno stopst));
96 static RCHAR_T *backref __P((struct match *m, RCHAR_T *start, RCHAR_T *stop, sopno startst, sopno stopst, sopno lev));
97 static RCHAR_T *fast __P((struct match *m, RCHAR_T *start, RCHAR_T *stop, sopno startst, sopno stopst));
98 static RCHAR_T *slow __P((struct match *m, RCHAR_T *start, RCHAR_T *stop, sopno startst, sopno stopst));
99 static states step __P((struct re_guts *g, sopno start, sopno stop, states bef, int flag, RCHAR_T ch, states aft));
100 #define BOL (1)
101 #define EOL (BOL+1)
102 #define BOLEOL (BOL+2)
103 #define NOTHING (BOL+3)
104 #define BOW (BOL+4)
105 #define EOW (BOL+5)
106 #ifdef REDEBUG
107 static void print __P((struct match *m, char *caption, states st, int ch, FILE *d));
108 #endif
109 #ifdef REDEBUG
110 static void at __P((struct match *m, char *title, char *start, char *stop, sopno startst, sopno stopst));
111 #endif
112 #ifdef REDEBUG
113 static char *pchar __P((int ch));
114 #endif
116 #ifdef __cplusplus
118 #endif
119 /* ========= end header generated by ./mkh ========= */
121 #ifdef REDEBUG
122 #define SP(t, s, c) print(m, t, s, c, stdout)
123 #define AT(t, p1, p2, s1, s2) at(m, t, p1, p2, s1, s2)
124 #define NOTE(str) { if (m->eflags&REG_TRACE) printf("=%s\n", (str)); }
125 #else
126 #define SP(t, s, c) /* nothing */
127 #define AT(t, p1, p2, s1, s2) /* nothing */
128 #define NOTE(s) /* nothing */
129 #endif
132 - matcher - the actual matching engine
133 == static int matcher(struct re_guts *g, RCHAR_T *string, \
134 == size_t nmatch, regmatch_t pmatch[], int eflags);
136 static int /* 0 success, REG_NOMATCH failure */
137 matcher(g, string, nmatch, pmatch, eflags)
138 struct re_guts *g;
139 RCHAR_T *string;
140 size_t nmatch;
141 regmatch_t pmatch[];
142 int eflags;
144 RCHAR_T *endp;
145 size_t i;
146 struct match mv;
147 struct match *m = &mv;
148 RCHAR_T *dp;
149 const sopno gf = g->firststate+1; /* +1 for OEND */
150 const sopno gl = g->laststate;
151 RCHAR_T *start;
152 RCHAR_T *stop;
154 /* simplify the situation where possible */
155 if (g->cflags&REG_NOSUB)
156 nmatch = 0;
157 if (eflags&REG_STARTEND) {
158 start = string + pmatch[0].rm_so;
159 stop = string + pmatch[0].rm_eo;
160 } else {
161 start = string;
162 stop = start + STRLEN(start);
164 if (stop < start)
165 return(REG_INVARG);
167 /* prescreening; this does wonders for this rather slow code */
168 if (g->must != NULL) {
169 for (dp = start; dp < stop; dp++)
170 if (*dp == g->must[0] && (size_t)(stop - dp) >= g->mlen &&
171 MEMCMP(dp, g->must, g->mlen) == 0)
172 break;
173 if (dp == stop) /* we didn't find g->must */
174 return(REG_NOMATCH);
177 /* match struct setup */
178 m->g = g;
179 m->eflags = eflags;
180 m->pmatch = NULL;
181 m->lastpos = NULL;
182 m->offp = string;
183 m->beginp = start;
184 m->endp = stop;
185 STATESETUP(m, 4);
186 SETUP(m->st);
187 SETUP(m->fresh);
188 SETUP(m->tmp);
189 SETUP(m->empty);
190 CLEAR(m->empty);
192 /* this loop does only one repetition except for backrefs */
193 for (;;) {
194 endp = fast(m, start, stop, gf, gl);
195 if (endp == NULL) { /* a miss */
196 STATETEARDOWN(m);
197 return(REG_NOMATCH);
199 if (nmatch == 0 && !g->backrefs)
200 break; /* no further info needed */
202 /* where? */
203 assert(m->coldp != NULL);
204 for (;;) {
205 NOTE("finding start");
206 endp = slow(m, m->coldp, stop, gf, gl);
207 if (endp != NULL)
208 break;
209 assert(m->coldp < m->endp);
210 m->coldp++;
212 if (nmatch == 1 && !g->backrefs)
213 break; /* no further info needed */
215 /* oh my, he wants the subexpressions... */
216 if (m->pmatch == NULL)
217 m->pmatch = (regmatch_t *)malloc((m->g->nsub + 1) *
218 sizeof(regmatch_t));
219 if (m->pmatch == NULL) {
220 STATETEARDOWN(m);
221 return(REG_ESPACE);
223 for (i = 1; i <= m->g->nsub; i++)
224 m->pmatch[i].rm_so = m->pmatch[i].rm_eo = -1;
225 if (!g->backrefs && !(m->eflags&REG_BACKR)) {
226 NOTE("dissecting");
227 dp = dissect(m, m->coldp, endp, gf, gl);
228 } else {
229 if (g->nplus > 0 && m->lastpos == NULL)
230 m->lastpos = (RCHAR_T **)malloc((g->nplus+1) *
231 sizeof(RCHAR_T *));
232 if (g->nplus > 0 && m->lastpos == NULL) {
233 free(m->pmatch);
234 STATETEARDOWN(m);
235 return(REG_ESPACE);
237 NOTE("backref dissect");
238 dp = backref(m, m->coldp, endp, gf, gl, (sopno)0);
240 if (dp != NULL)
241 break;
243 /* uh-oh... we couldn't find a subexpression-level match */
244 assert(g->backrefs); /* must be back references doing it */
245 assert(g->nplus == 0 || m->lastpos != NULL);
246 for (;;) {
247 if (dp != NULL || endp <= m->coldp)
248 break; /* defeat */
249 NOTE("backoff");
250 endp = slow(m, m->coldp, endp-1, gf, gl);
251 if (endp == NULL)
252 break; /* defeat */
253 /* try it on a shorter possibility */
254 #ifndef NDEBUG
255 for (i = 1; i <= m->g->nsub; i++) {
256 assert(m->pmatch[i].rm_so == -1);
257 assert(m->pmatch[i].rm_eo == -1);
259 #endif
260 NOTE("backoff dissect");
261 dp = backref(m, m->coldp, endp, gf, gl, (sopno)0);
263 assert(dp == NULL || dp == endp);
264 if (dp != NULL) /* found a shorter one */
265 break;
267 /* despite initial appearances, there is no match here */
268 NOTE("false alarm");
269 start = m->coldp + 1; /* recycle starting later */
270 assert(start <= stop);
273 /* fill in the details if requested */
274 if (nmatch > 0) {
275 pmatch[0].rm_so = m->coldp - m->offp;
276 pmatch[0].rm_eo = endp - m->offp;
278 if (nmatch > 1) {
279 assert(m->pmatch != NULL);
280 for (i = 1; i < nmatch; i++)
281 if (i <= m->g->nsub)
282 pmatch[i] = m->pmatch[i];
283 else {
284 pmatch[i].rm_so = -1;
285 pmatch[i].rm_eo = -1;
289 if (m->pmatch != NULL)
290 free((char *)m->pmatch);
291 if (m->lastpos != NULL)
292 free((char *)m->lastpos);
293 STATETEARDOWN(m);
294 return(0);
298 - dissect - figure out what matched what, no back references
299 == static RCHAR_T *dissect(struct match *m, RCHAR_T *start, \
300 == RCHAR_T *stop, sopno startst, sopno stopst);
302 static RCHAR_T * /* == stop (success) always */
303 dissect(m, start, stop, startst, stopst)
304 struct match *m;
305 RCHAR_T *start;
306 RCHAR_T *stop;
307 sopno startst;
308 sopno stopst;
310 int i;
311 sopno ss; /* start sop of current subRE */
312 sopno es; /* end sop of current subRE */
313 RCHAR_T *sp; /* start of string matched by it */
314 RCHAR_T *stp; /* string matched by it cannot pass here */
315 RCHAR_T *rest; /* start of rest of string */
316 RCHAR_T *tail; /* string unmatched by rest of RE */
317 sopno ssub; /* start sop of subsubRE */
318 sopno esub; /* end sop of subsubRE */
319 RCHAR_T *ssp; /* start of string matched by subsubRE */
320 RCHAR_T *sep; /* end of string matched by subsubRE */
321 RCHAR_T *oldssp; /* previous ssp */
322 RCHAR_T *dp;
324 AT("diss", start, stop, startst, stopst);
325 sp = start;
326 for (ss = startst; ss < stopst; ss = es) {
327 /* identify end of subRE */
328 es = ss;
329 switch (m->g->strip[es]) {
330 case OPLUS_:
331 case OQUEST_:
332 es += m->g->stripdata[es];
333 break;
334 case OCH_:
335 while (m->g->strip[es] != O_CH)
336 es += m->g->stripdata[es];
337 break;
339 es++;
341 /* figure out what it matched */
342 switch (m->g->strip[ss]) {
343 case OEND:
344 assert(nope);
345 break;
346 case OCHAR:
347 sp++;
348 break;
349 case OBOL:
350 case OEOL:
351 case OBOW:
352 case OEOW:
353 break;
354 case OANY:
355 case OANYOF:
356 sp++;
357 break;
358 case OBACK_:
359 case O_BACK:
360 assert(nope);
361 break;
362 /* cases where length of match is hard to find */
363 case OQUEST_:
364 stp = stop;
365 for (;;) {
366 /* how long could this one be? */
367 rest = slow(m, sp, stp, ss, es);
368 assert(rest != NULL); /* it did match */
369 /* could the rest match the rest? */
370 tail = slow(m, rest, stop, es, stopst);
371 if (tail == stop)
372 break; /* yes! */
373 /* no -- try a shorter match for this one */
374 stp = rest - 1;
375 assert(stp >= sp); /* it did work */
377 ssub = ss + 1;
378 esub = es - 1;
379 /* did innards match? */
380 if (slow(m, sp, rest, ssub, esub) != NULL) {
381 dp = dissect(m, sp, rest, ssub, esub);
382 __USE(dp);
383 assert(dp == rest);
384 } else /* no */
385 assert(sp == rest);
386 sp = rest;
387 break;
388 case OPLUS_:
389 stp = stop;
390 for (;;) {
391 /* how long could this one be? */
392 rest = slow(m, sp, stp, ss, es);
393 assert(rest != NULL); /* it did match */
394 /* could the rest match the rest? */
395 tail = slow(m, rest, stop, es, stopst);
396 if (tail == stop)
397 break; /* yes! */
398 /* no -- try a shorter match for this one */
399 stp = rest - 1;
400 assert(stp >= sp); /* it did work */
402 ssub = ss + 1;
403 esub = es - 1;
404 ssp = sp;
405 oldssp = ssp;
406 for (;;) { /* find last match of innards */
407 sep = slow(m, ssp, rest, ssub, esub);
408 if (sep == NULL || sep == ssp)
409 break; /* failed or matched null */
410 oldssp = ssp; /* on to next try */
411 ssp = sep;
413 if (sep == NULL) {
414 /* last successful match */
415 sep = ssp;
416 ssp = oldssp;
418 assert(sep == rest); /* must exhaust substring */
419 assert(slow(m, ssp, sep, ssub, esub) == rest);
420 dp = dissect(m, ssp, sep, ssub, esub);
421 assert(dp == sep);
422 sp = rest;
423 break;
424 case OCH_:
425 stp = stop;
426 for (;;) {
427 /* how long could this one be? */
428 rest = slow(m, sp, stp, ss, es);
429 assert(rest != NULL); /* it did match */
430 /* could the rest match the rest? */
431 tail = slow(m, rest, stop, es, stopst);
432 if (tail == stop)
433 break; /* yes! */
434 /* no -- try a shorter match for this one */
435 stp = rest - 1;
436 assert(stp >= sp); /* it did work */
438 ssub = ss + 1;
439 esub = ss + m->g->stripdata[ss] - 1;
440 assert(m->g->strip[esub] == OOR1);
441 for (;;) { /* find first matching branch */
442 if (slow(m, sp, rest, ssub, esub) == rest)
443 break; /* it matched all of it */
444 /* that one missed, try next one */
445 assert(m->g->strip[esub] == OOR1);
446 esub++;
447 assert(m->g->strip[esub] == OOR2);
448 ssub = esub + 1;
449 esub += m->g->stripdata[esub];
450 if (m->g->strip[esub] == OOR2)
451 esub--;
452 else
453 assert(m->g->strip[esub] == O_CH);
455 dp = dissect(m, sp, rest, ssub, esub);
456 assert(dp == rest);
457 sp = rest;
458 break;
459 case O_PLUS:
460 case O_QUEST:
461 case OOR1:
462 case OOR2:
463 case O_CH:
464 assert(nope);
465 break;
466 case OLPAREN:
467 i = m->g->stripdata[ss];
468 assert(0 < i && i <= m->g->nsub);
469 m->pmatch[i].rm_so = sp - m->offp;
470 break;
471 case ORPAREN:
472 i = m->g->stripdata[ss];
473 assert(0 < i && i <= m->g->nsub);
474 m->pmatch[i].rm_eo = sp - m->offp;
475 break;
476 default: /* uh oh */
477 assert(nope);
478 break;
482 assert(sp == stop);
483 return(sp);
487 - backref - figure out what matched what, figuring in back references
488 == static RCHAR_T *backref(struct match *m, RCHAR_T *start, \
489 == RCHAR_T *stop, sopno startst, sopno stopst, sopno lev);
491 static RCHAR_T * /* == stop (success) or NULL (failure) */
492 backref(m, start, stop, startst, stopst, lev)
493 struct match *m;
494 RCHAR_T *start;
495 RCHAR_T *stop;
496 sopno startst;
497 sopno stopst;
498 sopno lev; /* PLUS nesting level */
500 int i;
501 sopno ss; /* start sop of current subRE */
502 RCHAR_T *sp; /* start of string matched by it */
503 sopno ssub; /* start sop of subsubRE */
504 sopno esub; /* end sop of subsubRE */
505 RCHAR_T *ssp; /* start of string matched by subsubRE */
506 RCHAR_T *dp;
507 size_t len;
508 int hard;
509 sop s;
510 RCHAR_T d;
511 regoff_t offsave;
512 cset *cs;
514 AT("back", start, stop, startst, stopst);
515 sp = start;
517 /* get as far as we can with easy stuff */
518 hard = 0;
519 for (ss = startst; !hard && ss < stopst; ss++) {
520 s = m->g->strip[ss];
521 d = m->g->stripdata[ss];
522 switch (s) {
523 case OCHAR:
524 if (sp == stop || *sp++ != d)
525 return(NULL);
526 break;
527 case OANY:
528 if (sp == stop)
529 return(NULL);
530 sp++;
531 break;
532 case OANYOF:
533 cs = &m->g->sets[d];
534 if (sp == stop || !CHIN(cs, *sp++))
535 return(NULL);
536 break;
537 case OBOL:
538 if ( (sp == m->beginp && !(m->eflags&REG_NOTBOL)) ||
539 (sp < m->endp && *(sp-1) == '\n' &&
540 (m->g->cflags&REG_NEWLINE)) )
541 { /* yes */ }
542 else
543 return(NULL);
544 break;
545 case OEOL:
546 if ( (sp == m->endp && !(m->eflags&REG_NOTEOL)) ||
547 (sp < m->endp && *sp == '\n' &&
548 (m->g->cflags&REG_NEWLINE)) )
549 { /* yes */ }
550 else
551 return(NULL);
552 break;
553 case OBOW:
554 if (( (sp == m->beginp && !(m->eflags&REG_NOTBOL)) ||
555 (sp < m->endp && *(sp-1) == '\n' &&
556 (m->g->cflags&REG_NEWLINE)) ||
557 (sp > m->beginp &&
558 !ISWORD(*(sp-1))) ) &&
559 (sp < m->endp && ISWORD(*sp)) )
560 { /* yes */ }
561 else
562 return(NULL);
563 break;
564 case OEOW:
565 if (( (sp == m->endp && !(m->eflags&REG_NOTEOL)) ||
566 (sp < m->endp && *sp == '\n' &&
567 (m->g->cflags&REG_NEWLINE)) ||
568 (sp < m->endp && !ISWORD(*sp)) ) &&
569 (sp > m->beginp && ISWORD(*(sp-1))) )
570 { /* yes */ }
571 else
572 return(NULL);
573 break;
574 case O_QUEST:
575 break;
576 case OOR1: /* matches null but needs to skip */
577 ss++;
578 s = m->g->strip[ss];
579 d = m->g->stripdata[ss];
580 do {
581 assert(s == OOR2);
582 ss += d;
583 s = m->g->strip[ss];
584 d = m->g->stripdata[ss];
585 } while (s != O_CH);
586 /* note that the ss++ gets us past the O_CH */
587 break;
588 default: /* have to make a choice */
589 hard = 1;
590 break;
593 if (!hard) { /* that was it! */
594 if (sp != stop)
595 return(NULL);
596 return(sp);
598 ss--; /* adjust for the for's final increment */
600 /* the hard stuff */
601 AT("hard", sp, stop, ss, stopst);
602 s = m->g->strip[ss];
603 d = m->g->stripdata[ss];
604 switch (s) {
605 case OBACK_: /* the vilest depths */
606 i = d;
607 assert(0 < i && i <= m->g->nsub);
608 if (m->pmatch[i].rm_eo == -1)
609 return(NULL);
610 assert(m->pmatch[i].rm_so != -1);
611 len = m->pmatch[i].rm_eo - m->pmatch[i].rm_so;
612 assert(stop - m->beginp >= len);
613 if (sp > stop - len)
614 return(NULL); /* not enough left to match */
615 ssp = m->offp + m->pmatch[i].rm_so;
616 if (memcmp(sp, ssp, len) != 0)
617 return(NULL);
618 while (m->g->strip[ss] != O_BACK || m->g->stripdata[ss] != i)
619 ss++;
620 return(backref(m, sp+len, stop, ss+1, stopst, lev));
621 break;
622 case OQUEST_: /* to null or not */
623 dp = backref(m, sp, stop, ss+1, stopst, lev);
624 if (dp != NULL)
625 return(dp); /* not */
626 return(backref(m, sp, stop, ss+d+1, stopst, lev));
627 break;
628 case OPLUS_:
629 assert(m->lastpos != NULL);
630 assert(lev+1 <= m->g->nplus);
631 m->lastpos[lev+1] = sp;
632 return(backref(m, sp, stop, ss+1, stopst, lev+1));
633 break;
634 case O_PLUS:
635 if (sp == m->lastpos[lev]) /* last pass matched null */
636 return(backref(m, sp, stop, ss+1, stopst, lev-1));
637 /* try another pass */
638 m->lastpos[lev] = sp;
639 dp = backref(m, sp, stop, ss-d+1, stopst, lev);
640 if (dp == NULL)
641 return(backref(m, sp, stop, ss+1, stopst, lev-1));
642 else
643 return(dp);
644 break;
645 case OCH_: /* find the right one, if any */
646 ssub = ss + 1;
647 esub = ss + d - 1;
648 assert(m->g->strip[esub] == OOR1);
649 for (;;) { /* find first matching branch */
650 dp = backref(m, sp, stop, ssub, esub, lev);
651 if (dp != NULL)
652 return(dp);
653 /* that one missed, try next one */
654 if (m->g->strip[esub] == O_CH)
655 return(NULL); /* there is none */
656 esub++;
657 assert(m->g->strip[esub] == OOR2);
658 ssub = esub + 1;
659 esub += m->g->stripdata[esub];
660 if (m->g->strip[esub] == OOR2)
661 esub--;
662 else
663 assert(m->g->strip[esub] == O_CH);
665 break;
666 case OLPAREN: /* must undo assignment if rest fails */
667 i = d;
668 assert(0 < i && i <= m->g->nsub);
669 offsave = m->pmatch[i].rm_so;
670 m->pmatch[i].rm_so = sp - m->offp;
671 dp = backref(m, sp, stop, ss+1, stopst, lev);
672 if (dp != NULL)
673 return(dp);
674 m->pmatch[i].rm_so = offsave;
675 return(NULL);
676 break;
677 case ORPAREN: /* must undo assignment if rest fails */
678 i = d;
679 assert(0 < i && i <= m->g->nsub);
680 offsave = m->pmatch[i].rm_eo;
681 m->pmatch[i].rm_eo = sp - m->offp;
682 dp = backref(m, sp, stop, ss+1, stopst, lev);
683 if (dp != NULL)
684 return(dp);
685 m->pmatch[i].rm_eo = offsave;
686 return(NULL);
687 break;
688 default: /* uh oh */
689 assert(nope);
690 break;
693 /* "can't happen" */
694 assert(nope);
695 /* NOTREACHED */
696 return NULL;
700 - fast - step through the string at top speed
701 == static RCHAR_T *fast(struct match *m, RCHAR_T *start, \
702 == RCHAR_T *stop, sopno startst, sopno stopst);
704 static RCHAR_T * /* where tentative match ended, or NULL */
705 fast(m, start, stop, startst, stopst)
706 struct match *m;
707 RCHAR_T *start;
708 RCHAR_T *stop;
709 sopno startst;
710 sopno stopst;
712 states st = m->st;
713 states fresh = m->fresh;
714 states tmp = m->tmp;
715 RCHAR_T *p = start;
716 RCHAR_T c = (start == m->beginp) ? OUT : *(start-1);
717 RCHAR_T lastc; /* previous c */
718 int flag;
719 int i;
720 RCHAR_T *coldp; /* last p after which no match was underway */
722 CLEAR(st);
723 SET1(st, startst);
724 st = step(m->g, startst, stopst, st, NOTHING, OUT, st);
725 ASSIGN(fresh, st);
726 SP("start", st, *p);
727 coldp = NULL;
728 for (;;) {
729 /* next character */
730 lastc = c;
731 c = (p == m->endp) ? OUT : *p;
732 if (EQ(st, fresh))
733 coldp = p;
735 /* is there an EOL and/or BOL between lastc and c? */
736 flag = 0;
737 i = 0;
738 if ( (lastc == '\n' && m->g->cflags&REG_NEWLINE) ||
739 (lastc == OUT && !(m->eflags&REG_NOTBOL)) ) {
740 flag = BOL;
741 i = m->g->nbol;
743 if ( (c == '\n' && m->g->cflags&REG_NEWLINE) ||
744 (c == OUT && !(m->eflags&REG_NOTEOL)) ) {
745 flag = (flag == BOL) ? BOLEOL : EOL;
746 i += m->g->neol;
748 if (i != 0) {
749 for (; i > 0; i--)
750 st = step(m->g, startst, stopst, st, flag, OUT, st);
751 SP("boleol", st, c);
754 /* how about a word boundary? */
755 if ( (flag == BOL || (lastc != OUT && !ISWORD(lastc))) &&
756 (c != OUT && ISWORD(c)) ) {
757 flag = BOW;
759 if ( (lastc != OUT && ISWORD(lastc)) &&
760 (flag == EOL || (c != OUT && !ISWORD(c))) ) {
761 flag = EOW;
763 if (flag == BOW || flag == EOW) {
764 st = step(m->g, startst, stopst, st, flag, OUT, st);
765 SP("boweow", st, c);
768 /* are we done? */
769 if (ISSET(st, stopst) || p == stop)
770 break; /* NOTE BREAK OUT */
772 /* no, we must deal with this character */
773 ASSIGN(tmp, st);
774 ASSIGN(st, fresh);
775 assert(c != OUT);
776 st = step(m->g, startst, stopst, tmp, 0, c, st);
777 SP("aft", st, c);
778 assert(EQ(step(m->g, startst, stopst, st, NOTHING, OUT, st), st));
779 p++;
782 assert(coldp != NULL);
783 m->coldp = coldp;
784 if (ISSET(st, stopst))
785 return(p+1);
786 else
787 return(NULL);
791 - slow - step through the string more deliberately
792 == static RCHAR_T *slow(struct match *m, RCHAR_T *start, \
793 == RCHAR_T *stop, sopno startst, sopno stopst);
795 static RCHAR_T * /* where it ended */
796 slow(m, start, stop, startst, stopst)
797 struct match *m;
798 RCHAR_T *start;
799 RCHAR_T *stop;
800 sopno startst;
801 sopno stopst;
803 states st = m->st;
804 states empty = m->empty;
805 states tmp = m->tmp;
806 RCHAR_T *p = start;
807 RCHAR_T c = (start == m->beginp) ? OUT : *(start-1);
808 RCHAR_T lastc; /* previous c */
809 int flag;
810 int i;
811 RCHAR_T *matchp; /* last p at which a match ended */
813 AT("slow", start, stop, startst, stopst);
814 CLEAR(st);
815 SET1(st, startst);
816 SP("sstart", st, *p);
817 st = step(m->g, startst, stopst, st, NOTHING, OUT, st);
818 matchp = NULL;
819 for (;;) {
820 /* next character */
821 lastc = c;
822 c = (p == m->endp) ? OUT : *p;
824 /* is there an EOL and/or BOL between lastc and c? */
825 flag = 0;
826 i = 0;
827 if ( (lastc == '\n' && m->g->cflags&REG_NEWLINE) ||
828 (lastc == OUT && !(m->eflags&REG_NOTBOL)) ) {
829 flag = BOL;
830 i = m->g->nbol;
832 if ( (c == '\n' && m->g->cflags&REG_NEWLINE) ||
833 (c == OUT && !(m->eflags&REG_NOTEOL)) ) {
834 flag = (flag == BOL) ? BOLEOL : EOL;
835 i += m->g->neol;
837 if (i != 0) {
838 for (; i > 0; i--)
839 st = step(m->g, startst, stopst, st, flag, OUT, st);
840 SP("sboleol", st, c);
843 /* how about a word boundary? */
844 if ( (flag == BOL || (lastc != OUT && !ISWORD(lastc))) &&
845 (c != OUT && ISWORD(c)) ) {
846 flag = BOW;
848 if ( (lastc != OUT && ISWORD(lastc)) &&
849 (flag == EOL || (c != OUT && !ISWORD(c))) ) {
850 flag = EOW;
852 if (flag == BOW || flag == EOW) {
853 st = step(m->g, startst, stopst, st, flag, OUT, st);
854 SP("sboweow", st, c);
857 /* are we done? */
858 if (ISSET(st, stopst))
859 matchp = p;
860 if (EQ(st, empty) || p == stop)
861 break; /* NOTE BREAK OUT */
863 /* no, we must deal with this character */
864 ASSIGN(tmp, st);
865 ASSIGN(st, empty);
866 assert(c != OUT);
867 st = step(m->g, startst, stopst, tmp, 0, c, st);
868 SP("saft", st, c);
869 assert(EQ(step(m->g, startst, stopst, st, NOTHING, OUT, st), st));
870 p++;
873 return(matchp);
878 - step - map set of states reachable before char to set reachable after
879 == static states step(struct re_guts *g, sopno start, sopno stop, \
880 == states bef, int flag, RCHAR_T ch, states aft);
881 == #define BOL (1)
882 == #define EOL (BOL+1)
883 == #define BOLEOL (BOL+2)
884 == #define NOTHING (BOL+3)
885 == #define BOW (BOL+4)
886 == #define EOW (BOL+5)
888 static states
889 step(g, start, stop, bef, flag, ch, aft)
890 struct re_guts *g;
891 sopno start; /* start state within strip */
892 sopno stop; /* state after stop state within strip */
893 states bef; /* states reachable before */
894 int flag; /* NONCHAR flag */
895 RCHAR_T ch; /* character code */
896 states aft; /* states already known reachable after */
898 cset *cs;
899 sop s;
900 RCHAR_T d;
901 sopno pc;
902 onestate here; /* note, macros know this name */
903 sopno look;
904 int i;
906 for (pc = start, INIT(here, pc); pc != stop; pc++, INC(here)) {
907 s = g->strip[pc];
908 d = g->stripdata[pc];
909 switch (s) {
910 case OEND:
911 assert(pc == stop-1);
912 break;
913 case OCHAR:
914 /* only characters can match */
915 assert(!flag || ch != d);
916 if (ch == d)
917 FWD(aft, bef, 1);
918 break;
919 case OBOL:
920 if (flag == BOL || flag == BOLEOL)
921 FWD(aft, bef, 1);
922 break;
923 case OEOL:
924 if (flag == EOL || flag == BOLEOL)
925 FWD(aft, bef, 1);
926 break;
927 case OBOW:
928 if (flag == BOW)
929 FWD(aft, bef, 1);
930 break;
931 case OEOW:
932 if (flag == EOW)
933 FWD(aft, bef, 1);
934 break;
935 case OANY:
936 if (!flag)
937 FWD(aft, bef, 1);
938 break;
939 case OANYOF:
940 cs = &g->sets[d];
941 if (!flag && CHIN(cs, ch))
942 FWD(aft, bef, 1);
943 break;
944 case OBACK_: /* ignored here */
945 case O_BACK:
946 FWD(aft, aft, 1);
947 break;
948 case OPLUS_: /* forward, this is just an empty */
949 FWD(aft, aft, 1);
950 break;
951 case O_PLUS: /* both forward and back */
952 FWD(aft, aft, 1);
953 i = ISSETBACK(aft, d);
954 BACK(aft, aft, d);
955 if (!i && ISSETBACK(aft, d)) {
956 /* oho, must reconsider loop body */
957 pc -= d + 1;
958 INIT(here, pc);
960 break;
961 case OQUEST_: /* two branches, both forward */
962 FWD(aft, aft, 1);
963 FWD(aft, aft, d);
964 break;
965 case O_QUEST: /* just an empty */
966 FWD(aft, aft, 1);
967 break;
968 case OLPAREN: /* not significant here */
969 case ORPAREN:
970 FWD(aft, aft, 1);
971 break;
972 case OCH_: /* mark the first two branches */
973 FWD(aft, aft, 1);
974 assert(OP(g->strip[pc+d]) == OOR2);
975 FWD(aft, aft, d);
976 break;
977 case OOR1: /* done a branch, find the O_CH */
978 if (ISSTATEIN(aft, here)) {
979 for (look = 1; /**/; look += d) {
980 s = g->strip[pc+look];
981 d = g->stripdata[pc+look];
982 if (s == O_CH)
983 break;
984 assert(s == OOR2);
986 FWD(aft, aft, look);
988 break;
989 case OOR2: /* propagate OCH_'s marking */
990 FWD(aft, aft, 1);
991 if (g->strip[pc+d] != O_CH) {
992 assert(g->strip[pc+d] == OOR2);
993 FWD(aft, aft, d);
995 break;
996 case O_CH: /* just empty */
997 FWD(aft, aft, 1);
998 break;
999 default: /* ooooops... */
1000 assert(nope);
1001 break;
1005 return(aft);
1008 #ifdef REDEBUG
1010 - print - print a set of states
1011 == #ifdef REDEBUG
1012 == static void print(struct match *m, char *caption, states st, \
1013 == int ch, FILE *d);
1014 == #endif
1016 static void
1017 print(m, caption, st, ch, d)
1018 struct match *m;
1019 char *caption;
1020 states st;
1021 int ch;
1022 FILE *d;
1024 struct re_guts *g = m->g;
1025 int i;
1026 int first = 1;
1028 if (!(m->eflags&REG_TRACE))
1029 return;
1031 fprintf(d, "%s", caption);
1032 if (ch != '\0')
1033 fprintf(d, " %s", pchar(ch));
1034 for (i = 0; i < g->nstates; i++)
1035 if (ISSET(st, i)) {
1036 fprintf(d, "%s%d", (first) ? "\t" : ", ", i);
1037 first = 0;
1039 fprintf(d, "\n");
1043 - at - print current situation
1044 == #ifdef REDEBUG
1045 == static void at(struct match *m, char *title, char *start, char *stop, \
1046 == sopno startst, sopno stopst);
1047 == #endif
1049 static void
1050 at(m, title, start, stop, startst, stopst)
1051 struct match *m;
1052 char *title;
1053 char *start;
1054 char *stop;
1055 sopno startst;
1056 sopno stopst;
1058 if (!(m->eflags&REG_TRACE))
1059 return;
1061 printf("%s %s-", title, pchar(*start));
1062 printf("%s ", pchar(*stop));
1063 printf("%ld-%ld\n", (long)startst, (long)stopst);
1066 #ifndef PCHARDONE
1067 #define PCHARDONE /* never again */
1069 - pchar - make a character printable
1070 == #ifdef REDEBUG
1071 == static char *pchar(int ch);
1072 == #endif
1074 * Is this identical to regchar() over in debug.c? Well, yes. But a
1075 * duplicate here avoids having a debugging-capable regexec.o tied to
1076 * a matching debug.o, and this is convenient. It all disappears in
1077 * the non-debug compilation anyway, so it doesn't matter much.
1079 static char * /* -> representation */
1080 pchar(ch)
1081 int ch;
1083 static char pbuf[10];
1085 if (isprint(ch) || ch == ' ')
1086 sprintf(pbuf, "%c", ch);
1087 else
1088 sprintf(pbuf, "\\%o", ch);
1089 return(pbuf);
1091 #endif
1092 #endif
1094 #undef matcher
1095 #undef fast
1096 #undef slow
1097 #undef dissect
1098 #undef backref
1099 #undef step
1100 #undef print
1101 #undef at
1102 #undef match