dmake: do not set MAKEFLAGS=k
[unleashed/tickless.git] / usr / src / lib / libast / common / regex / regnexec.c
blob13bb1b7238c420384c64ba4887ecad6ff3c5e22e
1 /***********************************************************************
2 * *
3 * This software is part of the ast package *
4 * Copyright (c) 1985-2010 AT&T Intellectual Property *
5 * and is licensed under the *
6 * Common Public License, Version 1.0 *
7 * by AT&T Intellectual Property *
8 * *
9 * A copy of the License is available at *
10 * http://www.opensource.org/licenses/cpl1.0.txt *
11 * (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) *
12 * *
13 * Information and Software Systems Research *
14 * AT&T Research *
15 * Florham Park NJ *
16 * *
17 * Glenn Fowler <gsf@research.att.com> *
18 * David Korn <dgk@research.att.com> *
19 * Phong Vo <kpv@research.att.com> *
20 * *
21 ***********************************************************************/
22 #pragma prototyped
25 * posix regex executor
26 * single sized-record interface
29 #include "reglib.h"
31 #if _AST_REGEX_DEBUG
33 #define DEBUG_TEST(f,y,n) ((debug&(debug_flag=f))?(y):(n))
34 #define DEBUG_CODE(f,y,n) do if(debug&(f)){y}else{n} while(0)
35 #define DEBUG_INIT() do { char* t; if (!debug) { debug = 0x80000000; if (t = getenv("_AST_regex_exec_debug")) debug |= strtoul(t, NiL, 0); } } while (0)
37 static unsigned long debug;
38 static unsigned long debug_flag;
40 static const char* rexnames[] =
42 "REX_NULL",
43 "REX_ALT",
44 "REX_ALT_CATCH",
45 "REX_BACK",
46 "REX_BEG",
47 "REX_BEG_STR",
48 "REX_BM",
49 "REX_CAT",
50 "REX_CLASS",
51 "REX_COLL_CLASS",
52 "REX_CONJ",
53 "REX_CONJ_LEFT",
54 "REX_CONJ_RIGHT",
55 "REX_DONE",
56 "REX_DOT",
57 "REX_END",
58 "REX_END_STR",
59 "REX_EXEC",
60 "REX_FIN_STR",
61 "REX_GROUP",
62 "REX_GROUP_CATCH",
63 "REX_GROUP_AHEAD",
64 "REX_GROUP_AHEAD_CATCH",
65 "REX_GROUP_AHEAD_NOT",
66 "REX_GROUP_BEHIND",
67 "REX_GROUP_BEHIND_CATCH",
68 "REX_GROUP_BEHIND_NOT",
69 "REX_GROUP_BEHIND_NOT_CATCH",
70 "REX_GROUP_COND",
71 "REX_GROUP_COND_CATCH",
72 "REX_GROUP_CUT",
73 "REX_GROUP_CUT_CATCH",
74 "REX_KMP",
75 "REX_NEG",
76 "REX_NEG_CATCH",
77 "REX_NEST",
78 "REX_ONECHAR",
79 "REX_REP",
80 "REX_REP_CATCH",
81 "REX_STRING",
82 "REX_TRIE",
83 "REX_WBEG",
84 "REX_WEND",
85 "REX_WORD",
86 "REX_WORD_NOT"
89 static const char* rexname(Rex_t* rex)
91 if (!rex)
92 return "NIL";
93 if (rex->type >= elementsof(rexnames))
94 return "ERROR";
95 return rexnames[rex->type];
98 #else
100 #define DEBUG_INIT()
101 #define DEBUG_TEST(f,y,n) (n)
102 #define DEBUG_CODE(f,y,n) do {n} while(0)
104 #endif
106 #define BEG_ALT 1 /* beginning of an alt */
107 #define BEG_ONE 2 /* beginning of one iteration of a rep */
108 #define BEG_REP 3 /* beginning of a repetition */
109 #define BEG_SUB 4 /* beginning of a subexpression */
110 #define END_ANY 5 /* end of any of above */
113 * returns from parse()
116 #define NONE 0 /* no parse found */
117 #define GOOD 1 /* some parse was found */
118 #define CUT 2 /* no match and no backtrack */
119 #define BEST 3 /* an unbeatable parse was found */
120 #define BAD 4 /* error ocurred */
123 * REG_SHELL_DOT test
126 #define LEADING(e,r,s) (*(s)==(e)->leading&&((s)==(e)->beg||*((s)-1)==(r)->explicit))
129 * Pos_t is for comparing parses. An entry is made in the
130 * array at the beginning and at the end of each Group_t,
131 * each iteration in a Group_t, and each Binary_t.
134 typedef struct
136 unsigned char* p; /* where in string */
137 size_t length; /* length in string */
138 short serial; /* preorder subpattern number */
139 short be; /* which end of pair */
140 } Pos_t;
142 /* ===== begin library support ===== */
144 #define vector(t,v,i) (((i)<(v)->max)?(t*)((v)->vec+(i)*(v)->siz):(t*)vecseek(&(v),i))
146 static Vector_t*
147 vecopen(int inc, int siz)
149 Vector_t* v;
150 Stk_t* sp;
152 if (inc <= 0)
153 inc = 16;
154 if (!(sp = stkopen(STK_SMALL|STK_NULL)))
155 return 0;
156 if (!(v = (Vector_t*)stkseek(sp, sizeof(Vector_t) + inc * siz)))
158 stkclose(sp);
159 return 0;
161 v->stk = sp;
162 v->vec = (char*)v + sizeof(Vector_t);
163 v->max = v->inc = inc;
164 v->siz = siz;
165 v->cur = 0;
166 return v;
169 static void*
170 vecseek(Vector_t** p, int index)
172 Vector_t* v = *p;
174 if (index >= v->max)
176 while ((v->max += v->inc) <= index);
177 if (!(v = (Vector_t*)stkseek(v->stk, sizeof(Vector_t) + v->max * v->siz)))
178 return 0;
179 *p = v;
180 v->vec = (char*)v + sizeof(Vector_t);
182 return v->vec + index * v->siz;
185 static void
186 vecclose(Vector_t* v)
188 if (v)
189 stkclose(v->stk);
192 typedef struct
194 Stk_pos_t pos;
195 char data[1];
196 } Stk_frame_t;
198 #define stknew(s,p) ((p)->offset=stktell(s),(p)->base=stkfreeze(s,0))
199 #define stkold(s,p) stkset(s,(p)->base,(p)->offset)
201 #define stkframe(s) (*((Stk_frame_t**)stktop(s)-1))
202 #define stkdata(s,t) ((t*)stkframe(s)->data)
203 #define stkpop(s) stkold(s,&(stkframe(s)->pos))
205 static void*
206 stkpush(Stk_t* sp, size_t size)
208 Stk_frame_t* f;
209 Stk_pos_t p;
211 stknew(sp, &p);
212 size = sizeof(Stk_frame_t) + sizeof(size_t) + size - 1;
213 if (!(f = (Stk_frame_t*)stkalloc(sp, sizeof(Stk_frame_t) + sizeof(Stk_frame_t*) + size - 1)))
214 return 0;
215 f->pos = p;
216 stkframe(sp) = f;
217 return f->data;
220 /* ===== end library support ===== */
223 * Match_frame_t is for saving and restoring match records
224 * around alternate attempts, so that fossils will not be
225 * left in the match array. These are the only entries in
226 * the match array that are not otherwise guaranteed to
227 * have current data in them when they get used.
230 typedef struct
232 size_t size;
233 regmatch_t* match;
234 regmatch_t save[1];
235 } Match_frame_t;
237 #define matchframe stkdata(stkstd,Match_frame_t)
238 #define matchpush(e,x) ((x)->re.group.number?_matchpush(e,x):0)
239 #define matchcopy(e,x) ((x)->re.group.number?memcpy(matchframe->match,matchframe->save,matchframe->size):(void*)0)
240 #define matchpop(e,x) ((x)->re.group.number?memcpy(matchframe->match,matchframe->save,matchframe->size),stkpop(stkstd):(void*)0)
242 #define pospop(e) (--(e)->pos->cur)
245 * allocate a frame and push a match onto the stack
248 static int
249 _matchpush(Env_t* env, Rex_t* rex)
251 Match_frame_t* f;
252 regmatch_t* m;
253 regmatch_t* e;
254 regmatch_t* s;
255 int num;
257 if (rex->re.group.number <= 0 || (num = rex->re.group.last - rex->re.group.number + 1) <= 0)
258 num = 0;
259 if (!(f = (Match_frame_t*)stkpush(stkstd, sizeof(Match_frame_t) + (num - 1) * sizeof(regmatch_t))))
261 env->error = REG_ESPACE;
262 return 1;
264 f->size = num * sizeof(regmatch_t);
265 f->match = m = env->match + rex->re.group.number;
266 e = m + num;
267 s = f->save;
268 while (m < e)
270 *s++ = *m;
271 *m++ = state.nomatch;
273 return 0;
277 * allocate a frame and push a pos onto the stack
280 static int
281 pospush(Env_t* env, Rex_t* rex, unsigned char* p, int be)
283 Pos_t* pos;
285 if (!(pos = vector(Pos_t, env->pos, env->pos->cur)))
287 env->error = REG_ESPACE;
288 return 1;
290 pos->serial = rex->serial;
291 pos->p = p;
292 pos->be = be;
293 env->pos->cur++;
294 return 0;
298 * two matches are known to have the same length
299 * os is start of old pos array, ns is start of new,
300 * oend and nend are end+1 pointers to ends of arrays.
301 * oe and ne are ends (not end+1) of subarrays.
302 * returns 1 if new is better, -1 if old, else 0.
305 static int
306 better(Env_t* env, Pos_t* os, Pos_t* ns, Pos_t* oend, Pos_t* nend, int level)
308 Pos_t* oe;
309 Pos_t* ne;
310 int k;
311 int n;
313 if (env->error)
314 return -1;
315 for (;;)
317 DEBUG_CODE(0x0080,{sfprintf(sfstdout, " %-*.*sold ", (level + 3) * 4, (level + 3) * 4, "");for (oe = os; oe < oend; oe++)sfprintf(sfstdout, "<%d,%d,%d>", oe->p - env->beg, oe->serial, oe->be);sfprintf(sfstdout, "\n %-*.*snew ", (level + 3) * 4, (level + 3) * 4, "");for (oe = ns; oe < nend; oe++)sfprintf(sfstdout, "<%d,%d,%d>", oe->p - env->beg, oe->serial, oe->be);sfprintf(sfstdout, "\n");},{0;});
318 if (ns >= nend)
319 return DEBUG_TEST(0x8000,(os < oend),(0));
320 if (os >= oend)
321 return DEBUG_TEST(0x8000,(-1),(1));
322 n = os->serial;
323 if (ns->serial > n)
324 return -1;
325 if (n > ns->serial)
327 env->error = REG_PANIC;
328 return -1;
330 if (ns->p > os->p)
331 return 1;
332 if (os->p > ns->p)
333 return -1;
334 oe = os;
335 k = 0;
336 for (;;)
337 if ((++oe)->serial == n)
339 if (oe->be != END_ANY)
340 k++;
341 else if (k-- <= 0)
342 break;
344 ne = ns;
345 k = 0;
346 for (;;)
347 if ((++ne)->serial == n)
349 if (ne->be != END_ANY)
350 k++;
351 else if (k-- <= 0)
352 break;
354 if (ne->p > oe->p)
355 return 1;
356 if (oe->p > ne->p)
357 return -1;
358 if (k = better(env, os + 1, ns + 1, oe, ne, level + 1))
359 return k;
360 os = oe + 1;
361 ns = ne + 1;
365 #if _AST_REGEX_DEBUG
367 static void
368 showmatch(regmatch_t* p)
370 sfputc(sfstdout, '(');
371 if (p->rm_so < 0)
372 sfputc(sfstdout, '?');
373 else
374 sfprintf(sfstdout, "%d", p->rm_so);
375 sfputc(sfstdout, ',');
376 if (p->rm_eo < 0)
377 sfputc(sfstdout, '?');
378 else
379 sfprintf(sfstdout, "%d", p->rm_eo);
380 sfputc(sfstdout, ')');
383 static int
384 _better(Env_t* env, Pos_t* os, Pos_t* ns, Pos_t* oend, Pos_t* nend, int level)
386 int i;
388 DEBUG_CODE(0x0040,{sfprintf(sfstdout, "AHA better old ");for (i = 0; i <= env->nsub; i++)showmatch(&env->best[i]);sfprintf(sfstdout, "\n new ");for (i = 0; i <= env->nsub; i++)showmatch(&env->match[i]);sfprintf(sfstdout, "\n");},{0;});
389 i = better(env, os, ns, oend, nend, 0);
390 DEBUG_TEST(0x0040,(sfprintf(sfstdout, " %s\n", i <= 0 ? "OLD" : "NEW")),(0));
391 return i;
394 #define better _better
396 #endif
398 #define follow(e,r,c,s) ((r)->next?parse(e,(r)->next,c,s):(c)?parse(e,c,0,s):BEST)
400 static int parse(Env_t*, Rex_t*, Rex_t*, unsigned char*);
402 static int
403 parserep(Env_t* env, Rex_t* rex, Rex_t* cont, unsigned char* s, int n)
405 int i;
406 int r = NONE;
407 Rex_t catcher;
409 DEBUG_TEST(0x0010,(sfprintf(sfstdout, "AHA#%04d 0x%04x parserep %s %d %d %d %d `%-.*s'\n", __LINE__, debug_flag, rexname(rex->re.group.expr.rex), rex->re.group.number, rex->lo, n, rex->hi, env->end - s, s)),(0));
410 if ((rex->flags & REG_MINIMAL) && n >= rex->lo && n < rex->hi)
412 if (env->stack && pospush(env, rex, s, END_ANY))
413 return BAD;
414 i = follow(env, rex, cont, s);
415 if (env->stack)
416 pospop(env);
417 switch (i)
419 case BAD:
420 return BAD;
421 case CUT:
422 return CUT;
423 case BEST:
424 case GOOD:
425 return BEST;
428 if (n < rex->hi)
430 catcher.type = REX_REP_CATCH;
431 catcher.serial = rex->serial;
432 catcher.re.rep_catch.ref = rex;
433 catcher.re.rep_catch.cont = cont;
434 catcher.re.rep_catch.beg = s;
435 catcher.re.rep_catch.n = n + 1;
436 catcher.next = rex->next;
437 if (n == 0)
438 rex->re.rep_catch.beg = s;
439 if (env->stack)
441 if (matchpush(env, rex))
442 return BAD;
443 if (pospush(env, rex, s, BEG_ONE))
444 return BAD;
445 DEBUG_TEST(0x0004,(sfprintf(sfstdout,"AHA#%04d 0x%04x PUSH %d (%d,%d)(%d,%d)(%d,%d) (%d,%d)(%d,%d)(%d,%d)\n", __LINE__, debug_flag, rex->re.group.number, env->best[0].rm_so, env->best[0].rm_eo, env->best[1].rm_so, env->best[1].rm_eo, env->best[2].rm_so, env->best[2].rm_eo, env->match[0].rm_so, env->match[0].rm_eo, env->match[1].rm_so, env->match[1].rm_eo, env->match[2].rm_so, env->match[2].rm_eo)),(0));
447 r = parse(env, rex->re.group.expr.rex, &catcher, s);
448 DEBUG_TEST(0x0010,(sfprintf(sfstdout, "AHA#%04d 0x%04x parserep parse %d %d `%-.*s'\n", __LINE__, debug_flag, rex->re.group.number, r, env->end - s, s)),(0));
449 if (env->stack)
451 pospop(env);
452 matchpop(env, rex);
453 DEBUG_TEST(0x0004,(sfprintf(sfstdout,"AHA#%04d 0x%04x POP %d %d (%d,%d)(%d,%d)(%d,%d) (%d,%d)(%d,%d)(%d,%d)\n", __LINE__, debug_flag, rex->re.group.number, r, env->best[0].rm_so, env->best[0].rm_eo, env->best[1].rm_so, env->best[1].rm_eo, env->best[2].rm_so, env->best[2].rm_eo, env->match[0].rm_so, env->match[0].rm_eo, env->match[1].rm_so, env->match[1].rm_eo, env->match[2].rm_so, env->match[2].rm_eo)),(0));
455 switch (r)
457 case BAD:
458 return BAD;
459 case BEST:
460 return BEST;
461 case CUT:
462 r = NONE;
463 break;
464 case GOOD:
465 if (rex->flags & REG_MINIMAL)
466 return BEST;
467 r = GOOD;
468 break;
471 if (n < rex->lo)
472 return r;
473 if (!(rex->flags & REG_MINIMAL) || n >= rex->hi)
475 if (env->stack && pospush(env, rex, s, END_ANY))
476 return BAD;
477 i = follow(env, rex, cont, s);
478 if (env->stack)
479 pospop(env);
480 switch (i)
482 case BAD:
483 r = BAD;
484 break;
485 case CUT:
486 r = CUT;
487 break;
488 case BEST:
489 r = BEST;
490 break;
491 case GOOD:
492 r = (rex->flags & REG_MINIMAL) ? BEST : GOOD;
493 break;
496 return r;
499 static int
500 parsetrie(Env_t* env, Trie_node_t* x, Rex_t* rex, Rex_t* cont, unsigned char* s)
502 unsigned char* p;
503 int r;
505 if (p = rex->map)
507 for (;;)
509 if (s >= env->end)
510 return NONE;
511 while (x->c != p[*s])
512 if (!(x = x->sib))
513 return NONE;
514 if (x->end)
515 break;
516 x = x->son;
517 s++;
520 else
522 for (;;)
524 if (s >= env->end)
525 return NONE;
526 while (x->c != *s)
527 if (!(x = x->sib))
528 return NONE;
529 if (x->end)
530 break;
531 x = x->son;
532 s++;
535 s++;
536 if (rex->flags & REG_MINIMAL)
537 switch (follow(env, rex, cont, s))
539 case BAD:
540 return BAD;
541 case CUT:
542 return CUT;
543 case BEST:
544 case GOOD:
545 return BEST;
547 if (x->son)
548 switch (parsetrie(env, x->son, rex, cont, s))
550 case BAD:
551 return BAD;
552 case CUT:
553 return CUT;
554 case BEST:
555 return BEST;
556 case GOOD:
557 if (rex->flags & REG_MINIMAL)
558 return BEST;
559 r = GOOD;
560 break;
561 default:
562 r = NONE;
563 break;
565 else
566 r = NONE;
567 if (!(rex->flags & REG_MINIMAL))
568 switch (follow(env, rex, cont, s))
570 case BAD:
571 return BAD;
572 case CUT:
573 return CUT;
574 case BEST:
575 return BEST;
576 case GOOD:
577 return GOOD;
579 return r;
582 static int
583 collelt(register Celt_t* ce, char* key, int c, int x)
585 Ckey_t elt;
587 mbxfrm(elt, key, COLL_KEY_MAX);
588 for (;; ce++)
590 switch (ce->typ)
592 case COLL_call:
593 if (!x && (*ce->fun)(c))
594 return 1;
595 continue;
596 case COLL_char:
597 if (!strcmp((char*)ce->beg, (char*)elt))
598 return 1;
599 continue;
600 case COLL_range:
601 if (strcmp((char*)ce->beg, (char*)elt) <= ce->min && strcmp((char*)elt, (char*)ce->end) <= ce->max)
602 return 1;
603 continue;
604 case COLL_range_lc:
605 if (strcmp((char*)ce->beg, (char*)elt) <= ce->min && strcmp((char*)elt, (char*)ce->end) <= ce->max && (iswlower(c) || !iswupper(c)))
606 return 1;
607 continue;
608 case COLL_range_uc:
609 if (strcmp((char*)ce->beg, (char*)elt) <= ce->min && strcmp((char*)elt, (char*)ce->end) <= ce->max && (iswupper(c) || !iswlower(c)))
610 return 1;
611 continue;
613 break;
615 return 0;
618 static int
619 collic(register Celt_t* ce, char* key, register char* nxt, int c, int x)
621 if (!x)
623 if (collelt(ce, key, c, x))
624 return 1;
625 if (iswlower(c))
626 c = towupper(c);
627 else if (iswupper(c))
628 c = towlower(c);
629 else
630 return 0;
631 x = mbconv(key, c);
632 key[x] = 0;
633 return collelt(ce, key, c, 0);
635 while (*nxt)
637 if (collic(ce, key, nxt + 1, c, x))
638 return 1;
639 if (islower(*nxt))
640 *nxt = toupper(*nxt);
641 else if (isupper(*nxt))
642 *nxt = tolower(*nxt);
643 else
644 return 0;
645 nxt++;
647 return collelt(ce, key, c, x);
650 static int
651 collmatch(Rex_t* rex, unsigned char* s, unsigned char* e, unsigned char** p)
653 unsigned char* t;
654 wchar_t c;
655 int w;
656 int r;
657 int x;
658 int ic;
659 Ckey_t key;
660 Ckey_t elt;
662 ic = (rex->flags & REG_ICASE);
663 if ((w = MBSIZE(s)) > 1)
665 memcpy((char*)key, (char*)s, w);
666 key[w] = 0;
667 t = s;
668 c = mbchar(t);
669 #if !_lib_wctype
670 c &= 0xff;
671 #endif
672 x = 0;
674 else
676 c = s[0];
677 if (ic && isupper(c))
678 c = tolower(c);
679 key[0] = c;
680 key[1] = 0;
681 if (isalpha(c))
683 x = e - s;
684 if (x > COLL_KEY_MAX)
685 x = COLL_KEY_MAX;
686 while (w < x)
688 c = s[w];
689 if (!isalpha(c))
690 break;
691 r = mbxfrm(elt, key, COLL_KEY_MAX);
692 if (ic && isupper(c))
693 c = tolower(c);
694 key[w] = c;
695 key[w + 1] = 0;
696 if (mbxfrm(elt, key, COLL_KEY_MAX) != r)
697 break;
698 w++;
701 key[w] = 0;
702 c = key[0];
703 x = w - 1;
705 r = 1;
706 for (;;)
708 if (ic ? collic(rex->re.collate.elements, (char*)key, (char*)key, c, x) : collelt(rex->re.collate.elements, (char*)key, c, x))
709 break;
710 if (!x)
712 r = 0;
713 break;
715 w = x--;
716 key[w] = 0;
718 *p = s + w;
719 return rex->re.collate.invert ? !r : r;
722 static unsigned char*
723 nestmatch(register unsigned char* s, register unsigned char* e, const unsigned short* type, register int co)
725 register int c;
726 register int cc;
727 unsigned int n;
728 int oc;
730 if (type[co] & (REX_NEST_literal|REX_NEST_quote))
732 n = (type[co] & REX_NEST_literal) ? REX_NEST_terminator : (REX_NEST_escape|REX_NEST_terminator);
733 while (s < e)
735 c = *s++;
736 if (c == co)
737 return s;
738 else if (type[c] & n)
740 if (s >= e || (type[c] & REX_NEST_terminator))
741 break;
742 s++;
746 else
748 cc = type[co] >> REX_NEST_SHIFT;
749 oc = type[co] & (REX_NEST_open|REX_NEST_close);
750 n = 1;
751 while (s < e)
753 c = *s++;
754 switch (type[c] & (REX_NEST_escape|REX_NEST_open|REX_NEST_close|REX_NEST_delimiter|REX_NEST_separator|REX_NEST_terminator))
756 case REX_NEST_delimiter:
757 case REX_NEST_terminator:
758 return oc ? 0 : s;
759 case REX_NEST_separator:
760 if (!oc)
761 return s;
762 break;
763 case REX_NEST_escape:
764 if (s >= e)
765 return 0;
766 s++;
767 break;
768 case REX_NEST_open|REX_NEST_close:
769 if (c == cc)
771 if (!--n)
772 return s;
774 /*FALLTHROUGH*/
775 case REX_NEST_open:
776 if (c == co)
778 if (!++n)
779 return 0;
781 else if (!(s = nestmatch(s, e, type, c)))
782 return 0;
783 break;
784 case REX_NEST_close:
785 if (c != cc)
786 return 0;
787 if (!--n)
788 return s;
789 break;
792 return (oc || !(type[UCHAR_MAX+1] & REX_NEST_terminator)) ? 0 : s;
794 return 0;
797 static int
798 parse(Env_t* env, Rex_t* rex, Rex_t* cont, unsigned char* s)
800 int c;
801 int d;
802 int i;
803 int m;
804 int n;
805 int r;
806 int* f;
807 unsigned char* p;
808 unsigned char* t;
809 unsigned char* b;
810 unsigned char* e;
811 char* u;
812 regmatch_t* o;
813 Trie_node_t* x;
814 Rex_t* q;
815 Rex_t catcher;
816 Rex_t next;
818 for (;;)
820 DEBUG_TEST(0x0008,(sfprintf(sfstdout, "AHA#%04d 0x%04x parse %s `%-.*s'\n", __LINE__, debug_flag, rexname(rex), env->end - s, s)),(0));
821 switch (rex->type)
823 case REX_ALT:
824 if (env->stack)
826 if (matchpush(env, rex))
827 return BAD;
828 if (pospush(env, rex, s, BEG_ALT))
829 return BAD;
830 catcher.type = REX_ALT_CATCH;
831 catcher.serial = rex->serial;
832 catcher.re.alt_catch.cont = cont;
833 catcher.next = rex->next;
834 r = parse(env, rex->re.group.expr.binary.left, &catcher, s);
835 if (r < BEST || (rex->flags & REG_MINIMAL))
837 matchcopy(env, rex);
838 ((Pos_t*)env->pos->vec + env->pos->cur - 1)->serial = catcher.serial = rex->re.group.expr.binary.serial;
839 n = parse(env, rex->re.group.expr.binary.right, &catcher, s);
840 if (n != NONE)
841 r = n;
843 pospop(env);
844 matchpop(env, rex);
846 else
848 if ((r = parse(env, rex->re.group.expr.binary.left, cont, s)) == NONE)
849 r = parse(env, rex->re.group.expr.binary.right, cont, s);
850 if (r == GOOD)
851 r = BEST;
853 return r;
854 case REX_ALT_CATCH:
855 if (pospush(env, rex, s, END_ANY))
856 return BAD;
857 r = follow(env, rex, rex->re.alt_catch.cont, s);
858 pospop(env);
859 return r;
860 case REX_BACK:
861 o = &env->match[rex->lo];
862 if (o->rm_so < 0)
863 return NONE;
864 i = o->rm_eo - o->rm_so;
865 e = s + i;
866 if (e > env->end)
867 return NONE;
868 t = env->beg + o->rm_so;
869 if (!(p = rex->map))
871 while (s < e)
872 if (*s++ != *t++)
873 return NONE;
875 else if (!mbwide())
877 while (s < e)
878 if (p[*s++] != p[*t++])
879 return NONE;
881 else
883 while (s < e)
885 c = mbchar(s);
886 d = mbchar(t);
887 if (towupper(c) != towupper(d))
888 return NONE;
891 break;
892 case REX_BEG:
893 if ((!(rex->flags & REG_NEWLINE) || s <= env->beg || *(s - 1) != '\n') && ((env->flags & REG_NOTBOL) || s != env->beg))
894 return NONE;
895 break;
896 case REX_CLASS:
897 if (LEADING(env, rex, s))
898 return NONE;
899 n = rex->hi;
900 if (n > env->end - s)
901 n = env->end - s;
902 m = rex->lo;
903 if (m > n)
904 return NONE;
905 r = NONE;
906 if (!(rex->flags & REG_MINIMAL))
908 for (i = 0; i < n; i++)
909 if (!settst(rex->re.charclass, s[i]))
911 n = i;
912 break;
914 for (s += n; n-- >= m; s--)
915 switch (follow(env, rex, cont, s))
917 case BAD:
918 return BAD;
919 case CUT:
920 return CUT;
921 case BEST:
922 return BEST;
923 case GOOD:
924 r = GOOD;
925 break;
928 else
930 for (e = s + m; s < e; s++)
931 if (!settst(rex->re.charclass, *s))
932 return r;
933 e += n - m;
934 for (;;)
936 switch (follow(env, rex, cont, s))
938 case BAD:
939 return BAD;
940 case CUT:
941 return CUT;
942 case BEST:
943 case GOOD:
944 return BEST;
946 if (s >= e || !settst(rex->re.charclass, *s))
947 break;
948 s++;
951 return r;
952 case REX_COLL_CLASS:
953 if (LEADING(env, rex, s))
954 return NONE;
955 n = rex->hi;
956 if (n > env->end - s)
957 n = env->end - s;
958 m = rex->lo;
959 if (m > n)
960 return NONE;
961 r = NONE;
962 e = env->end;
963 if (!(rex->flags & REG_MINIMAL))
965 if (!(b = (unsigned char*)stkpush(stkstd, n)))
967 env->error = REG_ESPACE;
968 return BAD;
970 for (i = 0; s < e && i < n && collmatch(rex, s, e, &t); i++)
972 b[i] = t - s;
973 s = t;
975 for (; i-- >= rex->lo; s -= b[i])
976 switch (follow(env, rex, cont, s))
978 case BAD:
979 stkpop(stkstd);
980 return BAD;
981 case CUT:
982 stkpop(stkstd);
983 return CUT;
984 case BEST:
985 stkpop(stkstd);
986 return BEST;
987 case GOOD:
988 r = GOOD;
989 break;
991 stkpop(stkstd);
993 else
995 for (i = 0; i < m && s < e; i++, s = t)
996 if (!collmatch(rex, s, e, &t))
997 return r;
998 while (i++ <= n)
1000 switch (follow(env, rex, cont, s))
1002 case BAD:
1003 return BAD;
1004 case CUT:
1005 return CUT;
1006 case BEST:
1007 case GOOD:
1008 return BEST;
1010 if (s >= e || !collmatch(rex, s, e, &s))
1011 break;
1014 return r;
1015 case REX_CONJ:
1016 next.type = REX_CONJ_RIGHT;
1017 next.re.conj_right.cont = cont;
1018 next.next = rex->next;
1019 catcher.type = REX_CONJ_LEFT;
1020 catcher.re.conj_left.right = rex->re.group.expr.binary.right;
1021 catcher.re.conj_left.cont = &next;
1022 catcher.re.conj_left.beg = s;
1023 catcher.next = 0;
1024 return parse(env, rex->re.group.expr.binary.left, &catcher, s);
1025 case REX_CONJ_LEFT:
1026 rex->re.conj_left.cont->re.conj_right.end = s;
1027 cont = rex->re.conj_left.cont;
1028 s = rex->re.conj_left.beg;
1029 rex = rex->re.conj_left.right;
1030 continue;
1031 case REX_CONJ_RIGHT:
1032 if (rex->re.conj_right.end != s)
1033 return NONE;
1034 cont = rex->re.conj_right.cont;
1035 break;
1036 case REX_DONE:
1037 if (!env->stack)
1038 return BEST;
1039 n = s - env->beg;
1040 r = env->nsub;
1041 DEBUG_TEST(0x0100,(sfprintf(sfstdout,"AHA#%04d 0x%04x %s (%d,%d)(%d,%d)(%d,%d)(%d,%d) (%d,%d)(%d,%d)\n", __LINE__, debug_flag, rexname(rex), env->best[0].rm_so, env->best[0].rm_eo, env->best[1].rm_so, env->best[1].rm_eo, env->best[2].rm_so, env->best[2].rm_eo, env->best[3].rm_so, env->best[3].rm_eo, env->match[0].rm_so, env->match[0].rm_eo, env->match[1].rm_so, env->match[1].rm_eo)),(0));
1042 if ((i = env->best[0].rm_eo) >= 0)
1044 if (rex->flags & REG_MINIMAL)
1046 if (n > i)
1047 return GOOD;
1049 else
1051 if (n < i)
1052 return GOOD;
1054 if (n == i && better(env,
1055 (Pos_t*)env->bestpos->vec,
1056 (Pos_t*)env->pos->vec,
1057 (Pos_t*)env->bestpos->vec+env->bestpos->cur,
1058 (Pos_t*)env->pos->vec+env->pos->cur,
1059 0) <= 0)
1060 return GOOD;
1062 env->best[0].rm_eo = n;
1063 memcpy(&env->best[1], &env->match[1], r * sizeof(regmatch_t));
1064 n = env->pos->cur;
1065 if (!vector(Pos_t, env->bestpos, n))
1067 env->error = REG_ESPACE;
1068 return BAD;
1070 env->bestpos->cur = n;
1071 memcpy(env->bestpos->vec, env->pos->vec, n * sizeof(Pos_t));
1072 DEBUG_TEST(0x0100,(sfprintf(sfstdout,"AHA#%04d 0x%04x %s (%d,%d)(%d,%d)(%d,%d)(%d,%d) (%d,%d)(%d,%d)\n", __LINE__, debug_flag, rexname(rex), env->best[0].rm_so, env->best[0].rm_eo, env->best[1].rm_so, env->best[1].rm_eo, env->best[2].rm_so, env->best[2].rm_eo, env->best[3].rm_so, env->best[3].rm_eo, env->match[0].rm_so, env->match[0].rm_eo, env->match[1].rm_so, env->match[1].rm_eo)),(0));
1073 return GOOD;
1074 case REX_DOT:
1075 if (LEADING(env, rex, s))
1076 return NONE;
1077 n = rex->hi;
1078 if (n > env->end - s)
1079 n = env->end - s;
1080 m = rex->lo;
1081 if (m > n)
1082 return NONE;
1083 if ((c = rex->explicit) >= 0 && !mbwide())
1084 for (i = 0; i < n; i++)
1085 if (s[i] == c)
1087 n = i;
1088 break;
1090 r = NONE;
1091 if (!(rex->flags & REG_MINIMAL))
1093 if (!mbwide())
1095 for (s += n; n-- >= m; s--)
1096 switch (follow(env, rex, cont, s))
1098 case BAD:
1099 return BAD;
1100 case CUT:
1101 return CUT;
1102 case BEST:
1103 return BEST;
1104 case GOOD:
1105 r = GOOD;
1106 break;
1109 else
1111 if (!(b = (unsigned char*)stkpush(stkstd, n)))
1113 env->error = REG_ESPACE;
1114 return BAD;
1116 e = env->end;
1117 for (i = 0; s < e && i < n && *s != c; i++)
1118 s += b[i] = MBSIZE(s);
1119 for (; i-- >= m; s -= b[i])
1120 switch (follow(env, rex, cont, s))
1122 case BAD:
1123 stkpop(stkstd);
1124 return BAD;
1125 case CUT:
1126 stkpop(stkstd);
1127 return CUT;
1128 case BEST:
1129 stkpop(stkstd);
1130 return BEST;
1131 case GOOD:
1132 r = GOOD;
1133 break;
1135 stkpop(stkstd);
1138 else
1140 if (!mbwide())
1142 e = s + n;
1143 for (s += m; s <= e; s++)
1144 switch (follow(env, rex, cont, s))
1146 case BAD:
1147 return BAD;
1148 case CUT:
1149 return CUT;
1150 case BEST:
1151 case GOOD:
1152 return BEST;
1155 else
1157 e = env->end;
1158 for (i = 0; s < e && i < m && *s != c; i++)
1159 s += MBSIZE(s);
1160 if (i >= m)
1161 for (; s <= e && i <= n; s += MBSIZE(s), i++)
1162 switch (follow(env, rex, cont, s))
1164 case BAD:
1165 return BAD;
1166 case CUT:
1167 return CUT;
1168 case BEST:
1169 case GOOD:
1170 return BEST;
1174 return r;
1175 case REX_END:
1176 if ((!(rex->flags & REG_NEWLINE) || *s != '\n') && ((env->flags & REG_NOTEOL) || s < env->end))
1177 return NONE;
1178 break;
1179 case REX_GROUP:
1180 DEBUG_TEST(0x0200,(sfprintf(sfstdout,"AHA#%04d 0x%04x parse %s `%-.*s'\n", __LINE__, debug_flag, rexname(rex), env->end - s, s)),(0));
1181 if (env->stack)
1183 if (rex->re.group.number)
1184 env->match[rex->re.group.number].rm_so = s - env->beg;
1185 if (pospush(env, rex, s, BEG_SUB))
1186 return BAD;
1187 catcher.re.group_catch.eo = rex->re.group.number ? &env->match[rex->re.group.number].rm_eo : (regoff_t*)0;
1189 catcher.type = REX_GROUP_CATCH;
1190 catcher.serial = rex->serial;
1191 catcher.re.group_catch.cont = cont;
1192 catcher.next = rex->next;
1193 r = parse(env, rex->re.group.expr.rex, &catcher, s);
1194 if (env->stack)
1196 pospop(env);
1197 if (rex->re.group.number)
1198 env->match[rex->re.group.number].rm_so = -1;
1200 return r;
1201 case REX_GROUP_CATCH:
1202 DEBUG_TEST(0x0200,(sfprintf(sfstdout,"AHA#%04d 0x%04x parse %s=>%s `%-.*s'\n", __LINE__, debug_flag, rexname(rex), rexname(rex->re.group_catch.cont), env->end - s, s)),(0));
1203 if (env->stack)
1205 if (rex->re.group_catch.eo)
1206 *rex->re.group_catch.eo = s - env->beg;
1207 if (pospush(env, rex, s, END_ANY))
1208 return BAD;
1210 r = follow(env, rex, rex->re.group_catch.cont, s);
1211 if (env->stack)
1213 pospop(env);
1214 if (rex->re.group_catch.eo)
1215 *rex->re.group_catch.eo = -1;
1217 return r;
1218 case REX_GROUP_AHEAD:
1219 catcher.type = REX_GROUP_AHEAD_CATCH;
1220 catcher.flags = rex->flags;
1221 catcher.serial = rex->serial;
1222 catcher.re.rep_catch.beg = s;
1223 catcher.re.rep_catch.cont = cont;
1224 catcher.next = rex->next;
1225 return parse(env, rex->re.group.expr.rex, &catcher, s);
1226 case REX_GROUP_AHEAD_CATCH:
1227 return follow(env, rex, rex->re.rep_catch.cont, rex->re.rep_catch.beg);
1228 case REX_GROUP_AHEAD_NOT:
1229 r = parse(env, rex->re.group.expr.rex, NiL, s);
1230 if (r == NONE)
1231 r = follow(env, rex, cont, s);
1232 else if (r != BAD)
1233 r = NONE;
1234 return r;
1235 case REX_GROUP_BEHIND:
1236 if ((s - env->beg) < rex->re.group.size)
1237 return NONE;
1238 catcher.type = REX_GROUP_BEHIND_CATCH;
1239 catcher.flags = rex->flags;
1240 catcher.serial = rex->serial;
1241 catcher.re.behind_catch.beg = s;
1242 catcher.re.behind_catch.end = e = env->end;
1243 catcher.re.behind_catch.cont = cont;
1244 catcher.next = rex->next;
1245 for (t = s - rex->re.group.size; t >= env->beg; t--)
1247 env->end = s;
1248 r = parse(env, rex->re.group.expr.rex, &catcher, t);
1249 env->end = e;
1250 if (r != NONE)
1251 return r;
1253 return NONE;
1254 case REX_GROUP_BEHIND_CATCH:
1255 if (s != rex->re.behind_catch.beg)
1256 return NONE;
1257 env->end = rex->re.behind_catch.end;
1258 return follow(env, rex, rex->re.behind_catch.cont, rex->re.behind_catch.beg);
1259 case REX_GROUP_BEHIND_NOT:
1260 if ((s - env->beg) < rex->re.group.size)
1261 r = NONE;
1262 else
1264 catcher.type = REX_GROUP_BEHIND_NOT_CATCH;
1265 catcher.re.neg_catch.beg = s;
1266 catcher.next = 0;
1267 e = env->end;
1268 env->end = s;
1269 for (t = s - rex->re.group.size; t >= env->beg; t--)
1271 r = parse(env, rex->re.group.expr.rex, &catcher, t);
1272 if (r != NONE)
1273 break;
1275 env->end = e;
1277 if (r == NONE)
1278 r = follow(env, rex, cont, s);
1279 else if (r != BAD)
1280 r = NONE;
1281 return r;
1282 case REX_GROUP_BEHIND_NOT_CATCH:
1283 return s == rex->re.neg_catch.beg ? GOOD : NONE;
1284 case REX_GROUP_COND:
1285 if (q = rex->re.group.expr.binary.right)
1287 catcher.re.cond_catch.next[0] = q->re.group.expr.binary.right;
1288 catcher.re.cond_catch.next[1] = q->re.group.expr.binary.left;
1290 else
1291 catcher.re.cond_catch.next[0] = catcher.re.cond_catch.next[1] = 0;
1292 if (q = rex->re.group.expr.binary.left)
1294 catcher.type = REX_GROUP_COND_CATCH;
1295 catcher.flags = rex->flags;
1296 catcher.serial = rex->serial;
1297 catcher.re.cond_catch.yes = 0;
1298 catcher.re.cond_catch.beg = s;
1299 catcher.re.cond_catch.cont = cont;
1300 catcher.next = rex->next;
1301 r = parse(env, q, &catcher, s);
1302 if (r == BAD || catcher.re.cond_catch.yes)
1303 return r;
1305 else if (!rex->re.group.size || rex->re.group.size > 0 && env->match[rex->re.group.size].rm_so >= 0)
1306 r = GOOD;
1307 else
1308 r = NONE;
1309 if (q = catcher.re.cond_catch.next[r != NONE])
1311 catcher.type = REX_CAT;
1312 catcher.flags = q->flags;
1313 catcher.serial = q->serial;
1314 catcher.re.group_catch.cont = cont;
1315 catcher.next = rex->next;
1316 return parse(env, q, &catcher, s);
1318 return follow(env, rex, cont, s);
1319 case REX_GROUP_COND_CATCH:
1320 rex->re.cond_catch.yes = 1;
1321 catcher.type = REX_CAT;
1322 catcher.flags = rex->flags;
1323 catcher.serial = rex->serial;
1324 catcher.re.group_catch.cont = rex->re.cond_catch.cont;
1325 catcher.next = rex->next;
1326 return parse(env, rex->re.cond_catch.next[1], &catcher, rex->re.cond_catch.beg);
1327 case REX_CAT:
1328 return follow(env, rex, rex->re.group_catch.cont, s);
1329 case REX_GROUP_CUT:
1330 catcher.type = REX_GROUP_CUT_CATCH;
1331 catcher.flags = rex->flags;
1332 catcher.serial = rex->serial;
1333 catcher.re.group_catch.cont = cont;
1334 catcher.next = rex->next;
1335 return parse(env, rex->re.group.expr.rex, &catcher, s);
1336 case REX_GROUP_CUT_CATCH:
1337 switch (r = follow(env, rex, rex->re.group_catch.cont, s))
1339 case GOOD:
1340 r = BEST;
1341 break;
1342 case NONE:
1343 r = CUT;
1344 break;
1346 return r;
1347 case REX_KMP:
1348 f = rex->re.string.fail;
1349 b = rex->re.string.base;
1350 n = rex->re.string.size;
1351 t = s;
1352 e = env->end;
1353 if (p = rex->map)
1355 while (t + n <= e)
1357 for (i = -1; t < e; t++)
1359 while (i >= 0 && b[i+1] != p[*t])
1360 i = f[i];
1361 if (b[i+1] == p[*t])
1362 i++;
1363 if (i + 1 == n)
1365 t++;
1366 if (env->stack)
1367 env->best[0].rm_so = t - s - n;
1368 switch (follow(env, rex, cont, t))
1370 case BAD:
1371 return BAD;
1372 case CUT:
1373 return CUT;
1374 case BEST:
1375 case GOOD:
1376 return BEST;
1378 t -= n - 1;
1379 break;
1384 else
1386 while (t + n <= e)
1388 for (i = -1; t < e; t++)
1390 while (i >= 0 && b[i+1] != *t)
1391 i = f[i];
1392 if (b[i+1] == *t)
1393 i++;
1394 if (i + 1 == n)
1396 t++;
1397 if (env->stack)
1398 env->best[0].rm_so = t - s - n;
1399 switch (follow(env, rex, cont, t))
1401 case BAD:
1402 return BAD;
1403 case CUT:
1404 return CUT;
1405 case BEST:
1406 case GOOD:
1407 return BEST;
1409 t -= n - 1;
1410 break;
1415 return NONE;
1416 case REX_NEG:
1417 if (LEADING(env, rex, s))
1418 return NONE;
1419 i = env->end - s;
1420 n = ((i + 7) >> 3) + 1;
1421 catcher.type = REX_NEG_CATCH;
1422 catcher.re.neg_catch.beg = s;
1423 if (!(p = (unsigned char*)stkpush(stkstd, n)))
1424 return BAD;
1425 memset(catcher.re.neg_catch.index = p, 0, n);
1426 catcher.next = rex->next;
1427 if (parse(env, rex->re.group.expr.rex, &catcher, s) == BAD)
1428 r = BAD;
1429 else
1431 r = NONE;
1432 for (; i >= 0; i--)
1433 if (!bittst(p, i))
1435 switch (follow(env, rex, cont, s + i))
1437 case BAD:
1438 r = BAD;
1439 break;
1440 case BEST:
1441 r = BEST;
1442 break;
1443 case CUT:
1444 r = CUT;
1445 break;
1446 case GOOD:
1447 r = GOOD;
1448 /*FALLTHROUGH*/
1449 default:
1450 continue;
1452 break;
1455 stkpop(stkstd);
1456 return r;
1457 case REX_NEG_CATCH:
1458 bitset(rex->re.neg_catch.index, s - rex->re.neg_catch.beg);
1459 return NONE;
1460 case REX_NEST:
1461 if (s >= env->end)
1462 return NONE;
1465 if ((c = *s++) == rex->re.nest.primary)
1467 if (s >= env->end || !(s = nestmatch(s, env->end, rex->re.nest.type, c)))
1468 return NONE;
1469 break;
1471 if (rex->re.nest.primary >= 0)
1472 return NONE;
1473 if (rex->re.nest.type[c] & (REX_NEST_delimiter|REX_NEST_separator|REX_NEST_terminator))
1474 break;
1475 if (!(s = nestmatch(s, env->end, rex->re.nest.type, c)))
1476 return NONE;
1477 } while (s < env->end && !(rex->re.nest.type[*(s-1)] & (REX_NEST_delimiter|REX_NEST_separator|REX_NEST_terminator)));
1478 break;
1479 case REX_NULL:
1480 break;
1481 case REX_ONECHAR:
1482 n = rex->hi;
1483 if (n > env->end - s)
1484 n = env->end - s;
1485 m = rex->lo;
1486 if (m > n)
1487 return NONE;
1488 r = NONE;
1489 c = rex->re.onechar;
1490 if (!(rex->flags & REG_MINIMAL))
1492 if (!mbwide())
1494 if (p = rex->map)
1496 for (i = 0; i < n; i++, s++)
1497 if (p[*s] != c)
1498 break;
1500 else
1502 for (i = 0; i < n; i++, s++)
1503 if (*s != c)
1504 break;
1506 for (; i-- >= m; s--)
1507 switch (follow(env, rex, cont, s))
1509 case BAD:
1510 return BAD;
1511 case BEST:
1512 return BEST;
1513 case CUT:
1514 return CUT;
1515 case GOOD:
1516 r = GOOD;
1517 break;
1520 else
1522 if (!(b = (unsigned char*)stkpush(stkstd, n)))
1524 env->error = REG_ESPACE;
1525 return BAD;
1527 e = env->end;
1528 if (!(rex->flags & REG_ICASE))
1530 for (i = 0; s < e && i < n; i++, s = t)
1532 t = s;
1533 if (mbchar(t) != c)
1534 break;
1535 b[i] = t - s;
1538 else
1540 for (i = 0; s < e && i < n; i++, s = t)
1542 t = s;
1543 if (towupper(mbchar(t)) != c)
1544 break;
1545 b[i] = t - s;
1548 for (; i-- >= m; s -= b[i])
1549 switch (follow(env, rex, cont, s))
1551 case BAD:
1552 stkpop(stkstd);
1553 return BAD;
1554 case BEST:
1555 stkpop(stkstd);
1556 return BEST;
1557 case CUT:
1558 stkpop(stkstd);
1559 return CUT;
1560 case GOOD:
1561 r = GOOD;
1562 break;
1564 stkpop(stkstd);
1567 else
1569 if (!mbwide())
1571 e = s + m;
1572 if (p = rex->map)
1574 for (; s < e; s++)
1575 if (p[*s] != c)
1576 return r;
1577 e += n - m;
1578 for (;;)
1580 switch (follow(env, rex, cont, s))
1582 case BAD:
1583 return BAD;
1584 case CUT:
1585 return CUT;
1586 case BEST:
1587 case GOOD:
1588 return BEST;
1590 if (s >= e || p[*s++] != c)
1591 break;
1594 else
1596 for (; s < e; s++)
1597 if (*s != c)
1598 return r;
1599 e += n - m;
1600 for (;;)
1602 switch (follow(env, rex, cont, s))
1604 case BAD:
1605 return BAD;
1606 case CUT:
1607 return CUT;
1608 case BEST:
1609 case GOOD:
1610 return BEST;
1612 if (s >= e || *s++ != c)
1613 break;
1617 else
1619 if (!(rex->flags & REG_ICASE))
1621 for (i = 0; i < m && s < e; i++, s = t)
1623 t = s;
1624 if (mbchar(t) != c)
1625 return r;
1627 while (i++ <= n)
1629 switch (follow(env, rex, cont, s))
1631 case BAD:
1632 return BAD;
1633 case CUT:
1634 return CUT;
1635 case BEST:
1636 case GOOD:
1637 return BEST;
1639 if (s >= e || mbchar(s) != c)
1640 break;
1643 else
1645 for (i = 0; i < m && s < e; i++, s = t)
1647 t = s;
1648 if (towupper(mbchar(t)) != c)
1649 return r;
1651 while (i++ <= n)
1653 switch (follow(env, rex, cont, s))
1655 case BAD:
1656 return BAD;
1657 case CUT:
1658 return CUT;
1659 case BEST:
1660 case GOOD:
1661 return BEST;
1663 if (s >= e || towupper(mbchar(s)) != c)
1664 break;
1669 return r;
1670 case REX_REP:
1671 if (env->stack && pospush(env, rex, s, BEG_REP))
1672 return BAD;
1673 r = parserep(env, rex, cont, s, 0);
1674 if (env->stack)
1675 pospop(env);
1676 return r;
1677 case REX_REP_CATCH:
1678 DEBUG_TEST(0x0020,(sfprintf(sfstdout, "AHA#%04d 0x%04x %s n %d len %d s `%-.*s'\n", __LINE__, debug_flag, rexname(rex), rex->re.rep_catch.n, s - rex->re.rep_catch.beg, env->end - s, s)),(0));
1679 if (env->stack && pospush(env, rex, s, END_ANY))
1680 return BAD;
1681 if (s == rex->re.rep_catch.beg && rex->re.rep_catch.n > rex->re.rep_catch.ref->lo)
1684 * optional empty iteration
1687 DEBUG_TEST(0x0002,(sfprintf(sfstdout, "AHA#%04d %p re.group.back=%d re.group.expr.rex=%s\n", __LINE__, rex->re.rep_catch.ref->re.group.expr.rex, rex->re.rep_catch.ref->re.group.expr.rex->re.group.back, rexname(rex->re.rep_catch.ref->re.group.expr.rex))),(0));
1688 if (!env->stack || s != rex->re.rep_catch.ref->re.rep_catch.beg && !rex->re.rep_catch.ref->re.group.expr.rex->re.group.back)
1689 r = NONE;
1690 else if (pospush(env, rex, s, END_ANY))
1691 r = BAD;
1692 else
1694 r = follow(env, rex, rex->re.rep_catch.cont, s);
1695 pospop(env);
1698 else
1699 r = parserep(env, rex->re.rep_catch.ref, rex->re.rep_catch.cont, s, rex->re.rep_catch.n);
1700 if (env->stack)
1701 pospop(env);
1702 return r;
1703 case REX_STRING:
1704 DEBUG_TEST(0x0200,(sfprintf(sfstdout,"AHA#%04d 0x%04x parse %s \"%-.*s\" `%-.*s'\n", __LINE__, debug_flag, rexname(rex), rex->re.string.size, rex->re.string.base, env->end - s, s)),(0));
1705 if (rex->re.string.size > (env->end - s))
1706 return NONE;
1707 t = rex->re.string.base;
1708 e = t + rex->re.string.size;
1709 if (!(p = rex->map))
1711 while (t < e)
1712 if (*s++ != *t++)
1713 return NONE;
1715 else if (!mbwide())
1717 while (t < e)
1718 if (p[*s++] != *t++)
1719 return NONE;
1721 else
1723 while (t < e)
1725 c = mbchar(s);
1726 d = mbchar(t);
1727 if (towupper(c) != d)
1728 return NONE;
1731 break;
1732 case REX_TRIE:
1733 if (((s + rex->re.trie.min) > env->end) || !(x = rex->re.trie.root[rex->map ? rex->map[*s] : *s]))
1734 return NONE;
1735 return parsetrie(env, x, rex, cont, s);
1736 case REX_EXEC:
1737 u = 0;
1738 r = (*env->disc->re_execf)(env->regex, rex->re.exec.data, rex->re.exec.text, rex->re.exec.size, (const char*)s, env->end - s, &u, env->disc);
1739 e = (unsigned char*)u;
1740 if (e >= s && e <= env->end)
1741 s = e;
1742 switch (r)
1744 case 0:
1745 break;
1746 case REG_NOMATCH:
1747 return NONE;
1748 default:
1749 env->error = r;
1750 return BAD;
1752 break;
1753 case REX_WBEG:
1754 if (!isword(*s) || s > env->beg && isword(*(s - 1)))
1755 return NONE;
1756 break;
1757 case REX_WEND:
1758 if (isword(*s) || s > env->beg && !isword(*(s - 1)))
1759 return NONE;
1760 break;
1761 case REX_WORD:
1762 if (s > env->beg && isword(*(s - 1)) == isword(*s))
1763 return NONE;
1764 break;
1765 case REX_WORD_NOT:
1766 if (s == env->beg || isword(*(s - 1)) != isword(*s))
1767 return NONE;
1768 break;
1769 case REX_BEG_STR:
1770 if (s != env->beg)
1771 return NONE;
1772 break;
1773 case REX_END_STR:
1774 for (t = s; t < env->end && *t == '\n'; t++);
1775 if (t < env->end)
1776 return NONE;
1777 break;
1778 case REX_FIN_STR:
1779 if (s < env->end)
1780 return NONE;
1781 break;
1783 if (!(rex = rex->next))
1785 if (!(rex = cont))
1786 break;
1787 cont = 0;
1790 return GOOD;
1793 #if _AST_REGEX_DEBUG
1795 static void
1796 listnode(Rex_t* e, int level)
1798 int i;
1800 if (e)
1804 for (i = 0; i < level; i++)
1805 sfprintf(sfstderr, " ");
1806 sfprintf(sfstderr, "%s\n", rexname(e));
1807 switch (e->type)
1809 case REX_ALT:
1810 case REX_CONJ:
1811 listnode(e->re.group.expr.binary.left, level + 1);
1812 listnode(e->re.group.expr.binary.right, level + 1);
1813 break;
1814 case REX_GROUP:
1815 case REX_GROUP_AHEAD:
1816 case REX_GROUP_AHEAD_NOT:
1817 case REX_GROUP_BEHIND:
1818 case REX_GROUP_BEHIND_NOT:
1819 case REX_GROUP_CUT:
1820 case REX_NEG:
1821 case REX_REP:
1822 listnode(e->re.group.expr.rex, level + 1);
1823 break;
1825 } while (e = e->next);
1829 static int
1830 list(Env_t* env, Rex_t* rex)
1832 sfprintf(sfstderr, "AHA regex hard=%d stack=%p\n", env->hard, env->stack);
1833 if (rex)
1834 listnode(rex, 1);
1835 return 0;
1838 #endif
1841 * returning REG_BADPAT or REG_ESPACE is not explicitly
1842 * countenanced by the standard
1846 regnexec(const regex_t* p, const char* s, size_t len, size_t nmatch, regmatch_t* match, regflags_t flags)
1848 register int n;
1849 register int i;
1850 int j;
1851 int k;
1852 int m;
1853 int advance;
1854 Env_t* env;
1855 Rex_t* e;
1857 DEBUG_INIT();
1858 DEBUG_TEST(0x0001,(sfprintf(sfstdout, "AHA#%04d 0x%04x regnexec %d 0x%08x `%-.*s'\n", __LINE__, debug_flag, nmatch, flags, len, s)),(0));
1859 if (!p || !(env = p->env))
1860 return REG_BADPAT;
1861 if (!s)
1862 return fatal(env->disc, REG_BADPAT, NiL);
1863 if (len < env->min)
1865 DEBUG_TEST(0x0080,(sfprintf(sfstdout, "AHA#%04d REG_NOMATCH %d %d\n", __LINE__, len, env->min)),(0));
1866 return REG_NOMATCH;
1868 env->regex = p;
1869 env->beg = (unsigned char*)s;
1870 env->end = env->beg + len;
1871 stknew(stkstd, &env->stk);
1872 env->flags &= ~REG_EXEC;
1873 env->flags |= (flags & REG_EXEC);
1874 advance = 0;
1875 if (env->stack = env->hard || !(env->flags & REG_NOSUB) && nmatch)
1877 n = env->nsub;
1878 if (!(env->match = (regmatch_t*)stkpush(stkstd, 2 * (n + 1) * sizeof(regmatch_t))) ||
1879 !env->pos && !(env->pos = vecopen(16, sizeof(Pos_t))) ||
1880 !env->bestpos && !(env->bestpos = vecopen(16, sizeof(Pos_t))))
1882 k = REG_ESPACE;
1883 goto done;
1885 env->pos->cur = env->bestpos->cur = 0;
1886 env->best = &env->match[n + 1];
1887 env->best[0].rm_so = 0;
1888 env->best[0].rm_eo = -1;
1889 for (i = 0; i <= n; i++)
1890 env->match[i] = state.nomatch;
1891 if (flags & REG_ADVANCE)
1892 advance = 1;
1894 DEBUG_TEST(0x1000,(list(env,env->rex)),(0));
1895 k = REG_NOMATCH;
1896 if ((e = env->rex)->type == REX_BM)
1898 DEBUG_TEST(0x0080,(sfprintf(sfstdout, "AHA#%04d REX_BM\n", __LINE__)),(0));
1899 if (len < e->re.bm.right)
1901 DEBUG_TEST(0x0080,(sfprintf(sfstdout, "AHA#%04d REG_NOMATCH %d %d\n", __LINE__, len, e->re.bm.right)),(0));
1902 goto done;
1904 else if (!(flags & REG_LEFT))
1906 register unsigned char* buf = (unsigned char*)s;
1907 register size_t index = e->re.bm.left + e->re.bm.size;
1908 register size_t mid = len - e->re.bm.right;
1909 register size_t* skip = e->re.bm.skip;
1910 register size_t* fail = e->re.bm.fail;
1911 register Bm_mask_t** mask = e->re.bm.mask;
1912 Bm_mask_t m;
1913 size_t x;
1915 DEBUG_TEST(0x0080,(sfprintf(sfstdout, "AHA#%04d REX_BM len=%d right=%d left=%d size=%d %d %d\n", __LINE__, len, e->re.bm.right, e->re.bm.left, e->re.bm.size, index, mid)),(0));
1916 for (;;)
1918 while ((index += skip[buf[index]]) < mid);
1919 if (index < HIT)
1921 DEBUG_TEST(0x0080,(sfprintf(sfstdout, "AHA#%04d REG_NOMATCH %d %d\n", __LINE__, index, HIT)),(0));
1922 goto done;
1924 index -= HIT;
1925 m = mask[n = e->re.bm.size - 1][buf[index]];
1928 if (!n--)
1930 if (e->re.bm.back < 0)
1931 goto possible;
1932 if (advance)
1934 i = index - e->re.bm.back;
1935 s += i;
1936 if (env->stack)
1937 env->best[0].rm_so += i;
1938 goto possible;
1940 x = index;
1941 if (index < e->re.bm.back)
1942 index = 0;
1943 else
1944 index -= e->re.bm.back;
1945 while (index <= x)
1947 if ((i = parse(env, e->next, &env->done, buf + index)) != NONE)
1949 if (env->stack)
1950 env->best[0].rm_so = index;
1951 n = env->nsub;
1952 goto hit;
1954 index++;
1956 index += e->re.bm.size;
1957 break;
1959 } while (m &= mask[n][buf[--index]]);
1960 if ((index += fail[n + 1]) >= len)
1961 goto done;
1964 possible:
1965 n = env->nsub;
1966 e = e->next;
1968 j = env->once || (flags & REG_LEFT);
1969 DEBUG_TEST(0x0080,(sfprintf(sfstdout, "AHA#%04d parse once=%d\n", __LINE__, j)),(0));
1970 while ((i = parse(env, e, &env->done, (unsigned char*)s)) == NONE || advance && !env->best[0].rm_eo && !(advance = 0))
1972 if (j)
1973 goto done;
1974 i = MBSIZE(s);
1975 s += i;
1976 if ((unsigned char*)s > env->end - env->min)
1977 goto done;
1978 if (env->stack)
1979 env->best[0].rm_so += i;
1981 if ((flags & REG_LEFT) && env->stack && env->best[0].rm_so)
1982 goto done;
1983 hit:
1984 if (k = env->error)
1985 goto done;
1986 if (i == CUT)
1988 k = env->error = REG_NOMATCH;
1989 goto done;
1991 if (!(env->flags & REG_NOSUB))
1993 k = (env->flags & (REG_SHELL|REG_AUGMENTED)) == (REG_SHELL|REG_AUGMENTED);
1994 for (i = j = m = 0; j < nmatch; i++)
1995 if (!i || !k || (i & 1))
1997 if (i > n)
1998 match[j] = state.nomatch;
1999 else
2000 match[m = j] = env->best[i];
2001 j++;
2003 if (k)
2005 while (m > 0 && match[m].rm_so == -1 && match[m].rm_eo == -1)
2006 m--;
2007 ((regex_t*)p)->re_nsub = m;
2010 k = 0;
2011 done:
2012 stkold(stkstd, &env->stk);
2013 env->stk.base = 0;
2014 if (k > REG_NOMATCH)
2015 fatal(p->env->disc, k, NiL);
2016 return k;
2019 void
2020 regfree(regex_t* p)
2022 Env_t* env;
2024 if (p && (env = p->env))
2026 #if _REG_subcomp
2027 if (env->sub)
2029 regsubfree(p);
2030 p->re_sub = 0;
2032 #endif
2033 p->env = 0;
2034 if (--env->refs <= 0 && !(env->disc->re_flags & REG_NOFREE))
2036 drop(env->disc, env->rex);
2037 if (env->pos)
2038 vecclose(env->pos);
2039 if (env->bestpos)
2040 vecclose(env->bestpos);
2041 if (env->stk.base)
2042 stkold(stkstd, &env->stk);
2043 alloc(env->disc, env, 0);