Force a checkpoint in CREATE DATABASE before starting to copy the files,
[PostgreSQL.git] / src / backend / regex / regc_nfa.c
blobe76c567988e6abd6ac38128e4da7736fda7e312e
1 /*
2 * NFA utilities.
3 * This file is #included by regcomp.c.
5 * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved.
7 * Development of this software was funded, in part, by Cray Research Inc.,
8 * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
9 * Corporation, none of whom are responsible for the results. The author
10 * thanks all of them.
12 * Redistribution and use in source and binary forms -- with or without
13 * modification -- are permitted for any purpose, provided that
14 * redistributions in source form retain this entire copyright notice and
15 * indicate the origin and nature of any modifications.
17 * I'd appreciate being given credit for this package in the documentation
18 * of software which uses it, but that is not a requirement.
20 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
21 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
22 * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
23 * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
24 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
25 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
26 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
27 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
28 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
29 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 * $PostgreSQL$
34 * One or two things that technically ought to be in here
35 * are actually in color.c, thanks to some incestuous relationships in
36 * the color chains.
39 #define NISERR() VISERR(nfa->v)
40 #define NERR(e) VERR(nfa->v, (e))
44 * newnfa - set up an NFA
46 static struct nfa * /* the NFA, or NULL */
47 newnfa(struct vars * v,
48 struct colormap * cm,
49 struct nfa * parent) /* NULL if primary NFA */
51 struct nfa *nfa;
53 nfa = (struct nfa *) MALLOC(sizeof(struct nfa));
54 if (nfa == NULL)
55 return NULL;
57 nfa->states = NULL;
58 nfa->slast = NULL;
59 nfa->free = NULL;
60 nfa->nstates = 0;
61 nfa->cm = cm;
62 nfa->v = v;
63 nfa->size = 0;
64 nfa->bos[0] = nfa->bos[1] = COLORLESS;
65 nfa->eos[0] = nfa->eos[1] = COLORLESS;
66 nfa->parent = parent; /* Precedes newfstate so parent is valid. */
67 nfa->post = newfstate(nfa, '@'); /* number 0 */
68 nfa->pre = newfstate(nfa, '>'); /* number 1 */
70 nfa->init = newstate(nfa); /* may become invalid later */
71 nfa->final = newstate(nfa);
72 if (ISERR())
74 freenfa(nfa);
75 return NULL;
77 rainbow(nfa, nfa->cm, PLAIN, COLORLESS, nfa->pre, nfa->init);
78 newarc(nfa, '^', 1, nfa->pre, nfa->init);
79 newarc(nfa, '^', 0, nfa->pre, nfa->init);
80 rainbow(nfa, nfa->cm, PLAIN, COLORLESS, nfa->final, nfa->post);
81 newarc(nfa, '$', 1, nfa->final, nfa->post);
82 newarc(nfa, '$', 0, nfa->final, nfa->post);
84 if (ISERR())
86 freenfa(nfa);
87 return NULL;
89 return nfa;
93 * TooManyStates - checks if the max states exceeds the compile-time value
95 static int
96 TooManyStates(struct nfa *nfa)
98 struct nfa *parent = nfa->parent;
99 size_t sz = nfa->size;
101 while (parent != NULL)
103 sz = parent->size;
104 parent = parent->parent;
106 if (sz > REG_MAX_STATES)
107 return 1;
108 return 0;
112 * IncrementSize - increases the tracked size of the NFA and its parents.
114 static void
115 IncrementSize(struct nfa *nfa)
117 struct nfa *parent = nfa->parent;
119 nfa->size++;
120 while (parent != NULL)
122 parent->size++;
123 parent = parent->parent;
128 * DecrementSize - decreases the tracked size of the NFA and its parents.
130 static void
131 DecrementSize(struct nfa *nfa)
133 struct nfa *parent = nfa->parent;
135 nfa->size--;
136 while (parent != NULL)
138 parent->size--;
139 parent = parent->parent;
144 * freenfa - free an entire NFA
146 static void
147 freenfa(struct nfa * nfa)
149 struct state *s;
151 while ((s = nfa->states) != NULL)
153 s->nins = s->nouts = 0; /* don't worry about arcs */
154 freestate(nfa, s);
156 while ((s = nfa->free) != NULL)
158 nfa->free = s->next;
159 destroystate(nfa, s);
162 nfa->slast = NULL;
163 nfa->nstates = -1;
164 nfa->pre = NULL;
165 nfa->post = NULL;
166 FREE(nfa);
170 * newstate - allocate an NFA state, with zero flag value
172 static struct state * /* NULL on error */
173 newstate(struct nfa * nfa)
175 struct state *s;
177 if (TooManyStates(nfa))
179 NERR(REG_ETOOBIG);
180 return NULL;
182 if (nfa->free != NULL)
184 s = nfa->free;
185 nfa->free = s->next;
187 else
189 s = (struct state *) MALLOC(sizeof(struct state));
190 if (s == NULL)
192 NERR(REG_ESPACE);
193 return NULL;
195 s->oas.next = NULL;
196 s->free = NULL;
197 s->noas = 0;
200 assert(nfa->nstates >= 0);
201 s->no = nfa->nstates++;
202 s->flag = 0;
203 if (nfa->states == NULL)
204 nfa->states = s;
205 s->nins = 0;
206 s->ins = NULL;
207 s->nouts = 0;
208 s->outs = NULL;
209 s->tmp = NULL;
210 s->next = NULL;
211 if (nfa->slast != NULL)
213 assert(nfa->slast->next == NULL);
214 nfa->slast->next = s;
216 s->prev = nfa->slast;
217 nfa->slast = s;
218 /* track the current size and the parent size */
219 IncrementSize(nfa);
220 return s;
224 * newfstate - allocate an NFA state with a specified flag value
226 static struct state * /* NULL on error */
227 newfstate(struct nfa * nfa, int flag)
229 struct state *s;
231 s = newstate(nfa);
232 if (s != NULL)
233 s->flag = (char) flag;
234 return s;
238 * dropstate - delete a state's inarcs and outarcs and free it
240 static void
241 dropstate(struct nfa * nfa,
242 struct state * s)
244 struct arc *a;
246 while ((a = s->ins) != NULL)
247 freearc(nfa, a);
248 while ((a = s->outs) != NULL)
249 freearc(nfa, a);
250 freestate(nfa, s);
254 * freestate - free a state, which has no in-arcs or out-arcs
256 static void
257 freestate(struct nfa * nfa,
258 struct state * s)
260 assert(s != NULL);
261 assert(s->nins == 0 && s->nouts == 0);
263 s->no = FREESTATE;
264 s->flag = 0;
265 if (s->next != NULL)
266 s->next->prev = s->prev;
267 else
269 assert(s == nfa->slast);
270 nfa->slast = s->prev;
272 if (s->prev != NULL)
273 s->prev->next = s->next;
274 else
276 assert(s == nfa->states);
277 nfa->states = s->next;
279 s->prev = NULL;
280 s->next = nfa->free; /* don't delete it, put it on the free list */
281 nfa->free = s;
282 DecrementSize(nfa);
286 * destroystate - really get rid of an already-freed state
288 static void
289 destroystate(struct nfa * nfa,
290 struct state * s)
292 struct arcbatch *ab;
293 struct arcbatch *abnext;
295 assert(s->no == FREESTATE);
296 for (ab = s->oas.next; ab != NULL; ab = abnext)
298 abnext = ab->next;
299 FREE(ab);
301 s->ins = NULL;
302 s->outs = NULL;
303 s->next = NULL;
304 FREE(s);
308 * newarc - set up a new arc within an NFA
310 static void
311 newarc(struct nfa * nfa,
312 int t,
313 pcolor co,
314 struct state * from,
315 struct state * to)
317 struct arc *a;
319 assert(from != NULL && to != NULL);
321 /* check for duplicates */
322 for (a = from->outs; a != NULL; a = a->outchain)
323 if (a->to == to && a->co == co && a->type == t)
324 return;
326 a = allocarc(nfa, from);
327 if (NISERR())
328 return;
329 assert(a != NULL);
331 a->type = t;
332 a->co = (color) co;
333 a->to = to;
334 a->from = from;
337 * Put the new arc on the beginning, not the end, of the chains. Not only
338 * is this easier, it has the very useful side effect that deleting the
339 * most-recently-added arc is the cheapest case rather than the most
340 * expensive one.
342 a->inchain = to->ins;
343 to->ins = a;
344 a->outchain = from->outs;
345 from->outs = a;
347 from->nouts++;
348 to->nins++;
350 if (COLORED(a) && nfa->parent == NULL)
351 colorchain(nfa->cm, a);
355 * allocarc - allocate a new out-arc within a state
357 static struct arc * /* NULL for failure */
358 allocarc(struct nfa * nfa,
359 struct state * s)
361 struct arc *a;
363 /* shortcut */
364 if (s->free == NULL && s->noas < ABSIZE)
366 a = &s->oas.a[s->noas];
367 s->noas++;
368 return a;
371 /* if none at hand, get more */
372 if (s->free == NULL)
374 struct arcbatch *newAb;
375 int i;
377 newAb = (struct arcbatch *) MALLOC(sizeof(struct arcbatch));
378 if (newAb == NULL)
380 NERR(REG_ESPACE);
381 return NULL;
383 newAb->next = s->oas.next;
384 s->oas.next = newAb;
386 for (i = 0; i < ABSIZE; i++)
388 newAb->a[i].type = 0;
389 newAb->a[i].freechain = &newAb->a[i + 1];
391 newAb->a[ABSIZE - 1].freechain = NULL;
392 s->free = &newAb->a[0];
394 assert(s->free != NULL);
396 a = s->free;
397 s->free = a->freechain;
398 return a;
402 * freearc - free an arc
404 static void
405 freearc(struct nfa * nfa,
406 struct arc * victim)
408 struct state *from = victim->from;
409 struct state *to = victim->to;
410 struct arc *a;
412 assert(victim->type != 0);
414 /* take it off color chain if necessary */
415 if (COLORED(victim) && nfa->parent == NULL)
416 uncolorchain(nfa->cm, victim);
418 /* take it off source's out-chain */
419 assert(from != NULL);
420 assert(from->outs != NULL);
421 a = from->outs;
422 if (a == victim) /* simple case: first in chain */
423 from->outs = victim->outchain;
424 else
426 for (; a != NULL && a->outchain != victim; a = a->outchain)
427 continue;
428 assert(a != NULL);
429 a->outchain = victim->outchain;
431 from->nouts--;
433 /* take it off target's in-chain */
434 assert(to != NULL);
435 assert(to->ins != NULL);
436 a = to->ins;
437 if (a == victim) /* simple case: first in chain */
438 to->ins = victim->inchain;
439 else
441 for (; a != NULL && a->inchain != victim; a = a->inchain)
442 continue;
443 assert(a != NULL);
444 a->inchain = victim->inchain;
446 to->nins--;
448 /* clean up and place on free list */
449 victim->type = 0;
450 victim->from = NULL; /* precautions... */
451 victim->to = NULL;
452 victim->inchain = NULL;
453 victim->outchain = NULL;
454 victim->freechain = from->free;
455 from->free = victim;
459 * findarc - find arc, if any, from given source with given type and color
460 * If there is more than one such arc, the result is random.
462 static struct arc *
463 findarc(struct state * s,
464 int type,
465 pcolor co)
467 struct arc *a;
469 for (a = s->outs; a != NULL; a = a->outchain)
470 if (a->type == type && a->co == co)
471 return a;
472 return NULL;
476 * cparc - allocate a new arc within an NFA, copying details from old one
478 static void
479 cparc(struct nfa * nfa,
480 struct arc * oa,
481 struct state * from,
482 struct state * to)
484 newarc(nfa, oa->type, oa->co, from, to);
488 * moveins - move all in arcs of a state to another state
490 * You might think this could be done better by just updating the
491 * existing arcs, and you would be right if it weren't for the desire
492 * for duplicate suppression, which makes it easier to just make new
493 * ones to exploit the suppression built into newarc.
495 static void
496 moveins(struct nfa * nfa,
497 struct state * oldState,
498 struct state * newState)
500 struct arc *a;
502 assert(oldState != newState);
504 while ((a = oldState->ins) != NULL)
506 cparc(nfa, a, a->from, newState);
507 freearc(nfa, a);
509 assert(oldState->nins == 0);
510 assert(oldState->ins == NULL);
514 * copyins - copy all in arcs of a state to another state
516 static void
517 copyins(struct nfa * nfa,
518 struct state * oldState,
519 struct state * newState)
521 struct arc *a;
523 assert(oldState != newState);
525 for (a = oldState->ins; a != NULL; a = a->inchain)
526 cparc(nfa, a, a->from, newState);
530 * moveouts - move all out arcs of a state to another state
532 static void
533 moveouts(struct nfa * nfa,
534 struct state * oldState,
535 struct state * newState)
537 struct arc *a;
539 assert(oldState != newState);
541 while ((a = oldState->outs) != NULL)
543 cparc(nfa, a, newState, a->to);
544 freearc(nfa, a);
549 * copyouts - copy all out arcs of a state to another state
551 static void
552 copyouts(struct nfa * nfa,
553 struct state * oldState,
554 struct state * newState)
556 struct arc *a;
558 assert(oldState != newState);
560 for (a = oldState->outs; a != NULL; a = a->outchain)
561 cparc(nfa, a, newState, a->to);
565 * cloneouts - copy out arcs of a state to another state pair, modifying type
567 static void
568 cloneouts(struct nfa * nfa,
569 struct state * old,
570 struct state * from,
571 struct state * to,
572 int type)
574 struct arc *a;
576 assert(old != from);
578 for (a = old->outs; a != NULL; a = a->outchain)
579 newarc(nfa, type, a->co, from, to);
583 * delsub - delete a sub-NFA, updating subre pointers if necessary
585 * This uses a recursive traversal of the sub-NFA, marking already-seen
586 * states using their tmp pointer.
588 static void
589 delsub(struct nfa * nfa,
590 struct state * lp, /* the sub-NFA goes from here... */
591 struct state * rp) /* ...to here, *not* inclusive */
593 assert(lp != rp);
595 rp->tmp = rp; /* mark end */
597 deltraverse(nfa, lp, lp);
598 assert(lp->nouts == 0 && rp->nins == 0); /* did the job */
599 assert(lp->no != FREESTATE && rp->no != FREESTATE); /* no more */
601 rp->tmp = NULL; /* unmark end */
602 lp->tmp = NULL; /* and begin, marked by deltraverse */
606 * deltraverse - the recursive heart of delsub
607 * This routine's basic job is to destroy all out-arcs of the state.
609 static void
610 deltraverse(struct nfa * nfa,
611 struct state * leftend,
612 struct state * s)
614 struct arc *a;
615 struct state *to;
617 if (s->nouts == 0)
618 return; /* nothing to do */
619 if (s->tmp != NULL)
620 return; /* already in progress */
622 s->tmp = s; /* mark as in progress */
624 while ((a = s->outs) != NULL)
626 to = a->to;
627 deltraverse(nfa, leftend, to);
628 assert(to->nouts == 0 || to->tmp != NULL);
629 freearc(nfa, a);
630 if (to->nins == 0 && to->tmp == NULL)
632 assert(to->nouts == 0);
633 freestate(nfa, to);
637 assert(s->no != FREESTATE); /* we're still here */
638 assert(s == leftend || s->nins != 0); /* and still reachable */
639 assert(s->nouts == 0); /* but have no outarcs */
641 s->tmp = NULL; /* we're done here */
645 * dupnfa - duplicate sub-NFA
647 * Another recursive traversal, this time using tmp to point to duplicates
648 * as well as mark already-seen states. (You knew there was a reason why
649 * it's a state pointer, didn't you? :-))
651 static void
652 dupnfa(struct nfa * nfa,
653 struct state * start, /* duplicate of subNFA starting here */
654 struct state * stop, /* and stopping here */
655 struct state * from, /* stringing duplicate from here */
656 struct state * to) /* to here */
658 if (start == stop)
660 newarc(nfa, EMPTY, 0, from, to);
661 return;
664 stop->tmp = to;
665 duptraverse(nfa, start, from);
666 /* done, except for clearing out the tmp pointers */
668 stop->tmp = NULL;
669 cleartraverse(nfa, start);
673 * duptraverse - recursive heart of dupnfa
675 static void
676 duptraverse(struct nfa * nfa,
677 struct state * s,
678 struct state * stmp) /* s's duplicate, or NULL */
680 struct arc *a;
682 if (s->tmp != NULL)
683 return; /* already done */
685 s->tmp = (stmp == NULL) ? newstate(nfa) : stmp;
686 if (s->tmp == NULL)
688 assert(NISERR());
689 return;
692 for (a = s->outs; a != NULL && !NISERR(); a = a->outchain)
694 duptraverse(nfa, a->to, (struct state *) NULL);
695 if (NISERR())
696 break;
697 assert(a->to->tmp != NULL);
698 cparc(nfa, a, s->tmp, a->to->tmp);
703 * cleartraverse - recursive cleanup for algorithms that leave tmp ptrs set
705 static void
706 cleartraverse(struct nfa * nfa,
707 struct state * s)
709 struct arc *a;
711 if (s->tmp == NULL)
712 return;
713 s->tmp = NULL;
715 for (a = s->outs; a != NULL; a = a->outchain)
716 cleartraverse(nfa, a->to);
720 * specialcolors - fill in special colors for an NFA
722 static void
723 specialcolors(struct nfa * nfa)
725 /* false colors for BOS, BOL, EOS, EOL */
726 if (nfa->parent == NULL)
728 nfa->bos[0] = pseudocolor(nfa->cm);
729 nfa->bos[1] = pseudocolor(nfa->cm);
730 nfa->eos[0] = pseudocolor(nfa->cm);
731 nfa->eos[1] = pseudocolor(nfa->cm);
733 else
735 assert(nfa->parent->bos[0] != COLORLESS);
736 nfa->bos[0] = nfa->parent->bos[0];
737 assert(nfa->parent->bos[1] != COLORLESS);
738 nfa->bos[1] = nfa->parent->bos[1];
739 assert(nfa->parent->eos[0] != COLORLESS);
740 nfa->eos[0] = nfa->parent->eos[0];
741 assert(nfa->parent->eos[1] != COLORLESS);
742 nfa->eos[1] = nfa->parent->eos[1];
747 * optimize - optimize an NFA
749 static long /* re_info bits */
750 optimize(struct nfa * nfa,
751 FILE *f) /* for debug output; NULL none */
753 #ifdef REG_DEBUG
754 int verbose = (f != NULL) ? 1 : 0;
756 if (verbose)
757 fprintf(f, "\ninitial cleanup:\n");
758 #endif
759 cleanup(nfa); /* may simplify situation */
760 #ifdef REG_DEBUG
761 if (verbose)
762 dumpnfa(nfa, f);
763 if (verbose)
764 fprintf(f, "\nempties:\n");
765 #endif
766 fixempties(nfa, f); /* get rid of EMPTY arcs */
767 #ifdef REG_DEBUG
768 if (verbose)
769 fprintf(f, "\nconstraints:\n");
770 #endif
771 pullback(nfa, f); /* pull back constraints backward */
772 pushfwd(nfa, f); /* push fwd constraints forward */
773 #ifdef REG_DEBUG
774 if (verbose)
775 fprintf(f, "\nfinal cleanup:\n");
776 #endif
777 cleanup(nfa); /* final tidying */
778 return analyze(nfa); /* and analysis */
782 * pullback - pull back constraints backward to (with luck) eliminate them
784 static void
785 pullback(struct nfa * nfa,
786 FILE *f) /* for debug output; NULL none */
788 struct state *s;
789 struct state *nexts;
790 struct arc *a;
791 struct arc *nexta;
792 int progress;
794 /* find and pull until there are no more */
797 progress = 0;
798 for (s = nfa->states; s != NULL && !NISERR(); s = nexts)
800 nexts = s->next;
801 for (a = s->outs; a != NULL && !NISERR(); a = nexta)
803 nexta = a->outchain;
804 if (a->type == '^' || a->type == BEHIND)
805 if (pull(nfa, a))
806 progress = 1;
807 assert(nexta == NULL || s->no != FREESTATE);
810 if (progress && f != NULL)
811 dumpnfa(nfa, f);
812 } while (progress && !NISERR());
813 if (NISERR())
814 return;
816 for (a = nfa->pre->outs; a != NULL; a = nexta)
818 nexta = a->outchain;
819 if (a->type == '^')
821 assert(a->co == 0 || a->co == 1);
822 newarc(nfa, PLAIN, nfa->bos[a->co], a->from, a->to);
823 freearc(nfa, a);
829 * pull - pull a back constraint backward past its source state
830 * A significant property of this function is that it deletes at most
831 * one state -- the constraint's from state -- and only if the constraint
832 * was that state's last outarc.
834 static int /* 0 couldn't, 1 could */
835 pull(struct nfa * nfa,
836 struct arc * con)
838 struct state *from = con->from;
839 struct state *to = con->to;
840 struct arc *a;
841 struct arc *nexta;
842 struct state *s;
844 if (from == to)
845 { /* circular constraint is pointless */
846 freearc(nfa, con);
847 return 1;
849 if (from->flag) /* can't pull back beyond start */
850 return 0;
851 if (from->nins == 0)
852 { /* unreachable */
853 freearc(nfa, con);
854 return 1;
858 * DGP 2007-11-15: Cloning a state with a circular constraint on its list
859 * of outs can lead to trouble [Tcl Bug 1810038], so get rid of them first.
861 for (a = from->outs; a != NULL; a = nexta)
863 nexta = a->outchain;
864 switch (a->type)
866 case '^':
867 case '$':
868 case BEHIND:
869 case AHEAD:
870 if (from == a->to)
871 freearc(nfa, a);
872 break;
876 /* first, clone from state if necessary to avoid other outarcs */
877 if (from->nouts > 1)
879 s = newstate(nfa);
880 if (NISERR())
881 return 0;
882 assert(to != from); /* con is not an inarc */
883 copyins(nfa, from, s); /* duplicate inarcs */
884 cparc(nfa, con, s, to); /* move constraint arc */
885 freearc(nfa, con);
886 from = s;
887 con = from->outs;
889 assert(from->nouts == 1);
891 /* propagate the constraint into the from state's inarcs */
892 for (a = from->ins; a != NULL; a = nexta)
894 nexta = a->inchain;
895 switch (combine(con, a))
897 case INCOMPATIBLE: /* destroy the arc */
898 freearc(nfa, a);
899 break;
900 case SATISFIED: /* no action needed */
901 break;
902 case COMPATIBLE: /* swap the two arcs, more or less */
903 s = newstate(nfa);
904 if (NISERR())
905 return 0;
906 cparc(nfa, a, s, to); /* anticipate move */
907 cparc(nfa, con, a->from, s);
908 if (NISERR())
909 return 0;
910 freearc(nfa, a);
911 break;
912 default:
913 assert(NOTREACHED);
914 break;
918 /* remaining inarcs, if any, incorporate the constraint */
919 moveins(nfa, from, to);
920 dropstate(nfa, from); /* will free the constraint */
921 return 1;
925 * pushfwd - push forward constraints forward to (with luck) eliminate them
927 static void
928 pushfwd(struct nfa * nfa,
929 FILE *f) /* for debug output; NULL none */
931 struct state *s;
932 struct state *nexts;
933 struct arc *a;
934 struct arc *nexta;
935 int progress;
937 /* find and push until there are no more */
940 progress = 0;
941 for (s = nfa->states; s != NULL && !NISERR(); s = nexts)
943 nexts = s->next;
944 for (a = s->ins; a != NULL && !NISERR(); a = nexta)
946 nexta = a->inchain;
947 if (a->type == '$' || a->type == AHEAD)
948 if (push(nfa, a))
949 progress = 1;
950 assert(nexta == NULL || s->no != FREESTATE);
953 if (progress && f != NULL)
954 dumpnfa(nfa, f);
955 } while (progress && !NISERR());
956 if (NISERR())
957 return;
959 for (a = nfa->post->ins; a != NULL; a = nexta)
961 nexta = a->inchain;
962 if (a->type == '$')
964 assert(a->co == 0 || a->co == 1);
965 newarc(nfa, PLAIN, nfa->eos[a->co], a->from, a->to);
966 freearc(nfa, a);
972 * push - push a forward constraint forward past its destination state
973 * A significant property of this function is that it deletes at most
974 * one state -- the constraint's to state -- and only if the constraint
975 * was that state's last inarc.
977 static int /* 0 couldn't, 1 could */
978 push(struct nfa * nfa,
979 struct arc * con)
981 struct state *from = con->from;
982 struct state *to = con->to;
983 struct arc *a;
984 struct arc *nexta;
985 struct state *s;
987 if (to == from)
988 { /* circular constraint is pointless */
989 freearc(nfa, con);
990 return 1;
992 if (to->flag) /* can't push forward beyond end */
993 return 0;
994 if (to->nouts == 0)
995 { /* dead end */
996 freearc(nfa, con);
997 return 1;
1001 * DGP 2007-11-15: Here we duplicate the same protections as appear
1002 * in pull() above to avoid troubles with cloning a state with a
1003 * circular constraint on its list of ins. It is not clear whether
1004 * this is necessary, or is protecting against a "can't happen".
1005 * Any test case that actually leads to a freearc() call here would
1006 * be a welcome addition to the test suite.
1008 for (a = to->ins; a != NULL; a = nexta)
1010 nexta = a->inchain;
1011 switch (a->type)
1013 case '^':
1014 case '$':
1015 case BEHIND:
1016 case AHEAD:
1017 if (a->from == to)
1018 freearc(nfa, a);
1019 break;
1023 /* first, clone to state if necessary to avoid other inarcs */
1024 if (to->nins > 1)
1026 s = newstate(nfa);
1027 if (NISERR())
1028 return 0;
1029 copyouts(nfa, to, s); /* duplicate outarcs */
1030 cparc(nfa, con, from, s); /* move constraint */
1031 freearc(nfa, con);
1032 to = s;
1033 con = to->ins;
1035 assert(to->nins == 1);
1037 /* propagate the constraint into the to state's outarcs */
1038 for (a = to->outs; a != NULL; a = nexta)
1040 nexta = a->outchain;
1041 switch (combine(con, a))
1043 case INCOMPATIBLE: /* destroy the arc */
1044 freearc(nfa, a);
1045 break;
1046 case SATISFIED: /* no action needed */
1047 break;
1048 case COMPATIBLE: /* swap the two arcs, more or less */
1049 s = newstate(nfa);
1050 if (NISERR())
1051 return 0;
1052 cparc(nfa, con, s, a->to); /* anticipate move */
1053 cparc(nfa, a, from, s);
1054 if (NISERR())
1055 return 0;
1056 freearc(nfa, a);
1057 break;
1058 default:
1059 assert(NOTREACHED);
1060 break;
1064 /* remaining outarcs, if any, incorporate the constraint */
1065 moveouts(nfa, to, from);
1066 dropstate(nfa, to); /* will free the constraint */
1067 return 1;
1071 * combine - constraint lands on an arc, what happens?
1073 * #def INCOMPATIBLE 1 // destroys arc
1074 * #def SATISFIED 2 // constraint satisfied
1075 * #def COMPATIBLE 3 // compatible but not satisfied yet
1077 static int
1078 combine(struct arc * con,
1079 struct arc * a)
1081 #define CA(ct,at) (((ct)<<CHAR_BIT) | (at))
1083 switch (CA(con->type, a->type))
1085 case CA('^', PLAIN): /* newlines are handled separately */
1086 case CA('$', PLAIN):
1087 return INCOMPATIBLE;
1088 break;
1089 case CA(AHEAD, PLAIN): /* color constraints meet colors */
1090 case CA(BEHIND, PLAIN):
1091 if (con->co == a->co)
1092 return SATISFIED;
1093 return INCOMPATIBLE;
1094 break;
1095 case CA('^', '^'): /* collision, similar constraints */
1096 case CA('$', '$'):
1097 case CA(AHEAD, AHEAD):
1098 case CA(BEHIND, BEHIND):
1099 if (con->co == a->co) /* true duplication */
1100 return SATISFIED;
1101 return INCOMPATIBLE;
1102 break;
1103 case CA('^', BEHIND): /* collision, dissimilar constraints */
1104 case CA(BEHIND, '^'):
1105 case CA('$', AHEAD):
1106 case CA(AHEAD, '$'):
1107 return INCOMPATIBLE;
1108 break;
1109 case CA('^', '$'): /* constraints passing each other */
1110 case CA('^', AHEAD):
1111 case CA(BEHIND, '$'):
1112 case CA(BEHIND, AHEAD):
1113 case CA('$', '^'):
1114 case CA('$', BEHIND):
1115 case CA(AHEAD, '^'):
1116 case CA(AHEAD, BEHIND):
1117 case CA('^', LACON):
1118 case CA(BEHIND, LACON):
1119 case CA('$', LACON):
1120 case CA(AHEAD, LACON):
1121 return COMPATIBLE;
1122 break;
1124 assert(NOTREACHED);
1125 return INCOMPATIBLE; /* for benefit of blind compilers */
1129 * fixempties - get rid of EMPTY arcs
1131 static void
1132 fixempties(struct nfa * nfa,
1133 FILE *f) /* for debug output; NULL none */
1135 struct state *s;
1136 struct state *nexts;
1137 struct arc *a;
1138 struct arc *nexta;
1139 int progress;
1141 /* find and eliminate empties until there are no more */
1144 progress = 0;
1145 for (s = nfa->states; s != NULL && !NISERR() &&
1146 s->no != FREESTATE; s = nexts)
1148 nexts = s->next;
1149 for (a = s->outs; a != NULL && !NISERR(); a = nexta)
1151 nexta = a->outchain;
1152 if (a->type == EMPTY && unempty(nfa, a))
1153 progress = 1;
1154 assert(nexta == NULL || s->no != FREESTATE);
1157 if (progress && f != NULL)
1158 dumpnfa(nfa, f);
1159 } while (progress && !NISERR());
1163 * unempty - optimize out an EMPTY arc, if possible
1165 * Actually, as it stands this function always succeeds, but the return
1166 * value is kept with an eye on possible future changes.
1168 static int /* 0 couldn't, 1 could */
1169 unempty(struct nfa * nfa,
1170 struct arc * a)
1172 struct state *from = a->from;
1173 struct state *to = a->to;
1174 int usefrom; /* work on from, as opposed to to? */
1176 assert(a->type == EMPTY);
1177 assert(from != nfa->pre && to != nfa->post);
1179 if (from == to)
1180 { /* vacuous loop */
1181 freearc(nfa, a);
1182 return 1;
1185 /* decide which end to work on */
1186 usefrom = 1; /* default: attack from */
1187 if (from->nouts > to->nins)
1188 usefrom = 0;
1189 else if (from->nouts == to->nins)
1191 /* decide on secondary issue: move/copy fewest arcs */
1192 if (from->nins > to->nouts)
1193 usefrom = 0;
1196 freearc(nfa, a);
1197 if (usefrom)
1199 if (from->nouts == 0)
1201 /* was the state's only outarc */
1202 moveins(nfa, from, to);
1203 freestate(nfa, from);
1205 else
1206 copyins(nfa, from, to);
1208 else
1210 if (to->nins == 0)
1212 /* was the state's only inarc */
1213 moveouts(nfa, to, from);
1214 freestate(nfa, to);
1216 else
1217 copyouts(nfa, to, from);
1220 return 1;
1224 * cleanup - clean up NFA after optimizations
1226 static void
1227 cleanup(struct nfa * nfa)
1229 struct state *s;
1230 struct state *nexts;
1231 int n;
1233 /* clear out unreachable or dead-end states */
1234 /* use pre to mark reachable, then post to mark can-reach-post */
1235 markreachable(nfa, nfa->pre, (struct state *) NULL, nfa->pre);
1236 markcanreach(nfa, nfa->post, nfa->pre, nfa->post);
1237 for (s = nfa->states; s != NULL; s = nexts)
1239 nexts = s->next;
1240 if (s->tmp != nfa->post && !s->flag)
1241 dropstate(nfa, s);
1243 assert(nfa->post->nins == 0 || nfa->post->tmp == nfa->post);
1244 cleartraverse(nfa, nfa->pre);
1245 assert(nfa->post->nins == 0 || nfa->post->tmp == NULL);
1246 /* the nins==0 (final unreachable) case will be caught later */
1248 /* renumber surviving states */
1249 n = 0;
1250 for (s = nfa->states; s != NULL; s = s->next)
1251 s->no = n++;
1252 nfa->nstates = n;
1256 * markreachable - recursive marking of reachable states
1258 static void
1259 markreachable(struct nfa * nfa,
1260 struct state * s,
1261 struct state * okay, /* consider only states with this mark */
1262 struct state * mark) /* the value to mark with */
1264 struct arc *a;
1266 if (s->tmp != okay)
1267 return;
1268 s->tmp = mark;
1270 for (a = s->outs; a != NULL; a = a->outchain)
1271 markreachable(nfa, a->to, okay, mark);
1275 * markcanreach - recursive marking of states which can reach here
1277 static void
1278 markcanreach(struct nfa * nfa,
1279 struct state * s,
1280 struct state * okay, /* consider only states with this mark */
1281 struct state * mark) /* the value to mark with */
1283 struct arc *a;
1285 if (s->tmp != okay)
1286 return;
1287 s->tmp = mark;
1289 for (a = s->ins; a != NULL; a = a->inchain)
1290 markcanreach(nfa, a->from, okay, mark);
1294 * analyze - ascertain potentially-useful facts about an optimized NFA
1296 static long /* re_info bits to be ORed in */
1297 analyze(struct nfa * nfa)
1299 struct arc *a;
1300 struct arc *aa;
1302 if (nfa->pre->outs == NULL)
1303 return REG_UIMPOSSIBLE;
1304 for (a = nfa->pre->outs; a != NULL; a = a->outchain)
1305 for (aa = a->to->outs; aa != NULL; aa = aa->outchain)
1306 if (aa->to == nfa->post)
1307 return REG_UEMPTYMATCH;
1308 return 0;
1312 * compact - compact an NFA
1314 static void
1315 compact(struct nfa * nfa,
1316 struct cnfa * cnfa)
1318 struct state *s;
1319 struct arc *a;
1320 size_t nstates;
1321 size_t narcs;
1322 struct carc *ca;
1323 struct carc *first;
1325 assert(!NISERR());
1327 nstates = 0;
1328 narcs = 0;
1329 for (s = nfa->states; s != NULL; s = s->next)
1331 nstates++;
1332 narcs += 1 + s->nouts + 1;
1333 /* 1 as a fake for flags, nouts for arcs, 1 as endmarker */
1336 cnfa->states = (struct carc **) MALLOC(nstates * sizeof(struct carc *));
1337 cnfa->arcs = (struct carc *) MALLOC(narcs * sizeof(struct carc));
1338 if (cnfa->states == NULL || cnfa->arcs == NULL)
1340 if (cnfa->states != NULL)
1341 FREE(cnfa->states);
1342 if (cnfa->arcs != NULL)
1343 FREE(cnfa->arcs);
1344 NERR(REG_ESPACE);
1345 return;
1347 cnfa->nstates = nstates;
1348 cnfa->pre = nfa->pre->no;
1349 cnfa->post = nfa->post->no;
1350 cnfa->bos[0] = nfa->bos[0];
1351 cnfa->bos[1] = nfa->bos[1];
1352 cnfa->eos[0] = nfa->eos[0];
1353 cnfa->eos[1] = nfa->eos[1];
1354 cnfa->ncolors = maxcolor(nfa->cm) + 1;
1355 cnfa->flags = 0;
1357 ca = cnfa->arcs;
1358 for (s = nfa->states; s != NULL; s = s->next)
1360 assert((size_t) s->no < nstates);
1361 cnfa->states[s->no] = ca;
1362 ca->co = 0; /* clear and skip flags "arc" */
1363 ca++;
1364 first = ca;
1365 for (a = s->outs; a != NULL; a = a->outchain)
1366 switch (a->type)
1368 case PLAIN:
1369 ca->co = a->co;
1370 ca->to = a->to->no;
1371 ca++;
1372 break;
1373 case LACON:
1374 assert(s->no != cnfa->pre);
1375 ca->co = (color) (cnfa->ncolors + a->co);
1376 ca->to = a->to->no;
1377 ca++;
1378 cnfa->flags |= HASLACONS;
1379 break;
1380 default:
1381 assert(NOTREACHED);
1382 break;
1384 carcsort(first, ca - 1);
1385 ca->co = COLORLESS;
1386 ca->to = 0;
1387 ca++;
1389 assert(ca == &cnfa->arcs[narcs]);
1390 assert(cnfa->nstates != 0);
1392 /* mark no-progress states */
1393 for (a = nfa->pre->outs; a != NULL; a = a->outchain)
1394 cnfa->states[a->to->no]->co = 1;
1395 cnfa->states[nfa->pre->no]->co = 1;
1399 * carcsort - sort compacted-NFA arcs by color
1401 * Really dumb algorithm, but if the list is long enough for that to matter,
1402 * you're in real trouble anyway.
1404 static void
1405 carcsort(struct carc * first,
1406 struct carc * last)
1408 struct carc *p;
1409 struct carc *q;
1410 struct carc tmp;
1412 if (last - first <= 1)
1413 return;
1415 for (p = first; p <= last; p++)
1416 for (q = p; q <= last; q++)
1417 if (p->co > q->co ||
1418 (p->co == q->co && p->to > q->to))
1420 assert(p != q);
1421 tmp = *p;
1422 *p = *q;
1423 *q = tmp;
1428 * freecnfa - free a compacted NFA
1430 static void
1431 freecnfa(struct cnfa * cnfa)
1433 assert(cnfa->nstates != 0); /* not empty already */
1434 cnfa->nstates = 0;
1435 FREE(cnfa->states);
1436 FREE(cnfa->arcs);
1440 * dumpnfa - dump an NFA in human-readable form
1442 static void
1443 dumpnfa(struct nfa * nfa,
1444 FILE *f)
1446 #ifdef REG_DEBUG
1447 struct state *s;
1449 fprintf(f, "pre %d, post %d", nfa->pre->no, nfa->post->no);
1450 if (nfa->bos[0] != COLORLESS)
1451 fprintf(f, ", bos [%ld]", (long) nfa->bos[0]);
1452 if (nfa->bos[1] != COLORLESS)
1453 fprintf(f, ", bol [%ld]", (long) nfa->bos[1]);
1454 if (nfa->eos[0] != COLORLESS)
1455 fprintf(f, ", eos [%ld]", (long) nfa->eos[0]);
1456 if (nfa->eos[1] != COLORLESS)
1457 fprintf(f, ", eol [%ld]", (long) nfa->eos[1]);
1458 fprintf(f, "\n");
1459 for (s = nfa->states; s != NULL; s = s->next)
1460 dumpstate(s, f);
1461 if (nfa->parent == NULL)
1462 dumpcolors(nfa->cm, f);
1463 fflush(f);
1464 #endif
1467 #ifdef REG_DEBUG /* subordinates of dumpnfa */
1470 * dumpstate - dump an NFA state in human-readable form
1472 static void
1473 dumpstate(struct state * s,
1474 FILE *f)
1476 struct arc *a;
1478 fprintf(f, "%d%s%c", s->no, (s->tmp != NULL) ? "T" : "",
1479 (s->flag) ? s->flag : '.');
1480 if (s->prev != NULL && s->prev->next != s)
1481 fprintf(f, "\tstate chain bad\n");
1482 if (s->nouts == 0)
1483 fprintf(f, "\tno out arcs\n");
1484 else
1485 dumparcs(s, f);
1486 fflush(f);
1487 for (a = s->ins; a != NULL; a = a->inchain)
1489 if (a->to != s)
1490 fprintf(f, "\tlink from %d to %d on %d's in-chain\n",
1491 a->from->no, a->to->no, s->no);
1496 * dumparcs - dump out-arcs in human-readable form
1498 static void
1499 dumparcs(struct state * s,
1500 FILE *f)
1502 int pos;
1504 assert(s->nouts > 0);
1505 /* printing arcs in reverse order is usually clearer */
1506 pos = dumprarcs(s->outs, s, f, 1);
1507 if (pos != 1)
1508 fprintf(f, "\n");
1512 * dumprarcs - dump remaining outarcs, recursively, in reverse order
1514 static int /* resulting print position */
1515 dumprarcs(struct arc * a,
1516 struct state * s,
1517 FILE *f,
1518 int pos) /* initial print position */
1520 if (a->outchain != NULL)
1521 pos = dumprarcs(a->outchain, s, f, pos);
1522 dumparc(a, s, f);
1523 if (pos == 5)
1525 fprintf(f, "\n");
1526 pos = 1;
1528 else
1529 pos++;
1530 return pos;
1534 * dumparc - dump one outarc in readable form, including prefixing tab
1536 static void
1537 dumparc(struct arc * a,
1538 struct state * s,
1539 FILE *f)
1541 struct arc *aa;
1542 struct arcbatch *ab;
1544 fprintf(f, "\t");
1545 switch (a->type)
1547 case PLAIN:
1548 fprintf(f, "[%ld]", (long) a->co);
1549 break;
1550 case AHEAD:
1551 fprintf(f, ">%ld>", (long) a->co);
1552 break;
1553 case BEHIND:
1554 fprintf(f, "<%ld<", (long) a->co);
1555 break;
1556 case LACON:
1557 fprintf(f, ":%ld:", (long) a->co);
1558 break;
1559 case '^':
1560 case '$':
1561 fprintf(f, "%c%d", a->type, (int) a->co);
1562 break;
1563 case EMPTY:
1564 break;
1565 default:
1566 fprintf(f, "0x%x/0%lo", a->type, (long) a->co);
1567 break;
1569 if (a->from != s)
1570 fprintf(f, "?%d?", a->from->no);
1571 for (ab = &a->from->oas; ab != NULL; ab = ab->next)
1573 for (aa = &ab->a[0]; aa < &ab->a[ABSIZE]; aa++)
1574 if (aa == a)
1575 break; /* NOTE BREAK OUT */
1576 if (aa < &ab->a[ABSIZE]) /* propagate break */
1577 break; /* NOTE BREAK OUT */
1579 if (ab == NULL)
1580 fprintf(f, "?!?"); /* not in allocated space */
1581 fprintf(f, "->");
1582 if (a->to == NULL)
1584 fprintf(f, "NULL");
1585 return;
1587 fprintf(f, "%d", a->to->no);
1588 for (aa = a->to->ins; aa != NULL; aa = aa->inchain)
1589 if (aa == a)
1590 break; /* NOTE BREAK OUT */
1591 if (aa == NULL)
1592 fprintf(f, "?!?"); /* missing from in-chain */
1594 #endif /* REG_DEBUG */
1597 * dumpcnfa - dump a compacted NFA in human-readable form
1599 #ifdef REG_DEBUG
1600 static void
1601 dumpcnfa(struct cnfa * cnfa,
1602 FILE *f)
1604 int st;
1606 fprintf(f, "pre %d, post %d", cnfa->pre, cnfa->post);
1607 if (cnfa->bos[0] != COLORLESS)
1608 fprintf(f, ", bos [%ld]", (long) cnfa->bos[0]);
1609 if (cnfa->bos[1] != COLORLESS)
1610 fprintf(f, ", bol [%ld]", (long) cnfa->bos[1]);
1611 if (cnfa->eos[0] != COLORLESS)
1612 fprintf(f, ", eos [%ld]", (long) cnfa->eos[0]);
1613 if (cnfa->eos[1] != COLORLESS)
1614 fprintf(f, ", eol [%ld]", (long) cnfa->eos[1]);
1615 if (cnfa->flags & HASLACONS)
1616 fprintf(f, ", haslacons");
1617 fprintf(f, "\n");
1618 for (st = 0; st < cnfa->nstates; st++)
1619 dumpcstate(st, cnfa->states[st], cnfa, f);
1620 fflush(f);
1622 #endif
1624 #ifdef REG_DEBUG /* subordinates of dumpcnfa */
1627 * dumpcstate - dump a compacted-NFA state in human-readable form
1629 static void
1630 dumpcstate(int st,
1631 struct carc * ca,
1632 struct cnfa * cnfa,
1633 FILE *f)
1635 int i;
1636 int pos;
1638 fprintf(f, "%d%s", st, (ca[0].co) ? ":" : ".");
1639 pos = 1;
1640 for (i = 1; ca[i].co != COLORLESS; i++)
1642 if (ca[i].co < cnfa->ncolors)
1643 fprintf(f, "\t[%ld]->%d", (long) ca[i].co, ca[i].to);
1644 else
1645 fprintf(f, "\t:%ld:->%d", (long) ca[i].co - cnfa->ncolors,
1646 ca[i].to);
1647 if (pos == 5)
1649 fprintf(f, "\n");
1650 pos = 1;
1652 else
1653 pos++;
1655 if (i == 1 || pos != 1)
1656 fprintf(f, "\n");
1657 fflush(f);
1660 #endif /* REG_DEBUG */