3 * This file is #included by regcomp.c.
5 * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved.
7 * Development of this software was funded, in part, by Cray Research Inc.,
8 * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
9 * Corporation, none of whom are responsible for the results. The author
12 * Redistribution and use in source and binary forms -- with or without
13 * modification -- are permitted for any purpose, provided that
14 * redistributions in source form retain this entire copyright notice and
15 * indicate the origin and nature of any modifications.
17 * I'd appreciate being given credit for this package in the documentation
18 * of software which uses it, but that is not a requirement.
20 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
21 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
22 * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
23 * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
24 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
25 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
26 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
27 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
28 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
29 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 * One or two things that technically ought to be in here
35 * are actually in color.c, thanks to some incestuous relationships in
39 #define NISERR() VISERR(nfa->v)
40 #define NERR(e) VERR(nfa->v, (e))
44 * newnfa - set up an NFA
46 static struct nfa
* /* the NFA, or NULL */
47 newnfa(struct vars
* v
,
49 struct nfa
* parent
) /* NULL if primary NFA */
53 nfa
= (struct nfa
*) MALLOC(sizeof(struct nfa
));
64 nfa
->bos
[0] = nfa
->bos
[1] = COLORLESS
;
65 nfa
->eos
[0] = nfa
->eos
[1] = COLORLESS
;
66 nfa
->parent
= parent
; /* Precedes newfstate so parent is valid. */
67 nfa
->post
= newfstate(nfa
, '@'); /* number 0 */
68 nfa
->pre
= newfstate(nfa
, '>'); /* number 1 */
70 nfa
->init
= newstate(nfa
); /* may become invalid later */
71 nfa
->final
= newstate(nfa
);
77 rainbow(nfa
, nfa
->cm
, PLAIN
, COLORLESS
, nfa
->pre
, nfa
->init
);
78 newarc(nfa
, '^', 1, nfa
->pre
, nfa
->init
);
79 newarc(nfa
, '^', 0, nfa
->pre
, nfa
->init
);
80 rainbow(nfa
, nfa
->cm
, PLAIN
, COLORLESS
, nfa
->final
, nfa
->post
);
81 newarc(nfa
, '$', 1, nfa
->final
, nfa
->post
);
82 newarc(nfa
, '$', 0, nfa
->final
, nfa
->post
);
93 * TooManyStates - checks if the max states exceeds the compile-time value
96 TooManyStates(struct nfa
*nfa
)
98 struct nfa
*parent
= nfa
->parent
;
99 size_t sz
= nfa
->size
;
101 while (parent
!= NULL
)
104 parent
= parent
->parent
;
106 if (sz
> REG_MAX_STATES
)
112 * IncrementSize - increases the tracked size of the NFA and its parents.
115 IncrementSize(struct nfa
*nfa
)
117 struct nfa
*parent
= nfa
->parent
;
120 while (parent
!= NULL
)
123 parent
= parent
->parent
;
128 * DecrementSize - decreases the tracked size of the NFA and its parents.
131 DecrementSize(struct nfa
*nfa
)
133 struct nfa
*parent
= nfa
->parent
;
136 while (parent
!= NULL
)
139 parent
= parent
->parent
;
144 * freenfa - free an entire NFA
147 freenfa(struct nfa
* nfa
)
151 while ((s
= nfa
->states
) != NULL
)
153 s
->nins
= s
->nouts
= 0; /* don't worry about arcs */
156 while ((s
= nfa
->free
) != NULL
)
159 destroystate(nfa
, s
);
170 * newstate - allocate an NFA state, with zero flag value
172 static struct state
* /* NULL on error */
173 newstate(struct nfa
* nfa
)
177 if (TooManyStates(nfa
))
182 if (nfa
->free
!= NULL
)
189 s
= (struct state
*) MALLOC(sizeof(struct state
));
200 assert(nfa
->nstates
>= 0);
201 s
->no
= nfa
->nstates
++;
203 if (nfa
->states
== NULL
)
211 if (nfa
->slast
!= NULL
)
213 assert(nfa
->slast
->next
== NULL
);
214 nfa
->slast
->next
= s
;
216 s
->prev
= nfa
->slast
;
218 /* track the current size and the parent size */
224 * newfstate - allocate an NFA state with a specified flag value
226 static struct state
* /* NULL on error */
227 newfstate(struct nfa
* nfa
, int flag
)
233 s
->flag
= (char) flag
;
238 * dropstate - delete a state's inarcs and outarcs and free it
241 dropstate(struct nfa
* nfa
,
246 while ((a
= s
->ins
) != NULL
)
248 while ((a
= s
->outs
) != NULL
)
254 * freestate - free a state, which has no in-arcs or out-arcs
257 freestate(struct nfa
* nfa
,
261 assert(s
->nins
== 0 && s
->nouts
== 0);
266 s
->next
->prev
= s
->prev
;
269 assert(s
== nfa
->slast
);
270 nfa
->slast
= s
->prev
;
273 s
->prev
->next
= s
->next
;
276 assert(s
== nfa
->states
);
277 nfa
->states
= s
->next
;
280 s
->next
= nfa
->free
; /* don't delete it, put it on the free list */
286 * destroystate - really get rid of an already-freed state
289 destroystate(struct nfa
* nfa
,
293 struct arcbatch
*abnext
;
295 assert(s
->no
== FREESTATE
);
296 for (ab
= s
->oas
.next
; ab
!= NULL
; ab
= abnext
)
308 * newarc - set up a new arc within an NFA
311 newarc(struct nfa
* nfa
,
319 assert(from
!= NULL
&& to
!= NULL
);
321 /* check for duplicates */
322 for (a
= from
->outs
; a
!= NULL
; a
= a
->outchain
)
323 if (a
->to
== to
&& a
->co
== co
&& a
->type
== t
)
326 a
= allocarc(nfa
, from
);
337 * Put the new arc on the beginning, not the end, of the chains. Not only
338 * is this easier, it has the very useful side effect that deleting the
339 * most-recently-added arc is the cheapest case rather than the most
342 a
->inchain
= to
->ins
;
344 a
->outchain
= from
->outs
;
350 if (COLORED(a
) && nfa
->parent
== NULL
)
351 colorchain(nfa
->cm
, a
);
355 * allocarc - allocate a new out-arc within a state
357 static struct arc
* /* NULL for failure */
358 allocarc(struct nfa
* nfa
,
364 if (s
->free
== NULL
&& s
->noas
< ABSIZE
)
366 a
= &s
->oas
.a
[s
->noas
];
371 /* if none at hand, get more */
374 struct arcbatch
*newAb
;
377 newAb
= (struct arcbatch
*) MALLOC(sizeof(struct arcbatch
));
383 newAb
->next
= s
->oas
.next
;
386 for (i
= 0; i
< ABSIZE
; i
++)
388 newAb
->a
[i
].type
= 0;
389 newAb
->a
[i
].freechain
= &newAb
->a
[i
+ 1];
391 newAb
->a
[ABSIZE
- 1].freechain
= NULL
;
392 s
->free
= &newAb
->a
[0];
394 assert(s
->free
!= NULL
);
397 s
->free
= a
->freechain
;
402 * freearc - free an arc
405 freearc(struct nfa
* nfa
,
408 struct state
*from
= victim
->from
;
409 struct state
*to
= victim
->to
;
412 assert(victim
->type
!= 0);
414 /* take it off color chain if necessary */
415 if (COLORED(victim
) && nfa
->parent
== NULL
)
416 uncolorchain(nfa
->cm
, victim
);
418 /* take it off source's out-chain */
419 assert(from
!= NULL
);
420 assert(from
->outs
!= NULL
);
422 if (a
== victim
) /* simple case: first in chain */
423 from
->outs
= victim
->outchain
;
426 for (; a
!= NULL
&& a
->outchain
!= victim
; a
= a
->outchain
)
429 a
->outchain
= victim
->outchain
;
433 /* take it off target's in-chain */
435 assert(to
->ins
!= NULL
);
437 if (a
== victim
) /* simple case: first in chain */
438 to
->ins
= victim
->inchain
;
441 for (; a
!= NULL
&& a
->inchain
!= victim
; a
= a
->inchain
)
444 a
->inchain
= victim
->inchain
;
448 /* clean up and place on free list */
450 victim
->from
= NULL
; /* precautions... */
452 victim
->inchain
= NULL
;
453 victim
->outchain
= NULL
;
454 victim
->freechain
= from
->free
;
459 * findarc - find arc, if any, from given source with given type and color
460 * If there is more than one such arc, the result is random.
463 findarc(struct state
* s
,
469 for (a
= s
->outs
; a
!= NULL
; a
= a
->outchain
)
470 if (a
->type
== type
&& a
->co
== co
)
476 * cparc - allocate a new arc within an NFA, copying details from old one
479 cparc(struct nfa
* nfa
,
484 newarc(nfa
, oa
->type
, oa
->co
, from
, to
);
488 * moveins - move all in arcs of a state to another state
490 * You might think this could be done better by just updating the
491 * existing arcs, and you would be right if it weren't for the desire
492 * for duplicate suppression, which makes it easier to just make new
493 * ones to exploit the suppression built into newarc.
496 moveins(struct nfa
* nfa
,
497 struct state
* oldState
,
498 struct state
* newState
)
502 assert(oldState
!= newState
);
504 while ((a
= oldState
->ins
) != NULL
)
506 cparc(nfa
, a
, a
->from
, newState
);
509 assert(oldState
->nins
== 0);
510 assert(oldState
->ins
== NULL
);
514 * copyins - copy all in arcs of a state to another state
517 copyins(struct nfa
* nfa
,
518 struct state
* oldState
,
519 struct state
* newState
)
523 assert(oldState
!= newState
);
525 for (a
= oldState
->ins
; a
!= NULL
; a
= a
->inchain
)
526 cparc(nfa
, a
, a
->from
, newState
);
530 * moveouts - move all out arcs of a state to another state
533 moveouts(struct nfa
* nfa
,
534 struct state
* oldState
,
535 struct state
* newState
)
539 assert(oldState
!= newState
);
541 while ((a
= oldState
->outs
) != NULL
)
543 cparc(nfa
, a
, newState
, a
->to
);
549 * copyouts - copy all out arcs of a state to another state
552 copyouts(struct nfa
* nfa
,
553 struct state
* oldState
,
554 struct state
* newState
)
558 assert(oldState
!= newState
);
560 for (a
= oldState
->outs
; a
!= NULL
; a
= a
->outchain
)
561 cparc(nfa
, a
, newState
, a
->to
);
565 * cloneouts - copy out arcs of a state to another state pair, modifying type
568 cloneouts(struct nfa
* nfa
,
578 for (a
= old
->outs
; a
!= NULL
; a
= a
->outchain
)
579 newarc(nfa
, type
, a
->co
, from
, to
);
583 * delsub - delete a sub-NFA, updating subre pointers if necessary
585 * This uses a recursive traversal of the sub-NFA, marking already-seen
586 * states using their tmp pointer.
589 delsub(struct nfa
* nfa
,
590 struct state
* lp
, /* the sub-NFA goes from here... */
591 struct state
* rp
) /* ...to here, *not* inclusive */
595 rp
->tmp
= rp
; /* mark end */
597 deltraverse(nfa
, lp
, lp
);
598 assert(lp
->nouts
== 0 && rp
->nins
== 0); /* did the job */
599 assert(lp
->no
!= FREESTATE
&& rp
->no
!= FREESTATE
); /* no more */
601 rp
->tmp
= NULL
; /* unmark end */
602 lp
->tmp
= NULL
; /* and begin, marked by deltraverse */
606 * deltraverse - the recursive heart of delsub
607 * This routine's basic job is to destroy all out-arcs of the state.
610 deltraverse(struct nfa
* nfa
,
611 struct state
* leftend
,
618 return; /* nothing to do */
620 return; /* already in progress */
622 s
->tmp
= s
; /* mark as in progress */
624 while ((a
= s
->outs
) != NULL
)
627 deltraverse(nfa
, leftend
, to
);
628 assert(to
->nouts
== 0 || to
->tmp
!= NULL
);
630 if (to
->nins
== 0 && to
->tmp
== NULL
)
632 assert(to
->nouts
== 0);
637 assert(s
->no
!= FREESTATE
); /* we're still here */
638 assert(s
== leftend
|| s
->nins
!= 0); /* and still reachable */
639 assert(s
->nouts
== 0); /* but have no outarcs */
641 s
->tmp
= NULL
; /* we're done here */
645 * dupnfa - duplicate sub-NFA
647 * Another recursive traversal, this time using tmp to point to duplicates
648 * as well as mark already-seen states. (You knew there was a reason why
649 * it's a state pointer, didn't you? :-))
652 dupnfa(struct nfa
* nfa
,
653 struct state
* start
, /* duplicate of subNFA starting here */
654 struct state
* stop
, /* and stopping here */
655 struct state
* from
, /* stringing duplicate from here */
656 struct state
* to
) /* to here */
660 newarc(nfa
, EMPTY
, 0, from
, to
);
665 duptraverse(nfa
, start
, from
);
666 /* done, except for clearing out the tmp pointers */
669 cleartraverse(nfa
, start
);
673 * duptraverse - recursive heart of dupnfa
676 duptraverse(struct nfa
* nfa
,
678 struct state
* stmp
) /* s's duplicate, or NULL */
683 return; /* already done */
685 s
->tmp
= (stmp
== NULL
) ? newstate(nfa
) : stmp
;
692 for (a
= s
->outs
; a
!= NULL
&& !NISERR(); a
= a
->outchain
)
694 duptraverse(nfa
, a
->to
, (struct state
*) NULL
);
697 assert(a
->to
->tmp
!= NULL
);
698 cparc(nfa
, a
, s
->tmp
, a
->to
->tmp
);
703 * cleartraverse - recursive cleanup for algorithms that leave tmp ptrs set
706 cleartraverse(struct nfa
* nfa
,
715 for (a
= s
->outs
; a
!= NULL
; a
= a
->outchain
)
716 cleartraverse(nfa
, a
->to
);
720 * specialcolors - fill in special colors for an NFA
723 specialcolors(struct nfa
* nfa
)
725 /* false colors for BOS, BOL, EOS, EOL */
726 if (nfa
->parent
== NULL
)
728 nfa
->bos
[0] = pseudocolor(nfa
->cm
);
729 nfa
->bos
[1] = pseudocolor(nfa
->cm
);
730 nfa
->eos
[0] = pseudocolor(nfa
->cm
);
731 nfa
->eos
[1] = pseudocolor(nfa
->cm
);
735 assert(nfa
->parent
->bos
[0] != COLORLESS
);
736 nfa
->bos
[0] = nfa
->parent
->bos
[0];
737 assert(nfa
->parent
->bos
[1] != COLORLESS
);
738 nfa
->bos
[1] = nfa
->parent
->bos
[1];
739 assert(nfa
->parent
->eos
[0] != COLORLESS
);
740 nfa
->eos
[0] = nfa
->parent
->eos
[0];
741 assert(nfa
->parent
->eos
[1] != COLORLESS
);
742 nfa
->eos
[1] = nfa
->parent
->eos
[1];
747 * optimize - optimize an NFA
749 static long /* re_info bits */
750 optimize(struct nfa
* nfa
,
751 FILE *f
) /* for debug output; NULL none */
754 int verbose
= (f
!= NULL
) ? 1 : 0;
757 fprintf(f
, "\ninitial cleanup:\n");
759 cleanup(nfa
); /* may simplify situation */
764 fprintf(f
, "\nempties:\n");
766 fixempties(nfa
, f
); /* get rid of EMPTY arcs */
769 fprintf(f
, "\nconstraints:\n");
771 pullback(nfa
, f
); /* pull back constraints backward */
772 pushfwd(nfa
, f
); /* push fwd constraints forward */
775 fprintf(f
, "\nfinal cleanup:\n");
777 cleanup(nfa
); /* final tidying */
778 return analyze(nfa
); /* and analysis */
782 * pullback - pull back constraints backward to (with luck) eliminate them
785 pullback(struct nfa
* nfa
,
786 FILE *f
) /* for debug output; NULL none */
794 /* find and pull until there are no more */
798 for (s
= nfa
->states
; s
!= NULL
&& !NISERR(); s
= nexts
)
801 for (a
= s
->outs
; a
!= NULL
&& !NISERR(); a
= nexta
)
804 if (a
->type
== '^' || a
->type
== BEHIND
)
807 assert(nexta
== NULL
|| s
->no
!= FREESTATE
);
810 if (progress
&& f
!= NULL
)
812 } while (progress
&& !NISERR());
816 for (a
= nfa
->pre
->outs
; a
!= NULL
; a
= nexta
)
821 assert(a
->co
== 0 || a
->co
== 1);
822 newarc(nfa
, PLAIN
, nfa
->bos
[a
->co
], a
->from
, a
->to
);
829 * pull - pull a back constraint backward past its source state
830 * A significant property of this function is that it deletes at most
831 * one state -- the constraint's from state -- and only if the constraint
832 * was that state's last outarc.
834 static int /* 0 couldn't, 1 could */
835 pull(struct nfa
* nfa
,
838 struct state
*from
= con
->from
;
839 struct state
*to
= con
->to
;
845 { /* circular constraint is pointless */
849 if (from
->flag
) /* can't pull back beyond start */
858 * DGP 2007-11-15: Cloning a state with a circular constraint on its list
859 * of outs can lead to trouble [Tcl Bug 1810038], so get rid of them first.
861 for (a
= from
->outs
; a
!= NULL
; a
= nexta
)
876 /* first, clone from state if necessary to avoid other outarcs */
882 assert(to
!= from
); /* con is not an inarc */
883 copyins(nfa
, from
, s
); /* duplicate inarcs */
884 cparc(nfa
, con
, s
, to
); /* move constraint arc */
889 assert(from
->nouts
== 1);
891 /* propagate the constraint into the from state's inarcs */
892 for (a
= from
->ins
; a
!= NULL
; a
= nexta
)
895 switch (combine(con
, a
))
897 case INCOMPATIBLE
: /* destroy the arc */
900 case SATISFIED
: /* no action needed */
902 case COMPATIBLE
: /* swap the two arcs, more or less */
906 cparc(nfa
, a
, s
, to
); /* anticipate move */
907 cparc(nfa
, con
, a
->from
, s
);
918 /* remaining inarcs, if any, incorporate the constraint */
919 moveins(nfa
, from
, to
);
920 dropstate(nfa
, from
); /* will free the constraint */
925 * pushfwd - push forward constraints forward to (with luck) eliminate them
928 pushfwd(struct nfa
* nfa
,
929 FILE *f
) /* for debug output; NULL none */
937 /* find and push until there are no more */
941 for (s
= nfa
->states
; s
!= NULL
&& !NISERR(); s
= nexts
)
944 for (a
= s
->ins
; a
!= NULL
&& !NISERR(); a
= nexta
)
947 if (a
->type
== '$' || a
->type
== AHEAD
)
950 assert(nexta
== NULL
|| s
->no
!= FREESTATE
);
953 if (progress
&& f
!= NULL
)
955 } while (progress
&& !NISERR());
959 for (a
= nfa
->post
->ins
; a
!= NULL
; a
= nexta
)
964 assert(a
->co
== 0 || a
->co
== 1);
965 newarc(nfa
, PLAIN
, nfa
->eos
[a
->co
], a
->from
, a
->to
);
972 * push - push a forward constraint forward past its destination state
973 * A significant property of this function is that it deletes at most
974 * one state -- the constraint's to state -- and only if the constraint
975 * was that state's last inarc.
977 static int /* 0 couldn't, 1 could */
978 push(struct nfa
* nfa
,
981 struct state
*from
= con
->from
;
982 struct state
*to
= con
->to
;
988 { /* circular constraint is pointless */
992 if (to
->flag
) /* can't push forward beyond end */
1001 * DGP 2007-11-15: Here we duplicate the same protections as appear
1002 * in pull() above to avoid troubles with cloning a state with a
1003 * circular constraint on its list of ins. It is not clear whether
1004 * this is necessary, or is protecting against a "can't happen".
1005 * Any test case that actually leads to a freearc() call here would
1006 * be a welcome addition to the test suite.
1008 for (a
= to
->ins
; a
!= NULL
; a
= nexta
)
1023 /* first, clone to state if necessary to avoid other inarcs */
1029 copyouts(nfa
, to
, s
); /* duplicate outarcs */
1030 cparc(nfa
, con
, from
, s
); /* move constraint */
1035 assert(to
->nins
== 1);
1037 /* propagate the constraint into the to state's outarcs */
1038 for (a
= to
->outs
; a
!= NULL
; a
= nexta
)
1040 nexta
= a
->outchain
;
1041 switch (combine(con
, a
))
1043 case INCOMPATIBLE
: /* destroy the arc */
1046 case SATISFIED
: /* no action needed */
1048 case COMPATIBLE
: /* swap the two arcs, more or less */
1052 cparc(nfa
, con
, s
, a
->to
); /* anticipate move */
1053 cparc(nfa
, a
, from
, s
);
1064 /* remaining outarcs, if any, incorporate the constraint */
1065 moveouts(nfa
, to
, from
);
1066 dropstate(nfa
, to
); /* will free the constraint */
1071 * combine - constraint lands on an arc, what happens?
1073 * #def INCOMPATIBLE 1 // destroys arc
1074 * #def SATISFIED 2 // constraint satisfied
1075 * #def COMPATIBLE 3 // compatible but not satisfied yet
1078 combine(struct arc
* con
,
1081 #define CA(ct,at) (((ct)<<CHAR_BIT) | (at))
1083 switch (CA(con
->type
, a
->type
))
1085 case CA('^', PLAIN
): /* newlines are handled separately */
1086 case CA('$', PLAIN
):
1087 return INCOMPATIBLE
;
1089 case CA(AHEAD
, PLAIN
): /* color constraints meet colors */
1090 case CA(BEHIND
, PLAIN
):
1091 if (con
->co
== a
->co
)
1093 return INCOMPATIBLE
;
1095 case CA('^', '^'): /* collision, similar constraints */
1097 case CA(AHEAD
, AHEAD
):
1098 case CA(BEHIND
, BEHIND
):
1099 if (con
->co
== a
->co
) /* true duplication */
1101 return INCOMPATIBLE
;
1103 case CA('^', BEHIND
): /* collision, dissimilar constraints */
1104 case CA(BEHIND
, '^'):
1105 case CA('$', AHEAD
):
1106 case CA(AHEAD
, '$'):
1107 return INCOMPATIBLE
;
1109 case CA('^', '$'): /* constraints passing each other */
1110 case CA('^', AHEAD
):
1111 case CA(BEHIND
, '$'):
1112 case CA(BEHIND
, AHEAD
):
1114 case CA('$', BEHIND
):
1115 case CA(AHEAD
, '^'):
1116 case CA(AHEAD
, BEHIND
):
1117 case CA('^', LACON
):
1118 case CA(BEHIND
, LACON
):
1119 case CA('$', LACON
):
1120 case CA(AHEAD
, LACON
):
1125 return INCOMPATIBLE
; /* for benefit of blind compilers */
1129 * fixempties - get rid of EMPTY arcs
1132 fixempties(struct nfa
* nfa
,
1133 FILE *f
) /* for debug output; NULL none */
1136 struct state
*nexts
;
1141 /* find and eliminate empties until there are no more */
1145 for (s
= nfa
->states
; s
!= NULL
&& !NISERR() &&
1146 s
->no
!= FREESTATE
; s
= nexts
)
1149 for (a
= s
->outs
; a
!= NULL
&& !NISERR(); a
= nexta
)
1151 nexta
= a
->outchain
;
1152 if (a
->type
== EMPTY
&& unempty(nfa
, a
))
1154 assert(nexta
== NULL
|| s
->no
!= FREESTATE
);
1157 if (progress
&& f
!= NULL
)
1159 } while (progress
&& !NISERR());
1163 * unempty - optimize out an EMPTY arc, if possible
1165 * Actually, as it stands this function always succeeds, but the return
1166 * value is kept with an eye on possible future changes.
1168 static int /* 0 couldn't, 1 could */
1169 unempty(struct nfa
* nfa
,
1172 struct state
*from
= a
->from
;
1173 struct state
*to
= a
->to
;
1174 int usefrom
; /* work on from, as opposed to to? */
1176 assert(a
->type
== EMPTY
);
1177 assert(from
!= nfa
->pre
&& to
!= nfa
->post
);
1180 { /* vacuous loop */
1185 /* decide which end to work on */
1186 usefrom
= 1; /* default: attack from */
1187 if (from
->nouts
> to
->nins
)
1189 else if (from
->nouts
== to
->nins
)
1191 /* decide on secondary issue: move/copy fewest arcs */
1192 if (from
->nins
> to
->nouts
)
1199 if (from
->nouts
== 0)
1201 /* was the state's only outarc */
1202 moveins(nfa
, from
, to
);
1203 freestate(nfa
, from
);
1206 copyins(nfa
, from
, to
);
1212 /* was the state's only inarc */
1213 moveouts(nfa
, to
, from
);
1217 copyouts(nfa
, to
, from
);
1224 * cleanup - clean up NFA after optimizations
1227 cleanup(struct nfa
* nfa
)
1230 struct state
*nexts
;
1233 /* clear out unreachable or dead-end states */
1234 /* use pre to mark reachable, then post to mark can-reach-post */
1235 markreachable(nfa
, nfa
->pre
, (struct state
*) NULL
, nfa
->pre
);
1236 markcanreach(nfa
, nfa
->post
, nfa
->pre
, nfa
->post
);
1237 for (s
= nfa
->states
; s
!= NULL
; s
= nexts
)
1240 if (s
->tmp
!= nfa
->post
&& !s
->flag
)
1243 assert(nfa
->post
->nins
== 0 || nfa
->post
->tmp
== nfa
->post
);
1244 cleartraverse(nfa
, nfa
->pre
);
1245 assert(nfa
->post
->nins
== 0 || nfa
->post
->tmp
== NULL
);
1246 /* the nins==0 (final unreachable) case will be caught later */
1248 /* renumber surviving states */
1250 for (s
= nfa
->states
; s
!= NULL
; s
= s
->next
)
1256 * markreachable - recursive marking of reachable states
1259 markreachable(struct nfa
* nfa
,
1261 struct state
* okay
, /* consider only states with this mark */
1262 struct state
* mark
) /* the value to mark with */
1270 for (a
= s
->outs
; a
!= NULL
; a
= a
->outchain
)
1271 markreachable(nfa
, a
->to
, okay
, mark
);
1275 * markcanreach - recursive marking of states which can reach here
1278 markcanreach(struct nfa
* nfa
,
1280 struct state
* okay
, /* consider only states with this mark */
1281 struct state
* mark
) /* the value to mark with */
1289 for (a
= s
->ins
; a
!= NULL
; a
= a
->inchain
)
1290 markcanreach(nfa
, a
->from
, okay
, mark
);
1294 * analyze - ascertain potentially-useful facts about an optimized NFA
1296 static long /* re_info bits to be ORed in */
1297 analyze(struct nfa
* nfa
)
1302 if (nfa
->pre
->outs
== NULL
)
1303 return REG_UIMPOSSIBLE
;
1304 for (a
= nfa
->pre
->outs
; a
!= NULL
; a
= a
->outchain
)
1305 for (aa
= a
->to
->outs
; aa
!= NULL
; aa
= aa
->outchain
)
1306 if (aa
->to
== nfa
->post
)
1307 return REG_UEMPTYMATCH
;
1312 * compact - compact an NFA
1315 compact(struct nfa
* nfa
,
1329 for (s
= nfa
->states
; s
!= NULL
; s
= s
->next
)
1332 narcs
+= 1 + s
->nouts
+ 1;
1333 /* 1 as a fake for flags, nouts for arcs, 1 as endmarker */
1336 cnfa
->states
= (struct carc
**) MALLOC(nstates
* sizeof(struct carc
*));
1337 cnfa
->arcs
= (struct carc
*) MALLOC(narcs
* sizeof(struct carc
));
1338 if (cnfa
->states
== NULL
|| cnfa
->arcs
== NULL
)
1340 if (cnfa
->states
!= NULL
)
1342 if (cnfa
->arcs
!= NULL
)
1347 cnfa
->nstates
= nstates
;
1348 cnfa
->pre
= nfa
->pre
->no
;
1349 cnfa
->post
= nfa
->post
->no
;
1350 cnfa
->bos
[0] = nfa
->bos
[0];
1351 cnfa
->bos
[1] = nfa
->bos
[1];
1352 cnfa
->eos
[0] = nfa
->eos
[0];
1353 cnfa
->eos
[1] = nfa
->eos
[1];
1354 cnfa
->ncolors
= maxcolor(nfa
->cm
) + 1;
1358 for (s
= nfa
->states
; s
!= NULL
; s
= s
->next
)
1360 assert((size_t) s
->no
< nstates
);
1361 cnfa
->states
[s
->no
] = ca
;
1362 ca
->co
= 0; /* clear and skip flags "arc" */
1365 for (a
= s
->outs
; a
!= NULL
; a
= a
->outchain
)
1374 assert(s
->no
!= cnfa
->pre
);
1375 ca
->co
= (color
) (cnfa
->ncolors
+ a
->co
);
1378 cnfa
->flags
|= HASLACONS
;
1384 carcsort(first
, ca
- 1);
1389 assert(ca
== &cnfa
->arcs
[narcs
]);
1390 assert(cnfa
->nstates
!= 0);
1392 /* mark no-progress states */
1393 for (a
= nfa
->pre
->outs
; a
!= NULL
; a
= a
->outchain
)
1394 cnfa
->states
[a
->to
->no
]->co
= 1;
1395 cnfa
->states
[nfa
->pre
->no
]->co
= 1;
1399 * carcsort - sort compacted-NFA arcs by color
1401 * Really dumb algorithm, but if the list is long enough for that to matter,
1402 * you're in real trouble anyway.
1405 carcsort(struct carc
* first
,
1412 if (last
- first
<= 1)
1415 for (p
= first
; p
<= last
; p
++)
1416 for (q
= p
; q
<= last
; q
++)
1417 if (p
->co
> q
->co
||
1418 (p
->co
== q
->co
&& p
->to
> q
->to
))
1428 * freecnfa - free a compacted NFA
1431 freecnfa(struct cnfa
* cnfa
)
1433 assert(cnfa
->nstates
!= 0); /* not empty already */
1440 * dumpnfa - dump an NFA in human-readable form
1443 dumpnfa(struct nfa
* nfa
,
1449 fprintf(f
, "pre %d, post %d", nfa
->pre
->no
, nfa
->post
->no
);
1450 if (nfa
->bos
[0] != COLORLESS
)
1451 fprintf(f
, ", bos [%ld]", (long) nfa
->bos
[0]);
1452 if (nfa
->bos
[1] != COLORLESS
)
1453 fprintf(f
, ", bol [%ld]", (long) nfa
->bos
[1]);
1454 if (nfa
->eos
[0] != COLORLESS
)
1455 fprintf(f
, ", eos [%ld]", (long) nfa
->eos
[0]);
1456 if (nfa
->eos
[1] != COLORLESS
)
1457 fprintf(f
, ", eol [%ld]", (long) nfa
->eos
[1]);
1459 for (s
= nfa
->states
; s
!= NULL
; s
= s
->next
)
1461 if (nfa
->parent
== NULL
)
1462 dumpcolors(nfa
->cm
, f
);
1467 #ifdef REG_DEBUG /* subordinates of dumpnfa */
1470 * dumpstate - dump an NFA state in human-readable form
1473 dumpstate(struct state
* s
,
1478 fprintf(f
, "%d%s%c", s
->no
, (s
->tmp
!= NULL
) ? "T" : "",
1479 (s
->flag
) ? s
->flag
: '.');
1480 if (s
->prev
!= NULL
&& s
->prev
->next
!= s
)
1481 fprintf(f
, "\tstate chain bad\n");
1483 fprintf(f
, "\tno out arcs\n");
1487 for (a
= s
->ins
; a
!= NULL
; a
= a
->inchain
)
1490 fprintf(f
, "\tlink from %d to %d on %d's in-chain\n",
1491 a
->from
->no
, a
->to
->no
, s
->no
);
1496 * dumparcs - dump out-arcs in human-readable form
1499 dumparcs(struct state
* s
,
1504 assert(s
->nouts
> 0);
1505 /* printing arcs in reverse order is usually clearer */
1506 pos
= dumprarcs(s
->outs
, s
, f
, 1);
1512 * dumprarcs - dump remaining outarcs, recursively, in reverse order
1514 static int /* resulting print position */
1515 dumprarcs(struct arc
* a
,
1518 int pos
) /* initial print position */
1520 if (a
->outchain
!= NULL
)
1521 pos
= dumprarcs(a
->outchain
, s
, f
, pos
);
1534 * dumparc - dump one outarc in readable form, including prefixing tab
1537 dumparc(struct arc
* a
,
1542 struct arcbatch
*ab
;
1548 fprintf(f
, "[%ld]", (long) a
->co
);
1551 fprintf(f
, ">%ld>", (long) a
->co
);
1554 fprintf(f
, "<%ld<", (long) a
->co
);
1557 fprintf(f
, ":%ld:", (long) a
->co
);
1561 fprintf(f
, "%c%d", a
->type
, (int) a
->co
);
1566 fprintf(f
, "0x%x/0%lo", a
->type
, (long) a
->co
);
1570 fprintf(f
, "?%d?", a
->from
->no
);
1571 for (ab
= &a
->from
->oas
; ab
!= NULL
; ab
= ab
->next
)
1573 for (aa
= &ab
->a
[0]; aa
< &ab
->a
[ABSIZE
]; aa
++)
1575 break; /* NOTE BREAK OUT */
1576 if (aa
< &ab
->a
[ABSIZE
]) /* propagate break */
1577 break; /* NOTE BREAK OUT */
1580 fprintf(f
, "?!?"); /* not in allocated space */
1587 fprintf(f
, "%d", a
->to
->no
);
1588 for (aa
= a
->to
->ins
; aa
!= NULL
; aa
= aa
->inchain
)
1590 break; /* NOTE BREAK OUT */
1592 fprintf(f
, "?!?"); /* missing from in-chain */
1594 #endif /* REG_DEBUG */
1597 * dumpcnfa - dump a compacted NFA in human-readable form
1601 dumpcnfa(struct cnfa
* cnfa
,
1606 fprintf(f
, "pre %d, post %d", cnfa
->pre
, cnfa
->post
);
1607 if (cnfa
->bos
[0] != COLORLESS
)
1608 fprintf(f
, ", bos [%ld]", (long) cnfa
->bos
[0]);
1609 if (cnfa
->bos
[1] != COLORLESS
)
1610 fprintf(f
, ", bol [%ld]", (long) cnfa
->bos
[1]);
1611 if (cnfa
->eos
[0] != COLORLESS
)
1612 fprintf(f
, ", eos [%ld]", (long) cnfa
->eos
[0]);
1613 if (cnfa
->eos
[1] != COLORLESS
)
1614 fprintf(f
, ", eol [%ld]", (long) cnfa
->eos
[1]);
1615 if (cnfa
->flags
& HASLACONS
)
1616 fprintf(f
, ", haslacons");
1618 for (st
= 0; st
< cnfa
->nstates
; st
++)
1619 dumpcstate(st
, cnfa
->states
[st
], cnfa
, f
);
1624 #ifdef REG_DEBUG /* subordinates of dumpcnfa */
1627 * dumpcstate - dump a compacted-NFA state in human-readable form
1638 fprintf(f
, "%d%s", st
, (ca
[0].co
) ? ":" : ".");
1640 for (i
= 1; ca
[i
].co
!= COLORLESS
; i
++)
1642 if (ca
[i
].co
< cnfa
->ncolors
)
1643 fprintf(f
, "\t[%ld]->%d", (long) ca
[i
].co
, ca
[i
].to
);
1645 fprintf(f
, "\t:%ld:->%d", (long) ca
[i
].co
- cnfa
->ncolors
,
1655 if (i
== 1 || pos
!= 1)
1660 #endif /* REG_DEBUG */