3 * This file is #included by regcomp.c.
5 * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved.
7 * Development of this software was funded, in part, by Cray Research Inc.,
8 * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
9 * Corporation, none of whom are responsible for the results. The author
12 * Redistribution and use in source and binary forms -- with or without
13 * modification -- are permitted for any purpose, provided that
14 * redistributions in source form retain this entire copyright notice and
15 * indicate the origin and nature of any modifications.
17 * I'd appreciate being given credit for this package in the documentation
18 * of software which uses it, but that is not a requirement.
20 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
21 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
22 * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
23 * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
24 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
25 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
26 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
27 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
28 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
29 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 * src/backend/regex/regc_nfa.c
34 * One or two things that technically ought to be in here
35 * are actually in color.c, thanks to some incestuous relationships in
39 #define NISERR() VISERR(nfa->v)
40 #define NERR(e) VERR(nfa->v, (e))
44 * newnfa - set up an NFA
46 static struct nfa
* /* the NFA, or NULL */
47 newnfa(struct vars
*v
,
49 struct nfa
*parent
) /* NULL if primary NFA */
53 nfa
= (struct nfa
*) MALLOC(sizeof(struct nfa
));
60 /* Make the NFA minimally valid, so freenfa() will behave sanely */
63 nfa
->freestates
= NULL
;
72 nfa
->bos
[0] = nfa
->bos
[1] = COLORLESS
;
73 nfa
->eos
[0] = nfa
->eos
[1] = COLORLESS
;
75 nfa
->minmatchall
= nfa
->maxmatchall
= -1;
76 nfa
->parent
= parent
; /* Precedes newfstate so parent is valid. */
78 /* Create required infrastructure */
79 nfa
->post
= newfstate(nfa
, '@'); /* number 0 */
80 nfa
->pre
= newfstate(nfa
, '>'); /* number 1 */
81 nfa
->init
= newstate(nfa
); /* may become invalid later */
82 nfa
->final
= newstate(nfa
);
88 rainbow(nfa
, nfa
->cm
, PLAIN
, COLORLESS
, nfa
->pre
, nfa
->init
);
89 newarc(nfa
, '^', 1, nfa
->pre
, nfa
->init
);
90 newarc(nfa
, '^', 0, nfa
->pre
, nfa
->init
);
91 rainbow(nfa
, nfa
->cm
, PLAIN
, COLORLESS
, nfa
->final
, nfa
->post
);
92 newarc(nfa
, '$', 1, nfa
->final
, nfa
->post
);
93 newarc(nfa
, '$', 0, nfa
->final
, nfa
->post
);
104 * freenfa - free an entire NFA
107 freenfa(struct nfa
*nfa
)
109 struct statebatch
*sb
;
110 struct statebatch
*sbnext
;
112 struct arcbatch
*abnext
;
114 for (sb
= nfa
->lastsb
; sb
!= NULL
; sb
= sbnext
)
117 nfa
->v
->spaceused
-= STATEBATCHSIZE(sb
->nstates
);
121 for (ab
= nfa
->lastab
; ab
!= NULL
; ab
= abnext
)
124 nfa
->v
->spaceused
-= ARCBATCHSIZE(ab
->narcs
);
134 * newstate - allocate an NFA state, with zero flag value
136 static struct state
* /* NULL on error */
137 newstate(struct nfa
*nfa
)
142 * This is a handy place to check for operation cancel during regex
143 * compilation, since no code path will go very long without making a new
146 INTERRUPT(nfa
->v
->re
);
148 /* first, recycle anything that's on the freelist */
149 if (nfa
->freestates
!= NULL
)
152 nfa
->freestates
= s
->next
;
154 /* otherwise, is there anything left in the last statebatch? */
155 else if (nfa
->lastsb
!= NULL
&& nfa
->lastsbused
< nfa
->lastsb
->nstates
)
157 s
= &nfa
->lastsb
->s
[nfa
->lastsbused
++];
159 /* otherwise, need to allocate a new statebatch */
162 struct statebatch
*newSb
;
165 if (nfa
->v
->spaceused
>= REG_MAX_COMPILE_SPACE
)
170 nstates
= (nfa
->lastsb
!= NULL
) ? nfa
->lastsb
->nstates
* 2 : FIRSTSBSIZE
;
171 if (nstates
> MAXSBSIZE
)
173 newSb
= (struct statebatch
*) MALLOC(STATEBATCHSIZE(nstates
));
179 nfa
->v
->spaceused
+= STATEBATCHSIZE(nstates
);
180 newSb
->nstates
= nstates
;
181 newSb
->next
= nfa
->lastsb
;
187 assert(nfa
->nstates
>= 0);
188 s
->no
= nfa
->nstates
++;
190 if (nfa
->states
== NULL
)
198 if (nfa
->slast
!= NULL
)
200 assert(nfa
->slast
->next
== NULL
);
201 nfa
->slast
->next
= s
;
203 s
->prev
= nfa
->slast
;
209 * newfstate - allocate an NFA state with a specified flag value
211 static struct state
* /* NULL on error */
212 newfstate(struct nfa
*nfa
, int flag
)
218 s
->flag
= (char) flag
;
223 * dropstate - delete a state's inarcs and outarcs and free it
226 dropstate(struct nfa
*nfa
,
231 while ((a
= s
->ins
) != NULL
)
233 while ((a
= s
->outs
) != NULL
)
239 * freestate - free a state, which has no in-arcs or out-arcs
242 freestate(struct nfa
*nfa
,
246 assert(s
->nins
== 0 && s
->nouts
== 0);
251 s
->next
->prev
= s
->prev
;
254 assert(s
== nfa
->slast
);
255 nfa
->slast
= s
->prev
;
258 s
->prev
->next
= s
->next
;
261 assert(s
== nfa
->states
);
262 nfa
->states
= s
->next
;
265 s
->next
= nfa
->freestates
; /* don't delete it, put it on the free list */
270 * newarc - set up a new arc within an NFA
272 * This function checks to make sure that no duplicate arcs are created.
273 * In general we never want duplicates.
275 * However: in principle, a RAINBOW arc is redundant with any plain arc
276 * (unless that arc is for a pseudocolor). But we don't try to recognize
277 * that redundancy, either here or in allied operations such as moveins().
278 * The pseudocolor consideration makes that more costly than it seems worth.
281 newarc(struct nfa
*nfa
,
289 assert(from
!= NULL
&& to
!= NULL
);
292 * This is a handy place to check for operation cancel during regex
293 * compilation, since no code path will go very long without making a new
296 INTERRUPT(nfa
->v
->re
);
298 /* check for duplicate arc, using whichever chain is shorter */
299 if (from
->nouts
<= to
->nins
)
301 for (a
= from
->outs
; a
!= NULL
; a
= a
->outchain
)
302 if (a
->to
== to
&& a
->co
== co
&& a
->type
== t
)
307 for (a
= to
->ins
; a
!= NULL
; a
= a
->inchain
)
308 if (a
->from
== from
&& a
->co
== co
&& a
->type
== t
)
312 /* no dup, so create the arc */
313 createarc(nfa
, t
, co
, from
, to
);
317 * createarc - create a new arc within an NFA
319 * This function must *only* be used after verifying that there is no existing
320 * identical arc (same type/color/from/to).
323 createarc(struct nfa
*nfa
,
342 * Put the new arc on the beginning, not the end, of the chains; it's
343 * simpler here, and freearc() is the same cost either way. See also the
344 * logic in moveins() and its cohorts, as well as fixempties().
346 a
->inchain
= to
->ins
;
347 a
->inchainRev
= NULL
;
349 to
->ins
->inchainRev
= a
;
351 a
->outchain
= from
->outs
;
352 a
->outchainRev
= NULL
;
354 from
->outs
->outchainRev
= a
;
360 if (COLORED(a
) && nfa
->parent
== NULL
)
361 colorchain(nfa
->cm
, a
);
365 * allocarc - allocate a new arc within an NFA
367 static struct arc
* /* NULL for failure */
368 allocarc(struct nfa
*nfa
)
372 /* first, recycle anything that's on the freelist */
373 if (nfa
->freearcs
!= NULL
)
376 nfa
->freearcs
= a
->freechain
;
378 /* otherwise, is there anything left in the last arcbatch? */
379 else if (nfa
->lastab
!= NULL
&& nfa
->lastabused
< nfa
->lastab
->narcs
)
381 a
= &nfa
->lastab
->a
[nfa
->lastabused
++];
383 /* otherwise, need to allocate a new arcbatch */
386 struct arcbatch
*newAb
;
389 if (nfa
->v
->spaceused
>= REG_MAX_COMPILE_SPACE
)
394 narcs
= (nfa
->lastab
!= NULL
) ? nfa
->lastab
->narcs
* 2 : FIRSTABSIZE
;
395 if (narcs
> MAXABSIZE
)
397 newAb
= (struct arcbatch
*) MALLOC(ARCBATCHSIZE(narcs
));
403 nfa
->v
->spaceused
+= ARCBATCHSIZE(narcs
);
404 newAb
->narcs
= narcs
;
405 newAb
->next
= nfa
->lastab
;
415 * freearc - free an arc
418 freearc(struct nfa
*nfa
,
421 struct state
*from
= victim
->from
;
422 struct state
*to
= victim
->to
;
423 struct arc
*predecessor
;
425 assert(victim
->type
!= 0);
427 /* take it off color chain if necessary */
428 if (COLORED(victim
) && nfa
->parent
== NULL
)
429 uncolorchain(nfa
->cm
, victim
);
431 /* take it off source's out-chain */
432 assert(from
!= NULL
);
433 predecessor
= victim
->outchainRev
;
434 if (predecessor
== NULL
)
436 assert(from
->outs
== victim
);
437 from
->outs
= victim
->outchain
;
441 assert(predecessor
->outchain
== victim
);
442 predecessor
->outchain
= victim
->outchain
;
444 if (victim
->outchain
!= NULL
)
446 assert(victim
->outchain
->outchainRev
== victim
);
447 victim
->outchain
->outchainRev
= predecessor
;
451 /* take it off target's in-chain */
453 predecessor
= victim
->inchainRev
;
454 if (predecessor
== NULL
)
456 assert(to
->ins
== victim
);
457 to
->ins
= victim
->inchain
;
461 assert(predecessor
->inchain
== victim
);
462 predecessor
->inchain
= victim
->inchain
;
464 if (victim
->inchain
!= NULL
)
466 assert(victim
->inchain
->inchainRev
== victim
);
467 victim
->inchain
->inchainRev
= predecessor
;
471 /* clean up and place on NFA's free list */
473 victim
->from
= NULL
; /* precautions... */
475 victim
->inchain
= NULL
;
476 victim
->inchainRev
= NULL
;
477 victim
->outchain
= NULL
;
478 victim
->outchainRev
= NULL
;
479 victim
->freechain
= nfa
->freearcs
;
480 nfa
->freearcs
= victim
;
484 * changearcsource - flip an arc to have a different from state
486 * Caller must have verified that there is no pre-existing duplicate arc.
489 changearcsource(struct arc
*a
, struct state
*newfrom
)
491 struct state
*oldfrom
= a
->from
;
492 struct arc
*predecessor
;
494 assert(oldfrom
!= newfrom
);
496 /* take it off old source's out-chain */
497 assert(oldfrom
!= NULL
);
498 predecessor
= a
->outchainRev
;
499 if (predecessor
== NULL
)
501 assert(oldfrom
->outs
== a
);
502 oldfrom
->outs
= a
->outchain
;
506 assert(predecessor
->outchain
== a
);
507 predecessor
->outchain
= a
->outchain
;
509 if (a
->outchain
!= NULL
)
511 assert(a
->outchain
->outchainRev
== a
);
512 a
->outchain
->outchainRev
= predecessor
;
518 /* prepend it to new source's out-chain */
519 a
->outchain
= newfrom
->outs
;
520 a
->outchainRev
= NULL
;
522 newfrom
->outs
->outchainRev
= a
;
528 * changearctarget - flip an arc to have a different to state
530 * Caller must have verified that there is no pre-existing duplicate arc.
533 changearctarget(struct arc
*a
, struct state
*newto
)
535 struct state
*oldto
= a
->to
;
536 struct arc
*predecessor
;
538 assert(oldto
!= newto
);
540 /* take it off old target's in-chain */
541 assert(oldto
!= NULL
);
542 predecessor
= a
->inchainRev
;
543 if (predecessor
== NULL
)
545 assert(oldto
->ins
== a
);
546 oldto
->ins
= a
->inchain
;
550 assert(predecessor
->inchain
== a
);
551 predecessor
->inchain
= a
->inchain
;
553 if (a
->inchain
!= NULL
)
555 assert(a
->inchain
->inchainRev
== a
);
556 a
->inchain
->inchainRev
= predecessor
;
562 /* prepend it to new target's in-chain */
563 a
->inchain
= newto
->ins
;
564 a
->inchainRev
= NULL
;
566 newto
->ins
->inchainRev
= a
;
572 * hasnonemptyout - Does state have a non-EMPTY out arc?
575 hasnonemptyout(struct state
*s
)
579 for (a
= s
->outs
; a
!= NULL
; a
= a
->outchain
)
581 if (a
->type
!= EMPTY
)
588 * findarc - find arc, if any, from given source with given type and color
589 * If there is more than one such arc, the result is random.
592 findarc(struct state
*s
,
598 for (a
= s
->outs
; a
!= NULL
; a
= a
->outchain
)
599 if (a
->type
== type
&& a
->co
== co
)
605 * cparc - allocate a new arc within an NFA, copying details from old one
608 cparc(struct nfa
*nfa
,
613 newarc(nfa
, oa
->type
, oa
->co
, from
, to
);
617 * sortins - sort the in arcs of a state by from/color/type
620 sortins(struct nfa
*nfa
,
623 struct arc
**sortarray
;
629 return; /* nothing to do */
630 /* make an array of arc pointers ... */
631 sortarray
= (struct arc
**) MALLOC(n
* sizeof(struct arc
*));
632 if (sortarray
== NULL
)
638 for (a
= s
->ins
; a
!= NULL
; a
= a
->inchain
)
641 /* ... sort the array */
642 qsort(sortarray
, n
, sizeof(struct arc
*), sortins_cmp
);
643 /* ... and rebuild arc list in order */
644 /* it seems worth special-casing first and last items to simplify loop */
647 a
->inchain
= sortarray
[1];
648 a
->inchainRev
= NULL
;
649 for (i
= 1; i
< n
- 1; i
++)
652 a
->inchain
= sortarray
[i
+ 1];
653 a
->inchainRev
= sortarray
[i
- 1];
657 a
->inchainRev
= sortarray
[i
- 1];
662 sortins_cmp(const void *a
, const void *b
)
664 const struct arc
*aa
= *((const struct arc
*const *) a
);
665 const struct arc
*bb
= *((const struct arc
*const *) b
);
667 /* we check the fields in the order they are most likely to be different */
668 if (aa
->from
->no
< bb
->from
->no
)
670 if (aa
->from
->no
> bb
->from
->no
)
676 if (aa
->type
< bb
->type
)
678 if (aa
->type
> bb
->type
)
684 * sortouts - sort the out arcs of a state by to/color/type
687 sortouts(struct nfa
*nfa
,
690 struct arc
**sortarray
;
696 return; /* nothing to do */
697 /* make an array of arc pointers ... */
698 sortarray
= (struct arc
**) MALLOC(n
* sizeof(struct arc
*));
699 if (sortarray
== NULL
)
705 for (a
= s
->outs
; a
!= NULL
; a
= a
->outchain
)
708 /* ... sort the array */
709 qsort(sortarray
, n
, sizeof(struct arc
*), sortouts_cmp
);
710 /* ... and rebuild arc list in order */
711 /* it seems worth special-casing first and last items to simplify loop */
714 a
->outchain
= sortarray
[1];
715 a
->outchainRev
= NULL
;
716 for (i
= 1; i
< n
- 1; i
++)
719 a
->outchain
= sortarray
[i
+ 1];
720 a
->outchainRev
= sortarray
[i
- 1];
724 a
->outchainRev
= sortarray
[i
- 1];
729 sortouts_cmp(const void *a
, const void *b
)
731 const struct arc
*aa
= *((const struct arc
*const *) a
);
732 const struct arc
*bb
= *((const struct arc
*const *) b
);
734 /* we check the fields in the order they are most likely to be different */
735 if (aa
->to
->no
< bb
->to
->no
)
737 if (aa
->to
->no
> bb
->to
->no
)
743 if (aa
->type
< bb
->type
)
745 if (aa
->type
> bb
->type
)
751 * Common decision logic about whether to use arc-by-arc operations or
752 * sort/merge. If there's just a few source arcs we cannot recoup the
753 * cost of sorting the destination arc list, no matter how large it is.
754 * Otherwise, limit the number of arc-by-arc comparisons to about 1000
755 * (a somewhat arbitrary choice, but the breakeven point would probably
756 * be machine dependent anyway).
758 #define BULK_ARC_OP_USE_SORT(nsrcarcs, ndestarcs) \
759 ((nsrcarcs) < 4 ? 0 : ((nsrcarcs) > 32 || (ndestarcs) > 32))
762 * moveins - move all in arcs of a state to another state
764 * You might think this could be done better by just updating the
765 * existing arcs, and you would be right if it weren't for the need
766 * for duplicate suppression, which makes it easier to just make new
767 * ones to exploit the suppression built into newarc.
769 * However, if we have a whole lot of arcs to deal with, retail duplicate
770 * checks become too slow. In that case we proceed by sorting and merging
771 * the arc lists, and then we can indeed just update the arcs in-place.
773 * On the other hand, it's also true that this is frequently called with
774 * a brand-new newState that has no existing in-arcs. In that case,
775 * de-duplication is unnecessary, so we can just blindly move all the arcs.
778 moveins(struct nfa
*nfa
,
779 struct state
*oldState
,
780 struct state
*newState
)
782 assert(oldState
!= newState
);
784 if (newState
->nins
== 0)
786 /* No need for de-duplication */
789 while ((a
= oldState
->ins
) != NULL
)
791 createarc(nfa
, a
->type
, a
->co
, a
->from
, newState
);
795 else if (!BULK_ARC_OP_USE_SORT(oldState
->nins
, newState
->nins
))
797 /* With not too many arcs, just do them one at a time */
800 while ((a
= oldState
->ins
) != NULL
)
802 cparc(nfa
, a
, a
->from
, newState
);
809 * With many arcs, use a sort-merge approach. Note changearctarget()
810 * will put the arc onto the front of newState's chain, so it does not
811 * break our walk through the sorted part of the chain.
817 * Because we bypass newarc() in this code path, we'd better include a
820 INTERRUPT(nfa
->v
->re
);
822 sortins(nfa
, oldState
);
823 sortins(nfa
, newState
);
825 return; /* might have failed to sort */
828 while (oa
!= NULL
&& na
!= NULL
)
832 switch (sortins_cmp(&oa
, &na
))
835 /* newState does not have anything matching oa */
839 * Rather than doing createarc+freearc, we can just unlink
840 * and relink the existing arc struct.
842 changearctarget(a
, newState
);
845 /* match, advance in both lists */
848 /* ... and drop duplicate arc from oldState */
852 /* advance only na; oa might have a match later */
861 /* newState does not have anything matching oa */
865 changearctarget(a
, newState
);
869 assert(oldState
->nins
== 0);
870 assert(oldState
->ins
== NULL
);
874 * copyins - copy in arcs of a state to another state
876 * The comments for moveins() apply here as well. However, in current
877 * usage, this is *only* called with brand-new target states, so that
878 * only the "no need for de-duplication" code path is ever reached.
879 * We keep the rest #ifdef'd out in case it's needed in the future.
882 copyins(struct nfa
*nfa
,
883 struct state
*oldState
,
884 struct state
*newState
)
886 assert(oldState
!= newState
);
887 assert(newState
->nins
== 0); /* see comment above */
889 if (newState
->nins
== 0)
891 /* No need for de-duplication */
894 for (a
= oldState
->ins
; a
!= NULL
; a
= a
->inchain
)
895 createarc(nfa
, a
->type
, a
->co
, a
->from
, newState
);
897 #ifdef NOT_USED /* see comment above */
898 else if (!BULK_ARC_OP_USE_SORT(oldState
->nins
, newState
->nins
))
900 /* With not too many arcs, just do them one at a time */
903 for (a
= oldState
->ins
; a
!= NULL
; a
= a
->inchain
)
904 cparc(nfa
, a
, a
->from
, newState
);
909 * With many arcs, use a sort-merge approach. Note that createarc()
910 * will put new arcs onto the front of newState's chain, so it does
911 * not break our walk through the sorted part of the chain.
917 * Because we bypass newarc() in this code path, we'd better include a
920 INTERRUPT(nfa
->v
->re
);
922 sortins(nfa
, oldState
);
923 sortins(nfa
, newState
);
925 return; /* might have failed to sort */
928 while (oa
!= NULL
&& na
!= NULL
)
932 switch (sortins_cmp(&oa
, &na
))
935 /* newState does not have anything matching oa */
937 createarc(nfa
, a
->type
, a
->co
, a
->from
, newState
);
940 /* match, advance in both lists */
945 /* advance only na; oa might have a match later */
954 /* newState does not have anything matching oa */
958 createarc(nfa
, a
->type
, a
->co
, a
->from
, newState
);
961 #endif /* NOT_USED */
965 * mergeins - merge a list of inarcs into a state
967 * This is much like copyins, but the source arcs are listed in an array,
968 * and are not guaranteed unique. It's okay to clobber the array contents.
971 mergeins(struct nfa
*nfa
,
973 struct arc
**arcarray
,
984 * Because we bypass newarc() in this code path, we'd better include a
987 INTERRUPT(nfa
->v
->re
);
989 /* Sort existing inarcs as well as proposed new ones */
992 return; /* might have failed to sort */
994 qsort(arcarray
, arccount
, sizeof(struct arc
*), sortins_cmp
);
997 * arcarray very likely includes dups, so we must eliminate them. (This
998 * could be folded into the next loop, but it's not worth the trouble.)
1001 for (i
= 1; i
< arccount
; i
++)
1003 switch (sortins_cmp(&arcarray
[j
], &arcarray
[i
]))
1007 arcarray
[++j
] = arcarray
[i
];
1020 * Now merge into s' inchain. Note that createarc() will put new arcs
1021 * onto the front of s's chain, so it does not break our walk through the
1022 * sorted part of the chain.
1026 while (i
< arccount
&& na
!= NULL
)
1028 struct arc
*a
= arcarray
[i
];
1030 switch (sortins_cmp(&a
, &na
))
1033 /* s does not have anything matching a */
1034 createarc(nfa
, a
->type
, a
->co
, a
->from
, s
);
1038 /* match, advance in both lists */
1043 /* advance only na; array might have a match later */
1050 while (i
< arccount
)
1052 /* s does not have anything matching a */
1053 struct arc
*a
= arcarray
[i
];
1055 createarc(nfa
, a
->type
, a
->co
, a
->from
, s
);
1061 * moveouts - move all out arcs of a state to another state
1063 * See comments for moveins()
1066 moveouts(struct nfa
*nfa
,
1067 struct state
*oldState
,
1068 struct state
*newState
)
1070 assert(oldState
!= newState
);
1072 if (newState
->nouts
== 0)
1074 /* No need for de-duplication */
1077 while ((a
= oldState
->outs
) != NULL
)
1079 createarc(nfa
, a
->type
, a
->co
, newState
, a
->to
);
1083 else if (!BULK_ARC_OP_USE_SORT(oldState
->nouts
, newState
->nouts
))
1085 /* With not too many arcs, just do them one at a time */
1088 while ((a
= oldState
->outs
) != NULL
)
1090 cparc(nfa
, a
, newState
, a
->to
);
1097 * With many arcs, use a sort-merge approach. Note changearcsource()
1098 * will put the arc onto the front of newState's chain, so it does not
1099 * break our walk through the sorted part of the chain.
1105 * Because we bypass newarc() in this code path, we'd better include a
1108 INTERRUPT(nfa
->v
->re
);
1110 sortouts(nfa
, oldState
);
1111 sortouts(nfa
, newState
);
1113 return; /* might have failed to sort */
1114 oa
= oldState
->outs
;
1115 na
= newState
->outs
;
1116 while (oa
!= NULL
&& na
!= NULL
)
1120 switch (sortouts_cmp(&oa
, &na
))
1123 /* newState does not have anything matching oa */
1127 * Rather than doing createarc+freearc, we can just unlink
1128 * and relink the existing arc struct.
1130 changearcsource(a
, newState
);
1133 /* match, advance in both lists */
1136 /* ... and drop duplicate arc from oldState */
1140 /* advance only na; oa might have a match later */
1149 /* newState does not have anything matching oa */
1153 changearcsource(a
, newState
);
1157 assert(oldState
->nouts
== 0);
1158 assert(oldState
->outs
== NULL
);
1162 * copyouts - copy out arcs of a state to another state
1164 * See comments for copyins()
1167 copyouts(struct nfa
*nfa
,
1168 struct state
*oldState
,
1169 struct state
*newState
)
1171 assert(oldState
!= newState
);
1172 assert(newState
->nouts
== 0); /* see comment above */
1174 if (newState
->nouts
== 0)
1176 /* No need for de-duplication */
1179 for (a
= oldState
->outs
; a
!= NULL
; a
= a
->outchain
)
1180 createarc(nfa
, a
->type
, a
->co
, newState
, a
->to
);
1182 #ifdef NOT_USED /* see comment above */
1183 else if (!BULK_ARC_OP_USE_SORT(oldState
->nouts
, newState
->nouts
))
1185 /* With not too many arcs, just do them one at a time */
1188 for (a
= oldState
->outs
; a
!= NULL
; a
= a
->outchain
)
1189 cparc(nfa
, a
, newState
, a
->to
);
1194 * With many arcs, use a sort-merge approach. Note that createarc()
1195 * will put new arcs onto the front of newState's chain, so it does
1196 * not break our walk through the sorted part of the chain.
1202 * Because we bypass newarc() in this code path, we'd better include a
1205 INTERRUPT(nfa
->v
->re
);
1207 sortouts(nfa
, oldState
);
1208 sortouts(nfa
, newState
);
1210 return; /* might have failed to sort */
1211 oa
= oldState
->outs
;
1212 na
= newState
->outs
;
1213 while (oa
!= NULL
&& na
!= NULL
)
1217 switch (sortouts_cmp(&oa
, &na
))
1220 /* newState does not have anything matching oa */
1222 createarc(nfa
, a
->type
, a
->co
, newState
, a
->to
);
1225 /* match, advance in both lists */
1230 /* advance only na; oa might have a match later */
1239 /* newState does not have anything matching oa */
1243 createarc(nfa
, a
->type
, a
->co
, newState
, a
->to
);
1246 #endif /* NOT_USED */
1250 * cloneouts - copy out arcs of a state to another state pair, modifying type
1252 * This is only used to convert PLAIN arcs to AHEAD/BEHIND arcs, which share
1253 * the same interpretation of "co". It wouldn't be sensible with LACONs.
1256 cloneouts(struct nfa
*nfa
,
1264 assert(old
!= from
);
1265 assert(type
== AHEAD
|| type
== BEHIND
);
1267 for (a
= old
->outs
; a
!= NULL
; a
= a
->outchain
)
1269 assert(a
->type
== PLAIN
);
1270 newarc(nfa
, type
, a
->co
, from
, to
);
1275 * delsub - delete a sub-NFA, updating subre pointers if necessary
1277 * This uses a recursive traversal of the sub-NFA, marking already-seen
1278 * states using their tmp pointer.
1281 delsub(struct nfa
*nfa
,
1282 struct state
*lp
, /* the sub-NFA goes from here... */
1283 struct state
*rp
) /* ...to here, *not* inclusive */
1287 rp
->tmp
= rp
; /* mark end */
1289 deltraverse(nfa
, lp
, lp
);
1291 return; /* asserts might not hold after failure */
1292 assert(lp
->nouts
== 0 && rp
->nins
== 0); /* did the job */
1293 assert(lp
->no
!= FREESTATE
&& rp
->no
!= FREESTATE
); /* no more */
1295 rp
->tmp
= NULL
; /* unmark end */
1296 lp
->tmp
= NULL
; /* and begin, marked by deltraverse */
1300 * deltraverse - the recursive heart of delsub
1301 * This routine's basic job is to destroy all out-arcs of the state.
1304 deltraverse(struct nfa
*nfa
,
1305 struct state
*leftend
,
1311 /* Since this is recursive, it could be driven to stack overflow */
1312 if (STACK_TOO_DEEP(nfa
->v
->re
))
1319 return; /* nothing to do */
1321 return; /* already in progress */
1323 s
->tmp
= s
; /* mark as in progress */
1325 while ((a
= s
->outs
) != NULL
)
1328 deltraverse(nfa
, leftend
, to
);
1330 return; /* asserts might not hold after failure */
1331 assert(to
->nouts
== 0 || to
->tmp
!= NULL
);
1333 if (to
->nins
== 0 && to
->tmp
== NULL
)
1335 assert(to
->nouts
== 0);
1340 assert(s
->no
!= FREESTATE
); /* we're still here */
1341 assert(s
== leftend
|| s
->nins
!= 0); /* and still reachable */
1342 assert(s
->nouts
== 0); /* but have no outarcs */
1344 s
->tmp
= NULL
; /* we're done here */
1348 * dupnfa - duplicate sub-NFA
1350 * Another recursive traversal, this time using tmp to point to duplicates
1351 * as well as mark already-seen states. (You knew there was a reason why
1352 * it's a state pointer, didn't you? :-))
1355 dupnfa(struct nfa
*nfa
,
1356 struct state
*start
, /* duplicate of subNFA starting here */
1357 struct state
*stop
, /* and stopping here */
1358 struct state
*from
, /* stringing duplicate from here */
1359 struct state
*to
) /* to here */
1363 newarc(nfa
, EMPTY
, 0, from
, to
);
1368 duptraverse(nfa
, start
, from
);
1369 /* done, except for clearing out the tmp pointers */
1372 cleartraverse(nfa
, start
);
1376 * duptraverse - recursive heart of dupnfa
1379 duptraverse(struct nfa
*nfa
,
1381 struct state
*stmp
) /* s's duplicate, or NULL */
1385 /* Since this is recursive, it could be driven to stack overflow */
1386 if (STACK_TOO_DEEP(nfa
->v
->re
))
1393 return; /* already done */
1395 s
->tmp
= (stmp
== NULL
) ? newstate(nfa
) : stmp
;
1402 for (a
= s
->outs
; a
!= NULL
&& !NISERR(); a
= a
->outchain
)
1404 duptraverse(nfa
, a
->to
, (struct state
*) NULL
);
1407 assert(a
->to
->tmp
!= NULL
);
1408 cparc(nfa
, a
, s
->tmp
, a
->to
->tmp
);
1413 * removeconstraints - remove any constraints in an NFA
1415 * Constraint arcs are replaced by empty arcs, essentially treating all
1416 * constraints as automatically satisfied.
1419 removeconstraints(struct nfa
*nfa
,
1420 struct state
*start
, /* process subNFA starting here */
1421 struct state
*stop
) /* and stopping here */
1427 removetraverse(nfa
, start
);
1428 /* done, except for clearing out the tmp pointers */
1431 cleartraverse(nfa
, start
);
1435 * removetraverse - recursive heart of removeconstraints
1438 removetraverse(struct nfa
*nfa
,
1444 /* Since this is recursive, it could be driven to stack overflow */
1445 if (STACK_TOO_DEEP(nfa
->v
->re
))
1452 return; /* already done */
1455 for (a
= s
->outs
; a
!= NULL
&& !NISERR(); a
= oa
)
1457 removetraverse(nfa
, a
->to
);
1474 newarc(nfa
, EMPTY
, 0, s
, a
->to
);
1485 * cleartraverse - recursive cleanup for algorithms that leave tmp ptrs set
1488 cleartraverse(struct nfa
*nfa
,
1493 /* Since this is recursive, it could be driven to stack overflow */
1494 if (STACK_TOO_DEEP(nfa
->v
->re
))
1504 for (a
= s
->outs
; a
!= NULL
; a
= a
->outchain
)
1505 cleartraverse(nfa
, a
->to
);
1509 * single_color_transition - does getting from s1 to s2 cross one PLAIN arc?
1511 * If traversing from s1 to s2 requires a single PLAIN match (possibly of any
1512 * of a set of colors), return a state whose outarc list contains only PLAIN
1513 * arcs of those color(s). Otherwise return NULL.
1515 * This is used before optimizing the NFA, so there may be EMPTY arcs, which
1516 * we should ignore; the possibility of an EMPTY is why the result state could
1517 * be different from s1.
1519 * It's worth troubling to handle multiple parallel PLAIN arcs here because a
1520 * bracket construct such as [abc] might yield either one or several parallel
1521 * PLAIN arcs depending on earlier atoms in the expression. We'd rather that
1522 * that implementation detail not create user-visible performance differences.
1524 static struct state
*
1525 single_color_transition(struct state
*s1
, struct state
*s2
)
1529 /* Ignore leading EMPTY arc, if any */
1530 if (s1
->nouts
== 1 && s1
->outs
->type
== EMPTY
)
1532 /* Likewise for any trailing EMPTY arc */
1533 if (s2
->nins
== 1 && s2
->ins
->type
== EMPTY
)
1535 /* Perhaps we could have a single-state loop in between, if so reject */
1538 /* s1 must have at least one outarc... */
1539 if (s1
->outs
== NULL
)
1541 /* ... and they must all be PLAIN arcs to s2 */
1542 for (a
= s1
->outs
; a
!= NULL
; a
= a
->outchain
)
1544 if (a
->type
!= PLAIN
|| a
->to
!= s2
)
1547 /* OK, return s1 as the possessor of the relevant outarcs */
1552 * specialcolors - fill in special colors for an NFA
1555 specialcolors(struct nfa
*nfa
)
1557 /* false colors for BOS, BOL, EOS, EOL */
1558 if (nfa
->parent
== NULL
)
1560 nfa
->bos
[0] = pseudocolor(nfa
->cm
);
1561 nfa
->bos
[1] = pseudocolor(nfa
->cm
);
1562 nfa
->eos
[0] = pseudocolor(nfa
->cm
);
1563 nfa
->eos
[1] = pseudocolor(nfa
->cm
);
1567 assert(nfa
->parent
->bos
[0] != COLORLESS
);
1568 nfa
->bos
[0] = nfa
->parent
->bos
[0];
1569 assert(nfa
->parent
->bos
[1] != COLORLESS
);
1570 nfa
->bos
[1] = nfa
->parent
->bos
[1];
1571 assert(nfa
->parent
->eos
[0] != COLORLESS
);
1572 nfa
->eos
[0] = nfa
->parent
->eos
[0];
1573 assert(nfa
->parent
->eos
[1] != COLORLESS
);
1574 nfa
->eos
[1] = nfa
->parent
->eos
[1];
1579 * optimize - optimize an NFA
1581 * The main goal of this function is not so much "optimization" (though it
1582 * does try to get rid of useless NFA states) as reducing the NFA to a form
1583 * the regex executor can handle. The executor, and indeed the cNFA format
1584 * that is its input, can only handle PLAIN and LACON arcs. The output of
1585 * the regex parser also includes EMPTY (do-nothing) arcs, as well as
1586 * ^, $, AHEAD, and BEHIND constraint arcs, which we must get rid of here.
1587 * We first get rid of EMPTY arcs and then deal with the constraint arcs.
1588 * The hardest part of either job is to get rid of circular loops of the
1589 * target arc type. We would have to do that in any case, though, as such a
1590 * loop would otherwise allow the executor to cycle through the loop endlessly
1591 * without making any progress in the input string.
1593 static long /* re_info bits */
1594 optimize(struct nfa
*nfa
,
1595 FILE *f
) /* for debug output; NULL none */
1598 int verbose
= (f
!= NULL
) ? 1 : 0;
1601 fprintf(f
, "\ninitial cleanup:\n");
1603 /* If we have any CANTMATCH arcs, drop them; but this is uncommon */
1604 if (nfa
->flags
& HASCANTMATCH
)
1606 removecantmatch(nfa
);
1607 nfa
->flags
&= ~HASCANTMATCH
;
1609 cleanup(nfa
); /* may simplify situation */
1614 fprintf(f
, "\nempties:\n");
1616 fixempties(nfa
, f
); /* get rid of EMPTY arcs */
1619 fprintf(f
, "\nconstraints:\n");
1621 fixconstraintloops(nfa
, f
); /* get rid of constraint loops */
1622 pullback(nfa
, f
); /* pull back constraints backward */
1623 pushfwd(nfa
, f
); /* push fwd constraints forward */
1626 fprintf(f
, "\nfinal cleanup:\n");
1628 cleanup(nfa
); /* final tidying */
1633 return analyze(nfa
); /* and analysis */
1637 * pullback - pull back constraints backward to eliminate them
1640 pullback(struct nfa
*nfa
,
1641 FILE *f
) /* for debug output; NULL none */
1644 struct state
*nexts
;
1647 struct state
*intermediates
;
1650 /* find and pull until there are no more */
1654 for (s
= nfa
->states
; s
!= NULL
&& !NISERR(); s
= nexts
)
1657 intermediates
= NULL
;
1658 for (a
= s
->outs
; a
!= NULL
&& !NISERR(); a
= nexta
)
1660 nexta
= a
->outchain
;
1661 if (a
->type
== '^' || a
->type
== BEHIND
)
1662 if (pull(nfa
, a
, &intermediates
))
1665 /* clear tmp fields of intermediate states created here */
1666 while (intermediates
!= NULL
)
1668 struct state
*ns
= intermediates
->tmp
;
1670 intermediates
->tmp
= NULL
;
1673 /* if s is now useless, get rid of it */
1674 if ((s
->nins
== 0 || s
->nouts
== 0) && !s
->flag
)
1677 if (progress
&& f
!= NULL
)
1679 } while (progress
&& !NISERR());
1684 * Any ^ constraints we were able to pull to the start state can now be
1685 * replaced by PLAIN arcs referencing the BOS or BOL colors. There should
1686 * be no other ^ or BEHIND arcs left in the NFA, though we do not check
1687 * that here (compact() will fail if so).
1689 for (a
= nfa
->pre
->outs
; a
!= NULL
; a
= nexta
)
1691 nexta
= a
->outchain
;
1694 assert(a
->co
== 0 || a
->co
== 1);
1695 newarc(nfa
, PLAIN
, nfa
->bos
[a
->co
], a
->from
, a
->to
);
1702 * pull - pull a back constraint backward past its source state
1704 * Returns 1 if successful (which it always is unless the source is the
1705 * start state or we have an internal error), 0 if nothing happened.
1707 * A significant property of this function is that it deletes no pre-existing
1708 * states, and no outarcs of the constraint's from state other than the given
1709 * constraint arc. This makes the loops in pullback() safe, at the cost that
1710 * we may leave useless states behind. Therefore, we leave it to pullback()
1711 * to delete such states.
1713 * If the from state has multiple back-constraint outarcs, and/or multiple
1714 * compatible constraint inarcs, we only need to create one new intermediate
1715 * state per combination of predecessor and successor states. *intermediates
1716 * points to a list of such intermediate states for this from state (chained
1717 * through their tmp fields).
1720 pull(struct nfa
*nfa
,
1722 struct state
**intermediates
)
1724 struct state
*from
= con
->from
;
1725 struct state
*to
= con
->to
;
1730 assert(from
!= to
); /* should have gotten rid of this earlier */
1731 if (from
->flag
) /* can't pull back beyond start */
1733 if (from
->nins
== 0)
1740 * First, clone from state if necessary to avoid other outarcs. This may
1741 * seem wasteful, but it simplifies the logic, and we'll get rid of the
1742 * clone state again at the bottom.
1744 if (from
->nouts
> 1)
1749 copyins(nfa
, from
, s
); /* duplicate inarcs */
1750 cparc(nfa
, con
, s
, to
); /* move constraint arc */
1757 assert(from
->nouts
== 1);
1759 /* propagate the constraint into the from state's inarcs */
1760 for (a
= from
->ins
; a
!= NULL
&& !NISERR(); a
= nexta
)
1763 switch (combine(nfa
, con
, a
))
1765 case INCOMPATIBLE
: /* destroy the arc */
1768 case SATISFIED
: /* no action needed */
1770 case COMPATIBLE
: /* swap the two arcs, more or less */
1771 /* need an intermediate state, but might have one already */
1772 for (s
= *intermediates
; s
!= NULL
; s
= s
->tmp
)
1774 assert(s
->nins
> 0 && s
->nouts
> 0);
1775 if (s
->ins
->from
== a
->from
&& s
->outs
->to
== to
)
1783 s
->tmp
= *intermediates
;
1786 cparc(nfa
, con
, a
->from
, s
);
1787 cparc(nfa
, a
, s
, to
);
1790 case REPLACEARC
: /* replace arc's color */
1791 newarc(nfa
, a
->type
, con
->co
, a
->from
, to
);
1800 /* remaining inarcs, if any, incorporate the constraint */
1801 moveins(nfa
, from
, to
);
1803 /* from state is now useless, but we leave it to pullback() to clean up */
1808 * pushfwd - push forward constraints forward to eliminate them
1811 pushfwd(struct nfa
*nfa
,
1812 FILE *f
) /* for debug output; NULL none */
1815 struct state
*nexts
;
1818 struct state
*intermediates
;
1821 /* find and push until there are no more */
1825 for (s
= nfa
->states
; s
!= NULL
&& !NISERR(); s
= nexts
)
1828 intermediates
= NULL
;
1829 for (a
= s
->ins
; a
!= NULL
&& !NISERR(); a
= nexta
)
1832 if (a
->type
== '$' || a
->type
== AHEAD
)
1833 if (push(nfa
, a
, &intermediates
))
1836 /* clear tmp fields of intermediate states created here */
1837 while (intermediates
!= NULL
)
1839 struct state
*ns
= intermediates
->tmp
;
1841 intermediates
->tmp
= NULL
;
1844 /* if s is now useless, get rid of it */
1845 if ((s
->nins
== 0 || s
->nouts
== 0) && !s
->flag
)
1848 if (progress
&& f
!= NULL
)
1850 } while (progress
&& !NISERR());
1855 * Any $ constraints we were able to push to the post state can now be
1856 * replaced by PLAIN arcs referencing the EOS or EOL colors. There should
1857 * be no other $ or AHEAD arcs left in the NFA, though we do not check
1858 * that here (compact() will fail if so).
1860 for (a
= nfa
->post
->ins
; a
!= NULL
; a
= nexta
)
1865 assert(a
->co
== 0 || a
->co
== 1);
1866 newarc(nfa
, PLAIN
, nfa
->eos
[a
->co
], a
->from
, a
->to
);
1873 * push - push a forward constraint forward past its destination state
1875 * Returns 1 if successful (which it always is unless the destination is the
1876 * post state or we have an internal error), 0 if nothing happened.
1878 * A significant property of this function is that it deletes no pre-existing
1879 * states, and no inarcs of the constraint's to state other than the given
1880 * constraint arc. This makes the loops in pushfwd() safe, at the cost that
1881 * we may leave useless states behind. Therefore, we leave it to pushfwd()
1882 * to delete such states.
1884 * If the to state has multiple forward-constraint inarcs, and/or multiple
1885 * compatible constraint outarcs, we only need to create one new intermediate
1886 * state per combination of predecessor and successor states. *intermediates
1887 * points to a list of such intermediate states for this to state (chained
1888 * through their tmp fields).
1891 push(struct nfa
*nfa
,
1893 struct state
**intermediates
)
1895 struct state
*from
= con
->from
;
1896 struct state
*to
= con
->to
;
1901 assert(to
!= from
); /* should have gotten rid of this earlier */
1902 if (to
->flag
) /* can't push forward beyond end */
1911 * First, clone to state if necessary to avoid other inarcs. This may
1912 * seem wasteful, but it simplifies the logic, and we'll get rid of the
1913 * clone state again at the bottom.
1920 copyouts(nfa
, to
, s
); /* duplicate outarcs */
1921 cparc(nfa
, con
, from
, s
); /* move constraint arc */
1928 assert(to
->nins
== 1);
1930 /* propagate the constraint into the to state's outarcs */
1931 for (a
= to
->outs
; a
!= NULL
&& !NISERR(); a
= nexta
)
1933 nexta
= a
->outchain
;
1934 switch (combine(nfa
, con
, a
))
1936 case INCOMPATIBLE
: /* destroy the arc */
1939 case SATISFIED
: /* no action needed */
1941 case COMPATIBLE
: /* swap the two arcs, more or less */
1942 /* need an intermediate state, but might have one already */
1943 for (s
= *intermediates
; s
!= NULL
; s
= s
->tmp
)
1945 assert(s
->nins
> 0 && s
->nouts
> 0);
1946 if (s
->ins
->from
== from
&& s
->outs
->to
== a
->to
)
1954 s
->tmp
= *intermediates
;
1957 cparc(nfa
, con
, s
, a
->to
);
1958 cparc(nfa
, a
, from
, s
);
1961 case REPLACEARC
: /* replace arc's color */
1962 newarc(nfa
, a
->type
, con
->co
, from
, a
->to
);
1971 /* remaining outarcs, if any, incorporate the constraint */
1972 moveouts(nfa
, to
, from
);
1974 /* to state is now useless, but we leave it to pushfwd() to clean up */
1979 * combine - constraint lands on an arc, what happens?
1981 * #def INCOMPATIBLE 1 // destroys arc
1982 * #def SATISFIED 2 // constraint satisfied
1983 * #def COMPATIBLE 3 // compatible but not satisfied yet
1984 * #def REPLACEARC 4 // replace arc's color with constraint color
1987 combine(struct nfa
*nfa
,
1991 #define CA(ct,at) (((ct)<<CHAR_BIT) | (at))
1993 switch (CA(con
->type
, a
->type
))
1995 case CA('^', PLAIN
): /* newlines are handled separately */
1996 case CA('$', PLAIN
):
1997 return INCOMPATIBLE
;
1999 case CA(AHEAD
, PLAIN
): /* color constraints meet colors */
2000 case CA(BEHIND
, PLAIN
):
2001 if (con
->co
== a
->co
)
2003 if (con
->co
== RAINBOW
)
2005 /* con is satisfied unless arc's color is a pseudocolor */
2006 if (!(nfa
->cm
->cd
[a
->co
].flags
& PSEUDO
))
2009 else if (a
->co
== RAINBOW
)
2011 /* con is incompatible if it's for a pseudocolor */
2012 /* (this is hypothetical; we make no such constraints today) */
2013 if (nfa
->cm
->cd
[con
->co
].flags
& PSEUDO
)
2014 return INCOMPATIBLE
;
2015 /* otherwise, constraint constrains arc to be only its color */
2018 return INCOMPATIBLE
;
2020 case CA('^', '^'): /* collision, similar constraints */
2022 if (con
->co
== a
->co
) /* true duplication */
2024 return INCOMPATIBLE
;
2026 case CA(AHEAD
, AHEAD
): /* collision, similar constraints */
2027 case CA(BEHIND
, BEHIND
):
2028 if (con
->co
== a
->co
) /* true duplication */
2030 if (con
->co
== RAINBOW
)
2032 /* con is satisfied unless arc's color is a pseudocolor */
2033 if (!(nfa
->cm
->cd
[a
->co
].flags
& PSEUDO
))
2036 else if (a
->co
== RAINBOW
)
2038 /* con is incompatible if it's for a pseudocolor */
2039 /* (this is hypothetical; we make no such constraints today) */
2040 if (nfa
->cm
->cd
[con
->co
].flags
& PSEUDO
)
2041 return INCOMPATIBLE
;
2042 /* otherwise, constraint constrains arc to be only its color */
2045 return INCOMPATIBLE
;
2047 case CA('^', BEHIND
): /* collision, dissimilar constraints */
2048 case CA(BEHIND
, '^'):
2049 case CA('$', AHEAD
):
2050 case CA(AHEAD
, '$'):
2051 return INCOMPATIBLE
;
2053 case CA('^', '$'): /* constraints passing each other */
2054 case CA('^', AHEAD
):
2055 case CA(BEHIND
, '$'):
2056 case CA(BEHIND
, AHEAD
):
2058 case CA('$', BEHIND
):
2059 case CA(AHEAD
, '^'):
2060 case CA(AHEAD
, BEHIND
):
2061 case CA('^', LACON
):
2062 case CA(BEHIND
, LACON
):
2063 case CA('$', LACON
):
2064 case CA(AHEAD
, LACON
):
2069 return INCOMPATIBLE
; /* for benefit of blind compilers */
2073 * fixempties - get rid of EMPTY arcs
2076 fixempties(struct nfa
*nfa
,
2077 FILE *f
) /* for debug output; NULL none */
2081 struct state
*nexts
;
2085 struct arc
**inarcsorig
;
2086 struct arc
**arcarray
;
2092 * First, get rid of any states whose sole out-arc is an EMPTY, since
2093 * they're basically just aliases for their successor. The parsing
2094 * algorithm creates enough of these that it's worth special-casing this.
2096 for (s
= nfa
->states
; s
!= NULL
&& !NISERR(); s
= nexts
)
2099 if (s
->flag
|| s
->nouts
!= 1)
2102 assert(a
!= NULL
&& a
->outchain
== NULL
);
2103 if (a
->type
!= EMPTY
)
2106 moveins(nfa
, s
, a
->to
);
2111 * Similarly, get rid of any state with a single EMPTY in-arc, by folding
2112 * it into its predecessor.
2114 for (s
= nfa
->states
; s
!= NULL
&& !NISERR(); s
= nexts
)
2117 /* while we're at it, ensure tmp fields are clear for next step */
2118 assert(s
->tmp
== NULL
);
2119 if (s
->flag
|| s
->nins
!= 1)
2122 assert(a
!= NULL
&& a
->inchain
== NULL
);
2123 if (a
->type
!= EMPTY
)
2126 moveouts(nfa
, s
, a
->from
);
2134 * For each remaining NFA state, find all other states from which it is
2135 * reachable by a chain of one or more EMPTY arcs. Then generate new arcs
2136 * that eliminate the need for each such chain.
2138 * We could replace a chain of EMPTY arcs that leads from a "from" state
2139 * to a "to" state either by pushing non-EMPTY arcs forward (linking
2140 * directly from "from"'s predecessors to "to") or by pulling them back
2141 * (linking directly from "from" to "to"'s successors). We choose to
2142 * always do the former; this choice is somewhat arbitrary, but the
2143 * approach below requires that we uniformly do one or the other.
2145 * Suppose we have a chain of N successive EMPTY arcs (where N can easily
2146 * approach the size of the NFA). All of the intermediate states must
2147 * have additional inarcs and outarcs, else they'd have been removed by
2148 * the steps above. Assuming their inarcs are mostly not empties, we will
2149 * add O(N^2) arcs to the NFA, since a non-EMPTY inarc leading to any one
2150 * state in the chain must be duplicated to lead to all its successor
2151 * states as well. So there is no hope of doing less than O(N^2) work;
2152 * however, we should endeavor to keep the big-O cost from being even
2153 * worse than that, which it can easily become without care. In
2154 * particular, suppose we were to copy all S1's inarcs forward to S2, and
2155 * then also to S3, and then later we consider pushing S2's inarcs forward
2156 * to S3. If we include the arcs already copied from S1 in that, we'd be
2157 * doing O(N^3) work. (The duplicate-arc elimination built into newarc()
2158 * and its cohorts would get rid of the extra arcs, but not without cost.)
2160 * We can avoid this cost by treating only arcs that existed at the start
2161 * of this phase as candidates to be pushed forward. To identify those,
2162 * we remember the first inarc each state had to start with. We rely on
2163 * the fact that newarc() and friends put new arcs on the front of their
2164 * to-states' inchains, and that this phase never deletes arcs, so that
2165 * the original arcs must be the last arcs in their to-states' inchains.
2167 * So the process here is that, for each state in the NFA, we gather up
2168 * all non-EMPTY inarcs of states that can reach the target state via
2169 * EMPTY arcs. We then sort, de-duplicate, and merge these arcs into the
2170 * target state's inchain. (We can safely use sort-merge for this as long
2171 * as we update each state's original-arcs pointer after we add arcs to
2172 * it; the sort step of mergeins probably changed the order of the old
2175 * Another refinement worth making is that, because we only add non-EMPTY
2176 * arcs during this phase, and all added arcs have the same from-state as
2177 * the non-EMPTY arc they were cloned from, we know ahead of time that any
2178 * states having only EMPTY outarcs will be useless for lack of outarcs
2179 * after we drop the EMPTY arcs. (They cannot gain non-EMPTY outarcs if
2180 * they had none to start with.) So we need not bother to update the
2181 * inchains of such states at all.
2184 /* Remember the states' first original inarcs */
2185 /* ... and while at it, count how many old inarcs there are altogether */
2186 inarcsorig
= (struct arc
**) MALLOC(nfa
->nstates
* sizeof(struct arc
*));
2187 if (inarcsorig
== NULL
)
2193 for (s
= nfa
->states
; s
!= NULL
; s
= s
->next
)
2195 inarcsorig
[s
->no
] = s
->ins
;
2196 totalinarcs
+= s
->nins
;
2200 * Create a workspace for accumulating the inarcs to be added to the
2201 * current target state. totalinarcs is probably a considerable
2202 * overestimate of the space needed, but the NFA is unlikely to be large
2203 * enough at this point to make it worth being smarter.
2205 arcarray
= (struct arc
**) MALLOC(totalinarcs
* sizeof(struct arc
*));
2206 if (arcarray
== NULL
)
2213 /* And iterate over the target states */
2214 for (s
= nfa
->states
; s
!= NULL
&& !NISERR(); s
= s
->next
)
2216 /* Ignore target states without non-EMPTY outarcs, per note above */
2217 if (!s
->flag
&& !hasnonemptyout(s
))
2220 /* Find predecessor states and accumulate their original inarcs */
2222 for (s2
= emptyreachable(nfa
, s
, s
, inarcsorig
); s2
!= s
; s2
= nexts
)
2224 /* Add s2's original inarcs to arcarray[], but ignore empties */
2225 for (a
= inarcsorig
[s2
->no
]; a
!= NULL
; a
= a
->inchain
)
2227 if (a
->type
!= EMPTY
)
2228 arcarray
[arccount
++] = a
;
2231 /* Reset the tmp fields as we walk back */
2236 assert(arccount
<= totalinarcs
);
2238 /* Remember how many original inarcs this state has */
2241 /* Add non-duplicate inarcs to target state */
2242 mergeins(nfa
, s
, arcarray
, arccount
);
2244 /* Now we must update the state's inarcsorig pointer */
2245 nskip
= s
->nins
- prevnins
;
2249 inarcsorig
[s
->no
] = a
;
2259 * Now remove all the EMPTY arcs, since we don't need them anymore.
2261 for (s
= nfa
->states
; s
!= NULL
; s
= s
->next
)
2263 for (a
= s
->outs
; a
!= NULL
; a
= nexta
)
2265 nexta
= a
->outchain
;
2266 if (a
->type
== EMPTY
)
2272 * And remove any states that have become useless. (This cleanup is not
2273 * very thorough, and would be even less so if we tried to combine it with
2274 * the previous step; but cleanup() will take care of anything we miss.)
2276 for (s
= nfa
->states
; s
!= NULL
; s
= nexts
)
2279 if ((s
->nins
== 0 || s
->nouts
== 0) && !s
->flag
)
2288 * emptyreachable - recursively find all states that can reach s by EMPTY arcs
2290 * The return value is the last such state found. Its tmp field links back
2291 * to the next-to-last such state, and so on back to s, so that all these
2292 * states can be located without searching the whole NFA.
2294 * Since this is only used in fixempties(), we pass in the inarcsorig[] array
2295 * maintained by that function. This lets us skip over all new inarcs, which
2296 * are certainly not EMPTY arcs.
2298 * The maximum recursion depth here is equal to the length of the longest
2299 * loop-free chain of EMPTY arcs, which is surely no more than the size of
2300 * the NFA ... but that could still be enough to cause trouble.
2302 static struct state
*
2303 emptyreachable(struct nfa
*nfa
,
2305 struct state
*lastfound
,
2306 struct arc
**inarcsorig
)
2310 /* Since this is recursive, it could be driven to stack overflow */
2311 if (STACK_TOO_DEEP(nfa
->v
->re
))
2319 for (a
= inarcsorig
[s
->no
]; a
!= NULL
; a
= a
->inchain
)
2321 if (a
->type
== EMPTY
&& a
->from
->tmp
== NULL
)
2322 lastfound
= emptyreachable(nfa
, a
->from
, lastfound
, inarcsorig
);
2328 * isconstraintarc - detect whether an arc is of a constraint type
2331 isconstraintarc(struct arc
*a
)
2346 * hasconstraintout - does state have a constraint out arc?
2349 hasconstraintout(struct state
*s
)
2353 for (a
= s
->outs
; a
!= NULL
; a
= a
->outchain
)
2355 if (isconstraintarc(a
))
2362 * fixconstraintloops - get rid of loops containing only constraint arcs
2364 * A loop of states that contains only constraint arcs is useless, since
2365 * passing around the loop represents no forward progress. Moreover, it
2366 * would cause infinite looping in pullback/pushfwd, so we need to get rid
2367 * of such loops before doing that.
2370 fixconstraintloops(struct nfa
*nfa
,
2371 FILE *f
) /* for debug output; NULL none */
2374 struct state
*nexts
;
2380 * In the trivial case of a state that loops to itself, we can just drop
2381 * the constraint arc altogether. This is worth special-casing because
2382 * such loops are far more common than loops containing multiple states.
2383 * While we're at it, note whether any constraint arcs survive.
2386 for (s
= nfa
->states
; s
!= NULL
&& !NISERR(); s
= nexts
)
2389 /* while we're at it, ensure tmp fields are clear for next step */
2390 assert(s
->tmp
== NULL
);
2391 for (a
= s
->outs
; a
!= NULL
&& !NISERR(); a
= nexta
)
2393 nexta
= a
->outchain
;
2394 if (isconstraintarc(a
))
2402 /* If we removed all the outarcs, the state is useless. */
2403 if (s
->nouts
== 0 && !s
->flag
)
2407 /* Nothing to do if no remaining constraint arcs */
2408 if (NISERR() || !hasconstraints
)
2412 * Starting from each remaining NFA state, search outwards for a
2413 * constraint loop. If we find a loop, break the loop, then start the
2414 * search over. (We could possibly retain some state from the first scan,
2415 * but it would complicate things greatly, and multi-state constraint
2416 * loops are rare enough that it's not worth optimizing the case.)
2419 for (s
= nfa
->states
; s
!= NULL
&& !NISERR(); s
= s
->next
)
2421 if (findconstraintloop(nfa
, s
))
2429 * Now remove any states that have become useless. (This cleanup is not
2430 * very thorough, and would be even less so if we tried to combine it with
2431 * the previous step; but cleanup() will take care of anything we miss.)
2433 * Because findconstraintloop intentionally doesn't reset all tmp fields,
2434 * we have to clear them after it's done. This is a convenient place to
2437 for (s
= nfa
->states
; s
!= NULL
; s
= nexts
)
2441 if ((s
->nins
== 0 || s
->nouts
== 0) && !s
->flag
)
2450 * findconstraintloop - recursively find a loop of constraint arcs
2452 * If we find a loop, break it by calling breakconstraintloop(), then
2453 * return 1; otherwise return 0.
2455 * State tmp fields are guaranteed all NULL on a success return, because
2456 * breakconstraintloop does that. After a failure return, any state that
2457 * is known not to be part of a loop is marked with s->tmp == s; this allows
2458 * us not to have to re-prove that fact on later calls. (This convention is
2459 * workable because we already eliminated single-state loops.)
2461 * Note that the found loop doesn't necessarily include the first state we
2462 * are called on. Any loop reachable from that state will do.
2464 * The maximum recursion depth here is one more than the length of the longest
2465 * loop-free chain of constraint arcs, which is surely no more than the size
2466 * of the NFA ... but that could still be enough to cause trouble.
2469 findconstraintloop(struct nfa
*nfa
, struct state
*s
)
2473 /* Since this is recursive, it could be driven to stack overflow */
2474 if (STACK_TOO_DEEP(nfa
->v
->re
))
2477 return 1; /* to exit as quickly as possible */
2482 /* Already proven uninteresting? */
2485 /* Found a loop involving s */
2486 breakconstraintloop(nfa
, s
);
2487 /* The tmp fields have been cleaned up by breakconstraintloop */
2490 for (a
= s
->outs
; a
!= NULL
; a
= a
->outchain
)
2492 if (isconstraintarc(a
))
2494 struct state
*sto
= a
->to
;
2498 if (findconstraintloop(nfa
, sto
))
2504 * If we get here, no constraint loop exists leading out from s. Mark it
2505 * with s->tmp == s so we need not rediscover that fact again later.
2512 * breakconstraintloop - break a loop of constraint arcs
2514 * sinitial is any one member state of the loop. Each loop member's tmp
2515 * field links to its successor within the loop. (Note that this function
2516 * will reset all the tmp fields to NULL.)
2518 * We can break the loop by, for any one state S1 in the loop, cloning its
2519 * loop successor state S2 (and possibly following states), and then moving
2520 * all S1->S2 constraint arcs to point to the cloned S2. The cloned S2 should
2521 * copy any non-constraint outarcs of S2. Constraint outarcs should be
2522 * dropped if they point back to S1, else they need to be copied as arcs to
2523 * similarly cloned states S3, S4, etc. In general, each cloned state copies
2524 * non-constraint outarcs, drops constraint outarcs that would lead to itself
2525 * or any earlier cloned state, and sends other constraint outarcs to newly
2526 * cloned states. No cloned state will have any inarcs that aren't constraint
2527 * arcs or do not lead from S1 or earlier-cloned states. It's okay to drop
2528 * constraint back-arcs since they would not take us to any state we've not
2529 * already been in; therefore, no new constraint loop is created. In this way
2530 * we generate a modified NFA that can still represent every useful state
2531 * sequence, but not sequences that represent state loops with no consumption
2532 * of input data. Note that the set of cloned states will certainly include
2533 * all of the loop member states other than S1, and it may also include
2534 * non-loop states that are reachable from S2 via constraint arcs. This is
2535 * important because there is no guarantee that findconstraintloop found a
2536 * maximal loop (and searching for one would be NP-hard, so don't try).
2537 * Frequently the "non-loop states" are actually part of a larger loop that
2538 * we didn't notice, and indeed there may be several overlapping loops.
2539 * This technique ensures convergence in such cases, while considering only
2540 * the originally-found loop does not.
2542 * If there is only one S1->S2 constraint arc, then that constraint is
2543 * certainly satisfied when we enter any of the clone states. This means that
2544 * in the common case where many of the constraint arcs are identically
2545 * labeled, we can merge together clone states linked by a similarly-labeled
2546 * constraint: if we can get to the first one we can certainly get to the
2547 * second, so there's no need to distinguish. This greatly reduces the number
2548 * of new states needed, so we preferentially break the given loop at a state
2549 * pair where this is true.
2551 * Furthermore, it's fairly common to find that a cloned successor state has
2552 * no outarcs, especially if we're a bit aggressive about removing unnecessary
2553 * outarcs. If that happens, then there is simply not any interesting state
2554 * that can be reached through the predecessor's loop arcs, which means we can
2555 * break the loop just by removing those loop arcs, with no new states added.
2558 breakconstraintloop(struct nfa
*nfa
, struct state
*sinitial
)
2561 struct state
*shead
;
2562 struct state
*stail
;
2563 struct state
*sclone
;
2564 struct state
*nexts
;
2570 * Start by identifying which loop step we want to break at.
2571 * Preferentially this is one with only one constraint arc. (XXX are
2572 * there any other secondary heuristics we want to use here?) Set refarc
2573 * to point to the selected lone constraint arc, if there is one.
2580 assert(nexts
!= s
); /* should not see any one-element loops */
2585 for (a
= s
->outs
; a
!= NULL
; a
= a
->outchain
)
2587 if (a
->to
== nexts
&& isconstraintarc(a
))
2595 refarc
= NULL
; /* multiple constraint arcs here, no good */
2598 } while (s
!= sinitial
);
2602 /* break at the refarc */
2603 shead
= refarc
->from
;
2605 assert(stail
== shead
->tmp
);
2609 /* for lack of a better idea, break after sinitial */
2611 stail
= sinitial
->tmp
;
2615 * Reset the tmp fields so that we can use them for local storage in
2616 * clonesuccessorstates. (findconstraintloop won't mind, since it's just
2617 * going to abandon its search anyway.)
2619 for (s
= nfa
->states
; s
!= NULL
; s
= s
->next
)
2623 * Recursively build clone state(s) as needed.
2625 sclone
= newstate(nfa
);
2632 clonesuccessorstates(nfa
, stail
, sclone
, shead
, refarc
,
2633 NULL
, NULL
, nfa
->nstates
);
2639 * It's possible that sclone has no outarcs at all, in which case it's
2640 * useless. (We don't try extremely hard to get rid of useless states
2641 * here, but this is an easy and fairly common case.)
2643 if (sclone
->nouts
== 0)
2645 freestate(nfa
, sclone
);
2650 * Move shead's constraint-loop arcs to point to sclone, or just drop them
2651 * if we discovered we don't need sclone.
2653 for (a
= shead
->outs
; a
!= NULL
; a
= nexta
)
2655 nexta
= a
->outchain
;
2656 if (a
->to
== stail
&& isconstraintarc(a
))
2659 cparc(nfa
, a
, shead
, sclone
);
2668 * clonesuccessorstates - create a tree of constraint-arc successor states
2670 * ssource is the state to be cloned, and sclone is the state to copy its
2671 * outarcs into. sclone's inarcs, if any, should already be set up.
2673 * spredecessor is the original predecessor state that we are trying to build
2674 * successors for (it may not be the immediate predecessor of ssource).
2675 * refarc, if not NULL, is the original constraint arc that is known to have
2676 * been traversed out of spredecessor to reach the successor(s).
2678 * For each cloned successor state, we transiently create a "donemap" that is
2679 * a boolean array showing which source states we've already visited for this
2680 * clone state. This prevents infinite recursion as well as useless repeat
2681 * visits to the same state subtree (which can add up fast, since typical NFAs
2682 * have multiple redundant arc pathways). Each donemap is a char array
2683 * indexed by state number. The donemaps are all of the same size "nstates",
2684 * which is nfa->nstates as of the start of the recursion. This is enough to
2685 * have entries for all pre-existing states, but *not* entries for clone
2686 * states created during the recursion. That's okay since we have no need to
2689 * curdonemap is NULL when recursing to a new sclone state, or sclone's
2690 * donemap when we are recursing without having created a new state (which we
2691 * do when we decide we can merge a successor state into the current clone
2692 * state). outerdonemap is NULL at the top level and otherwise the parent
2693 * clone state's donemap.
2695 * The successor states we create and fill here form a strict tree structure,
2696 * with each state having exactly one predecessor, except that the toplevel
2697 * state has no inarcs as yet (breakconstraintloop will add its inarcs from
2698 * spredecessor after we're done). Thus, we can examine sclone's inarcs back
2699 * to the root, plus refarc if any, to identify the set of constraints already
2700 * known valid at the current point. This allows us to avoid generating extra
2704 clonesuccessorstates(struct nfa
*nfa
,
2705 struct state
*ssource
,
2706 struct state
*sclone
,
2707 struct state
*spredecessor
,
2716 /* Since this is recursive, it could be driven to stack overflow */
2717 if (STACK_TOO_DEEP(nfa
->v
->re
))
2723 /* If this state hasn't already got a donemap, create one */
2724 donemap
= curdonemap
;
2725 if (donemap
== NULL
)
2727 donemap
= (char *) MALLOC(nstates
* sizeof(char));
2728 if (donemap
== NULL
)
2734 if (outerdonemap
!= NULL
)
2737 * Not at outermost recursion level, so copy the outer level's
2738 * donemap; this ensures that we see states in process of being
2739 * visited at outer levels, or already merged into predecessor
2740 * states, as ones we shouldn't traverse back to.
2742 memcpy(donemap
, outerdonemap
, nstates
* sizeof(char));
2746 /* At outermost level, only spredecessor is off-limits */
2747 memset(donemap
, 0, nstates
* sizeof(char));
2748 assert(spredecessor
->no
< nstates
);
2749 donemap
[spredecessor
->no
] = 1;
2753 /* Mark ssource as visited in the donemap */
2754 assert(ssource
->no
< nstates
);
2755 assert(donemap
[ssource
->no
] == 0);
2756 donemap
[ssource
->no
] = 1;
2759 * We proceed by first cloning all of ssource's outarcs, creating new
2760 * clone states as needed but not doing more with them than that. Then in
2761 * a second pass, recurse to process the child clone states. This allows
2762 * us to have only one child clone state per reachable source state, even
2763 * when there are multiple outarcs leading to the same state. Also, when
2764 * we do visit a child state, its set of inarcs is known exactly, which
2765 * makes it safe to apply the constraint-is-already-checked optimization.
2766 * Also, this ensures that we've merged all the states we can into the
2767 * current clone before we recurse to any children, thus possibly saving
2768 * them from making extra images of those states.
2770 * While this function runs, child clone states of the current state are
2771 * marked by setting their tmp fields to point to the original state they
2772 * were cloned from. This makes it possible to detect multiple outarcs
2773 * leading to the same state, and also makes it easy to distinguish clone
2774 * states from original states (which will have tmp == NULL).
2776 for (a
= ssource
->outs
; a
!= NULL
&& !NISERR(); a
= a
->outchain
)
2778 struct state
*sto
= a
->to
;
2781 * We do not consider cloning successor states that have no constraint
2782 * outarcs; just link to them as-is. They cannot be part of a
2783 * constraint loop so there is no need to make copies. In particular,
2784 * this rule keeps us from trying to clone the post state, which would
2787 if (isconstraintarc(a
) && hasconstraintout(sto
))
2789 struct state
*prevclone
;
2794 * Back-link constraint arcs must not be followed. Nor is there a
2795 * need to revisit states previously merged into this clone.
2797 assert(sto
->no
< nstates
);
2798 if (donemap
[sto
->no
] != 0)
2802 * Check whether we already have a child clone state for this
2806 for (a2
= sclone
->outs
; a2
!= NULL
; a2
= a2
->outchain
)
2808 if (a2
->to
->tmp
== sto
)
2816 * If this arc is labeled the same as refarc, or the same as any
2817 * arc we must have traversed to get to sclone, then no additional
2818 * constraints need to be met to get to sto, so we should just
2819 * merge its outarcs into sclone.
2821 if (refarc
&& a
->type
== refarc
->type
&& a
->co
== refarc
->co
)
2828 for (s
= sclone
; s
->ins
; s
= s
->ins
->from
)
2831 a
->type
== s
->ins
->type
&& a
->co
== s
->ins
->co
)
2842 * We can merge into sclone. If we previously made a child
2843 * clone state, drop it; there's no need to visit it. (This
2844 * can happen if ssource has multiple pathways to sto, and we
2845 * only just now found one that is provably a no-op.)
2848 dropstate(nfa
, prevclone
); /* kills our outarc, too */
2850 /* Recurse to merge sto's outarcs into sclone */
2851 clonesuccessorstates(nfa
,
2859 /* sto should now be marked as previously visited */
2860 assert(NISERR() || donemap
[sto
->no
] == 1);
2865 * We already have a clone state for this successor, so just
2866 * make another arc to it.
2868 cparc(nfa
, a
, sclone
, prevclone
);
2873 * We need to create a new successor clone state.
2875 struct state
*stoclone
;
2877 stoclone
= newstate(nfa
);
2878 if (stoclone
== NULL
)
2883 /* Mark it as to what it's a clone of */
2884 stoclone
->tmp
= sto
;
2885 /* ... and add the outarc leading to it */
2886 cparc(nfa
, a
, sclone
, stoclone
);
2892 * Non-constraint outarcs just get copied to sclone, as do outarcs
2893 * leading to states with no constraint outarc.
2895 cparc(nfa
, a
, sclone
, sto
);
2900 * If we are at outer level for this clone state, recurse to all its child
2901 * clone states, clearing their tmp fields as we go. (If we're not
2902 * outermost for sclone, leave this to be done by the outer call level.)
2903 * Note that if we have multiple outarcs leading to the same clone state,
2904 * it will only be recursed-to once.
2906 if (curdonemap
== NULL
)
2908 for (a
= sclone
->outs
; a
!= NULL
&& !NISERR(); a
= a
->outchain
)
2910 struct state
*stoclone
= a
->to
;
2911 struct state
*sto
= stoclone
->tmp
;
2915 stoclone
->tmp
= NULL
;
2916 clonesuccessorstates(nfa
,
2927 /* Don't forget to free sclone's donemap when done with it */
2933 * removecantmatch - remove CANTMATCH arcs, which are no longer useful
2934 * once we are done with the parsing phase. (We need them only to
2935 * preserve connectedness of NFA subgraphs during parsing.)
2938 removecantmatch(struct nfa
*nfa
)
2942 for (s
= nfa
->states
; s
!= NULL
; s
= s
->next
)
2947 for (a
= s
->outs
; a
!= NULL
; a
= nexta
)
2949 nexta
= a
->outchain
;
2950 if (a
->type
== CANTMATCH
)
2961 * cleanup - clean up NFA after optimizations
2964 cleanup(struct nfa
*nfa
)
2967 struct state
*nexts
;
2973 /* clear out unreachable or dead-end states */
2974 /* use pre to mark reachable, then post to mark can-reach-post */
2975 markreachable(nfa
, nfa
->pre
, (struct state
*) NULL
, nfa
->pre
);
2976 markcanreach(nfa
, nfa
->post
, nfa
->pre
, nfa
->post
);
2977 for (s
= nfa
->states
; s
!= NULL
&& !NISERR(); s
= nexts
)
2980 if (s
->tmp
!= nfa
->post
&& !s
->flag
)
2983 assert(NISERR() || nfa
->post
->nins
== 0 || nfa
->post
->tmp
== nfa
->post
);
2984 cleartraverse(nfa
, nfa
->pre
);
2985 assert(NISERR() || nfa
->post
->nins
== 0 || nfa
->post
->tmp
== NULL
);
2986 /* the nins==0 (final unreachable) case will be caught later */
2988 /* renumber surviving states */
2990 for (s
= nfa
->states
; s
!= NULL
; s
= s
->next
)
2996 * markreachable - recursive marking of reachable states
2999 markreachable(struct nfa
*nfa
,
3001 struct state
*okay
, /* consider only states with this mark */
3002 struct state
*mark
) /* the value to mark with */
3006 /* Since this is recursive, it could be driven to stack overflow */
3007 if (STACK_TOO_DEEP(nfa
->v
->re
))
3017 for (a
= s
->outs
; a
!= NULL
; a
= a
->outchain
)
3018 markreachable(nfa
, a
->to
, okay
, mark
);
3022 * markcanreach - recursive marking of states which can reach here
3025 markcanreach(struct nfa
*nfa
,
3027 struct state
*okay
, /* consider only states with this mark */
3028 struct state
*mark
) /* the value to mark with */
3032 /* Since this is recursive, it could be driven to stack overflow */
3033 if (STACK_TOO_DEEP(nfa
->v
->re
))
3043 for (a
= s
->ins
; a
!= NULL
; a
= a
->inchain
)
3044 markcanreach(nfa
, a
->from
, okay
, mark
);
3048 * analyze - ascertain potentially-useful facts about an optimized NFA
3050 static long /* re_info bits to be ORed in */
3051 analyze(struct nfa
*nfa
)
3059 /* Detect whether NFA can't match anything */
3060 if (nfa
->pre
->outs
== NULL
)
3061 return REG_UIMPOSSIBLE
;
3063 /* Detect whether NFA matches all strings (possibly with length bounds) */
3066 /* Detect whether NFA can possibly match a zero-length string */
3067 for (a
= nfa
->pre
->outs
; a
!= NULL
; a
= a
->outchain
)
3068 for (aa
= a
->to
->outs
; aa
!= NULL
; aa
= aa
->outchain
)
3069 if (aa
->to
== nfa
->post
)
3070 return REG_UEMPTYMATCH
;
3075 * checkmatchall - does the NFA represent no more than a string length test?
3077 * If so, set nfa->minmatchall and nfa->maxmatchall correctly (they are -1
3078 * to begin with) and set the MATCHALL bit in nfa->flags.
3080 * To succeed, we require all arcs to be PLAIN RAINBOW arcs, except for those
3081 * for pseudocolors (i.e., BOS/BOL/EOS/EOL). We must be able to reach the
3082 * post state via RAINBOW arcs, and if there are any loops in the graph, they
3083 * must be loop-to-self arcs, ensuring that each loop iteration consumes
3084 * exactly one character. (Longer loops are problematic because they create
3085 * non-consecutive possible match lengths; we have no good way to represent
3086 * that situation for lengths beyond the DUPINF limit.)
3088 * Pseudocolor arcs complicate things a little. We know that they can only
3089 * appear as pre-state outarcs (for BOS/BOL) or post-state inarcs (for
3090 * EOS/EOL). There, they must exactly replicate the parallel RAINBOW arcs,
3091 * e.g. if the pre state has one RAINBOW outarc to state 2, it must have BOS
3092 * and BOL outarcs to state 2, and no others. Missing or extra pseudocolor
3093 * arcs can occur, meaning that the NFA involves some constraint on the
3094 * adjacent characters, which makes it not a matchall NFA.
3097 checkmatchall(struct nfa
*nfa
)
3104 * If there are too many states, don't bother trying to detect matchall.
3105 * This limit serves to bound the time and memory we could consume below.
3106 * Note that even if the graph is all-RAINBOW, if there are significantly
3107 * more than DUPINF states then it's likely that there are paths of length
3108 * more than DUPINF, which would force us to fail anyhow. In practice,
3109 * plausible ways of writing a matchall regex with maximum finite path
3110 * length K tend not to have very many more than K states.
3112 if (nfa
->nstates
> DUPINF
* 2)
3116 * First, scan all the states to verify that only RAINBOW arcs appear,
3117 * plus pseudocolor arcs adjacent to the pre and post states. This lets
3118 * us quickly eliminate most cases that aren't matchall NFAs.
3120 for (s
= nfa
->states
; s
!= NULL
; s
= s
->next
)
3124 for (a
= s
->outs
; a
!= NULL
; a
= a
->outchain
)
3126 if (a
->type
!= PLAIN
)
3127 return; /* any LACONs make it non-matchall */
3128 if (a
->co
!= RAINBOW
)
3130 if (nfa
->cm
->cd
[a
->co
].flags
& PSEUDO
)
3133 * Pseudocolor arc: verify it's in a valid place (this
3134 * seems quite unlikely to fail, but let's be sure).
3136 if (s
== nfa
->pre
&&
3137 (a
->co
== nfa
->bos
[0] || a
->co
== nfa
->bos
[1]))
3138 /* okay BOS/BOL arc */ ;
3139 else if (a
->to
== nfa
->post
&&
3140 (a
->co
== nfa
->eos
[0] || a
->co
== nfa
->eos
[1]))
3141 /* okay EOS/EOL arc */ ;
3143 return; /* unexpected pseudocolor arc */
3144 /* We'll check these arcs some more below. */
3147 return; /* any other color makes it non-matchall */
3150 /* Also, assert that the tmp fields are available for use. */
3151 assert(s
->tmp
== NULL
);
3155 * The next cheapest check we can make is to verify that the BOS/BOL
3156 * outarcs of the pre state reach the same states as its RAINBOW outarcs.
3157 * If they don't, the NFA expresses some constraints on the character
3158 * before the matched string, making it non-matchall. Likewise, the
3159 * EOS/EOL inarcs of the post state must match its RAINBOW inarcs.
3161 if (!check_out_colors_match(nfa
->pre
, RAINBOW
, nfa
->bos
[0]) ||
3162 !check_out_colors_match(nfa
->pre
, RAINBOW
, nfa
->bos
[1]) ||
3163 !check_in_colors_match(nfa
->post
, RAINBOW
, nfa
->eos
[0]) ||
3164 !check_in_colors_match(nfa
->post
, RAINBOW
, nfa
->eos
[1]))
3168 * Initialize an array of path-length arrays, in which
3169 * checkmatchall_recurse will return per-state results. This lets us
3170 * memo-ize the recursive search and avoid exponential time consumption.
3172 haspaths
= (bool **) MALLOC(nfa
->nstates
* sizeof(bool *));
3173 if (haspaths
== NULL
)
3174 return; /* fail quietly */
3175 memset(haspaths
, 0, nfa
->nstates
* sizeof(bool *));
3178 * Recursively search the graph for all-RAINBOW paths to the "post" state,
3179 * starting at the "pre" state, and computing the lengths of the paths.
3180 * (Given the preceding checks, there should be at least one such path.
3181 * However we could get back a false result anyway, in case there are
3182 * multi-state loops, paths exceeding DUPINF+1 length, or non-algorithmic
3183 * failures such as ENOMEM.)
3185 if (checkmatchall_recurse(nfa
, nfa
->pre
, haspaths
))
3187 /* The useful result is the path length array for the pre state */
3188 bool *haspath
= haspaths
[nfa
->pre
->no
];
3193 assert(haspath
!= NULL
);
3196 * haspath[] now represents the set of possible path lengths; but we
3197 * want to reduce that to a min and max value, because it doesn't seem
3198 * worth complicating regexec.c to deal with nonconsecutive possible
3199 * match lengths. Find min and max of first run of lengths, then
3200 * verify there are no nonconsecutive lengths.
3202 for (minmatch
= 0; minmatch
<= DUPINF
+ 1; minmatch
++)
3204 if (haspath
[minmatch
])
3207 assert(minmatch
<= DUPINF
+ 1); /* else checkmatchall_recurse lied */
3208 for (maxmatch
= minmatch
; maxmatch
< DUPINF
+ 1; maxmatch
++)
3210 if (!haspath
[maxmatch
+ 1])
3213 for (morematch
= maxmatch
+ 1; morematch
<= DUPINF
+ 1; morematch
++)
3215 if (haspath
[morematch
])
3217 haspath
= NULL
; /* fail, there are nonconsecutive lengths */
3222 if (haspath
!= NULL
)
3225 * Success, so record the info. Here we have a fine point: the
3226 * path length from the pre state includes the pre-to-initial
3227 * transition, so it's one more than the actually matched string
3228 * length. (We avoided counting the final-to-post transition
3229 * within checkmatchall_recurse, but not this one.) This is why
3230 * checkmatchall_recurse allows one more level of path length than
3231 * might seem necessary. This decrement also takes care of
3232 * converting checkmatchall_recurse's definition of "infinity" as
3233 * "DUPINF+1" to our normal representation as "DUPINF".
3235 assert(minmatch
> 0); /* else pre and post states were adjacent */
3236 nfa
->minmatchall
= minmatch
- 1;
3237 nfa
->maxmatchall
= maxmatch
- 1;
3238 nfa
->flags
|= MATCHALL
;
3243 for (i
= 0; i
< nfa
->nstates
; i
++)
3245 if (haspaths
[i
] != NULL
)
3252 * checkmatchall_recurse - recursive search for checkmatchall
3254 * s is the state to be examined in this recursion level.
3255 * haspaths[] is an array of per-state exit path length arrays.
3257 * We return true if the search was performed successfully, false if
3258 * we had to fail because of multi-state loops or other internal reasons.
3259 * (Because "dead" states that can't reach the post state have been
3260 * eliminated, and we already verified that only RAINBOW and matching
3261 * pseudocolor arcs exist, every state should have RAINBOW path(s) to
3262 * the post state. Hence we take a false result from recursive calls
3263 * as meaning that we'd better fail altogether, not just that that
3264 * particular state can't reach the post state.)
3266 * On success, we store a malloc'd result array in haspaths[s->no],
3267 * showing the possible path lengths from s to the post state.
3268 * Each state's haspath[] array is of length DUPINF+2. The entries from
3269 * k = 0 to DUPINF are true if there is an all-RAINBOW path of length k
3270 * from this state to the string end. haspath[DUPINF+1] is true if all
3271 * path lengths >= DUPINF+1 are possible. (Situations that cannot be
3272 * represented under these rules cause failure.)
3274 * checkmatchall is responsible for eventually freeing the haspath[] arrays.
3277 checkmatchall_recurse(struct nfa
*nfa
, struct state
*s
, bool **haspaths
)
3279 bool result
= false;
3280 bool foundloop
= false;
3285 * Since this is recursive, it could be driven to stack overflow. But we
3286 * need not treat that as a hard failure; just deem the NFA non-matchall.
3288 if (STACK_TOO_DEEP(nfa
->v
->re
))
3291 /* In case the search takes a long time, check for cancel */
3292 INTERRUPT(nfa
->v
->re
);
3294 /* Create a haspath array for this state */
3295 haspath
= (bool *) MALLOC((DUPINF
+ 2) * sizeof(bool));
3296 if (haspath
== NULL
)
3297 return false; /* again, treat as non-matchall */
3298 memset(haspath
, 0, (DUPINF
+ 2) * sizeof(bool));
3300 /* Mark this state as being visited */
3301 assert(s
->tmp
== NULL
);
3304 for (a
= s
->outs
; a
!= NULL
; a
= a
->outchain
)
3306 if (a
->co
!= RAINBOW
)
3307 continue; /* ignore pseudocolor arcs */
3308 if (a
->to
== nfa
->post
)
3310 /* We found an all-RAINBOW path to the post state */
3314 * Mark this state as being zero steps away from the string end
3315 * (the transition to the post state isn't counted).
3319 else if (a
->to
== s
)
3321 /* We found a cycle of length 1, which we'll deal with below. */
3324 else if (a
->to
->tmp
!= NULL
)
3326 /* It's busy, so we found a cycle of length > 1, so fail. */
3332 /* Consider paths forward through this to-state. */
3336 /* If to-state was not already visited, recurse */
3337 if (haspaths
[a
->to
->no
] == NULL
)
3339 result
= checkmatchall_recurse(nfa
, a
->to
, haspaths
);
3340 /* Fail if any recursive path fails */
3346 /* The previous visit must have found path(s) to the end */
3349 assert(a
->to
->tmp
== NULL
);
3350 nexthaspath
= haspaths
[a
->to
->no
];
3351 assert(nexthaspath
!= NULL
);
3354 * Now, for every path of length i from a->to to the string end,
3355 * there is a path of length i + 1 from s to the string end.
3357 if (nexthaspath
[DUPINF
] != nexthaspath
[DUPINF
+ 1])
3360 * a->to has a path of length exactly DUPINF, but not longer;
3361 * or it has paths of all lengths > DUPINF but not one of
3362 * exactly that length. In either case, we cannot represent
3363 * the possible path lengths from s correctly, so fail.
3368 /* Merge knowledge of these path lengths into what we have */
3369 for (i
= 0; i
< DUPINF
; i
++)
3370 haspath
[i
+ 1] |= nexthaspath
[i
];
3371 /* Infinity + 1 is still infinity */
3372 haspath
[DUPINF
+ 1] |= nexthaspath
[DUPINF
+ 1];
3376 if (result
&& foundloop
)
3379 * If there is a length-1 loop at this state, then find the shortest
3380 * known path length to the end. The loop means that every larger
3381 * path length is possible, too. (It doesn't matter whether any of
3382 * the longer lengths were already known possible.)
3386 for (i
= 0; i
<= DUPINF
; i
++)
3391 for (i
++; i
<= DUPINF
+ 1; i
++)
3395 /* Report out the completed path length map */
3396 assert(s
->no
< nfa
->nstates
);
3397 assert(haspaths
[s
->no
] == NULL
);
3398 haspaths
[s
->no
] = haspath
;
3400 /* Mark state no longer busy */
3407 * check_out_colors_match - subroutine for checkmatchall
3409 * Check whether the set of states reachable from s by arcs of color co1
3410 * is equivalent to the set reachable by arcs of color co2.
3411 * checkmatchall already verified that all of the NFA's arcs are PLAIN,
3412 * so we need not examine arc types here.
3415 check_out_colors_match(struct state
*s
, color co1
, color co2
)
3421 * To do this in linear time, we assume that the NFA contains no duplicate
3422 * arcs. Run through the out-arcs, marking states reachable by arcs of
3423 * color co1. Run through again, un-marking states reachable by arcs of
3424 * color co2; if we see a not-marked state, we know this co2 arc is
3425 * unmatched. Then run through again, checking for still-marked states,
3426 * and in any case leaving all the tmp fields reset to NULL.
3428 for (a
= s
->outs
; a
!= NULL
; a
= a
->outchain
)
3432 assert(a
->to
->tmp
== NULL
);
3436 for (a
= s
->outs
; a
!= NULL
; a
= a
->outchain
)
3440 if (a
->to
->tmp
!= NULL
)
3443 result
= false; /* unmatched co2 arc */
3446 for (a
= s
->outs
; a
!= NULL
; a
= a
->outchain
)
3450 if (a
->to
->tmp
!= NULL
)
3452 result
= false; /* unmatched co1 arc */
3461 * check_in_colors_match - subroutine for checkmatchall
3463 * Check whether the set of states that can reach s by arcs of color co1
3464 * is equivalent to the set that can reach s by arcs of color co2.
3465 * checkmatchall already verified that all of the NFA's arcs are PLAIN,
3466 * so we need not examine arc types here.
3469 check_in_colors_match(struct state
*s
, color co1
, color co2
)
3475 * Identical algorithm to check_out_colors_match, except examine the
3476 * from-states of s' inarcs.
3478 for (a
= s
->ins
; a
!= NULL
; a
= a
->inchain
)
3482 assert(a
->from
->tmp
== NULL
);
3483 a
->from
->tmp
= a
->from
;
3486 for (a
= s
->ins
; a
!= NULL
; a
= a
->inchain
)
3490 if (a
->from
->tmp
!= NULL
)
3491 a
->from
->tmp
= NULL
;
3493 result
= false; /* unmatched co2 arc */
3496 for (a
= s
->ins
; a
!= NULL
; a
= a
->inchain
)
3500 if (a
->from
->tmp
!= NULL
)
3502 result
= false; /* unmatched co1 arc */
3503 a
->from
->tmp
= NULL
;
3511 * compact - construct the compact representation of an NFA
3514 compact(struct nfa
*nfa
,
3528 for (s
= nfa
->states
; s
!= NULL
; s
= s
->next
)
3531 narcs
+= s
->nouts
+ 1; /* need one extra for endmarker */
3534 cnfa
->stflags
= (char *) MALLOC(nstates
* sizeof(char));
3535 cnfa
->states
= (struct carc
**) MALLOC(nstates
* sizeof(struct carc
*));
3536 cnfa
->arcs
= (struct carc
*) MALLOC(narcs
* sizeof(struct carc
));
3537 if (cnfa
->stflags
== NULL
|| cnfa
->states
== NULL
|| cnfa
->arcs
== NULL
)
3539 if (cnfa
->stflags
!= NULL
)
3540 FREE(cnfa
->stflags
);
3541 if (cnfa
->states
!= NULL
)
3543 if (cnfa
->arcs
!= NULL
)
3548 cnfa
->nstates
= nstates
;
3549 cnfa
->pre
= nfa
->pre
->no
;
3550 cnfa
->post
= nfa
->post
->no
;
3551 cnfa
->bos
[0] = nfa
->bos
[0];
3552 cnfa
->bos
[1] = nfa
->bos
[1];
3553 cnfa
->eos
[0] = nfa
->eos
[0];
3554 cnfa
->eos
[1] = nfa
->eos
[1];
3555 cnfa
->ncolors
= maxcolor(nfa
->cm
) + 1;
3556 cnfa
->flags
= nfa
->flags
;
3557 cnfa
->minmatchall
= nfa
->minmatchall
;
3558 cnfa
->maxmatchall
= nfa
->maxmatchall
;
3561 for (s
= nfa
->states
; s
!= NULL
; s
= s
->next
)
3563 assert((size_t) s
->no
< nstates
);
3564 cnfa
->stflags
[s
->no
] = 0;
3565 cnfa
->states
[s
->no
] = ca
;
3567 for (a
= s
->outs
; a
!= NULL
; a
= a
->outchain
)
3576 assert(s
->no
!= cnfa
->pre
);
3578 ca
->co
= (color
) (cnfa
->ncolors
+ a
->co
);
3581 cnfa
->flags
|= HASLACONS
;
3587 carcsort(first
, ca
- first
);
3592 assert(ca
== &cnfa
->arcs
[narcs
]);
3593 assert(cnfa
->nstates
!= 0);
3595 /* mark no-progress states */
3596 for (a
= nfa
->pre
->outs
; a
!= NULL
; a
= a
->outchain
)
3597 cnfa
->stflags
[a
->to
->no
] = CNFA_NOPROGRESS
;
3598 cnfa
->stflags
[nfa
->pre
->no
] = CNFA_NOPROGRESS
;
3602 * carcsort - sort compacted-NFA arcs by color
3605 carcsort(struct carc
*first
, size_t n
)
3608 qsort(first
, n
, sizeof(struct carc
), carc_cmp
);
3612 carc_cmp(const void *a
, const void *b
)
3614 const struct carc
*aa
= (const struct carc
*) a
;
3615 const struct carc
*bb
= (const struct carc
*) b
;
3617 if (aa
->co
< bb
->co
)
3619 if (aa
->co
> bb
->co
)
3621 if (aa
->to
< bb
->to
)
3623 if (aa
->to
> bb
->to
)
3625 /* This is unreached, since there should be no duplicate arcs now: */
3630 * freecnfa - free a compacted NFA
3633 freecnfa(struct cnfa
*cnfa
)
3635 assert(!NULLCNFA(*cnfa
)); /* not empty already */
3636 FREE(cnfa
->stflags
);
3643 * dumpnfa - dump an NFA in human-readable form
3646 dumpnfa(struct nfa
*nfa
,
3654 fprintf(f
, "pre %d, post %d", nfa
->pre
->no
, nfa
->post
->no
);
3655 if (nfa
->bos
[0] != COLORLESS
)
3656 fprintf(f
, ", bos [%ld]", (long) nfa
->bos
[0]);
3657 if (nfa
->bos
[1] != COLORLESS
)
3658 fprintf(f
, ", bol [%ld]", (long) nfa
->bos
[1]);
3659 if (nfa
->eos
[0] != COLORLESS
)
3660 fprintf(f
, ", eos [%ld]", (long) nfa
->eos
[0]);
3661 if (nfa
->eos
[1] != COLORLESS
)
3662 fprintf(f
, ", eol [%ld]", (long) nfa
->eos
[1]);
3663 if (nfa
->flags
& HASLACONS
)
3664 fprintf(f
, ", haslacons");
3665 if (nfa
->flags
& HASCANTMATCH
)
3666 fprintf(f
, ", hascantmatch");
3667 if (nfa
->flags
& MATCHALL
)
3669 fprintf(f
, ", minmatchall %d", nfa
->minmatchall
);
3670 if (nfa
->maxmatchall
== DUPINF
)
3671 fprintf(f
, ", maxmatchall inf");
3673 fprintf(f
, ", maxmatchall %d", nfa
->maxmatchall
);
3676 for (s
= nfa
->states
; s
!= NULL
; s
= s
->next
)
3682 fprintf(f
, "total of %d states, %d arcs\n", nstates
, narcs
);
3683 if (nfa
->parent
== NULL
)
3684 dumpcolors(nfa
->cm
, f
);
3689 #ifdef REG_DEBUG /* subordinates of dumpnfa */
3692 * dumpstate - dump an NFA state in human-readable form
3695 dumpstate(struct state
*s
,
3700 fprintf(f
, "%d%s%c", s
->no
, (s
->tmp
!= NULL
) ? "T" : "",
3701 (s
->flag
) ? s
->flag
: '.');
3702 if (s
->prev
!= NULL
&& s
->prev
->next
!= s
)
3703 fprintf(f
, "\tstate chain bad\n");
3705 fprintf(f
, "\tno out arcs\n");
3708 for (a
= s
->ins
; a
!= NULL
; a
= a
->inchain
)
3711 fprintf(f
, "\tlink from %d to %d on %d's in-chain\n",
3712 a
->from
->no
, a
->to
->no
, s
->no
);
3718 * dumparcs - dump out-arcs in human-readable form
3721 dumparcs(struct state
*s
,
3727 /* printing oldest arcs first is usually clearer */
3730 while (a
->outchain
!= NULL
)
3744 } while (a
!= NULL
);
3750 * dumparc - dump one outarc in readable form, including prefixing tab
3753 dumparc(struct arc
*a
,
3763 if (a
->co
== RAINBOW
)
3766 fprintf(f
, "[%ld]", (long) a
->co
);
3769 if (a
->co
== RAINBOW
)
3772 fprintf(f
, ">%ld>", (long) a
->co
);
3775 if (a
->co
== RAINBOW
)
3778 fprintf(f
, "<%ld<", (long) a
->co
);
3781 fprintf(f
, ":%ld:", (long) a
->co
);
3785 fprintf(f
, "%c%d", a
->type
, (int) a
->co
);
3793 fprintf(f
, "0x%x/0%lo", a
->type
, (long) a
->co
);
3797 fprintf(f
, "?%d?", a
->from
->no
);
3798 for (aa
= a
->from
->outs
; aa
!= NULL
; aa
= aa
->outchain
)
3800 break; /* NOTE BREAK OUT */
3802 fprintf(f
, "?!?"); /* missing from out-chain */
3809 fprintf(f
, "%d", a
->to
->no
);
3810 for (aa
= a
->to
->ins
; aa
!= NULL
; aa
= aa
->inchain
)
3812 break; /* NOTE BREAK OUT */
3814 fprintf(f
, "?!?"); /* missing from in-chain */
3816 #endif /* REG_DEBUG */
3819 * dumpcnfa - dump a compacted NFA in human-readable form
3823 dumpcnfa(struct cnfa
*cnfa
,
3828 fprintf(f
, "pre %d, post %d", cnfa
->pre
, cnfa
->post
);
3829 if (cnfa
->bos
[0] != COLORLESS
)
3830 fprintf(f
, ", bos [%ld]", (long) cnfa
->bos
[0]);
3831 if (cnfa
->bos
[1] != COLORLESS
)
3832 fprintf(f
, ", bol [%ld]", (long) cnfa
->bos
[1]);
3833 if (cnfa
->eos
[0] != COLORLESS
)
3834 fprintf(f
, ", eos [%ld]", (long) cnfa
->eos
[0]);
3835 if (cnfa
->eos
[1] != COLORLESS
)
3836 fprintf(f
, ", eol [%ld]", (long) cnfa
->eos
[1]);
3837 if (cnfa
->flags
& HASLACONS
)
3838 fprintf(f
, ", haslacons");
3839 if (cnfa
->flags
& MATCHALL
)
3841 fprintf(f
, ", minmatchall %d", cnfa
->minmatchall
);
3842 if (cnfa
->maxmatchall
== DUPINF
)
3843 fprintf(f
, ", maxmatchall inf");
3845 fprintf(f
, ", maxmatchall %d", cnfa
->maxmatchall
);
3848 for (st
= 0; st
< cnfa
->nstates
; st
++)
3849 dumpcstate(st
, cnfa
, f
);
3854 #ifdef REG_DEBUG /* subordinates of dumpcnfa */
3857 * dumpcstate - dump a compacted-NFA state in human-readable form
3867 fprintf(f
, "%d%s", st
, (cnfa
->stflags
[st
] & CNFA_NOPROGRESS
) ? ":" : ".");
3869 for (ca
= cnfa
->states
[st
]; ca
->co
!= COLORLESS
; ca
++)
3871 if (ca
->co
== RAINBOW
)
3872 fprintf(f
, "\t[*]->%d", ca
->to
);
3873 else if (ca
->co
< cnfa
->ncolors
)
3874 fprintf(f
, "\t[%ld]->%d", (long) ca
->co
, ca
->to
);
3876 fprintf(f
, "\t:%ld:->%d", (long) (ca
->co
- cnfa
->ncolors
), ca
->to
);
3885 if (ca
== cnfa
->states
[st
] || pos
!= 1)
3890 #endif /* REG_DEBUG */