2 * Copyright 2001-2007 Adrian Thurston <thurston@cs.queensu.ca>
5 /* This file is part of Ragel.
7 * Ragel is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * Ragel is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Ragel; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
38 #include "sbsttable.h"
45 /* Flags that control merging. */
46 #define STB_GRAPH1 0x01
47 #define STB_GRAPH2 0x02
49 #define STB_ISFINAL 0x04
50 #define STB_ISMARKED 0x08
51 #define STB_ONLIST 0x10
59 struct LongestMatchPart
;
61 /* State list element for unambiguous access to list element. */
67 /* This is the marked index for a state pair. Used in minimization. It keeps
68 * track of whether or not the state pair is marked. */
71 MarkIndex(int states
);
74 void markPair(int state1
, int state2
);
75 bool isPairMarked(int state1
, int state2
);
82 extern KeyOps
*keyOps
;
84 /* Transistion Action Element. */
85 typedef SBstMapEl
< int, Action
* > ActionTableEl
;
87 /* Nodes in the tree that use this action. */
90 typedef Vector
<NameInst
*> ActionRefs
;
92 /* Element in list of actions. Contains the string for the code to exectute. */
95 public DListEl
<Action
>,
96 public AvlTreeEl
<Action
>
100 Action( const InputLoc
&loc
, const char *name
, InlineList
*inlineList
, int condId
)
104 inlineList(inlineList
),
117 /* Key for action dictionary. */
118 const char *getKey() const { return name
; }
120 /* Data collected during parse. */
123 InlineList
*inlineList
;
126 void actionName( ostream
&out
)
131 out
<< loc
.line
<< ":" << loc
.col
;
134 /* Places in the input text that reference the action. */
135 ActionRefs actionRefs
;
137 /* Number of references in the final machine. */
139 { return numTransRefs
+ numToStateRefs
+ numFromStateRefs
+ numEofRefs
; }
142 int numFromStateRefs
;
153 static inline int compare( const Action
*cond1
, const Action
*cond2
)
155 if ( cond1
->condId
< cond2
->condId
)
157 else if ( cond1
->condId
> cond2
->condId
)
163 /* A list of actions. */
164 typedef DList
<Action
> ActionList
;
165 typedef AvlTree
<Action
, char *, CmpStr
> ActionDict
;
167 /* Structure for reverse action mapping. */
168 struct RevActionMapEl
175 /* Transition Action Table. */
177 : public SBstMap
< int, Action
*, CmpOrd
<int> >
179 void setAction( int ordering
, Action
*action
);
180 void setActions( int *orderings
, Action
**actions
, int nActs
);
181 void setActions( const ActionTable
&other
);
183 bool hasAction( Action
*action
);
186 typedef SBstSet
< Action
*, CmpOrd
<Action
*> > ActionSet
;
187 typedef CmpSTable
< Action
*, CmpOrd
<Action
*> > CmpActionSet
;
189 /* Transistion Action Element. */
190 typedef SBstMapEl
< int, LongestMatchPart
* > LmActionTableEl
;
192 /* Transition Action Table. */
194 : public SBstMap
< int, LongestMatchPart
*, CmpOrd
<int> >
196 void setAction( int ordering
, LongestMatchPart
*action
);
197 void setActions( const LmActionTable
&other
);
200 /* Compare of a whole action table element (key & value). */
201 struct CmpActionTableEl
203 static int compare( const ActionTableEl
&action1
,
204 const ActionTableEl
&action2
)
206 if ( action1
.key
< action2
.key
)
208 else if ( action1
.key
> action2
.key
)
210 else if ( action1
.value
< action2
.value
)
212 else if ( action1
.value
> action2
.value
)
218 /* Compare for ActionTable. */
219 typedef CmpSTable
< ActionTableEl
, CmpActionTableEl
> CmpActionTable
;
221 /* Compare of a whole lm action table element (key & value). */
222 struct CmpLmActionTableEl
224 static int compare( const LmActionTableEl
&lmAction1
,
225 const LmActionTableEl
&lmAction2
)
227 if ( lmAction1
.key
< lmAction2
.key
)
229 else if ( lmAction1
.key
> lmAction2
.key
)
231 else if ( lmAction1
.value
< lmAction2
.value
)
233 else if ( lmAction1
.value
> lmAction2
.value
)
239 /* Compare for ActionTable. */
240 typedef CmpSTable
< LmActionTableEl
, CmpLmActionTableEl
> CmpLmActionTable
;
242 /* Action table element for error action tables. Adds the encoding of transfer
244 struct ErrActionTableEl
246 ErrActionTableEl( Action
*action
, int ordering
, int transferPoint
)
247 : ordering(ordering
), action(action
), transferPoint(transferPoint
) { }
249 /* Ordering and id of the action embedding. */
253 /* Id of point of transfere from Error action table to transtions and
257 int getKey() const { return ordering
; }
260 struct ErrActionTable
261 : public SBstTable
< ErrActionTableEl
, int, CmpOrd
<int> >
263 void setAction( int ordering
, Action
*action
, int transferPoint
);
264 void setActions( const ErrActionTable
&other
);
267 /* Compare of an error action table element (key & value). */
268 struct CmpErrActionTableEl
270 static int compare( const ErrActionTableEl
&action1
,
271 const ErrActionTableEl
&action2
)
273 if ( action1
.ordering
< action2
.ordering
)
275 else if ( action1
.ordering
> action2
.ordering
)
277 else if ( action1
.action
< action2
.action
)
279 else if ( action1
.action
> action2
.action
)
281 else if ( action1
.transferPoint
< action2
.transferPoint
)
283 else if ( action1
.transferPoint
> action2
.transferPoint
)
289 /* Compare for ErrActionTable. */
290 typedef CmpSTable
< ErrActionTableEl
, CmpErrActionTableEl
> CmpErrActionTable
;
293 /* Descibe a priority, shared among PriorEls.
294 * Has key and whether or not used. */
301 /* Element in the arrays of priorities for transitions and arrays. Ordering is
302 * unique among instantiations of machines, desc is shared. */
305 PriorEl( int ordering
, PriorDesc
*desc
)
306 : ordering(ordering
), desc(desc
) { }
312 /* Compare priority elements, which are ordered by the priority descriptor
316 static inline int compare( const PriorEl
&pel1
, const PriorEl
&pel2
)
318 if ( pel1
.desc
->key
< pel2
.desc
->key
)
320 else if ( pel1
.desc
->key
> pel2
.desc
->key
)
328 /* Priority Table. */
330 : public SBstSet
< PriorEl
, PriorElCmp
>
332 void setPrior( int ordering
, PriorDesc
*desc
);
333 void setPriors( const PriorTable
&other
);
336 /* Compare of prior table elements for distinguising state data. */
339 static inline int compare( const PriorEl
&pel1
, const PriorEl
&pel2
)
341 if ( pel1
.desc
< pel2
.desc
)
343 else if ( pel1
.desc
> pel2
.desc
)
345 else if ( pel1
.ordering
< pel2
.ordering
)
347 else if ( pel1
.ordering
> pel2
.ordering
)
353 /* Compare of PriorTable distinguising state data. Using a compare of the
354 * pointers is a little more strict than it needs be. It requires that
355 * prioritiy tables have the exact same set of priority assignment operators
356 * (from the input lang) to be considered equal.
358 * Really only key-value pairs need be tested and ordering be merged. However
359 * this would require that in the fuseing of states, priority descriptors be
360 * chosen for the new fused state based on priority. Since the out transition
361 * lists and ranges aren't necessarily going to line up, this is more work for
362 * little gain. Final compression resets all priorities first, so this would
363 * only be useful for compression at every operator, which is only an
364 * undocumented test feature.
366 typedef CmpSTable
<PriorEl
, CmpPriorEl
> CmpPriorTable
;
368 /* Plain action list that imposes no ordering. */
369 typedef Vector
<int> TransFuncList
;
371 /* Comparison for TransFuncList. */
372 typedef CmpTable
< int, CmpOrd
<int> > TransFuncListCompare
;
374 /* Transition class that implements actions and priorities. */
377 TransAp() : fromState(0), toState(0) {}
378 TransAp( const TransAp
&other
) :
379 lowKey(other
.lowKey
),
380 highKey(other
.highKey
),
381 fromState(0), toState(0),
382 actionTable(other
.actionTable
),
383 priorTable(other
.priorTable
),
384 lmActionTable(other
.lmActionTable
) {}
390 /* Pointers for outlist. */
391 TransAp
*prev
, *next
;
393 /* Pointers for in-list. */
394 TransAp
*ilprev
, *ilnext
;
396 /* The function table and priority for the transition. */
397 ActionTable actionTable
;
398 PriorTable priorTable
;
400 LmActionTable lmActionTable
;
403 /* In transition list. Like DList except only has head pointers, which is all
404 * that is required. Insertion and deletion is handled by the graph. This
405 * class provides the iterator of a single list. */
408 TransInList() : head(0) { }
414 /* Default construct. */
417 /* Construct, assign from a list. */
418 Iter( const TransInList
&il
) : ptr(il
.head
) { }
419 Iter
&operator=( const TransInList
&dl
) { ptr
= dl
.head
; return *this; }
422 bool lte() const { return ptr
!= 0; }
423 bool end() const { return ptr
== 0; }
425 /* At the first, last element. */
426 bool first() const { return ptr
&& ptr
->ilprev
== 0; }
427 bool last() const { return ptr
&& ptr
->ilnext
== 0; }
429 /* Cast, dereference, arrow ops. */
430 operator TransAp
*() const { return ptr
; }
431 TransAp
&operator *() const { return *ptr
; }
432 TransAp
*operator->() const { return ptr
; }
434 /* Increment, decrement. */
435 inline void operator++(int) { ptr
= ptr
->ilnext
; }
436 inline void operator--(int) { ptr
= ptr
->ilprev
; }
438 /* The iterator is simply a pointer. */
443 typedef DList
<TransAp
> TransList
;
445 /* Set of states, list of states. */
446 typedef BstSet
<StateAp
*> StateSet
;
447 typedef DList
<StateAp
> StateList
;
449 /* A element in a state dict. */
452 public AvlTreeEl
<StateDictEl
>
454 StateDictEl(const StateSet
&stateSet
)
455 : stateSet(stateSet
) { }
457 const StateSet
&getKey() { return stateSet
; }
462 /* Dictionary mapping a set of states to a target state. */
463 typedef AvlTree
< StateDictEl
, StateSet
, CmpTable
<StateAp
*> > StateDict
;
465 /* Data needed for a merge operation. */
469 : stfillHead(0), stfillTail(0) { }
476 void fillListAppend( StateAp
*state
);
483 TransEl( Key lowKey
, Key highKey
)
484 : lowKey(lowKey
), highKey(highKey
) { }
485 TransEl( Key lowKey
, Key highKey
, TransAp
*value
)
486 : lowKey(lowKey
), highKey(highKey
), value(value
) { }
494 static int compare( const Key key1
, const Key key2
)
498 else if ( key1
> key2
)
505 /* Vector based set of key items. */
506 typedef BstSet
<Key
, CmpKey
> KeySet
;
510 MinPartition() : active(false) { }
515 MinPartition
*prev
, *next
;
518 /* Epsilon transition stored in a state. Specifies the target */
519 typedef Vector
<int> EpsilonTrans
;
521 /* List of states that are to be drawn into this. */
524 EptVectEl( StateAp
*targ
, bool leaving
)
525 : targ(targ
), leaving(leaving
) { }
530 typedef Vector
<EptVectEl
> EptVect
;
532 /* Set of entry ids that go into this state. */
533 typedef BstSet
<int> EntryIdSet
;
535 /* Set of longest match items that may be active in a given state. */
536 typedef BstSet
<LongestMatchPart
*> LmItemSet
;
538 /* A Conditions which is to be
539 * transfered on pending out transitions. */
542 OutCond( Action
*action
, bool sense
)
543 : action(action
), sense(sense
) {}
551 static int compare( const OutCond
&outCond1
, const OutCond
&outCond2
)
553 if ( outCond1
.action
< outCond2
.action
)
555 else if ( outCond1
.action
> outCond2
.action
)
557 else if ( outCond1
.sense
< outCond2
.sense
)
559 else if ( outCond1
.sense
> outCond2
.sense
)
565 /* Set of conditions to be transfered to on pending out transitions. */
566 typedef SBstSet
< OutCond
, CmpOutCond
> OutCondSet
;
567 typedef CmpSTable
< OutCond
, CmpOutCond
> CmpOutCondSet
;
570 typedef BstSet
< Action
*, CmpCondId
> CondSet
;
571 typedef CmpTable
< Action
*, CmpCondId
> CmpCondSet
;
574 : public AvlTreeEl
<CondSpace
>
576 CondSpace( const CondSet
&condSet
)
577 : condSet(condSet
) {}
579 const CondSet
&getKey() { return condSet
; }
586 typedef Vector
<CondSpace
*> CondSpaceVect
;
588 typedef AvlTree
<CondSpace
, CondSet
, CmpCondSet
> CondSpaceMap
;
592 StateCond( Key lowKey
, Key highKey
) :
593 lowKey(lowKey
), highKey(highKey
) {}
597 CondSpace
*condSpace
;
599 StateCond
*prev
, *next
;
602 typedef DList
<StateCond
> StateCondList
;
603 typedef Vector
<long> LongVect
;
607 Expansion( Key lowKey
, Key highKey
) :
608 lowKey(lowKey
), highKey(highKey
),
609 fromTrans(0), fromCondSpace(0),
614 if ( fromTrans
!= 0 )
622 CondSpace
*fromCondSpace
;
625 CondSpace
*toCondSpace
;
628 Expansion
*prev
, *next
;
631 typedef DList
<Expansion
> ExpansionList
;
643 CondData() : lastCondKey(0) {}
645 /* Condition info. */
648 CondSpaceMap condSpaceMap
;
651 extern CondData
*condData
;
653 struct FsmConstructFail
660 FsmConstructFail( Reason reason
)
665 /* State class that implements actions and priorities. */
669 StateAp(const StateAp
&other
);
672 /* Is the state final? */
673 bool isFinState() { return stateBits
& STB_ISFINAL
; }
675 /* Out transition list and the pointer for the default out trans. */
678 /* In transition Lists. */
681 /* Set only during scanner construction when actions are added. NFA to DFA
682 * code can ignore this. */
685 /* Entry points into the state. */
688 /* Epsilon transitions. */
689 EpsilonTrans epsilonTrans
;
691 /* Condition info. */
692 StateCondList stateCondList
;
694 /* Number of in transitions from states other than ourselves. */
697 /* Temporary data for various algorithms. */
699 /* When duplicating the fsm we need to map each
700 * state to the new state representing it. */
703 /* When minimizing machines by partitioning, this maps to the group
704 * the state is in. */
705 MinPartition
*partition
;
707 /* When merging states (state machine operations) this next pointer is
708 * used for the list of states that need to be filled in. */
711 /* Identification for printing and stable minimization. */
716 /* Data used in epsilon operation, maybe fit into alg? */
717 StateAp
*isolatedShadow
;
720 /* A pointer to a dict element that contains the set of states this state
721 * represents. This cannot go into alg, because alg.next is used during
722 * the merging process. */
723 StateDictEl
*stateDictEl
;
725 /* When drawing epsilon transitions, holds the list of states to merge
729 /* Bits controlling the behaviour of the state during collapsing to dfa. */
732 /* State list elements. */
733 StateAp
*next
, *prev
;
736 * Priority and Action data.
739 /* Out priorities transfered to out transitions. */
740 PriorTable outPriorTable
;
742 /* The following two action tables are distinguished by the fact that when
743 * toState actions are executed immediatly after transition actions of
744 * incoming transitions and the current character will be the same as the
745 * one available then. The fromState actions are executed immediately
746 * before the transition actions of outgoing transitions and the current
747 * character is same as the one available then. */
749 /* Actions to execute upon entering into a state. */
750 ActionTable toStateActionTable
;
752 /* Actions to execute when going from the state to the transition. */
753 ActionTable fromStateActionTable
;
755 /* Actions to add to any future transitions that leave via this state. */
756 ActionTable outActionTable
;
758 /* Conditions to add to any future transiions that leave via this sttate. */
759 OutCondSet outCondSet
;
761 /* Error action tables. */
762 ErrActionTable errActionTable
;
764 /* Actions to execute on eof. */
765 ActionTable eofActionTable
;
767 /* Set of longest match items that may be active in this state. */
771 template <class ListItem
> struct NextTrans
782 lowKey
= trans
->lowKey
;
783 highKey
= trans
->highKey
;
787 void set( ListItem
*t
) {
799 /* Encodes the different states that are meaningful to the of the iterator. */
800 enum PairIterUserState
802 RangeInS1
, RangeInS2
,
807 template <class ListItem1
, class ListItem2
= ListItem1
> struct PairIter
809 /* Encodes the different states that an fsm iterator can be in. */
812 ConsumeS1Range
, ConsumeS2Range
,
813 OnlyInS1Range
, OnlyInS2Range
,
814 S1SticksOut
, S1SticksOutBreak
,
815 S2SticksOut
, S2SticksOutBreak
,
816 S1DragsBehind
, S1DragsBehindBreak
,
817 S2DragsBehind
, S2DragsBehindBreak
,
821 PairIter( ListItem1
*list1
, ListItem2
*list2
);
823 /* Query iterator. */
824 bool lte() { return itState
!= End
; }
825 bool end() { return itState
== End
; }
826 void operator++(int) { findNext(); }
827 void operator++() { findNext(); }
829 /* Iterator state. */
833 PairIterUserState userState
;
835 NextTrans
<ListItem1
> s1Tel
;
836 NextTrans
<ListItem2
> s2Tel
;
837 Key bottomLow
, bottomHigh
;
838 ListItem1
*bottomTrans1
;
839 ListItem2
*bottomTrans2
;
845 /* Init the iterator by advancing to the first item. */
846 template <class ListItem1
, class ListItem2
> PairIter
<ListItem1
, ListItem2
>::PairIter(
847 ListItem1
*list1
, ListItem2
*list2
)
856 /* Return and re-entry for the co-routine iterators. This should ALWAYS be
857 * used inside of a block. */
858 #define CO_RETURN(label) \
861 entry##label: backIn = true
863 /* Return and re-entry for the co-routine iterators. This should ALWAYS be
864 * used inside of a block. */
865 #define CO_RETURN2(label, uState) \
867 userState = uState; \
869 entry##label: backIn = true
871 /* Advance to the next transition. When returns, trans points to the next
872 * transition, unless there are no more, in which case end() returns true. */
873 template <class ListItem1
, class ListItem2
> void PairIter
<ListItem1
, ListItem2
>::findNext()
875 /* This variable is used in dummy statements that follow the entry
876 * goto labels. The compiler needs some statement to follow the label. */
879 /* Jump into the iterator routine base on the iterator state. */
881 case Begin
: goto entryBegin
;
882 case ConsumeS1Range
: goto entryConsumeS1Range
;
883 case ConsumeS2Range
: goto entryConsumeS2Range
;
884 case OnlyInS1Range
: goto entryOnlyInS1Range
;
885 case OnlyInS2Range
: goto entryOnlyInS2Range
;
886 case S1SticksOut
: goto entryS1SticksOut
;
887 case S1SticksOutBreak
: goto entryS1SticksOutBreak
;
888 case S2SticksOut
: goto entryS2SticksOut
;
889 case S2SticksOutBreak
: goto entryS2SticksOutBreak
;
890 case S1DragsBehind
: goto entryS1DragsBehind
;
891 case S1DragsBehindBreak
: goto entryS1DragsBehindBreak
;
892 case S2DragsBehind
: goto entryS2DragsBehind
;
893 case S2DragsBehindBreak
: goto entryS2DragsBehindBreak
;
894 case ExactOverlap
: goto entryExactOverlap
;
895 case End
: goto entryEnd
;
899 /* Set up the next structs at the head of the transition lists. */
903 /* Concurrently scan both out ranges. */
905 if ( s1Tel
.trans
== 0 ) {
906 /* We are at the end of state1's ranges. Process the rest of
907 * state2's ranges. */
908 while ( s2Tel
.trans
!= 0 ) {
909 /* Range is only in s2. */
910 CO_RETURN2( ConsumeS2Range
, RangeInS2
);
915 else if ( s2Tel
.trans
== 0 ) {
916 /* We are at the end of state2's ranges. Process the rest of
917 * state1's ranges. */
918 while ( s1Tel
.trans
!= 0 ) {
919 /* Range is only in s1. */
920 CO_RETURN2( ConsumeS1Range
, RangeInS1
);
925 /* Both state1's and state2's transition elements are good.
926 * The signiture of no overlap is a back key being in front of a
928 else if ( s1Tel
.highKey
< s2Tel
.lowKey
) {
929 /* A range exists in state1 that does not overlap with state2. */
930 CO_RETURN2( OnlyInS1Range
, RangeInS1
);
933 else if ( s2Tel
.highKey
< s1Tel
.lowKey
) {
934 /* A range exists in state2 that does not overlap with state1. */
935 CO_RETURN2( OnlyInS2Range
, RangeInS2
);
938 /* There is overlap, must mix the ranges in some way. */
939 else if ( s1Tel
.lowKey
< s2Tel
.lowKey
) {
940 /* Range from state1 sticks out front. Must break it into
941 * non-overlaping and overlaping segments. */
942 bottomLow
= s2Tel
.lowKey
;
943 bottomHigh
= s1Tel
.highKey
;
944 s1Tel
.highKey
= s2Tel
.lowKey
;
945 s1Tel
.highKey
.decrement();
946 bottomTrans1
= s1Tel
.trans
;
948 /* Notify the caller that we are breaking s1. This gives them a
949 * chance to duplicate s1Tel[0,1].value. */
950 CO_RETURN2( S1SticksOutBreak
, BreakS1
);
952 /* Broken off range is only in s1. */
953 CO_RETURN2( S1SticksOut
, RangeInS1
);
955 /* Advance over the part sticking out front. */
956 s1Tel
.lowKey
= bottomLow
;
957 s1Tel
.highKey
= bottomHigh
;
958 s1Tel
.trans
= bottomTrans1
;
960 else if ( s2Tel
.lowKey
< s1Tel
.lowKey
) {
961 /* Range from state2 sticks out front. Must break it into
962 * non-overlaping and overlaping segments. */
963 bottomLow
= s1Tel
.lowKey
;
964 bottomHigh
= s2Tel
.highKey
;
965 s2Tel
.highKey
= s1Tel
.lowKey
;
966 s2Tel
.highKey
.decrement();
967 bottomTrans2
= s2Tel
.trans
;
969 /* Notify the caller that we are breaking s2. This gives them a
970 * chance to duplicate s2Tel[0,1].value. */
971 CO_RETURN2( S2SticksOutBreak
, BreakS2
);
973 /* Broken off range is only in s2. */
974 CO_RETURN2( S2SticksOut
, RangeInS2
);
976 /* Advance over the part sticking out front. */
977 s2Tel
.lowKey
= bottomLow
;
978 s2Tel
.highKey
= bottomHigh
;
979 s2Tel
.trans
= bottomTrans2
;
981 /* Low ends are even. Are the high ends even? */
982 else if ( s1Tel
.highKey
< s2Tel
.highKey
) {
983 /* Range from state2 goes longer than the range from state1. We
984 * must break the range from state2 into an evenly overlaping
986 bottomLow
= s1Tel
.highKey
;
987 bottomLow
.increment();
988 bottomHigh
= s2Tel
.highKey
;
989 s2Tel
.highKey
= s1Tel
.highKey
;
990 bottomTrans2
= s2Tel
.trans
;
992 /* Notify the caller that we are breaking s2. This gives them a
993 * chance to duplicate s2Tel[0,1].value. */
994 CO_RETURN2( S2DragsBehindBreak
, BreakS2
);
996 /* Breaking s2 produces exact overlap. */
997 CO_RETURN2( S2DragsBehind
, RangeOverlap
);
999 /* Advance over the front we just broke off of range 2. */
1000 s2Tel
.lowKey
= bottomLow
;
1001 s2Tel
.highKey
= bottomHigh
;
1002 s2Tel
.trans
= bottomTrans2
;
1004 /* Advance over the entire s1Tel. We have consumed it. */
1007 else if ( s2Tel
.highKey
< s1Tel
.highKey
) {
1008 /* Range from state1 goes longer than the range from state2. We
1009 * must break the range from state1 into an evenly overlaping
1011 bottomLow
= s2Tel
.highKey
;
1012 bottomLow
.increment();
1013 bottomHigh
= s1Tel
.highKey
;
1014 s1Tel
.highKey
= s2Tel
.highKey
;
1015 bottomTrans1
= s1Tel
.trans
;
1017 /* Notify the caller that we are breaking s1. This gives them a
1018 * chance to duplicate s2Tel[0,1].value. */
1019 CO_RETURN2( S1DragsBehindBreak
, BreakS1
);
1021 /* Breaking s1 produces exact overlap. */
1022 CO_RETURN2( S1DragsBehind
, RangeOverlap
);
1024 /* Advance over the front we just broke off of range 1. */
1025 s1Tel
.lowKey
= bottomLow
;
1026 s1Tel
.highKey
= bottomHigh
;
1027 s1Tel
.trans
= bottomTrans1
;
1029 /* Advance over the entire s2Tel. We have consumed it. */
1033 /* There is an exact overlap. */
1034 CO_RETURN2( ExactOverlap
, RangeOverlap
);
1041 /* Done, go into end state. */
1046 /* Compare lists of epsilon transitions. Entries are name ids of targets. */
1047 typedef CmpTable
< int, CmpOrd
<int> > CmpEpsilonTrans
;
1049 /* Compare class for the Approximate minimization. */
1054 int compare( const StateAp
*pState1
, const StateAp
*pState2
);
1057 /* Compare class for the initial partitioning of a partition minimization. */
1058 class InitPartitionCompare
1061 InitPartitionCompare() { }
1062 int compare( const StateAp
*pState1
, const StateAp
*pState2
);
1065 /* Compare class for the regular partitioning of a partition minimization. */
1066 class PartitionCompare
1069 PartitionCompare() { }
1070 int compare( const StateAp
*pState1
, const StateAp
*pState2
);
1073 /* Compare class for a minimization that marks pairs. Provides the shouldMark
1079 bool shouldMark( MarkIndex
&markIndex
, const StateAp
*pState1
,
1080 const StateAp
*pState2
);
1083 /* List of partitions. */
1084 typedef DList
< MinPartition
> PartitionList
;
1086 /* List of transtions out of a state. */
1087 typedef Vector
<TransEl
> TransListVect
;
1089 /* Entry point map used for keeping track of entry points in a machine. */
1090 typedef BstSet
< int > EntryIdSet
;
1091 typedef BstMapEl
< int, StateAp
* > EntryMapEl
;
1092 typedef BstMap
< int, StateAp
* > EntryMap
;
1093 typedef Vector
<EntryMapEl
> EntryMapBase
;
1095 /* Graph class that implements actions and priorities. */
1098 /* Constructors/Destructors. */
1100 FsmAp( const FsmAp
&graph
);
1103 /* The list of states. */
1104 StateList stateList
;
1105 StateList misfitList
;
1107 /* The map of entry points. */
1108 EntryMap entryPoints
;
1110 /* The start state. */
1111 StateAp
*startState
;
1113 /* Error state, possibly created only when the final machine has been
1114 * created and the XML machine is about to be written. No transitions
1115 * point to this state. */
1118 /* The set of final states. */
1119 StateSet finStateSet
;
1121 /* Misfit Accounting. Are misfits put on a separate list. */
1122 bool misfitAccounting
;
1125 * Transition actions and priorities.
1128 /* Set priorities on transtions. */
1129 void startFsmPrior( int ordering
, PriorDesc
*prior
);
1130 void allTransPrior( int ordering
, PriorDesc
*prior
);
1131 void finishFsmPrior( int ordering
, PriorDesc
*prior
);
1132 void leaveFsmPrior( int ordering
, PriorDesc
*prior
);
1134 /* Action setting support. */
1135 void transferOutActions( StateAp
*state
);
1136 void transferErrorActions( StateAp
*state
, int transferPoint
);
1137 void setErrorActions( StateAp
*state
, const ActionTable
&other
);
1138 void setErrorAction( StateAp
*state
, int ordering
, Action
*action
);
1140 /* Fill all spaces in a transition list with an error transition. */
1141 void fillGaps( StateAp
*state
);
1143 /* Similar to setErrorAction, instead gives a state to go to on error. */
1144 void setErrorTarget( StateAp
*state
, StateAp
*target
, int *orderings
,
1145 Action
**actions
, int nActs
);
1147 /* Set actions to execute. */
1148 void startFsmAction( int ordering
, Action
*action
);
1149 void allTransAction( int ordering
, Action
*action
);
1150 void finishFsmAction( int ordering
, Action
*action
);
1151 void leaveFsmAction( int ordering
, Action
*action
);
1152 void longMatchAction( int ordering
, LongestMatchPart
*lmPart
);
1154 /* Set conditions. */
1155 CondSpace
*addCondSpace( const CondSet
&condSet
);
1157 void findEmbedExpansions( ExpansionList
&expansionList
,
1158 StateAp
*destState
, Action
*condAction
, bool sense
);
1159 void embedCondition( MergeData
&md
, StateAp
*state
, Action
*condAction
, bool sense
);
1160 void embedCondition( StateAp
*state
, Action
*condAction
, bool sense
);
1162 void startFsmCondition( Action
*condAction
, bool sense
);
1163 void allTransCondition( Action
*condAction
, bool sense
);
1164 void leaveFsmCondition( Action
*condAction
, bool sense
);
1166 /* Set error actions to execute. */
1167 void startErrorAction( int ordering
, Action
*action
, int transferPoint
);
1168 void allErrorAction( int ordering
, Action
*action
, int transferPoint
);
1169 void finalErrorAction( int ordering
, Action
*action
, int transferPoint
);
1170 void notStartErrorAction( int ordering
, Action
*action
, int transferPoint
);
1171 void notFinalErrorAction( int ordering
, Action
*action
, int transferPoint
);
1172 void middleErrorAction( int ordering
, Action
*action
, int transferPoint
);
1174 /* Set EOF actions. */
1175 void startEOFAction( int ordering
, Action
*action
);
1176 void allEOFAction( int ordering
, Action
*action
);
1177 void finalEOFAction( int ordering
, Action
*action
);
1178 void notStartEOFAction( int ordering
, Action
*action
);
1179 void notFinalEOFAction( int ordering
, Action
*action
);
1180 void middleEOFAction( int ordering
, Action
*action
);
1182 /* Set To State actions. */
1183 void startToStateAction( int ordering
, Action
*action
);
1184 void allToStateAction( int ordering
, Action
*action
);
1185 void finalToStateAction( int ordering
, Action
*action
);
1186 void notStartToStateAction( int ordering
, Action
*action
);
1187 void notFinalToStateAction( int ordering
, Action
*action
);
1188 void middleToStateAction( int ordering
, Action
*action
);
1190 /* Set From State actions. */
1191 void startFromStateAction( int ordering
, Action
*action
);
1192 void allFromStateAction( int ordering
, Action
*action
);
1193 void finalFromStateAction( int ordering
, Action
*action
);
1194 void notStartFromStateAction( int ordering
, Action
*action
);
1195 void notFinalFromStateAction( int ordering
, Action
*action
);
1196 void middleFromStateAction( int ordering
, Action
*action
);
1198 /* Shift the action ordering of the start transitions to start at
1199 * fromOrder and increase in units of 1. Useful before kleene star
1201 int shiftStartActionOrder( int fromOrder
);
1203 /* Clear all priorities from the fsm to so they won't affcet minimization
1204 * of the final fsm. */
1205 void clearAllPriorities();
1207 /* Zero out all the function keys. */
1208 void nullActionKeys();
1210 /* Walk the list of states and verify state properties. */
1211 void verifyStates();
1213 /* Misfit Accounting. Are misfits put on a separate list. */
1214 void setMisfitAccounting( bool val
)
1215 { misfitAccounting
= val
; }
1217 /* Set and Unset a state as final. */
1218 void setFinState( StateAp
*state
);
1219 void unsetFinState( StateAp
*state
);
1221 void setStartState( StateAp
*state
);
1222 void unsetStartState( );
1224 /* Set and unset a state as an entry point. */
1225 void setEntry( int id
, StateAp
*state
);
1226 void changeEntry( int id
, StateAp
*to
, StateAp
*from
);
1227 void unsetEntry( int id
, StateAp
*state
);
1228 void unsetEntry( int id
);
1229 void unsetAllEntryPoints();
1231 /* Epsilon transitions. */
1232 void epsilonTrans( int id
);
1233 void shadowReadWriteStates( MergeData
&md
);
1236 * Basic attaching and detaching.
1239 /* Common to attaching/detaching list and default. */
1240 void attachToInList( StateAp
*from
, StateAp
*to
, TransAp
*&head
, TransAp
*trans
);
1241 void detachFromInList( StateAp
*from
, StateAp
*to
, TransAp
*&head
, TransAp
*trans
);
1243 /* Attach with a new transition. */
1244 TransAp
*attachNewTrans( StateAp
*from
, StateAp
*to
,
1245 Key onChar1
, Key onChar2
);
1247 /* Attach with an existing transition that already in an out list. */
1248 void attachTrans( StateAp
*from
, StateAp
*to
, TransAp
*trans
);
1250 /* Redirect a transition away from error and towards some state. */
1251 void redirectErrorTrans( StateAp
*from
, StateAp
*to
, TransAp
*trans
);
1253 /* Detach a transition from a target state. */
1254 void detachTrans( StateAp
*from
, StateAp
*to
, TransAp
*trans
);
1256 /* Detach a state from the graph. */
1257 void detachState( StateAp
*state
);
1260 * NFA to DFA conversion routines.
1263 /* Duplicate a transition that will dropin to a free spot. */
1264 TransAp
*dupTrans( StateAp
*from
, TransAp
*srcTrans
);
1266 /* In crossing, two transitions both go to real states. */
1267 TransAp
*fsmAttachStates( MergeData
&md
, StateAp
*from
,
1268 TransAp
*destTrans
, TransAp
*srcTrans
);
1270 /* Two transitions are to be crossed, handle the possibility of either
1271 * going to the error state. */
1272 TransAp
*mergeTrans( MergeData
&md
, StateAp
*from
,
1273 TransAp
*destTrans
, TransAp
*srcTrans
);
1275 /* Compare deterimne relative priorities of two transition tables. */
1276 int comparePrior( const PriorTable
&priorTable1
, const PriorTable
&priorTable2
);
1278 /* Cross a src transition with one that is already occupying a spot. */
1279 TransAp
*crossTransitions( MergeData
&md
, StateAp
*from
,
1280 TransAp
*destTrans
, TransAp
*srcTrans
);
1282 void outTransCopy( MergeData
&md
, StateAp
*dest
, TransAp
*srcList
);
1284 void doRemove( MergeData
&md
, StateAp
*destState
, ExpansionList
&expList1
);
1285 void doExpand( MergeData
&md
, StateAp
*destState
, ExpansionList
&expList1
);
1286 void findCondExpInTrans( ExpansionList
&expansionList
, StateAp
*state
,
1287 Key lowKey
, Key highKey
, CondSpace
*fromCondSpace
, CondSpace
*toCondSpace
,
1288 long destVals
, LongVect
&toValsList
);
1289 void findTransExpansions( ExpansionList
&expansionList
,
1290 StateAp
*destState
, StateAp
*srcState
);
1291 void findCondExpansions( ExpansionList
&expansionList
,
1292 StateAp
*destState
, StateAp
*srcState
);
1293 void mergeStateConds( StateAp
*destState
, StateAp
*srcState
);
1295 /* Merge a set of states into newState. */
1296 void mergeStates( MergeData
&md
, StateAp
*destState
,
1297 StateAp
**srcStates
, int numSrc
);
1298 void mergeStatesLeaving( MergeData
&md
, StateAp
*destState
, StateAp
*srcState
);
1299 void mergeStates( MergeData
&md
, StateAp
*destState
, StateAp
*srcState
);
1301 /* Make all states that are combinations of other states and that
1302 * have not yet had their out transitions filled in. This will
1303 * empty out stateDict and stFil. */
1304 void fillInStates( MergeData
&md
);
1307 * Transition Comparison.
1310 /* Compare transition data. Either of the pointers may be null. */
1311 static inline int compareDataPtr( TransAp
*trans1
, TransAp
*trans2
);
1313 /* Compare target state and transition data. Either pointer may be null. */
1314 static inline int compareFullPtr( TransAp
*trans1
, TransAp
*trans2
);
1316 /* Compare target partitions. Either pointer may be null. */
1317 static inline int comparePartPtr( TransAp
*trans1
, TransAp
*trans2
);
1319 /* Check marked status of target states. Either pointer may be null. */
1320 static inline bool shouldMarkPtr( MarkIndex
&markIndex
,
1321 TransAp
*trans1
, TransAp
*trans2
);
1327 /* Compare priority and function table of transitions. */
1328 static int compareTransData( TransAp
*trans1
, TransAp
*trans2
);
1330 /* Add in the properties of srcTrans into this. */
1331 void addInTrans( TransAp
*destTrans
, TransAp
*srcTrans
);
1333 /* Compare states on data stored in the states. */
1334 static int compareStateData( const StateAp
*state1
, const StateAp
*state2
);
1336 /* Out transition data. */
1337 void clearOutData( StateAp
*state
);
1338 bool hasOutData( StateAp
*state
);
1339 void transferOutData( StateAp
*destState
, StateAp
*srcState
);
1345 /* New up a state and add it to the graph. */
1346 StateAp
*addState();
1349 * Building basic machines
1352 void concatFsm( Key c
);
1353 void concatFsm( Key
*str
, int len
);
1354 void concatFsmCI( Key
*str
, int len
);
1355 void orFsm( Key
*set
, int len
);
1356 void rangeFsm( Key low
, Key high
);
1357 void rangeStarFsm( Key low
, Key high
);
1366 void repeatOp( int times
);
1367 void optionalRepeatOp( int times
);
1368 void concatOp( FsmAp
*other
);
1369 void unionOp( FsmAp
*other
);
1370 void intersectOp( FsmAp
*other
);
1371 void subtractOp( FsmAp
*other
);
1373 void joinOp( int startId
, int finalId
, FsmAp
**others
, int numOthers
);
1374 void globOp( FsmAp
**others
, int numOthers
);
1375 void deterministicEntry();
1381 /* Determine if there are any entry points into a start state other than
1382 * the start state. */
1383 bool isStartStateIsolated();
1385 /* Make a new start state that has no entry points. Will not change the
1386 * identity of the fsm. */
1387 void isolateStartState();
1389 /* Workers for resolving epsilon transitions. */
1390 bool inEptVect( EptVect
*eptVect
, StateAp
*targ
);
1391 void epsilonFillEptVectFrom( StateAp
*root
, StateAp
*from
, bool parentLeaving
);
1392 void resolveEpsilonTrans( MergeData
&md
);
1394 /* Workers for concatenation and union. */
1395 void doConcat( FsmAp
*other
, StateSet
*fromStates
, bool optional
);
1396 void doOr( FsmAp
*other
);
1402 /* Unset any final states that are no longer to be final
1403 * due to final bits. */
1404 void unsetIncompleteFinals();
1405 void unsetKilledFinals();
1407 /* Bring in other's entry points. Assumes others states are going to be
1408 * copied into this machine. */
1409 void copyInEntryPoints( FsmAp
*other
);
1411 /* Ordering states. */
1412 void depthFirstOrdering( StateAp
*state
);
1413 void depthFirstOrdering();
1414 void sortStatesByFinal();
1416 /* Set sqequential state numbers starting at 0. */
1417 void setStateNumbers( int base
);
1419 /* Unset all final states. */
1420 void unsetAllFinStates();
1422 /* Set the bits of final states and clear the bits of non final states. */
1423 void setFinBits( int finStateBits
);
1426 * Self-consistency checks.
1429 /* Run a sanity check on the machine. */
1430 void verifyIntegrity();
1432 /* Verify that there are no unreachable states, or dead end states. */
1433 void verifyReachability();
1434 void verifyNoDeadEndStates();
1440 /* Mark all states reachable from state. */
1441 void markReachableFromHereReverse( StateAp
*state
);
1443 /* Mark all states reachable from state. */
1444 void markReachableFromHere( StateAp
*state
);
1445 void markReachableFromHereStopFinal( StateAp
*state
);
1447 /* Removes states that cannot be reached by any path in the fsm and are
1448 * thus wasted silicon. */
1449 void removeDeadEndStates();
1451 /* Removes states that cannot be reached by any path in the fsm and are
1452 * thus wasted silicon. */
1453 void removeUnreachableStates();
1455 /* Remove error actions from states on which the error transition will
1456 * never be taken. */
1457 bool outListCovers( StateAp
*state
);
1458 bool anyErrorRange( StateAp
*state
);
1460 /* Remove states that are on the misfit list. */
1461 void removeMisfits();
1467 /* Minimization by partitioning. */
1468 void minimizePartition1();
1469 void minimizePartition2();
1471 /* Minimize the final state Machine. The result is the minimal fsm. Slow
1472 * but stable, correct minimization. Uses n^2 space (lookout) and average
1473 * n^2 time. Worst case n^3 time, but a that is a very rare case. */
1474 void minimizeStable();
1476 /* Minimize the final state machine. Does not find the minimal fsm, but a
1477 * pretty good approximation. Does not use any extra space. Average n^2
1478 * time. Worst case n^3 time, but a that is a very rare case. */
1479 void minimizeApproximate();
1481 /* This is the worker for the minimize approximate solution. It merges
1482 * states that have identical out transitions. */
1483 bool minimizeRound( );
1485 /* Given an intial partioning of states, split partitions that have out trans
1486 * to differing partitions. */
1487 int partitionRound( StateAp
**statePtrs
, MinPartition
*parts
, int numParts
);
1489 /* Split partitions that have a transition to a previously split partition, until
1490 * there are no more partitions to split. */
1491 int splitCandidates( StateAp
**statePtrs
, MinPartition
*parts
, int numParts
);
1493 /* Fuse together states in the same partition. */
1494 void fusePartitions( MinPartition
*parts
, int numParts
);
1496 /* Mark pairs where out final stateness differs, out trans data differs,
1497 * trans pairs go to a marked pair or trans data differs. Should get
1499 void initialMarkRound( MarkIndex
&markIndex
);
1501 /* One marking round on all state pairs. Considers if trans pairs go
1502 * to a marked state only. Returns whether or not a pair was marked. */
1503 bool markRound( MarkIndex
&markIndex
);
1505 /* Move the in trans into src into dest. */
1506 void inTransMove(StateAp
*dest
, StateAp
*src
);
1508 /* Make state src and dest the same state. */
1509 void fuseEquivStates(StateAp
*dest
, StateAp
*src
);
1511 /* Find any states that didn't get marked by the marking algorithm and
1512 * merge them into the primary states of their equivalence class. */
1513 void fuseUnmarkedPairs( MarkIndex
&markIndex
);
1515 /* Merge neighboring transitions go to the same state and have the same
1516 * transitions data. */
1517 void compressTransitions();
1519 /* Returns true if there is a transtion (either explicit or by a gap) to
1520 * the error state. */
1521 bool checkErrTrans( StateAp
*state
, TransAp
*trans
);
1522 bool checkErrTransFinish( StateAp
*state
);
1523 bool hasErrorTrans();
1525 /* Check if a machine defines a single character. This is useful in
1526 * validating ranges and machines to export. */
1527 bool checkSingleCharMachine( );
1531 #endif /* _FSMGRAPH_H */