2 * Copyright 2001-2007 Adrian Thurston <thurston@cs.queensu.ca>
5 /* This file is part of Ragel.
7 * Ragel is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * Ragel is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Ragel; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
38 #include "sbsttable.h"
45 /* Flags that control merging. */
46 #define STB_GRAPH1 0x01
47 #define STB_GRAPH2 0x02
49 #define STB_ISFINAL 0x04
50 #define STB_ISMARKED 0x08
51 #define STB_ONLIST 0x10
59 struct LongestMatchPart
;
61 /* State list element for unambiguous access to list element. */
67 /* This is the marked index for a state pair. Used in minimization. It keeps
68 * track of whether or not the state pair is marked. */
71 MarkIndex(int states
);
74 void markPair(int state1
, int state2
);
75 bool isPairMarked(int state1
, int state2
);
82 extern KeyOps
*keyOps
;
84 /* Transistion Action Element. */
85 typedef SBstMapEl
< int, Action
* > ActionTableEl
;
87 /* Nodes in the tree that use this action. */
90 typedef Vector
<NameInst
*> ActionRefs
;
92 /* Element in list of actions. Contains the string for the code to exectute. */
95 public DListEl
<Action
>,
96 public AvlTreeEl
<Action
>
100 Action( const InputLoc
&loc
, const char *name
, InlineList
*inlineList
, int condId
)
104 inlineList(inlineList
),
117 /* Key for action dictionary. */
118 const char *getKey() const { return name
; }
120 /* Data collected during parse. */
123 InlineList
*inlineList
;
126 void actionName( ostream
&out
)
131 out
<< loc
.line
<< ":" << loc
.col
;
134 /* Places in the input text that reference the action. */
135 ActionRefs actionRefs
;
137 /* Number of references in the final machine. */
139 { return numTransRefs
+ numToStateRefs
+ numFromStateRefs
+ numEofRefs
; }
142 int numFromStateRefs
;
153 static inline int compare( const Action
*cond1
, const Action
*cond2
)
155 if ( cond1
->condId
< cond2
->condId
)
157 else if ( cond1
->condId
> cond2
->condId
)
163 /* A list of actions. */
164 typedef DList
<Action
> ActionList
;
165 typedef AvlTree
<Action
, char *, CmpStr
> ActionDict
;
167 /* Structure for reverse action mapping. */
168 struct RevActionMapEl
175 /* Transition Action Table. */
177 : public SBstMap
< int, Action
*, CmpOrd
<int> >
179 void setAction( int ordering
, Action
*action
);
180 void setActions( int *orderings
, Action
**actions
, int nActs
);
181 void setActions( const ActionTable
&other
);
183 bool hasAction( Action
*action
);
186 typedef SBstSet
< Action
*, CmpOrd
<Action
*> > ActionSet
;
187 typedef CmpSTable
< Action
*, CmpOrd
<Action
*> > CmpActionSet
;
189 /* Transistion Action Element. */
190 typedef SBstMapEl
< int, LongestMatchPart
* > LmActionTableEl
;
192 /* Transition Action Table. */
194 : public SBstMap
< int, LongestMatchPart
*, CmpOrd
<int> >
196 void setAction( int ordering
, LongestMatchPart
*action
);
197 void setActions( const LmActionTable
&other
);
200 /* Compare of a whole action table element (key & value). */
201 struct CmpActionTableEl
203 static int compare( const ActionTableEl
&action1
,
204 const ActionTableEl
&action2
)
206 if ( action1
.key
< action2
.key
)
208 else if ( action1
.key
> action2
.key
)
210 else if ( action1
.value
< action2
.value
)
212 else if ( action1
.value
> action2
.value
)
218 /* Compare for ActionTable. */
219 typedef CmpSTable
< ActionTableEl
, CmpActionTableEl
> CmpActionTable
;
221 /* Compare of a whole lm action table element (key & value). */
222 struct CmpLmActionTableEl
224 static int compare( const LmActionTableEl
&lmAction1
,
225 const LmActionTableEl
&lmAction2
)
227 if ( lmAction1
.key
< lmAction2
.key
)
229 else if ( lmAction1
.key
> lmAction2
.key
)
231 else if ( lmAction1
.value
< lmAction2
.value
)
233 else if ( lmAction1
.value
> lmAction2
.value
)
239 /* Compare for ActionTable. */
240 typedef CmpSTable
< LmActionTableEl
, CmpLmActionTableEl
> CmpLmActionTable
;
242 /* Action table element for error action tables. Adds the encoding of transfer
244 struct ErrActionTableEl
246 ErrActionTableEl( Action
*action
, int ordering
, int transferPoint
)
247 : ordering(ordering
), action(action
), transferPoint(transferPoint
) { }
249 /* Ordering and id of the action embedding. */
253 /* Id of point of transfere from Error action table to transtions and
257 int getKey() const { return ordering
; }
260 struct ErrActionTable
261 : public SBstTable
< ErrActionTableEl
, int, CmpOrd
<int> >
263 void setAction( int ordering
, Action
*action
, int transferPoint
);
264 void setActions( const ErrActionTable
&other
);
267 /* Compare of an error action table element (key & value). */
268 struct CmpErrActionTableEl
270 static int compare( const ErrActionTableEl
&action1
,
271 const ErrActionTableEl
&action2
)
273 if ( action1
.ordering
< action2
.ordering
)
275 else if ( action1
.ordering
> action2
.ordering
)
277 else if ( action1
.action
< action2
.action
)
279 else if ( action1
.action
> action2
.action
)
281 else if ( action1
.transferPoint
< action2
.transferPoint
)
283 else if ( action1
.transferPoint
> action2
.transferPoint
)
289 /* Compare for ErrActionTable. */
290 typedef CmpSTable
< ErrActionTableEl
, CmpErrActionTableEl
> CmpErrActionTable
;
293 /* Descibe a priority, shared among PriorEls.
294 * Has key and whether or not used. */
301 /* Element in the arrays of priorities for transitions and arrays. Ordering is
302 * unique among instantiations of machines, desc is shared. */
305 PriorEl( int ordering
, PriorDesc
*desc
)
306 : ordering(ordering
), desc(desc
) { }
312 /* Compare priority elements, which are ordered by the priority descriptor
316 static inline int compare( const PriorEl
&pel1
, const PriorEl
&pel2
)
318 if ( pel1
.desc
->key
< pel2
.desc
->key
)
320 else if ( pel1
.desc
->key
> pel2
.desc
->key
)
328 /* Priority Table. */
330 : public SBstSet
< PriorEl
, PriorElCmp
>
332 void setPrior( int ordering
, PriorDesc
*desc
);
333 void setPriors( const PriorTable
&other
);
336 /* Compare of prior table elements for distinguising state data. */
339 static inline int compare( const PriorEl
&pel1
, const PriorEl
&pel2
)
341 if ( pel1
.desc
< pel2
.desc
)
343 else if ( pel1
.desc
> pel2
.desc
)
345 else if ( pel1
.ordering
< pel2
.ordering
)
347 else if ( pel1
.ordering
> pel2
.ordering
)
353 /* Compare of PriorTable distinguising state data. Using a compare of the
354 * pointers is a little more strict than it needs be. It requires that
355 * prioritiy tables have the exact same set of priority assignment operators
356 * (from the input lang) to be considered equal.
358 * Really only key-value pairs need be tested and ordering be merged. However
359 * this would require that in the fuseing of states, priority descriptors be
360 * chosen for the new fused state based on priority. Since the out transition
361 * lists and ranges aren't necessarily going to line up, this is more work for
362 * little gain. Final compression resets all priorities first, so this would
363 * only be useful for compression at every operator, which is only an
364 * undocumented test feature.
366 typedef CmpSTable
<PriorEl
, CmpPriorEl
> CmpPriorTable
;
368 /* Plain action list that imposes no ordering. */
369 typedef Vector
<int> TransFuncList
;
371 /* Comparison for TransFuncList. */
372 typedef CmpTable
< int, CmpOrd
<int> > TransFuncListCompare
;
374 /* Transition class that implements actions and priorities. */
377 TransAp() : fromState(0), toState(0) {}
378 TransAp( const TransAp
&other
) :
379 lowKey(other
.lowKey
),
380 highKey(other
.highKey
),
381 fromState(0), toState(0),
382 actionTable(other
.actionTable
),
383 priorTable(other
.priorTable
)
385 assert( lmActionTable
.length() == 0 && other
.lmActionTable
.length() == 0 );
392 /* Pointers for outlist. */
393 TransAp
*prev
, *next
;
395 /* Pointers for in-list. */
396 TransAp
*ilprev
, *ilnext
;
398 /* The function table and priority for the transition. */
399 ActionTable actionTable
;
400 PriorTable priorTable
;
402 LmActionTable lmActionTable
;
405 /* In transition list. Like DList except only has head pointers, which is all
406 * that is required. Insertion and deletion is handled by the graph. This
407 * class provides the iterator of a single list. */
410 TransInList() : head(0) { }
416 /* Default construct. */
419 /* Construct, assign from a list. */
420 Iter( const TransInList
&il
) : ptr(il
.head
) { }
421 Iter
&operator=( const TransInList
&dl
) { ptr
= dl
.head
; return *this; }
424 bool lte() const { return ptr
!= 0; }
425 bool end() const { return ptr
== 0; }
427 /* At the first, last element. */
428 bool first() const { return ptr
&& ptr
->ilprev
== 0; }
429 bool last() const { return ptr
&& ptr
->ilnext
== 0; }
431 /* Cast, dereference, arrow ops. */
432 operator TransAp
*() const { return ptr
; }
433 TransAp
&operator *() const { return *ptr
; }
434 TransAp
*operator->() const { return ptr
; }
436 /* Increment, decrement. */
437 inline void operator++(int) { ptr
= ptr
->ilnext
; }
438 inline void operator--(int) { ptr
= ptr
->ilprev
; }
440 /* The iterator is simply a pointer. */
445 typedef DList
<TransAp
> TransList
;
447 /* Set of states, list of states. */
448 typedef BstSet
<StateAp
*> StateSet
;
449 typedef DList
<StateAp
> StateList
;
451 /* A element in a state dict. */
454 public AvlTreeEl
<StateDictEl
>
456 StateDictEl(const StateSet
&stateSet
)
457 : stateSet(stateSet
) { }
459 const StateSet
&getKey() { return stateSet
; }
464 /* Dictionary mapping a set of states to a target state. */
465 typedef AvlTree
< StateDictEl
, StateSet
, CmpTable
<StateAp
*> > StateDict
;
467 /* Data needed for a merge operation. */
471 : stfillHead(0), stfillTail(0) { }
478 void fillListAppend( StateAp
*state
);
485 TransEl( Key lowKey
, Key highKey
)
486 : lowKey(lowKey
), highKey(highKey
) { }
487 TransEl( Key lowKey
, Key highKey
, TransAp
*value
)
488 : lowKey(lowKey
), highKey(highKey
), value(value
) { }
496 static int compare( const Key key1
, const Key key2
)
500 else if ( key1
> key2
)
507 /* Vector based set of key items. */
508 typedef BstSet
<Key
, CmpKey
> KeySet
;
512 MinPartition() : active(false) { }
517 MinPartition
*prev
, *next
;
520 /* Epsilon transition stored in a state. Specifies the target */
521 typedef Vector
<int> EpsilonTrans
;
523 /* List of states that are to be drawn into this. */
526 EptVectEl( StateAp
*targ
, bool leaving
)
527 : targ(targ
), leaving(leaving
) { }
532 typedef Vector
<EptVectEl
> EptVect
;
534 /* Set of entry ids that go into this state. */
535 typedef BstSet
<int> EntryIdSet
;
537 /* Set of longest match items that may be active in a given state. */
538 typedef BstSet
<LongestMatchPart
*> LmItemSet
;
540 /* A Conditions which is to be
541 * transfered on pending out transitions. */
544 OutCond( Action
*action
, bool sense
)
545 : action(action
), sense(sense
) {}
553 static int compare( const OutCond
&outCond1
, const OutCond
&outCond2
)
555 if ( outCond1
.action
< outCond2
.action
)
557 else if ( outCond1
.action
> outCond2
.action
)
559 else if ( outCond1
.sense
< outCond2
.sense
)
561 else if ( outCond1
.sense
> outCond2
.sense
)
567 /* Set of conditions to be transfered to on pending out transitions. */
568 typedef SBstSet
< OutCond
, CmpOutCond
> OutCondSet
;
569 typedef CmpSTable
< OutCond
, CmpOutCond
> CmpOutCondSet
;
572 typedef BstSet
< Action
*, CmpCondId
> CondSet
;
573 typedef CmpTable
< Action
*, CmpCondId
> CmpCondSet
;
576 : public AvlTreeEl
<CondSpace
>
578 CondSpace( const CondSet
&condSet
)
579 : condSet(condSet
) {}
581 const CondSet
&getKey() { return condSet
; }
588 typedef Vector
<CondSpace
*> CondSpaceVect
;
590 typedef AvlTree
<CondSpace
, CondSet
, CmpCondSet
> CondSpaceMap
;
594 StateCond( Key lowKey
, Key highKey
) :
595 lowKey(lowKey
), highKey(highKey
) {}
599 CondSpace
*condSpace
;
601 StateCond
*prev
, *next
;
604 typedef DList
<StateCond
> StateCondList
;
605 typedef Vector
<long> LongVect
;
609 Expansion( Key lowKey
, Key highKey
) :
610 lowKey(lowKey
), highKey(highKey
),
611 fromTrans(0), fromCondSpace(0),
616 if ( fromTrans
!= 0 )
624 CondSpace
*fromCondSpace
;
627 CondSpace
*toCondSpace
;
630 Expansion
*prev
, *next
;
633 typedef DList
<Expansion
> ExpansionList
;
645 CondData() : lastCondKey(0) {}
647 /* Condition info. */
650 CondSpaceMap condSpaceMap
;
653 extern CondData
*condData
;
655 struct FsmConstructFail
662 FsmConstructFail( Reason reason
)
667 /* State class that implements actions and priorities. */
671 StateAp(const StateAp
&other
);
674 /* Is the state final? */
675 bool isFinState() { return stateBits
& STB_ISFINAL
; }
677 /* Out transition list and the pointer for the default out trans. */
680 /* In transition Lists. */
683 /* Set only during scanner construction when actions are added. NFA to DFA
684 * code can ignore this. */
687 /* Entry points into the state. */
690 /* Epsilon transitions. */
691 EpsilonTrans epsilonTrans
;
693 /* Condition info. */
694 StateCondList stateCondList
;
696 /* Number of in transitions from states other than ourselves. */
699 /* Temporary data for various algorithms. */
701 /* When duplicating the fsm we need to map each
702 * state to the new state representing it. */
705 /* When minimizing machines by partitioning, this maps to the group
706 * the state is in. */
707 MinPartition
*partition
;
709 /* When merging states (state machine operations) this next pointer is
710 * used for the list of states that need to be filled in. */
713 /* Identification for printing and stable minimization. */
718 /* Data used in epsilon operation, maybe fit into alg? */
719 StateAp
*isolatedShadow
;
722 /* A pointer to a dict element that contains the set of states this state
723 * represents. This cannot go into alg, because alg.next is used during
724 * the merging process. */
725 StateDictEl
*stateDictEl
;
727 /* When drawing epsilon transitions, holds the list of states to merge
731 /* Bits controlling the behaviour of the state during collapsing to dfa. */
734 /* State list elements. */
735 StateAp
*next
, *prev
;
738 * Priority and Action data.
741 /* Out priorities transfered to out transitions. */
742 PriorTable outPriorTable
;
744 /* The following two action tables are distinguished by the fact that when
745 * toState actions are executed immediatly after transition actions of
746 * incoming transitions and the current character will be the same as the
747 * one available then. The fromState actions are executed immediately
748 * before the transition actions of outgoing transitions and the current
749 * character is same as the one available then. */
751 /* Actions to execute upon entering into a state. */
752 ActionTable toStateActionTable
;
754 /* Actions to execute when going from the state to the transition. */
755 ActionTable fromStateActionTable
;
757 /* Actions to add to any future transitions that leave via this state. */
758 ActionTable outActionTable
;
760 /* Conditions to add to any future transiions that leave via this sttate. */
761 OutCondSet outCondSet
;
763 /* Error action tables. */
764 ErrActionTable errActionTable
;
766 /* Actions to execute on eof. */
767 ActionTable eofActionTable
;
769 /* Set of longest match items that may be active in this state. */
773 template <class ListItem
> struct NextTrans
784 lowKey
= trans
->lowKey
;
785 highKey
= trans
->highKey
;
789 void set( ListItem
*t
) {
801 /* Encodes the different states that are meaningful to the of the iterator. */
802 enum PairIterUserState
804 RangeInS1
, RangeInS2
,
809 template <class ListItem1
, class ListItem2
= ListItem1
> struct PairIter
811 /* Encodes the different states that an fsm iterator can be in. */
814 ConsumeS1Range
, ConsumeS2Range
,
815 OnlyInS1Range
, OnlyInS2Range
,
816 S1SticksOut
, S1SticksOutBreak
,
817 S2SticksOut
, S2SticksOutBreak
,
818 S1DragsBehind
, S1DragsBehindBreak
,
819 S2DragsBehind
, S2DragsBehindBreak
,
823 PairIter( ListItem1
*list1
, ListItem2
*list2
);
825 /* Query iterator. */
826 bool lte() { return itState
!= End
; }
827 bool end() { return itState
== End
; }
828 void operator++(int) { findNext(); }
829 void operator++() { findNext(); }
831 /* Iterator state. */
835 PairIterUserState userState
;
837 NextTrans
<ListItem1
> s1Tel
;
838 NextTrans
<ListItem2
> s2Tel
;
839 Key bottomLow
, bottomHigh
;
840 ListItem1
*bottomTrans1
;
841 ListItem2
*bottomTrans2
;
847 /* Init the iterator by advancing to the first item. */
848 template <class ListItem1
, class ListItem2
> PairIter
<ListItem1
, ListItem2
>::PairIter(
849 ListItem1
*list1
, ListItem2
*list2
)
858 /* Return and re-entry for the co-routine iterators. This should ALWAYS be
859 * used inside of a block. */
860 #define CO_RETURN(label) \
863 entry##label: backIn = true
865 /* Return and re-entry for the co-routine iterators. This should ALWAYS be
866 * used inside of a block. */
867 #define CO_RETURN2(label, uState) \
869 userState = uState; \
871 entry##label: backIn = true
873 /* Advance to the next transition. When returns, trans points to the next
874 * transition, unless there are no more, in which case end() returns true. */
875 template <class ListItem1
, class ListItem2
> void PairIter
<ListItem1
, ListItem2
>::findNext()
877 /* This variable is used in dummy statements that follow the entry
878 * goto labels. The compiler needs some statement to follow the label. */
881 /* Jump into the iterator routine base on the iterator state. */
883 case Begin
: goto entryBegin
;
884 case ConsumeS1Range
: goto entryConsumeS1Range
;
885 case ConsumeS2Range
: goto entryConsumeS2Range
;
886 case OnlyInS1Range
: goto entryOnlyInS1Range
;
887 case OnlyInS2Range
: goto entryOnlyInS2Range
;
888 case S1SticksOut
: goto entryS1SticksOut
;
889 case S1SticksOutBreak
: goto entryS1SticksOutBreak
;
890 case S2SticksOut
: goto entryS2SticksOut
;
891 case S2SticksOutBreak
: goto entryS2SticksOutBreak
;
892 case S1DragsBehind
: goto entryS1DragsBehind
;
893 case S1DragsBehindBreak
: goto entryS1DragsBehindBreak
;
894 case S2DragsBehind
: goto entryS2DragsBehind
;
895 case S2DragsBehindBreak
: goto entryS2DragsBehindBreak
;
896 case ExactOverlap
: goto entryExactOverlap
;
897 case End
: goto entryEnd
;
901 /* Set up the next structs at the head of the transition lists. */
905 /* Concurrently scan both out ranges. */
907 if ( s1Tel
.trans
== 0 ) {
908 /* We are at the end of state1's ranges. Process the rest of
909 * state2's ranges. */
910 while ( s2Tel
.trans
!= 0 ) {
911 /* Range is only in s2. */
912 CO_RETURN2( ConsumeS2Range
, RangeInS2
);
917 else if ( s2Tel
.trans
== 0 ) {
918 /* We are at the end of state2's ranges. Process the rest of
919 * state1's ranges. */
920 while ( s1Tel
.trans
!= 0 ) {
921 /* Range is only in s1. */
922 CO_RETURN2( ConsumeS1Range
, RangeInS1
);
927 /* Both state1's and state2's transition elements are good.
928 * The signiture of no overlap is a back key being in front of a
930 else if ( s1Tel
.highKey
< s2Tel
.lowKey
) {
931 /* A range exists in state1 that does not overlap with state2. */
932 CO_RETURN2( OnlyInS1Range
, RangeInS1
);
935 else if ( s2Tel
.highKey
< s1Tel
.lowKey
) {
936 /* A range exists in state2 that does not overlap with state1. */
937 CO_RETURN2( OnlyInS2Range
, RangeInS2
);
940 /* There is overlap, must mix the ranges in some way. */
941 else if ( s1Tel
.lowKey
< s2Tel
.lowKey
) {
942 /* Range from state1 sticks out front. Must break it into
943 * non-overlaping and overlaping segments. */
944 bottomLow
= s2Tel
.lowKey
;
945 bottomHigh
= s1Tel
.highKey
;
946 s1Tel
.highKey
= s2Tel
.lowKey
;
947 s1Tel
.highKey
.decrement();
948 bottomTrans1
= s1Tel
.trans
;
950 /* Notify the caller that we are breaking s1. This gives them a
951 * chance to duplicate s1Tel[0,1].value. */
952 CO_RETURN2( S1SticksOutBreak
, BreakS1
);
954 /* Broken off range is only in s1. */
955 CO_RETURN2( S1SticksOut
, RangeInS1
);
957 /* Advance over the part sticking out front. */
958 s1Tel
.lowKey
= bottomLow
;
959 s1Tel
.highKey
= bottomHigh
;
960 s1Tel
.trans
= bottomTrans1
;
962 else if ( s2Tel
.lowKey
< s1Tel
.lowKey
) {
963 /* Range from state2 sticks out front. Must break it into
964 * non-overlaping and overlaping segments. */
965 bottomLow
= s1Tel
.lowKey
;
966 bottomHigh
= s2Tel
.highKey
;
967 s2Tel
.highKey
= s1Tel
.lowKey
;
968 s2Tel
.highKey
.decrement();
969 bottomTrans2
= s2Tel
.trans
;
971 /* Notify the caller that we are breaking s2. This gives them a
972 * chance to duplicate s2Tel[0,1].value. */
973 CO_RETURN2( S2SticksOutBreak
, BreakS2
);
975 /* Broken off range is only in s2. */
976 CO_RETURN2( S2SticksOut
, RangeInS2
);
978 /* Advance over the part sticking out front. */
979 s2Tel
.lowKey
= bottomLow
;
980 s2Tel
.highKey
= bottomHigh
;
981 s2Tel
.trans
= bottomTrans2
;
983 /* Low ends are even. Are the high ends even? */
984 else if ( s1Tel
.highKey
< s2Tel
.highKey
) {
985 /* Range from state2 goes longer than the range from state1. We
986 * must break the range from state2 into an evenly overlaping
988 bottomLow
= s1Tel
.highKey
;
989 bottomLow
.increment();
990 bottomHigh
= s2Tel
.highKey
;
991 s2Tel
.highKey
= s1Tel
.highKey
;
992 bottomTrans2
= s2Tel
.trans
;
994 /* Notify the caller that we are breaking s2. This gives them a
995 * chance to duplicate s2Tel[0,1].value. */
996 CO_RETURN2( S2DragsBehindBreak
, BreakS2
);
998 /* Breaking s2 produces exact overlap. */
999 CO_RETURN2( S2DragsBehind
, RangeOverlap
);
1001 /* Advance over the front we just broke off of range 2. */
1002 s2Tel
.lowKey
= bottomLow
;
1003 s2Tel
.highKey
= bottomHigh
;
1004 s2Tel
.trans
= bottomTrans2
;
1006 /* Advance over the entire s1Tel. We have consumed it. */
1009 else if ( s2Tel
.highKey
< s1Tel
.highKey
) {
1010 /* Range from state1 goes longer than the range from state2. We
1011 * must break the range from state1 into an evenly overlaping
1013 bottomLow
= s2Tel
.highKey
;
1014 bottomLow
.increment();
1015 bottomHigh
= s1Tel
.highKey
;
1016 s1Tel
.highKey
= s2Tel
.highKey
;
1017 bottomTrans1
= s1Tel
.trans
;
1019 /* Notify the caller that we are breaking s1. This gives them a
1020 * chance to duplicate s2Tel[0,1].value. */
1021 CO_RETURN2( S1DragsBehindBreak
, BreakS1
);
1023 /* Breaking s1 produces exact overlap. */
1024 CO_RETURN2( S1DragsBehind
, RangeOverlap
);
1026 /* Advance over the front we just broke off of range 1. */
1027 s1Tel
.lowKey
= bottomLow
;
1028 s1Tel
.highKey
= bottomHigh
;
1029 s1Tel
.trans
= bottomTrans1
;
1031 /* Advance over the entire s2Tel. We have consumed it. */
1035 /* There is an exact overlap. */
1036 CO_RETURN2( ExactOverlap
, RangeOverlap
);
1043 /* Done, go into end state. */
1048 /* Compare lists of epsilon transitions. Entries are name ids of targets. */
1049 typedef CmpTable
< int, CmpOrd
<int> > CmpEpsilonTrans
;
1051 /* Compare class for the Approximate minimization. */
1056 int compare( const StateAp
*pState1
, const StateAp
*pState2
);
1059 /* Compare class for the initial partitioning of a partition minimization. */
1060 class InitPartitionCompare
1063 InitPartitionCompare() { }
1064 int compare( const StateAp
*pState1
, const StateAp
*pState2
);
1067 /* Compare class for the regular partitioning of a partition minimization. */
1068 class PartitionCompare
1071 PartitionCompare() { }
1072 int compare( const StateAp
*pState1
, const StateAp
*pState2
);
1075 /* Compare class for a minimization that marks pairs. Provides the shouldMark
1081 bool shouldMark( MarkIndex
&markIndex
, const StateAp
*pState1
,
1082 const StateAp
*pState2
);
1085 /* List of partitions. */
1086 typedef DList
< MinPartition
> PartitionList
;
1088 /* List of transtions out of a state. */
1089 typedef Vector
<TransEl
> TransListVect
;
1091 /* Entry point map used for keeping track of entry points in a machine. */
1092 typedef BstSet
< int > EntryIdSet
;
1093 typedef BstMapEl
< int, StateAp
* > EntryMapEl
;
1094 typedef BstMap
< int, StateAp
* > EntryMap
;
1095 typedef Vector
<EntryMapEl
> EntryMapBase
;
1097 /* Graph class that implements actions and priorities. */
1100 /* Constructors/Destructors. */
1102 FsmAp( const FsmAp
&graph
);
1105 /* The list of states. */
1106 StateList stateList
;
1107 StateList misfitList
;
1109 /* The map of entry points. */
1110 EntryMap entryPoints
;
1112 /* The start state. */
1113 StateAp
*startState
;
1115 /* Error state, possibly created only when the final machine has been
1116 * created and the XML machine is about to be written. No transitions
1117 * point to this state. */
1120 /* The set of final states. */
1121 StateSet finStateSet
;
1123 /* Misfit Accounting. Are misfits put on a separate list. */
1124 bool misfitAccounting
;
1127 * Transition actions and priorities.
1130 /* Set priorities on transtions. */
1131 void startFsmPrior( int ordering
, PriorDesc
*prior
);
1132 void allTransPrior( int ordering
, PriorDesc
*prior
);
1133 void finishFsmPrior( int ordering
, PriorDesc
*prior
);
1134 void leaveFsmPrior( int ordering
, PriorDesc
*prior
);
1136 /* Action setting support. */
1137 void transferOutActions( StateAp
*state
);
1138 void transferErrorActions( StateAp
*state
, int transferPoint
);
1139 void setErrorAction( StateAp
*state
, int ordering
, Action
*action
);
1141 /* Fill all spaces in a transition list with an error transition. */
1142 void fillGaps( StateAp
*state
);
1144 /* Similar to setErrorAction, instead gives a state to go to on error. */
1145 void setErrorTarget( StateAp
*state
, StateAp
*target
, int *orderings
,
1146 Action
**actions
, int nActs
);
1148 /* Set actions to execute. */
1149 void startFsmAction( int ordering
, Action
*action
);
1150 void allTransAction( int ordering
, Action
*action
);
1151 void finishFsmAction( int ordering
, Action
*action
);
1152 void leaveFsmAction( int ordering
, Action
*action
);
1153 void longMatchAction( int ordering
, LongestMatchPart
*lmPart
);
1155 /* Set conditions. */
1156 CondSpace
*addCondSpace( const CondSet
&condSet
);
1158 void findEmbedExpansions( ExpansionList
&expansionList
,
1159 StateAp
*destState
, Action
*condAction
, bool sense
);
1160 void embedCondition( MergeData
&md
, StateAp
*state
, Action
*condAction
, bool sense
);
1161 void embedCondition( StateAp
*state
, Action
*condAction
, bool sense
);
1163 void startFsmCondition( Action
*condAction
, bool sense
);
1164 void allTransCondition( Action
*condAction
, bool sense
);
1165 void leaveFsmCondition( Action
*condAction
, bool sense
);
1167 /* Set error actions to execute. */
1168 void startErrorAction( int ordering
, Action
*action
, int transferPoint
);
1169 void allErrorAction( int ordering
, Action
*action
, int transferPoint
);
1170 void finalErrorAction( int ordering
, Action
*action
, int transferPoint
);
1171 void notStartErrorAction( int ordering
, Action
*action
, int transferPoint
);
1172 void notFinalErrorAction( int ordering
, Action
*action
, int transferPoint
);
1173 void middleErrorAction( int ordering
, Action
*action
, int transferPoint
);
1175 /* Set EOF actions. */
1176 void startEOFAction( int ordering
, Action
*action
);
1177 void allEOFAction( int ordering
, Action
*action
);
1178 void finalEOFAction( int ordering
, Action
*action
);
1179 void notStartEOFAction( int ordering
, Action
*action
);
1180 void notFinalEOFAction( int ordering
, Action
*action
);
1181 void middleEOFAction( int ordering
, Action
*action
);
1183 /* Set To State actions. */
1184 void startToStateAction( int ordering
, Action
*action
);
1185 void allToStateAction( int ordering
, Action
*action
);
1186 void finalToStateAction( int ordering
, Action
*action
);
1187 void notStartToStateAction( int ordering
, Action
*action
);
1188 void notFinalToStateAction( int ordering
, Action
*action
);
1189 void middleToStateAction( int ordering
, Action
*action
);
1191 /* Set From State actions. */
1192 void startFromStateAction( int ordering
, Action
*action
);
1193 void allFromStateAction( int ordering
, Action
*action
);
1194 void finalFromStateAction( int ordering
, Action
*action
);
1195 void notStartFromStateAction( int ordering
, Action
*action
);
1196 void notFinalFromStateAction( int ordering
, Action
*action
);
1197 void middleFromStateAction( int ordering
, Action
*action
);
1199 /* Shift the action ordering of the start transitions to start at
1200 * fromOrder and increase in units of 1. Useful before kleene star
1202 int shiftStartActionOrder( int fromOrder
);
1204 /* Clear all priorities from the fsm to so they won't affcet minimization
1205 * of the final fsm. */
1206 void clearAllPriorities();
1208 /* Zero out all the function keys. */
1209 void nullActionKeys();
1211 /* Walk the list of states and verify state properties. */
1212 void verifyStates();
1214 /* Misfit Accounting. Are misfits put on a separate list. */
1215 void setMisfitAccounting( bool val
)
1216 { misfitAccounting
= val
; }
1218 /* Set and Unset a state as final. */
1219 void setFinState( StateAp
*state
);
1220 void unsetFinState( StateAp
*state
);
1222 void setStartState( StateAp
*state
);
1223 void unsetStartState( );
1225 /* Set and unset a state as an entry point. */
1226 void setEntry( int id
, StateAp
*state
);
1227 void changeEntry( int id
, StateAp
*to
, StateAp
*from
);
1228 void unsetEntry( int id
, StateAp
*state
);
1229 void unsetEntry( int id
);
1230 void unsetAllEntryPoints();
1232 /* Epsilon transitions. */
1233 void epsilonTrans( int id
);
1234 void shadowReadWriteStates( MergeData
&md
);
1237 * Basic attaching and detaching.
1240 /* Common to attaching/detaching list and default. */
1241 void attachToInList( StateAp
*from
, StateAp
*to
, TransAp
*&head
, TransAp
*trans
);
1242 void detachFromInList( StateAp
*from
, StateAp
*to
, TransAp
*&head
, TransAp
*trans
);
1244 /* Attach with a new transition. */
1245 TransAp
*attachNewTrans( StateAp
*from
, StateAp
*to
,
1246 Key onChar1
, Key onChar2
);
1248 /* Attach with an existing transition that already in an out list. */
1249 void attachTrans( StateAp
*from
, StateAp
*to
, TransAp
*trans
);
1251 /* Redirect a transition away from error and towards some state. */
1252 void redirectErrorTrans( StateAp
*from
, StateAp
*to
, TransAp
*trans
);
1254 /* Detach a transition from a target state. */
1255 void detachTrans( StateAp
*from
, StateAp
*to
, TransAp
*trans
);
1257 /* Detach a state from the graph. */
1258 void detachState( StateAp
*state
);
1261 * NFA to DFA conversion routines.
1264 /* Duplicate a transition that will dropin to a free spot. */
1265 TransAp
*dupTrans( StateAp
*from
, TransAp
*srcTrans
);
1267 /* In crossing, two transitions both go to real states. */
1268 TransAp
*fsmAttachStates( MergeData
&md
, StateAp
*from
,
1269 TransAp
*destTrans
, TransAp
*srcTrans
);
1271 /* Two transitions are to be crossed, handle the possibility of either
1272 * going to the error state. */
1273 TransAp
*mergeTrans( MergeData
&md
, StateAp
*from
,
1274 TransAp
*destTrans
, TransAp
*srcTrans
);
1276 /* Compare deterimne relative priorities of two transition tables. */
1277 int comparePrior( const PriorTable
&priorTable1
, const PriorTable
&priorTable2
);
1279 /* Cross a src transition with one that is already occupying a spot. */
1280 TransAp
*crossTransitions( MergeData
&md
, StateAp
*from
,
1281 TransAp
*destTrans
, TransAp
*srcTrans
);
1283 void outTransCopy( MergeData
&md
, StateAp
*dest
, TransAp
*srcList
);
1285 void doRemove( MergeData
&md
, StateAp
*destState
, ExpansionList
&expList1
);
1286 void doExpand( MergeData
&md
, StateAp
*destState
, ExpansionList
&expList1
);
1287 void findCondExpInTrans( ExpansionList
&expansionList
, StateAp
*state
,
1288 Key lowKey
, Key highKey
, CondSpace
*fromCondSpace
, CondSpace
*toCondSpace
,
1289 long destVals
, LongVect
&toValsList
);
1290 void findTransExpansions( ExpansionList
&expansionList
,
1291 StateAp
*destState
, StateAp
*srcState
);
1292 void findCondExpansions( ExpansionList
&expansionList
,
1293 StateAp
*destState
, StateAp
*srcState
);
1294 void mergeStateConds( StateAp
*destState
, StateAp
*srcState
);
1296 /* Merge a set of states into newState. */
1297 void mergeStates( MergeData
&md
, StateAp
*destState
,
1298 StateAp
**srcStates
, int numSrc
);
1299 void mergeStatesLeaving( MergeData
&md
, StateAp
*destState
, StateAp
*srcState
);
1300 void mergeStates( MergeData
&md
, StateAp
*destState
, StateAp
*srcState
);
1302 /* Make all states that are combinations of other states and that
1303 * have not yet had their out transitions filled in. This will
1304 * empty out stateDict and stFil. */
1305 void fillInStates( MergeData
&md
);
1308 * Transition Comparison.
1311 /* Compare transition data. Either of the pointers may be null. */
1312 static inline int compareDataPtr( TransAp
*trans1
, TransAp
*trans2
);
1314 /* Compare target state and transition data. Either pointer may be null. */
1315 static inline int compareFullPtr( TransAp
*trans1
, TransAp
*trans2
);
1317 /* Compare target partitions. Either pointer may be null. */
1318 static inline int comparePartPtr( TransAp
*trans1
, TransAp
*trans2
);
1320 /* Check marked status of target states. Either pointer may be null. */
1321 static inline bool shouldMarkPtr( MarkIndex
&markIndex
,
1322 TransAp
*trans1
, TransAp
*trans2
);
1328 /* Compare priority and function table of transitions. */
1329 static int compareTransData( TransAp
*trans1
, TransAp
*trans2
);
1331 /* Add in the properties of srcTrans into this. */
1332 void addInTrans( TransAp
*destTrans
, TransAp
*srcTrans
);
1334 /* Compare states on data stored in the states. */
1335 static int compareStateData( const StateAp
*state1
, const StateAp
*state2
);
1337 /* Out transition data. */
1338 void clearOutData( StateAp
*state
);
1339 bool hasOutData( StateAp
*state
);
1340 void transferOutData( StateAp
*destState
, StateAp
*srcState
);
1346 /* New up a state and add it to the graph. */
1347 StateAp
*addState();
1350 * Building basic machines
1353 void concatFsm( Key c
);
1354 void concatFsm( Key
*str
, int len
);
1355 void concatFsmCI( Key
*str
, int len
);
1356 void orFsm( Key
*set
, int len
);
1357 void rangeFsm( Key low
, Key high
);
1358 void rangeStarFsm( Key low
, Key high
);
1367 void repeatOp( int times
);
1368 void optionalRepeatOp( int times
);
1369 void concatOp( FsmAp
*other
);
1370 void unionOp( FsmAp
*other
);
1371 void intersectOp( FsmAp
*other
);
1372 void subtractOp( FsmAp
*other
);
1374 void joinOp( int startId
, int finalId
, FsmAp
**others
, int numOthers
);
1375 void globOp( FsmAp
**others
, int numOthers
);
1376 void deterministicEntry();
1382 /* Determine if there are any entry points into a start state other than
1383 * the start state. */
1384 bool isStartStateIsolated();
1386 /* Make a new start state that has no entry points. Will not change the
1387 * identity of the fsm. */
1388 void isolateStartState();
1390 /* Workers for resolving epsilon transitions. */
1391 bool inEptVect( EptVect
*eptVect
, StateAp
*targ
);
1392 void epsilonFillEptVectFrom( StateAp
*root
, StateAp
*from
, bool parentLeaving
);
1393 void resolveEpsilonTrans( MergeData
&md
);
1395 /* Workers for concatenation and union. */
1396 void doConcat( FsmAp
*other
, StateSet
*fromStates
, bool optional
);
1397 void doOr( FsmAp
*other
);
1403 /* Unset any final states that are no longer to be final
1404 * due to final bits. */
1405 void unsetIncompleteFinals();
1406 void unsetKilledFinals();
1408 /* Bring in other's entry points. Assumes others states are going to be
1409 * copied into this machine. */
1410 void copyInEntryPoints( FsmAp
*other
);
1412 /* Ordering states. */
1413 void depthFirstOrdering( StateAp
*state
);
1414 void depthFirstOrdering();
1415 void sortStatesByFinal();
1417 /* Set sqequential state numbers starting at 0. */
1418 void setStateNumbers( int base
);
1420 /* Unset all final states. */
1421 void unsetAllFinStates();
1423 /* Set the bits of final states and clear the bits of non final states. */
1424 void setFinBits( int finStateBits
);
1427 * Self-consistency checks.
1430 /* Run a sanity check on the machine. */
1431 void verifyIntegrity();
1433 /* Verify that there are no unreachable states, or dead end states. */
1434 void verifyReachability();
1435 void verifyNoDeadEndStates();
1441 /* Mark all states reachable from state. */
1442 void markReachableFromHereReverse( StateAp
*state
);
1444 /* Mark all states reachable from state. */
1445 void markReachableFromHere( StateAp
*state
);
1446 void markReachableFromHereStopFinal( StateAp
*state
);
1448 /* Removes states that cannot be reached by any path in the fsm and are
1449 * thus wasted silicon. */
1450 void removeDeadEndStates();
1452 /* Removes states that cannot be reached by any path in the fsm and are
1453 * thus wasted silicon. */
1454 void removeUnreachableStates();
1456 /* Remove error actions from states on which the error transition will
1457 * never be taken. */
1458 bool outListCovers( StateAp
*state
);
1459 bool anyErrorRange( StateAp
*state
);
1461 /* Remove states that are on the misfit list. */
1462 void removeMisfits();
1468 /* Minimization by partitioning. */
1469 void minimizePartition1();
1470 void minimizePartition2();
1472 /* Minimize the final state Machine. The result is the minimal fsm. Slow
1473 * but stable, correct minimization. Uses n^2 space (lookout) and average
1474 * n^2 time. Worst case n^3 time, but a that is a very rare case. */
1475 void minimizeStable();
1477 /* Minimize the final state machine. Does not find the minimal fsm, but a
1478 * pretty good approximation. Does not use any extra space. Average n^2
1479 * time. Worst case n^3 time, but a that is a very rare case. */
1480 void minimizeApproximate();
1482 /* This is the worker for the minimize approximate solution. It merges
1483 * states that have identical out transitions. */
1484 bool minimizeRound( );
1486 /* Given an intial partioning of states, split partitions that have out trans
1487 * to differing partitions. */
1488 int partitionRound( StateAp
**statePtrs
, MinPartition
*parts
, int numParts
);
1490 /* Split partitions that have a transition to a previously split partition, until
1491 * there are no more partitions to split. */
1492 int splitCandidates( StateAp
**statePtrs
, MinPartition
*parts
, int numParts
);
1494 /* Fuse together states in the same partition. */
1495 void fusePartitions( MinPartition
*parts
, int numParts
);
1497 /* Mark pairs where out final stateness differs, out trans data differs,
1498 * trans pairs go to a marked pair or trans data differs. Should get
1500 void initialMarkRound( MarkIndex
&markIndex
);
1502 /* One marking round on all state pairs. Considers if trans pairs go
1503 * to a marked state only. Returns whether or not a pair was marked. */
1504 bool markRound( MarkIndex
&markIndex
);
1506 /* Move the in trans into src into dest. */
1507 void inTransMove(StateAp
*dest
, StateAp
*src
);
1509 /* Make state src and dest the same state. */
1510 void fuseEquivStates(StateAp
*dest
, StateAp
*src
);
1512 /* Find any states that didn't get marked by the marking algorithm and
1513 * merge them into the primary states of their equivalence class. */
1514 void fuseUnmarkedPairs( MarkIndex
&markIndex
);
1516 /* Merge neighboring transitions go to the same state and have the same
1517 * transitions data. */
1518 void compressTransitions();
1520 /* Returns true if there is a transtion (either explicit or by a gap) to
1521 * the error state. */
1522 bool checkErrTrans( StateAp
*state
, TransAp
*trans
);
1523 bool checkErrTransFinish( StateAp
*state
);
1524 bool hasErrorTrans();
1526 /* Check if a machine defines a single character. This is useful in
1527 * validating ranges and machines to export. */
1528 bool checkSingleCharMachine( );
1532 #endif /* _FSMGRAPH_H */