A fix to the documentation makefile from John D. Mitchell.
[ragel.git] / ragel / fsmgraph.h
blob2ccfeb0dd3e3028671b649bccfbce1c843dc9d85
1 /*
2 * Copyright 2001-2007 Adrian Thurston <thurston@cs.queensu.ca>
3 */
5 /* This file is part of Ragel.
7 * Ragel is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * Ragel is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Ragel; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 #ifndef _FSMGRAPH_H
23 #define _FSMGRAPH_H
25 #include "config.h"
26 #include <assert.h>
27 #include <iostream>
28 #include <string>
29 #include "common.h"
30 #include "vector.h"
31 #include "bstset.h"
32 #include "compare.h"
33 #include "avltree.h"
34 #include "dlist.h"
35 #include "bstmap.h"
36 #include "sbstmap.h"
37 #include "sbstset.h"
38 #include "sbsttable.h"
39 #include "avlset.h"
40 #include "avlmap.h"
41 #include "ragel.h"
43 //#define LOG_CONDS
45 /* Flags that control merging. */
46 #define STB_GRAPH1 0x01
47 #define STB_GRAPH2 0x02
48 #define STB_BOTH 0x03
49 #define STB_ISFINAL 0x04
50 #define STB_ISMARKED 0x08
51 #define STB_ONLIST 0x10
53 using std::ostream;
55 struct TransAp;
56 struct StateAp;
57 struct FsmAp;
58 struct Action;
59 struct LongestMatchPart;
61 /* State list element for unambiguous access to list element. */
62 struct FsmListEl
64 StateAp *prev, *next;
67 /* This is the marked index for a state pair. Used in minimization. It keeps
68 * track of whether or not the state pair is marked. */
69 struct MarkIndex
71 MarkIndex(int states);
72 ~MarkIndex();
74 void markPair(int state1, int state2);
75 bool isPairMarked(int state1, int state2);
77 private:
78 int numStates;
79 bool *array;
82 extern KeyOps *keyOps;
84 /* Transistion Action Element. */
85 typedef SBstMapEl< int, Action* > ActionTableEl;
87 /* Nodes in the tree that use this action. */
88 struct NameInst;
89 struct InlineList;
90 typedef Vector<NameInst*> ActionRefs;
92 /* Element in list of actions. Contains the string for the code to exectute. */
93 struct Action
95 public DListEl<Action>,
96 public AvlTreeEl<Action>
98 public:
100 Action( const InputLoc &loc, const char *name, InlineList *inlineList, int condId )
102 loc(loc),
103 name(name),
104 inlineList(inlineList),
105 actionId(-1),
106 numTransRefs(0),
107 numToStateRefs(0),
108 numFromStateRefs(0),
109 numEofRefs(0),
110 numCondRefs(0),
111 anyCall(false),
112 isLmAction(false),
113 condId(condId)
117 /* Key for action dictionary. */
118 const char *getKey() const { return name; }
120 /* Data collected during parse. */
121 InputLoc loc;
122 const char *name;
123 InlineList *inlineList;
124 int actionId;
126 void actionName( ostream &out )
128 if ( name != 0 )
129 out << name;
130 else
131 out << loc.line << ":" << loc.col;
134 /* Places in the input text that reference the action. */
135 ActionRefs actionRefs;
137 /* Number of references in the final machine. */
138 int numRefs()
139 { return numTransRefs + numToStateRefs + numFromStateRefs + numEofRefs; }
140 int numTransRefs;
141 int numToStateRefs;
142 int numFromStateRefs;
143 int numEofRefs;
144 int numCondRefs;
145 bool anyCall;
147 bool isLmAction;
148 int condId;
151 struct CmpCondId
153 static inline int compare( const Action *cond1, const Action *cond2 )
155 if ( cond1->condId < cond2->condId )
156 return -1;
157 else if ( cond1->condId > cond2->condId )
158 return 1;
159 return 0;
163 /* A list of actions. */
164 typedef DList<Action> ActionList;
165 typedef AvlTree<Action, char *, CmpStr> ActionDict;
167 /* Structure for reverse action mapping. */
168 struct RevActionMapEl
170 char *name;
171 InputLoc location;
175 /* Transition Action Table. */
176 struct ActionTable
177 : public SBstMap< int, Action*, CmpOrd<int> >
179 void setAction( int ordering, Action *action );
180 void setActions( int *orderings, Action **actions, int nActs );
181 void setActions( const ActionTable &other );
183 bool hasAction( Action *action );
186 typedef SBstSet< Action*, CmpOrd<Action*> > ActionSet;
187 typedef CmpSTable< Action*, CmpOrd<Action*> > CmpActionSet;
189 /* Transistion Action Element. */
190 typedef SBstMapEl< int, LongestMatchPart* > LmActionTableEl;
192 /* Transition Action Table. */
193 struct LmActionTable
194 : public SBstMap< int, LongestMatchPart*, CmpOrd<int> >
196 void setAction( int ordering, LongestMatchPart *action );
197 void setActions( const LmActionTable &other );
200 /* Compare of a whole action table element (key & value). */
201 struct CmpActionTableEl
203 static int compare( const ActionTableEl &action1,
204 const ActionTableEl &action2 )
206 if ( action1.key < action2.key )
207 return -1;
208 else if ( action1.key > action2.key )
209 return 1;
210 else if ( action1.value < action2.value )
211 return -1;
212 else if ( action1.value > action2.value )
213 return 1;
214 return 0;
218 /* Compare for ActionTable. */
219 typedef CmpSTable< ActionTableEl, CmpActionTableEl > CmpActionTable;
221 /* Compare of a whole lm action table element (key & value). */
222 struct CmpLmActionTableEl
224 static int compare( const LmActionTableEl &lmAction1,
225 const LmActionTableEl &lmAction2 )
227 if ( lmAction1.key < lmAction2.key )
228 return -1;
229 else if ( lmAction1.key > lmAction2.key )
230 return 1;
231 else if ( lmAction1.value < lmAction2.value )
232 return -1;
233 else if ( lmAction1.value > lmAction2.value )
234 return 1;
235 return 0;
239 /* Compare for ActionTable. */
240 typedef CmpSTable< LmActionTableEl, CmpLmActionTableEl > CmpLmActionTable;
242 /* Action table element for error action tables. Adds the encoding of transfer
243 * point. */
244 struct ErrActionTableEl
246 ErrActionTableEl( Action *action, int ordering, int transferPoint )
247 : ordering(ordering), action(action), transferPoint(transferPoint) { }
249 /* Ordering and id of the action embedding. */
250 int ordering;
251 Action *action;
253 /* Id of point of transfere from Error action table to transtions and
254 * eofActionTable. */
255 int transferPoint;
257 int getKey() const { return ordering; }
260 struct ErrActionTable
261 : public SBstTable< ErrActionTableEl, int, CmpOrd<int> >
263 void setAction( int ordering, Action *action, int transferPoint );
264 void setActions( const ErrActionTable &other );
267 /* Compare of an error action table element (key & value). */
268 struct CmpErrActionTableEl
270 static int compare( const ErrActionTableEl &action1,
271 const ErrActionTableEl &action2 )
273 if ( action1.ordering < action2.ordering )
274 return -1;
275 else if ( action1.ordering > action2.ordering )
276 return 1;
277 else if ( action1.action < action2.action )
278 return -1;
279 else if ( action1.action > action2.action )
280 return 1;
281 else if ( action1.transferPoint < action2.transferPoint )
282 return -1;
283 else if ( action1.transferPoint > action2.transferPoint )
284 return 1;
285 return 0;
289 /* Compare for ErrActionTable. */
290 typedef CmpSTable< ErrActionTableEl, CmpErrActionTableEl > CmpErrActionTable;
293 /* Descibe a priority, shared among PriorEls.
294 * Has key and whether or not used. */
295 struct PriorDesc
297 int key;
298 int priority;
301 /* Element in the arrays of priorities for transitions and arrays. Ordering is
302 * unique among instantiations of machines, desc is shared. */
303 struct PriorEl
305 PriorEl( int ordering, PriorDesc *desc )
306 : ordering(ordering), desc(desc) { }
308 int ordering;
309 PriorDesc *desc;
312 /* Compare priority elements, which are ordered by the priority descriptor
313 * key. */
314 struct PriorElCmp
316 static inline int compare( const PriorEl &pel1, const PriorEl &pel2 )
318 if ( pel1.desc->key < pel2.desc->key )
319 return -1;
320 else if ( pel1.desc->key > pel2.desc->key )
321 return 1;
322 else
323 return 0;
328 /* Priority Table. */
329 struct PriorTable
330 : public SBstSet< PriorEl, PriorElCmp >
332 void setPrior( int ordering, PriorDesc *desc );
333 void setPriors( const PriorTable &other );
336 /* Compare of prior table elements for distinguising state data. */
337 struct CmpPriorEl
339 static inline int compare( const PriorEl &pel1, const PriorEl &pel2 )
341 if ( pel1.desc < pel2.desc )
342 return -1;
343 else if ( pel1.desc > pel2.desc )
344 return 1;
345 else if ( pel1.ordering < pel2.ordering )
346 return -1;
347 else if ( pel1.ordering > pel2.ordering )
348 return 1;
349 return 0;
353 /* Compare of PriorTable distinguising state data. Using a compare of the
354 * pointers is a little more strict than it needs be. It requires that
355 * prioritiy tables have the exact same set of priority assignment operators
356 * (from the input lang) to be considered equal.
358 * Really only key-value pairs need be tested and ordering be merged. However
359 * this would require that in the fuseing of states, priority descriptors be
360 * chosen for the new fused state based on priority. Since the out transition
361 * lists and ranges aren't necessarily going to line up, this is more work for
362 * little gain. Final compression resets all priorities first, so this would
363 * only be useful for compression at every operator, which is only an
364 * undocumented test feature.
366 typedef CmpSTable<PriorEl, CmpPriorEl> CmpPriorTable;
368 /* Plain action list that imposes no ordering. */
369 typedef Vector<int> TransFuncList;
371 /* Comparison for TransFuncList. */
372 typedef CmpTable< int, CmpOrd<int> > TransFuncListCompare;
374 /* Transition class that implements actions and priorities. */
375 struct TransAp
377 TransAp() : fromState(0), toState(0) {}
378 TransAp( const TransAp &other ) :
379 lowKey(other.lowKey),
380 highKey(other.highKey),
381 fromState(0), toState(0),
382 actionTable(other.actionTable),
383 priorTable(other.priorTable),
384 lmActionTable(other.lmActionTable) {}
386 Key lowKey, highKey;
387 StateAp *fromState;
388 StateAp *toState;
390 /* Pointers for outlist. */
391 TransAp *prev, *next;
393 /* Pointers for in-list. */
394 TransAp *ilprev, *ilnext;
396 /* The function table and priority for the transition. */
397 ActionTable actionTable;
398 PriorTable priorTable;
400 LmActionTable lmActionTable;
403 /* In transition list. Like DList except only has head pointers, which is all
404 * that is required. Insertion and deletion is handled by the graph. This
405 * class provides the iterator of a single list. */
406 struct TransInList
408 TransInList() : head(0) { }
410 TransAp *head;
412 struct Iter
414 /* Default construct. */
415 Iter() : ptr(0) { }
417 /* Construct, assign from a list. */
418 Iter( const TransInList &il ) : ptr(il.head) { }
419 Iter &operator=( const TransInList &dl ) { ptr = dl.head; return *this; }
421 /* At the end */
422 bool lte() const { return ptr != 0; }
423 bool end() const { return ptr == 0; }
425 /* At the first, last element. */
426 bool first() const { return ptr && ptr->ilprev == 0; }
427 bool last() const { return ptr && ptr->ilnext == 0; }
429 /* Cast, dereference, arrow ops. */
430 operator TransAp*() const { return ptr; }
431 TransAp &operator *() const { return *ptr; }
432 TransAp *operator->() const { return ptr; }
434 /* Increment, decrement. */
435 inline void operator++(int) { ptr = ptr->ilnext; }
436 inline void operator--(int) { ptr = ptr->ilprev; }
438 /* The iterator is simply a pointer. */
439 TransAp *ptr;
443 typedef DList<TransAp> TransList;
445 /* Set of states, list of states. */
446 typedef BstSet<StateAp*> StateSet;
447 typedef DList<StateAp> StateList;
449 /* A element in a state dict. */
450 struct StateDictEl
452 public AvlTreeEl<StateDictEl>
454 StateDictEl(const StateSet &stateSet)
455 : stateSet(stateSet) { }
457 const StateSet &getKey() { return stateSet; }
458 StateSet stateSet;
459 StateAp *targState;
462 /* Dictionary mapping a set of states to a target state. */
463 typedef AvlTree< StateDictEl, StateSet, CmpTable<StateAp*> > StateDict;
465 /* Data needed for a merge operation. */
466 struct MergeData
468 MergeData()
469 : stfillHead(0), stfillTail(0) { }
471 StateDict stateDict;
473 StateAp *stfillHead;
474 StateAp *stfillTail;
476 void fillListAppend( StateAp *state );
479 struct TransEl
481 /* Constructors. */
482 TransEl() { }
483 TransEl( Key lowKey, Key highKey )
484 : lowKey(lowKey), highKey(highKey) { }
485 TransEl( Key lowKey, Key highKey, TransAp *value )
486 : lowKey(lowKey), highKey(highKey), value(value) { }
488 Key lowKey, highKey;
489 TransAp *value;
492 struct CmpKey
494 static int compare( const Key key1, const Key key2 )
496 if ( key1 < key2 )
497 return -1;
498 else if ( key1 > key2 )
499 return 1;
500 else
501 return 0;
505 /* Vector based set of key items. */
506 typedef BstSet<Key, CmpKey> KeySet;
508 struct MinPartition
510 MinPartition() : active(false) { }
512 StateList list;
513 bool active;
515 MinPartition *prev, *next;
518 /* Epsilon transition stored in a state. Specifies the target */
519 typedef Vector<int> EpsilonTrans;
521 /* List of states that are to be drawn into this. */
522 struct EptVectEl
524 EptVectEl( StateAp *targ, bool leaving )
525 : targ(targ), leaving(leaving) { }
527 StateAp *targ;
528 bool leaving;
530 typedef Vector<EptVectEl> EptVect;
532 /* Set of entry ids that go into this state. */
533 typedef BstSet<int> EntryIdSet;
535 /* Set of longest match items that may be active in a given state. */
536 typedef BstSet<LongestMatchPart*> LmItemSet;
538 /* A Conditions which is to be
539 * transfered on pending out transitions. */
540 struct OutCond
542 OutCond( Action *action, bool sense )
543 : action(action), sense(sense) {}
545 Action *action;
546 bool sense;
549 struct CmpOutCond
551 static int compare( const OutCond &outCond1, const OutCond &outCond2 )
553 if ( outCond1.action < outCond2.action )
554 return -1;
555 else if ( outCond1.action > outCond2.action )
556 return 1;
557 else if ( outCond1.sense < outCond2.sense )
558 return -1;
559 else if ( outCond1.sense > outCond2.sense )
560 return 1;
561 return 0;
565 /* Set of conditions to be transfered to on pending out transitions. */
566 typedef SBstSet< OutCond, CmpOutCond > OutCondSet;
567 typedef CmpSTable< OutCond, CmpOutCond > CmpOutCondSet;
569 /* Conditions. */
570 typedef BstSet< Action*, CmpCondId > CondSet;
571 typedef CmpTable< Action*, CmpCondId > CmpCondSet;
573 struct CondSpace
574 : public AvlTreeEl<CondSpace>
576 CondSpace( const CondSet &condSet )
577 : condSet(condSet) {}
579 const CondSet &getKey() { return condSet; }
581 CondSet condSet;
582 Key baseKey;
583 long condSpaceId;
586 typedef Vector<CondSpace*> CondSpaceVect;
588 typedef AvlTree<CondSpace, CondSet, CmpCondSet> CondSpaceMap;
590 struct StateCond
592 StateCond( Key lowKey, Key highKey ) :
593 lowKey(lowKey), highKey(highKey) {}
595 Key lowKey;
596 Key highKey;
597 CondSpace *condSpace;
599 StateCond *prev, *next;
602 typedef DList<StateCond> StateCondList;
603 typedef Vector<long> LongVect;
605 struct Expansion
607 Expansion( Key lowKey, Key highKey ) :
608 lowKey(lowKey), highKey(highKey),
609 fromTrans(0), fromCondSpace(0),
610 toCondSpace(0) {}
612 ~Expansion()
614 if ( fromTrans != 0 )
615 delete fromTrans;
618 Key lowKey;
619 Key highKey;
621 TransAp *fromTrans;
622 CondSpace *fromCondSpace;
623 long fromVals;
625 CondSpace *toCondSpace;
626 LongVect toValsList;
628 Expansion *prev, *next;
631 typedef DList<Expansion> ExpansionList;
633 struct Removal
635 Key lowKey;
636 Key highKey;
638 Removal *next;
641 struct CondData
643 CondData() : lastCondKey(0) {}
645 /* Condition info. */
646 Key lastCondKey;
648 CondSpaceMap condSpaceMap;
651 extern CondData *condData;
653 struct FsmConstructFail
655 enum Reason
657 CondNoKeySpace
660 FsmConstructFail( Reason reason )
661 : reason(reason) {}
662 Reason reason;
665 /* State class that implements actions and priorities. */
666 struct StateAp
668 StateAp();
669 StateAp(const StateAp &other);
670 ~StateAp();
672 /* Is the state final? */
673 bool isFinState() { return stateBits & STB_ISFINAL; }
675 /* Out transition list and the pointer for the default out trans. */
676 TransList outList;
678 /* In transition Lists. */
679 TransInList inList;
681 /* Set only during scanner construction when actions are added. NFA to DFA
682 * code can ignore this. */
683 StateAp *eofTarget;
685 /* Entry points into the state. */
686 EntryIdSet entryIds;
688 /* Epsilon transitions. */
689 EpsilonTrans epsilonTrans;
691 /* Condition info. */
692 StateCondList stateCondList;
694 /* Number of in transitions from states other than ourselves. */
695 int foreignInTrans;
697 /* Temporary data for various algorithms. */
698 union {
699 /* When duplicating the fsm we need to map each
700 * state to the new state representing it. */
701 StateAp *stateMap;
703 /* When minimizing machines by partitioning, this maps to the group
704 * the state is in. */
705 MinPartition *partition;
707 /* When merging states (state machine operations) this next pointer is
708 * used for the list of states that need to be filled in. */
709 StateAp *next;
711 /* Identification for printing and stable minimization. */
712 int stateNum;
714 } alg;
716 /* Data used in epsilon operation, maybe fit into alg? */
717 StateAp *isolatedShadow;
718 int owningGraph;
720 /* A pointer to a dict element that contains the set of states this state
721 * represents. This cannot go into alg, because alg.next is used during
722 * the merging process. */
723 StateDictEl *stateDictEl;
725 /* When drawing epsilon transitions, holds the list of states to merge
726 * with. */
727 EptVect *eptVect;
729 /* Bits controlling the behaviour of the state during collapsing to dfa. */
730 int stateBits;
732 /* State list elements. */
733 StateAp *next, *prev;
736 * Priority and Action data.
739 /* Out priorities transfered to out transitions. */
740 PriorTable outPriorTable;
742 /* The following two action tables are distinguished by the fact that when
743 * toState actions are executed immediatly after transition actions of
744 * incoming transitions and the current character will be the same as the
745 * one available then. The fromState actions are executed immediately
746 * before the transition actions of outgoing transitions and the current
747 * character is same as the one available then. */
749 /* Actions to execute upon entering into a state. */
750 ActionTable toStateActionTable;
752 /* Actions to execute when going from the state to the transition. */
753 ActionTable fromStateActionTable;
755 /* Actions to add to any future transitions that leave via this state. */
756 ActionTable outActionTable;
758 /* Conditions to add to any future transiions that leave via this sttate. */
759 OutCondSet outCondSet;
761 /* Error action tables. */
762 ErrActionTable errActionTable;
764 /* Actions to execute on eof. */
765 ActionTable eofActionTable;
767 /* Set of longest match items that may be active in this state. */
768 LmItemSet lmItemSet;
771 template <class ListItem> struct NextTrans
773 Key lowKey, highKey;
774 ListItem *trans;
775 ListItem *next;
777 void load() {
778 if ( trans == 0 )
779 next = 0;
780 else {
781 next = trans->next;
782 lowKey = trans->lowKey;
783 highKey = trans->highKey;
787 void set( ListItem *t ) {
788 trans = t;
789 load();
792 void increment() {
793 trans = next;
794 load();
799 /* Encodes the different states that are meaningful to the of the iterator. */
800 enum PairIterUserState
802 RangeInS1, RangeInS2,
803 RangeOverlap,
804 BreakS1, BreakS2
807 template <class ListItem1, class ListItem2 = ListItem1> struct PairIter
809 /* Encodes the different states that an fsm iterator can be in. */
810 enum IterState {
811 Begin,
812 ConsumeS1Range, ConsumeS2Range,
813 OnlyInS1Range, OnlyInS2Range,
814 S1SticksOut, S1SticksOutBreak,
815 S2SticksOut, S2SticksOutBreak,
816 S1DragsBehind, S1DragsBehindBreak,
817 S2DragsBehind, S2DragsBehindBreak,
818 ExactOverlap, End
821 PairIter( ListItem1 *list1, ListItem2 *list2 );
823 /* Query iterator. */
824 bool lte() { return itState != End; }
825 bool end() { return itState == End; }
826 void operator++(int) { findNext(); }
827 void operator++() { findNext(); }
829 /* Iterator state. */
830 ListItem1 *list1;
831 ListItem2 *list2;
832 IterState itState;
833 PairIterUserState userState;
835 NextTrans<ListItem1> s1Tel;
836 NextTrans<ListItem2> s2Tel;
837 Key bottomLow, bottomHigh;
838 ListItem1 *bottomTrans1;
839 ListItem2 *bottomTrans2;
841 private:
842 void findNext();
845 /* Init the iterator by advancing to the first item. */
846 template <class ListItem1, class ListItem2> PairIter<ListItem1, ListItem2>::PairIter(
847 ListItem1 *list1, ListItem2 *list2 )
849 list1(list1),
850 list2(list2),
851 itState(Begin)
853 findNext();
856 /* Return and re-entry for the co-routine iterators. This should ALWAYS be
857 * used inside of a block. */
858 #define CO_RETURN(label) \
859 itState = label; \
860 return; \
861 entry##label: backIn = true
863 /* Return and re-entry for the co-routine iterators. This should ALWAYS be
864 * used inside of a block. */
865 #define CO_RETURN2(label, uState) \
866 itState = label; \
867 userState = uState; \
868 return; \
869 entry##label: backIn = true
871 /* Advance to the next transition. When returns, trans points to the next
872 * transition, unless there are no more, in which case end() returns true. */
873 template <class ListItem1, class ListItem2> void PairIter<ListItem1, ListItem2>::findNext()
875 /* This variable is used in dummy statements that follow the entry
876 * goto labels. The compiler needs some statement to follow the label. */
877 bool backIn;
879 /* Jump into the iterator routine base on the iterator state. */
880 switch ( itState ) {
881 case Begin: goto entryBegin;
882 case ConsumeS1Range: goto entryConsumeS1Range;
883 case ConsumeS2Range: goto entryConsumeS2Range;
884 case OnlyInS1Range: goto entryOnlyInS1Range;
885 case OnlyInS2Range: goto entryOnlyInS2Range;
886 case S1SticksOut: goto entryS1SticksOut;
887 case S1SticksOutBreak: goto entryS1SticksOutBreak;
888 case S2SticksOut: goto entryS2SticksOut;
889 case S2SticksOutBreak: goto entryS2SticksOutBreak;
890 case S1DragsBehind: goto entryS1DragsBehind;
891 case S1DragsBehindBreak: goto entryS1DragsBehindBreak;
892 case S2DragsBehind: goto entryS2DragsBehind;
893 case S2DragsBehindBreak: goto entryS2DragsBehindBreak;
894 case ExactOverlap: goto entryExactOverlap;
895 case End: goto entryEnd;
898 entryBegin:
899 /* Set up the next structs at the head of the transition lists. */
900 s1Tel.set( list1 );
901 s2Tel.set( list2 );
903 /* Concurrently scan both out ranges. */
904 while ( true ) {
905 if ( s1Tel.trans == 0 ) {
906 /* We are at the end of state1's ranges. Process the rest of
907 * state2's ranges. */
908 while ( s2Tel.trans != 0 ) {
909 /* Range is only in s2. */
910 CO_RETURN2( ConsumeS2Range, RangeInS2 );
911 s2Tel.increment();
913 break;
915 else if ( s2Tel.trans == 0 ) {
916 /* We are at the end of state2's ranges. Process the rest of
917 * state1's ranges. */
918 while ( s1Tel.trans != 0 ) {
919 /* Range is only in s1. */
920 CO_RETURN2( ConsumeS1Range, RangeInS1 );
921 s1Tel.increment();
923 break;
925 /* Both state1's and state2's transition elements are good.
926 * The signiture of no overlap is a back key being in front of a
927 * front key. */
928 else if ( s1Tel.highKey < s2Tel.lowKey ) {
929 /* A range exists in state1 that does not overlap with state2. */
930 CO_RETURN2( OnlyInS1Range, RangeInS1 );
931 s1Tel.increment();
933 else if ( s2Tel.highKey < s1Tel.lowKey ) {
934 /* A range exists in state2 that does not overlap with state1. */
935 CO_RETURN2( OnlyInS2Range, RangeInS2 );
936 s2Tel.increment();
938 /* There is overlap, must mix the ranges in some way. */
939 else if ( s1Tel.lowKey < s2Tel.lowKey ) {
940 /* Range from state1 sticks out front. Must break it into
941 * non-overlaping and overlaping segments. */
942 bottomLow = s2Tel.lowKey;
943 bottomHigh = s1Tel.highKey;
944 s1Tel.highKey = s2Tel.lowKey;
945 s1Tel.highKey.decrement();
946 bottomTrans1 = s1Tel.trans;
948 /* Notify the caller that we are breaking s1. This gives them a
949 * chance to duplicate s1Tel[0,1].value. */
950 CO_RETURN2( S1SticksOutBreak, BreakS1 );
952 /* Broken off range is only in s1. */
953 CO_RETURN2( S1SticksOut, RangeInS1 );
955 /* Advance over the part sticking out front. */
956 s1Tel.lowKey = bottomLow;
957 s1Tel.highKey = bottomHigh;
958 s1Tel.trans = bottomTrans1;
960 else if ( s2Tel.lowKey < s1Tel.lowKey ) {
961 /* Range from state2 sticks out front. Must break it into
962 * non-overlaping and overlaping segments. */
963 bottomLow = s1Tel.lowKey;
964 bottomHigh = s2Tel.highKey;
965 s2Tel.highKey = s1Tel.lowKey;
966 s2Tel.highKey.decrement();
967 bottomTrans2 = s2Tel.trans;
969 /* Notify the caller that we are breaking s2. This gives them a
970 * chance to duplicate s2Tel[0,1].value. */
971 CO_RETURN2( S2SticksOutBreak, BreakS2 );
973 /* Broken off range is only in s2. */
974 CO_RETURN2( S2SticksOut, RangeInS2 );
976 /* Advance over the part sticking out front. */
977 s2Tel.lowKey = bottomLow;
978 s2Tel.highKey = bottomHigh;
979 s2Tel.trans = bottomTrans2;
981 /* Low ends are even. Are the high ends even? */
982 else if ( s1Tel.highKey < s2Tel.highKey ) {
983 /* Range from state2 goes longer than the range from state1. We
984 * must break the range from state2 into an evenly overlaping
985 * segment. */
986 bottomLow = s1Tel.highKey;
987 bottomLow.increment();
988 bottomHigh = s2Tel.highKey;
989 s2Tel.highKey = s1Tel.highKey;
990 bottomTrans2 = s2Tel.trans;
992 /* Notify the caller that we are breaking s2. This gives them a
993 * chance to duplicate s2Tel[0,1].value. */
994 CO_RETURN2( S2DragsBehindBreak, BreakS2 );
996 /* Breaking s2 produces exact overlap. */
997 CO_RETURN2( S2DragsBehind, RangeOverlap );
999 /* Advance over the front we just broke off of range 2. */
1000 s2Tel.lowKey = bottomLow;
1001 s2Tel.highKey = bottomHigh;
1002 s2Tel.trans = bottomTrans2;
1004 /* Advance over the entire s1Tel. We have consumed it. */
1005 s1Tel.increment();
1007 else if ( s2Tel.highKey < s1Tel.highKey ) {
1008 /* Range from state1 goes longer than the range from state2. We
1009 * must break the range from state1 into an evenly overlaping
1010 * segment. */
1011 bottomLow = s2Tel.highKey;
1012 bottomLow.increment();
1013 bottomHigh = s1Tel.highKey;
1014 s1Tel.highKey = s2Tel.highKey;
1015 bottomTrans1 = s1Tel.trans;
1017 /* Notify the caller that we are breaking s1. This gives them a
1018 * chance to duplicate s2Tel[0,1].value. */
1019 CO_RETURN2( S1DragsBehindBreak, BreakS1 );
1021 /* Breaking s1 produces exact overlap. */
1022 CO_RETURN2( S1DragsBehind, RangeOverlap );
1024 /* Advance over the front we just broke off of range 1. */
1025 s1Tel.lowKey = bottomLow;
1026 s1Tel.highKey = bottomHigh;
1027 s1Tel.trans = bottomTrans1;
1029 /* Advance over the entire s2Tel. We have consumed it. */
1030 s2Tel.increment();
1032 else {
1033 /* There is an exact overlap. */
1034 CO_RETURN2( ExactOverlap, RangeOverlap );
1036 s1Tel.increment();
1037 s2Tel.increment();
1041 /* Done, go into end state. */
1042 CO_RETURN( End );
1046 /* Compare lists of epsilon transitions. Entries are name ids of targets. */
1047 typedef CmpTable< int, CmpOrd<int> > CmpEpsilonTrans;
1049 /* Compare class for the Approximate minimization. */
1050 class ApproxCompare
1052 public:
1053 ApproxCompare() { }
1054 int compare( const StateAp *pState1, const StateAp *pState2 );
1057 /* Compare class for the initial partitioning of a partition minimization. */
1058 class InitPartitionCompare
1060 public:
1061 InitPartitionCompare() { }
1062 int compare( const StateAp *pState1, const StateAp *pState2 );
1065 /* Compare class for the regular partitioning of a partition minimization. */
1066 class PartitionCompare
1068 public:
1069 PartitionCompare() { }
1070 int compare( const StateAp *pState1, const StateAp *pState2 );
1073 /* Compare class for a minimization that marks pairs. Provides the shouldMark
1074 * routine. */
1075 class MarkCompare
1077 public:
1078 MarkCompare() { }
1079 bool shouldMark( MarkIndex &markIndex, const StateAp *pState1,
1080 const StateAp *pState2 );
1083 /* List of partitions. */
1084 typedef DList< MinPartition > PartitionList;
1086 /* List of transtions out of a state. */
1087 typedef Vector<TransEl> TransListVect;
1089 /* Entry point map used for keeping track of entry points in a machine. */
1090 typedef BstSet< int > EntryIdSet;
1091 typedef BstMapEl< int, StateAp* > EntryMapEl;
1092 typedef BstMap< int, StateAp* > EntryMap;
1093 typedef Vector<EntryMapEl> EntryMapBase;
1095 /* Graph class that implements actions and priorities. */
1096 struct FsmAp
1098 /* Constructors/Destructors. */
1099 FsmAp( );
1100 FsmAp( const FsmAp &graph );
1101 ~FsmAp();
1103 /* The list of states. */
1104 StateList stateList;
1105 StateList misfitList;
1107 /* The map of entry points. */
1108 EntryMap entryPoints;
1110 /* The start state. */
1111 StateAp *startState;
1113 /* Error state, possibly created only when the final machine has been
1114 * created and the XML machine is about to be written. No transitions
1115 * point to this state. */
1116 StateAp *errState;
1118 /* The set of final states. */
1119 StateSet finStateSet;
1121 /* Misfit Accounting. Are misfits put on a separate list. */
1122 bool misfitAccounting;
1125 * Transition actions and priorities.
1128 /* Set priorities on transtions. */
1129 void startFsmPrior( int ordering, PriorDesc *prior );
1130 void allTransPrior( int ordering, PriorDesc *prior );
1131 void finishFsmPrior( int ordering, PriorDesc *prior );
1132 void leaveFsmPrior( int ordering, PriorDesc *prior );
1134 /* Action setting support. */
1135 void transferOutActions( StateAp *state );
1136 void transferErrorActions( StateAp *state, int transferPoint );
1137 void setErrorActions( StateAp *state, const ActionTable &other );
1138 void setErrorAction( StateAp *state, int ordering, Action *action );
1140 /* Fill all spaces in a transition list with an error transition. */
1141 void fillGaps( StateAp *state );
1143 /* Similar to setErrorAction, instead gives a state to go to on error. */
1144 void setErrorTarget( StateAp *state, StateAp *target, int *orderings,
1145 Action **actions, int nActs );
1147 /* Set actions to execute. */
1148 void startFsmAction( int ordering, Action *action );
1149 void allTransAction( int ordering, Action *action );
1150 void finishFsmAction( int ordering, Action *action );
1151 void leaveFsmAction( int ordering, Action *action );
1152 void longMatchAction( int ordering, LongestMatchPart *lmPart );
1154 /* Set conditions. */
1155 CondSpace *addCondSpace( const CondSet &condSet );
1157 void findEmbedExpansions( ExpansionList &expansionList,
1158 StateAp *destState, Action *condAction, bool sense );
1159 void embedCondition( MergeData &md, StateAp *state, Action *condAction, bool sense );
1160 void embedCondition( StateAp *state, Action *condAction, bool sense );
1162 void startFsmCondition( Action *condAction, bool sense );
1163 void allTransCondition( Action *condAction, bool sense );
1164 void leaveFsmCondition( Action *condAction, bool sense );
1166 /* Set error actions to execute. */
1167 void startErrorAction( int ordering, Action *action, int transferPoint );
1168 void allErrorAction( int ordering, Action *action, int transferPoint );
1169 void finalErrorAction( int ordering, Action *action, int transferPoint );
1170 void notStartErrorAction( int ordering, Action *action, int transferPoint );
1171 void notFinalErrorAction( int ordering, Action *action, int transferPoint );
1172 void middleErrorAction( int ordering, Action *action, int transferPoint );
1174 /* Set EOF actions. */
1175 void startEOFAction( int ordering, Action *action );
1176 void allEOFAction( int ordering, Action *action );
1177 void finalEOFAction( int ordering, Action *action );
1178 void notStartEOFAction( int ordering, Action *action );
1179 void notFinalEOFAction( int ordering, Action *action );
1180 void middleEOFAction( int ordering, Action *action );
1182 /* Set To State actions. */
1183 void startToStateAction( int ordering, Action *action );
1184 void allToStateAction( int ordering, Action *action );
1185 void finalToStateAction( int ordering, Action *action );
1186 void notStartToStateAction( int ordering, Action *action );
1187 void notFinalToStateAction( int ordering, Action *action );
1188 void middleToStateAction( int ordering, Action *action );
1190 /* Set From State actions. */
1191 void startFromStateAction( int ordering, Action *action );
1192 void allFromStateAction( int ordering, Action *action );
1193 void finalFromStateAction( int ordering, Action *action );
1194 void notStartFromStateAction( int ordering, Action *action );
1195 void notFinalFromStateAction( int ordering, Action *action );
1196 void middleFromStateAction( int ordering, Action *action );
1198 /* Shift the action ordering of the start transitions to start at
1199 * fromOrder and increase in units of 1. Useful before kleene star
1200 * operation. */
1201 int shiftStartActionOrder( int fromOrder );
1203 /* Clear all priorities from the fsm to so they won't affcet minimization
1204 * of the final fsm. */
1205 void clearAllPriorities();
1207 /* Zero out all the function keys. */
1208 void nullActionKeys();
1210 /* Walk the list of states and verify state properties. */
1211 void verifyStates();
1213 /* Misfit Accounting. Are misfits put on a separate list. */
1214 void setMisfitAccounting( bool val )
1215 { misfitAccounting = val; }
1217 /* Set and Unset a state as final. */
1218 void setFinState( StateAp *state );
1219 void unsetFinState( StateAp *state );
1221 void setStartState( StateAp *state );
1222 void unsetStartState( );
1224 /* Set and unset a state as an entry point. */
1225 void setEntry( int id, StateAp *state );
1226 void changeEntry( int id, StateAp *to, StateAp *from );
1227 void unsetEntry( int id, StateAp *state );
1228 void unsetEntry( int id );
1229 void unsetAllEntryPoints();
1231 /* Epsilon transitions. */
1232 void epsilonTrans( int id );
1233 void shadowReadWriteStates( MergeData &md );
1236 * Basic attaching and detaching.
1239 /* Common to attaching/detaching list and default. */
1240 void attachToInList( StateAp *from, StateAp *to, TransAp *&head, TransAp *trans );
1241 void detachFromInList( StateAp *from, StateAp *to, TransAp *&head, TransAp *trans );
1243 /* Attach with a new transition. */
1244 TransAp *attachNewTrans( StateAp *from, StateAp *to,
1245 Key onChar1, Key onChar2 );
1247 /* Attach with an existing transition that already in an out list. */
1248 void attachTrans( StateAp *from, StateAp *to, TransAp *trans );
1250 /* Redirect a transition away from error and towards some state. */
1251 void redirectErrorTrans( StateAp *from, StateAp *to, TransAp *trans );
1253 /* Detach a transition from a target state. */
1254 void detachTrans( StateAp *from, StateAp *to, TransAp *trans );
1256 /* Detach a state from the graph. */
1257 void detachState( StateAp *state );
1260 * NFA to DFA conversion routines.
1263 /* Duplicate a transition that will dropin to a free spot. */
1264 TransAp *dupTrans( StateAp *from, TransAp *srcTrans );
1266 /* In crossing, two transitions both go to real states. */
1267 TransAp *fsmAttachStates( MergeData &md, StateAp *from,
1268 TransAp *destTrans, TransAp *srcTrans );
1270 /* Two transitions are to be crossed, handle the possibility of either
1271 * going to the error state. */
1272 TransAp *mergeTrans( MergeData &md, StateAp *from,
1273 TransAp *destTrans, TransAp *srcTrans );
1275 /* Compare deterimne relative priorities of two transition tables. */
1276 int comparePrior( const PriorTable &priorTable1, const PriorTable &priorTable2 );
1278 /* Cross a src transition with one that is already occupying a spot. */
1279 TransAp *crossTransitions( MergeData &md, StateAp *from,
1280 TransAp *destTrans, TransAp *srcTrans );
1282 void outTransCopy( MergeData &md, StateAp *dest, TransAp *srcList );
1284 void doRemove( MergeData &md, StateAp *destState, ExpansionList &expList1 );
1285 void doExpand( MergeData &md, StateAp *destState, ExpansionList &expList1 );
1286 void findCondExpInTrans( ExpansionList &expansionList, StateAp *state,
1287 Key lowKey, Key highKey, CondSpace *fromCondSpace, CondSpace *toCondSpace,
1288 long destVals, LongVect &toValsList );
1289 void findTransExpansions( ExpansionList &expansionList,
1290 StateAp *destState, StateAp *srcState );
1291 void findCondExpansions( ExpansionList &expansionList,
1292 StateAp *destState, StateAp *srcState );
1293 void mergeStateConds( StateAp *destState, StateAp *srcState );
1295 /* Merge a set of states into newState. */
1296 void mergeStates( MergeData &md, StateAp *destState,
1297 StateAp **srcStates, int numSrc );
1298 void mergeStatesLeaving( MergeData &md, StateAp *destState, StateAp *srcState );
1299 void mergeStates( MergeData &md, StateAp *destState, StateAp *srcState );
1301 /* Make all states that are combinations of other states and that
1302 * have not yet had their out transitions filled in. This will
1303 * empty out stateDict and stFil. */
1304 void fillInStates( MergeData &md );
1307 * Transition Comparison.
1310 /* Compare transition data. Either of the pointers may be null. */
1311 static inline int compareDataPtr( TransAp *trans1, TransAp *trans2 );
1313 /* Compare target state and transition data. Either pointer may be null. */
1314 static inline int compareFullPtr( TransAp *trans1, TransAp *trans2 );
1316 /* Compare target partitions. Either pointer may be null. */
1317 static inline int comparePartPtr( TransAp *trans1, TransAp *trans2 );
1319 /* Check marked status of target states. Either pointer may be null. */
1320 static inline bool shouldMarkPtr( MarkIndex &markIndex,
1321 TransAp *trans1, TransAp *trans2 );
1324 * Callbacks.
1327 /* Compare priority and function table of transitions. */
1328 static int compareTransData( TransAp *trans1, TransAp *trans2 );
1330 /* Add in the properties of srcTrans into this. */
1331 void addInTrans( TransAp *destTrans, TransAp *srcTrans );
1333 /* Compare states on data stored in the states. */
1334 static int compareStateData( const StateAp *state1, const StateAp *state2 );
1336 /* Out transition data. */
1337 void clearOutData( StateAp *state );
1338 bool hasOutData( StateAp *state );
1339 void transferOutData( StateAp *destState, StateAp *srcState );
1342 * Allocation.
1345 /* New up a state and add it to the graph. */
1346 StateAp *addState();
1349 * Building basic machines
1352 void concatFsm( Key c );
1353 void concatFsm( Key *str, int len );
1354 void concatFsmCI( Key *str, int len );
1355 void orFsm( Key *set, int len );
1356 void rangeFsm( Key low, Key high );
1357 void rangeStarFsm( Key low, Key high );
1358 void emptyFsm( );
1359 void lambdaFsm( );
1362 * Fsm operators.
1365 void starOp( );
1366 void repeatOp( int times );
1367 void optionalRepeatOp( int times );
1368 void concatOp( FsmAp *other );
1369 void unionOp( FsmAp *other );
1370 void intersectOp( FsmAp *other );
1371 void subtractOp( FsmAp *other );
1372 void epsilonOp();
1373 void joinOp( int startId, int finalId, FsmAp **others, int numOthers );
1374 void globOp( FsmAp **others, int numOthers );
1375 void deterministicEntry();
1378 * Operator workers
1381 /* Determine if there are any entry points into a start state other than
1382 * the start state. */
1383 bool isStartStateIsolated();
1385 /* Make a new start state that has no entry points. Will not change the
1386 * identity of the fsm. */
1387 void isolateStartState();
1389 /* Workers for resolving epsilon transitions. */
1390 bool inEptVect( EptVect *eptVect, StateAp *targ );
1391 void epsilonFillEptVectFrom( StateAp *root, StateAp *from, bool parentLeaving );
1392 void resolveEpsilonTrans( MergeData &md );
1394 /* Workers for concatenation and union. */
1395 void doConcat( FsmAp *other, StateSet *fromStates, bool optional );
1396 void doOr( FsmAp *other );
1399 * Final states
1402 /* Unset any final states that are no longer to be final
1403 * due to final bits. */
1404 void unsetIncompleteFinals();
1405 void unsetKilledFinals();
1407 /* Bring in other's entry points. Assumes others states are going to be
1408 * copied into this machine. */
1409 void copyInEntryPoints( FsmAp *other );
1411 /* Ordering states. */
1412 void depthFirstOrdering( StateAp *state );
1413 void depthFirstOrdering();
1414 void sortStatesByFinal();
1416 /* Set sqequential state numbers starting at 0. */
1417 void setStateNumbers( int base );
1419 /* Unset all final states. */
1420 void unsetAllFinStates();
1422 /* Set the bits of final states and clear the bits of non final states. */
1423 void setFinBits( int finStateBits );
1426 * Self-consistency checks.
1429 /* Run a sanity check on the machine. */
1430 void verifyIntegrity();
1432 /* Verify that there are no unreachable states, or dead end states. */
1433 void verifyReachability();
1434 void verifyNoDeadEndStates();
1437 * Path pruning
1440 /* Mark all states reachable from state. */
1441 void markReachableFromHereReverse( StateAp *state );
1443 /* Mark all states reachable from state. */
1444 void markReachableFromHere( StateAp *state );
1445 void markReachableFromHereStopFinal( StateAp *state );
1447 /* Removes states that cannot be reached by any path in the fsm and are
1448 * thus wasted silicon. */
1449 void removeDeadEndStates();
1451 /* Removes states that cannot be reached by any path in the fsm and are
1452 * thus wasted silicon. */
1453 void removeUnreachableStates();
1455 /* Remove error actions from states on which the error transition will
1456 * never be taken. */
1457 bool outListCovers( StateAp *state );
1458 bool anyErrorRange( StateAp *state );
1460 /* Remove states that are on the misfit list. */
1461 void removeMisfits();
1464 * FSM Minimization
1467 /* Minimization by partitioning. */
1468 void minimizePartition1();
1469 void minimizePartition2();
1471 /* Minimize the final state Machine. The result is the minimal fsm. Slow
1472 * but stable, correct minimization. Uses n^2 space (lookout) and average
1473 * n^2 time. Worst case n^3 time, but a that is a very rare case. */
1474 void minimizeStable();
1476 /* Minimize the final state machine. Does not find the minimal fsm, but a
1477 * pretty good approximation. Does not use any extra space. Average n^2
1478 * time. Worst case n^3 time, but a that is a very rare case. */
1479 void minimizeApproximate();
1481 /* This is the worker for the minimize approximate solution. It merges
1482 * states that have identical out transitions. */
1483 bool minimizeRound( );
1485 /* Given an intial partioning of states, split partitions that have out trans
1486 * to differing partitions. */
1487 int partitionRound( StateAp **statePtrs, MinPartition *parts, int numParts );
1489 /* Split partitions that have a transition to a previously split partition, until
1490 * there are no more partitions to split. */
1491 int splitCandidates( StateAp **statePtrs, MinPartition *parts, int numParts );
1493 /* Fuse together states in the same partition. */
1494 void fusePartitions( MinPartition *parts, int numParts );
1496 /* Mark pairs where out final stateness differs, out trans data differs,
1497 * trans pairs go to a marked pair or trans data differs. Should get
1498 * alot of pairs. */
1499 void initialMarkRound( MarkIndex &markIndex );
1501 /* One marking round on all state pairs. Considers if trans pairs go
1502 * to a marked state only. Returns whether or not a pair was marked. */
1503 bool markRound( MarkIndex &markIndex );
1505 /* Move the in trans into src into dest. */
1506 void inTransMove(StateAp *dest, StateAp *src);
1508 /* Make state src and dest the same state. */
1509 void fuseEquivStates(StateAp *dest, StateAp *src);
1511 /* Find any states that didn't get marked by the marking algorithm and
1512 * merge them into the primary states of their equivalence class. */
1513 void fuseUnmarkedPairs( MarkIndex &markIndex );
1515 /* Merge neighboring transitions go to the same state and have the same
1516 * transitions data. */
1517 void compressTransitions();
1519 /* Returns true if there is a transtion (either explicit or by a gap) to
1520 * the error state. */
1521 bool checkErrTrans( StateAp *state, TransAp *trans );
1522 bool checkErrTransFinish( StateAp *state );
1523 bool hasErrorTrans();
1525 /* Check if a machine defines a single character. This is useful in
1526 * validating ranges and machines to export. */
1527 bool checkSingleCharMachine( );
1531 #endif /* _FSMGRAPH_H */