A fix to the documentation makefile from John D. Mitchell.
[ragel.git] / ragel / fsmgraph.cpp
blob34ea6612493a14042c93414e0fcfbda6014b1766
1 /*
2 * Copyright 2001, 2002, 2006 Adrian Thurston <thurston@cs.queensu.ca>
3 */
5 /* This file is part of Ragel.
7 * Ragel is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * Ragel is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Ragel; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 #include <assert.h>
23 #include <iostream>
25 #include "fsmgraph.h"
26 #include "mergesort.h"
27 #include "parsedata.h"
29 using std::cerr;
30 using std::endl;
32 /* Make a new state. The new state will be put on the graph's
33 * list of state. The new state can be created final or non final. */
34 StateAp *FsmAp::addState()
36 /* Make the new state to return. */
37 StateAp *state = new StateAp();
39 if ( misfitAccounting ) {
40 /* Create the new state on the misfit list. All states are created
41 * with no foreign in transitions. */
42 misfitList.append( state );
44 else {
45 /* Create the new state. */
46 stateList.append( state );
49 return state;
52 /* Construct an FSM that is the concatenation of an array of characters. A new
53 * machine will be made that has len+1 states with one transition between each
54 * state for each integer in str. IsSigned determines if the integers are to
55 * be considered as signed or unsigned ints. */
56 void FsmAp::concatFsm( Key *str, int len )
58 /* Make the first state and set it as the start state. */
59 StateAp *last = addState();
60 setStartState( last );
62 /* Attach subsequent states. */
63 for ( int i = 0; i < len; i++ ) {
64 StateAp *newState = addState();
65 attachNewTrans( last, newState, str[i], str[i] );
66 last = newState;
69 /* Make the last state the final state. */
70 setFinState( last );
73 /* Case insensitive version of concatFsm. */
74 void FsmAp::concatFsmCI( Key *str, int len )
76 /* Make the first state and set it as the start state. */
77 StateAp *last = addState();
78 setStartState( last );
80 /* Attach subsequent states. */
81 for ( int i = 0; i < len; i++ ) {
82 StateAp *newState = addState();
84 KeySet keySet;
85 if ( str[i].isLower() )
86 keySet.insert( str[i].toUpper() );
87 if ( str[i].isUpper() )
88 keySet.insert( str[i].toLower() );
89 keySet.insert( str[i] );
91 for ( int i = 0; i < keySet.length(); i++ )
92 attachNewTrans( last, newState, keySet[i], keySet[i] );
94 last = newState;
97 /* Make the last state the final state. */
98 setFinState( last );
101 /* Construct a machine that matches one character. A new machine will be made
102 * that has two states with a single transition between the states. IsSigned
103 * determines if the integers are to be considered as signed or unsigned ints. */
104 void FsmAp::concatFsm( Key chr )
106 /* Two states first start, second final. */
107 setStartState( addState() );
109 StateAp *end = addState();
110 setFinState( end );
112 /* Attach on the character. */
113 attachNewTrans( startState, end, chr, chr );
116 /* Construct a machine that matches any character in set. A new machine will
117 * be made that has two states and len transitions between the them. The set
118 * should be ordered correctly accroding to KeyOps and should not contain
119 * any duplicates. */
120 void FsmAp::orFsm( Key *set, int len )
122 /* Two states first start, second final. */
123 setStartState( addState() );
125 StateAp *end = addState();
126 setFinState( end );
128 for ( int i = 1; i < len; i++ )
129 assert( set[i-1] < set[i] );
131 /* Attach on all the integers in the given string of ints. */
132 for ( int i = 0; i < len; i++ )
133 attachNewTrans( startState, end, set[i], set[i] );
136 /* Construct a machine that matches a range of characters. A new machine will
137 * be made with two states and a range transition between them. The range will
138 * match any characters from low to high inclusive. Low should be less than or
139 * equal to high otherwise undefined behaviour results. IsSigned determines
140 * if the integers are to be considered as signed or unsigned ints. */
141 void FsmAp::rangeFsm( Key low, Key high )
143 /* Two states first start, second final. */
144 setStartState( addState() );
146 StateAp *end = addState();
147 setFinState( end );
149 /* Attach using the range of characters. */
150 attachNewTrans( startState, end, low, high );
153 /* Construct a machine that a repeated range of characters. */
154 void FsmAp::rangeStarFsm( Key low, Key high)
156 /* One state which is final and is the start state. */
157 setStartState( addState() );
158 setFinState( startState );
160 /* Attach start to start using range of characters. */
161 attachNewTrans( startState, startState, low, high );
164 /* Construct a machine that matches the empty string. A new machine will be
165 * made with only one state. The new state will be both a start and final
166 * state. IsSigned determines if the machine has a signed or unsigned
167 * alphabet. Fsm operations must be done on machines with the same alphabet
168 * signedness. */
169 void FsmAp::lambdaFsm( )
171 /* Give it one state with no transitions making it
172 * the start state and final state. */
173 setStartState( addState() );
174 setFinState( startState );
177 /* Construct a machine that matches nothing at all. A new machine will be
178 * made with only one state. It will not be final. */
179 void FsmAp::emptyFsm( )
181 /* Give it one state with no transitions making it
182 * the start state and final state. */
183 setStartState( addState() );
186 void FsmAp::transferOutData( StateAp *destState, StateAp *srcState )
188 for ( TransList::Iter trans = destState->outList; trans.lte(); trans++ ) {
189 if ( trans->toState != 0 ) {
190 /* Get the actions data from the outActionTable. */
191 trans->actionTable.setActions( srcState->outActionTable );
193 /* Get the priorities from the outPriorTable. */
194 trans->priorTable.setPriors( srcState->outPriorTable );
199 /* Kleene star operator. Makes this machine the kleene star of itself. Any
200 * transitions made going out of the machine and back into itself will be
201 * notified that they are leaving transitions by having the leavingFromState
202 * callback invoked. */
203 void FsmAp::starOp( )
205 /* For the merging process. */
206 MergeData md;
208 /* Turn on misfit accounting to possibly catch the old start state. */
209 setMisfitAccounting( true );
211 /* Create the new new start state. It will be set final after the merging
212 * of the final states with the start state is complete. */
213 StateAp *prevStartState = startState;
214 unsetStartState();
215 setStartState( addState() );
217 /* Merge the new start state with the old one to isolate it. */
218 mergeStates( md, startState, prevStartState );
220 /* Merge the start state into all final states. Except the start state on
221 * the first pass. If the start state is set final we will be doubling up
222 * its transitions, which will get transfered to any final states that
223 * follow it in the final state set. This will be determined by the order
224 * of items in the final state set. To prevent this we just merge with the
225 * start on a second pass. */
226 for ( StateSet::Iter st = finStateSet; st.lte(); st++ ) {
227 if ( *st != startState )
228 mergeStatesLeaving( md, *st, startState );
231 /* Now it is safe to merge the start state with itself (provided it
232 * is set final). */
233 if ( startState->isFinState() )
234 mergeStatesLeaving( md, startState, startState );
236 /* Now ensure the new start state is a final state. */
237 setFinState( startState );
239 /* Fill in any states that were newed up as combinations of others. */
240 fillInStates( md );
242 /* Remove the misfits and turn off misfit accounting. */
243 removeMisfits();
244 setMisfitAccounting( false );
247 void FsmAp::repeatOp( int times )
249 /* Must be 1 and up. 0 produces null machine and requires deleting this. */
250 assert( times > 0 );
252 /* A repeat of one does absolutely nothing. */
253 if ( times == 1 )
254 return;
256 /* Make a machine to make copies from. */
257 FsmAp *copyFrom = new FsmAp( *this );
259 /* Concatentate duplicates onto the end up until before the last. */
260 for ( int i = 1; i < times-1; i++ ) {
261 FsmAp *dup = new FsmAp( *copyFrom );
262 doConcat( dup, 0, false );
265 /* Now use the copyFrom on the end. */
266 doConcat( copyFrom, 0, false );
269 void FsmAp::optionalRepeatOp( int times )
271 /* Must be 1 and up. 0 produces null machine and requires deleting this. */
272 assert( times > 0 );
274 /* A repeat of one optional merely allows zero string. */
275 if ( times == 1 ) {
276 setFinState( startState );
277 return;
280 /* Make a machine to make copies from. */
281 FsmAp *copyFrom = new FsmAp( *this );
283 /* The state set used in the from end of the concatentation. Starts with
284 * the initial final state set, then after each concatenation, gets set to
285 * the the final states that come from the the duplicate. */
286 StateSet lastFinSet( finStateSet );
288 /* Set the initial state to zero to allow zero copies. */
289 setFinState( startState );
291 /* Concatentate duplicates onto the end up until before the last. */
292 for ( int i = 1; i < times-1; i++ ) {
293 /* Make a duplicate for concating and set the fin bits to graph 2 so we
294 * can pick out it's final states after the optional style concat. */
295 FsmAp *dup = new FsmAp( *copyFrom );
296 dup->setFinBits( STB_GRAPH2 );
297 doConcat( dup, &lastFinSet, true );
299 /* Clear the last final state set and make the new one by taking only
300 * the final states that come from graph 2.*/
301 lastFinSet.empty();
302 for ( int i = 0; i < finStateSet.length(); i++ ) {
303 /* If the state came from graph 2, add it to the last set and clear
304 * the bits. */
305 StateAp *fs = finStateSet[i];
306 if ( fs->stateBits & STB_GRAPH2 ) {
307 lastFinSet.insert( fs );
308 fs->stateBits &= ~STB_GRAPH2;
313 /* Now use the copyFrom on the end, no bits set, no bits to clear. */
314 doConcat( copyFrom, &lastFinSet, true );
318 /* Fsm concatentation worker. Supports treating the concatentation as optional,
319 * which essentially leaves the final states of machine one as final. */
320 void FsmAp::doConcat( FsmAp *other, StateSet *fromStates, bool optional )
322 /* For the merging process. */
323 StateSet finStateSetCopy, startStateSet;
324 MergeData md;
326 /* Turn on misfit accounting for both graphs. */
327 setMisfitAccounting( true );
328 other->setMisfitAccounting( true );
330 /* Get the other's start state. */
331 StateAp *otherStartState = other->startState;
333 /* Unset other's start state before bringing in the entry points. */
334 other->unsetStartState();
336 /* Bring in the rest of other's entry points. */
337 copyInEntryPoints( other );
338 other->entryPoints.empty();
340 /* Bring in other's states into our state lists. */
341 stateList.append( other->stateList );
342 misfitList.append( other->misfitList );
344 /* If from states is not set, then get a copy of our final state set before
345 * we clobber it and use it instead. */
346 if ( fromStates == 0 ) {
347 finStateSetCopy = finStateSet;
348 fromStates = &finStateSetCopy;
351 /* Unset all of our final states and get the final states from other. */
352 if ( !optional )
353 unsetAllFinStates();
354 finStateSet.insert( other->finStateSet );
356 /* Since other's lists are empty, we can delete the fsm without
357 * affecting any states. */
358 delete other;
360 /* Merge our former final states with the start state of other. */
361 for ( int i = 0; i < fromStates->length(); i++ ) {
362 StateAp *state = fromStates->data[i];
364 /* Merge the former final state with other's start state. */
365 mergeStatesLeaving( md, state, otherStartState );
367 /* If the former final state was not reset final then we must clear
368 * the state's out trans data. If it got reset final then it gets to
369 * keep its out trans data. This must be done before fillInStates gets
370 * called to prevent the data from being sourced. */
371 if ( ! state->isFinState() )
372 clearOutData( state );
375 /* Fill in any new states made from merging. */
376 fillInStates( md );
378 /* Remove the misfits and turn off misfit accounting. */
379 removeMisfits();
380 setMisfitAccounting( false );
383 /* Concatenates other to the end of this machine. Other is deleted. Any
384 * transitions made leaving this machine and entering into other are notified
385 * that they are leaving transitions by having the leavingFromState callback
386 * invoked. */
387 void FsmAp::concatOp( FsmAp *other )
389 /* Assert same signedness and return graph concatenation op. */
390 doConcat( other, 0, false );
394 void FsmAp::doOr( FsmAp *other )
396 /* For the merging process. */
397 MergeData md;
399 /* Build a state set consisting of both start states */
400 StateSet startStateSet;
401 startStateSet.insert( startState );
402 startStateSet.insert( other->startState );
404 /* Both of the original start states loose their start state status. */
405 unsetStartState();
406 other->unsetStartState();
408 /* Bring in the rest of other's entry points. */
409 copyInEntryPoints( other );
410 other->entryPoints.empty();
412 /* Merge the lists. This will move all the states from other
413 * into this. No states will be deleted. */
414 stateList.append( other->stateList );
415 misfitList.append( other->misfitList );
417 /* Move the final set data from other into this. */
418 finStateSet.insert(other->finStateSet);
419 other->finStateSet.empty();
421 /* Since other's list is empty, we can delete the fsm without
422 * affecting any states. */
423 delete other;
425 /* Create a new start state. */
426 setStartState( addState() );
428 /* Merge the start states. */
429 mergeStates( md, startState, startStateSet.data, startStateSet.length() );
431 /* Fill in any new states made from merging. */
432 fillInStates( md );
435 /* Unions other with this machine. Other is deleted. */
436 void FsmAp::unionOp( FsmAp *other )
438 /* Turn on misfit accounting for both graphs. */
439 setMisfitAccounting( true );
440 other->setMisfitAccounting( true );
442 /* Call Worker routine. */
443 doOr( other );
445 /* Remove the misfits and turn off misfit accounting. */
446 removeMisfits();
447 setMisfitAccounting( false );
450 /* Intersects other with this machine. Other is deleted. */
451 void FsmAp::intersectOp( FsmAp *other )
453 /* Turn on misfit accounting for both graphs. */
454 setMisfitAccounting( true );
455 other->setMisfitAccounting( true );
457 /* Set the fin bits on this and other to want each other. */
458 setFinBits( STB_GRAPH1 );
459 other->setFinBits( STB_GRAPH2 );
461 /* Call worker Or routine. */
462 doOr( other );
464 /* Unset any final states that are no longer to
465 * be final due to final bits. */
466 unsetIncompleteFinals();
468 /* Remove the misfits and turn off misfit accounting. */
469 removeMisfits();
470 setMisfitAccounting( false );
472 /* Remove states that have no path to a final state. */
473 removeDeadEndStates();
476 /* Set subtracts other machine from this machine. Other is deleted. */
477 void FsmAp::subtractOp( FsmAp *other )
479 /* Turn on misfit accounting for both graphs. */
480 setMisfitAccounting( true );
481 other->setMisfitAccounting( true );
483 /* Set the fin bits of other to be killers. */
484 other->setFinBits( STB_GRAPH1 );
486 /* Call worker Or routine. */
487 doOr( other );
489 /* Unset any final states that are no longer to
490 * be final due to final bits. */
491 unsetKilledFinals();
493 /* Remove the misfits and turn off misfit accounting. */
494 removeMisfits();
495 setMisfitAccounting( false );
497 /* Remove states that have no path to a final state. */
498 removeDeadEndStates();
501 bool FsmAp::inEptVect( EptVect *eptVect, StateAp *state )
503 if ( eptVect != 0 ) {
504 /* Vect is there, walk it looking for state. */
505 for ( int i = 0; i < eptVect->length(); i++ ) {
506 if ( eptVect->data[i].targ == state )
507 return true;
510 return false;
513 /* Fill epsilon vectors in a root state from a given starting point. Epmploys
514 * a depth first search through the graph of epsilon transitions. */
515 void FsmAp::epsilonFillEptVectFrom( StateAp *root, StateAp *from, bool parentLeaving )
517 /* Walk the epsilon transitions out of the state. */
518 for ( EpsilonTrans::Iter ep = from->epsilonTrans; ep.lte(); ep++ ) {
519 /* Find the entry point, if the it does not resove, ignore it. */
520 EntryMapEl *enLow, *enHigh;
521 if ( entryPoints.findMulti( *ep, enLow, enHigh ) ) {
522 /* Loop the targets. */
523 for ( EntryMapEl *en = enLow; en <= enHigh; en++ ) {
524 /* Do not add the root or states already in eptVect. */
525 StateAp *targ = en->value;
526 if ( targ != from && !inEptVect(root->eptVect, targ) ) {
527 /* Maybe need to create the eptVect. */
528 if ( root->eptVect == 0 )
529 root->eptVect = new EptVect();
531 /* If moving to a different graph or if any parent is
532 * leaving then we are leaving. */
533 bool leaving = parentLeaving ||
534 root->owningGraph != targ->owningGraph;
536 /* All ok, add the target epsilon and recurse. */
537 root->eptVect->append( EptVectEl(targ, leaving) );
538 epsilonFillEptVectFrom( root, targ, leaving );
545 void FsmAp::shadowReadWriteStates( MergeData &md )
547 /* Init isolatedShadow algorithm data. */
548 for ( StateList::Iter st = stateList; st.lte(); st++ )
549 st->isolatedShadow = 0;
551 /* Any states that may be both read from and written to must
552 * be shadowed. */
553 for ( StateList::Iter st = stateList; st.lte(); st++ ) {
554 /* Find such states by looping through stateVect lists, which give us
555 * the states that will be read from. May cause us to visit the states
556 * that we are interested in more than once. */
557 if ( st->eptVect != 0 ) {
558 /* For all states that will be read from. */
559 for ( EptVect::Iter ept = *st->eptVect; ept.lte(); ept++ ) {
560 /* Check for read and write to the same state. */
561 StateAp *targ = ept->targ;
562 if ( targ->eptVect != 0 ) {
563 /* State is to be written to, if the shadow is not already
564 * there, create it. */
565 if ( targ->isolatedShadow == 0 ) {
566 StateAp *shadow = addState();
567 mergeStates( md, shadow, targ );
568 targ->isolatedShadow = shadow;
571 /* Write shadow into the state vector so that it is the
572 * state that the epsilon transition will read from. */
573 ept->targ = targ->isolatedShadow;
580 void FsmAp::resolveEpsilonTrans( MergeData &md )
582 /* Walk the state list and invoke recursive worker on each state. */
583 for ( StateList::Iter st = stateList; st.lte(); st++ )
584 epsilonFillEptVectFrom( st, st, false );
586 /* Prevent reading from and writing to of the same state. */
587 shadowReadWriteStates( md );
589 /* For all states that have epsilon transitions out, draw the transitions,
590 * clear the epsilon transitions. */
591 for ( StateList::Iter st = stateList; st.lte(); st++ ) {
592 /* If there is a state vector, then create the pre-merge state. */
593 if ( st->eptVect != 0 ) {
594 /* Merge all the epsilon targets into the state. */
595 for ( EptVect::Iter ept = *st->eptVect; ept.lte(); ept++ ) {
596 if ( ept->leaving )
597 mergeStatesLeaving( md, st, ept->targ );
598 else
599 mergeStates( md, st, ept->targ );
602 /* Clean up the target list. */
603 delete st->eptVect;
604 st->eptVect = 0;
607 /* Clear the epsilon transitions vector. */
608 st->epsilonTrans.empty();
612 void FsmAp::epsilonOp()
614 /* For merging process. */
615 MergeData md;
617 setMisfitAccounting( true );
619 for ( StateList::Iter st = stateList; st.lte(); st++ )
620 st->owningGraph = 0;
622 /* Perform merges. */
623 resolveEpsilonTrans( md );
625 /* Epsilons can caused merges which leave behind unreachable states. */
626 fillInStates( md );
628 /* Remove the misfits and turn off misfit accounting. */
629 removeMisfits();
630 setMisfitAccounting( false );
633 /* Make a new maching by joining together a bunch of machines without making
634 * any transitions between them. A negative finalId results in there being no
635 * final id. */
636 void FsmAp::joinOp( int startId, int finalId, FsmAp **others, int numOthers )
638 /* For the merging process. */
639 MergeData md;
641 /* Set the owning machines. Start at one. Zero is reserved for the start
642 * and final states. */
643 for ( StateList::Iter st = stateList; st.lte(); st++ )
644 st->owningGraph = 1;
645 for ( int m = 0; m < numOthers; m++ ) {
646 for ( StateList::Iter st = others[m]->stateList; st.lte(); st++ )
647 st->owningGraph = 2+m;
650 /* All machines loose start state status. */
651 unsetStartState();
652 for ( int m = 0; m < numOthers; m++ )
653 others[m]->unsetStartState();
655 /* Bring the other machines into this. */
656 for ( int m = 0; m < numOthers; m++ ) {
657 /* Bring in the rest of other's entry points. */
658 copyInEntryPoints( others[m] );
659 others[m]->entryPoints.empty();
661 /* Merge the lists. This will move all the states from other into
662 * this. No states will be deleted. */
663 stateList.append( others[m]->stateList );
664 assert( others[m]->misfitList.length() == 0 );
666 /* Move the final set data from other into this. */
667 finStateSet.insert( others[m]->finStateSet );
668 others[m]->finStateSet.empty();
670 /* Since other's list is empty, we can delete the fsm without
671 * affecting any states. */
672 delete others[m];
675 /* Look up the start entry point. */
676 EntryMapEl *enLow = 0, *enHigh = 0;
677 bool findRes = entryPoints.findMulti( startId, enLow, enHigh );
678 if ( ! findRes ) {
679 /* No start state. Set a default one and proceed with the join. Note
680 * that the result of the join will be a very uninteresting machine. */
681 setStartState( addState() );
683 else {
684 /* There is at least one start state, create a state that will become
685 * the new start state. */
686 StateAp *newStart = addState();
687 setStartState( newStart );
689 /* The start state is in an owning machine class all it's own. */
690 newStart->owningGraph = 0;
692 /* Create the set of states to merge from. */
693 StateSet stateSet;
694 for ( EntryMapEl *en = enLow; en <= enHigh; en++ )
695 stateSet.insert( en->value );
697 /* Merge in the set of start states into the new start state. */
698 mergeStates( md, newStart, stateSet.data, stateSet.length() );
701 /* Take a copy of the final state set, before unsetting them all. This
702 * will allow us to call clearOutData on the states that don't get
703 * final state status back back. */
704 StateSet finStateSetCopy = finStateSet;
706 /* Now all final states are unset. */
707 unsetAllFinStates();
709 if ( finalId >= 0 ) {
710 /* Create the implicit final state. */
711 StateAp *finState = addState();
712 setFinState( finState );
714 /* Assign an entry into the final state on the final state entry id. Note
715 * that there may already be an entry on this id. That's ok. Also set the
716 * final state owning machine id. It's in a class all it's own. */
717 setEntry( finalId, finState );
718 finState->owningGraph = 0;
721 /* Hand over to workers for resolving epsilon trans. This will merge states
722 * with the targets of their epsilon transitions. */
723 resolveEpsilonTrans( md );
725 /* Invoke the relinquish final callback on any states that did not get
726 * final state status back. */
727 for ( StateSet::Iter st = finStateSetCopy; st.lte(); st++ ) {
728 if ( !((*st)->stateBits & STB_ISFINAL) )
729 clearOutData( *st );
732 /* Fill in any new states made from merging. */
733 fillInStates( md );
735 /* Joining can be messy. Instead of having misfit accounting on (which is
736 * tricky here) do a full cleaning. */
737 removeUnreachableStates();
740 void FsmAp::globOp( FsmAp **others, int numOthers )
742 /* All other machines loose start states status. */
743 for ( int m = 0; m < numOthers; m++ )
744 others[m]->unsetStartState();
746 /* Bring the other machines into this. */
747 for ( int m = 0; m < numOthers; m++ ) {
748 /* Bring in the rest of other's entry points. */
749 copyInEntryPoints( others[m] );
750 others[m]->entryPoints.empty();
752 /* Merge the lists. This will move all the states from other into
753 * this. No states will be deleted. */
754 stateList.append( others[m]->stateList );
755 assert( others[m]->misfitList.length() == 0 );
757 /* Move the final set data from other into this. */
758 finStateSet.insert( others[m]->finStateSet );
759 others[m]->finStateSet.empty();
761 /* Since other's list is empty, we can delete the fsm without
762 * affecting any states. */
763 delete others[m];
767 void FsmAp::deterministicEntry()
769 /* For the merging process. */
770 MergeData md;
772 /* States may loose their entry points, turn on misfit accounting. */
773 setMisfitAccounting( true );
775 /* Get a copy of the entry map then clear all the entry points. As we
776 * iterate the old entry map finding duplicates we will add the entry
777 * points for the new states that we create. */
778 EntryMap prevEntry = entryPoints;
779 unsetAllEntryPoints();
781 for ( int enId = 0; enId < prevEntry.length(); ) {
782 /* Count the number of states on this entry key. */
783 int highId = enId;
784 while ( highId < prevEntry.length() && prevEntry[enId].key == prevEntry[highId].key )
785 highId += 1;
787 int numIds = highId - enId;
788 if ( numIds == 1 ) {
789 /* Only a single entry point, just set the entry. */
790 setEntry( prevEntry[enId].key, prevEntry[enId].value );
792 else {
793 /* Multiple entry points, need to create a new state and merge in
794 * all the targets of entry points. */
795 StateAp *newEntry = addState();
796 for ( int en = enId; en < highId; en++ )
797 mergeStates( md, newEntry, prevEntry[en].value );
799 /* Add the new state as the single entry point. */
800 setEntry( prevEntry[enId].key, newEntry );
803 enId += numIds;
806 /* The old start state may be unreachable. Remove the misfits and turn off
807 * misfit accounting. */
808 removeMisfits();
809 setMisfitAccounting( false );
812 /* Unset any final states that are no longer to be final due to final bits. */
813 void FsmAp::unsetKilledFinals()
815 /* Duplicate the final state set before we begin modifying it. */
816 StateSet fin( finStateSet );
818 for ( int s = 0; s < fin.length(); s++ ) {
819 /* Check for killing bit. */
820 StateAp *state = fin.data[s];
821 if ( state->stateBits & STB_GRAPH1 ) {
822 /* One final state is a killer, set to non-final. */
823 unsetFinState( state );
826 /* Clear all killing bits. Non final states should never have had those
827 * state bits set in the first place. */
828 state->stateBits &= ~STB_GRAPH1;
832 /* Unset any final states that are no longer to be final due to final bits. */
833 void FsmAp::unsetIncompleteFinals()
835 /* Duplicate the final state set before we begin modifying it. */
836 StateSet fin( finStateSet );
838 for ( int s = 0; s < fin.length(); s++ ) {
839 /* Check for one set but not the other. */
840 StateAp *state = fin.data[s];
841 if ( state->stateBits & STB_BOTH &&
842 (state->stateBits & STB_BOTH) != STB_BOTH )
844 /* One state wants the other but it is not there. */
845 unsetFinState( state );
848 /* Clear wanting bits. Non final states should never have had those
849 * state bits set in the first place. */
850 state->stateBits &= ~STB_BOTH;
854 /* Ensure that the start state is free of entry points (aside from the fact
855 * that it is the start state). If the start state has entry points then Make a
856 * new start state by merging with the old one. Useful before modifying start
857 * transitions. If the existing start state has any entry points other than the
858 * start state entry then modifying its transitions changes more than the start
859 * transitions. So isolate the start state by separating it out such that it
860 * only has start stateness as it's entry point. */
861 void FsmAp::isolateStartState( )
863 /* For the merging process. */
864 MergeData md;
866 /* Bail out if the start state is already isolated. */
867 if ( isStartStateIsolated() )
868 return;
870 /* Turn on misfit accounting to possibly catch the old start state. */
871 setMisfitAccounting( true );
873 /* This will be the new start state. The existing start
874 * state is merged with it. */
875 StateAp *prevStartState = startState;
876 unsetStartState();
877 setStartState( addState() );
879 /* Merge the new start state with the old one to isolate it. */
880 mergeStates( md, startState, prevStartState );
882 /* Stfil and stateDict will be empty because the merging of the old start
883 * state into the new one will not have any conflicting transitions. */
884 assert( md.stateDict.treeSize == 0 );
885 assert( md.stfillHead == 0 );
887 /* The old start state may be unreachable. Remove the misfits and turn off
888 * misfit accounting. */
889 removeMisfits();
890 setMisfitAccounting( false );
893 #ifdef LOG_CONDS
894 void logCondSpace( CondSpace *condSpace )
896 if ( condSpace == 0 )
897 cerr << "<empty>";
898 else {
899 for ( CondSet::Iter csi = condSpace->condSet.last(); csi.gtb(); csi-- ) {
900 if ( ! csi.last() )
901 cerr << ',';
902 (*csi)->actionName( cerr );
907 void logNewExpansion( Expansion *exp )
909 cerr << "created expansion:" << endl;
910 cerr << " range: " << exp->lowKey.getVal() << " .. " <<
911 exp->highKey.getVal() << endl;
913 cerr << " fromCondSpace: ";
914 logCondSpace( exp->fromCondSpace );
915 cerr << endl;
916 cerr << " fromVals: " << exp->fromVals << endl;
918 cerr << " toCondSpace: ";
919 logCondSpace( exp->toCondSpace );
920 cerr << endl;
921 cerr << " toValsList: ";
922 for ( LongVect::Iter to = exp->toValsList; to.lte(); to++ )
923 cerr << " " << *to;
924 cerr << endl;
926 #endif
929 void FsmAp::findTransExpansions( ExpansionList &expansionList,
930 StateAp *destState, StateAp *srcState )
932 PairIter<TransAp, StateCond> transCond( destState->outList.head,
933 srcState->stateCondList.head );
934 for ( ; !transCond.end(); transCond++ ) {
935 if ( transCond.userState == RangeOverlap ) {
936 Expansion *expansion = new Expansion( transCond.s1Tel.lowKey,
937 transCond.s1Tel.highKey );
938 expansion->fromTrans = new TransAp(*transCond.s1Tel.trans);
939 expansion->fromTrans->fromState = 0;
940 expansion->fromTrans->toState = transCond.s1Tel.trans->toState;
941 expansion->fromCondSpace = 0;
942 expansion->fromVals = 0;
943 CondSpace *srcCS = transCond.s2Tel.trans->condSpace;
944 expansion->toCondSpace = srcCS;
946 long numTargVals = (1 << srcCS->condSet.length());
947 for ( long targVals = 0; targVals < numTargVals; targVals++ )
948 expansion->toValsList.append( targVals );
950 #ifdef LOG_CONDS
951 logNewExpansion( expansion );
952 #endif
953 expansionList.append( expansion );
958 void FsmAp::findCondExpInTrans( ExpansionList &expansionList, StateAp *state,
959 Key lowKey, Key highKey, CondSpace *fromCondSpace, CondSpace *toCondSpace,
960 long fromVals, LongVect &toValsList )
962 /* Make condition-space low and high keys for searching. */
963 TransAp searchTrans;
964 searchTrans.lowKey = fromCondSpace->baseKey + fromVals * keyOps->alphSize() +
965 (lowKey - keyOps->minKey);
966 searchTrans.highKey = fromCondSpace->baseKey + fromVals * keyOps->alphSize() +
967 (highKey - keyOps->minKey);
968 searchTrans.prev = searchTrans.next = 0;
970 PairIter<TransAp> pairIter( state->outList.head, &searchTrans );
971 for ( ; !pairIter.end(); pairIter++ ) {
972 if ( pairIter.userState == RangeOverlap ) {
973 /* Need to make character-space low and high keys from the range
974 * overlap for the expansion object. */
975 Key expLowKey = pairIter.s1Tel.lowKey - fromCondSpace->baseKey - fromVals *
976 keyOps->alphSize() + keyOps->minKey;
977 Key expHighKey = pairIter.s1Tel.highKey - fromCondSpace->baseKey - fromVals *
978 keyOps->alphSize() + keyOps->minKey;
980 Expansion *expansion = new Expansion( expLowKey, expHighKey );
981 expansion->fromTrans = new TransAp(*pairIter.s1Tel.trans);
982 expansion->fromTrans->fromState = 0;
983 expansion->fromTrans->toState = pairIter.s1Tel.trans->toState;
984 expansion->fromCondSpace = fromCondSpace;
985 expansion->fromVals = fromVals;
986 expansion->toCondSpace = toCondSpace;
987 expansion->toValsList = toValsList;
989 expansionList.append( expansion );
990 #ifdef LOG_CONDS
991 logNewExpansion( expansion );
992 #endif
997 void FsmAp::findCondExpansions( ExpansionList &expansionList,
998 StateAp *destState, StateAp *srcState )
1000 PairIter<StateCond, StateCond> condCond( destState->stateCondList.head,
1001 srcState->stateCondList.head );
1002 for ( ; !condCond.end(); condCond++ ) {
1003 if ( condCond.userState == RangeOverlap ) {
1004 /* Loop over all existing condVals . */
1005 CondSet &destCS = condCond.s1Tel.trans->condSpace->condSet;
1006 long destLen = destCS.length();
1008 /* Find the items in src cond set that are not in dest
1009 * cond set. These are the items that we must expand. */
1010 CondSet srcOnlyCS = condCond.s2Tel.trans->condSpace->condSet;
1011 for ( CondSet::Iter dcsi = destCS; dcsi.lte(); dcsi++ )
1012 srcOnlyCS.remove( *dcsi );
1013 long srcOnlyLen = srcOnlyCS.length();
1015 if ( srcOnlyCS.length() > 0 ) {
1016 #ifdef LOG_CONDS
1017 cerr << "there are " << srcOnlyCS.length() << " item(s) that are "
1018 "only in the srcCS" << endl;
1019 #endif
1021 CondSet mergedCS = destCS;
1022 mergedCS.insert( condCond.s2Tel.trans->condSpace->condSet );
1024 CondSpace *fromCondSpace = addCondSpace( destCS );
1025 CondSpace *toCondSpace = addCondSpace( mergedCS );
1027 /* Loop all values in the dest space. */
1028 for ( long destVals = 0; destVals < (1 << destLen); destVals++ ) {
1029 long basicVals = 0;
1030 for ( CondSet::Iter csi = destCS; csi.lte(); csi++ ) {
1031 if ( destVals & (1 << csi.pos()) ) {
1032 Action **cim = mergedCS.find( *csi );
1033 long bitPos = (cim - mergedCS.data);
1034 basicVals |= 1 << bitPos;
1038 /* Loop all new values. */
1039 LongVect expandToVals;
1040 for ( long soVals = 0; soVals < (1 << srcOnlyLen); soVals++ ) {
1041 long targVals = basicVals;
1042 for ( CondSet::Iter csi = srcOnlyCS; csi.lte(); csi++ ) {
1043 if ( soVals & (1 << csi.pos()) ) {
1044 Action **cim = mergedCS.find( *csi );
1045 long bitPos = (cim - mergedCS.data);
1046 targVals |= 1 << bitPos;
1049 expandToVals.append( targVals );
1052 findCondExpInTrans( expansionList, destState,
1053 condCond.s1Tel.lowKey, condCond.s1Tel.highKey,
1054 fromCondSpace, toCondSpace, destVals, expandToVals );
1061 void FsmAp::doExpand( MergeData &md, StateAp *destState, ExpansionList &expList1 )
1063 for ( ExpansionList::Iter exp = expList1; exp.lte(); exp++ ) {
1064 for ( LongVect::Iter to = exp->toValsList; to.lte(); to++ ) {
1065 long targVals = *to;
1067 /* We will use the copy of the transition that was made when the
1068 * expansion was created. It will get used multiple times. Each
1069 * time we must set up the keys, everything else is constant and
1070 * and already prepared. */
1071 TransAp *srcTrans = exp->fromTrans;
1073 srcTrans->lowKey = exp->toCondSpace->baseKey +
1074 targVals * keyOps->alphSize() + (exp->lowKey - keyOps->minKey);
1075 srcTrans->highKey = exp->toCondSpace->baseKey +
1076 targVals * keyOps->alphSize() + (exp->highKey - keyOps->minKey);
1078 TransList srcList;
1079 srcList.append( srcTrans );
1080 outTransCopy( md, destState, srcList.head );
1081 srcList.abandon();
1087 void FsmAp::doRemove( MergeData &md, StateAp *destState, ExpansionList &expList1 )
1089 for ( ExpansionList::Iter exp = expList1; exp.lte(); exp++ ) {
1090 Removal removal;
1091 if ( exp->fromCondSpace == 0 ) {
1092 removal.lowKey = exp->lowKey;
1093 removal.highKey = exp->highKey;
1095 else {
1096 removal.lowKey = exp->fromCondSpace->baseKey +
1097 exp->fromVals * keyOps->alphSize() + (exp->lowKey - keyOps->minKey);
1098 removal.highKey = exp->fromCondSpace->baseKey +
1099 exp->fromVals * keyOps->alphSize() + (exp->highKey - keyOps->minKey);
1101 removal.next = 0;
1103 TransList destList;
1104 PairIter<TransAp, Removal> pairIter( destState->outList.head, &removal );
1105 for ( ; !pairIter.end(); pairIter++ ) {
1106 switch ( pairIter.userState ) {
1107 case RangeInS1: {
1108 TransAp *destTrans = pairIter.s1Tel.trans;
1109 destTrans->lowKey = pairIter.s1Tel.lowKey;
1110 destTrans->highKey = pairIter.s1Tel.highKey;
1111 destList.append( destTrans );
1112 break;
1114 case RangeInS2:
1115 break;
1116 case RangeOverlap: {
1117 TransAp *trans = pairIter.s1Tel.trans;
1118 detachTrans( trans->fromState, trans->toState, trans );
1119 delete trans;
1120 break;
1122 case BreakS1: {
1123 pairIter.s1Tel.trans = dupTrans( destState,
1124 pairIter.s1Tel.trans );
1125 break;
1127 case BreakS2:
1128 break;
1131 destState->outList.transfer( destList );
1135 void FsmAp::mergeStateConds( StateAp *destState, StateAp *srcState )
1137 StateCondList destList;
1138 PairIter<StateCond> pairIter( destState->stateCondList.head,
1139 srcState->stateCondList.head );
1140 for ( ; !pairIter.end(); pairIter++ ) {
1141 switch ( pairIter.userState ) {
1142 case RangeInS1: {
1143 StateCond *destCond = pairIter.s1Tel.trans;
1144 destCond->lowKey = pairIter.s1Tel.lowKey;
1145 destCond->highKey = pairIter.s1Tel.highKey;
1146 destList.append( destCond );
1147 break;
1149 case RangeInS2: {
1150 StateCond *newCond = new StateCond( *pairIter.s2Tel.trans );
1151 newCond->lowKey = pairIter.s2Tel.lowKey;
1152 newCond->highKey = pairIter.s2Tel.highKey;
1153 destList.append( newCond );
1154 break;
1156 case RangeOverlap: {
1157 StateCond *destCond = pairIter.s1Tel.trans;
1158 StateCond *srcCond = pairIter.s2Tel.trans;
1159 CondSet mergedCondSet;
1160 mergedCondSet.insert( destCond->condSpace->condSet );
1161 mergedCondSet.insert( srcCond->condSpace->condSet );
1162 destCond->condSpace = addCondSpace( mergedCondSet );
1164 destCond->lowKey = pairIter.s1Tel.lowKey;
1165 destCond->highKey = pairIter.s1Tel.highKey;
1166 destList.append( destCond );
1167 break;
1169 case BreakS1:
1170 pairIter.s1Tel.trans = new StateCond( *pairIter.s1Tel.trans );
1171 break;
1173 case BreakS2:
1174 break;
1177 destState->stateCondList.transfer( destList );
1180 /* A state merge which represents the drawing in of leaving transitions. If
1181 * there is any out data then we duplicate the source state, transfer the out
1182 * data, then merge in the state. The new state will be reaped because it will
1183 * not be given any in transitions. */
1184 void FsmAp::mergeStatesLeaving( MergeData &md, StateAp *destState, StateAp *srcState )
1186 if ( !hasOutData( destState ) )
1187 mergeStates( md, destState, srcState );
1188 else {
1189 StateAp *ssMutable = addState();
1190 mergeStates( md, ssMutable, srcState );
1191 transferOutData( ssMutable, destState );
1193 for ( OutCondSet::Iter cond = destState->outCondSet; cond.lte(); cond++ )
1194 embedCondition( md, ssMutable, cond->action, cond->sense );
1196 mergeStates( md, destState, ssMutable );
1200 void FsmAp::mergeStates( MergeData &md, StateAp *destState,
1201 StateAp **srcStates, int numSrc )
1203 for ( int s = 0; s < numSrc; s++ )
1204 mergeStates( md, destState, srcStates[s] );
1207 void FsmAp::mergeStates( MergeData &md, StateAp *destState, StateAp *srcState )
1209 ExpansionList expList1;
1210 ExpansionList expList2;
1212 findTransExpansions( expList1, destState, srcState );
1213 findCondExpansions( expList1, destState, srcState );
1214 findTransExpansions( expList2, srcState, destState );
1215 findCondExpansions( expList2, srcState, destState );
1217 mergeStateConds( destState, srcState );
1219 outTransCopy( md, destState, srcState->outList.head );
1221 doExpand( md, destState, expList1 );
1222 doExpand( md, destState, expList2 );
1224 doRemove( md, destState, expList1 );
1225 doRemove( md, destState, expList2 );
1227 expList1.empty();
1228 expList2.empty();
1230 /* Get its bits and final state status. */
1231 destState->stateBits |= ( srcState->stateBits & ~STB_ISFINAL );
1232 if ( srcState->isFinState() )
1233 setFinState( destState );
1235 /* Draw in any properties of srcState into destState. */
1236 if ( srcState == destState ) {
1237 /* Duplicate the list to protect against write to source. The
1238 * priorities sets are not copied in because that would have no
1239 * effect. */
1240 destState->epsilonTrans.append( EpsilonTrans( srcState->epsilonTrans ) );
1242 /* Get all actions, duplicating to protect against write to source. */
1243 destState->toStateActionTable.setActions(
1244 ActionTable( srcState->toStateActionTable ) );
1245 destState->fromStateActionTable.setActions(
1246 ActionTable( srcState->fromStateActionTable ) );
1247 destState->outActionTable.setActions( ActionTable( srcState->outActionTable ) );
1248 destState->outCondSet.insert( OutCondSet( srcState->outCondSet ) );
1249 destState->errActionTable.setActions( ErrActionTable( srcState->errActionTable ) );
1250 destState->eofActionTable.setActions( ActionTable( srcState->eofActionTable ) );
1252 else {
1253 /* Get the epsilons, out priorities. */
1254 destState->epsilonTrans.append( srcState->epsilonTrans );
1255 destState->outPriorTable.setPriors( srcState->outPriorTable );
1257 /* Get all actions. */
1258 destState->toStateActionTable.setActions( srcState->toStateActionTable );
1259 destState->fromStateActionTable.setActions( srcState->fromStateActionTable );
1260 destState->outActionTable.setActions( srcState->outActionTable );
1261 destState->outCondSet.insert( srcState->outCondSet );
1262 destState->errActionTable.setActions( srcState->errActionTable );
1263 destState->eofActionTable.setActions( srcState->eofActionTable );
1267 void FsmAp::fillInStates( MergeData &md )
1269 /* Merge any states that are awaiting merging. This will likey cause
1270 * other states to be added to the stfil list. */
1271 StateAp *state = md.stfillHead;
1272 while ( state != 0 ) {
1273 StateSet *stateSet = &state->stateDictEl->stateSet;
1274 mergeStates( md, state, stateSet->data, stateSet->length() );
1275 state = state->alg.next;
1278 /* Delete the state sets of all states that are on the fill list. */
1279 state = md.stfillHead;
1280 while ( state != 0 ) {
1281 /* Delete and reset the state set. */
1282 delete state->stateDictEl;
1283 state->stateDictEl = 0;
1285 /* Next state in the stfill list. */
1286 state = state->alg.next;
1289 /* StateDict will still have its ptrs/size set but all of it's element
1290 * will be deleted so we don't need to clean it up. */
1293 void FsmAp::findEmbedExpansions( ExpansionList &expansionList,
1294 StateAp *destState, Action *condAction, bool sense )
1296 StateCondList destList;
1297 PairIter<TransAp, StateCond> transCond( destState->outList.head,
1298 destState->stateCondList.head );
1299 for ( ; !transCond.end(); transCond++ ) {
1300 switch ( transCond.userState ) {
1301 case RangeInS1: {
1302 if ( transCond.s1Tel.lowKey <= keyOps->maxKey ) {
1303 assert( transCond.s1Tel.highKey <= keyOps->maxKey );
1305 /* Make a new state cond. */
1306 StateCond *newStateCond = new StateCond( transCond.s1Tel.lowKey,
1307 transCond.s1Tel.highKey );
1308 newStateCond->condSpace = addCondSpace( CondSet( condAction ) );
1309 destList.append( newStateCond );
1311 /* Create the expansion. */
1312 Expansion *expansion = new Expansion( transCond.s1Tel.lowKey,
1313 transCond.s1Tel.highKey );
1314 expansion->fromTrans = new TransAp(*transCond.s1Tel.trans);
1315 expansion->fromTrans->fromState = 0;
1316 expansion->fromTrans->toState = transCond.s1Tel.trans->toState;
1317 expansion->fromCondSpace = 0;
1318 expansion->fromVals = 0;
1319 expansion->toCondSpace = newStateCond->condSpace;
1320 expansion->toValsList.append( sense?1:0 );
1321 #ifdef LOG_CONDS
1322 logNewExpansion( expansion );
1323 #endif
1324 expansionList.append( expansion );
1326 break;
1328 case RangeInS2: {
1329 /* Enhance state cond and find the expansion. */
1330 StateCond *stateCond = transCond.s2Tel.trans;
1331 stateCond->lowKey = transCond.s2Tel.lowKey;
1332 stateCond->highKey = transCond.s2Tel.highKey;
1334 CondSet &destCS = stateCond->condSpace->condSet;
1335 long destLen = destCS.length();
1336 CondSpace *fromCondSpace = stateCond->condSpace;
1338 CondSet mergedCS = destCS;
1339 mergedCS.insert( condAction );
1340 CondSpace *toCondSpace = addCondSpace( mergedCS );
1341 stateCond->condSpace = toCondSpace;
1342 destList.append( stateCond );
1344 /* Loop all values in the dest space. */
1345 for ( long destVals = 0; destVals < (1 << destLen); destVals++ ) {
1346 long basicVals = 0;
1347 for ( CondSet::Iter csi = destCS; csi.lte(); csi++ ) {
1348 if ( destVals & (1 << csi.pos()) ) {
1349 Action **cim = mergedCS.find( *csi );
1350 long bitPos = (cim - mergedCS.data);
1351 basicVals |= 1 << bitPos;
1355 long targVals = basicVals;
1356 Action **cim = mergedCS.find( condAction );
1357 long bitPos = (cim - mergedCS.data);
1358 targVals |= (sense?1:0) << bitPos;
1360 LongVect expandToVals( targVals );
1361 findCondExpInTrans( expansionList, destState,
1362 transCond.s2Tel.lowKey, transCond.s2Tel.highKey,
1363 fromCondSpace, toCondSpace, destVals, expandToVals );
1365 break;
1369 case RangeOverlap:
1370 case BreakS1:
1371 case BreakS2:
1372 assert( false );
1373 break;
1377 destState->stateCondList.transfer( destList );
1380 void FsmAp::embedCondition( StateAp *state, Action *condAction, bool sense )
1382 MergeData md;
1383 ExpansionList expList;
1385 /* Turn on misfit accounting to possibly catch the old start state. */
1386 setMisfitAccounting( true );
1388 /* Worker. */
1389 embedCondition( md, state, condAction, sense );
1391 /* Fill in any states that were newed up as combinations of others. */
1392 fillInStates( md );
1394 /* Remove the misfits and turn off misfit accounting. */
1395 removeMisfits();
1396 setMisfitAccounting( false );
1399 void FsmAp::embedCondition( MergeData &md, StateAp *state, Action *condAction, bool sense )
1401 ExpansionList expList;
1403 findEmbedExpansions( expList, state, condAction, sense );
1404 doExpand( md, state, expList );
1405 doRemove( md, state, expList );
1406 expList.empty();
1409 /* Check if a machine defines a single character. This is useful in validating
1410 * ranges and machines to export. */
1411 bool FsmAp::checkSingleCharMachine()
1413 /* Must have two states. */
1414 if ( stateList.length() != 2 )
1415 return false;
1416 /* The start state cannot be final. */
1417 if ( startState->isFinState() )
1418 return false;
1419 /* There should be only one final state. */
1420 if ( finStateSet.length() != 1 )
1421 return false;
1422 /* The final state cannot have any transitions out. */
1423 if ( finStateSet[0]->outList.length() != 0 )
1424 return false;
1425 /* The start state should have only one transition out. */
1426 if ( startState->outList.length() != 1 )
1427 return false;
1428 /* The singe transition out of the start state should not be a range. */
1429 TransAp *startTrans = startState->outList.head;
1430 if ( startTrans->lowKey != startTrans->highKey )
1431 return false;
1432 return true;