2 * Copyright 2001, 2002, 2006 Adrian Thurston <thurston@cs.queensu.ca>
5 /* This file is part of Ragel.
7 * Ragel is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * Ragel is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Ragel; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
26 #include "mergesort.h"
27 #include "parsedata.h"
32 /* Make a new state. The new state will be put on the graph's
33 * list of state. The new state can be created final or non final. */
34 StateAp
*FsmAp::addState()
36 /* Make the new state to return. */
37 StateAp
*state
= new StateAp();
39 if ( misfitAccounting
) {
40 /* Create the new state on the misfit list. All states are created
41 * with no foreign in transitions. */
42 misfitList
.append( state
);
45 /* Create the new state. */
46 stateList
.append( state
);
52 /* Construct an FSM that is the concatenation of an array of characters. A new
53 * machine will be made that has len+1 states with one transition between each
54 * state for each integer in str. IsSigned determines if the integers are to
55 * be considered as signed or unsigned ints. */
56 void FsmAp::concatFsm( Key
*str
, int len
)
58 /* Make the first state and set it as the start state. */
59 StateAp
*last
= addState();
60 setStartState( last
);
62 /* Attach subsequent states. */
63 for ( int i
= 0; i
< len
; i
++ ) {
64 StateAp
*newState
= addState();
65 attachNewTrans( last
, newState
, str
[i
], str
[i
] );
69 /* Make the last state the final state. */
73 /* Case insensitive version of concatFsm. */
74 void FsmAp::concatFsmCI( Key
*str
, int len
)
76 /* Make the first state and set it as the start state. */
77 StateAp
*last
= addState();
78 setStartState( last
);
80 /* Attach subsequent states. */
81 for ( int i
= 0; i
< len
; i
++ ) {
82 StateAp
*newState
= addState();
85 if ( str
[i
].isLower() )
86 keySet
.insert( str
[i
].toUpper() );
87 if ( str
[i
].isUpper() )
88 keySet
.insert( str
[i
].toLower() );
89 keySet
.insert( str
[i
] );
91 for ( int i
= 0; i
< keySet
.length(); i
++ )
92 attachNewTrans( last
, newState
, keySet
[i
], keySet
[i
] );
97 /* Make the last state the final state. */
101 /* Construct a machine that matches one character. A new machine will be made
102 * that has two states with a single transition between the states. IsSigned
103 * determines if the integers are to be considered as signed or unsigned ints. */
104 void FsmAp::concatFsm( Key chr
)
106 /* Two states first start, second final. */
107 setStartState( addState() );
109 StateAp
*end
= addState();
112 /* Attach on the character. */
113 attachNewTrans( startState
, end
, chr
, chr
);
116 /* Construct a machine that matches any character in set. A new machine will
117 * be made that has two states and len transitions between the them. The set
118 * should be ordered correctly accroding to KeyOps and should not contain
120 void FsmAp::orFsm( Key
*set
, int len
)
122 /* Two states first start, second final. */
123 setStartState( addState() );
125 StateAp
*end
= addState();
128 for ( int i
= 1; i
< len
; i
++ )
129 assert( set
[i
-1] < set
[i
] );
131 /* Attach on all the integers in the given string of ints. */
132 for ( int i
= 0; i
< len
; i
++ )
133 attachNewTrans( startState
, end
, set
[i
], set
[i
] );
136 /* Construct a machine that matches a range of characters. A new machine will
137 * be made with two states and a range transition between them. The range will
138 * match any characters from low to high inclusive. Low should be less than or
139 * equal to high otherwise undefined behaviour results. IsSigned determines
140 * if the integers are to be considered as signed or unsigned ints. */
141 void FsmAp::rangeFsm( Key low
, Key high
)
143 /* Two states first start, second final. */
144 setStartState( addState() );
146 StateAp
*end
= addState();
149 /* Attach using the range of characters. */
150 attachNewTrans( startState
, end
, low
, high
);
153 /* Construct a machine that a repeated range of characters. */
154 void FsmAp::rangeStarFsm( Key low
, Key high
)
156 /* One state which is final and is the start state. */
157 setStartState( addState() );
158 setFinState( startState
);
160 /* Attach start to start using range of characters. */
161 attachNewTrans( startState
, startState
, low
, high
);
164 /* Construct a machine that matches the empty string. A new machine will be
165 * made with only one state. The new state will be both a start and final
166 * state. IsSigned determines if the machine has a signed or unsigned
167 * alphabet. Fsm operations must be done on machines with the same alphabet
169 void FsmAp::lambdaFsm( )
171 /* Give it one state with no transitions making it
172 * the start state and final state. */
173 setStartState( addState() );
174 setFinState( startState
);
177 /* Construct a machine that matches nothing at all. A new machine will be
178 * made with only one state. It will not be final. */
179 void FsmAp::emptyFsm( )
181 /* Give it one state with no transitions making it
182 * the start state and final state. */
183 setStartState( addState() );
186 void FsmAp::transferOutData( StateAp
*destState
, StateAp
*srcState
)
188 for ( TransList::Iter trans
= destState
->outList
; trans
.lte(); trans
++ ) {
189 if ( trans
->toState
!= 0 ) {
190 /* Get the actions data from the outActionTable. */
191 trans
->actionTable
.setActions( srcState
->outActionTable
);
193 /* Get the priorities from the outPriorTable. */
194 trans
->priorTable
.setPriors( srcState
->outPriorTable
);
199 /* Kleene star operator. Makes this machine the kleene star of itself. Any
200 * transitions made going out of the machine and back into itself will be
201 * notified that they are leaving transitions by having the leavingFromState
202 * callback invoked. */
203 void FsmAp::starOp( )
205 /* For the merging process. */
208 /* Turn on misfit accounting to possibly catch the old start state. */
209 setMisfitAccounting( true );
211 /* Create the new new start state. It will be set final after the merging
212 * of the final states with the start state is complete. */
213 StateAp
*prevStartState
= startState
;
215 setStartState( addState() );
217 /* Merge the new start state with the old one to isolate it. */
218 mergeStates( md
, startState
, prevStartState
);
220 /* Merge the start state into all final states. Except the start state on
221 * the first pass. If the start state is set final we will be doubling up
222 * its transitions, which will get transfered to any final states that
223 * follow it in the final state set. This will be determined by the order
224 * of items in the final state set. To prevent this we just merge with the
225 * start on a second pass. */
226 for ( StateSet::Iter st
= finStateSet
; st
.lte(); st
++ ) {
227 if ( *st
!= startState
)
228 mergeStatesLeaving( md
, *st
, startState
);
231 /* Now it is safe to merge the start state with itself (provided it
233 if ( startState
->isFinState() )
234 mergeStatesLeaving( md
, startState
, startState
);
236 /* Now ensure the new start state is a final state. */
237 setFinState( startState
);
239 /* Fill in any states that were newed up as combinations of others. */
242 /* Remove the misfits and turn off misfit accounting. */
244 setMisfitAccounting( false );
247 void FsmAp::repeatOp( int times
)
249 /* Must be 1 and up. 0 produces null machine and requires deleting this. */
252 /* A repeat of one does absolutely nothing. */
256 /* Make a machine to make copies from. */
257 FsmAp
*copyFrom
= new FsmAp( *this );
259 /* Concatentate duplicates onto the end up until before the last. */
260 for ( int i
= 1; i
< times
-1; i
++ ) {
261 FsmAp
*dup
= new FsmAp( *copyFrom
);
262 doConcat( dup
, 0, false );
265 /* Now use the copyFrom on the end. */
266 doConcat( copyFrom
, 0, false );
269 void FsmAp::optionalRepeatOp( int times
)
271 /* Must be 1 and up. 0 produces null machine and requires deleting this. */
274 /* A repeat of one optional merely allows zero string. */
276 setFinState( startState
);
280 /* Make a machine to make copies from. */
281 FsmAp
*copyFrom
= new FsmAp( *this );
283 /* The state set used in the from end of the concatentation. Starts with
284 * the initial final state set, then after each concatenation, gets set to
285 * the the final states that come from the the duplicate. */
286 StateSet
lastFinSet( finStateSet
);
288 /* Set the initial state to zero to allow zero copies. */
289 setFinState( startState
);
291 /* Concatentate duplicates onto the end up until before the last. */
292 for ( int i
= 1; i
< times
-1; i
++ ) {
293 /* Make a duplicate for concating and set the fin bits to graph 2 so we
294 * can pick out it's final states after the optional style concat. */
295 FsmAp
*dup
= new FsmAp( *copyFrom
);
296 dup
->setFinBits( STB_GRAPH2
);
297 doConcat( dup
, &lastFinSet
, true );
299 /* Clear the last final state set and make the new one by taking only
300 * the final states that come from graph 2.*/
302 for ( int i
= 0; i
< finStateSet
.length(); i
++ ) {
303 /* If the state came from graph 2, add it to the last set and clear
305 StateAp
*fs
= finStateSet
[i
];
306 if ( fs
->stateBits
& STB_GRAPH2
) {
307 lastFinSet
.insert( fs
);
308 fs
->stateBits
&= ~STB_GRAPH2
;
313 /* Now use the copyFrom on the end, no bits set, no bits to clear. */
314 doConcat( copyFrom
, &lastFinSet
, true );
318 /* Fsm concatentation worker. Supports treating the concatentation as optional,
319 * which essentially leaves the final states of machine one as final. */
320 void FsmAp::doConcat( FsmAp
*other
, StateSet
*fromStates
, bool optional
)
322 /* For the merging process. */
323 StateSet finStateSetCopy
, startStateSet
;
326 /* Turn on misfit accounting for both graphs. */
327 setMisfitAccounting( true );
328 other
->setMisfitAccounting( true );
330 /* Get the other's start state. */
331 StateAp
*otherStartState
= other
->startState
;
333 /* Unset other's start state before bringing in the entry points. */
334 other
->unsetStartState();
336 /* Bring in the rest of other's entry points. */
337 copyInEntryPoints( other
);
338 other
->entryPoints
.empty();
340 /* Bring in other's states into our state lists. */
341 stateList
.append( other
->stateList
);
342 misfitList
.append( other
->misfitList
);
344 /* If from states is not set, then get a copy of our final state set before
345 * we clobber it and use it instead. */
346 if ( fromStates
== 0 ) {
347 finStateSetCopy
= finStateSet
;
348 fromStates
= &finStateSetCopy
;
351 /* Unset all of our final states and get the final states from other. */
354 finStateSet
.insert( other
->finStateSet
);
356 /* Since other's lists are empty, we can delete the fsm without
357 * affecting any states. */
360 /* Merge our former final states with the start state of other. */
361 for ( int i
= 0; i
< fromStates
->length(); i
++ ) {
362 StateAp
*state
= fromStates
->data
[i
];
364 /* Merge the former final state with other's start state. */
365 mergeStatesLeaving( md
, state
, otherStartState
);
367 /* If the former final state was not reset final then we must clear
368 * the state's out trans data. If it got reset final then it gets to
369 * keep its out trans data. This must be done before fillInStates gets
370 * called to prevent the data from being sourced. */
371 if ( ! state
->isFinState() )
372 clearOutData( state
);
375 /* Fill in any new states made from merging. */
378 /* Remove the misfits and turn off misfit accounting. */
380 setMisfitAccounting( false );
383 /* Concatenates other to the end of this machine. Other is deleted. Any
384 * transitions made leaving this machine and entering into other are notified
385 * that they are leaving transitions by having the leavingFromState callback
387 void FsmAp::concatOp( FsmAp
*other
)
389 /* Assert same signedness and return graph concatenation op. */
390 doConcat( other
, 0, false );
394 void FsmAp::doOr( FsmAp
*other
)
396 /* For the merging process. */
399 /* Build a state set consisting of both start states */
400 StateSet startStateSet
;
401 startStateSet
.insert( startState
);
402 startStateSet
.insert( other
->startState
);
404 /* Both of the original start states loose their start state status. */
406 other
->unsetStartState();
408 /* Bring in the rest of other's entry points. */
409 copyInEntryPoints( other
);
410 other
->entryPoints
.empty();
412 /* Merge the lists. This will move all the states from other
413 * into this. No states will be deleted. */
414 stateList
.append( other
->stateList
);
415 misfitList
.append( other
->misfitList
);
417 /* Move the final set data from other into this. */
418 finStateSet
.insert(other
->finStateSet
);
419 other
->finStateSet
.empty();
421 /* Since other's list is empty, we can delete the fsm without
422 * affecting any states. */
425 /* Create a new start state. */
426 setStartState( addState() );
428 /* Merge the start states. */
429 mergeStates( md
, startState
, startStateSet
.data
, startStateSet
.length() );
431 /* Fill in any new states made from merging. */
435 /* Unions other with this machine. Other is deleted. */
436 void FsmAp::unionOp( FsmAp
*other
)
438 /* Turn on misfit accounting for both graphs. */
439 setMisfitAccounting( true );
440 other
->setMisfitAccounting( true );
442 /* Call Worker routine. */
445 /* Remove the misfits and turn off misfit accounting. */
447 setMisfitAccounting( false );
450 /* Intersects other with this machine. Other is deleted. */
451 void FsmAp::intersectOp( FsmAp
*other
)
453 /* Turn on misfit accounting for both graphs. */
454 setMisfitAccounting( true );
455 other
->setMisfitAccounting( true );
457 /* Set the fin bits on this and other to want each other. */
458 setFinBits( STB_GRAPH1
);
459 other
->setFinBits( STB_GRAPH2
);
461 /* Call worker Or routine. */
464 /* Unset any final states that are no longer to
465 * be final due to final bits. */
466 unsetIncompleteFinals();
468 /* Remove the misfits and turn off misfit accounting. */
470 setMisfitAccounting( false );
472 /* Remove states that have no path to a final state. */
473 removeDeadEndStates();
476 /* Set subtracts other machine from this machine. Other is deleted. */
477 void FsmAp::subtractOp( FsmAp
*other
)
479 /* Turn on misfit accounting for both graphs. */
480 setMisfitAccounting( true );
481 other
->setMisfitAccounting( true );
483 /* Set the fin bits of other to be killers. */
484 other
->setFinBits( STB_GRAPH1
);
486 /* Call worker Or routine. */
489 /* Unset any final states that are no longer to
490 * be final due to final bits. */
493 /* Remove the misfits and turn off misfit accounting. */
495 setMisfitAccounting( false );
497 /* Remove states that have no path to a final state. */
498 removeDeadEndStates();
501 bool FsmAp::inEptVect( EptVect
*eptVect
, StateAp
*state
)
503 if ( eptVect
!= 0 ) {
504 /* Vect is there, walk it looking for state. */
505 for ( int i
= 0; i
< eptVect
->length(); i
++ ) {
506 if ( eptVect
->data
[i
].targ
== state
)
513 /* Fill epsilon vectors in a root state from a given starting point. Epmploys
514 * a depth first search through the graph of epsilon transitions. */
515 void FsmAp::epsilonFillEptVectFrom( StateAp
*root
, StateAp
*from
, bool parentLeaving
)
517 /* Walk the epsilon transitions out of the state. */
518 for ( EpsilonTrans::Iter ep
= from
->epsilonTrans
; ep
.lte(); ep
++ ) {
519 /* Find the entry point, if the it does not resove, ignore it. */
520 EntryMapEl
*enLow
, *enHigh
;
521 if ( entryPoints
.findMulti( *ep
, enLow
, enHigh
) ) {
522 /* Loop the targets. */
523 for ( EntryMapEl
*en
= enLow
; en
<= enHigh
; en
++ ) {
524 /* Do not add the root or states already in eptVect. */
525 StateAp
*targ
= en
->value
;
526 if ( targ
!= from
&& !inEptVect(root
->eptVect
, targ
) ) {
527 /* Maybe need to create the eptVect. */
528 if ( root
->eptVect
== 0 )
529 root
->eptVect
= new EptVect();
531 /* If moving to a different graph or if any parent is
532 * leaving then we are leaving. */
533 bool leaving
= parentLeaving
||
534 root
->owningGraph
!= targ
->owningGraph
;
536 /* All ok, add the target epsilon and recurse. */
537 root
->eptVect
->append( EptVectEl(targ
, leaving
) );
538 epsilonFillEptVectFrom( root
, targ
, leaving
);
545 void FsmAp::shadowReadWriteStates( MergeData
&md
)
547 /* Init isolatedShadow algorithm data. */
548 for ( StateList::Iter st
= stateList
; st
.lte(); st
++ )
549 st
->isolatedShadow
= 0;
551 /* Any states that may be both read from and written to must
553 for ( StateList::Iter st
= stateList
; st
.lte(); st
++ ) {
554 /* Find such states by looping through stateVect lists, which give us
555 * the states that will be read from. May cause us to visit the states
556 * that we are interested in more than once. */
557 if ( st
->eptVect
!= 0 ) {
558 /* For all states that will be read from. */
559 for ( EptVect::Iter ept
= *st
->eptVect
; ept
.lte(); ept
++ ) {
560 /* Check for read and write to the same state. */
561 StateAp
*targ
= ept
->targ
;
562 if ( targ
->eptVect
!= 0 ) {
563 /* State is to be written to, if the shadow is not already
564 * there, create it. */
565 if ( targ
->isolatedShadow
== 0 ) {
566 StateAp
*shadow
= addState();
567 mergeStates( md
, shadow
, targ
);
568 targ
->isolatedShadow
= shadow
;
571 /* Write shadow into the state vector so that it is the
572 * state that the epsilon transition will read from. */
573 ept
->targ
= targ
->isolatedShadow
;
580 void FsmAp::resolveEpsilonTrans( MergeData
&md
)
582 /* Walk the state list and invoke recursive worker on each state. */
583 for ( StateList::Iter st
= stateList
; st
.lte(); st
++ )
584 epsilonFillEptVectFrom( st
, st
, false );
586 /* Prevent reading from and writing to of the same state. */
587 shadowReadWriteStates( md
);
589 /* For all states that have epsilon transitions out, draw the transitions,
590 * clear the epsilon transitions. */
591 for ( StateList::Iter st
= stateList
; st
.lte(); st
++ ) {
592 /* If there is a state vector, then create the pre-merge state. */
593 if ( st
->eptVect
!= 0 ) {
594 /* Merge all the epsilon targets into the state. */
595 for ( EptVect::Iter ept
= *st
->eptVect
; ept
.lte(); ept
++ ) {
597 mergeStatesLeaving( md
, st
, ept
->targ
);
599 mergeStates( md
, st
, ept
->targ
);
602 /* Clean up the target list. */
607 /* Clear the epsilon transitions vector. */
608 st
->epsilonTrans
.empty();
612 void FsmAp::epsilonOp()
614 /* For merging process. */
617 setMisfitAccounting( true );
619 for ( StateList::Iter st
= stateList
; st
.lte(); st
++ )
622 /* Perform merges. */
623 resolveEpsilonTrans( md
);
625 /* Epsilons can caused merges which leave behind unreachable states. */
628 /* Remove the misfits and turn off misfit accounting. */
630 setMisfitAccounting( false );
633 /* Make a new maching by joining together a bunch of machines without making
634 * any transitions between them. A negative finalId results in there being no
636 void FsmAp::joinOp( int startId
, int finalId
, FsmAp
**others
, int numOthers
)
638 /* For the merging process. */
641 /* Set the owning machines. Start at one. Zero is reserved for the start
642 * and final states. */
643 for ( StateList::Iter st
= stateList
; st
.lte(); st
++ )
645 for ( int m
= 0; m
< numOthers
; m
++ ) {
646 for ( StateList::Iter st
= others
[m
]->stateList
; st
.lte(); st
++ )
647 st
->owningGraph
= 2+m
;
650 /* All machines loose start state status. */
652 for ( int m
= 0; m
< numOthers
; m
++ )
653 others
[m
]->unsetStartState();
655 /* Bring the other machines into this. */
656 for ( int m
= 0; m
< numOthers
; m
++ ) {
657 /* Bring in the rest of other's entry points. */
658 copyInEntryPoints( others
[m
] );
659 others
[m
]->entryPoints
.empty();
661 /* Merge the lists. This will move all the states from other into
662 * this. No states will be deleted. */
663 stateList
.append( others
[m
]->stateList
);
664 assert( others
[m
]->misfitList
.length() == 0 );
666 /* Move the final set data from other into this. */
667 finStateSet
.insert( others
[m
]->finStateSet
);
668 others
[m
]->finStateSet
.empty();
670 /* Since other's list is empty, we can delete the fsm without
671 * affecting any states. */
675 /* Look up the start entry point. */
676 EntryMapEl
*enLow
= 0, *enHigh
= 0;
677 bool findRes
= entryPoints
.findMulti( startId
, enLow
, enHigh
);
679 /* No start state. Set a default one and proceed with the join. Note
680 * that the result of the join will be a very uninteresting machine. */
681 setStartState( addState() );
684 /* There is at least one start state, create a state that will become
685 * the new start state. */
686 StateAp
*newStart
= addState();
687 setStartState( newStart
);
689 /* The start state is in an owning machine class all it's own. */
690 newStart
->owningGraph
= 0;
692 /* Create the set of states to merge from. */
694 for ( EntryMapEl
*en
= enLow
; en
<= enHigh
; en
++ )
695 stateSet
.insert( en
->value
);
697 /* Merge in the set of start states into the new start state. */
698 mergeStates( md
, newStart
, stateSet
.data
, stateSet
.length() );
701 /* Take a copy of the final state set, before unsetting them all. This
702 * will allow us to call clearOutData on the states that don't get
703 * final state status back back. */
704 StateSet finStateSetCopy
= finStateSet
;
706 /* Now all final states are unset. */
709 if ( finalId
>= 0 ) {
710 /* Create the implicit final state. */
711 StateAp
*finState
= addState();
712 setFinState( finState
);
714 /* Assign an entry into the final state on the final state entry id. Note
715 * that there may already be an entry on this id. That's ok. Also set the
716 * final state owning machine id. It's in a class all it's own. */
717 setEntry( finalId
, finState
);
718 finState
->owningGraph
= 0;
721 /* Hand over to workers for resolving epsilon trans. This will merge states
722 * with the targets of their epsilon transitions. */
723 resolveEpsilonTrans( md
);
725 /* Invoke the relinquish final callback on any states that did not get
726 * final state status back. */
727 for ( StateSet::Iter st
= finStateSetCopy
; st
.lte(); st
++ ) {
728 if ( !((*st
)->stateBits
& STB_ISFINAL
) )
732 /* Fill in any new states made from merging. */
735 /* Joining can be messy. Instead of having misfit accounting on (which is
736 * tricky here) do a full cleaning. */
737 removeUnreachableStates();
740 void FsmAp::globOp( FsmAp
**others
, int numOthers
)
742 /* All other machines loose start states status. */
743 for ( int m
= 0; m
< numOthers
; m
++ )
744 others
[m
]->unsetStartState();
746 /* Bring the other machines into this. */
747 for ( int m
= 0; m
< numOthers
; m
++ ) {
748 /* Bring in the rest of other's entry points. */
749 copyInEntryPoints( others
[m
] );
750 others
[m
]->entryPoints
.empty();
752 /* Merge the lists. This will move all the states from other into
753 * this. No states will be deleted. */
754 stateList
.append( others
[m
]->stateList
);
755 assert( others
[m
]->misfitList
.length() == 0 );
757 /* Move the final set data from other into this. */
758 finStateSet
.insert( others
[m
]->finStateSet
);
759 others
[m
]->finStateSet
.empty();
761 /* Since other's list is empty, we can delete the fsm without
762 * affecting any states. */
767 void FsmAp::deterministicEntry()
769 /* For the merging process. */
772 /* States may loose their entry points, turn on misfit accounting. */
773 setMisfitAccounting( true );
775 /* Get a copy of the entry map then clear all the entry points. As we
776 * iterate the old entry map finding duplicates we will add the entry
777 * points for the new states that we create. */
778 EntryMap prevEntry
= entryPoints
;
779 unsetAllEntryPoints();
781 for ( int enId
= 0; enId
< prevEntry
.length(); ) {
782 /* Count the number of states on this entry key. */
784 while ( highId
< prevEntry
.length() && prevEntry
[enId
].key
== prevEntry
[highId
].key
)
787 int numIds
= highId
- enId
;
789 /* Only a single entry point, just set the entry. */
790 setEntry( prevEntry
[enId
].key
, prevEntry
[enId
].value
);
793 /* Multiple entry points, need to create a new state and merge in
794 * all the targets of entry points. */
795 StateAp
*newEntry
= addState();
796 for ( int en
= enId
; en
< highId
; en
++ )
797 mergeStates( md
, newEntry
, prevEntry
[en
].value
);
799 /* Add the new state as the single entry point. */
800 setEntry( prevEntry
[enId
].key
, newEntry
);
806 /* The old start state may be unreachable. Remove the misfits and turn off
807 * misfit accounting. */
809 setMisfitAccounting( false );
812 /* Unset any final states that are no longer to be final due to final bits. */
813 void FsmAp::unsetKilledFinals()
815 /* Duplicate the final state set before we begin modifying it. */
816 StateSet
fin( finStateSet
);
818 for ( int s
= 0; s
< fin
.length(); s
++ ) {
819 /* Check for killing bit. */
820 StateAp
*state
= fin
.data
[s
];
821 if ( state
->stateBits
& STB_GRAPH1
) {
822 /* One final state is a killer, set to non-final. */
823 unsetFinState( state
);
826 /* Clear all killing bits. Non final states should never have had those
827 * state bits set in the first place. */
828 state
->stateBits
&= ~STB_GRAPH1
;
832 /* Unset any final states that are no longer to be final due to final bits. */
833 void FsmAp::unsetIncompleteFinals()
835 /* Duplicate the final state set before we begin modifying it. */
836 StateSet
fin( finStateSet
);
838 for ( int s
= 0; s
< fin
.length(); s
++ ) {
839 /* Check for one set but not the other. */
840 StateAp
*state
= fin
.data
[s
];
841 if ( state
->stateBits
& STB_BOTH
&&
842 (state
->stateBits
& STB_BOTH
) != STB_BOTH
)
844 /* One state wants the other but it is not there. */
845 unsetFinState( state
);
848 /* Clear wanting bits. Non final states should never have had those
849 * state bits set in the first place. */
850 state
->stateBits
&= ~STB_BOTH
;
854 /* Ensure that the start state is free of entry points (aside from the fact
855 * that it is the start state). If the start state has entry points then Make a
856 * new start state by merging with the old one. Useful before modifying start
857 * transitions. If the existing start state has any entry points other than the
858 * start state entry then modifying its transitions changes more than the start
859 * transitions. So isolate the start state by separating it out such that it
860 * only has start stateness as it's entry point. */
861 void FsmAp::isolateStartState( )
863 /* For the merging process. */
866 /* Bail out if the start state is already isolated. */
867 if ( isStartStateIsolated() )
870 /* Turn on misfit accounting to possibly catch the old start state. */
871 setMisfitAccounting( true );
873 /* This will be the new start state. The existing start
874 * state is merged with it. */
875 StateAp
*prevStartState
= startState
;
877 setStartState( addState() );
879 /* Merge the new start state with the old one to isolate it. */
880 mergeStates( md
, startState
, prevStartState
);
882 /* Stfil and stateDict will be empty because the merging of the old start
883 * state into the new one will not have any conflicting transitions. */
884 assert( md
.stateDict
.treeSize
== 0 );
885 assert( md
.stfillHead
== 0 );
887 /* The old start state may be unreachable. Remove the misfits and turn off
888 * misfit accounting. */
890 setMisfitAccounting( false );
894 void logCondSpace( CondSpace
*condSpace
)
896 if ( condSpace
== 0 )
899 for ( CondSet::Iter csi
= condSpace
->condSet
.last(); csi
.gtb(); csi
-- ) {
902 (*csi
)->actionName( cerr
);
907 void logNewExpansion( Expansion
*exp
)
909 cerr
<< "created expansion:" << endl
;
910 cerr
<< " range: " << exp
->lowKey
.getVal() << " .. " <<
911 exp
->highKey
.getVal() << endl
;
913 cerr
<< " fromCondSpace: ";
914 logCondSpace( exp
->fromCondSpace
);
916 cerr
<< " fromVals: " << exp
->fromVals
<< endl
;
918 cerr
<< " toCondSpace: ";
919 logCondSpace( exp
->toCondSpace
);
921 cerr
<< " toValsList: ";
922 for ( LongVect::Iter to
= exp
->toValsList
; to
.lte(); to
++ )
929 void FsmAp::findTransExpansions( ExpansionList
&expansionList
,
930 StateAp
*destState
, StateAp
*srcState
)
932 PairIter
<TransAp
, StateCond
> transCond( destState
->outList
.head
,
933 srcState
->stateCondList
.head
);
934 for ( ; !transCond
.end(); transCond
++ ) {
935 if ( transCond
.userState
== RangeOverlap
) {
936 Expansion
*expansion
= new Expansion( transCond
.s1Tel
.lowKey
,
937 transCond
.s1Tel
.highKey
);
938 expansion
->fromTrans
= new TransAp(*transCond
.s1Tel
.trans
);
939 expansion
->fromTrans
->fromState
= 0;
940 expansion
->fromTrans
->toState
= transCond
.s1Tel
.trans
->toState
;
941 expansion
->fromCondSpace
= 0;
942 expansion
->fromVals
= 0;
943 CondSpace
*srcCS
= transCond
.s2Tel
.trans
->condSpace
;
944 expansion
->toCondSpace
= srcCS
;
946 long numTargVals
= (1 << srcCS
->condSet
.length());
947 for ( long targVals
= 0; targVals
< numTargVals
; targVals
++ )
948 expansion
->toValsList
.append( targVals
);
951 logNewExpansion( expansion
);
953 expansionList
.append( expansion
);
958 void FsmAp::findCondExpInTrans( ExpansionList
&expansionList
, StateAp
*state
,
959 Key lowKey
, Key highKey
, CondSpace
*fromCondSpace
, CondSpace
*toCondSpace
,
960 long fromVals
, LongVect
&toValsList
)
962 /* Make condition-space low and high keys for searching. */
964 searchTrans
.lowKey
= fromCondSpace
->baseKey
+ fromVals
* keyOps
->alphSize() +
965 (lowKey
- keyOps
->minKey
);
966 searchTrans
.highKey
= fromCondSpace
->baseKey
+ fromVals
* keyOps
->alphSize() +
967 (highKey
- keyOps
->minKey
);
968 searchTrans
.prev
= searchTrans
.next
= 0;
970 PairIter
<TransAp
> pairIter( state
->outList
.head
, &searchTrans
);
971 for ( ; !pairIter
.end(); pairIter
++ ) {
972 if ( pairIter
.userState
== RangeOverlap
) {
973 /* Need to make character-space low and high keys from the range
974 * overlap for the expansion object. */
975 Key expLowKey
= pairIter
.s1Tel
.lowKey
- fromCondSpace
->baseKey
- fromVals
*
976 keyOps
->alphSize() + keyOps
->minKey
;
977 Key expHighKey
= pairIter
.s1Tel
.highKey
- fromCondSpace
->baseKey
- fromVals
*
978 keyOps
->alphSize() + keyOps
->minKey
;
980 Expansion
*expansion
= new Expansion( expLowKey
, expHighKey
);
981 expansion
->fromTrans
= new TransAp(*pairIter
.s1Tel
.trans
);
982 expansion
->fromTrans
->fromState
= 0;
983 expansion
->fromTrans
->toState
= pairIter
.s1Tel
.trans
->toState
;
984 expansion
->fromCondSpace
= fromCondSpace
;
985 expansion
->fromVals
= fromVals
;
986 expansion
->toCondSpace
= toCondSpace
;
987 expansion
->toValsList
= toValsList
;
989 expansionList
.append( expansion
);
991 logNewExpansion( expansion
);
997 void FsmAp::findCondExpansions( ExpansionList
&expansionList
,
998 StateAp
*destState
, StateAp
*srcState
)
1000 PairIter
<StateCond
, StateCond
> condCond( destState
->stateCondList
.head
,
1001 srcState
->stateCondList
.head
);
1002 for ( ; !condCond
.end(); condCond
++ ) {
1003 if ( condCond
.userState
== RangeOverlap
) {
1004 /* Loop over all existing condVals . */
1005 CondSet
&destCS
= condCond
.s1Tel
.trans
->condSpace
->condSet
;
1006 long destLen
= destCS
.length();
1008 /* Find the items in src cond set that are not in dest
1009 * cond set. These are the items that we must expand. */
1010 CondSet srcOnlyCS
= condCond
.s2Tel
.trans
->condSpace
->condSet
;
1011 for ( CondSet::Iter dcsi
= destCS
; dcsi
.lte(); dcsi
++ )
1012 srcOnlyCS
.remove( *dcsi
);
1013 long srcOnlyLen
= srcOnlyCS
.length();
1015 if ( srcOnlyCS
.length() > 0 ) {
1017 cerr
<< "there are " << srcOnlyCS
.length() << " item(s) that are "
1018 "only in the srcCS" << endl
;
1021 CondSet mergedCS
= destCS
;
1022 mergedCS
.insert( condCond
.s2Tel
.trans
->condSpace
->condSet
);
1024 CondSpace
*fromCondSpace
= addCondSpace( destCS
);
1025 CondSpace
*toCondSpace
= addCondSpace( mergedCS
);
1027 /* Loop all values in the dest space. */
1028 for ( long destVals
= 0; destVals
< (1 << destLen
); destVals
++ ) {
1030 for ( CondSet::Iter csi
= destCS
; csi
.lte(); csi
++ ) {
1031 if ( destVals
& (1 << csi
.pos()) ) {
1032 Action
**cim
= mergedCS
.find( *csi
);
1033 long bitPos
= (cim
- mergedCS
.data
);
1034 basicVals
|= 1 << bitPos
;
1038 /* Loop all new values. */
1039 LongVect expandToVals
;
1040 for ( long soVals
= 0; soVals
< (1 << srcOnlyLen
); soVals
++ ) {
1041 long targVals
= basicVals
;
1042 for ( CondSet::Iter csi
= srcOnlyCS
; csi
.lte(); csi
++ ) {
1043 if ( soVals
& (1 << csi
.pos()) ) {
1044 Action
**cim
= mergedCS
.find( *csi
);
1045 long bitPos
= (cim
- mergedCS
.data
);
1046 targVals
|= 1 << bitPos
;
1049 expandToVals
.append( targVals
);
1052 findCondExpInTrans( expansionList
, destState
,
1053 condCond
.s1Tel
.lowKey
, condCond
.s1Tel
.highKey
,
1054 fromCondSpace
, toCondSpace
, destVals
, expandToVals
);
1061 void FsmAp::doExpand( MergeData
&md
, StateAp
*destState
, ExpansionList
&expList1
)
1063 for ( ExpansionList::Iter exp
= expList1
; exp
.lte(); exp
++ ) {
1064 for ( LongVect::Iter to
= exp
->toValsList
; to
.lte(); to
++ ) {
1065 long targVals
= *to
;
1067 /* We will use the copy of the transition that was made when the
1068 * expansion was created. It will get used multiple times. Each
1069 * time we must set up the keys, everything else is constant and
1070 * and already prepared. */
1071 TransAp
*srcTrans
= exp
->fromTrans
;
1073 srcTrans
->lowKey
= exp
->toCondSpace
->baseKey
+
1074 targVals
* keyOps
->alphSize() + (exp
->lowKey
- keyOps
->minKey
);
1075 srcTrans
->highKey
= exp
->toCondSpace
->baseKey
+
1076 targVals
* keyOps
->alphSize() + (exp
->highKey
- keyOps
->minKey
);
1079 srcList
.append( srcTrans
);
1080 outTransCopy( md
, destState
, srcList
.head
);
1087 void FsmAp::doRemove( MergeData
&md
, StateAp
*destState
, ExpansionList
&expList1
)
1089 for ( ExpansionList::Iter exp
= expList1
; exp
.lte(); exp
++ ) {
1091 if ( exp
->fromCondSpace
== 0 ) {
1092 removal
.lowKey
= exp
->lowKey
;
1093 removal
.highKey
= exp
->highKey
;
1096 removal
.lowKey
= exp
->fromCondSpace
->baseKey
+
1097 exp
->fromVals
* keyOps
->alphSize() + (exp
->lowKey
- keyOps
->minKey
);
1098 removal
.highKey
= exp
->fromCondSpace
->baseKey
+
1099 exp
->fromVals
* keyOps
->alphSize() + (exp
->highKey
- keyOps
->minKey
);
1104 PairIter
<TransAp
, Removal
> pairIter( destState
->outList
.head
, &removal
);
1105 for ( ; !pairIter
.end(); pairIter
++ ) {
1106 switch ( pairIter
.userState
) {
1108 TransAp
*destTrans
= pairIter
.s1Tel
.trans
;
1109 destTrans
->lowKey
= pairIter
.s1Tel
.lowKey
;
1110 destTrans
->highKey
= pairIter
.s1Tel
.highKey
;
1111 destList
.append( destTrans
);
1116 case RangeOverlap
: {
1117 TransAp
*trans
= pairIter
.s1Tel
.trans
;
1118 detachTrans( trans
->fromState
, trans
->toState
, trans
);
1123 pairIter
.s1Tel
.trans
= dupTrans( destState
,
1124 pairIter
.s1Tel
.trans
);
1131 destState
->outList
.transfer( destList
);
1135 void FsmAp::mergeStateConds( StateAp
*destState
, StateAp
*srcState
)
1137 StateCondList destList
;
1138 PairIter
<StateCond
> pairIter( destState
->stateCondList
.head
,
1139 srcState
->stateCondList
.head
);
1140 for ( ; !pairIter
.end(); pairIter
++ ) {
1141 switch ( pairIter
.userState
) {
1143 StateCond
*destCond
= pairIter
.s1Tel
.trans
;
1144 destCond
->lowKey
= pairIter
.s1Tel
.lowKey
;
1145 destCond
->highKey
= pairIter
.s1Tel
.highKey
;
1146 destList
.append( destCond
);
1150 StateCond
*newCond
= new StateCond( *pairIter
.s2Tel
.trans
);
1151 newCond
->lowKey
= pairIter
.s2Tel
.lowKey
;
1152 newCond
->highKey
= pairIter
.s2Tel
.highKey
;
1153 destList
.append( newCond
);
1156 case RangeOverlap
: {
1157 StateCond
*destCond
= pairIter
.s1Tel
.trans
;
1158 StateCond
*srcCond
= pairIter
.s2Tel
.trans
;
1159 CondSet mergedCondSet
;
1160 mergedCondSet
.insert( destCond
->condSpace
->condSet
);
1161 mergedCondSet
.insert( srcCond
->condSpace
->condSet
);
1162 destCond
->condSpace
= addCondSpace( mergedCondSet
);
1164 destCond
->lowKey
= pairIter
.s1Tel
.lowKey
;
1165 destCond
->highKey
= pairIter
.s1Tel
.highKey
;
1166 destList
.append( destCond
);
1170 pairIter
.s1Tel
.trans
= new StateCond( *pairIter
.s1Tel
.trans
);
1177 destState
->stateCondList
.transfer( destList
);
1180 /* A state merge which represents the drawing in of leaving transitions. If
1181 * there is any out data then we duplicate the source state, transfer the out
1182 * data, then merge in the state. The new state will be reaped because it will
1183 * not be given any in transitions. */
1184 void FsmAp::mergeStatesLeaving( MergeData
&md
, StateAp
*destState
, StateAp
*srcState
)
1186 if ( !hasOutData( destState
) )
1187 mergeStates( md
, destState
, srcState
);
1189 StateAp
*ssMutable
= addState();
1190 mergeStates( md
, ssMutable
, srcState
);
1191 transferOutData( ssMutable
, destState
);
1193 for ( OutCondSet::Iter cond
= destState
->outCondSet
; cond
.lte(); cond
++ )
1194 embedCondition( md
, ssMutable
, cond
->action
, cond
->sense
);
1196 mergeStates( md
, destState
, ssMutable
);
1200 void FsmAp::mergeStates( MergeData
&md
, StateAp
*destState
,
1201 StateAp
**srcStates
, int numSrc
)
1203 for ( int s
= 0; s
< numSrc
; s
++ )
1204 mergeStates( md
, destState
, srcStates
[s
] );
1207 void FsmAp::mergeStates( MergeData
&md
, StateAp
*destState
, StateAp
*srcState
)
1209 ExpansionList expList1
;
1210 ExpansionList expList2
;
1212 findTransExpansions( expList1
, destState
, srcState
);
1213 findCondExpansions( expList1
, destState
, srcState
);
1214 findTransExpansions( expList2
, srcState
, destState
);
1215 findCondExpansions( expList2
, srcState
, destState
);
1217 mergeStateConds( destState
, srcState
);
1219 outTransCopy( md
, destState
, srcState
->outList
.head
);
1221 doExpand( md
, destState
, expList1
);
1222 doExpand( md
, destState
, expList2
);
1224 doRemove( md
, destState
, expList1
);
1225 doRemove( md
, destState
, expList2
);
1230 /* Get its bits and final state status. */
1231 destState
->stateBits
|= ( srcState
->stateBits
& ~STB_ISFINAL
);
1232 if ( srcState
->isFinState() )
1233 setFinState( destState
);
1235 /* Draw in any properties of srcState into destState. */
1236 if ( srcState
== destState
) {
1237 /* Duplicate the list to protect against write to source. The
1238 * priorities sets are not copied in because that would have no
1240 destState
->epsilonTrans
.append( EpsilonTrans( srcState
->epsilonTrans
) );
1242 /* Get all actions, duplicating to protect against write to source. */
1243 destState
->toStateActionTable
.setActions(
1244 ActionTable( srcState
->toStateActionTable
) );
1245 destState
->fromStateActionTable
.setActions(
1246 ActionTable( srcState
->fromStateActionTable
) );
1247 destState
->outActionTable
.setActions( ActionTable( srcState
->outActionTable
) );
1248 destState
->outCondSet
.insert( OutCondSet( srcState
->outCondSet
) );
1249 destState
->errActionTable
.setActions( ErrActionTable( srcState
->errActionTable
) );
1250 destState
->eofActionTable
.setActions( ActionTable( srcState
->eofActionTable
) );
1253 /* Get the epsilons, out priorities. */
1254 destState
->epsilonTrans
.append( srcState
->epsilonTrans
);
1255 destState
->outPriorTable
.setPriors( srcState
->outPriorTable
);
1257 /* Get all actions. */
1258 destState
->toStateActionTable
.setActions( srcState
->toStateActionTable
);
1259 destState
->fromStateActionTable
.setActions( srcState
->fromStateActionTable
);
1260 destState
->outActionTable
.setActions( srcState
->outActionTable
);
1261 destState
->outCondSet
.insert( srcState
->outCondSet
);
1262 destState
->errActionTable
.setActions( srcState
->errActionTable
);
1263 destState
->eofActionTable
.setActions( srcState
->eofActionTable
);
1267 void FsmAp::fillInStates( MergeData
&md
)
1269 /* Merge any states that are awaiting merging. This will likey cause
1270 * other states to be added to the stfil list. */
1271 StateAp
*state
= md
.stfillHead
;
1272 while ( state
!= 0 ) {
1273 StateSet
*stateSet
= &state
->stateDictEl
->stateSet
;
1274 mergeStates( md
, state
, stateSet
->data
, stateSet
->length() );
1275 state
= state
->alg
.next
;
1278 /* Delete the state sets of all states that are on the fill list. */
1279 state
= md
.stfillHead
;
1280 while ( state
!= 0 ) {
1281 /* Delete and reset the state set. */
1282 delete state
->stateDictEl
;
1283 state
->stateDictEl
= 0;
1285 /* Next state in the stfill list. */
1286 state
= state
->alg
.next
;
1289 /* StateDict will still have its ptrs/size set but all of it's element
1290 * will be deleted so we don't need to clean it up. */
1293 void FsmAp::findEmbedExpansions( ExpansionList
&expansionList
,
1294 StateAp
*destState
, Action
*condAction
, bool sense
)
1296 StateCondList destList
;
1297 PairIter
<TransAp
, StateCond
> transCond( destState
->outList
.head
,
1298 destState
->stateCondList
.head
);
1299 for ( ; !transCond
.end(); transCond
++ ) {
1300 switch ( transCond
.userState
) {
1302 if ( transCond
.s1Tel
.lowKey
<= keyOps
->maxKey
) {
1303 assert( transCond
.s1Tel
.highKey
<= keyOps
->maxKey
);
1305 /* Make a new state cond. */
1306 StateCond
*newStateCond
= new StateCond( transCond
.s1Tel
.lowKey
,
1307 transCond
.s1Tel
.highKey
);
1308 newStateCond
->condSpace
= addCondSpace( CondSet( condAction
) );
1309 destList
.append( newStateCond
);
1311 /* Create the expansion. */
1312 Expansion
*expansion
= new Expansion( transCond
.s1Tel
.lowKey
,
1313 transCond
.s1Tel
.highKey
);
1314 expansion
->fromTrans
= new TransAp(*transCond
.s1Tel
.trans
);
1315 expansion
->fromTrans
->fromState
= 0;
1316 expansion
->fromTrans
->toState
= transCond
.s1Tel
.trans
->toState
;
1317 expansion
->fromCondSpace
= 0;
1318 expansion
->fromVals
= 0;
1319 expansion
->toCondSpace
= newStateCond
->condSpace
;
1320 expansion
->toValsList
.append( sense
?1:0 );
1322 logNewExpansion( expansion
);
1324 expansionList
.append( expansion
);
1329 /* Enhance state cond and find the expansion. */
1330 StateCond
*stateCond
= transCond
.s2Tel
.trans
;
1331 stateCond
->lowKey
= transCond
.s2Tel
.lowKey
;
1332 stateCond
->highKey
= transCond
.s2Tel
.highKey
;
1334 CondSet
&destCS
= stateCond
->condSpace
->condSet
;
1335 long destLen
= destCS
.length();
1336 CondSpace
*fromCondSpace
= stateCond
->condSpace
;
1338 CondSet mergedCS
= destCS
;
1339 mergedCS
.insert( condAction
);
1340 CondSpace
*toCondSpace
= addCondSpace( mergedCS
);
1341 stateCond
->condSpace
= toCondSpace
;
1342 destList
.append( stateCond
);
1344 /* Loop all values in the dest space. */
1345 for ( long destVals
= 0; destVals
< (1 << destLen
); destVals
++ ) {
1347 for ( CondSet::Iter csi
= destCS
; csi
.lte(); csi
++ ) {
1348 if ( destVals
& (1 << csi
.pos()) ) {
1349 Action
**cim
= mergedCS
.find( *csi
);
1350 long bitPos
= (cim
- mergedCS
.data
);
1351 basicVals
|= 1 << bitPos
;
1355 long targVals
= basicVals
;
1356 Action
**cim
= mergedCS
.find( condAction
);
1357 long bitPos
= (cim
- mergedCS
.data
);
1358 targVals
|= (sense
?1:0) << bitPos
;
1360 LongVect
expandToVals( targVals
);
1361 findCondExpInTrans( expansionList
, destState
,
1362 transCond
.s2Tel
.lowKey
, transCond
.s2Tel
.highKey
,
1363 fromCondSpace
, toCondSpace
, destVals
, expandToVals
);
1377 destState
->stateCondList
.transfer( destList
);
1380 void FsmAp::embedCondition( StateAp
*state
, Action
*condAction
, bool sense
)
1383 ExpansionList expList
;
1385 /* Turn on misfit accounting to possibly catch the old start state. */
1386 setMisfitAccounting( true );
1389 embedCondition( md
, state
, condAction
, sense
);
1391 /* Fill in any states that were newed up as combinations of others. */
1394 /* Remove the misfits and turn off misfit accounting. */
1396 setMisfitAccounting( false );
1399 void FsmAp::embedCondition( MergeData
&md
, StateAp
*state
, Action
*condAction
, bool sense
)
1401 ExpansionList expList
;
1403 findEmbedExpansions( expList
, state
, condAction
, sense
);
1404 doExpand( md
, state
, expList
);
1405 doRemove( md
, state
, expList
);
1409 /* Check if a machine defines a single character. This is useful in validating
1410 * ranges and machines to export. */
1411 bool FsmAp::checkSingleCharMachine()
1413 /* Must have two states. */
1414 if ( stateList
.length() != 2 )
1416 /* The start state cannot be final. */
1417 if ( startState
->isFinState() )
1419 /* There should be only one final state. */
1420 if ( finStateSet
.length() != 1 )
1422 /* The final state cannot have any transitions out. */
1423 if ( finStateSet
[0]->outList
.length() != 0 )
1425 /* The start state should have only one transition out. */
1426 if ( startState
->outList
.length() != 1 )
1428 /* The singe transition out of the start state should not be a range. */
1429 TransAp
*startTrans
= startState
->outList
.head
;
1430 if ( startTrans
->lowKey
!= startTrans
->highKey
)