2 * Copyright 2002 Adrian Thurston <thurston@cs.queensu.ca>
5 /* This file is part of Ragel.
7 * Ragel is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * Ragel is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Ragel; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 #include "mergesort.h"
25 int FsmAp::partitionRound( StateAp
**statePtrs
, MinPartition
*parts
, int numParts
)
27 /* Need a mergesort object and a single partition compare. */
28 MergeSort
<StateAp
*, PartitionCompare
> mergeSort
;
29 PartitionCompare partCompare
;
31 /* For each partition. */
32 for ( int p
= 0; p
< numParts
; p
++ ) {
33 /* Fill the pointer array with the states in the partition. */
34 StateList::Iter state
= parts
[p
].list
;
35 for ( int s
= 0; state
.lte(); state
++, s
++ )
38 /* Sort the states using the partitioning compare. */
39 int numStates
= parts
[p
].list
.length();
40 mergeSort
.sort( statePtrs
, numStates
);
42 /* Assign the states into partitions based on the results of the sort. */
43 int destPart
= p
, firstNewPart
= numParts
;
44 for ( int s
= 1; s
< numStates
; s
++ ) {
45 /* If this state differs from the last then move to the next partition. */
46 if ( partCompare
.compare( statePtrs
[s
-1], statePtrs
[s
] ) < 0 ) {
47 /* The new partition is the next avail spot. */
52 /* If the state is not staying in the first partition, then
53 * transfer it to its destination partition. */
54 if ( destPart
!= p
) {
55 StateAp
*state
= parts
[p
].list
.detach( statePtrs
[s
] );
56 parts
[destPart
].list
.append( state
);
60 /* Fix the partition pointer for all the states that got moved to a new
61 * partition. This must be done after the states are transfered so the
62 * result of the sort is not altered. */
63 for ( int newPart
= firstNewPart
; newPart
< numParts
; newPart
++ ) {
64 StateList::Iter state
= parts
[newPart
].list
;
65 for ( ; state
.lte(); state
++ )
66 state
->alg
.partition
= &parts
[newPart
];
74 * \brief Minimize by partitioning version 1.
76 * Repeatedly tries to split partitions until all partitions are unsplittable.
77 * Produces the most minimal FSM possible.
79 void FsmAp::minimizePartition1()
81 /* Need one mergesort object and partition compares. */
82 MergeSort
<StateAp
*, InitPartitionCompare
> mergeSort
;
83 InitPartitionCompare initPartCompare
;
85 /* Nothing to do if there are no states. */
86 if ( stateList
.length() == 0 )
90 * First thing is to partition the states by final state status and
91 * transition functions. This gives us an initial partitioning to work
95 /* Make a array of pointers to states. */
96 int numStates
= stateList
.length();
97 StateAp
** statePtrs
= new StateAp
*[numStates
];
99 /* Fill up an array of pointers to the states for easy sorting. */
100 StateList::Iter state
= stateList
;
101 for ( int s
= 0; state
.lte(); state
++, s
++ )
102 statePtrs
[s
] = state
;
104 /* Sort the states using the array of states. */
105 mergeSort
.sort( statePtrs
, numStates
);
107 /* An array of lists of states is used to partition the states. */
108 MinPartition
*parts
= new MinPartition
[numStates
];
110 /* Assign the states into partitions. */
112 for ( int s
= 0; s
< numStates
; s
++ ) {
113 /* If this state differs from the last then move to the next partition. */
114 if ( s
> 0 && initPartCompare
.compare( statePtrs
[s
-1], statePtrs
[s
] ) < 0 ) {
115 /* Move to the next partition. */
119 /* Put the state into its partition. */
120 statePtrs
[s
]->alg
.partition
= &parts
[destPart
];
121 parts
[destPart
].list
.append( statePtrs
[s
] );
124 /* We just moved all the states from the main list into partitions without
125 * taking them off the main list. So clean up the main list now. */
128 /* Split partitions. */
129 int numParts
= destPart
+ 1;
131 /* Test all partitions for splitting. */
132 int newNum
= partitionRound( statePtrs
, parts
, numParts
);
134 /* When no partitions can be split, stop. */
135 if ( newNum
== numParts
)
141 /* Fuse states in the same partition. The states will end up back on the
143 fusePartitions( parts
, numParts
);
150 /* Split partitions that need splittting, decide which partitions might need
151 * to be split as a result, continue until there are no more that might need
153 int FsmAp::splitCandidates( StateAp
**statePtrs
, MinPartition
*parts
, int numParts
)
155 /* Need a mergesort and a partition compare. */
156 MergeSort
<StateAp
*, PartitionCompare
> mergeSort
;
157 PartitionCompare partCompare
;
159 /* The lists of unsplitable (partList) and splitable partitions.
160 * Only partitions in the splitable list are check for needing splitting. */
161 PartitionList partList
, splittable
;
163 /* Initially, all partitions are born from a split (the initial
164 * partitioning) and can cause other partitions to be split. So any
165 * partition with a state with a transition out to another partition is a
166 * candidate for splitting. This will make every partition except possibly
167 * partitions of final states split candidates. */
168 for ( int p
= 0; p
< numParts
; p
++ ) {
169 /* Assume not active. */
170 parts
[p
].active
= false;
172 /* Look for a trans out of any state in the partition. */
173 for ( StateList::Iter state
= parts
[p
].list
; state
.lte(); state
++ ) {
174 /* If there is at least one transition out to another state then
175 * the partition becomes splittable. */
176 if ( state
->outList
.length() > 0 ) {
177 parts
[p
].active
= true;
182 /* If it was found active then it goes on the splittable list. */
183 if ( parts
[p
].active
)
184 splittable
.append( &parts
[p
] );
186 partList
.append( &parts
[p
] );
189 /* While there are partitions that are splittable, pull one off and try
190 * to split it. If it splits, determine which partitions may now be split
191 * as a result of the newly split partition. */
192 while ( splittable
.length() > 0 ) {
193 MinPartition
*partition
= splittable
.detachFirst();
195 /* Fill the pointer array with the states in the partition. */
196 StateList::Iter state
= partition
->list
;
197 for ( int s
= 0; state
.lte(); state
++, s
++ )
198 statePtrs
[s
] = state
;
200 /* Sort the states using the partitioning compare. */
201 int numStates
= partition
->list
.length();
202 mergeSort
.sort( statePtrs
, numStates
);
204 /* Assign the states into partitions based on the results of the sort. */
205 MinPartition
*destPart
= partition
;
206 int firstNewPart
= numParts
;
207 for ( int s
= 1; s
< numStates
; s
++ ) {
208 /* If this state differs from the last then move to the next partition. */
209 if ( partCompare
.compare( statePtrs
[s
-1], statePtrs
[s
] ) < 0 ) {
210 /* The new partition is the next avail spot. */
211 destPart
= &parts
[numParts
];
215 /* If the state is not staying in the first partition, then
216 * transfer it to its destination partition. */
217 if ( destPart
!= partition
) {
218 StateAp
*state
= partition
->list
.detach( statePtrs
[s
] );
219 destPart
->list
.append( state
);
223 /* Fix the partition pointer for all the states that got moved to a new
224 * partition. This must be done after the states are transfered so the
225 * result of the sort is not altered. */
227 for ( newPart
= firstNewPart
; newPart
< numParts
; newPart
++ ) {
228 StateList::Iter state
= parts
[newPart
].list
;
229 for ( ; state
.lte(); state
++ )
230 state
->alg
.partition
= &parts
[newPart
];
233 /* Put the partition we just split and any new partitions that came out
234 * of the split onto the inactive list. */
235 partition
->active
= false;
236 partList
.append( partition
);
237 for ( newPart
= firstNewPart
; newPart
< numParts
; newPart
++ ) {
238 parts
[newPart
].active
= false;
239 partList
.append( &parts
[newPart
] );
242 if ( destPart
== partition
)
245 /* Now determine which partitions are splittable as a result of
246 * splitting partition by walking the in lists of the states in
247 * partitions that got split. Partition is the faked first item in the
249 MinPartition
*causalPart
= partition
;
250 newPart
= firstNewPart
- 1;
251 while ( newPart
< numParts
) {
252 /* Loop all states in the causal partition. */
253 StateList::Iter state
= causalPart
->list
;
254 for ( ; state
.lte(); state
++ ) {
255 /* Walk all transition into the state and put the partition
256 * that the from state is in onto the splittable list. */
257 for ( TransInList::Iter trans
= state
->inList
; trans
.lte(); trans
++ ) {
258 MinPartition
*fromPart
= trans
->fromState
->alg
.partition
;
259 if ( ! fromPart
->active
) {
260 fromPart
->active
= true;
261 partList
.detach( fromPart
);
262 splittable
.append( fromPart
);
268 causalPart
= &parts
[newPart
];
276 * \brief Minimize by partitioning version 2 (best alg).
278 * Repeatedly tries to split partitions that may splittable until there are no
279 * more partitions that might possibly need splitting. Runs faster than
280 * version 1. Produces the most minimal fsm possible.
282 void FsmAp::minimizePartition2()
284 /* Need a mergesort and an initial partition compare. */
285 MergeSort
<StateAp
*, InitPartitionCompare
> mergeSort
;
286 InitPartitionCompare initPartCompare
;
288 /* Nothing to do if there are no states. */
289 if ( stateList
.length() == 0 )
293 * First thing is to partition the states by final state status and
294 * transition functions. This gives us an initial partitioning to work
298 /* Make a array of pointers to states. */
299 int numStates
= stateList
.length();
300 StateAp
** statePtrs
= new StateAp
*[numStates
];
302 /* Fill up an array of pointers to the states for easy sorting. */
303 StateList::Iter state
= stateList
;
304 for ( int s
= 0; state
.lte(); state
++, s
++ )
305 statePtrs
[s
] = state
;
307 /* Sort the states using the array of states. */
308 mergeSort
.sort( statePtrs
, numStates
);
310 /* An array of lists of states is used to partition the states. */
311 MinPartition
*parts
= new MinPartition
[numStates
];
313 /* Assign the states into partitions. */
315 for ( int s
= 0; s
< numStates
; s
++ ) {
316 /* If this state differs from the last then move to the next partition. */
317 if ( s
> 0 && initPartCompare
.compare( statePtrs
[s
-1], statePtrs
[s
] ) < 0 ) {
318 /* Move to the next partition. */
322 /* Put the state into its partition. */
323 statePtrs
[s
]->alg
.partition
= &parts
[destPart
];
324 parts
[destPart
].list
.append( statePtrs
[s
] );
327 /* We just moved all the states from the main list into partitions without
328 * taking them off the main list. So clean up the main list now. */
331 /* Split partitions. */
332 int numParts
= splitCandidates( statePtrs
, parts
, destPart
+1 );
334 /* Fuse states in the same partition. The states will end up back on the
336 fusePartitions( parts
, numParts
);
343 void FsmAp::initialMarkRound( MarkIndex
&markIndex
)
345 /* P and q for walking pairs. */
346 StateAp
*p
= stateList
.head
, *q
;
348 /* Need an initial partition compare. */
349 InitPartitionCompare initPartCompare
;
351 /* Walk all unordered pairs of (p, q) where p != q.
352 * The second depth of the walk stops before reaching p. This
353 * gives us all unordered pairs of states (p, q) where p != q. */
357 /* If the states differ on final state status, out transitions or
358 * any transition data then they should be separated on the initial
360 if ( initPartCompare
.compare( p
, q
) != 0 )
361 markIndex
.markPair( p
->alg
.stateNum
, q
->alg
.stateNum
);
369 bool FsmAp::markRound( MarkIndex
&markIndex
)
371 /* P an q for walking pairs. Take note if any pair gets marked. */
372 StateAp
*p
= stateList
.head
, *q
;
373 bool pairWasMarked
= false;
375 /* Need a mark comparison. */
376 MarkCompare markCompare
;
378 /* Walk all unordered pairs of (p, q) where p != q.
379 * The second depth of the walk stops before reaching p. This
380 * gives us all unordered pairs of states (p, q) where p != q. */
384 /* Should we mark the pair? */
385 if ( !markIndex
.isPairMarked( p
->alg
.stateNum
, q
->alg
.stateNum
) ) {
386 if ( markCompare
.shouldMark( markIndex
, p
, q
) ) {
387 markIndex
.markPair( p
->alg
.stateNum
, q
->alg
.stateNum
);
388 pairWasMarked
= true;
396 return pairWasMarked
;
401 * \brief Minimize by pair marking.
403 * Decides if each pair of states is distinct or not. Uses O(n^2) memory and
404 * should only be used on small graphs. Produces the most minmimal FSM
407 void FsmAp::minimizeStable()
409 /* Set the state numbers. */
410 setStateNumbers( 0 );
412 /* This keeps track of which pairs have been marked. */
413 MarkIndex
markIndex( stateList
.length() );
415 /* Mark pairs where final stateness, out trans, or trans data differ. */
416 initialMarkRound( markIndex
);
418 /* While the last round of marking succeeded in marking a state
419 * continue to do another round. */
420 int modified
= markRound( markIndex
);
422 modified
= markRound( markIndex
);
424 /* Merge pairs that are unmarked. */
425 fuseUnmarkedPairs( markIndex
);
428 bool FsmAp::minimizeRound()
430 /* Nothing to do if there are no states. */
431 if ( stateList
.length() == 0 )
434 /* Need a mergesort on approx compare and an approx compare. */
435 MergeSort
<StateAp
*, ApproxCompare
> mergeSort
;
436 ApproxCompare approxCompare
;
438 /* Fill up an array of pointers to the states. */
439 StateAp
**statePtrs
= new StateAp
*[stateList
.length()];
440 StateList::Iter state
= stateList
;
441 for ( int s
= 0; state
.lte(); state
++, s
++ )
442 statePtrs
[s
] = state
;
444 bool modified
= false;
447 mergeSort
.sort( statePtrs
, stateList
.length() );
449 /* Walk the list looking for duplicates next to each other,
450 * merge in any duplicates. */
451 StateAp
**pLast
= statePtrs
;
452 StateAp
**pState
= statePtrs
+ 1;
453 for ( int i
= 1; i
< stateList
.length(); i
++, pState
++ ) {
454 if ( approxCompare
.compare( *pLast
, *pState
) == 0 ) {
455 /* Last and pState are the same, so fuse together. Move forward
456 * with pState but not with pLast. If any more are identical, we
458 fuseEquivStates( *pLast
, *pState
);
462 /* Last and this are different, do not set to merge them. Move
463 * pLast to the current (it may be way behind from merging many
464 * states) and pState forward one to consider the next pair. */
473 * \brief Minmimize by an approximation.
475 * Repeatedly tries to find states with transitions out to the same set of
476 * states on the same set of keys until no more identical states can be found.
477 * Does not produce the most minimial FSM possible.
479 void FsmAp::minimizeApproximate()
481 /* While the last minimization round succeeded in compacting states,
482 * continue to try to compact states. */
484 bool modified
= minimizeRound();
491 /* Remove states that have no path to them from the start state. Recursively
492 * traverses the graph marking states that have paths into them. Then removes
493 * all states that did not get marked. */
494 void FsmAp::removeUnreachableStates()
496 /* Misfit accounting should be off and there should be no states on the
498 assert( !misfitAccounting
&& misfitList
.length() == 0 );
500 /* Mark all the states that can be reached
501 * through the existing set of entry points. */
502 markReachableFromHere( startState
);
503 for ( EntryMap::Iter en
= entryPoints
; en
.lte(); en
++ )
504 markReachableFromHere( en
->value
);
506 /* Delete all states that are not marked
507 * and unmark the ones that are marked. */
508 StateAp
*state
= stateList
.head
;
510 StateAp
*next
= state
->next
;
512 if ( state
->stateBits
& STB_ISMARKED
)
513 state
->stateBits
&= ~ STB_ISMARKED
;
515 detachState( state
);
516 stateList
.detach( state
);
524 bool FsmAp::outListCovers( StateAp
*state
)
526 /* Must be at least one range to cover. */
527 if ( state
->outList
.length() == 0 )
530 /* The first must start at the lower bound. */
531 TransList::Iter trans
= state
->outList
.first();
532 if ( keyOps
->minKey
< trans
->lowKey
)
535 /* Loop starts at second el. */
538 /* Loop checks lower against prev upper. */
539 for ( ; trans
.lte(); trans
++ ) {
540 /* Lower end of the trans must be one greater than the
541 * previous' high end. */
542 Key lowKey
= trans
->lowKey
;
544 if ( trans
->prev
->highKey
< lowKey
)
548 /* Require that the last range extends to the upper bound. */
549 trans
= state
->outList
.last();
550 if ( trans
->highKey
< keyOps
->maxKey
)
556 /* Remove states that that do not lead to a final states. Works recursivly traversing
557 * the graph in reverse (starting from all final states) and marking seen states. Then
558 * removes states that did not get marked. */
559 void FsmAp::removeDeadEndStates()
561 /* Misfit accounting should be off and there should be no states on the
563 assert( !misfitAccounting
&& misfitList
.length() == 0 );
565 /* Mark all states that have paths to the final states. */
566 StateAp
**st
= finStateSet
.data
;
567 int nst
= finStateSet
.length();
568 for ( int i
= 0; i
< nst
; i
++, st
++ )
569 markReachableFromHereReverse( *st
);
571 /* Start state gets honorary marking. If the machine accepts nothing we
572 * still want the start state to hang around. This must be done after the
573 * recursive call on all the final states so that it does not cause the
574 * start state in transitions to be skipped when the start state is
575 * visited by the traversal. */
576 startState
->stateBits
|= STB_ISMARKED
;
578 /* Delete all states that are not marked
579 * and unmark the ones that are marked. */
580 StateAp
*state
= stateList
.head
;
581 while ( state
!= 0 ) {
582 StateAp
*next
= state
->next
;
584 if ( state
->stateBits
& STB_ISMARKED
)
585 state
->stateBits
&= ~ STB_ISMARKED
;
587 detachState( state
);
588 stateList
.detach( state
);
596 /* Remove states on the misfit list. To work properly misfit accounting should
597 * be on when this is called. The detaching of a state will likely cause
598 * another misfit to be collected and it can then be removed. */
599 void FsmAp::removeMisfits()
601 while ( misfitList
.length() > 0 ) {
602 /* Get the first state. */
603 StateAp
*state
= misfitList
.head
;
605 /* Detach and delete. */
606 detachState( state
);
608 /* The state was previously on the misfit list and detaching can only
609 * remove in transitions so the state must still be on the misfit
611 misfitList
.detach( state
);
616 /* Fuse src into dest because they have been deemed equivalent states.
617 * Involves moving transitions into src to go into dest and invoking
618 * callbacks. Src is deleted detached from the graph and deleted. */
619 void FsmAp::fuseEquivStates( StateAp
*dest
, StateAp
*src
)
621 /* This would get ugly. */
622 assert( dest
!= src
);
624 /* Cur is a duplicate. We can merge it with trail. */
625 inTransMove( dest
, src
);
628 stateList
.detach( src
);
632 void FsmAp::fuseUnmarkedPairs( MarkIndex
&markIndex
)
634 StateAp
*p
= stateList
.head
, *nextP
, *q
;
636 /* Definition: The primary state of an equivalence class is the first state
637 * encounterd that belongs to the equivalence class. All equivalence
638 * classes have primary state including equivalence classes with one state
641 /* For each unmarked pair merge p into q and delete p. q is always the
642 * primary state of it's equivalence class. We wouldn't have landed on it
643 * here if it were not, because it would have been deleted.
645 * Proof that q is the primaray state of it's equivalence class: Assume q
646 * is not the primary state of it's equivalence class, then it would be
647 * merged into some state that came before it and thus p would be
648 * equivalent to that state. But q is the first state that p is equivalent
649 * to so we have a contradiction. */
651 /* Walk all unordered pairs of (p, q) where p != q.
652 * The second depth of the walk stops before reaching p. This
653 * gives us all unordered pairs of states (p, q) where p != q. */
659 /* If one of p or q is a final state then mark. */
660 if ( ! markIndex
.isPairMarked( p
->alg
.stateNum
, q
->alg
.stateNum
) ) {
661 fuseEquivStates( q
, p
);
670 void FsmAp::fusePartitions( MinPartition
*parts
, int numParts
)
672 /* For each partition, fuse state 2, 3, ... into state 1. */
673 for ( int p
= 0; p
< numParts
; p
++ ) {
674 /* Assume that there will always be at least one state. */
675 StateAp
*first
= parts
[p
].list
.head
, *toFuse
= first
->next
;
677 /* Put the first state back onto the main state list. Don't bother
678 * removing it from the partition list first. */
679 stateList
.append( first
);
681 /* Fuse the rest of the state into the first. */
682 while ( toFuse
!= 0 ) {
683 /* Save the next. We will trash it before it is needed. */
684 StateAp
*next
= toFuse
->next
;
686 /* Put the state to be fused in to the first back onto the main
687 * list before it is fuse. the graph. The state needs to be on
688 * the main list for the detach from the graph to work. Don't
689 * bother removing the state from the partition list first. We
690 * need not maintain it. */
691 stateList
.append( toFuse
);
693 /* Now fuse to the first. */
694 fuseEquivStates( first
, toFuse
);
696 /* Go to the next that we saved before trashing the next pointer. */
700 /* We transfered the states from the partition list into the main list without
701 * removing the states from the partition list first. Clean it up. */
702 parts
[p
].list
.abandon();
707 /* Merge neighboring transitions go to the same state and have the same
708 * transitions data. */
709 void FsmAp::compressTransitions()
711 for ( StateList::Iter st
= stateList
; st
.lte(); st
++ ) {
712 if ( st
->outList
.length() > 1 ) {
713 for ( TransList::Iter trans
= st
->outList
, next
= trans
.next(); next
.lte(); ) {
714 Key nextLow
= next
->lowKey
;
716 if ( trans
->highKey
== nextLow
&& trans
->toState
== next
->toState
&&
717 CmpActionTable::compare( trans
->actionTable
, next
->actionTable
) == 0 )
719 trans
->highKey
= next
->highKey
;
720 st
->outList
.detach( next
);
721 detachTrans( next
->fromState
, next
->toState
, next
);