2 * Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca>
5 /* This file is part of Ragel.
7 * Ragel is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * Ragel is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Ragel; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
31 #include "parsetree.h"
34 ostream
&operator<<( ostream
&out
, const NameRef
&nameRef
);
35 ostream
&operator<<( ostream
&out
, const NameInst
&nameInst
);
37 /* Convert the literal string which comes in from the scanner into an array of
38 * characters with escapes and options interpreted. Also null terminates the
39 * string. Though this null termination should not be relied on for
40 * interpreting literals in the parser because the string may contain \0 */
41 char *prepareLitString( const InputLoc
&loc
, const char *data
, long length
,
42 long &resLen
, bool &caseInsensitive
)
44 char *resData
= new char[length
+1];
45 caseInsensitive
= false;
47 const char *src
= data
+ 1;
48 const char *end
= data
+ length
- 1;
50 while ( *end
!= '\'' && *end
!= '\"' ) {
52 caseInsensitive
= true;
54 error( loc
) << "literal string '" << *end
<<
55 "' option not supported" << endl
;
62 while ( src
!= end
) {
65 case '0': dest
[len
++] = '\0'; break;
66 case 'a': dest
[len
++] = '\a'; break;
67 case 'b': dest
[len
++] = '\b'; break;
68 case 't': dest
[len
++] = '\t'; break;
69 case 'n': dest
[len
++] = '\n'; break;
70 case 'v': dest
[len
++] = '\v'; break;
71 case 'f': dest
[len
++] = '\f'; break;
72 case 'r': dest
[len
++] = '\r'; break;
74 default: dest
[len
++] = src
[1]; break;
88 FsmAp
*VarDef::walk( ParseData
*pd
)
90 /* We enter into a new name scope. */
91 NameFrame nameFrame
= pd
->enterNameScope( true, 1 );
93 /* Recurse on the expression. */
94 FsmAp
*rtnVal
= joinOrLm
->walk( pd
);
96 /* Do the tranfer of local error actions. */
97 LocalErrDictEl
*localErrDictEl
= pd
->localErrDict
.find( name
);
98 if ( localErrDictEl
!= 0 ) {
99 for ( StateList::Iter state
= rtnVal
->stateList
; state
.lte(); state
++ )
100 rtnVal
->transferErrorActions( state
, localErrDictEl
->value
);
103 /* If the expression below is a join operation with multiple expressions
104 * then it just had epsilon transisions resolved. If it is a join
105 * with only a single expression then run the epsilon op now. */
106 if ( joinOrLm
->type
== JoinOrLm::JoinType
&& joinOrLm
->join
->exprList
.length() == 1 )
109 /* We can now unset entry points that are not longer used. */
110 pd
->unsetObsoleteEntries( rtnVal
);
112 /* If the name of the variable is referenced then add the entry point to
114 if ( pd
->curNameInst
->numRefs
> 0 )
115 rtnVal
->setEntry( pd
->curNameInst
->id
, rtnVal
->startState
);
117 /* Pop the name scope. */
118 pd
->popNameScope( nameFrame
);
122 void VarDef::makeNameTree( const InputLoc
&loc
, ParseData
*pd
)
124 /* The variable definition enters a new scope. */
125 NameInst
*prevNameInst
= pd
->curNameInst
;
126 pd
->curNameInst
= pd
->addNameInst( loc
, name
, false );
128 if ( joinOrLm
->type
== JoinOrLm::LongestMatchType
)
129 pd
->curNameInst
->isLongestMatch
= true;
132 joinOrLm
->makeNameTree( pd
);
134 /* The name scope ends, pop the name instantiation. */
135 pd
->curNameInst
= prevNameInst
;
138 void VarDef::resolveNameRefs( ParseData
*pd
)
140 /* Entering into a new scope. */
141 NameFrame nameFrame
= pd
->enterNameScope( true, 1 );
144 joinOrLm
->resolveNameRefs( pd
);
146 /* The name scope ends, pop the name instantiation. */
147 pd
->popNameScope( nameFrame
);
150 InputLoc
LongestMatchPart::getLoc()
152 return action
!= 0 ? action
->loc
: semiLoc
;
156 * If there are any LMs then all of the following entry points must reset
159 * 1. fentry(StateRef)
160 * 2. ftoto(StateRef), fcall(StateRef), fnext(StateRef)
161 * 3. targt of any transition that has an fcall (the return loc).
162 * 4. start state of all longest match routines.
165 Action
*LongestMatch::newAction( ParseData
*pd
, const InputLoc
&loc
,
166 const char *name
, InlineList
*inlineList
)
168 Action
*action
= new Action( loc
, name
, inlineList
, pd
->nextCondId
++ );
169 action
->actionRefs
.append( pd
->curNameInst
);
170 pd
->actionList
.append( action
);
171 action
->isLmAction
= true;
175 void LongestMatch::makeActions( ParseData
*pd
)
177 /* Make actions that set the action id. */
178 for ( LmPartList::Iter lmi
= *longestMatchList
; lmi
.lte(); lmi
++ ) {
179 /* For each part create actions for setting the match type. We need
180 * to do this so that the actions will go into the actionIndex. */
181 InlineList
*inlineList
= new InlineList
;
182 inlineList
->append( new InlineItem( lmi
->getLoc(), this, lmi
,
183 InlineItem::LmSetActId
) );
184 char *actName
= new char[50];
185 sprintf( actName
, "store%i", lmi
->longestMatchId
);
186 lmi
->setActId
= newAction( pd
, lmi
->getLoc(), actName
, inlineList
);
189 /* Make actions that execute the user action and restart on the last
191 for ( LmPartList::Iter lmi
= *longestMatchList
; lmi
.lte(); lmi
++ ) {
192 /* For each part create actions for setting the match type. We need
193 * to do this so that the actions will go into the actionIndex. */
194 InlineList
*inlineList
= new InlineList
;
195 inlineList
->append( new InlineItem( lmi
->getLoc(), this, lmi
,
196 InlineItem::LmOnLast
) );
197 char *actName
= new char[50];
198 sprintf( actName
, "last%i", lmi
->longestMatchId
);
199 lmi
->actOnLast
= newAction( pd
, lmi
->getLoc(), actName
, inlineList
);
202 /* Make actions that execute the user action and restart on the next
203 * character. These actions will set tokend themselves (it is the current
205 for ( LmPartList::Iter lmi
= *longestMatchList
; lmi
.lte(); lmi
++ ) {
206 /* For each part create actions for setting the match type. We need
207 * to do this so that the actions will go into the actionIndex. */
208 InlineList
*inlineList
= new InlineList
;
209 inlineList
->append( new InlineItem( lmi
->getLoc(), this, lmi
,
210 InlineItem::LmOnNext
) );
211 char *actName
= new char[50];
212 sprintf( actName
, "next%i", lmi
->longestMatchId
);
213 lmi
->actOnNext
= newAction( pd
, lmi
->getLoc(), actName
, inlineList
);
216 /* Make actions that execute the user action and restart at tokend. These
217 * actions execute some time after matching the last char. */
218 for ( LmPartList::Iter lmi
= *longestMatchList
; lmi
.lte(); lmi
++ ) {
219 /* For each part create actions for setting the match type. We need
220 * to do this so that the actions will go into the actionIndex. */
221 InlineList
*inlineList
= new InlineList
;
222 inlineList
->append( new InlineItem( lmi
->getLoc(), this, lmi
,
223 InlineItem::LmOnLagBehind
) );
224 char *actName
= new char[50];
225 sprintf( actName
, "lag%i", lmi
->longestMatchId
);
226 lmi
->actLagBehind
= newAction( pd
, lmi
->getLoc(), actName
, inlineList
);
233 /* Create the error action. */
234 InlineList
*il6
= new InlineList
;
235 il6
->append( new InlineItem( loc
, this, 0, InlineItem::LmSwitch
) );
236 lmActSelect
= newAction( pd
, loc
, "switch", il6
);
239 void LongestMatch::findName( ParseData
*pd
)
241 NameInst
*nameInst
= pd
->curNameInst
;
242 while ( nameInst
->name
== 0 ) {
243 nameInst
= nameInst
->parent
;
244 /* Since every machine must must have a name, we should always find a
245 * name for the longest match. */
246 assert( nameInst
!= 0 );
248 name
= nameInst
->name
;
251 void LongestMatch::makeNameTree( ParseData
*pd
)
253 /* Create an anonymous scope for the longest match. Will be used for
254 * restarting machine after matching a token. */
255 NameInst
*prevNameInst
= pd
->curNameInst
;
256 pd
->curNameInst
= pd
->addNameInst( loc
, 0, false );
258 /* Recurse into all parts of the longest match operator. */
259 for ( LmPartList::Iter lmi
= *longestMatchList
; lmi
.lte(); lmi
++ )
260 lmi
->join
->makeNameTree( pd
);
262 /* Traverse the name tree upwards to find a name for this lm. */
265 /* Also make the longest match's actions at this point. */
268 /* The name scope ends, pop the name instantiation. */
269 pd
->curNameInst
= prevNameInst
;
272 void LongestMatch::resolveNameRefs( ParseData
*pd
)
274 /* The longest match gets its own name scope. */
275 NameFrame nameFrame
= pd
->enterNameScope( true, 1 );
277 /* Take an action reference for each longest match item and recurse. */
278 for ( LmPartList::Iter lmi
= *longestMatchList
; lmi
.lte(); lmi
++ ) {
279 /* Record the reference if the item has an action. */
280 if ( lmi
->action
!= 0 )
281 lmi
->action
->actionRefs
.append( pd
->localNameScope
);
283 /* Recurse down the join. */
284 lmi
->join
->resolveNameRefs( pd
);
287 /* The name scope ends, pop the name instantiation. */
288 pd
->popNameScope( nameFrame
);
291 void LongestMatch::restart( FsmAp
*graph
, TransAp
*trans
)
293 StateAp
*fromState
= trans
->fromState
;
294 graph
->detachTrans( fromState
, trans
->toState
, trans
);
295 graph
->attachTrans( fromState
, graph
->startState
, trans
);
298 void LongestMatch::runLongestMatch( ParseData
*pd
, FsmAp
*graph
)
300 graph
->markReachableFromHereStopFinal( graph
->startState
);
301 for ( StateList::Iter ms
= graph
->stateList
; ms
.lte(); ms
++ ) {
302 if ( ms
->stateBits
& STB_ISMARKED
) {
303 ms
->lmItemSet
.insert( 0 );
304 ms
->stateBits
&= ~ STB_ISMARKED
;
308 /* Transfer the first item of non-empty lmAction tables to the item sets
309 * of the states that follow. Exclude states that have no transitions out.
310 * This must happen on a separate pass so that on each iteration of the
311 * next pass we have the item set entries from all lmAction tables. */
312 for ( StateList::Iter st
= graph
->stateList
; st
.lte(); st
++ ) {
313 for ( TransList::Iter trans
= st
->outList
; trans
.lte(); trans
++ ) {
314 if ( trans
->lmActionTable
.length() > 0 ) {
315 LmActionTableEl
*lmAct
= trans
->lmActionTable
.data
;
316 StateAp
*toState
= trans
->toState
;
319 /* Can only optimize this if there are no transitions out.
320 * Note there can be out transitions going nowhere with
321 * actions and they too must inhibit this optimization. */
322 if ( toState
->outList
.length() > 0 ) {
323 /* Fill the item sets. */
324 graph
->markReachableFromHereStopFinal( toState
);
325 for ( StateList::Iter ms
= graph
->stateList
; ms
.lte(); ms
++ ) {
326 if ( ms
->stateBits
& STB_ISMARKED
) {
327 ms
->lmItemSet
.insert( lmAct
->value
);
328 ms
->stateBits
&= ~ STB_ISMARKED
;
336 /* The lmItem sets are now filled, telling us which longest match rules
337 * can succeed in which states. First determine if we need to make sure
338 * act is defaulted to zero. We need to do this if there are any states
339 * with lmItemSet.length() > 1 and NULL is included. That is, that the
340 * switch may get called when in fact nothing has been matched. */
341 int maxItemSetLength
= 0;
342 graph
->markReachableFromHereStopFinal( graph
->startState
);
343 for ( StateList::Iter ms
= graph
->stateList
; ms
.lte(); ms
++ ) {
344 if ( ms
->stateBits
& STB_ISMARKED
) {
345 if ( ms
->lmItemSet
.length() > maxItemSetLength
)
346 maxItemSetLength
= ms
->lmItemSet
.length();
347 ms
->stateBits
&= ~ STB_ISMARKED
;
351 /* The actions executed on starting to match a token. */
352 graph
->isolateStartState();
353 graph
->startState
->toStateActionTable
.setAction( pd
->initTokStartOrd
, pd
->initTokStart
);
354 graph
->startState
->fromStateActionTable
.setAction( pd
->setTokStartOrd
, pd
->setTokStart
);
355 if ( maxItemSetLength
> 1 ) {
356 /* The longest match action switch may be called when tokens are
357 * matched, in which case act must be initialized, there must be a
358 * case to handle the error, and the generated machine will require an
360 lmSwitchHandlesError
= true;
361 pd
->lmRequiresErrorState
= true;
362 graph
->startState
->toStateActionTable
.setAction( pd
->initActIdOrd
, pd
->initActId
);
365 /* The place to store transitions to restart. It maybe possible for the
366 * restarting to affect the searching through the graph that follows. For
367 * now take the safe route and save the list of transitions to restart
368 * until after all searching is done. */
369 Vector
<TransAp
*> restartTrans
;
371 /* Set actions that do immediate token recognition, set the longest match part
372 * id and set the token ending. */
373 for ( StateList::Iter st
= graph
->stateList
; st
.lte(); st
++ ) {
374 for ( TransList::Iter trans
= st
->outList
; trans
.lte(); trans
++ ) {
375 if ( trans
->lmActionTable
.length() > 0 ) {
376 LmActionTableEl
*lmAct
= trans
->lmActionTable
.data
;
377 StateAp
*toState
= trans
->toState
;
380 /* Can only optimize this if there are no transitions out.
381 * Note there can be out transitions going nowhere with
382 * actions and they too must inhibit this optimization. */
383 if ( toState
->outList
.length() == 0 ) {
384 /* Can execute the immediate action for the longest match
385 * part. Redirect the action to the start state.
387 * NOTE: When we need to inhibit on_last due to leaving
388 * actions the above test suffices. If the state has out
389 * actions then it will fail because the out action will
390 * have been transferred to an error transition, which
391 * makes the outlist non-empty. */
392 trans
->actionTable
.setAction( lmAct
->key
,
393 lmAct
->value
->actOnLast
);
394 restartTrans
.append( trans
);
397 /* Look for non final states that have a non-empty item
398 * set. If these are present then we need to record the
399 * end of the token. Also Find the highest item set
400 * length reachable from here (excluding at transtions to
402 bool nonFinalNonEmptyItemSet
= false;
403 maxItemSetLength
= 0;
404 graph
->markReachableFromHereStopFinal( toState
);
405 for ( StateList::Iter ms
= graph
->stateList
; ms
.lte(); ms
++ ) {
406 if ( ms
->stateBits
& STB_ISMARKED
) {
407 if ( ms
->lmItemSet
.length() > 0 && !ms
->isFinState() )
408 nonFinalNonEmptyItemSet
= true;
409 if ( ms
->lmItemSet
.length() > maxItemSetLength
)
410 maxItemSetLength
= ms
->lmItemSet
.length();
411 ms
->stateBits
&= ~ STB_ISMARKED
;
415 /* If there are reachable states that are not final and
416 * have non empty item sets or that have an item set
417 * length greater than one then we need to set tokend
418 * because the error action that matches the token will
420 if ( nonFinalNonEmptyItemSet
|| maxItemSetLength
> 1 )
421 trans
->actionTable
.setAction( pd
->setTokEndOrd
, pd
->setTokEnd
);
423 /* Some states may not know which longest match item to
424 * execute, must set it. */
425 if ( maxItemSetLength
> 1 ) {
426 /* There are transitions out, another match may come. */
427 trans
->actionTable
.setAction( lmAct
->key
,
428 lmAct
->value
->setActId
);
435 /* Now that all graph searching is done it certainly safe set the
436 * restarting. It may be safe above, however this must be verified. */
437 for ( Vector
<TransAp
*>::Iter pt
= restartTrans
; pt
.lte(); pt
++ )
438 restart( graph
, *pt
);
440 int lmErrActionOrd
= pd
->curActionOrd
++;
442 /* Embed the error for recognizing a char. */
443 for ( StateList::Iter st
= graph
->stateList
; st
.lte(); st
++ ) {
444 if ( st
->lmItemSet
.length() == 1 && st
->lmItemSet
[0] != 0 ) {
445 if ( st
->isFinState() ) {
446 /* On error execute the onActNext action, which knows that
447 * the last character of the token was one back and restart. */
448 graph
->setErrorTarget( st
, graph
->startState
, &lmErrActionOrd
,
449 &st
->lmItemSet
[0]->actOnNext
, 1 );
450 st
->eofActionTable
.setAction( lmErrActionOrd
,
451 st
->lmItemSet
[0]->actOnNext
);
452 st
->eofTarget
= graph
->startState
;
455 graph
->setErrorTarget( st
, graph
->startState
, &lmErrActionOrd
,
456 &st
->lmItemSet
[0]->actLagBehind
, 1 );
457 st
->eofActionTable
.setAction( lmErrActionOrd
,
458 st
->lmItemSet
[0]->actLagBehind
);
459 st
->eofTarget
= graph
->startState
;
462 else if ( st
->lmItemSet
.length() > 1 ) {
463 /* Need to use the select. Take note of which items the select
464 * is needed for so only the necessary actions are included. */
465 for ( LmItemSet::Iter plmi
= st
->lmItemSet
; plmi
.lte(); plmi
++ ) {
467 (*plmi
)->inLmSelect
= true;
469 /* On error, execute the action select and go to the start state. */
470 graph
->setErrorTarget( st
, graph
->startState
, &lmErrActionOrd
,
472 st
->eofActionTable
.setAction( lmErrActionOrd
, lmActSelect
);
473 st
->eofTarget
= graph
->startState
;
477 /* Finally, the start state should be made final. */
478 graph
->setFinState( graph
->startState
);
481 void LongestMatch::transferScannerLeavingActions( FsmAp
*graph
)
483 for ( StateList::Iter st
= graph
->stateList
; st
.lte(); st
++ ) {
484 if ( st
->outActionTable
.length() > 0 )
485 graph
->setErrorActions( st
, st
->outActionTable
);
489 FsmAp
*LongestMatch::walk( ParseData
*pd
)
491 /* The longest match has it's own name scope. */
492 NameFrame nameFrame
= pd
->enterNameScope( true, 1 );
494 /* Make each part of the longest match. */
495 FsmAp
**parts
= new FsmAp
*[longestMatchList
->length()];
496 LmPartList::Iter lmi
= *longestMatchList
;
497 for ( int i
= 0; lmi
.lte(); lmi
++, i
++ ) {
498 /* Create the machine and embed the setting of the longest match id. */
499 parts
[i
] = lmi
->join
->walk( pd
);
500 parts
[i
]->longMatchAction( pd
->curActionOrd
++, lmi
);
503 /* Before we union the patterns we need to deal with leaving actions. They
504 * are transfered to error transitions out of the final states (like local
505 * error actions) and to eof actions. In the scanner we need to forbid
506 * on_last for any final state that has an leaving action. */
507 for ( int i
= 0; i
< longestMatchList
->length(); i
++ )
508 transferScannerLeavingActions( parts
[i
] );
510 /* Union machines one and up with machine zero. The grammar dictates that
511 * there will always be at least one part. */
512 FsmAp
*rtnVal
= parts
[0];
513 for ( int i
= 1; i
< longestMatchList
->length(); i
++ ) {
514 rtnVal
->unionOp( parts
[i
] );
515 afterOpMinimize( rtnVal
);
518 runLongestMatch( pd
, rtnVal
);
520 /* Pop the name scope. */
521 pd
->popNameScope( nameFrame
);
527 FsmAp
*JoinOrLm::walk( ParseData
*pd
)
532 rtnVal
= join
->walk( pd
);
534 case LongestMatchType
:
535 rtnVal
= longestMatch
->walk( pd
);
541 void JoinOrLm::makeNameTree( ParseData
*pd
)
545 join
->makeNameTree( pd
);
547 case LongestMatchType
:
548 longestMatch
->makeNameTree( pd
);
553 void JoinOrLm::resolveNameRefs( ParseData
*pd
)
557 join
->resolveNameRefs( pd
);
559 case LongestMatchType
:
560 longestMatch
->resolveNameRefs( pd
);
566 /* Construct with a location and the first expression. */
567 Join::Join( const InputLoc
&loc
, Expression
*expr
)
571 exprList
.append( expr
);
574 /* Construct with a location and the first expression. */
575 Join::Join( Expression
*expr
)
579 exprList
.append( expr
);
582 /* Walk an expression node. */
583 FsmAp
*Join::walk( ParseData
*pd
)
585 if ( exprList
.length() > 1 )
586 return walkJoin( pd
);
588 return exprList
.head
->walk( pd
);
591 /* There is a list of expressions to join. */
592 FsmAp
*Join::walkJoin( ParseData
*pd
)
594 /* We enter into a new name scope. */
595 NameFrame nameFrame
= pd
->enterNameScope( true, 1 );
597 /* Evaluate the machines. */
598 FsmAp
**fsms
= new FsmAp
*[exprList
.length()];
599 ExprList::Iter expr
= exprList
;
600 for ( int e
= 0; e
< exprList
.length(); e
++, expr
++ )
601 fsms
[e
] = expr
->walk( pd
);
603 /* Get the start and final names. Final is
604 * guaranteed to exist, start is not. */
605 NameInst
*startName
= pd
->curNameInst
->start
;
606 NameInst
*finalName
= pd
->curNameInst
->final
;
609 if ( startName
!= 0 ) {
610 /* Take note that there was an implicit link to the start machine. */
611 pd
->localNameScope
->referencedNames
.append( startName
);
612 startId
= startName
->id
;
615 /* A final id of -1 indicates there is no epsilon that references the
616 * final state, therefor do not create one or set an entry point to it. */
618 if ( finalName
->numRefs
> 0 )
619 finalId
= finalName
->id
;
621 /* Join machines 1 and up onto machine 0. */
622 FsmAp
*retFsm
= fsms
[0];
623 retFsm
->joinOp( startId
, finalId
, fsms
+1, exprList
.length()-1 );
625 /* We can now unset entry points that are not longer used. */
626 pd
->unsetObsoleteEntries( retFsm
);
628 /* Pop the name scope. */
629 pd
->popNameScope( nameFrame
);
635 void Join::makeNameTree( ParseData
*pd
)
637 if ( exprList
.length() > 1 ) {
638 /* Create the new anonymous scope. */
639 NameInst
*prevNameInst
= pd
->curNameInst
;
640 pd
->curNameInst
= pd
->addNameInst( loc
, 0, false );
642 /* Join scopes need an implicit "final" target. */
643 pd
->curNameInst
->final
= new NameInst( InputLoc(), pd
->curNameInst
, "final",
644 pd
->nextNameId
++, false );
646 /* Recurse into all expressions in the list. */
647 for ( ExprList::Iter expr
= exprList
; expr
.lte(); expr
++ )
648 expr
->makeNameTree( pd
);
650 /* The name scope ends, pop the name instantiation. */
651 pd
->curNameInst
= prevNameInst
;
654 /* Recurse into the single expression. */
655 exprList
.head
->makeNameTree( pd
);
660 void Join::resolveNameRefs( ParseData
*pd
)
662 /* Branch on whether or not there is to be a join. */
663 if ( exprList
.length() > 1 ) {
664 /* The variable definition enters a new scope. */
665 NameFrame nameFrame
= pd
->enterNameScope( true, 1 );
667 /* The join scope must contain a start label. */
668 NameSet resolved
= pd
->resolvePart( pd
->localNameScope
, "start", true );
669 if ( resolved
.length() > 0 ) {
670 /* Take the first. */
671 pd
->curNameInst
->start
= resolved
[0];
672 if ( resolved
.length() > 1 ) {
673 /* Complain about the multiple references. */
674 error(loc
) << "join operation has multiple start labels" << endl
;
675 errorStateLabels( resolved
);
679 /* Make sure there is a start label. */
680 if ( pd
->curNameInst
->start
!= 0 ) {
681 /* There is an implicit reference to start name. */
682 pd
->curNameInst
->start
->numRefs
+= 1;
685 /* No start label. */
686 error(loc
) << "join operation has no start label" << endl
;
689 /* Recurse into all expressions in the list. */
690 for ( ExprList::Iter expr
= exprList
; expr
.lte(); expr
++ )
691 expr
->resolveNameRefs( pd
);
693 /* The name scope ends, pop the name instantiation. */
694 pd
->popNameScope( nameFrame
);
697 /* Recurse into the single expression. */
698 exprList
.head
->resolveNameRefs( pd
);
702 /* Clean up after an expression node. */
703 Expression::~Expression()
706 case OrType
: case IntersectType
: case SubtractType
:
707 case StrongSubtractType
:
719 /* Evaluate a single expression node. */
720 FsmAp
*Expression::walk( ParseData
*pd
, bool lastInSeq
)
725 /* Evaluate the expression. */
726 rtnVal
= expression
->walk( pd
, false );
727 /* Evaluate the term. */
728 FsmAp
*rhs
= term
->walk( pd
);
730 rtnVal
->unionOp( rhs
);
731 afterOpMinimize( rtnVal
, lastInSeq
);
734 case IntersectType
: {
735 /* Evaluate the expression. */
736 rtnVal
= expression
->walk( pd
);
737 /* Evaluate the term. */
738 FsmAp
*rhs
= term
->walk( pd
);
739 /* Perform intersection. */
740 rtnVal
->intersectOp( rhs
);
741 afterOpMinimize( rtnVal
, lastInSeq
);
745 /* Evaluate the expression. */
746 rtnVal
= expression
->walk( pd
);
747 /* Evaluate the term. */
748 FsmAp
*rhs
= term
->walk( pd
);
749 /* Perform subtraction. */
750 rtnVal
->subtractOp( rhs
);
751 afterOpMinimize( rtnVal
, lastInSeq
);
754 case StrongSubtractType
: {
755 /* Evaluate the expression. */
756 rtnVal
= expression
->walk( pd
);
758 /* Evaluate the term and pad it with any* machines. */
759 FsmAp
*rhs
= dotStarFsm( pd
);
760 FsmAp
*termFsm
= term
->walk( pd
);
761 FsmAp
*trailAnyStar
= dotStarFsm( pd
);
762 rhs
->concatOp( termFsm
);
763 rhs
->concatOp( trailAnyStar
);
765 /* Perform subtraction. */
766 rtnVal
->subtractOp( rhs
);
767 afterOpMinimize( rtnVal
, lastInSeq
);
771 /* Return result of the term. */
772 rtnVal
= term
->walk( pd
);
776 /* Duplicate the builtin. */
777 rtnVal
= makeBuiltin( builtin
, pd
);
785 void Expression::makeNameTree( ParseData
*pd
)
791 case StrongSubtractType
:
792 expression
->makeNameTree( pd
);
793 term
->makeNameTree( pd
);
796 term
->makeNameTree( pd
);
803 void Expression::resolveNameRefs( ParseData
*pd
)
809 case StrongSubtractType
:
810 expression
->resolveNameRefs( pd
);
811 term
->resolveNameRefs( pd
);
814 term
->resolveNameRefs( pd
);
821 /* Clean up after a term node. */
827 case RightFinishType
:
830 delete factorWithAug
;
832 case FactorWithAugType
:
833 delete factorWithAug
;
838 /* Evaluate a term node. */
839 FsmAp
*Term::walk( ParseData
*pd
, bool lastInSeq
)
844 /* Evaluate the Term. */
845 rtnVal
= term
->walk( pd
, false );
846 /* Evaluate the FactorWithRep. */
847 FsmAp
*rhs
= factorWithAug
->walk( pd
);
848 /* Perform concatenation. */
849 rtnVal
->concatOp( rhs
);
850 afterOpMinimize( rtnVal
, lastInSeq
);
853 case RightStartType
: {
854 /* Evaluate the Term. */
855 rtnVal
= term
->walk( pd
);
857 /* Evaluate the FactorWithRep. */
858 FsmAp
*rhs
= factorWithAug
->walk( pd
);
860 /* Set up the priority descriptors. The left machine gets the
861 * lower priority where as the right get the higher start priority. */
862 priorDescs
[0].key
= pd
->nextPriorKey
++;
863 priorDescs
[0].priority
= 0;
864 rtnVal
->allTransPrior( pd
->curPriorOrd
++, &priorDescs
[0] );
866 /* The start transitions of the right machine gets the higher
867 * priority. Use the same unique key. */
868 priorDescs
[1].key
= priorDescs
[0].key
;
869 priorDescs
[1].priority
= 1;
870 rhs
->startFsmPrior( pd
->curPriorOrd
++, &priorDescs
[1] );
872 /* Perform concatenation. */
873 rtnVal
->concatOp( rhs
);
874 afterOpMinimize( rtnVal
, lastInSeq
);
877 case RightFinishType
: {
878 /* Evaluate the Term. */
879 rtnVal
= term
->walk( pd
);
881 /* Evaluate the FactorWithRep. */
882 FsmAp
*rhs
= factorWithAug
->walk( pd
);
884 /* Set up the priority descriptors. The left machine gets the
885 * lower priority where as the finishing transitions to the right
886 * get the higher priority. */
887 priorDescs
[0].key
= pd
->nextPriorKey
++;
888 priorDescs
[0].priority
= 0;
889 rtnVal
->allTransPrior( pd
->curPriorOrd
++, &priorDescs
[0] );
891 /* The finishing transitions of the right machine get the higher
892 * priority. Use the same unique key. */
893 priorDescs
[1].key
= priorDescs
[0].key
;
894 priorDescs
[1].priority
= 1;
895 rhs
->finishFsmPrior( pd
->curPriorOrd
++, &priorDescs
[1] );
897 /* If the right machine's start state is final we need to guard
898 * against the left machine persisting by moving through the empty
900 if ( rhs
->startState
->isFinState() ) {
901 rhs
->startState
->outPriorTable
.setPrior(
902 pd
->curPriorOrd
++, &priorDescs
[1] );
905 /* Perform concatenation. */
906 rtnVal
->concatOp( rhs
);
907 afterOpMinimize( rtnVal
, lastInSeq
);
911 /* Evaluate the Term. */
912 rtnVal
= term
->walk( pd
);
914 /* Evaluate the FactorWithRep. */
915 FsmAp
*rhs
= factorWithAug
->walk( pd
);
917 /* Set up the priority descriptors. The left machine gets the
918 * higher priority. */
919 priorDescs
[0].key
= pd
->nextPriorKey
++;
920 priorDescs
[0].priority
= 1;
921 rtnVal
->allTransPrior( pd
->curPriorOrd
++, &priorDescs
[0] );
923 /* The right machine gets the lower priority. We cannot use
924 * allTransPrior here in case the start state of the right machine
925 * is final. It would allow the right machine thread to run along
926 * with the left if just passing through the start state. Using
927 * startFsmPrior prevents this. */
928 priorDescs
[1].key
= priorDescs
[0].key
;
929 priorDescs
[1].priority
= 0;
930 rhs
->startFsmPrior( pd
->curPriorOrd
++, &priorDescs
[1] );
932 /* Perform concatenation. */
933 rtnVal
->concatOp( rhs
);
934 afterOpMinimize( rtnVal
, lastInSeq
);
937 case FactorWithAugType
: {
938 rtnVal
= factorWithAug
->walk( pd
);
945 void Term::makeNameTree( ParseData
*pd
)
950 case RightFinishType
:
952 term
->makeNameTree( pd
);
953 factorWithAug
->makeNameTree( pd
);
955 case FactorWithAugType
:
956 factorWithAug
->makeNameTree( pd
);
961 void Term::resolveNameRefs( ParseData
*pd
)
966 case RightFinishType
:
968 term
->resolveNameRefs( pd
);
969 factorWithAug
->resolveNameRefs( pd
);
971 case FactorWithAugType
:
972 factorWithAug
->resolveNameRefs( pd
);
977 /* Clean up after a factor with augmentation node. */
978 FactorWithAug::~FactorWithAug()
980 delete factorWithRep
;
982 /* Walk the vector of parser actions, deleting function names. */
984 /* Clean up priority descriptors. */
985 if ( priorDescs
!= 0 )
989 void FactorWithAug::assignActions( ParseData
*pd
, FsmAp
*graph
, int *actionOrd
)
991 /* Assign actions. */
992 for ( int i
= 0; i
< actions
.length(); i
++ ) {
993 switch ( actions
[i
].type
) {
994 /* Transition actions. */
996 graph
->startFsmAction( actionOrd
[i
], actions
[i
].action
);
997 afterOpMinimize( graph
);
1000 graph
->allTransAction( actionOrd
[i
], actions
[i
].action
);
1003 graph
->finishFsmAction( actionOrd
[i
], actions
[i
].action
);
1006 graph
->leaveFsmAction( actionOrd
[i
], actions
[i
].action
);
1009 /* Global error actions. */
1010 case at_start_gbl_error
:
1011 graph
->startErrorAction( actionOrd
[i
], actions
[i
].action
, 0 );
1012 afterOpMinimize( graph
);
1014 case at_all_gbl_error
:
1015 graph
->allErrorAction( actionOrd
[i
], actions
[i
].action
, 0 );
1017 case at_final_gbl_error
:
1018 graph
->finalErrorAction( actionOrd
[i
], actions
[i
].action
, 0 );
1020 case at_not_start_gbl_error
:
1021 graph
->notStartErrorAction( actionOrd
[i
], actions
[i
].action
, 0 );
1023 case at_not_final_gbl_error
:
1024 graph
->notFinalErrorAction( actionOrd
[i
], actions
[i
].action
, 0 );
1026 case at_middle_gbl_error
:
1027 graph
->middleErrorAction( actionOrd
[i
], actions
[i
].action
, 0 );
1030 /* Local error actions. */
1031 case at_start_local_error
:
1032 graph
->startErrorAction( actionOrd
[i
], actions
[i
].action
,
1033 actions
[i
].localErrKey
);
1034 afterOpMinimize( graph
);
1036 case at_all_local_error
:
1037 graph
->allErrorAction( actionOrd
[i
], actions
[i
].action
,
1038 actions
[i
].localErrKey
);
1040 case at_final_local_error
:
1041 graph
->finalErrorAction( actionOrd
[i
], actions
[i
].action
,
1042 actions
[i
].localErrKey
);
1044 case at_not_start_local_error
:
1045 graph
->notStartErrorAction( actionOrd
[i
], actions
[i
].action
,
1046 actions
[i
].localErrKey
);
1048 case at_not_final_local_error
:
1049 graph
->notFinalErrorAction( actionOrd
[i
], actions
[i
].action
,
1050 actions
[i
].localErrKey
);
1052 case at_middle_local_error
:
1053 graph
->middleErrorAction( actionOrd
[i
], actions
[i
].action
,
1054 actions
[i
].localErrKey
);
1059 graph
->startEOFAction( actionOrd
[i
], actions
[i
].action
);
1060 afterOpMinimize( graph
);
1063 graph
->allEOFAction( actionOrd
[i
], actions
[i
].action
);
1066 graph
->finalEOFAction( actionOrd
[i
], actions
[i
].action
);
1068 case at_not_start_eof
:
1069 graph
->notStartEOFAction( actionOrd
[i
], actions
[i
].action
);
1071 case at_not_final_eof
:
1072 graph
->notFinalEOFAction( actionOrd
[i
], actions
[i
].action
);
1075 graph
->middleEOFAction( actionOrd
[i
], actions
[i
].action
);
1078 /* To State Actions. */
1079 case at_start_to_state
:
1080 graph
->startToStateAction( actionOrd
[i
], actions
[i
].action
);
1081 afterOpMinimize( graph
);
1083 case at_all_to_state
:
1084 graph
->allToStateAction( actionOrd
[i
], actions
[i
].action
);
1086 case at_final_to_state
:
1087 graph
->finalToStateAction( actionOrd
[i
], actions
[i
].action
);
1089 case at_not_start_to_state
:
1090 graph
->notStartToStateAction( actionOrd
[i
], actions
[i
].action
);
1092 case at_not_final_to_state
:
1093 graph
->notFinalToStateAction( actionOrd
[i
], actions
[i
].action
);
1095 case at_middle_to_state
:
1096 graph
->middleToStateAction( actionOrd
[i
], actions
[i
].action
);
1099 /* From State Actions. */
1100 case at_start_from_state
:
1101 graph
->startFromStateAction( actionOrd
[i
], actions
[i
].action
);
1102 afterOpMinimize( graph
);
1104 case at_all_from_state
:
1105 graph
->allFromStateAction( actionOrd
[i
], actions
[i
].action
);
1107 case at_final_from_state
:
1108 graph
->finalFromStateAction( actionOrd
[i
], actions
[i
].action
);
1110 case at_not_start_from_state
:
1111 graph
->notStartFromStateAction( actionOrd
[i
], actions
[i
].action
);
1113 case at_not_final_from_state
:
1114 graph
->notFinalFromStateAction( actionOrd
[i
], actions
[i
].action
);
1116 case at_middle_from_state
:
1117 graph
->middleFromStateAction( actionOrd
[i
], actions
[i
].action
);
1120 /* Remaining cases, prevented by the parser. */
1128 void FactorWithAug::assignPriorities( FsmAp
*graph
, int *priorOrd
)
1130 /* Assign priorities. */
1131 for ( int i
= 0; i
< priorityAugs
.length(); i
++ ) {
1132 switch ( priorityAugs
[i
].type
) {
1134 graph
->startFsmPrior( priorOrd
[i
], &priorDescs
[i
]);
1135 /* Start fsm priorities are a special case that may require
1136 * minimization afterwards. */
1137 afterOpMinimize( graph
);
1140 graph
->allTransPrior( priorOrd
[i
], &priorDescs
[i
] );
1143 graph
->finishFsmPrior( priorOrd
[i
], &priorDescs
[i
] );
1146 graph
->leaveFsmPrior( priorOrd
[i
], &priorDescs
[i
] );
1150 /* Parser Prevents this case. */
1156 void FactorWithAug::assignConditions( FsmAp
*graph
)
1158 for ( int i
= 0; i
< conditions
.length(); i
++ ) {
1159 switch ( conditions
[i
].type
) {
1160 /* Transition actions. */
1162 graph
->startFsmCondition( conditions
[i
].action
, conditions
[i
].sense
);
1163 afterOpMinimize( graph
);
1166 graph
->allTransCondition( conditions
[i
].action
, conditions
[i
].sense
);
1169 graph
->leaveFsmCondition( conditions
[i
].action
, conditions
[i
].sense
);
1178 /* Evaluate a factor with augmentation node. */
1179 FsmAp
*FactorWithAug::walk( ParseData
*pd
)
1181 /* Enter into the scopes created for the labels. */
1182 NameFrame nameFrame
= pd
->enterNameScope( false, labels
.length() );
1184 /* Make the array of function orderings. */
1186 if ( actions
.length() > 0 )
1187 actionOrd
= new int[actions
.length()];
1189 /* First walk the list of actions, assigning order to all starting
1191 for ( int i
= 0; i
< actions
.length(); i
++ ) {
1192 if ( actions
[i
].type
== at_start
||
1193 actions
[i
].type
== at_start_gbl_error
||
1194 actions
[i
].type
== at_start_local_error
||
1195 actions
[i
].type
== at_start_to_state
||
1196 actions
[i
].type
== at_start_from_state
||
1197 actions
[i
].type
== at_start_eof
)
1198 actionOrd
[i
] = pd
->curActionOrd
++;
1201 /* Evaluate the factor with repetition. */
1202 FsmAp
*rtnVal
= factorWithRep
->walk( pd
);
1204 /* Compute the remaining action orderings. */
1205 for ( int i
= 0; i
< actions
.length(); i
++ ) {
1206 if ( actions
[i
].type
!= at_start
&&
1207 actions
[i
].type
!= at_start_gbl_error
&&
1208 actions
[i
].type
!= at_start_local_error
&&
1209 actions
[i
].type
!= at_start_to_state
&&
1210 actions
[i
].type
!= at_start_from_state
&&
1211 actions
[i
].type
!= at_start_eof
)
1212 actionOrd
[i
] = pd
->curActionOrd
++;
1215 /* Embed conditions. */
1216 assignConditions( rtnVal
);
1218 /* Embed actions. */
1219 assignActions( pd
, rtnVal
, actionOrd
);
1221 /* Make the array of priority orderings. Orderings are local to this walk
1222 * of the factor with augmentation. */
1224 if ( priorityAugs
.length() > 0 )
1225 priorOrd
= new int[priorityAugs
.length()];
1227 /* Walk all priorities, assigning the priority ordering. */
1228 for ( int i
= 0; i
< priorityAugs
.length(); i
++ )
1229 priorOrd
[i
] = pd
->curPriorOrd
++;
1231 /* If the priority descriptors have not been made, make them now. Make
1232 * priority descriptors for each priority asignment that will be passed to
1233 * the fsm. Used to keep track of the key, value and used bit. */
1234 if ( priorDescs
== 0 && priorityAugs
.length() > 0 ) {
1235 priorDescs
= new PriorDesc
[priorityAugs
.length()];
1236 for ( int i
= 0; i
< priorityAugs
.length(); i
++ ) {
1237 /* Init the prior descriptor for the priority setting. */
1238 priorDescs
[i
].key
= priorityAugs
[i
].priorKey
;
1239 priorDescs
[i
].priority
= priorityAugs
[i
].priorValue
;
1243 /* Assign priorities into the machine. */
1244 assignPriorities( rtnVal
, priorOrd
);
1246 /* Assign epsilon transitions. */
1247 for ( int e
= 0; e
< epsilonLinks
.length(); e
++ ) {
1248 /* Get the name, which may not exist. If it doesn't then silently
1249 * ignore it because an error has already been reported. */
1250 NameInst
*epTarg
= pd
->epsilonResolvedLinks
[pd
->nextEpsilonResolvedLink
++];
1251 if ( epTarg
!= 0 ) {
1252 /* Make the epsilon transitions. */
1253 rtnVal
->epsilonTrans( epTarg
->id
);
1255 /* Note that we have made a link to the name. */
1256 pd
->localNameScope
->referencedNames
.append( epTarg
);
1260 /* Set entry points for labels. */
1261 if ( labels
.length() > 0 ) {
1262 /* Pop the names. */
1263 pd
->resetNameScope( nameFrame
);
1265 /* Make labels that are referenced into entry points. */
1266 for ( int i
= 0; i
< labels
.length(); i
++ ) {
1267 pd
->enterNameScope( false, 1 );
1269 /* Will always be found. */
1270 NameInst
*name
= pd
->curNameInst
;
1272 /* If the name is referenced then set the entry point. */
1273 if ( name
->numRefs
> 0 )
1274 rtnVal
->setEntry( name
->id
, rtnVal
->startState
);
1277 pd
->popNameScope( nameFrame
);
1280 if ( priorOrd
!= 0 )
1282 if ( actionOrd
!= 0 )
1287 void FactorWithAug::makeNameTree( ParseData
*pd
)
1289 /* Add the labels to the tree of instantiated names. Each label
1290 * makes a new scope. */
1291 NameInst
*prevNameInst
= pd
->curNameInst
;
1292 for ( int i
= 0; i
< labels
.length(); i
++ )
1293 pd
->curNameInst
= pd
->addNameInst( labels
[i
].loc
, labels
[i
].data
, true );
1295 /* Recurse, then pop the names. */
1296 factorWithRep
->makeNameTree( pd
);
1297 pd
->curNameInst
= prevNameInst
;
1301 void FactorWithAug::resolveNameRefs( ParseData
*pd
)
1303 /* Enter into the name scope created by any labels. */
1304 NameFrame nameFrame
= pd
->enterNameScope( false, labels
.length() );
1306 /* Note action references. */
1307 for ( int i
= 0; i
< actions
.length(); i
++ )
1308 actions
[i
].action
->actionRefs
.append( pd
->localNameScope
);
1310 /* Recurse first. IMPORTANT: we must do the exact same traversal as when
1311 * the tree is constructed. */
1312 factorWithRep
->resolveNameRefs( pd
);
1314 /* Resolve epsilon transitions. */
1315 for ( int ep
= 0; ep
< epsilonLinks
.length(); ep
++ ) {
1317 EpsilonLink
&link
= epsilonLinks
[ep
];
1318 NameInst
*resolvedName
= 0;
1320 if ( link
.target
.length() == 1 && strcmp( link
.target
.data
[0], "final" ) == 0 ) {
1321 /* Epsilon drawn to an implicit final state. An implicit final is
1322 * only available in join operations. */
1323 resolvedName
= pd
->localNameScope
->final
;
1326 /* Do an search for the name. */
1328 pd
->resolveFrom( resolved
, pd
->localNameScope
, link
.target
, 0 );
1329 if ( resolved
.length() > 0 ) {
1330 /* Take the first one. */
1331 resolvedName
= resolved
[0];
1332 if ( resolved
.length() > 1 ) {
1333 /* Complain about the multiple references. */
1334 error(link
.loc
) << "state reference " << link
.target
<<
1335 " resolves to multiple entry points" << endl
;
1336 errorStateLabels( resolved
);
1341 /* This is tricky, we stuff resolved epsilon transitions into one long
1342 * vector in the parse data structure. Since the name resolution and
1343 * graph generation both do identical walks of the parse tree we
1344 * should always find the link resolutions in the right place. */
1345 pd
->epsilonResolvedLinks
.append( resolvedName
);
1347 if ( resolvedName
!= 0 ) {
1348 /* Found the name, bump of the reference count on it. */
1349 resolvedName
->numRefs
+= 1;
1352 /* Complain, no recovery action, the epsilon op will ignore any
1353 * epsilon transitions whose names did not resolve. */
1354 error(link
.loc
) << "could not resolve label " << link
.target
<< endl
;
1358 if ( labels
.length() > 0 )
1359 pd
->popNameScope( nameFrame
);
1363 /* Clean up after a factor with repetition node. */
1364 FactorWithRep::~FactorWithRep()
1367 case StarType
: case StarStarType
: case OptionalType
: case PlusType
:
1368 case ExactType
: case MaxType
: case MinType
: case RangeType
:
1369 delete factorWithRep
;
1371 case FactorWithNegType
:
1372 delete factorWithNeg
;
1377 /* Evaluate a factor with repetition node. */
1378 FsmAp
*FactorWithRep::walk( ParseData
*pd
)
1384 /* Evaluate the FactorWithRep. */
1385 retFsm
= factorWithRep
->walk( pd
);
1386 if ( retFsm
->startState
->isFinState() ) {
1387 warning(loc
) << "applying kleene star to a machine that "
1388 "accepts zero length word" << endl
;
1389 retFsm
->unsetFinState( retFsm
->startState
);
1392 /* Shift over the start action orders then do the kleene star. */
1393 pd
->curActionOrd
+= retFsm
->shiftStartActionOrder( pd
->curActionOrd
);
1395 afterOpMinimize( retFsm
);
1398 case StarStarType
: {
1399 /* Evaluate the FactorWithRep. */
1400 retFsm
= factorWithRep
->walk( pd
);
1401 if ( retFsm
->startState
->isFinState() ) {
1402 warning(loc
) << "applying kleene star to a machine that "
1403 "accepts zero length word" << endl
;
1406 /* Set up the prior descs. All gets priority one, whereas leaving gets
1407 * priority zero. Make a unique key so that these priorities don't
1408 * interfere with any priorities set by the user. */
1409 priorDescs
[0].key
= pd
->nextPriorKey
++;
1410 priorDescs
[0].priority
= 1;
1411 retFsm
->allTransPrior( pd
->curPriorOrd
++, &priorDescs
[0] );
1413 /* Leaveing gets priority 0. Use same unique key. */
1414 priorDescs
[1].key
= priorDescs
[0].key
;
1415 priorDescs
[1].priority
= 0;
1416 retFsm
->leaveFsmPrior( pd
->curPriorOrd
++, &priorDescs
[1] );
1418 /* Shift over the start action orders then do the kleene star. */
1419 pd
->curActionOrd
+= retFsm
->shiftStartActionOrder( pd
->curActionOrd
);
1421 afterOpMinimize( retFsm
);
1424 case OptionalType
: {
1425 /* Make the null fsm. */
1426 FsmAp
*nu
= new FsmAp();
1429 /* Evaluate the FactorWithRep. */
1430 retFsm
= factorWithRep
->walk( pd
);
1432 /* Perform the question operator. */
1433 retFsm
->unionOp( nu
);
1434 afterOpMinimize( retFsm
);
1438 /* Evaluate the FactorWithRep. */
1439 retFsm
= factorWithRep
->walk( pd
);
1440 if ( retFsm
->startState
->isFinState() ) {
1441 warning(loc
) << "applying plus operator to a machine that "
1442 "accepts zero length word" << endl
;
1445 /* Need a duplicated for the star end. */
1446 FsmAp
*dup
= new FsmAp( *retFsm
);
1448 /* The start func orders need to be shifted before doing the star. */
1449 pd
->curActionOrd
+= dup
->shiftStartActionOrder( pd
->curActionOrd
);
1451 /* Star the duplicate. */
1453 afterOpMinimize( dup
);
1455 retFsm
->concatOp( dup
);
1456 afterOpMinimize( retFsm
);
1460 /* Get an int from the repetition amount. */
1461 if ( lowerRep
== 0 ) {
1462 /* No copies. Don't need to evaluate the factorWithRep.
1463 * This Defeats the purpose so give a warning. */
1464 warning(loc
) << "exactly zero repetitions results "
1465 "in the null machine" << endl
;
1467 retFsm
= new FsmAp();
1468 retFsm
->lambdaFsm();
1471 /* Evaluate the first FactorWithRep. */
1472 retFsm
= factorWithRep
->walk( pd
);
1473 if ( retFsm
->startState
->isFinState() ) {
1474 warning(loc
) << "applying repetition to a machine that "
1475 "accepts zero length word" << endl
;
1478 /* The start func orders need to be shifted before doing the
1480 pd
->curActionOrd
+= retFsm
->shiftStartActionOrder( pd
->curActionOrd
);
1482 /* Do the repetition on the machine. Already guarded against n == 0 */
1483 retFsm
->repeatOp( lowerRep
);
1484 afterOpMinimize( retFsm
);
1489 /* Get an int from the repetition amount. */
1490 if ( upperRep
== 0 ) {
1491 /* No copies. Don't need to evaluate the factorWithRep.
1492 * This Defeats the purpose so give a warning. */
1493 warning(loc
) << "max zero repetitions results "
1494 "in the null machine" << endl
;
1496 retFsm
= new FsmAp();
1497 retFsm
->lambdaFsm();
1500 /* Evaluate the first FactorWithRep. */
1501 retFsm
= factorWithRep
->walk( pd
);
1502 if ( retFsm
->startState
->isFinState() ) {
1503 warning(loc
) << "applying max repetition to a machine that "
1504 "accepts zero length word" << endl
;
1507 /* The start func orders need to be shifted before doing the
1509 pd
->curActionOrd
+= retFsm
->shiftStartActionOrder( pd
->curActionOrd
);
1511 /* Do the repetition on the machine. Already guarded against n == 0 */
1512 retFsm
->optionalRepeatOp( upperRep
);
1513 afterOpMinimize( retFsm
);
1518 /* Evaluate the repeated machine. */
1519 retFsm
= factorWithRep
->walk( pd
);
1520 if ( retFsm
->startState
->isFinState() ) {
1521 warning(loc
) << "applying min repetition to a machine that "
1522 "accepts zero length word" << endl
;
1525 /* The start func orders need to be shifted before doing the repetition
1526 * and the kleene star. */
1527 pd
->curActionOrd
+= retFsm
->shiftStartActionOrder( pd
->curActionOrd
);
1529 if ( lowerRep
== 0 ) {
1530 /* Acts just like a star op on the machine to return. */
1532 afterOpMinimize( retFsm
);
1535 /* Take a duplicate for the plus. */
1536 FsmAp
*dup
= new FsmAp( *retFsm
);
1538 /* Do repetition on the first half. */
1539 retFsm
->repeatOp( lowerRep
);
1540 afterOpMinimize( retFsm
);
1542 /* Star the duplicate. */
1544 afterOpMinimize( dup
);
1546 /* Tak on the kleene star. */
1547 retFsm
->concatOp( dup
);
1548 afterOpMinimize( retFsm
);
1553 /* Check for bogus range. */
1554 if ( upperRep
- lowerRep
< 0 ) {
1555 error(loc
) << "invalid range repetition" << endl
;
1557 /* Return null machine as recovery. */
1558 retFsm
= new FsmAp();
1559 retFsm
->lambdaFsm();
1561 else if ( lowerRep
== 0 && upperRep
== 0 ) {
1562 /* No copies. Don't need to evaluate the factorWithRep. This
1563 * defeats the purpose so give a warning. */
1564 warning(loc
) << "zero to zero repetitions results "
1565 "in the null machine" << endl
;
1567 retFsm
= new FsmAp();
1568 retFsm
->lambdaFsm();
1571 /* Now need to evaluate the repeated machine. */
1572 retFsm
= factorWithRep
->walk( pd
);
1573 if ( retFsm
->startState
->isFinState() ) {
1574 warning(loc
) << "applying range repetition to a machine that "
1575 "accepts zero length word" << endl
;
1578 /* The start func orders need to be shifted before doing both kinds
1580 pd
->curActionOrd
+= retFsm
->shiftStartActionOrder( pd
->curActionOrd
);
1582 if ( lowerRep
== 0 ) {
1583 /* Just doing max repetition. Already guarded against n == 0. */
1584 retFsm
->optionalRepeatOp( upperRep
);
1585 afterOpMinimize( retFsm
);
1587 else if ( lowerRep
== upperRep
) {
1588 /* Just doing exact repetition. Already guarded against n == 0. */
1589 retFsm
->repeatOp( lowerRep
);
1590 afterOpMinimize( retFsm
);
1593 /* This is the case that 0 < lowerRep < upperRep. Take a
1594 * duplicate for the optional repeat. */
1595 FsmAp
*dup
= new FsmAp( *retFsm
);
1597 /* Do repetition on the first half. */
1598 retFsm
->repeatOp( lowerRep
);
1599 afterOpMinimize( retFsm
);
1601 /* Do optional repetition on the second half. */
1602 dup
->optionalRepeatOp( upperRep
- lowerRep
);
1603 afterOpMinimize( dup
);
1605 /* Tak on the duplicate machine. */
1606 retFsm
->concatOp( dup
);
1607 afterOpMinimize( retFsm
);
1612 case FactorWithNegType
: {
1613 /* Evaluate the Factor. Pass it up. */
1614 retFsm
= factorWithNeg
->walk( pd
);
1620 void FactorWithRep::makeNameTree( ParseData
*pd
)
1631 factorWithRep
->makeNameTree( pd
);
1633 case FactorWithNegType
:
1634 factorWithNeg
->makeNameTree( pd
);
1639 void FactorWithRep::resolveNameRefs( ParseData
*pd
)
1650 factorWithRep
->resolveNameRefs( pd
);
1652 case FactorWithNegType
:
1653 factorWithNeg
->resolveNameRefs( pd
);
1658 /* Clean up after a factor with negation node. */
1659 FactorWithNeg::~FactorWithNeg()
1663 case CharNegateType
:
1664 delete factorWithNeg
;
1672 /* Evaluate a factor with negation node. */
1673 FsmAp
*FactorWithNeg::walk( ParseData
*pd
)
1679 /* Evaluate the factorWithNeg. */
1680 FsmAp
*toNegate
= factorWithNeg
->walk( pd
);
1682 /* Negation is subtract from dot-star. */
1683 retFsm
= dotStarFsm( pd
);
1684 retFsm
->subtractOp( toNegate
);
1685 afterOpMinimize( retFsm
);
1688 case CharNegateType
: {
1689 /* Evaluate the factorWithNeg. */
1690 FsmAp
*toNegate
= factorWithNeg
->walk( pd
);
1692 /* CharNegation is subtract from dot. */
1693 retFsm
= dotFsm( pd
);
1694 retFsm
->subtractOp( toNegate
);
1695 afterOpMinimize( retFsm
);
1699 /* Evaluate the Factor. Pass it up. */
1700 retFsm
= factor
->walk( pd
);
1706 void FactorWithNeg::makeNameTree( ParseData
*pd
)
1710 case CharNegateType
:
1711 factorWithNeg
->makeNameTree( pd
);
1714 factor
->makeNameTree( pd
);
1719 void FactorWithNeg::resolveNameRefs( ParseData
*pd
)
1723 case CharNegateType
:
1724 factorWithNeg
->resolveNameRefs( pd
);
1727 factor
->resolveNameRefs( pd
);
1732 /* Clean up after a factor node. */
1753 case LongestMatchType
:
1754 delete longestMatch
;
1759 /* Evaluate a factor node. */
1760 FsmAp
*Factor::walk( ParseData
*pd
)
1765 rtnVal
= literal
->walk( pd
);
1768 rtnVal
= range
->walk( pd
);
1771 rtnVal
= reItem
->walk( pd
, 0 );
1774 rtnVal
= regExpr
->walk( pd
, 0 );
1777 rtnVal
= varDef
->walk( pd
);
1780 rtnVal
= join
->walk( pd
);
1782 case LongestMatchType
:
1783 rtnVal
= longestMatch
->walk( pd
);
1790 void Factor::makeNameTree( ParseData
*pd
)
1799 varDef
->makeNameTree( loc
, pd
);
1802 join
->makeNameTree( pd
);
1804 case LongestMatchType
:
1805 longestMatch
->makeNameTree( pd
);
1810 void Factor::resolveNameRefs( ParseData
*pd
)
1819 varDef
->resolveNameRefs( pd
);
1822 join
->resolveNameRefs( pd
);
1824 case LongestMatchType
:
1825 longestMatch
->resolveNameRefs( pd
);
1830 /* Clean up a range object. Must delete the two literals. */
1837 /* Evaluate a range. Gets the lower an upper key and makes an fsm range. */
1838 FsmAp
*Range::walk( ParseData
*pd
)
1840 /* Construct and verify the suitability of the lower end of the range. */
1841 FsmAp
*lowerFsm
= lowerLit
->walk( pd
);
1842 if ( !lowerFsm
->checkSingleCharMachine() ) {
1843 error(lowerLit
->token
.loc
) <<
1844 "bad range lower end, must be a single character" << endl
;
1847 /* Construct and verify the upper end. */
1848 FsmAp
*upperFsm
= upperLit
->walk( pd
);
1849 if ( !upperFsm
->checkSingleCharMachine() ) {
1850 error(upperLit
->token
.loc
) <<
1851 "bad range upper end, must be a single character" << endl
;
1854 /* Grab the keys from the machines, then delete them. */
1855 Key lowKey
= lowerFsm
->startState
->outList
.head
->lowKey
;
1856 Key highKey
= upperFsm
->startState
->outList
.head
->lowKey
;
1860 /* Validate the range. */
1861 if ( lowKey
> highKey
) {
1862 /* Recover by setting upper to lower; */
1863 error(lowerLit
->token
.loc
) << "lower end of range is greater then upper end" << endl
;
1867 /* Return the range now that it is validated. */
1868 FsmAp
*retFsm
= new FsmAp();
1869 retFsm
->rangeFsm( lowKey
, highKey
);
1873 /* Evaluate a literal object. */
1874 FsmAp
*Literal::walk( ParseData
*pd
)
1876 /* FsmAp to return, is the alphabet signed. */
1881 /* Make the fsm key in int format. */
1882 Key fsmKey
= makeFsmKeyNum( token
.data
, token
.loc
, pd
);
1883 /* Make the new machine. */
1884 rtnVal
= new FsmAp();
1885 rtnVal
->concatFsm( fsmKey
);
1889 /* Make the array of keys in int format. */
1891 bool caseInsensitive
;
1892 char *data
= prepareLitString( token
.loc
, token
.data
, token
.length
,
1893 length
, caseInsensitive
);
1894 Key
*arr
= new Key
[length
];
1895 makeFsmKeyArray( arr
, data
, length
, pd
);
1897 /* Make the new machine. */
1898 rtnVal
= new FsmAp();
1899 if ( caseInsensitive
)
1900 rtnVal
->concatFsmCI( arr
, length
);
1902 rtnVal
->concatFsm( arr
, length
);
1910 /* Clean up after a regular expression object. */
1923 /* Evaluate a regular expression object. */
1924 FsmAp
*RegExpr::walk( ParseData
*pd
, RegExpr
*rootRegex
)
1926 /* This is the root regex, pass down a pointer to this. */
1927 if ( rootRegex
== 0 )
1933 /* Walk both items. */
1934 rtnVal
= regExpr
->walk( pd
, rootRegex
);
1935 FsmAp
*fsm2
= item
->walk( pd
, rootRegex
);
1936 rtnVal
->concatOp( fsm2
);
1940 rtnVal
= new FsmAp();
1941 rtnVal
->lambdaFsm();
1948 /* Clean up after an item in a regular expression. */
1962 /* Evaluate a regular expression object. */
1963 FsmAp
*ReItem::walk( ParseData
*pd
, RegExpr
*rootRegex
)
1965 /* The fsm to return, is the alphabet signed? */
1970 /* Move the data into an integer array and make a concat fsm. */
1971 Key
*arr
= new Key
[token
.length
];
1972 makeFsmKeyArray( arr
, token
.data
, token
.length
, pd
);
1974 /* Make the concat fsm. */
1975 rtnVal
= new FsmAp();
1976 if ( rootRegex
!= 0 && rootRegex
->caseInsensitive
)
1977 rtnVal
->concatFsmCI( arr
, token
.length
);
1979 rtnVal
->concatFsm( arr
, token
.length
);
1984 /* Make the dot fsm. */
1985 rtnVal
= dotFsm( pd
);
1989 /* Get the or block and minmize it. */
1990 rtnVal
= orBlock
->walk( pd
, rootRegex
);
1991 if ( rtnVal
== 0 ) {
1992 rtnVal
= new FsmAp();
1993 rtnVal
->lambdaFsm();
1995 rtnVal
->minimizePartition2();
1999 /* Get the or block and minimize it. */
2000 FsmAp
*fsm
= orBlock
->walk( pd
, rootRegex
);
2001 fsm
->minimizePartition2();
2003 /* Make a dot fsm and subtract from it. */
2004 rtnVal
= dotFsm( pd
);
2005 rtnVal
->subtractOp( fsm
);
2006 rtnVal
->minimizePartition2();
2011 /* If the item is followed by a star, then apply the star op. */
2013 if ( rtnVal
->startState
->isFinState() ) {
2014 warning(loc
) << "applying kleene star to a machine that "
2015 "accepts zero length word" << endl
;
2019 rtnVal
->minimizePartition2();
2024 /* Clean up after an or block of a regular expression. */
2025 ReOrBlock::~ReOrBlock()
2038 /* Evaluate an or block of a regular expression. */
2039 FsmAp
*ReOrBlock::walk( ParseData
*pd
, RegExpr
*rootRegex
)
2044 /* Evaluate the two fsm. */
2045 FsmAp
*fsm1
= orBlock
->walk( pd
, rootRegex
);
2046 FsmAp
*fsm2
= item
->walk( pd
, rootRegex
);
2050 fsm1
->unionOp( fsm2
);
2063 /* Evaluate an or block item of a regular expression. */
2064 FsmAp
*ReOrItem::walk( ParseData
*pd
, RegExpr
*rootRegex
)
2066 /* The return value, is the alphabet signed? */
2070 /* Make the or machine. */
2071 rtnVal
= new FsmAp();
2073 /* Put the or data into an array of ints. Note that we find unique
2074 * keys. Duplicates are silently ignored. The alternative would be to
2075 * issue warning or an error but since we can't with [a0-9a] or 'a' |
2076 * 'a' don't bother here. */
2078 makeFsmUniqueKeyArray( keySet
, token
.data
, token
.length
,
2079 rootRegex
!= 0 ? rootRegex
->caseInsensitive
: false, pd
);
2081 /* Run the or operator. */
2082 rtnVal
->orFsm( keySet
.data
, keySet
.length() );
2086 /* Make the upper and lower keys. */
2087 Key lowKey
= makeFsmKeyChar( lower
, pd
);
2088 Key highKey
= makeFsmKeyChar( upper
, pd
);
2090 /* Validate the range. */
2091 if ( lowKey
> highKey
) {
2092 /* Recover by setting upper to lower; */
2093 error(loc
) << "lower end of range is greater then upper end" << endl
;
2097 /* Make the range machine. */
2098 rtnVal
= new FsmAp();
2099 rtnVal
->rangeFsm( lowKey
, highKey
);
2101 if ( rootRegex
!= 0 && rootRegex
->caseInsensitive
) {
2102 if ( lowKey
<= 'Z' && 'A' <= highKey
) {
2103 Key otherLow
= lowKey
< 'A' ? Key('A') : lowKey
;
2104 Key otherHigh
= 'Z' < highKey
? Key('Z') : highKey
;
2106 otherLow
= 'a' + ( otherLow
- 'A' );
2107 otherHigh
= 'a' + ( otherHigh
- 'A' );
2109 FsmAp
*otherRange
= new FsmAp();
2110 otherRange
->rangeFsm( otherLow
, otherHigh
);
2111 rtnVal
->unionOp( otherRange
);
2112 rtnVal
->minimizePartition2();
2114 else if ( lowKey
<= 'z' && 'a' <= highKey
) {
2115 Key otherLow
= lowKey
< 'a' ? Key('a') : lowKey
;
2116 Key otherHigh
= 'z' < highKey
? Key('z') : highKey
;
2118 otherLow
= 'A' + ( otherLow
- 'a' );
2119 otherHigh
= 'A' + ( otherHigh
- 'a' );
2121 FsmAp
*otherRange
= new FsmAp();
2122 otherRange
->rangeFsm( otherLow
, otherHigh
);
2123 rtnVal
->unionOp( otherRange
);
2124 rtnVal
->minimizePartition2();