2 * Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca>
5 /* This file is part of Ragel.
7 * Ragel is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * Ragel is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Ragel; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
31 #include "parsetree.h"
34 ostream
&operator<<( ostream
&out
, const NameRef
&nameRef
);
35 ostream
&operator<<( ostream
&out
, const NameInst
&nameInst
);
37 /* Convert the literal string which comes in from the scanner into an array of
38 * characters with escapes and options interpreted. Also null terminates the
39 * string. Though this null termination should not be relied on for
40 * interpreting literals in the parser because the string may contain a
41 * literal string with \0 */
42 void Token::prepareLitString( Token
&result
, bool &caseInsensitive
)
44 result
.data
= new char[this->length
+1];
45 caseInsensitive
= false;
47 char *src
= this->data
+ 1;
48 char *end
= this->data
+ this->length
- 1;
50 while ( *end
!= '\'' && *end
!= '\"' ) {
52 caseInsensitive
= true;
54 error( this->loc
) << "literal string '" << *end
<<
55 "' option not supported" << endl
;
60 char *dest
= result
.data
;
62 while ( src
!= end
) {
65 case '0': dest
[len
++] = '\0'; break;
66 case 'a': dest
[len
++] = '\a'; break;
67 case 'b': dest
[len
++] = '\b'; break;
68 case 't': dest
[len
++] = '\t'; break;
69 case 'n': dest
[len
++] = '\n'; break;
70 case 'v': dest
[len
++] = '\v'; break;
71 case 'f': dest
[len
++] = '\f'; break;
72 case 'r': dest
[len
++] = '\r'; break;
74 default: dest
[len
++] = src
[1]; break;
83 result
.data
[result
.length
] = 0;
87 FsmAp
*VarDef::walk( ParseData
*pd
)
89 /* We enter into a new name scope. */
90 NameFrame nameFrame
= pd
->enterNameScope( true, 1 );
92 /* Recurse on the expression. */
93 FsmAp
*rtnVal
= joinOrLm
->walk( pd
);
95 /* Do the tranfer of local error actions. */
96 LocalErrDictEl
*localErrDictEl
= pd
->localErrDict
.find( name
);
97 if ( localErrDictEl
!= 0 ) {
98 for ( StateList::Iter state
= rtnVal
->stateList
; state
.lte(); state
++ )
99 rtnVal
->transferErrorActions( state
, localErrDictEl
->value
);
102 /* If the expression below is a join operation with multiple expressions
103 * then it just had epsilon transisions resolved. If it is a join
104 * with only a single expression then run the epsilon op now. */
105 if ( joinOrLm
->type
== JoinOrLm::JoinType
&& joinOrLm
->join
->exprList
.length() == 1 )
108 /* We can now unset entry points that are not longer used. */
109 pd
->unsetObsoleteEntries( rtnVal
);
111 /* If the name of the variable is referenced then add the entry point to
113 if ( pd
->curNameInst
->numRefs
> 0 )
114 rtnVal
->setEntry( pd
->curNameInst
->id
, rtnVal
->startState
);
116 /* Pop the name scope. */
117 pd
->popNameScope( nameFrame
);
121 void VarDef::makeNameTree( const InputLoc
&loc
, ParseData
*pd
)
123 /* The variable definition enters a new scope. */
124 NameInst
*prevNameInst
= pd
->curNameInst
;
125 pd
->curNameInst
= pd
->addNameInst( loc
, name
, false );
127 if ( joinOrLm
->type
== JoinOrLm::LongestMatchType
)
128 pd
->curNameInst
->isLongestMatch
= true;
131 joinOrLm
->makeNameTree( pd
);
133 /* The name scope ends, pop the name instantiation. */
134 pd
->curNameInst
= prevNameInst
;
137 void VarDef::resolveNameRefs( ParseData
*pd
)
139 /* Entering into a new scope. */
140 NameFrame nameFrame
= pd
->enterNameScope( true, 1 );
143 joinOrLm
->resolveNameRefs( pd
);
145 /* The name scope ends, pop the name instantiation. */
146 pd
->popNameScope( nameFrame
);
149 InputLoc
LongestMatchPart::getLoc()
151 return action
!= 0 ? action
->loc
: semiLoc
;
155 * If there are any LMs then all of the following entry points must reset
158 * 1. fentry(StateRef)
159 * 2. ftoto(StateRef), fcall(StateRef), fnext(StateRef)
160 * 3. targt of any transition that has an fcall (the return loc).
161 * 4. start state of all longest match routines.
164 Action
*LongestMatch::newAction( ParseData
*pd
, const InputLoc
&loc
,
165 char *name
, InlineList
*inlineList
)
167 Action
*action
= new Action( loc
, name
, inlineList
, pd
->nextCondId
++ );
168 action
->actionRefs
.append( pd
->curNameInst
);
169 pd
->actionList
.append( action
);
170 action
->isLmAction
= true;
174 void LongestMatch::makeActions( ParseData
*pd
)
176 /* Make actions that set the action id. */
177 for ( LmPartList::Iter lmi
= *longestMatchList
; lmi
.lte(); lmi
++ ) {
178 /* For each part create actions for setting the match type. We need
179 * to do this so that the actions will go into the actionIndex. */
180 InlineList
*inlineList
= new InlineList
;
181 inlineList
->append( new InlineItem( lmi
->getLoc(), this, lmi
, InlineItem::LmSetActId
) );
182 char *actName
= new char[50];
183 sprintf( actName
, "store%i", lmi
->longestMatchId
);
184 lmi
->setActId
= newAction( pd
, lmi
->getLoc(), actName
, inlineList
);
187 /* Make actions that execute the user action and restart on the last character. */
188 for ( LmPartList::Iter lmi
= *longestMatchList
; lmi
.lte(); lmi
++ ) {
189 /* For each part create actions for setting the match type. We need
190 * to do this so that the actions will go into the actionIndex. */
191 InlineList
*inlineList
= new InlineList
;
192 inlineList
->append( new InlineItem( lmi
->getLoc(), this, lmi
,
193 InlineItem::LmOnLast
) );
194 char *actName
= new char[50];
195 sprintf( actName
, "last%i", lmi
->longestMatchId
);
196 lmi
->actOnLast
= newAction( pd
, lmi
->getLoc(), actName
, inlineList
);
199 /* Make actions that execute the user action and restart on the next
200 * character. These actions will set tokend themselves (it is the current
202 for ( LmPartList::Iter lmi
= *longestMatchList
; lmi
.lte(); lmi
++ ) {
203 /* For each part create actions for setting the match type. We need
204 * to do this so that the actions will go into the actionIndex. */
205 InlineList
*inlineList
= new InlineList
;
206 inlineList
->append( new InlineItem( lmi
->getLoc(), this, lmi
,
207 InlineItem::LmOnNext
) );
208 char *actName
= new char[50];
209 sprintf( actName
, "next%i", lmi
->longestMatchId
);
210 lmi
->actOnNext
= newAction( pd
, lmi
->getLoc(), actName
, inlineList
);
213 /* Make actions that execute the user action and restart at tokend. These
214 * actions execute some time after matching the last char. */
215 for ( LmPartList::Iter lmi
= *longestMatchList
; lmi
.lte(); lmi
++ ) {
216 /* For each part create actions for setting the match type. We need
217 * to do this so that the actions will go into the actionIndex. */
218 InlineList
*inlineList
= new InlineList
;
219 inlineList
->append( new InlineItem( lmi
->getLoc(), this, lmi
,
220 InlineItem::LmOnLagBehind
) );
221 char *actName
= new char[50];
222 sprintf( actName
, "lag%i", lmi
->longestMatchId
);
223 lmi
->actLagBehind
= newAction( pd
, lmi
->getLoc(), actName
, inlineList
);
230 /* Create the error action. */
231 InlineList
*il6
= new InlineList
;
232 il6
->append( new InlineItem( loc
, this, 0, InlineItem::LmSwitch
) );
233 lmActSelect
= newAction( pd
, loc
, "switch", il6
);
236 void LongestMatch::findName( ParseData
*pd
)
238 NameInst
*nameInst
= pd
->curNameInst
;
239 while ( nameInst
->name
== 0 ) {
240 nameInst
= nameInst
->parent
;
241 /* Since every machine must must have a name, we should always find a
242 * name for the longest match. */
243 assert( nameInst
!= 0 );
245 name
= nameInst
->name
;
248 void LongestMatch::makeNameTree( ParseData
*pd
)
250 /* Create an anonymous scope for the longest match. Will be used for
251 * restarting machine after matching a token. */
252 NameInst
*prevNameInst
= pd
->curNameInst
;
253 pd
->curNameInst
= pd
->addNameInst( loc
, 0, false );
255 /* Recurse into all parts of the longest match operator. */
256 for ( LmPartList::Iter lmi
= *longestMatchList
; lmi
.lte(); lmi
++ )
257 lmi
->join
->makeNameTree( pd
);
259 /* Traverse the name tree upwards to find a name for this lm. */
262 /* Also make the longest match's actions at this point. */
265 /* The name scope ends, pop the name instantiation. */
266 pd
->curNameInst
= prevNameInst
;
269 void LongestMatch::resolveNameRefs( ParseData
*pd
)
271 /* The longest match gets its own name scope. */
272 NameFrame nameFrame
= pd
->enterNameScope( true, 1 );
274 /* Take an action reference for each longest match item and recurse. */
275 for ( LmPartList::Iter lmi
= *longestMatchList
; lmi
.lte(); lmi
++ ) {
276 /* Record the reference if the item has an action. */
277 if ( lmi
->action
!= 0 )
278 lmi
->action
->actionRefs
.append( pd
->localNameScope
);
280 /* Recurse down the join. */
281 lmi
->join
->resolveNameRefs( pd
);
284 /* The name scope ends, pop the name instantiation. */
285 pd
->popNameScope( nameFrame
);
288 void LongestMatch::restart( FsmAp
*graph
, TransAp
*trans
)
290 StateAp
*fromState
= trans
->fromState
;
291 graph
->detachTrans( fromState
, trans
->toState
, trans
);
292 graph
->attachTrans( fromState
, graph
->startState
, trans
);
295 void LongestMatch::runLonestMatch( ParseData
*pd
, FsmAp
*graph
)
297 graph
->markReachableFromHereStopFinal( graph
->startState
);
298 for ( StateList::Iter ms
= graph
->stateList
; ms
.lte(); ms
++ ) {
299 if ( ms
->stateBits
& STB_ISMARKED
) {
300 ms
->lmItemSet
.insert( 0 );
301 ms
->stateBits
&= ~ STB_ISMARKED
;
305 /* Transfer the first item of non-empty lmAction tables to the item sets
306 * of the states that follow. Exclude states that have no transitions out.
307 * This must happen on a separate pass so that on each iteration of the
308 * next pass we have the item set entries from all lmAction tables. */
309 for ( StateList::Iter st
= graph
->stateList
; st
.lte(); st
++ ) {
310 for ( TransList::Iter trans
= st
->outList
; trans
.lte(); trans
++ ) {
311 if ( trans
->lmActionTable
.length() > 0 ) {
312 LmActionTableEl
*lmAct
= trans
->lmActionTable
.data
;
313 StateAp
*toState
= trans
->toState
;
316 /* Check if there are transitions out, this may be a very
317 * close approximation? Out transitions going nowhere?
319 if ( toState
->outList
.length() > 0 ) {
320 /* Fill the item sets. */
321 graph
->markReachableFromHereStopFinal( toState
);
322 for ( StateList::Iter ms
= graph
->stateList
; ms
.lte(); ms
++ ) {
323 if ( ms
->stateBits
& STB_ISMARKED
) {
324 ms
->lmItemSet
.insert( lmAct
->value
);
325 ms
->stateBits
&= ~ STB_ISMARKED
;
333 /* The lmItem sets are now filled, telling us which longest match rules
334 * can succeed in which states. First determine if we need to make sure
335 * act is defaulted to zero. We need to do this if there are any states
336 * with lmItemSet.length() > 1 and NULL is included. That is, that the
337 * switch may get called when in fact nothing has been matched. */
338 int maxItemSetLength
= 0;
339 graph
->markReachableFromHereStopFinal( graph
->startState
);
340 for ( StateList::Iter ms
= graph
->stateList
; ms
.lte(); ms
++ ) {
341 if ( ms
->stateBits
& STB_ISMARKED
) {
342 if ( ms
->lmItemSet
.length() > maxItemSetLength
)
343 maxItemSetLength
= ms
->lmItemSet
.length();
344 ms
->stateBits
&= ~ STB_ISMARKED
;
348 /* The actions executed on starting to match a token. */
349 graph
->isolateStartState();
350 graph
->startState
->toStateActionTable
.setAction( pd
->initTokStartOrd
, pd
->initTokStart
);
351 graph
->startState
->fromStateActionTable
.setAction( pd
->setTokStartOrd
, pd
->setTokStart
);
352 if ( maxItemSetLength
> 1 ) {
353 /* The longest match action switch may be called when tokens are
354 * matched, in which case act must be initialized, there must be a
355 * case to handle the error, and the generated machine will require an
357 lmSwitchHandlesError
= true;
358 pd
->lmRequiresErrorState
= true;
359 graph
->startState
->toStateActionTable
.setAction( pd
->initActIdOrd
, pd
->initActId
);
362 /* The place to store transitions to restart. It maybe possible for the
363 * restarting to affect the searching through the graph that follows. For
364 * now take the safe route and save the list of transitions to restart
365 * until after all searching is done. */
366 Vector
<TransAp
*> restartTrans
;
368 /* Set actions that do immediate token recognition, set the longest match part
369 * id and set the token ending. */
370 for ( StateList::Iter st
= graph
->stateList
; st
.lte(); st
++ ) {
371 for ( TransList::Iter trans
= st
->outList
; trans
.lte(); trans
++ ) {
372 if ( trans
->lmActionTable
.length() > 0 ) {
373 LmActionTableEl
*lmAct
= trans
->lmActionTable
.data
;
374 StateAp
*toState
= trans
->toState
;
377 /* Check if there are transitions out, this may be a very
378 * close approximation? Out transitions going nowhere?
380 if ( toState
->outList
.length() == 0 ) {
381 /* Can execute the immediate action for the longest match
382 * part. Redirect the action to the start state. */
383 trans
->actionTable
.setAction( lmAct
->key
,
384 lmAct
->value
->actOnLast
);
385 restartTrans
.append( trans
);
388 /* Look for non final states that have a non-empty item
389 * set. If these are present then we need to record the
390 * end of the token. Also Find the highest item set
391 * length reachable from here (excluding at transtions to
393 bool nonFinalNonEmptyItemSet
= false;
394 maxItemSetLength
= 0;
395 graph
->markReachableFromHereStopFinal( toState
);
396 for ( StateList::Iter ms
= graph
->stateList
; ms
.lte(); ms
++ ) {
397 if ( ms
->stateBits
& STB_ISMARKED
) {
398 if ( ms
->lmItemSet
.length() > 0 && !ms
->isFinState() )
399 nonFinalNonEmptyItemSet
= true;
400 if ( ms
->lmItemSet
.length() > maxItemSetLength
)
401 maxItemSetLength
= ms
->lmItemSet
.length();
402 ms
->stateBits
&= ~ STB_ISMARKED
;
406 /* If there are reachable states that are not final and
407 * have non empty item sets or that have an item set
408 * length greater than one then we need to set tokend
409 * because the error action that matches the token will
411 if ( nonFinalNonEmptyItemSet
|| maxItemSetLength
> 1 )
412 trans
->actionTable
.setAction( pd
->setTokEndOrd
, pd
->setTokEnd
);
414 /* Some states may not know which longest match item to
415 * execute, must set it. */
416 if ( maxItemSetLength
> 1 ) {
417 /* There are transitions out, another match may come. */
418 trans
->actionTable
.setAction( lmAct
->key
,
419 lmAct
->value
->setActId
);
426 /* Now that all graph searching is done it certainly safe set the
427 * restarting. It may be safe above, however this must be verified. */
428 for ( Vector
<TransAp
*>::Iter pt
= restartTrans
; pt
.lte(); pt
++ )
429 restart( graph
, *pt
);
431 int lmErrActionOrd
= pd
->curActionOrd
++;
433 /* Embed the error for recognizing a char. */
434 for ( StateList::Iter st
= graph
->stateList
; st
.lte(); st
++ ) {
435 if ( st
->lmItemSet
.length() == 1 && st
->lmItemSet
[0] != 0 ) {
436 if ( st
->isFinState() ) {
437 /* On error execute the onActNext action, which knows that
438 * the last character of the token was one back and restart. */
439 graph
->setErrorTarget( st
, graph
->startState
, &lmErrActionOrd
,
440 &st
->lmItemSet
[0]->actOnNext
, 1 );
441 st
->eofActionTable
.setAction( lmErrActionOrd
,
442 st
->lmItemSet
[0]->actOnNext
);
443 st
->eofTarget
= graph
->startState
;
446 graph
->setErrorTarget( st
, graph
->startState
, &lmErrActionOrd
,
447 &st
->lmItemSet
[0]->actLagBehind
, 1 );
448 st
->eofActionTable
.setAction( lmErrActionOrd
,
449 st
->lmItemSet
[0]->actLagBehind
);
450 st
->eofTarget
= graph
->startState
;
453 else if ( st
->lmItemSet
.length() > 1 ) {
454 /* Need to use the select. Take note of the which items the select
455 * is needed for so only the necessary actions are included. */
456 for ( LmItemSet::Iter plmi
= st
->lmItemSet
; plmi
.lte(); plmi
++ ) {
458 (*plmi
)->inLmSelect
= true;
460 /* On error, execute the action select and go to the start state. */
461 graph
->setErrorTarget( st
, graph
->startState
, &lmErrActionOrd
,
463 st
->eofActionTable
.setAction( lmErrActionOrd
, lmActSelect
);
464 st
->eofTarget
= graph
->startState
;
468 /* Finally, the start state should be made final. */
469 graph
->setFinState( graph
->startState
);
472 FsmAp
*LongestMatch::walk( ParseData
*pd
)
474 /* The longest match has it's own name scope. */
475 NameFrame nameFrame
= pd
->enterNameScope( true, 1 );
477 /* Make each part of the longest match. */
478 FsmAp
**parts
= new FsmAp
*[longestMatchList
->length()];
479 LmPartList::Iter lmi
= *longestMatchList
;
480 for ( int i
= 0; lmi
.lte(); lmi
++, i
++ ) {
481 /* Create the machine and embed the setting of the longest match id. */
482 parts
[i
] = lmi
->join
->walk( pd
);
483 parts
[i
]->longMatchAction( pd
->curActionOrd
++, lmi
);
486 /* Union machines one and up with machine zero. The grammar dictates that
487 * there will always be at least one part. */
488 FsmAp
*rtnVal
= parts
[0];
489 for ( int i
= 1; i
< longestMatchList
->length(); i
++ ) {
490 rtnVal
->unionOp( parts
[i
] );
491 afterOpMinimize( rtnVal
);
494 runLonestMatch( pd
, rtnVal
);
496 /* Pop the name scope. */
497 pd
->popNameScope( nameFrame
);
503 FsmAp
*JoinOrLm::walk( ParseData
*pd
)
508 rtnVal
= join
->walk( pd
);
510 case LongestMatchType
:
511 rtnVal
= longestMatch
->walk( pd
);
517 void JoinOrLm::makeNameTree( ParseData
*pd
)
521 join
->makeNameTree( pd
);
523 case LongestMatchType
:
524 longestMatch
->makeNameTree( pd
);
529 void JoinOrLm::resolveNameRefs( ParseData
*pd
)
533 join
->resolveNameRefs( pd
);
535 case LongestMatchType
:
536 longestMatch
->resolveNameRefs( pd
);
542 /* Construct with a location and the first expression. */
543 Join::Join( const InputLoc
&loc
, Expression
*expr
)
547 exprList
.append( expr
);
550 /* Construct with a location and the first expression. */
551 Join::Join( Expression
*expr
)
555 exprList
.append( expr
);
558 /* Walk an expression node. */
559 FsmAp
*Join::walk( ParseData
*pd
)
561 if ( exprList
.length() > 1 )
562 return walkJoin( pd
);
564 return exprList
.head
->walk( pd
);
567 /* There is a list of expressions to join. */
568 FsmAp
*Join::walkJoin( ParseData
*pd
)
570 /* We enter into a new name scope. */
571 NameFrame nameFrame
= pd
->enterNameScope( true, 1 );
573 /* Evaluate the machines. */
574 FsmAp
**fsms
= new FsmAp
*[exprList
.length()];
575 ExprList::Iter expr
= exprList
;
576 for ( int e
= 0; e
< exprList
.length(); e
++, expr
++ )
577 fsms
[e
] = expr
->walk( pd
);
579 /* Get the start and final names. Final is
580 * guaranteed to exist, start is not. */
581 NameInst
*startName
= pd
->curNameInst
->start
;
582 NameInst
*finalName
= pd
->curNameInst
->final
;
585 if ( startName
!= 0 ) {
586 /* Take note that there was an implicit link to the start machine. */
587 pd
->localNameScope
->referencedNames
.append( startName
);
588 startId
= startName
->id
;
591 /* A final id of -1 indicates there is no epsilon that references the
592 * final state, therefor do not create one or set an entry point to it. */
594 if ( finalName
->numRefs
> 0 )
595 finalId
= finalName
->id
;
597 /* Join machines 1 and up onto machine 0. */
598 FsmAp
*retFsm
= fsms
[0];
599 retFsm
->joinOp( startId
, finalId
, fsms
+1, exprList
.length()-1 );
601 /* We can now unset entry points that are not longer used. */
602 pd
->unsetObsoleteEntries( retFsm
);
604 /* Pop the name scope. */
605 pd
->popNameScope( nameFrame
);
611 void Join::makeNameTree( ParseData
*pd
)
613 if ( exprList
.length() > 1 ) {
614 /* Create the new anonymous scope. */
615 NameInst
*prevNameInst
= pd
->curNameInst
;
616 pd
->curNameInst
= pd
->addNameInst( loc
, 0, false );
618 /* Join scopes need an implicit "final" target. */
619 pd
->curNameInst
->final
= new NameInst( InputLoc(), pd
->curNameInst
, "final",
620 pd
->nextNameId
++, false );
622 /* Recurse into all expressions in the list. */
623 for ( ExprList::Iter expr
= exprList
; expr
.lte(); expr
++ )
624 expr
->makeNameTree( pd
);
626 /* The name scope ends, pop the name instantiation. */
627 pd
->curNameInst
= prevNameInst
;
630 /* Recurse into the single expression. */
631 exprList
.head
->makeNameTree( pd
);
636 void Join::resolveNameRefs( ParseData
*pd
)
638 /* Branch on whether or not there is to be a join. */
639 if ( exprList
.length() > 1 ) {
640 /* The variable definition enters a new scope. */
641 NameFrame nameFrame
= pd
->enterNameScope( true, 1 );
643 /* The join scope must contain a start label. */
644 NameSet resolved
= pd
->resolvePart( pd
->localNameScope
, "start", true );
645 if ( resolved
.length() > 0 ) {
646 /* Take the first. */
647 pd
->curNameInst
->start
= resolved
[0];
648 if ( resolved
.length() > 1 ) {
649 /* Complain about the multiple references. */
650 error(loc
) << "multiple start labels" << endl
;
651 errorStateLabels( resolved
);
655 /* Make sure there is a start label. */
656 if ( pd
->curNameInst
->start
!= 0 ) {
657 /* There is an implicit reference to start name. */
658 pd
->curNameInst
->start
->numRefs
+= 1;
661 /* No start label. Complain and recover by adding a label to the
662 * adding one. Recover ignoring the problem. */
663 error(loc
) << "no start label" << endl
;
666 /* Recurse into all expressions in the list. */
667 for ( ExprList::Iter expr
= exprList
; expr
.lte(); expr
++ )
668 expr
->resolveNameRefs( pd
);
670 /* The name scope ends, pop the name instantiation. */
671 pd
->popNameScope( nameFrame
);
674 /* Recurse into the single expression. */
675 exprList
.head
->resolveNameRefs( pd
);
679 /* Clean up after an expression node. */
680 Expression::~Expression()
683 case OrType
: case IntersectType
: case SubtractType
:
684 case StrongSubtractType
:
696 /* Evaluate a single expression node. */
697 FsmAp
*Expression::walk( ParseData
*pd
, bool lastInSeq
)
702 /* Evaluate the expression. */
703 rtnVal
= expression
->walk( pd
, false );
704 /* Evaluate the term. */
705 FsmAp
*rhs
= term
->walk( pd
);
707 rtnVal
->unionOp( rhs
);
708 afterOpMinimize( rtnVal
, lastInSeq
);
711 case IntersectType
: {
712 /* Evaluate the expression. */
713 rtnVal
= expression
->walk( pd
);
714 /* Evaluate the term. */
715 FsmAp
*rhs
= term
->walk( pd
);
716 /* Perform intersection. */
717 rtnVal
->intersectOp( rhs
);
718 afterOpMinimize( rtnVal
, lastInSeq
);
722 /* Evaluate the expression. */
723 rtnVal
= expression
->walk( pd
);
724 /* Evaluate the term. */
725 FsmAp
*rhs
= term
->walk( pd
);
726 /* Perform subtraction. */
727 rtnVal
->subtractOp( rhs
);
728 afterOpMinimize( rtnVal
, lastInSeq
);
731 case StrongSubtractType
: {
732 /* Evaluate the expression. */
733 rtnVal
= expression
->walk( pd
);
735 /* Evaluate the term and pad it with any* machines. */
736 FsmAp
*rhs
= dotStarFsm( pd
);
737 FsmAp
*termFsm
= term
->walk( pd
);
738 FsmAp
*trailAnyStar
= dotStarFsm( pd
);
739 rhs
->concatOp( termFsm
);
740 rhs
->concatOp( trailAnyStar
);
742 /* Perform subtraction. */
743 rtnVal
->subtractOp( rhs
);
744 afterOpMinimize( rtnVal
, lastInSeq
);
748 /* Return result of the term. */
749 rtnVal
= term
->walk( pd
);
753 /* Duplicate the builtin. */
754 rtnVal
= makeBuiltin( builtin
, pd
);
762 void Expression::makeNameTree( ParseData
*pd
)
768 case StrongSubtractType
:
769 expression
->makeNameTree( pd
);
770 term
->makeNameTree( pd
);
773 term
->makeNameTree( pd
);
780 void Expression::resolveNameRefs( ParseData
*pd
)
786 case StrongSubtractType
:
787 expression
->resolveNameRefs( pd
);
788 term
->resolveNameRefs( pd
);
791 term
->resolveNameRefs( pd
);
798 /* Clean up after a term node. */
804 case RightFinishType
:
807 delete factorWithAug
;
809 case FactorWithAugType
:
810 delete factorWithAug
;
815 /* Evaluate a term node. */
816 FsmAp
*Term::walk( ParseData
*pd
, bool lastInSeq
)
821 /* Evaluate the Term. */
822 rtnVal
= term
->walk( pd
, false );
823 /* Evaluate the FactorWithRep. */
824 FsmAp
*rhs
= factorWithAug
->walk( pd
);
825 /* Perform concatenation. */
826 rtnVal
->concatOp( rhs
);
827 afterOpMinimize( rtnVal
, lastInSeq
);
830 case RightStartType
: {
831 /* Evaluate the Term. */
832 rtnVal
= term
->walk( pd
);
834 /* Evaluate the FactorWithRep. */
835 FsmAp
*rhs
= factorWithAug
->walk( pd
);
837 /* Set up the priority descriptors. The left machine gets the
838 * lower priority where as the right get the higher start priority. */
839 priorDescs
[0].key
= pd
->nextPriorKey
++;
840 priorDescs
[0].priority
= 0;
841 rtnVal
->allTransPrior( pd
->curPriorOrd
++, &priorDescs
[0] );
843 /* The start transitions right machine get the higher priority.
844 * Use the same unique key. */
845 priorDescs
[1].key
= priorDescs
[0].key
;
846 priorDescs
[1].priority
= 1;
847 rhs
->startFsmPrior( pd
->curPriorOrd
++, &priorDescs
[1] );
849 /* Perform concatenation. */
850 rtnVal
->concatOp( rhs
);
851 afterOpMinimize( rtnVal
, lastInSeq
);
854 case RightFinishType
: {
855 /* Evaluate the Term. */
856 rtnVal
= term
->walk( pd
);
858 /* Evaluate the FactorWithRep. */
859 FsmAp
*rhs
= factorWithAug
->walk( pd
);
861 /* Set up the priority descriptors. The left machine gets the
862 * lower priority where as the finishing transitions to the right
863 * get the higher priority. */
864 priorDescs
[0].key
= pd
->nextPriorKey
++;
865 priorDescs
[0].priority
= 0;
866 rtnVal
->allTransPrior( pd
->curPriorOrd
++, &priorDescs
[0] );
868 /* The finishing transitions of the right machine get the higher
869 * priority. Use the same unique key. */
870 priorDescs
[1].key
= priorDescs
[0].key
;
871 priorDescs
[1].priority
= 1;
872 rhs
->finishFsmPrior( pd
->curPriorOrd
++, &priorDescs
[1] );
874 /* Perform concatenation. */
875 rtnVal
->concatOp( rhs
);
876 afterOpMinimize( rtnVal
, lastInSeq
);
880 /* Evaluate the Term. */
881 rtnVal
= term
->walk( pd
);
883 /* Evaluate the FactorWithRep. */
884 FsmAp
*rhs
= factorWithAug
->walk( pd
);
886 /* Set up the priority descriptors. The left machine gets the
887 * higher priority. */
888 priorDescs
[0].key
= pd
->nextPriorKey
++;
889 priorDescs
[0].priority
= 1;
890 rtnVal
->allTransPrior( pd
->curPriorOrd
++, &priorDescs
[0] );
892 /* The right machine gets the lower priority. Since
893 * startTransPrior might unnecessarily increase the number of
894 * states during the state machine construction process (due to
895 * isolation), we use allTransPrior instead, which has the same
897 priorDescs
[1].key
= priorDescs
[0].key
;
898 priorDescs
[1].priority
= 0;
899 rhs
->allTransPrior( pd
->curPriorOrd
++, &priorDescs
[1] );
901 /* Perform concatenation. */
902 rtnVal
->concatOp( rhs
);
903 afterOpMinimize( rtnVal
, lastInSeq
);
906 case FactorWithAugType
: {
907 rtnVal
= factorWithAug
->walk( pd
);
914 void Term::makeNameTree( ParseData
*pd
)
919 case RightFinishType
:
921 term
->makeNameTree( pd
);
922 factorWithAug
->makeNameTree( pd
);
924 case FactorWithAugType
:
925 factorWithAug
->makeNameTree( pd
);
930 void Term::resolveNameRefs( ParseData
*pd
)
935 case RightFinishType
:
937 term
->resolveNameRefs( pd
);
938 factorWithAug
->resolveNameRefs( pd
);
940 case FactorWithAugType
:
941 factorWithAug
->resolveNameRefs( pd
);
946 /* Clean up after a factor with augmentation node. */
947 FactorWithAug::~FactorWithAug()
949 delete factorWithRep
;
951 /* Walk the vector of parser actions, deleting function names. */
953 /* Clean up priority descriptors. */
954 if ( priorDescs
!= 0 )
958 void FactorWithAug::assignActions( ParseData
*pd
, FsmAp
*graph
, int *actionOrd
)
960 /* Assign actions. */
961 for ( int i
= 0; i
< actions
.length(); i
++ ) {
962 switch ( actions
[i
].type
) {
963 /* Transition actions. */
965 graph
->startFsmAction( actionOrd
[i
], actions
[i
].action
);
966 afterOpMinimize( graph
);
969 graph
->allTransAction( actionOrd
[i
], actions
[i
].action
);
972 graph
->finishFsmAction( actionOrd
[i
], actions
[i
].action
);
975 graph
->leaveFsmAction( actionOrd
[i
], actions
[i
].action
);
978 /* Global error actions. */
979 case at_start_gbl_error
:
980 graph
->startErrorAction( actionOrd
[i
], actions
[i
].action
, 0 );
981 afterOpMinimize( graph
);
983 case at_all_gbl_error
:
984 graph
->allErrorAction( actionOrd
[i
], actions
[i
].action
, 0 );
986 case at_final_gbl_error
:
987 graph
->finalErrorAction( actionOrd
[i
], actions
[i
].action
, 0 );
989 case at_not_start_gbl_error
:
990 graph
->notStartErrorAction( actionOrd
[i
], actions
[i
].action
, 0 );
992 case at_not_final_gbl_error
:
993 graph
->notFinalErrorAction( actionOrd
[i
], actions
[i
].action
, 0 );
995 case at_middle_gbl_error
:
996 graph
->middleErrorAction( actionOrd
[i
], actions
[i
].action
, 0 );
999 /* Local error actions. */
1000 case at_start_local_error
:
1001 graph
->startErrorAction( actionOrd
[i
], actions
[i
].action
,
1002 actions
[i
].localErrKey
);
1003 afterOpMinimize( graph
);
1005 case at_all_local_error
:
1006 graph
->allErrorAction( actionOrd
[i
], actions
[i
].action
,
1007 actions
[i
].localErrKey
);
1009 case at_final_local_error
:
1010 graph
->finalErrorAction( actionOrd
[i
], actions
[i
].action
,
1011 actions
[i
].localErrKey
);
1013 case at_not_start_local_error
:
1014 graph
->notStartErrorAction( actionOrd
[i
], actions
[i
].action
,
1015 actions
[i
].localErrKey
);
1017 case at_not_final_local_error
:
1018 graph
->notFinalErrorAction( actionOrd
[i
], actions
[i
].action
,
1019 actions
[i
].localErrKey
);
1021 case at_middle_local_error
:
1022 graph
->middleErrorAction( actionOrd
[i
], actions
[i
].action
,
1023 actions
[i
].localErrKey
);
1028 graph
->startEOFAction( actionOrd
[i
], actions
[i
].action
);
1029 afterOpMinimize( graph
);
1032 graph
->allEOFAction( actionOrd
[i
], actions
[i
].action
);
1035 graph
->finalEOFAction( actionOrd
[i
], actions
[i
].action
);
1037 case at_not_start_eof
:
1038 graph
->notStartEOFAction( actionOrd
[i
], actions
[i
].action
);
1040 case at_not_final_eof
:
1041 graph
->notFinalEOFAction( actionOrd
[i
], actions
[i
].action
);
1044 graph
->middleEOFAction( actionOrd
[i
], actions
[i
].action
);
1047 /* To State Actions. */
1048 case at_start_to_state
:
1049 graph
->startToStateAction( actionOrd
[i
], actions
[i
].action
);
1050 afterOpMinimize( graph
);
1052 case at_all_to_state
:
1053 graph
->allToStateAction( actionOrd
[i
], actions
[i
].action
);
1055 case at_final_to_state
:
1056 graph
->finalToStateAction( actionOrd
[i
], actions
[i
].action
);
1058 case at_not_start_to_state
:
1059 graph
->notStartToStateAction( actionOrd
[i
], actions
[i
].action
);
1061 case at_not_final_to_state
:
1062 graph
->notFinalToStateAction( actionOrd
[i
], actions
[i
].action
);
1064 case at_middle_to_state
:
1065 graph
->middleToStateAction( actionOrd
[i
], actions
[i
].action
);
1068 /* From State Actions. */
1069 case at_start_from_state
:
1070 graph
->startFromStateAction( actionOrd
[i
], actions
[i
].action
);
1071 afterOpMinimize( graph
);
1073 case at_all_from_state
:
1074 graph
->allFromStateAction( actionOrd
[i
], actions
[i
].action
);
1076 case at_final_from_state
:
1077 graph
->finalFromStateAction( actionOrd
[i
], actions
[i
].action
);
1079 case at_not_start_from_state
:
1080 graph
->notStartFromStateAction( actionOrd
[i
], actions
[i
].action
);
1082 case at_not_final_from_state
:
1083 graph
->notFinalFromStateAction( actionOrd
[i
], actions
[i
].action
);
1085 case at_middle_from_state
:
1086 graph
->middleFromStateAction( actionOrd
[i
], actions
[i
].action
);
1089 /* Remaining cases, prevented by the parser. */
1097 void FactorWithAug::assignPriorities( FsmAp
*graph
, int *priorOrd
)
1099 /* Assign priorities. */
1100 for ( int i
= 0; i
< priorityAugs
.length(); i
++ ) {
1101 switch ( priorityAugs
[i
].type
) {
1103 graph
->startFsmPrior( priorOrd
[i
], &priorDescs
[i
]);
1104 /* Start fsm priorities are a special case that may require
1105 * minimization afterwards. */
1106 afterOpMinimize( graph
);
1109 graph
->allTransPrior( priorOrd
[i
], &priorDescs
[i
] );
1112 graph
->finishFsmPrior( priorOrd
[i
], &priorDescs
[i
] );
1115 graph
->leaveFsmPrior( priorOrd
[i
], &priorDescs
[i
] );
1119 /* Parser Prevents this case. */
1125 void FactorWithAug::assignConditions( FsmAp
*graph
)
1127 for ( int i
= 0; i
< conditions
.length(); i
++ ) {
1128 switch ( conditions
[i
].type
) {
1129 /* Transition actions. */
1131 graph
->startFsmCondition( conditions
[i
].action
, conditions
[i
].sense
);
1132 afterOpMinimize( graph
);
1135 graph
->allTransCondition( conditions
[i
].action
, conditions
[i
].sense
);
1138 graph
->leaveFsmCondition( conditions
[i
].action
, conditions
[i
].sense
);
1147 /* Evaluate a factor with augmentation node. */
1148 FsmAp
*FactorWithAug::walk( ParseData
*pd
)
1150 /* Enter into the scopes created for the labels. */
1151 NameFrame nameFrame
= pd
->enterNameScope( false, labels
.length() );
1153 /* Make the array of function orderings. */
1155 if ( actions
.length() > 0 )
1156 actionOrd
= new int[actions
.length()];
1158 /* First walk the list of actions, assigning order to all starting
1160 for ( int i
= 0; i
< actions
.length(); i
++ ) {
1161 if ( actions
[i
].type
== at_start
||
1162 actions
[i
].type
== at_start_gbl_error
||
1163 actions
[i
].type
== at_start_local_error
||
1164 actions
[i
].type
== at_start_to_state
||
1165 actions
[i
].type
== at_start_from_state
||
1166 actions
[i
].type
== at_start_eof
)
1167 actionOrd
[i
] = pd
->curActionOrd
++;
1170 /* Evaluate the factor with repetition. */
1171 FsmAp
*rtnVal
= factorWithRep
->walk( pd
);
1173 /* Compute the remaining action orderings. */
1174 for ( int i
= 0; i
< actions
.length(); i
++ ) {
1175 if ( actions
[i
].type
!= at_start
&&
1176 actions
[i
].type
!= at_start_gbl_error
&&
1177 actions
[i
].type
!= at_start_local_error
&&
1178 actions
[i
].type
!= at_start_to_state
&&
1179 actions
[i
].type
!= at_start_from_state
&&
1180 actions
[i
].type
!= at_start_eof
)
1181 actionOrd
[i
] = pd
->curActionOrd
++;
1184 /* Embed conditions. */
1185 assignConditions( rtnVal
);
1187 /* Embed actions. */
1188 assignActions( pd
, rtnVal
, actionOrd
);
1190 /* Make the array of priority orderings. Orderings are local to this walk
1191 * of the factor with augmentation. */
1193 if ( priorityAugs
.length() > 0 )
1194 priorOrd
= new int[priorityAugs
.length()];
1196 /* Walk all priorities, assigning the priority ordering. */
1197 for ( int i
= 0; i
< priorityAugs
.length(); i
++ )
1198 priorOrd
[i
] = pd
->curPriorOrd
++;
1200 /* If the priority descriptors have not been made, make them now. Make
1201 * priority descriptors for each priority asignment that will be passed to
1202 * the fsm. Used to keep track of the key, value and used bit. */
1203 if ( priorDescs
== 0 && priorityAugs
.length() > 0 ) {
1204 priorDescs
= new PriorDesc
[priorityAugs
.length()];
1205 for ( int i
= 0; i
< priorityAugs
.length(); i
++ ) {
1206 /* Init the prior descriptor for the priority setting. */
1207 priorDescs
[i
].key
= priorityAugs
[i
].priorKey
;
1208 priorDescs
[i
].priority
= priorityAugs
[i
].priorValue
;
1212 /* Assign priorities into the machine. */
1213 assignPriorities( rtnVal
, priorOrd
);
1215 /* Assign epsilon transitions. */
1216 for ( int e
= 0; e
< epsilonLinks
.length(); e
++ ) {
1217 /* Get the name, which may not exist. If it doesn't then silently
1218 * ignore it because an error has already been reported. */
1219 NameInst
*epTarg
= pd
->epsilonResolvedLinks
[pd
->nextEpsilonResolvedLink
++];
1220 if ( epTarg
!= 0 ) {
1221 /* Make the epsilon transitions. */
1222 rtnVal
->epsilonTrans( epTarg
->id
);
1224 /* Note that we have made a link to the name. */
1225 pd
->localNameScope
->referencedNames
.append( epTarg
);
1229 /* Set entry points for labels. */
1230 if ( labels
.length() > 0 ) {
1231 /* Pop the names. */
1232 pd
->resetNameScope( nameFrame
);
1234 /* Make labels that are referenced into entry points. */
1235 for ( int i
= 0; i
< labels
.length(); i
++ ) {
1236 pd
->enterNameScope( false, 1 );
1238 /* Will always be found. */
1239 NameInst
*name
= pd
->curNameInst
;
1241 /* If the name is referenced then set the entry point. */
1242 if ( name
->numRefs
> 0 )
1243 rtnVal
->setEntry( name
->id
, rtnVal
->startState
);
1246 pd
->popNameScope( nameFrame
);
1249 if ( priorOrd
!= 0 )
1251 if ( actionOrd
!= 0 )
1256 void FactorWithAug::makeNameTree( ParseData
*pd
)
1258 /* Add the labels to the tree of instantiated names. Each label
1259 * makes a new scope. */
1260 NameInst
*prevNameInst
= pd
->curNameInst
;
1261 for ( int i
= 0; i
< labels
.length(); i
++ )
1262 pd
->curNameInst
= pd
->addNameInst( labels
[i
].loc
, labels
[i
].data
, true );
1264 /* Recurse, then pop the names. */
1265 factorWithRep
->makeNameTree( pd
);
1266 pd
->curNameInst
= prevNameInst
;
1270 void FactorWithAug::resolveNameRefs( ParseData
*pd
)
1272 /* Enter into the name scope created by any labels. */
1273 NameFrame nameFrame
= pd
->enterNameScope( false, labels
.length() );
1275 /* Note action references. */
1276 for ( int i
= 0; i
< actions
.length(); i
++ )
1277 actions
[i
].action
->actionRefs
.append( pd
->localNameScope
);
1279 /* Recurse first. IMPORTANT: we must do the exact same traversal as when
1280 * the tree is constructed. */
1281 factorWithRep
->resolveNameRefs( pd
);
1283 /* Resolve epsilon transitions. */
1284 for ( int ep
= 0; ep
< epsilonLinks
.length(); ep
++ ) {
1286 EpsilonLink
&link
= epsilonLinks
[ep
];
1287 NameInst
*resolvedName
= 0;
1289 if ( link
.target
.length() == 1 && strcmp( link
.target
.data
[0], "final" ) == 0 ) {
1290 /* Epsilon drawn to an implicit final state. An implicit final is
1291 * only available in join operations. */
1292 resolvedName
= pd
->localNameScope
->final
;
1295 /* Do an search for the name. */
1297 pd
->resolveFrom( resolved
, pd
->localNameScope
, link
.target
, 0 );
1298 if ( resolved
.length() > 0 ) {
1299 /* Take the first one. */
1300 resolvedName
= resolved
[0];
1301 if ( resolved
.length() > 1 ) {
1302 /* Complain about the multiple references. */
1303 error(link
.loc
) << "state reference " << link
.target
<<
1304 " resolves to multiple entry points" << endl
;
1305 errorStateLabels( resolved
);
1310 /* This is tricky, we stuff resolved epsilon transitions into one long
1311 * vector in the parse data structure. Since the name resolution and
1312 * graph generation both do identical walks of the parse tree we
1313 * should always find the link resolutions in the right place. */
1314 pd
->epsilonResolvedLinks
.append( resolvedName
);
1316 if ( resolvedName
!= 0 ) {
1317 /* Found the name, bump of the reference count on it. */
1318 resolvedName
->numRefs
+= 1;
1321 /* Complain, no recovery action, the epsilon op will ignore any
1322 * epsilon transitions whose names did not resolve. */
1323 error(link
.loc
) << "could not resolve label " << link
.target
<< endl
;
1327 if ( labels
.length() > 0 )
1328 pd
->popNameScope( nameFrame
);
1332 /* Clean up after a factor with repetition node. */
1333 FactorWithRep::~FactorWithRep()
1336 case StarType
: case StarStarType
: case OptionalType
: case PlusType
:
1337 case ExactType
: case MaxType
: case MinType
: case RangeType
:
1338 delete factorWithRep
;
1340 case FactorWithNegType
:
1341 delete factorWithNeg
;
1346 /* Evaluate a factor with repetition node. */
1347 FsmAp
*FactorWithRep::walk( ParseData
*pd
)
1353 /* Evaluate the FactorWithRep. */
1354 retFsm
= factorWithRep
->walk( pd
);
1355 if ( retFsm
->startState
->isFinState() ) {
1356 warning(loc
) << "applying kleene star to a machine that "
1357 "accepts zero length word" << endl
;
1360 /* Shift over the start action orders then do the kleene star. */
1361 pd
->curActionOrd
+= retFsm
->shiftStartActionOrder( pd
->curActionOrd
);
1363 afterOpMinimize( retFsm
);
1366 case StarStarType
: {
1367 /* Evaluate the FactorWithRep. */
1368 retFsm
= factorWithRep
->walk( pd
);
1369 if ( retFsm
->startState
->isFinState() ) {
1370 warning(loc
) << "applying kleene star to a machine that "
1371 "accepts zero length word" << endl
;
1374 /* Set up the prior descs. All gets priority one, whereas leaving gets
1375 * priority zero. Make a unique key so that these priorities don't
1376 * interfere with any priorities set by the user. */
1377 priorDescs
[0].key
= pd
->nextPriorKey
++;
1378 priorDescs
[0].priority
= 1;
1379 retFsm
->allTransPrior( pd
->curPriorOrd
++, &priorDescs
[0] );
1381 /* Leaveing gets priority 0. Use same unique key. */
1382 priorDescs
[1].key
= priorDescs
[0].key
;
1383 priorDescs
[1].priority
= 0;
1384 retFsm
->leaveFsmPrior( pd
->curPriorOrd
++, &priorDescs
[1] );
1386 /* Shift over the start action orders then do the kleene star. */
1387 pd
->curActionOrd
+= retFsm
->shiftStartActionOrder( pd
->curActionOrd
);
1389 afterOpMinimize( retFsm
);
1392 case OptionalType
: {
1393 /* Make the null fsm. */
1394 FsmAp
*nu
= new FsmAp();
1397 /* Evaluate the FactorWithRep. */
1398 retFsm
= factorWithRep
->walk( pd
);
1400 /* Perform the question operator. */
1401 retFsm
->unionOp( nu
);
1402 afterOpMinimize( retFsm
);
1406 /* Evaluate the FactorWithRep. */
1407 retFsm
= factorWithRep
->walk( pd
);
1408 if ( retFsm
->startState
->isFinState() ) {
1409 warning(loc
) << "applying plus operator to a machine that "
1410 "accpets zero length word" << endl
;
1413 /* Need a duplicated for the star end. */
1414 FsmAp
*dup
= new FsmAp( *retFsm
);
1416 /* The start func orders need to be shifted before doing the star. */
1417 pd
->curActionOrd
+= dup
->shiftStartActionOrder( pd
->curActionOrd
);
1419 /* Star the duplicate. */
1421 afterOpMinimize( dup
);
1423 retFsm
->concatOp( dup
);
1424 afterOpMinimize( retFsm
);
1428 /* Get an int from the repetition amount. */
1429 if ( lowerRep
== 0 ) {
1430 /* No copies. Don't need to evaluate the factorWithRep.
1431 * This Defeats the purpose so give a warning. */
1432 warning(loc
) << "exactly zero repetitions results "
1433 "in the null machine" << endl
;
1435 retFsm
= new FsmAp();
1436 retFsm
->lambdaFsm();
1439 /* Evaluate the first FactorWithRep. */
1440 retFsm
= factorWithRep
->walk( pd
);
1441 if ( retFsm
->startState
->isFinState() ) {
1442 warning(loc
) << "applying repetition to a machine that "
1443 "accepts zero length word" << endl
;
1446 /* The start func orders need to be shifted before doing the
1448 pd
->curActionOrd
+= retFsm
->shiftStartActionOrder( pd
->curActionOrd
);
1450 /* Do the repetition on the machine. Already guarded against n == 0 */
1451 retFsm
->repeatOp( lowerRep
);
1452 afterOpMinimize( retFsm
);
1457 /* Get an int from the repetition amount. */
1458 if ( upperRep
== 0 ) {
1459 /* No copies. Don't need to evaluate the factorWithRep.
1460 * This Defeats the purpose so give a warning. */
1461 warning(loc
) << "max zero repetitions results "
1462 "in the null machine" << endl
;
1464 retFsm
= new FsmAp();
1465 retFsm
->lambdaFsm();
1468 /* Evaluate the first FactorWithRep. */
1469 retFsm
= factorWithRep
->walk( pd
);
1470 if ( retFsm
->startState
->isFinState() ) {
1471 warning(loc
) << "applying max repetition to a machine that "
1472 "accepts zero length word" << endl
;
1475 /* The start func orders need to be shifted before doing the
1477 pd
->curActionOrd
+= retFsm
->shiftStartActionOrder( pd
->curActionOrd
);
1479 /* Do the repetition on the machine. Already guarded against n == 0 */
1480 retFsm
->optionalRepeatOp( upperRep
);
1481 afterOpMinimize( retFsm
);
1486 /* Evaluate the repeated machine. */
1487 retFsm
= factorWithRep
->walk( pd
);
1488 if ( retFsm
->startState
->isFinState() ) {
1489 warning(loc
) << "applying min repetition to a machine that "
1490 "accepts zero length word" << endl
;
1493 /* The start func orders need to be shifted before doing the repetition
1494 * and the kleene star. */
1495 pd
->curActionOrd
+= retFsm
->shiftStartActionOrder( pd
->curActionOrd
);
1497 if ( lowerRep
== 0 ) {
1498 /* Acts just like a star op on the machine to return. */
1500 afterOpMinimize( retFsm
);
1503 /* Take a duplicate for the plus. */
1504 FsmAp
*dup
= new FsmAp( *retFsm
);
1506 /* Do repetition on the first half. */
1507 retFsm
->repeatOp( lowerRep
);
1508 afterOpMinimize( retFsm
);
1510 /* Star the duplicate. */
1512 afterOpMinimize( dup
);
1514 /* Tak on the kleene star. */
1515 retFsm
->concatOp( dup
);
1516 afterOpMinimize( retFsm
);
1521 /* Check for bogus range. */
1522 if ( upperRep
- lowerRep
< 0 ) {
1523 error(loc
) << "invalid range repetition" << endl
;
1525 /* Return null machine as recovery. */
1526 retFsm
= new FsmAp();
1527 retFsm
->lambdaFsm();
1529 else if ( lowerRep
== 0 && upperRep
== 0 ) {
1530 /* No copies. Don't need to evaluate the factorWithRep. This
1531 * defeats the purpose so give a warning. */
1532 warning(loc
) << "zero to zero repetitions results "
1533 "in the null machine" << endl
;
1535 retFsm
= new FsmAp();
1536 retFsm
->lambdaFsm();
1539 /* Now need to evaluate the repeated machine. */
1540 retFsm
= factorWithRep
->walk( pd
);
1541 if ( retFsm
->startState
->isFinState() ) {
1542 warning(loc
) << "applying range repetition to a machine that "
1543 "accepts zero length word" << endl
;
1546 /* The start func orders need to be shifted before doing both kinds
1548 pd
->curActionOrd
+= retFsm
->shiftStartActionOrder( pd
->curActionOrd
);
1550 if ( lowerRep
== 0 ) {
1551 /* Just doing max repetition. Already guarded against n == 0. */
1552 retFsm
->optionalRepeatOp( upperRep
);
1553 afterOpMinimize( retFsm
);
1555 else if ( lowerRep
== upperRep
) {
1556 /* Just doing exact repetition. Already guarded against n == 0. */
1557 retFsm
->repeatOp( lowerRep
);
1558 afterOpMinimize( retFsm
);
1561 /* This is the case that 0 < lowerRep < upperRep. Take a
1562 * duplicate for the optional repeat. */
1563 FsmAp
*dup
= new FsmAp( *retFsm
);
1565 /* Do repetition on the first half. */
1566 retFsm
->repeatOp( lowerRep
);
1567 afterOpMinimize( retFsm
);
1569 /* Do optional repetition on the second half. */
1570 dup
->optionalRepeatOp( upperRep
- lowerRep
);
1571 afterOpMinimize( dup
);
1573 /* Tak on the duplicate machine. */
1574 retFsm
->concatOp( dup
);
1575 afterOpMinimize( retFsm
);
1580 case FactorWithNegType
: {
1581 /* Evaluate the Factor. Pass it up. */
1582 retFsm
= factorWithNeg
->walk( pd
);
1588 void FactorWithRep::makeNameTree( ParseData
*pd
)
1599 factorWithRep
->makeNameTree( pd
);
1601 case FactorWithNegType
:
1602 factorWithNeg
->makeNameTree( pd
);
1607 void FactorWithRep::resolveNameRefs( ParseData
*pd
)
1618 factorWithRep
->resolveNameRefs( pd
);
1620 case FactorWithNegType
:
1621 factorWithNeg
->resolveNameRefs( pd
);
1626 /* Clean up after a factor with negation node. */
1627 FactorWithNeg::~FactorWithNeg()
1631 case CharNegateType
:
1632 delete factorWithNeg
;
1640 /* Evaluate a factor with negation node. */
1641 FsmAp
*FactorWithNeg::walk( ParseData
*pd
)
1647 /* Evaluate the factorWithNeg. */
1648 FsmAp
*toNegate
= factorWithNeg
->walk( pd
);
1650 /* Negation is subtract from dot-star. */
1651 retFsm
= dotStarFsm( pd
);
1652 retFsm
->subtractOp( toNegate
);
1653 afterOpMinimize( retFsm
);
1656 case CharNegateType
: {
1657 /* Evaluate the factorWithNeg. */
1658 FsmAp
*toNegate
= factorWithNeg
->walk( pd
);
1660 /* CharNegation is subtract from dot. */
1661 retFsm
= dotFsm( pd
);
1662 retFsm
->subtractOp( toNegate
);
1663 afterOpMinimize( retFsm
);
1667 /* Evaluate the Factor. Pass it up. */
1668 retFsm
= factor
->walk( pd
);
1674 void FactorWithNeg::makeNameTree( ParseData
*pd
)
1678 case CharNegateType
:
1679 factorWithNeg
->makeNameTree( pd
);
1682 factor
->makeNameTree( pd
);
1687 void FactorWithNeg::resolveNameRefs( ParseData
*pd
)
1691 case CharNegateType
:
1692 factorWithNeg
->resolveNameRefs( pd
);
1695 factor
->resolveNameRefs( pd
);
1700 /* Clean up after a factor node. */
1721 case LongestMatchType
:
1722 delete longestMatch
;
1727 /* Evaluate a factor node. */
1728 FsmAp
*Factor::walk( ParseData
*pd
)
1733 rtnVal
= literal
->walk( pd
);
1736 rtnVal
= range
->walk( pd
);
1739 rtnVal
= reItem
->walk( pd
, 0 );
1742 rtnVal
= regExpr
->walk( pd
, 0 );
1745 rtnVal
= varDef
->walk( pd
);
1748 rtnVal
= join
->walk( pd
);
1750 case LongestMatchType
:
1751 rtnVal
= longestMatch
->walk( pd
);
1758 void Factor::makeNameTree( ParseData
*pd
)
1767 varDef
->makeNameTree( loc
, pd
);
1770 join
->makeNameTree( pd
);
1772 case LongestMatchType
:
1773 longestMatch
->makeNameTree( pd
);
1778 void Factor::resolveNameRefs( ParseData
*pd
)
1787 varDef
->resolveNameRefs( pd
);
1790 join
->resolveNameRefs( pd
);
1792 case LongestMatchType
:
1793 longestMatch
->resolveNameRefs( pd
);
1798 /* Clean up a range object. Must delete the two literals. */
1805 /* Evaluate a range. Gets the lower an upper key and makes an fsm range. */
1806 FsmAp
*Range::walk( ParseData
*pd
)
1808 /* Construct and verify the suitability of the lower end of the range. */
1809 FsmAp
*lowerFsm
= lowerLit
->walk( pd
);
1810 if ( !lowerFsm
->checkSingleCharMachine() ) {
1811 error(lowerLit
->token
.loc
) <<
1812 "bad range lower end, must be a single character" << endl
;
1815 /* Construct and verify the upper end. */
1816 FsmAp
*upperFsm
= upperLit
->walk( pd
);
1817 if ( !upperFsm
->checkSingleCharMachine() ) {
1818 error(upperLit
->token
.loc
) <<
1819 "bad range upper end, must be a single character" << endl
;
1822 /* Grab the keys from the machines, then delete them. */
1823 Key lowKey
= lowerFsm
->startState
->outList
.head
->lowKey
;
1824 Key highKey
= upperFsm
->startState
->outList
.head
->lowKey
;
1828 /* Validate the range. */
1829 if ( lowKey
> highKey
) {
1830 /* Recover by setting upper to lower; */
1831 error(lowerLit
->token
.loc
) << "lower end of range is greater then upper end" << endl
;
1835 /* Return the range now that it is validated. */
1836 FsmAp
*retFsm
= new FsmAp();
1837 retFsm
->rangeFsm( lowKey
, highKey
);
1841 /* Evaluate a literal object. */
1842 FsmAp
*Literal::walk( ParseData
*pd
)
1844 /* FsmAp to return, is the alphabet signed. */
1849 /* Make the fsm key in int format. */
1850 Key fsmKey
= makeFsmKeyNum( token
.data
, token
.loc
, pd
);
1851 /* Make the new machine. */
1852 rtnVal
= new FsmAp();
1853 rtnVal
->concatFsm( fsmKey
);
1857 /* Make the array of keys in int format. */
1859 bool caseInsensitive
;
1860 token
.prepareLitString( interp
, caseInsensitive
);
1861 Key
*arr
= new Key
[interp
.length
];
1862 makeFsmKeyArray( arr
, interp
.data
, interp
.length
, pd
);
1864 /* Make the new machine. */
1865 rtnVal
= new FsmAp();
1866 if ( caseInsensitive
)
1867 rtnVal
->concatFsmCI( arr
, interp
.length
);
1869 rtnVal
->concatFsm( arr
, interp
.length
);
1870 delete[] interp
.data
;
1877 /* Clean up after a regular expression object. */
1890 /* Evaluate a regular expression object. */
1891 FsmAp
*RegExpr::walk( ParseData
*pd
, RegExpr
*rootRegex
)
1893 /* This is the root regex, pass down a pointer to this. */
1894 if ( rootRegex
== 0 )
1900 /* Walk both items. */
1901 rtnVal
= regExpr
->walk( pd
, rootRegex
);
1902 FsmAp
*fsm2
= item
->walk( pd
, rootRegex
);
1903 rtnVal
->concatOp( fsm2
);
1907 rtnVal
= new FsmAp();
1908 rtnVal
->lambdaFsm();
1915 /* Clean up after an item in a regular expression. */
1929 /* Evaluate a regular expression object. */
1930 FsmAp
*ReItem::walk( ParseData
*pd
, RegExpr
*rootRegex
)
1932 /* The fsm to return, is the alphabet signed? */
1937 /* Move the data into an integer array and make a concat fsm. */
1938 Key
*arr
= new Key
[token
.length
];
1939 makeFsmKeyArray( arr
, token
.data
, token
.length
, pd
);
1941 /* Make the concat fsm. */
1942 rtnVal
= new FsmAp();
1943 if ( rootRegex
!= 0 && rootRegex
->caseInsensitive
)
1944 rtnVal
->concatFsmCI( arr
, token
.length
);
1946 rtnVal
->concatFsm( arr
, token
.length
);
1951 /* Make the dot fsm. */
1952 rtnVal
= dotFsm( pd
);
1956 /* Get the or block and minmize it. */
1957 rtnVal
= orBlock
->walk( pd
, rootRegex
);
1958 if ( rtnVal
== 0 ) {
1959 rtnVal
= new FsmAp();
1960 rtnVal
->lambdaFsm();
1962 rtnVal
->minimizePartition2();
1966 /* Get the or block and minimize it. */
1967 FsmAp
*fsm
= orBlock
->walk( pd
, rootRegex
);
1968 fsm
->minimizePartition2();
1970 /* Make a dot fsm and subtract from it. */
1971 rtnVal
= dotFsm( pd
);
1972 rtnVal
->subtractOp( fsm
);
1973 rtnVal
->minimizePartition2();
1978 /* If the item is followed by a star, then apply the star op. */
1980 if ( rtnVal
->startState
->isFinState() ) {
1981 warning(loc
) << "applying kleene star to a machine that "
1982 "accpets zero length word" << endl
;
1986 rtnVal
->minimizePartition2();
1991 /* Clean up after an or block of a regular expression. */
1992 ReOrBlock::~ReOrBlock()
2005 /* Evaluate an or block of a regular expression. */
2006 FsmAp
*ReOrBlock::walk( ParseData
*pd
, RegExpr
*rootRegex
)
2011 /* Evaluate the two fsm. */
2012 FsmAp
*fsm1
= orBlock
->walk( pd
, rootRegex
);
2013 FsmAp
*fsm2
= item
->walk( pd
, rootRegex
);
2017 fsm1
->unionOp( fsm2
);
2030 /* Evaluate an or block item of a regular expression. */
2031 FsmAp
*ReOrItem::walk( ParseData
*pd
, RegExpr
*rootRegex
)
2033 /* The return value, is the alphabet signed? */
2037 /* Make the or machine. */
2038 rtnVal
= new FsmAp();
2040 /* Put the or data into an array of ints. Note that we find unique
2041 * keys. Duplicates are silently ignored. The alternative would be to
2042 * issue warning or an error but since we can't with [a0-9a] or 'a' |
2043 * 'a' don't bother here. */
2045 makeFsmUniqueKeyArray( keySet
, token
.data
, token
.length
,
2046 rootRegex
!= 0 ? rootRegex
->caseInsensitive
: false, pd
);
2048 /* Run the or operator. */
2049 rtnVal
->orFsm( keySet
.data
, keySet
.length() );
2053 /* Make the upper and lower keys. */
2054 Key lowKey
= makeFsmKeyChar( lower
, pd
);
2055 Key highKey
= makeFsmKeyChar( upper
, pd
);
2057 /* Validate the range. */
2058 if ( lowKey
> highKey
) {
2059 /* Recover by setting upper to lower; */
2060 error(loc
) << "lower end of range is greater then upper end" << endl
;
2064 /* Make the range machine. */
2065 rtnVal
= new FsmAp();
2066 rtnVal
->rangeFsm( lowKey
, highKey
);
2068 if ( rootRegex
!= 0 && rootRegex
->caseInsensitive
) {
2069 if ( lowKey
<= 'Z' && 'A' <= highKey
) {
2070 Key otherLow
= lowKey
< 'A' ? Key('A') : lowKey
;
2071 Key otherHigh
= 'Z' < highKey
? Key('Z') : highKey
;
2073 otherLow
= 'a' + ( otherLow
- 'A' );
2074 otherHigh
= 'a' + ( otherHigh
- 'A' );
2076 FsmAp
*otherRange
= new FsmAp();
2077 otherRange
->rangeFsm( otherLow
, otherHigh
);
2078 rtnVal
->unionOp( otherRange
);
2079 rtnVal
->minimizePartition2();
2081 else if ( lowKey
<= 'z' && 'a' <= highKey
) {
2082 Key otherLow
= lowKey
< 'a' ? Key('a') : lowKey
;
2083 Key otherHigh
= 'z' < highKey
? Key('z') : highKey
;
2085 otherLow
= 'A' + ( otherLow
- 'a' );
2086 otherHigh
= 'A' + ( otherHigh
- 'a' );
2088 FsmAp
*otherRange
= new FsmAp();
2089 otherRange
->rangeFsm( otherLow
, otherHigh
);
2090 rtnVal
->unionOp( otherRange
);
2091 rtnVal
->minimizePartition2();