One of the ST_* macros collides with a macro in windows.h.
[ragel.git] / ragel / parsetree.cpp
blob7310e167a538dba70856028faf469547193be320
1 /*
2 * Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca>
3 */
5 /* This file is part of Ragel.
7 * Ragel is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * Ragel is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Ragel; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 #include <iostream>
23 #include <iomanip>
24 #include <errno.h>
25 #include <limits.h>
26 #include <stdlib.h>
28 /* Parsing. */
29 #include "ragel.h"
30 #include "rlparse.h"
31 #include "parsetree.h"
33 using namespace std;
34 ostream &operator<<( ostream &out, const NameRef &nameRef );
35 ostream &operator<<( ostream &out, const NameInst &nameInst );
37 /* Convert the literal string which comes in from the scanner into an array of
38 * characters with escapes and options interpreted. Also null terminates the
39 * string. Though this null termination should not be relied on for
40 * interpreting literals in the parser because the string may contain a
41 * literal string with \0 */
42 void Token::prepareLitString( Token &result, bool &caseInsensitive )
44 result.data = new char[this->length+1];
45 caseInsensitive = false;
47 char *src = this->data + 1;
48 char *end = this->data + this->length - 1;
50 while ( *end != '\'' && *end != '\"' ) {
51 if ( *end == 'i' )
52 caseInsensitive = true;
53 else {
54 error( this->loc ) << "literal string '" << *end <<
55 "' option not supported" << endl;
57 end -= 1;
60 char *dest = result.data;
61 int len = 0;
62 while ( src != end ) {
63 if ( *src == '\\' ) {
64 switch ( src[1] ) {
65 case '0': dest[len++] = '\0'; break;
66 case 'a': dest[len++] = '\a'; break;
67 case 'b': dest[len++] = '\b'; break;
68 case 't': dest[len++] = '\t'; break;
69 case 'n': dest[len++] = '\n'; break;
70 case 'v': dest[len++] = '\v'; break;
71 case 'f': dest[len++] = '\f'; break;
72 case 'r': dest[len++] = '\r'; break;
73 case '\n': break;
74 default: dest[len++] = src[1]; break;
76 src += 2;
78 else {
79 dest[len++] = *src++;
82 result.length = len;
83 result.data[result.length] = 0;
87 FsmAp *VarDef::walk( ParseData *pd )
89 /* We enter into a new name scope. */
90 NameFrame nameFrame = pd->enterNameScope( true, 1 );
92 /* Recurse on the expression. */
93 FsmAp *rtnVal = joinOrLm->walk( pd );
95 /* Do the tranfer of local error actions. */
96 LocalErrDictEl *localErrDictEl = pd->localErrDict.find( name );
97 if ( localErrDictEl != 0 ) {
98 for ( StateList::Iter state = rtnVal->stateList; state.lte(); state++ )
99 rtnVal->transferErrorActions( state, localErrDictEl->value );
102 /* If the expression below is a join operation with multiple expressions
103 * then it just had epsilon transisions resolved. If it is a join
104 * with only a single expression then run the epsilon op now. */
105 if ( joinOrLm->type == JoinOrLm::JoinType && joinOrLm->join->exprList.length() == 1 )
106 rtnVal->epsilonOp();
108 /* We can now unset entry points that are not longer used. */
109 pd->unsetObsoleteEntries( rtnVal );
111 /* If the name of the variable is referenced then add the entry point to
112 * the graph. */
113 if ( pd->curNameInst->numRefs > 0 )
114 rtnVal->setEntry( pd->curNameInst->id, rtnVal->startState );
116 /* Pop the name scope. */
117 pd->popNameScope( nameFrame );
118 return rtnVal;
121 void VarDef::makeNameTree( const InputLoc &loc, ParseData *pd )
123 /* The variable definition enters a new scope. */
124 NameInst *prevNameInst = pd->curNameInst;
125 pd->curNameInst = pd->addNameInst( loc, name, false );
127 if ( joinOrLm->type == JoinOrLm::LongestMatchType )
128 pd->curNameInst->isLongestMatch = true;
130 /* Recurse. */
131 joinOrLm->makeNameTree( pd );
133 /* The name scope ends, pop the name instantiation. */
134 pd->curNameInst = prevNameInst;
137 void VarDef::resolveNameRefs( ParseData *pd )
139 /* Entering into a new scope. */
140 NameFrame nameFrame = pd->enterNameScope( true, 1 );
142 /* Recurse. */
143 joinOrLm->resolveNameRefs( pd );
145 /* The name scope ends, pop the name instantiation. */
146 pd->popNameScope( nameFrame );
149 InputLoc LongestMatchPart::getLoc()
151 return action != 0 ? action->loc : semiLoc;
155 * If there are any LMs then all of the following entry points must reset
156 * tokstart:
158 * 1. fentry(StateRef)
159 * 2. ftoto(StateRef), fcall(StateRef), fnext(StateRef)
160 * 3. targt of any transition that has an fcall (the return loc).
161 * 4. start state of all longest match routines.
164 Action *LongestMatch::newAction( ParseData *pd, const InputLoc &loc,
165 char *name, InlineList *inlineList )
167 Action *action = new Action( loc, name, inlineList, pd->nextCondId++ );
168 action->actionRefs.append( pd->curNameInst );
169 pd->actionList.append( action );
170 action->isLmAction = true;
171 return action;
174 void LongestMatch::makeActions( ParseData *pd )
176 /* Make actions that set the action id. */
177 for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ ) {
178 /* For each part create actions for setting the match type. We need
179 * to do this so that the actions will go into the actionIndex. */
180 InlineList *inlineList = new InlineList;
181 inlineList->append( new InlineItem( lmi->getLoc(), this, lmi, InlineItem::LmSetActId ) );
182 char *actName = new char[50];
183 sprintf( actName, "store%i", lmi->longestMatchId );
184 lmi->setActId = newAction( pd, lmi->getLoc(), actName, inlineList );
187 /* Make actions that execute the user action and restart on the last character. */
188 for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ ) {
189 /* For each part create actions for setting the match type. We need
190 * to do this so that the actions will go into the actionIndex. */
191 InlineList *inlineList = new InlineList;
192 inlineList->append( new InlineItem( lmi->getLoc(), this, lmi,
193 InlineItem::LmOnLast ) );
194 char *actName = new char[50];
195 sprintf( actName, "last%i", lmi->longestMatchId );
196 lmi->actOnLast = newAction( pd, lmi->getLoc(), actName, inlineList );
199 /* Make actions that execute the user action and restart on the next
200 * character. These actions will set tokend themselves (it is the current
201 * char). */
202 for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ ) {
203 /* For each part create actions for setting the match type. We need
204 * to do this so that the actions will go into the actionIndex. */
205 InlineList *inlineList = new InlineList;
206 inlineList->append( new InlineItem( lmi->getLoc(), this, lmi,
207 InlineItem::LmOnNext ) );
208 char *actName = new char[50];
209 sprintf( actName, "next%i", lmi->longestMatchId );
210 lmi->actOnNext = newAction( pd, lmi->getLoc(), actName, inlineList );
213 /* Make actions that execute the user action and restart at tokend. These
214 * actions execute some time after matching the last char. */
215 for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ ) {
216 /* For each part create actions for setting the match type. We need
217 * to do this so that the actions will go into the actionIndex. */
218 InlineList *inlineList = new InlineList;
219 inlineList->append( new InlineItem( lmi->getLoc(), this, lmi,
220 InlineItem::LmOnLagBehind ) );
221 char *actName = new char[50];
222 sprintf( actName, "lag%i", lmi->longestMatchId );
223 lmi->actLagBehind = newAction( pd, lmi->getLoc(), actName, inlineList );
226 InputLoc loc;
227 loc.line = 1;
228 loc.col = 1;
230 /* Create the error action. */
231 InlineList *il6 = new InlineList;
232 il6->append( new InlineItem( loc, this, 0, InlineItem::LmSwitch ) );
233 lmActSelect = newAction( pd, loc, "switch", il6 );
236 void LongestMatch::findName( ParseData *pd )
238 NameInst *nameInst = pd->curNameInst;
239 while ( nameInst->name == 0 ) {
240 nameInst = nameInst->parent;
241 /* Since every machine must must have a name, we should always find a
242 * name for the longest match. */
243 assert( nameInst != 0 );
245 name = nameInst->name;
248 void LongestMatch::makeNameTree( ParseData *pd )
250 /* Create an anonymous scope for the longest match. Will be used for
251 * restarting machine after matching a token. */
252 NameInst *prevNameInst = pd->curNameInst;
253 pd->curNameInst = pd->addNameInst( loc, 0, false );
255 /* Recurse into all parts of the longest match operator. */
256 for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ )
257 lmi->join->makeNameTree( pd );
259 /* Traverse the name tree upwards to find a name for this lm. */
260 findName( pd );
262 /* Also make the longest match's actions at this point. */
263 makeActions( pd );
265 /* The name scope ends, pop the name instantiation. */
266 pd->curNameInst = prevNameInst;
269 void LongestMatch::resolveNameRefs( ParseData *pd )
271 /* The longest match gets its own name scope. */
272 NameFrame nameFrame = pd->enterNameScope( true, 1 );
274 /* Take an action reference for each longest match item and recurse. */
275 for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ ) {
276 /* Record the reference if the item has an action. */
277 if ( lmi->action != 0 )
278 lmi->action->actionRefs.append( pd->localNameScope );
280 /* Recurse down the join. */
281 lmi->join->resolveNameRefs( pd );
284 /* The name scope ends, pop the name instantiation. */
285 pd->popNameScope( nameFrame );
288 void LongestMatch::restart( FsmAp *graph, TransAp *trans )
290 StateAp *fromState = trans->fromState;
291 graph->detachTrans( fromState, trans->toState, trans );
292 graph->attachTrans( fromState, graph->startState, trans );
295 void LongestMatch::runLonestMatch( ParseData *pd, FsmAp *graph )
297 graph->markReachableFromHereStopFinal( graph->startState );
298 for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) {
299 if ( ms->stateBits & STB_ISMARKED ) {
300 ms->lmItemSet.insert( 0 );
301 ms->stateBits &= ~ STB_ISMARKED;
305 /* Transfer the first item of non-empty lmAction tables to the item sets
306 * of the states that follow. Exclude states that have no transitions out.
307 * This must happen on a separate pass so that on each iteration of the
308 * next pass we have the item set entries from all lmAction tables. */
309 for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) {
310 for ( TransList::Iter trans = st->outList; trans.lte(); trans++ ) {
311 if ( trans->lmActionTable.length() > 0 ) {
312 LmActionTableEl *lmAct = trans->lmActionTable.data;
313 StateAp *toState = trans->toState;
314 assert( toState );
316 /* Check if there are transitions out, this may be a very
317 * close approximation? Out transitions going nowhere?
318 * FIXME: Check. */
319 if ( toState->outList.length() > 0 ) {
320 /* Fill the item sets. */
321 graph->markReachableFromHereStopFinal( toState );
322 for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) {
323 if ( ms->stateBits & STB_ISMARKED ) {
324 ms->lmItemSet.insert( lmAct->value );
325 ms->stateBits &= ~ STB_ISMARKED;
333 /* The lmItem sets are now filled, telling us which longest match rules
334 * can succeed in which states. First determine if we need to make sure
335 * act is defaulted to zero. We need to do this if there are any states
336 * with lmItemSet.length() > 1 and NULL is included. That is, that the
337 * switch may get called when in fact nothing has been matched. */
338 int maxItemSetLength = 0;
339 graph->markReachableFromHereStopFinal( graph->startState );
340 for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) {
341 if ( ms->stateBits & STB_ISMARKED ) {
342 if ( ms->lmItemSet.length() > maxItemSetLength )
343 maxItemSetLength = ms->lmItemSet.length();
344 ms->stateBits &= ~ STB_ISMARKED;
348 /* The actions executed on starting to match a token. */
349 graph->isolateStartState();
350 graph->startState->toStateActionTable.setAction( pd->initTokStartOrd, pd->initTokStart );
351 graph->startState->fromStateActionTable.setAction( pd->setTokStartOrd, pd->setTokStart );
352 if ( maxItemSetLength > 1 ) {
353 /* The longest match action switch may be called when tokens are
354 * matched, in which case act must be initialized, there must be a
355 * case to handle the error, and the generated machine will require an
356 * error state. */
357 lmSwitchHandlesError = true;
358 pd->lmRequiresErrorState = true;
359 graph->startState->toStateActionTable.setAction( pd->initActIdOrd, pd->initActId );
362 /* The place to store transitions to restart. It maybe possible for the
363 * restarting to affect the searching through the graph that follows. For
364 * now take the safe route and save the list of transitions to restart
365 * until after all searching is done. */
366 Vector<TransAp*> restartTrans;
368 /* Set actions that do immediate token recognition, set the longest match part
369 * id and set the token ending. */
370 for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) {
371 for ( TransList::Iter trans = st->outList; trans.lte(); trans++ ) {
372 if ( trans->lmActionTable.length() > 0 ) {
373 LmActionTableEl *lmAct = trans->lmActionTable.data;
374 StateAp *toState = trans->toState;
375 assert( toState );
377 /* Check if there are transitions out, this may be a very
378 * close approximation? Out transitions going nowhere?
379 * FIXME: Check. */
380 if ( toState->outList.length() == 0 ) {
381 /* Can execute the immediate action for the longest match
382 * part. Redirect the action to the start state. */
383 trans->actionTable.setAction( lmAct->key,
384 lmAct->value->actOnLast );
385 restartTrans.append( trans );
387 else {
388 /* Look for non final states that have a non-empty item
389 * set. If these are present then we need to record the
390 * end of the token. Also Find the highest item set
391 * length reachable from here (excluding at transtions to
392 * final states). */
393 bool nonFinalNonEmptyItemSet = false;
394 maxItemSetLength = 0;
395 graph->markReachableFromHereStopFinal( toState );
396 for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) {
397 if ( ms->stateBits & STB_ISMARKED ) {
398 if ( ms->lmItemSet.length() > 0 && !ms->isFinState() )
399 nonFinalNonEmptyItemSet = true;
400 if ( ms->lmItemSet.length() > maxItemSetLength )
401 maxItemSetLength = ms->lmItemSet.length();
402 ms->stateBits &= ~ STB_ISMARKED;
406 /* If there are reachable states that are not final and
407 * have non empty item sets or that have an item set
408 * length greater than one then we need to set tokend
409 * because the error action that matches the token will
410 * require it. */
411 if ( nonFinalNonEmptyItemSet || maxItemSetLength > 1 )
412 trans->actionTable.setAction( pd->setTokEndOrd, pd->setTokEnd );
414 /* Some states may not know which longest match item to
415 * execute, must set it. */
416 if ( maxItemSetLength > 1 ) {
417 /* There are transitions out, another match may come. */
418 trans->actionTable.setAction( lmAct->key,
419 lmAct->value->setActId );
426 /* Now that all graph searching is done it certainly safe set the
427 * restarting. It may be safe above, however this must be verified. */
428 for ( Vector<TransAp*>::Iter pt = restartTrans; pt.lte(); pt++ )
429 restart( graph, *pt );
431 int lmErrActionOrd = pd->curActionOrd++;
433 /* Embed the error for recognizing a char. */
434 for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) {
435 if ( st->lmItemSet.length() == 1 && st->lmItemSet[0] != 0 ) {
436 if ( st->isFinState() ) {
437 /* On error execute the onActNext action, which knows that
438 * the last character of the token was one back and restart. */
439 graph->setErrorTarget( st, graph->startState, &lmErrActionOrd,
440 &st->lmItemSet[0]->actOnNext, 1 );
441 st->eofActionTable.setAction( lmErrActionOrd,
442 st->lmItemSet[0]->actOnNext );
443 st->eofTarget = graph->startState;
445 else {
446 graph->setErrorTarget( st, graph->startState, &lmErrActionOrd,
447 &st->lmItemSet[0]->actLagBehind, 1 );
448 st->eofActionTable.setAction( lmErrActionOrd,
449 st->lmItemSet[0]->actLagBehind );
450 st->eofTarget = graph->startState;
453 else if ( st->lmItemSet.length() > 1 ) {
454 /* Need to use the select. Take note of the which items the select
455 * is needed for so only the necessary actions are included. */
456 for ( LmItemSet::Iter plmi = st->lmItemSet; plmi.lte(); plmi++ ) {
457 if ( *plmi != 0 )
458 (*plmi)->inLmSelect = true;
460 /* On error, execute the action select and go to the start state. */
461 graph->setErrorTarget( st, graph->startState, &lmErrActionOrd,
462 &lmActSelect, 1 );
463 st->eofActionTable.setAction( lmErrActionOrd, lmActSelect );
464 st->eofTarget = graph->startState;
468 /* Finally, the start state should be made final. */
469 graph->setFinState( graph->startState );
472 FsmAp *LongestMatch::walk( ParseData *pd )
474 /* The longest match has it's own name scope. */
475 NameFrame nameFrame = pd->enterNameScope( true, 1 );
477 /* Make each part of the longest match. */
478 FsmAp **parts = new FsmAp*[longestMatchList->length()];
479 LmPartList::Iter lmi = *longestMatchList;
480 for ( int i = 0; lmi.lte(); lmi++, i++ ) {
481 /* Create the machine and embed the setting of the longest match id. */
482 parts[i] = lmi->join->walk( pd );
483 parts[i]->longMatchAction( pd->curActionOrd++, lmi );
486 /* Union machines one and up with machine zero. The grammar dictates that
487 * there will always be at least one part. */
488 FsmAp *rtnVal = parts[0];
489 for ( int i = 1; i < longestMatchList->length(); i++ ) {
490 rtnVal->unionOp( parts[i] );
491 afterOpMinimize( rtnVal );
494 runLonestMatch( pd, rtnVal );
496 /* Pop the name scope. */
497 pd->popNameScope( nameFrame );
499 delete[] parts;
500 return rtnVal;
503 FsmAp *JoinOrLm::walk( ParseData *pd )
505 FsmAp *rtnVal = 0;
506 switch ( type ) {
507 case JoinType:
508 rtnVal = join->walk( pd );
509 break;
510 case LongestMatchType:
511 rtnVal = longestMatch->walk( pd );
512 break;
514 return rtnVal;
517 void JoinOrLm::makeNameTree( ParseData *pd )
519 switch ( type ) {
520 case JoinType:
521 join->makeNameTree( pd );
522 break;
523 case LongestMatchType:
524 longestMatch->makeNameTree( pd );
525 break;
529 void JoinOrLm::resolveNameRefs( ParseData *pd )
531 switch ( type ) {
532 case JoinType:
533 join->resolveNameRefs( pd );
534 break;
535 case LongestMatchType:
536 longestMatch->resolveNameRefs( pd );
537 break;
542 /* Construct with a location and the first expression. */
543 Join::Join( const InputLoc &loc, Expression *expr )
545 loc(loc)
547 exprList.append( expr );
550 /* Construct with a location and the first expression. */
551 Join::Join( Expression *expr )
553 loc(loc)
555 exprList.append( expr );
558 /* Walk an expression node. */
559 FsmAp *Join::walk( ParseData *pd )
561 if ( exprList.length() > 1 )
562 return walkJoin( pd );
563 else
564 return exprList.head->walk( pd );
567 /* There is a list of expressions to join. */
568 FsmAp *Join::walkJoin( ParseData *pd )
570 /* We enter into a new name scope. */
571 NameFrame nameFrame = pd->enterNameScope( true, 1 );
573 /* Evaluate the machines. */
574 FsmAp **fsms = new FsmAp*[exprList.length()];
575 ExprList::Iter expr = exprList;
576 for ( int e = 0; e < exprList.length(); e++, expr++ )
577 fsms[e] = expr->walk( pd );
579 /* Get the start and final names. Final is
580 * guaranteed to exist, start is not. */
581 NameInst *startName = pd->curNameInst->start;
582 NameInst *finalName = pd->curNameInst->final;
584 int startId = -1;
585 if ( startName != 0 ) {
586 /* Take note that there was an implicit link to the start machine. */
587 pd->localNameScope->referencedNames.append( startName );
588 startId = startName->id;
591 /* A final id of -1 indicates there is no epsilon that references the
592 * final state, therefor do not create one or set an entry point to it. */
593 int finalId = -1;
594 if ( finalName->numRefs > 0 )
595 finalId = finalName->id;
597 /* Join machines 1 and up onto machine 0. */
598 FsmAp *retFsm = fsms[0];
599 retFsm->joinOp( startId, finalId, fsms+1, exprList.length()-1 );
601 /* We can now unset entry points that are not longer used. */
602 pd->unsetObsoleteEntries( retFsm );
604 /* Pop the name scope. */
605 pd->popNameScope( nameFrame );
607 delete[] fsms;
608 return retFsm;
611 void Join::makeNameTree( ParseData *pd )
613 if ( exprList.length() > 1 ) {
614 /* Create the new anonymous scope. */
615 NameInst *prevNameInst = pd->curNameInst;
616 pd->curNameInst = pd->addNameInst( loc, 0, false );
618 /* Join scopes need an implicit "final" target. */
619 pd->curNameInst->final = new NameInst( InputLoc(), pd->curNameInst, "final",
620 pd->nextNameId++, false );
622 /* Recurse into all expressions in the list. */
623 for ( ExprList::Iter expr = exprList; expr.lte(); expr++ )
624 expr->makeNameTree( pd );
626 /* The name scope ends, pop the name instantiation. */
627 pd->curNameInst = prevNameInst;
629 else {
630 /* Recurse into the single expression. */
631 exprList.head->makeNameTree( pd );
636 void Join::resolveNameRefs( ParseData *pd )
638 /* Branch on whether or not there is to be a join. */
639 if ( exprList.length() > 1 ) {
640 /* The variable definition enters a new scope. */
641 NameFrame nameFrame = pd->enterNameScope( true, 1 );
643 /* The join scope must contain a start label. */
644 NameSet resolved = pd->resolvePart( pd->localNameScope, "start", true );
645 if ( resolved.length() > 0 ) {
646 /* Take the first. */
647 pd->curNameInst->start = resolved[0];
648 if ( resolved.length() > 1 ) {
649 /* Complain about the multiple references. */
650 error(loc) << "multiple start labels" << endl;
651 errorStateLabels( resolved );
655 /* Make sure there is a start label. */
656 if ( pd->curNameInst->start != 0 ) {
657 /* There is an implicit reference to start name. */
658 pd->curNameInst->start->numRefs += 1;
660 else {
661 /* No start label. Complain and recover by adding a label to the
662 * adding one. Recover ignoring the problem. */
663 error(loc) << "no start label" << endl;
666 /* Recurse into all expressions in the list. */
667 for ( ExprList::Iter expr = exprList; expr.lte(); expr++ )
668 expr->resolveNameRefs( pd );
670 /* The name scope ends, pop the name instantiation. */
671 pd->popNameScope( nameFrame );
673 else {
674 /* Recurse into the single expression. */
675 exprList.head->resolveNameRefs( pd );
679 /* Clean up after an expression node. */
680 Expression::~Expression()
682 switch ( type ) {
683 case OrType: case IntersectType: case SubtractType:
684 case StrongSubtractType:
685 delete expression;
686 delete term;
687 break;
688 case TermType:
689 delete term;
690 break;
691 case BuiltinType:
692 break;
696 /* Evaluate a single expression node. */
697 FsmAp *Expression::walk( ParseData *pd, bool lastInSeq )
699 FsmAp *rtnVal = 0;
700 switch ( type ) {
701 case OrType: {
702 /* Evaluate the expression. */
703 rtnVal = expression->walk( pd, false );
704 /* Evaluate the term. */
705 FsmAp *rhs = term->walk( pd );
706 /* Perform union. */
707 rtnVal->unionOp( rhs );
708 afterOpMinimize( rtnVal, lastInSeq );
709 break;
711 case IntersectType: {
712 /* Evaluate the expression. */
713 rtnVal = expression->walk( pd );
714 /* Evaluate the term. */
715 FsmAp *rhs = term->walk( pd );
716 /* Perform intersection. */
717 rtnVal->intersectOp( rhs );
718 afterOpMinimize( rtnVal, lastInSeq );
719 break;
721 case SubtractType: {
722 /* Evaluate the expression. */
723 rtnVal = expression->walk( pd );
724 /* Evaluate the term. */
725 FsmAp *rhs = term->walk( pd );
726 /* Perform subtraction. */
727 rtnVal->subtractOp( rhs );
728 afterOpMinimize( rtnVal, lastInSeq );
729 break;
731 case StrongSubtractType: {
732 /* Evaluate the expression. */
733 rtnVal = expression->walk( pd );
735 /* Evaluate the term and pad it with any* machines. */
736 FsmAp *rhs = dotStarFsm( pd );
737 FsmAp *termFsm = term->walk( pd );
738 FsmAp *trailAnyStar = dotStarFsm( pd );
739 rhs->concatOp( termFsm );
740 rhs->concatOp( trailAnyStar );
742 /* Perform subtraction. */
743 rtnVal->subtractOp( rhs );
744 afterOpMinimize( rtnVal, lastInSeq );
745 break;
747 case TermType: {
748 /* Return result of the term. */
749 rtnVal = term->walk( pd );
750 break;
752 case BuiltinType: {
753 /* Duplicate the builtin. */
754 rtnVal = makeBuiltin( builtin, pd );
755 break;
759 return rtnVal;
762 void Expression::makeNameTree( ParseData *pd )
764 switch ( type ) {
765 case OrType:
766 case IntersectType:
767 case SubtractType:
768 case StrongSubtractType:
769 expression->makeNameTree( pd );
770 term->makeNameTree( pd );
771 break;
772 case TermType:
773 term->makeNameTree( pd );
774 break;
775 case BuiltinType:
776 break;
780 void Expression::resolveNameRefs( ParseData *pd )
782 switch ( type ) {
783 case OrType:
784 case IntersectType:
785 case SubtractType:
786 case StrongSubtractType:
787 expression->resolveNameRefs( pd );
788 term->resolveNameRefs( pd );
789 break;
790 case TermType:
791 term->resolveNameRefs( pd );
792 break;
793 case BuiltinType:
794 break;
798 /* Clean up after a term node. */
799 Term::~Term()
801 switch ( type ) {
802 case ConcatType:
803 case RightStartType:
804 case RightFinishType:
805 case LeftType:
806 delete term;
807 delete factorWithAug;
808 break;
809 case FactorWithAugType:
810 delete factorWithAug;
811 break;
815 /* Evaluate a term node. */
816 FsmAp *Term::walk( ParseData *pd, bool lastInSeq )
818 FsmAp *rtnVal = 0;
819 switch ( type ) {
820 case ConcatType: {
821 /* Evaluate the Term. */
822 rtnVal = term->walk( pd, false );
823 /* Evaluate the FactorWithRep. */
824 FsmAp *rhs = factorWithAug->walk( pd );
825 /* Perform concatenation. */
826 rtnVal->concatOp( rhs );
827 afterOpMinimize( rtnVal, lastInSeq );
828 break;
830 case RightStartType: {
831 /* Evaluate the Term. */
832 rtnVal = term->walk( pd );
834 /* Evaluate the FactorWithRep. */
835 FsmAp *rhs = factorWithAug->walk( pd );
837 /* Set up the priority descriptors. The left machine gets the
838 * lower priority where as the right get the higher start priority. */
839 priorDescs[0].key = pd->nextPriorKey++;
840 priorDescs[0].priority = 0;
841 rtnVal->allTransPrior( pd->curPriorOrd++, &priorDescs[0] );
843 /* The start transitions right machine get the higher priority.
844 * Use the same unique key. */
845 priorDescs[1].key = priorDescs[0].key;
846 priorDescs[1].priority = 1;
847 rhs->startFsmPrior( pd->curPriorOrd++, &priorDescs[1] );
849 /* Perform concatenation. */
850 rtnVal->concatOp( rhs );
851 afterOpMinimize( rtnVal, lastInSeq );
852 break;
854 case RightFinishType: {
855 /* Evaluate the Term. */
856 rtnVal = term->walk( pd );
858 /* Evaluate the FactorWithRep. */
859 FsmAp *rhs = factorWithAug->walk( pd );
861 /* Set up the priority descriptors. The left machine gets the
862 * lower priority where as the finishing transitions to the right
863 * get the higher priority. */
864 priorDescs[0].key = pd->nextPriorKey++;
865 priorDescs[0].priority = 0;
866 rtnVal->allTransPrior( pd->curPriorOrd++, &priorDescs[0] );
868 /* The finishing transitions of the right machine get the higher
869 * priority. Use the same unique key. */
870 priorDescs[1].key = priorDescs[0].key;
871 priorDescs[1].priority = 1;
872 rhs->finishFsmPrior( pd->curPriorOrd++, &priorDescs[1] );
874 /* Perform concatenation. */
875 rtnVal->concatOp( rhs );
876 afterOpMinimize( rtnVal, lastInSeq );
877 break;
879 case LeftType: {
880 /* Evaluate the Term. */
881 rtnVal = term->walk( pd );
883 /* Evaluate the FactorWithRep. */
884 FsmAp *rhs = factorWithAug->walk( pd );
886 /* Set up the priority descriptors. The left machine gets the
887 * higher priority. */
888 priorDescs[0].key = pd->nextPriorKey++;
889 priorDescs[0].priority = 1;
890 rtnVal->allTransPrior( pd->curPriorOrd++, &priorDescs[0] );
892 /* The right machine gets the lower priority. Since
893 * startTransPrior might unnecessarily increase the number of
894 * states during the state machine construction process (due to
895 * isolation), we use allTransPrior instead, which has the same
896 * effect. */
897 priorDescs[1].key = priorDescs[0].key;
898 priorDescs[1].priority = 0;
899 rhs->allTransPrior( pd->curPriorOrd++, &priorDescs[1] );
901 /* Perform concatenation. */
902 rtnVal->concatOp( rhs );
903 afterOpMinimize( rtnVal, lastInSeq );
904 break;
906 case FactorWithAugType: {
907 rtnVal = factorWithAug->walk( pd );
908 break;
911 return rtnVal;
914 void Term::makeNameTree( ParseData *pd )
916 switch ( type ) {
917 case ConcatType:
918 case RightStartType:
919 case RightFinishType:
920 case LeftType:
921 term->makeNameTree( pd );
922 factorWithAug->makeNameTree( pd );
923 break;
924 case FactorWithAugType:
925 factorWithAug->makeNameTree( pd );
926 break;
930 void Term::resolveNameRefs( ParseData *pd )
932 switch ( type ) {
933 case ConcatType:
934 case RightStartType:
935 case RightFinishType:
936 case LeftType:
937 term->resolveNameRefs( pd );
938 factorWithAug->resolveNameRefs( pd );
939 break;
940 case FactorWithAugType:
941 factorWithAug->resolveNameRefs( pd );
942 break;
946 /* Clean up after a factor with augmentation node. */
947 FactorWithAug::~FactorWithAug()
949 delete factorWithRep;
951 /* Walk the vector of parser actions, deleting function names. */
953 /* Clean up priority descriptors. */
954 if ( priorDescs != 0 )
955 delete[] priorDescs;
958 void FactorWithAug::assignActions( ParseData *pd, FsmAp *graph, int *actionOrd )
960 /* Assign actions. */
961 for ( int i = 0; i < actions.length(); i++ ) {
962 switch ( actions[i].type ) {
963 /* Transition actions. */
964 case at_start:
965 graph->startFsmAction( actionOrd[i], actions[i].action );
966 afterOpMinimize( graph );
967 break;
968 case at_all:
969 graph->allTransAction( actionOrd[i], actions[i].action );
970 break;
971 case at_finish:
972 graph->finishFsmAction( actionOrd[i], actions[i].action );
973 break;
974 case at_leave:
975 graph->leaveFsmAction( actionOrd[i], actions[i].action );
976 break;
978 /* Global error actions. */
979 case at_start_gbl_error:
980 graph->startErrorAction( actionOrd[i], actions[i].action, 0 );
981 afterOpMinimize( graph );
982 break;
983 case at_all_gbl_error:
984 graph->allErrorAction( actionOrd[i], actions[i].action, 0 );
985 break;
986 case at_final_gbl_error:
987 graph->finalErrorAction( actionOrd[i], actions[i].action, 0 );
988 break;
989 case at_not_start_gbl_error:
990 graph->notStartErrorAction( actionOrd[i], actions[i].action, 0 );
991 break;
992 case at_not_final_gbl_error:
993 graph->notFinalErrorAction( actionOrd[i], actions[i].action, 0 );
994 break;
995 case at_middle_gbl_error:
996 graph->middleErrorAction( actionOrd[i], actions[i].action, 0 );
997 break;
999 /* Local error actions. */
1000 case at_start_local_error:
1001 graph->startErrorAction( actionOrd[i], actions[i].action,
1002 actions[i].localErrKey );
1003 afterOpMinimize( graph );
1004 break;
1005 case at_all_local_error:
1006 graph->allErrorAction( actionOrd[i], actions[i].action,
1007 actions[i].localErrKey );
1008 break;
1009 case at_final_local_error:
1010 graph->finalErrorAction( actionOrd[i], actions[i].action,
1011 actions[i].localErrKey );
1012 break;
1013 case at_not_start_local_error:
1014 graph->notStartErrorAction( actionOrd[i], actions[i].action,
1015 actions[i].localErrKey );
1016 break;
1017 case at_not_final_local_error:
1018 graph->notFinalErrorAction( actionOrd[i], actions[i].action,
1019 actions[i].localErrKey );
1020 break;
1021 case at_middle_local_error:
1022 graph->middleErrorAction( actionOrd[i], actions[i].action,
1023 actions[i].localErrKey );
1024 break;
1026 /* EOF actions. */
1027 case at_start_eof:
1028 graph->startEOFAction( actionOrd[i], actions[i].action );
1029 afterOpMinimize( graph );
1030 break;
1031 case at_all_eof:
1032 graph->allEOFAction( actionOrd[i], actions[i].action );
1033 break;
1034 case at_final_eof:
1035 graph->finalEOFAction( actionOrd[i], actions[i].action );
1036 break;
1037 case at_not_start_eof:
1038 graph->notStartEOFAction( actionOrd[i], actions[i].action );
1039 break;
1040 case at_not_final_eof:
1041 graph->notFinalEOFAction( actionOrd[i], actions[i].action );
1042 break;
1043 case at_middle_eof:
1044 graph->middleEOFAction( actionOrd[i], actions[i].action );
1045 break;
1047 /* To State Actions. */
1048 case at_start_to_state:
1049 graph->startToStateAction( actionOrd[i], actions[i].action );
1050 afterOpMinimize( graph );
1051 break;
1052 case at_all_to_state:
1053 graph->allToStateAction( actionOrd[i], actions[i].action );
1054 break;
1055 case at_final_to_state:
1056 graph->finalToStateAction( actionOrd[i], actions[i].action );
1057 break;
1058 case at_not_start_to_state:
1059 graph->notStartToStateAction( actionOrd[i], actions[i].action );
1060 break;
1061 case at_not_final_to_state:
1062 graph->notFinalToStateAction( actionOrd[i], actions[i].action );
1063 break;
1064 case at_middle_to_state:
1065 graph->middleToStateAction( actionOrd[i], actions[i].action );
1066 break;
1068 /* From State Actions. */
1069 case at_start_from_state:
1070 graph->startFromStateAction( actionOrd[i], actions[i].action );
1071 afterOpMinimize( graph );
1072 break;
1073 case at_all_from_state:
1074 graph->allFromStateAction( actionOrd[i], actions[i].action );
1075 break;
1076 case at_final_from_state:
1077 graph->finalFromStateAction( actionOrd[i], actions[i].action );
1078 break;
1079 case at_not_start_from_state:
1080 graph->notStartFromStateAction( actionOrd[i], actions[i].action );
1081 break;
1082 case at_not_final_from_state:
1083 graph->notFinalFromStateAction( actionOrd[i], actions[i].action );
1084 break;
1085 case at_middle_from_state:
1086 graph->middleFromStateAction( actionOrd[i], actions[i].action );
1087 break;
1089 /* Remaining cases, prevented by the parser. */
1090 default:
1091 assert( false );
1092 break;
1097 void FactorWithAug::assignPriorities( FsmAp *graph, int *priorOrd )
1099 /* Assign priorities. */
1100 for ( int i = 0; i < priorityAugs.length(); i++ ) {
1101 switch ( priorityAugs[i].type ) {
1102 case at_start:
1103 graph->startFsmPrior( priorOrd[i], &priorDescs[i]);
1104 /* Start fsm priorities are a special case that may require
1105 * minimization afterwards. */
1106 afterOpMinimize( graph );
1107 break;
1108 case at_all:
1109 graph->allTransPrior( priorOrd[i], &priorDescs[i] );
1110 break;
1111 case at_finish:
1112 graph->finishFsmPrior( priorOrd[i], &priorDescs[i] );
1113 break;
1114 case at_leave:
1115 graph->leaveFsmPrior( priorOrd[i], &priorDescs[i] );
1116 break;
1118 default:
1119 /* Parser Prevents this case. */
1120 break;
1125 void FactorWithAug::assignConditions( FsmAp *graph )
1127 for ( int i = 0; i < conditions.length(); i++ ) {
1128 switch ( conditions[i].type ) {
1129 /* Transition actions. */
1130 case at_start:
1131 graph->startFsmCondition( conditions[i].action, conditions[i].sense );
1132 afterOpMinimize( graph );
1133 break;
1134 case at_all:
1135 graph->allTransCondition( conditions[i].action, conditions[i].sense );
1136 break;
1137 case at_leave:
1138 graph->leaveFsmCondition( conditions[i].action, conditions[i].sense );
1139 break;
1140 default:
1141 break;
1147 /* Evaluate a factor with augmentation node. */
1148 FsmAp *FactorWithAug::walk( ParseData *pd )
1150 /* Enter into the scopes created for the labels. */
1151 NameFrame nameFrame = pd->enterNameScope( false, labels.length() );
1153 /* Make the array of function orderings. */
1154 int *actionOrd = 0;
1155 if ( actions.length() > 0 )
1156 actionOrd = new int[actions.length()];
1158 /* First walk the list of actions, assigning order to all starting
1159 * actions. */
1160 for ( int i = 0; i < actions.length(); i++ ) {
1161 if ( actions[i].type == at_start ||
1162 actions[i].type == at_start_gbl_error ||
1163 actions[i].type == at_start_local_error ||
1164 actions[i].type == at_start_to_state ||
1165 actions[i].type == at_start_from_state ||
1166 actions[i].type == at_start_eof )
1167 actionOrd[i] = pd->curActionOrd++;
1170 /* Evaluate the factor with repetition. */
1171 FsmAp *rtnVal = factorWithRep->walk( pd );
1173 /* Compute the remaining action orderings. */
1174 for ( int i = 0; i < actions.length(); i++ ) {
1175 if ( actions[i].type != at_start &&
1176 actions[i].type != at_start_gbl_error &&
1177 actions[i].type != at_start_local_error &&
1178 actions[i].type != at_start_to_state &&
1179 actions[i].type != at_start_from_state &&
1180 actions[i].type != at_start_eof )
1181 actionOrd[i] = pd->curActionOrd++;
1184 /* Embed conditions. */
1185 assignConditions( rtnVal );
1187 /* Embed actions. */
1188 assignActions( pd, rtnVal , actionOrd );
1190 /* Make the array of priority orderings. Orderings are local to this walk
1191 * of the factor with augmentation. */
1192 int *priorOrd = 0;
1193 if ( priorityAugs.length() > 0 )
1194 priorOrd = new int[priorityAugs.length()];
1196 /* Walk all priorities, assigning the priority ordering. */
1197 for ( int i = 0; i < priorityAugs.length(); i++ )
1198 priorOrd[i] = pd->curPriorOrd++;
1200 /* If the priority descriptors have not been made, make them now. Make
1201 * priority descriptors for each priority asignment that will be passed to
1202 * the fsm. Used to keep track of the key, value and used bit. */
1203 if ( priorDescs == 0 && priorityAugs.length() > 0 ) {
1204 priorDescs = new PriorDesc[priorityAugs.length()];
1205 for ( int i = 0; i < priorityAugs.length(); i++ ) {
1206 /* Init the prior descriptor for the priority setting. */
1207 priorDescs[i].key = priorityAugs[i].priorKey;
1208 priorDescs[i].priority = priorityAugs[i].priorValue;
1212 /* Assign priorities into the machine. */
1213 assignPriorities( rtnVal, priorOrd );
1215 /* Assign epsilon transitions. */
1216 for ( int e = 0; e < epsilonLinks.length(); e++ ) {
1217 /* Get the name, which may not exist. If it doesn't then silently
1218 * ignore it because an error has already been reported. */
1219 NameInst *epTarg = pd->epsilonResolvedLinks[pd->nextEpsilonResolvedLink++];
1220 if ( epTarg != 0 ) {
1221 /* Make the epsilon transitions. */
1222 rtnVal->epsilonTrans( epTarg->id );
1224 /* Note that we have made a link to the name. */
1225 pd->localNameScope->referencedNames.append( epTarg );
1229 /* Set entry points for labels. */
1230 if ( labels.length() > 0 ) {
1231 /* Pop the names. */
1232 pd->resetNameScope( nameFrame );
1234 /* Make labels that are referenced into entry points. */
1235 for ( int i = 0; i < labels.length(); i++ ) {
1236 pd->enterNameScope( false, 1 );
1238 /* Will always be found. */
1239 NameInst *name = pd->curNameInst;
1241 /* If the name is referenced then set the entry point. */
1242 if ( name->numRefs > 0 )
1243 rtnVal->setEntry( name->id, rtnVal->startState );
1246 pd->popNameScope( nameFrame );
1249 if ( priorOrd != 0 )
1250 delete[] priorOrd;
1251 if ( actionOrd != 0 )
1252 delete[] actionOrd;
1253 return rtnVal;
1256 void FactorWithAug::makeNameTree( ParseData *pd )
1258 /* Add the labels to the tree of instantiated names. Each label
1259 * makes a new scope. */
1260 NameInst *prevNameInst = pd->curNameInst;
1261 for ( int i = 0; i < labels.length(); i++ )
1262 pd->curNameInst = pd->addNameInst( labels[i].loc, labels[i].data, true );
1264 /* Recurse, then pop the names. */
1265 factorWithRep->makeNameTree( pd );
1266 pd->curNameInst = prevNameInst;
1270 void FactorWithAug::resolveNameRefs( ParseData *pd )
1272 /* Enter into the name scope created by any labels. */
1273 NameFrame nameFrame = pd->enterNameScope( false, labels.length() );
1275 /* Note action references. */
1276 for ( int i = 0; i < actions.length(); i++ )
1277 actions[i].action->actionRefs.append( pd->localNameScope );
1279 /* Recurse first. IMPORTANT: we must do the exact same traversal as when
1280 * the tree is constructed. */
1281 factorWithRep->resolveNameRefs( pd );
1283 /* Resolve epsilon transitions. */
1284 for ( int ep = 0; ep < epsilonLinks.length(); ep++ ) {
1285 /* Get the link. */
1286 EpsilonLink &link = epsilonLinks[ep];
1287 NameInst *resolvedName = 0;
1289 if ( link.target.length() == 1 && strcmp( link.target.data[0], "final" ) == 0 ) {
1290 /* Epsilon drawn to an implicit final state. An implicit final is
1291 * only available in join operations. */
1292 resolvedName = pd->localNameScope->final;
1294 else {
1295 /* Do an search for the name. */
1296 NameSet resolved;
1297 pd->resolveFrom( resolved, pd->localNameScope, link.target, 0 );
1298 if ( resolved.length() > 0 ) {
1299 /* Take the first one. */
1300 resolvedName = resolved[0];
1301 if ( resolved.length() > 1 ) {
1302 /* Complain about the multiple references. */
1303 error(link.loc) << "state reference " << link.target <<
1304 " resolves to multiple entry points" << endl;
1305 errorStateLabels( resolved );
1310 /* This is tricky, we stuff resolved epsilon transitions into one long
1311 * vector in the parse data structure. Since the name resolution and
1312 * graph generation both do identical walks of the parse tree we
1313 * should always find the link resolutions in the right place. */
1314 pd->epsilonResolvedLinks.append( resolvedName );
1316 if ( resolvedName != 0 ) {
1317 /* Found the name, bump of the reference count on it. */
1318 resolvedName->numRefs += 1;
1320 else {
1321 /* Complain, no recovery action, the epsilon op will ignore any
1322 * epsilon transitions whose names did not resolve. */
1323 error(link.loc) << "could not resolve label " << link.target << endl;
1327 if ( labels.length() > 0 )
1328 pd->popNameScope( nameFrame );
1332 /* Clean up after a factor with repetition node. */
1333 FactorWithRep::~FactorWithRep()
1335 switch ( type ) {
1336 case StarType: case StarStarType: case OptionalType: case PlusType:
1337 case ExactType: case MaxType: case MinType: case RangeType:
1338 delete factorWithRep;
1339 break;
1340 case FactorWithNegType:
1341 delete factorWithNeg;
1342 break;
1346 /* Evaluate a factor with repetition node. */
1347 FsmAp *FactorWithRep::walk( ParseData *pd )
1349 FsmAp *retFsm = 0;
1351 switch ( type ) {
1352 case StarType: {
1353 /* Evaluate the FactorWithRep. */
1354 retFsm = factorWithRep->walk( pd );
1355 if ( retFsm->startState->isFinState() ) {
1356 warning(loc) << "applying kleene star to a machine that "
1357 "accepts zero length word" << endl;
1360 /* Shift over the start action orders then do the kleene star. */
1361 pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
1362 retFsm->starOp( );
1363 afterOpMinimize( retFsm );
1364 break;
1366 case StarStarType: {
1367 /* Evaluate the FactorWithRep. */
1368 retFsm = factorWithRep->walk( pd );
1369 if ( retFsm->startState->isFinState() ) {
1370 warning(loc) << "applying kleene star to a machine that "
1371 "accepts zero length word" << endl;
1374 /* Set up the prior descs. All gets priority one, whereas leaving gets
1375 * priority zero. Make a unique key so that these priorities don't
1376 * interfere with any priorities set by the user. */
1377 priorDescs[0].key = pd->nextPriorKey++;
1378 priorDescs[0].priority = 1;
1379 retFsm->allTransPrior( pd->curPriorOrd++, &priorDescs[0] );
1381 /* Leaveing gets priority 0. Use same unique key. */
1382 priorDescs[1].key = priorDescs[0].key;
1383 priorDescs[1].priority = 0;
1384 retFsm->leaveFsmPrior( pd->curPriorOrd++, &priorDescs[1] );
1386 /* Shift over the start action orders then do the kleene star. */
1387 pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
1388 retFsm->starOp( );
1389 afterOpMinimize( retFsm );
1390 break;
1392 case OptionalType: {
1393 /* Make the null fsm. */
1394 FsmAp *nu = new FsmAp();
1395 nu->lambdaFsm( );
1397 /* Evaluate the FactorWithRep. */
1398 retFsm = factorWithRep->walk( pd );
1400 /* Perform the question operator. */
1401 retFsm->unionOp( nu );
1402 afterOpMinimize( retFsm );
1403 break;
1405 case PlusType: {
1406 /* Evaluate the FactorWithRep. */
1407 retFsm = factorWithRep->walk( pd );
1408 if ( retFsm->startState->isFinState() ) {
1409 warning(loc) << "applying plus operator to a machine that "
1410 "accpets zero length word" << endl;
1413 /* Need a duplicated for the star end. */
1414 FsmAp *dup = new FsmAp( *retFsm );
1416 /* The start func orders need to be shifted before doing the star. */
1417 pd->curActionOrd += dup->shiftStartActionOrder( pd->curActionOrd );
1419 /* Star the duplicate. */
1420 dup->starOp( );
1421 afterOpMinimize( dup );
1423 retFsm->concatOp( dup );
1424 afterOpMinimize( retFsm );
1425 break;
1427 case ExactType: {
1428 /* Get an int from the repetition amount. */
1429 if ( lowerRep == 0 ) {
1430 /* No copies. Don't need to evaluate the factorWithRep.
1431 * This Defeats the purpose so give a warning. */
1432 warning(loc) << "exactly zero repetitions results "
1433 "in the null machine" << endl;
1435 retFsm = new FsmAp();
1436 retFsm->lambdaFsm();
1438 else {
1439 /* Evaluate the first FactorWithRep. */
1440 retFsm = factorWithRep->walk( pd );
1441 if ( retFsm->startState->isFinState() ) {
1442 warning(loc) << "applying repetition to a machine that "
1443 "accepts zero length word" << endl;
1446 /* The start func orders need to be shifted before doing the
1447 * repetition. */
1448 pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
1450 /* Do the repetition on the machine. Already guarded against n == 0 */
1451 retFsm->repeatOp( lowerRep );
1452 afterOpMinimize( retFsm );
1454 break;
1456 case MaxType: {
1457 /* Get an int from the repetition amount. */
1458 if ( upperRep == 0 ) {
1459 /* No copies. Don't need to evaluate the factorWithRep.
1460 * This Defeats the purpose so give a warning. */
1461 warning(loc) << "max zero repetitions results "
1462 "in the null machine" << endl;
1464 retFsm = new FsmAp();
1465 retFsm->lambdaFsm();
1467 else {
1468 /* Evaluate the first FactorWithRep. */
1469 retFsm = factorWithRep->walk( pd );
1470 if ( retFsm->startState->isFinState() ) {
1471 warning(loc) << "applying max repetition to a machine that "
1472 "accepts zero length word" << endl;
1475 /* The start func orders need to be shifted before doing the
1476 * repetition. */
1477 pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
1479 /* Do the repetition on the machine. Already guarded against n == 0 */
1480 retFsm->optionalRepeatOp( upperRep );
1481 afterOpMinimize( retFsm );
1483 break;
1485 case MinType: {
1486 /* Evaluate the repeated machine. */
1487 retFsm = factorWithRep->walk( pd );
1488 if ( retFsm->startState->isFinState() ) {
1489 warning(loc) << "applying min repetition to a machine that "
1490 "accepts zero length word" << endl;
1493 /* The start func orders need to be shifted before doing the repetition
1494 * and the kleene star. */
1495 pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
1497 if ( lowerRep == 0 ) {
1498 /* Acts just like a star op on the machine to return. */
1499 retFsm->starOp( );
1500 afterOpMinimize( retFsm );
1502 else {
1503 /* Take a duplicate for the plus. */
1504 FsmAp *dup = new FsmAp( *retFsm );
1506 /* Do repetition on the first half. */
1507 retFsm->repeatOp( lowerRep );
1508 afterOpMinimize( retFsm );
1510 /* Star the duplicate. */
1511 dup->starOp( );
1512 afterOpMinimize( dup );
1514 /* Tak on the kleene star. */
1515 retFsm->concatOp( dup );
1516 afterOpMinimize( retFsm );
1518 break;
1520 case RangeType: {
1521 /* Check for bogus range. */
1522 if ( upperRep - lowerRep < 0 ) {
1523 error(loc) << "invalid range repetition" << endl;
1525 /* Return null machine as recovery. */
1526 retFsm = new FsmAp();
1527 retFsm->lambdaFsm();
1529 else if ( lowerRep == 0 && upperRep == 0 ) {
1530 /* No copies. Don't need to evaluate the factorWithRep. This
1531 * defeats the purpose so give a warning. */
1532 warning(loc) << "zero to zero repetitions results "
1533 "in the null machine" << endl;
1535 retFsm = new FsmAp();
1536 retFsm->lambdaFsm();
1538 else {
1539 /* Now need to evaluate the repeated machine. */
1540 retFsm = factorWithRep->walk( pd );
1541 if ( retFsm->startState->isFinState() ) {
1542 warning(loc) << "applying range repetition to a machine that "
1543 "accepts zero length word" << endl;
1546 /* The start func orders need to be shifted before doing both kinds
1547 * of repetition. */
1548 pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
1550 if ( lowerRep == 0 ) {
1551 /* Just doing max repetition. Already guarded against n == 0. */
1552 retFsm->optionalRepeatOp( upperRep );
1553 afterOpMinimize( retFsm );
1555 else if ( lowerRep == upperRep ) {
1556 /* Just doing exact repetition. Already guarded against n == 0. */
1557 retFsm->repeatOp( lowerRep );
1558 afterOpMinimize( retFsm );
1560 else {
1561 /* This is the case that 0 < lowerRep < upperRep. Take a
1562 * duplicate for the optional repeat. */
1563 FsmAp *dup = new FsmAp( *retFsm );
1565 /* Do repetition on the first half. */
1566 retFsm->repeatOp( lowerRep );
1567 afterOpMinimize( retFsm );
1569 /* Do optional repetition on the second half. */
1570 dup->optionalRepeatOp( upperRep - lowerRep );
1571 afterOpMinimize( dup );
1573 /* Tak on the duplicate machine. */
1574 retFsm->concatOp( dup );
1575 afterOpMinimize( retFsm );
1578 break;
1580 case FactorWithNegType: {
1581 /* Evaluate the Factor. Pass it up. */
1582 retFsm = factorWithNeg->walk( pd );
1583 break;
1585 return retFsm;
1588 void FactorWithRep::makeNameTree( ParseData *pd )
1590 switch ( type ) {
1591 case StarType:
1592 case StarStarType:
1593 case OptionalType:
1594 case PlusType:
1595 case ExactType:
1596 case MaxType:
1597 case MinType:
1598 case RangeType:
1599 factorWithRep->makeNameTree( pd );
1600 break;
1601 case FactorWithNegType:
1602 factorWithNeg->makeNameTree( pd );
1603 break;
1607 void FactorWithRep::resolveNameRefs( ParseData *pd )
1609 switch ( type ) {
1610 case StarType:
1611 case StarStarType:
1612 case OptionalType:
1613 case PlusType:
1614 case ExactType:
1615 case MaxType:
1616 case MinType:
1617 case RangeType:
1618 factorWithRep->resolveNameRefs( pd );
1619 break;
1620 case FactorWithNegType:
1621 factorWithNeg->resolveNameRefs( pd );
1622 break;
1626 /* Clean up after a factor with negation node. */
1627 FactorWithNeg::~FactorWithNeg()
1629 switch ( type ) {
1630 case NegateType:
1631 case CharNegateType:
1632 delete factorWithNeg;
1633 break;
1634 case FactorType:
1635 delete factor;
1636 break;
1640 /* Evaluate a factor with negation node. */
1641 FsmAp *FactorWithNeg::walk( ParseData *pd )
1643 FsmAp *retFsm = 0;
1645 switch ( type ) {
1646 case NegateType: {
1647 /* Evaluate the factorWithNeg. */
1648 FsmAp *toNegate = factorWithNeg->walk( pd );
1650 /* Negation is subtract from dot-star. */
1651 retFsm = dotStarFsm( pd );
1652 retFsm->subtractOp( toNegate );
1653 afterOpMinimize( retFsm );
1654 break;
1656 case CharNegateType: {
1657 /* Evaluate the factorWithNeg. */
1658 FsmAp *toNegate = factorWithNeg->walk( pd );
1660 /* CharNegation is subtract from dot. */
1661 retFsm = dotFsm( pd );
1662 retFsm->subtractOp( toNegate );
1663 afterOpMinimize( retFsm );
1664 break;
1666 case FactorType: {
1667 /* Evaluate the Factor. Pass it up. */
1668 retFsm = factor->walk( pd );
1669 break;
1671 return retFsm;
1674 void FactorWithNeg::makeNameTree( ParseData *pd )
1676 switch ( type ) {
1677 case NegateType:
1678 case CharNegateType:
1679 factorWithNeg->makeNameTree( pd );
1680 break;
1681 case FactorType:
1682 factor->makeNameTree( pd );
1683 break;
1687 void FactorWithNeg::resolveNameRefs( ParseData *pd )
1689 switch ( type ) {
1690 case NegateType:
1691 case CharNegateType:
1692 factorWithNeg->resolveNameRefs( pd );
1693 break;
1694 case FactorType:
1695 factor->resolveNameRefs( pd );
1696 break;
1700 /* Clean up after a factor node. */
1701 Factor::~Factor()
1703 switch ( type ) {
1704 case LiteralType:
1705 delete literal;
1706 break;
1707 case RangeType:
1708 delete range;
1709 break;
1710 case OrExprType:
1711 delete reItem;
1712 break;
1713 case RegExprType:
1714 delete regExpr;
1715 break;
1716 case ReferenceType:
1717 break;
1718 case ParenType:
1719 delete join;
1720 break;
1721 case LongestMatchType:
1722 delete longestMatch;
1723 break;
1727 /* Evaluate a factor node. */
1728 FsmAp *Factor::walk( ParseData *pd )
1730 FsmAp *rtnVal = 0;
1731 switch ( type ) {
1732 case LiteralType:
1733 rtnVal = literal->walk( pd );
1734 break;
1735 case RangeType:
1736 rtnVal = range->walk( pd );
1737 break;
1738 case OrExprType:
1739 rtnVal = reItem->walk( pd, 0 );
1740 break;
1741 case RegExprType:
1742 rtnVal = regExpr->walk( pd, 0 );
1743 break;
1744 case ReferenceType:
1745 rtnVal = varDef->walk( pd );
1746 break;
1747 case ParenType:
1748 rtnVal = join->walk( pd );
1749 break;
1750 case LongestMatchType:
1751 rtnVal = longestMatch->walk( pd );
1752 break;
1755 return rtnVal;
1758 void Factor::makeNameTree( ParseData *pd )
1760 switch ( type ) {
1761 case LiteralType:
1762 case RangeType:
1763 case OrExprType:
1764 case RegExprType:
1765 break;
1766 case ReferenceType:
1767 varDef->makeNameTree( loc, pd );
1768 break;
1769 case ParenType:
1770 join->makeNameTree( pd );
1771 break;
1772 case LongestMatchType:
1773 longestMatch->makeNameTree( pd );
1774 break;
1778 void Factor::resolveNameRefs( ParseData *pd )
1780 switch ( type ) {
1781 case LiteralType:
1782 case RangeType:
1783 case OrExprType:
1784 case RegExprType:
1785 break;
1786 case ReferenceType:
1787 varDef->resolveNameRefs( pd );
1788 break;
1789 case ParenType:
1790 join->resolveNameRefs( pd );
1791 break;
1792 case LongestMatchType:
1793 longestMatch->resolveNameRefs( pd );
1794 break;
1798 /* Clean up a range object. Must delete the two literals. */
1799 Range::~Range()
1801 delete lowerLit;
1802 delete upperLit;
1805 /* Evaluate a range. Gets the lower an upper key and makes an fsm range. */
1806 FsmAp *Range::walk( ParseData *pd )
1808 /* Construct and verify the suitability of the lower end of the range. */
1809 FsmAp *lowerFsm = lowerLit->walk( pd );
1810 if ( !lowerFsm->checkSingleCharMachine() ) {
1811 error(lowerLit->token.loc) <<
1812 "bad range lower end, must be a single character" << endl;
1815 /* Construct and verify the upper end. */
1816 FsmAp *upperFsm = upperLit->walk( pd );
1817 if ( !upperFsm->checkSingleCharMachine() ) {
1818 error(upperLit->token.loc) <<
1819 "bad range upper end, must be a single character" << endl;
1822 /* Grab the keys from the machines, then delete them. */
1823 Key lowKey = lowerFsm->startState->outList.head->lowKey;
1824 Key highKey = upperFsm->startState->outList.head->lowKey;
1825 delete lowerFsm;
1826 delete upperFsm;
1828 /* Validate the range. */
1829 if ( lowKey > highKey ) {
1830 /* Recover by setting upper to lower; */
1831 error(lowerLit->token.loc) << "lower end of range is greater then upper end" << endl;
1832 highKey = lowKey;
1835 /* Return the range now that it is validated. */
1836 FsmAp *retFsm = new FsmAp();
1837 retFsm->rangeFsm( lowKey, highKey );
1838 return retFsm;
1841 /* Evaluate a literal object. */
1842 FsmAp *Literal::walk( ParseData *pd )
1844 /* FsmAp to return, is the alphabet signed. */
1845 FsmAp *rtnVal = 0;
1847 switch ( type ) {
1848 case Number: {
1849 /* Make the fsm key in int format. */
1850 Key fsmKey = makeFsmKeyNum( token.data, token.loc, pd );
1851 /* Make the new machine. */
1852 rtnVal = new FsmAp();
1853 rtnVal->concatFsm( fsmKey );
1854 break;
1856 case LitString: {
1857 /* Make the array of keys in int format. */
1858 Token interp;
1859 bool caseInsensitive;
1860 token.prepareLitString( interp, caseInsensitive );
1861 Key *arr = new Key[interp.length];
1862 makeFsmKeyArray( arr, interp.data, interp.length, pd );
1864 /* Make the new machine. */
1865 rtnVal = new FsmAp();
1866 if ( caseInsensitive )
1867 rtnVal->concatFsmCI( arr, interp.length );
1868 else
1869 rtnVal->concatFsm( arr, interp.length );
1870 delete[] interp.data;
1871 delete[] arr;
1872 break;
1874 return rtnVal;
1877 /* Clean up after a regular expression object. */
1878 RegExpr::~RegExpr()
1880 switch ( type ) {
1881 case RecurseItem:
1882 delete regExpr;
1883 delete item;
1884 break;
1885 case Empty:
1886 break;
1890 /* Evaluate a regular expression object. */
1891 FsmAp *RegExpr::walk( ParseData *pd, RegExpr *rootRegex )
1893 /* This is the root regex, pass down a pointer to this. */
1894 if ( rootRegex == 0 )
1895 rootRegex = this;
1897 FsmAp *rtnVal = 0;
1898 switch ( type ) {
1899 case RecurseItem: {
1900 /* Walk both items. */
1901 rtnVal = regExpr->walk( pd, rootRegex );
1902 FsmAp *fsm2 = item->walk( pd, rootRegex );
1903 rtnVal->concatOp( fsm2 );
1904 break;
1906 case Empty: {
1907 rtnVal = new FsmAp();
1908 rtnVal->lambdaFsm();
1909 break;
1912 return rtnVal;
1915 /* Clean up after an item in a regular expression. */
1916 ReItem::~ReItem()
1918 switch ( type ) {
1919 case Data:
1920 case Dot:
1921 break;
1922 case OrBlock:
1923 case NegOrBlock:
1924 delete orBlock;
1925 break;
1929 /* Evaluate a regular expression object. */
1930 FsmAp *ReItem::walk( ParseData *pd, RegExpr *rootRegex )
1932 /* The fsm to return, is the alphabet signed? */
1933 FsmAp *rtnVal = 0;
1935 switch ( type ) {
1936 case Data: {
1937 /* Move the data into an integer array and make a concat fsm. */
1938 Key *arr = new Key[token.length];
1939 makeFsmKeyArray( arr, token.data, token.length, pd );
1941 /* Make the concat fsm. */
1942 rtnVal = new FsmAp();
1943 if ( rootRegex != 0 && rootRegex->caseInsensitive )
1944 rtnVal->concatFsmCI( arr, token.length );
1945 else
1946 rtnVal->concatFsm( arr, token.length );
1947 delete[] arr;
1948 break;
1950 case Dot: {
1951 /* Make the dot fsm. */
1952 rtnVal = dotFsm( pd );
1953 break;
1955 case OrBlock: {
1956 /* Get the or block and minmize it. */
1957 rtnVal = orBlock->walk( pd, rootRegex );
1958 if ( rtnVal == 0 ) {
1959 rtnVal = new FsmAp();
1960 rtnVal->lambdaFsm();
1962 rtnVal->minimizePartition2();
1963 break;
1965 case NegOrBlock: {
1966 /* Get the or block and minimize it. */
1967 FsmAp *fsm = orBlock->walk( pd, rootRegex );
1968 fsm->minimizePartition2();
1970 /* Make a dot fsm and subtract from it. */
1971 rtnVal = dotFsm( pd );
1972 rtnVal->subtractOp( fsm );
1973 rtnVal->minimizePartition2();
1974 break;
1978 /* If the item is followed by a star, then apply the star op. */
1979 if ( star ) {
1980 if ( rtnVal->startState->isFinState() ) {
1981 warning(loc) << "applying kleene star to a machine that "
1982 "accpets zero length word" << endl;
1985 rtnVal->starOp();
1986 rtnVal->minimizePartition2();
1988 return rtnVal;
1991 /* Clean up after an or block of a regular expression. */
1992 ReOrBlock::~ReOrBlock()
1994 switch ( type ) {
1995 case RecurseItem:
1996 delete orBlock;
1997 delete item;
1998 break;
1999 case Empty:
2000 break;
2005 /* Evaluate an or block of a regular expression. */
2006 FsmAp *ReOrBlock::walk( ParseData *pd, RegExpr *rootRegex )
2008 FsmAp *rtnVal = 0;
2009 switch ( type ) {
2010 case RecurseItem: {
2011 /* Evaluate the two fsm. */
2012 FsmAp *fsm1 = orBlock->walk( pd, rootRegex );
2013 FsmAp *fsm2 = item->walk( pd, rootRegex );
2014 if ( fsm1 == 0 )
2015 rtnVal = fsm2;
2016 else {
2017 fsm1->unionOp( fsm2 );
2018 rtnVal = fsm1;
2020 break;
2022 case Empty: {
2023 rtnVal = 0;
2024 break;
2027 return rtnVal;;
2030 /* Evaluate an or block item of a regular expression. */
2031 FsmAp *ReOrItem::walk( ParseData *pd, RegExpr *rootRegex )
2033 /* The return value, is the alphabet signed? */
2034 FsmAp *rtnVal = 0;
2035 switch ( type ) {
2036 case Data: {
2037 /* Make the or machine. */
2038 rtnVal = new FsmAp();
2040 /* Put the or data into an array of ints. Note that we find unique
2041 * keys. Duplicates are silently ignored. The alternative would be to
2042 * issue warning or an error but since we can't with [a0-9a] or 'a' |
2043 * 'a' don't bother here. */
2044 KeySet keySet;
2045 makeFsmUniqueKeyArray( keySet, token.data, token.length,
2046 rootRegex != 0 ? rootRegex->caseInsensitive : false, pd );
2048 /* Run the or operator. */
2049 rtnVal->orFsm( keySet.data, keySet.length() );
2050 break;
2052 case Range: {
2053 /* Make the upper and lower keys. */
2054 Key lowKey = makeFsmKeyChar( lower, pd );
2055 Key highKey = makeFsmKeyChar( upper, pd );
2057 /* Validate the range. */
2058 if ( lowKey > highKey ) {
2059 /* Recover by setting upper to lower; */
2060 error(loc) << "lower end of range is greater then upper end" << endl;
2061 highKey = lowKey;
2064 /* Make the range machine. */
2065 rtnVal = new FsmAp();
2066 rtnVal->rangeFsm( lowKey, highKey );
2068 if ( rootRegex != 0 && rootRegex->caseInsensitive ) {
2069 if ( lowKey <= 'Z' && 'A' <= highKey ) {
2070 Key otherLow = lowKey < 'A' ? Key('A') : lowKey;
2071 Key otherHigh = 'Z' < highKey ? Key('Z') : highKey;
2073 otherLow = 'a' + ( otherLow - 'A' );
2074 otherHigh = 'a' + ( otherHigh - 'A' );
2076 FsmAp *otherRange = new FsmAp();
2077 otherRange->rangeFsm( otherLow, otherHigh );
2078 rtnVal->unionOp( otherRange );
2079 rtnVal->minimizePartition2();
2081 else if ( lowKey <= 'z' && 'a' <= highKey ) {
2082 Key otherLow = lowKey < 'a' ? Key('a') : lowKey;
2083 Key otherHigh = 'z' < highKey ? Key('z') : highKey;
2085 otherLow = 'A' + ( otherLow - 'a' );
2086 otherHigh = 'A' + ( otherHigh - 'a' );
2088 FsmAp *otherRange = new FsmAp();
2089 otherRange->rangeFsm( otherLow, otherHigh );
2090 rtnVal->unionOp( otherRange );
2091 rtnVal->minimizePartition2();
2095 break;
2097 return rtnVal;