Use portable types in the C/C++ code generator
[ragel-jkt.git] / ragel / parsedata.cpp
blob8ce89db64687486827332ae335246c9884f8e1ec
1 /*
2 * Copyright 2001-2008 Adrian Thurston <thurston@complang.org>
3 */
5 /* This file is part of Ragel.
7 * Ragel is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * Ragel is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Ragel; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 #include <iostream>
23 #include <iomanip>
24 #include <errno.h>
25 #include <stdlib.h>
26 #include <limits.h>
28 #include "ragel.h"
29 #include "rlparse.h"
30 #include "parsedata.h"
31 #include "parsetree.h"
32 #include "mergesort.h"
33 #include "xmlcodegen.h"
34 #include "version.h"
35 #include "inputdata.h"
37 using namespace std;
39 char mainMachine[] = "main";
41 void Token::set( const char *str, int len )
43 length = len;
44 data = new char[len+1];
45 memcpy( data, str, len );
46 data[len] = 0;
49 void Token::append( const Token &other )
51 int newLength = length + other.length;
52 char *newString = new char[newLength+1];
53 memcpy( newString, data, length );
54 memcpy( newString + length, other.data, other.length );
55 newString[newLength] = 0;
56 data = newString;
57 length = newLength;
60 /* Perform minimization after an operation according
61 * to the command line args. */
62 void afterOpMinimize( FsmAp *fsm, bool lastInSeq )
64 /* Switch on the prefered minimization algorithm. */
65 if ( minimizeOpt == MinimizeEveryOp || ( minimizeOpt == MinimizeMostOps && lastInSeq ) ) {
66 /* First clean up the graph. FsmAp operations may leave these
67 * lying around. There should be no dead end states. The subtract
68 * intersection operators are the only places where they may be
69 * created and those operators clean them up. */
70 fsm->removeUnreachableStates();
72 switch ( minimizeLevel ) {
73 case MinimizeApprox:
74 fsm->minimizeApproximate();
75 break;
76 case MinimizePartition1:
77 fsm->minimizePartition1();
78 break;
79 case MinimizePartition2:
80 fsm->minimizePartition2();
81 break;
82 case MinimizeStable:
83 fsm->minimizeStable();
84 break;
89 /* Count the transitions in the fsm by walking the state list. */
90 int countTransitions( FsmAp *fsm )
92 int numTrans = 0;
93 StateAp *state = fsm->stateList.head;
94 while ( state != 0 ) {
95 numTrans += state->outList.length();
96 state = state->next;
98 return numTrans;
101 Key makeFsmKeyHex( char *str, const InputLoc &loc, ParseData *pd )
103 /* Reset errno so we can check for overflow or underflow. In the event of
104 * an error, sets the return val to the upper or lower bound being tested
105 * against. */
106 errno = 0;
107 unsigned int size = keyOps->alphType->size;
108 bool unusedBits = size < sizeof(unsigned long);
110 unsigned long ul = strtoul( str, 0, 16 );
112 if ( errno == ERANGE || ( unusedBits && ul >> (size * 8) ) ) {
113 error(loc) << "literal " << str << " overflows the alphabet type" << endl;
114 ul = 1 << (size * 8);
117 if ( unusedBits && keyOps->alphType->isSigned && ul >> (size * 8 - 1) )
118 ul |= ( -1L >> (size*8) ) << (size*8);
120 return Key( (long)ul );
123 Key makeFsmKeyDec( char *str, const InputLoc &loc, ParseData *pd )
125 /* Convert the number to a decimal. First reset errno so we can check
126 * for overflow or underflow. */
127 errno = 0;
128 long long minVal = keyOps->alphType->minVal;
129 long long maxVal = keyOps->alphType->maxVal;
131 long long ll = strtoll( str, 0, 10 );
133 /* Check for underflow. */
134 if ( ( errno == ERANGE && ll < 0 ) || ll < minVal) {
135 error(loc) << "literal " << str << " underflows the alphabet type" << endl;
136 ll = minVal;
138 /* Check for overflow. */
139 else if ( ( errno == ERANGE && ll > 0 ) || ll > maxVal ) {
140 error(loc) << "literal " << str << " overflows the alphabet type" << endl;
141 ll = maxVal;
144 if ( keyOps->alphType->isSigned )
145 return Key( (long)ll );
146 else
147 return Key( (unsigned long)ll );
150 /* Make an fsm key in int format (what the fsm graph uses) from an alphabet
151 * number returned by the parser. Validates that the number doesn't overflow
152 * the alphabet type. */
153 Key makeFsmKeyNum( char *str, const InputLoc &loc, ParseData *pd )
155 /* Switch on hex/decimal format. */
156 if ( str[0] == '0' && str[1] == 'x' )
157 return makeFsmKeyHex( str, loc, pd );
158 else
159 return makeFsmKeyDec( str, loc, pd );
162 /* Make an fsm int format (what the fsm graph uses) from a single character.
163 * Performs proper conversion depending on signed/unsigned property of the
164 * alphabet. */
165 Key makeFsmKeyChar( char c, ParseData *pd )
167 if ( keyOps->isSigned ) {
168 /* Copy from a char type. */
169 return Key( c );
171 else {
172 /* Copy from an unsigned byte type. */
173 return Key( (unsigned char)c );
177 /* Make an fsm key array in int format (what the fsm graph uses) from a string
178 * of characters. Performs proper conversion depending on signed/unsigned
179 * property of the alphabet. */
180 void makeFsmKeyArray( Key *result, char *data, int len, ParseData *pd )
182 if ( keyOps->isSigned ) {
183 /* Copy from a char star type. */
184 char *src = data;
185 for ( int i = 0; i < len; i++ )
186 result[i] = Key(src[i]);
188 else {
189 /* Copy from an unsigned byte ptr type. */
190 unsigned char *src = (unsigned char*) data;
191 for ( int i = 0; i < len; i++ )
192 result[i] = Key(src[i]);
196 /* Like makeFsmKeyArray except the result has only unique keys. They ordering
197 * will be changed. */
198 void makeFsmUniqueKeyArray( KeySet &result, char *data, int len,
199 bool caseInsensitive, ParseData *pd )
201 /* Use a transitions list for getting unique keys. */
202 if ( keyOps->isSigned ) {
203 /* Copy from a char star type. */
204 char *src = data;
205 for ( int si = 0; si < len; si++ ) {
206 Key key( src[si] );
207 result.insert( key );
208 if ( caseInsensitive ) {
209 if ( key.isLower() )
210 result.insert( key.toUpper() );
211 else if ( key.isUpper() )
212 result.insert( key.toLower() );
216 else {
217 /* Copy from an unsigned byte ptr type. */
218 unsigned char *src = (unsigned char*) data;
219 for ( int si = 0; si < len; si++ ) {
220 Key key( src[si] );
221 result.insert( key );
222 if ( caseInsensitive ) {
223 if ( key.isLower() )
224 result.insert( key.toUpper() );
225 else if ( key.isUpper() )
226 result.insert( key.toLower() );
232 FsmAp *dotFsm( ParseData *pd )
234 FsmAp *retFsm = new FsmAp();
235 retFsm->rangeFsm( keyOps->minKey, keyOps->maxKey );
236 return retFsm;
239 FsmAp *dotStarFsm( ParseData *pd )
241 FsmAp *retFsm = new FsmAp();
242 retFsm->rangeStarFsm( keyOps->minKey, keyOps->maxKey );
243 return retFsm;
246 /* Make a builtin type. Depends on the signed nature of the alphabet type. */
247 FsmAp *makeBuiltin( BuiltinMachine builtin, ParseData *pd )
249 /* FsmAp created to return. */
250 FsmAp *retFsm = 0;
251 bool isSigned = keyOps->isSigned;
253 switch ( builtin ) {
254 case BT_Any: {
255 /* All characters. */
256 retFsm = dotFsm( pd );
257 break;
259 case BT_Ascii: {
260 /* Ascii characters 0 to 127. */
261 retFsm = new FsmAp();
262 retFsm->rangeFsm( 0, 127 );
263 break;
265 case BT_Extend: {
266 /* Ascii extended characters. This is the full byte range. Dependent
267 * on signed, vs no signed. If the alphabet is one byte then just use
268 * dot fsm. */
269 if ( isSigned ) {
270 retFsm = new FsmAp();
271 retFsm->rangeFsm( -128, 127 );
273 else {
274 retFsm = new FsmAp();
275 retFsm->rangeFsm( 0, 255 );
277 break;
279 case BT_Alpha: {
280 /* Alpha [A-Za-z]. */
281 FsmAp *upper = new FsmAp(), *lower = new FsmAp();
282 upper->rangeFsm( 'A', 'Z' );
283 lower->rangeFsm( 'a', 'z' );
284 upper->unionOp( lower );
285 upper->minimizePartition2();
286 retFsm = upper;
287 break;
289 case BT_Digit: {
290 /* Digits [0-9]. */
291 retFsm = new FsmAp();
292 retFsm->rangeFsm( '0', '9' );
293 break;
295 case BT_Alnum: {
296 /* Alpha numerics [0-9A-Za-z]. */
297 FsmAp *digit = new FsmAp(), *lower = new FsmAp();
298 FsmAp *upper = new FsmAp();
299 digit->rangeFsm( '0', '9' );
300 upper->rangeFsm( 'A', 'Z' );
301 lower->rangeFsm( 'a', 'z' );
302 digit->unionOp( upper );
303 digit->unionOp( lower );
304 digit->minimizePartition2();
305 retFsm = digit;
306 break;
308 case BT_Lower: {
309 /* Lower case characters. */
310 retFsm = new FsmAp();
311 retFsm->rangeFsm( 'a', 'z' );
312 break;
314 case BT_Upper: {
315 /* Upper case characters. */
316 retFsm = new FsmAp();
317 retFsm->rangeFsm( 'A', 'Z' );
318 break;
320 case BT_Cntrl: {
321 /* Control characters. */
322 FsmAp *cntrl = new FsmAp();
323 FsmAp *highChar = new FsmAp();
324 cntrl->rangeFsm( 0, 31 );
325 highChar->concatFsm( 127 );
326 cntrl->unionOp( highChar );
327 cntrl->minimizePartition2();
328 retFsm = cntrl;
329 break;
331 case BT_Graph: {
332 /* Graphical ascii characters [!-~]. */
333 retFsm = new FsmAp();
334 retFsm->rangeFsm( '!', '~' );
335 break;
337 case BT_Print: {
338 /* Printable characters. Same as graph except includes space. */
339 retFsm = new FsmAp();
340 retFsm->rangeFsm( ' ', '~' );
341 break;
343 case BT_Punct: {
344 /* Punctuation. */
345 FsmAp *range1 = new FsmAp();
346 FsmAp *range2 = new FsmAp();
347 FsmAp *range3 = new FsmAp();
348 FsmAp *range4 = new FsmAp();
349 range1->rangeFsm( '!', '/' );
350 range2->rangeFsm( ':', '@' );
351 range3->rangeFsm( '[', '`' );
352 range4->rangeFsm( '{', '~' );
353 range1->unionOp( range2 );
354 range1->unionOp( range3 );
355 range1->unionOp( range4 );
356 range1->minimizePartition2();
357 retFsm = range1;
358 break;
360 case BT_Space: {
361 /* Whitespace: [\t\v\f\n\r ]. */
362 FsmAp *cntrl = new FsmAp();
363 FsmAp *space = new FsmAp();
364 cntrl->rangeFsm( '\t', '\r' );
365 space->concatFsm( ' ' );
366 cntrl->unionOp( space );
367 cntrl->minimizePartition2();
368 retFsm = cntrl;
369 break;
371 case BT_Xdigit: {
372 /* Hex digits [0-9A-Fa-f]. */
373 FsmAp *digit = new FsmAp();
374 FsmAp *upper = new FsmAp();
375 FsmAp *lower = new FsmAp();
376 digit->rangeFsm( '0', '9' );
377 upper->rangeFsm( 'A', 'F' );
378 lower->rangeFsm( 'a', 'f' );
379 digit->unionOp( upper );
380 digit->unionOp( lower );
381 digit->minimizePartition2();
382 retFsm = digit;
383 break;
385 case BT_Lambda: {
386 retFsm = new FsmAp();
387 retFsm->lambdaFsm();
388 break;
390 case BT_Empty: {
391 retFsm = new FsmAp();
392 retFsm->emptyFsm();
393 break;
396 return retFsm;
399 /* Check if this name inst or any name inst below is referenced. */
400 bool NameInst::anyRefsRec()
402 if ( numRefs > 0 )
403 return true;
405 /* Recurse on children until true. */
406 for ( NameVect::Iter ch = childVect; ch.lte(); ch++ ) {
407 if ( (*ch)->anyRefsRec() )
408 return true;
411 return false;
415 * ParseData
418 /* Initialize the structure that will collect info during the parse of a
419 * machine. */
420 ParseData::ParseData( const char *fileName, char *sectionName,
421 const InputLoc &sectionLoc )
423 sectionGraph(0),
424 generatingSectionSubset(false),
425 nextPriorKey(0),
426 /* 0 is reserved for global error actions. */
427 nextLocalErrKey(1),
428 nextNameId(0),
429 nextCondId(0),
430 alphTypeSet(false),
431 getKeyExpr(0),
432 accessExpr(0),
433 prePushExpr(0),
434 postPopExpr(0),
435 pExpr(0),
436 peExpr(0),
437 eofExpr(0),
438 csExpr(0),
439 topExpr(0),
440 stackExpr(0),
441 actExpr(0),
442 tokstartExpr(0),
443 tokendExpr(0),
444 dataExpr(0),
445 lowerNum(0),
446 upperNum(0),
447 fileName(fileName),
448 sectionName(sectionName),
449 sectionLoc(sectionLoc),
450 curActionOrd(0),
451 curPriorOrd(0),
452 rootName(0),
453 exportsRootName(0),
454 nextEpsilonResolvedLink(0),
455 nextLongestMatchId(1),
456 lmRequiresErrorState(false),
457 cgd(0)
459 /* Initialize the dictionary of graphs. This is our symbol table. The
460 * initialization needs to be done on construction which happens at the
461 * beginning of a machine spec so any assignment operators can reference
462 * the builtins. */
463 initGraphDict();
466 /* Clean up the data collected during a parse. */
467 ParseData::~ParseData()
469 /* Delete all the nodes in the action list. Will cause all the
470 * string data that represents the actions to be deallocated. */
471 actionList.empty();
474 /* Make a name id in the current name instantiation scope if it is not
475 * already there. */
476 NameInst *ParseData::addNameInst( const InputLoc &loc, const char *data, bool isLabel )
478 /* Create the name instantitaion object and insert it. */
479 NameInst *newNameInst = new NameInst( loc, curNameInst, data, nextNameId++, isLabel );
480 curNameInst->childVect.append( newNameInst );
481 if ( data != 0 )
482 curNameInst->children.insertMulti( data, newNameInst );
483 return newNameInst;
486 void ParseData::initNameWalk()
488 curNameInst = rootName;
489 curNameChild = 0;
492 void ParseData::initExportsNameWalk()
494 curNameInst = exportsRootName;
495 curNameChild = 0;
498 /* Goes into the next child scope. The number of the child is already set up.
499 * We need this for the syncronous name tree and parse tree walk to work
500 * properly. It is reset on entry into a scope and advanced on poping of a
501 * scope. A call to enterNameScope should be accompanied by a corresponding
502 * popNameScope. */
503 NameFrame ParseData::enterNameScope( bool isLocal, int numScopes )
505 /* Save off the current data. */
506 NameFrame retFrame;
507 retFrame.prevNameInst = curNameInst;
508 retFrame.prevNameChild = curNameChild;
509 retFrame.prevLocalScope = localNameScope;
511 /* Enter into the new name scope. */
512 for ( int i = 0; i < numScopes; i++ ) {
513 curNameInst = curNameInst->childVect[curNameChild];
514 curNameChild = 0;
517 if ( isLocal )
518 localNameScope = curNameInst;
520 return retFrame;
523 /* Return from a child scope to a parent. The parent info must be specified as
524 * an argument and is obtained from the corresponding call to enterNameScope.
525 * */
526 void ParseData::popNameScope( const NameFrame &frame )
528 /* Pop the name scope. */
529 curNameInst = frame.prevNameInst;
530 curNameChild = frame.prevNameChild+1;
531 localNameScope = frame.prevLocalScope;
534 void ParseData::resetNameScope( const NameFrame &frame )
536 /* Pop the name scope. */
537 curNameInst = frame.prevNameInst;
538 curNameChild = frame.prevNameChild;
539 localNameScope = frame.prevLocalScope;
543 void ParseData::unsetObsoleteEntries( FsmAp *graph )
545 /* Loop the reference names and increment the usage. Names that are no
546 * longer needed will be unset in graph. */
547 for ( NameVect::Iter ref = curNameInst->referencedNames; ref.lte(); ref++ ) {
548 /* Get the name. */
549 NameInst *name = *ref;
550 name->numUses += 1;
552 /* If the name is no longer needed unset its corresponding entry. */
553 if ( name->numUses == name->numRefs ) {
554 assert( graph->entryPoints.find( name->id ) != 0 );
555 graph->unsetEntry( name->id );
556 assert( graph->entryPoints.find( name->id ) == 0 );
561 NameSet ParseData::resolvePart( NameInst *refFrom, const char *data, bool recLabelsOnly )
563 /* Queue needed for breadth-first search, load it with the start node. */
564 NameInstList nameQueue;
565 nameQueue.append( refFrom );
567 NameSet result;
568 while ( nameQueue.length() > 0 ) {
569 /* Pull the next from location off the queue. */
570 NameInst *from = nameQueue.detachFirst();
572 /* Look for the name. */
573 NameMapEl *low, *high;
574 if ( from->children.findMulti( data, low, high ) ) {
575 /* Record all instances of the name. */
576 for ( ; low <= high; low++ )
577 result.insert( low->value );
580 /* Name not there, do breadth-first operation of appending all
581 * childrent to the processing queue. */
582 for ( NameVect::Iter name = from->childVect; name.lte(); name++ ) {
583 if ( !recLabelsOnly || (*name)->isLabel )
584 nameQueue.append( *name );
588 /* Queue exhausted and name never found. */
589 return result;
592 void ParseData::resolveFrom( NameSet &result, NameInst *refFrom,
593 const NameRef &nameRef, int namePos )
595 /* Look for the name in the owning scope of the factor with aug. */
596 NameSet partResult = resolvePart( refFrom, nameRef[namePos], false );
598 /* If there are more parts to the name then continue on. */
599 if ( ++namePos < nameRef.length() ) {
600 /* There are more components to the name, search using all the part
601 * results as the base. */
602 for ( NameSet::Iter name = partResult; name.lte(); name++ )
603 resolveFrom( result, *name, nameRef, namePos );
605 else {
606 /* This is the last component, append the part results to the final
607 * results. */
608 result.insert( partResult );
612 /* Write out a name reference. */
613 ostream &operator<<( ostream &out, const NameRef &nameRef )
615 int pos = 0;
616 if ( nameRef[pos] == 0 ) {
617 out << "::";
618 pos += 1;
620 out << nameRef[pos++];
621 for ( ; pos < nameRef.length(); pos++ )
622 out << "::" << nameRef[pos];
623 return out;
626 ostream &operator<<( ostream &out, const NameInst &nameInst )
628 /* Count the number fully qualified name parts. */
629 int numParents = 0;
630 NameInst *curParent = nameInst.parent;
631 while ( curParent != 0 ) {
632 numParents += 1;
633 curParent = curParent->parent;
636 /* Make an array and fill it in. */
637 curParent = nameInst.parent;
638 NameInst **parents = new NameInst*[numParents];
639 for ( int p = numParents-1; p >= 0; p-- ) {
640 parents[p] = curParent;
641 curParent = curParent->parent;
644 /* Write the parents out, skip the root. */
645 for ( int p = 1; p < numParents; p++ )
646 out << "::" << ( parents[p]->name != 0 ? parents[p]->name : "<ANON>" );
648 /* Write the name and cleanup. */
649 out << "::" << ( nameInst.name != 0 ? nameInst.name : "<ANON>" );
650 delete[] parents;
651 return out;
654 struct CmpNameInstLoc
656 static int compare( const NameInst *ni1, const NameInst *ni2 )
658 if ( ni1->loc.line < ni2->loc.line )
659 return -1;
660 else if ( ni1->loc.line > ni2->loc.line )
661 return 1;
662 else if ( ni1->loc.col < ni2->loc.col )
663 return -1;
664 else if ( ni1->loc.col > ni2->loc.col )
665 return 1;
666 return 0;
670 void errorStateLabels( const NameSet &resolved )
672 MergeSort<NameInst*, CmpNameInstLoc> mergeSort;
673 mergeSort.sort( resolved.data, resolved.length() );
674 for ( NameSet::Iter res = resolved; res.lte(); res++ )
675 error((*res)->loc) << " -> " << **res << endl;
679 NameInst *ParseData::resolveStateRef( const NameRef &nameRef, InputLoc &loc, Action *action )
681 NameInst *nameInst = 0;
683 /* Do the local search if the name is not strictly a root level name
684 * search. */
685 if ( nameRef[0] != 0 ) {
686 /* If the action is referenced, resolve all of them. */
687 if ( action != 0 && action->actionRefs.length() > 0 ) {
688 /* Look for the name in all referencing scopes. */
689 NameSet resolved;
690 for ( ActionRefs::Iter actRef = action->actionRefs; actRef.lte(); actRef++ )
691 resolveFrom( resolved, *actRef, nameRef, 0 );
693 if ( resolved.length() > 0 ) {
694 /* Take the first one. */
695 nameInst = resolved[0];
696 if ( resolved.length() > 1 ) {
697 /* Complain about the multiple references. */
698 error(loc) << "state reference " << nameRef <<
699 " resolves to multiple entry points" << endl;
700 errorStateLabels( resolved );
706 /* If not found in the local scope, look in global. */
707 if ( nameInst == 0 ) {
708 NameSet resolved;
709 int fromPos = nameRef[0] != 0 ? 0 : 1;
710 resolveFrom( resolved, rootName, nameRef, fromPos );
712 if ( resolved.length() > 0 ) {
713 /* Take the first. */
714 nameInst = resolved[0];
715 if ( resolved.length() > 1 ) {
716 /* Complain about the multiple references. */
717 error(loc) << "state reference " << nameRef <<
718 " resolves to multiple entry points" << endl;
719 errorStateLabels( resolved );
724 if ( nameInst == 0 ) {
725 /* If not found then complain. */
726 error(loc) << "could not resolve state reference " << nameRef << endl;
728 return nameInst;
731 void ParseData::resolveNameRefs( InlineList *inlineList, Action *action )
733 for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) {
734 switch ( item->type ) {
735 case InlineItem::Entry: case InlineItem::Goto:
736 case InlineItem::Call: case InlineItem::Next: {
737 /* Resolve, pass action for local search. */
738 NameInst *target = resolveStateRef( *item->nameRef, item->loc, action );
740 /* Name lookup error reporting is handled by resolveStateRef. */
741 if ( target != 0 ) {
742 /* Check if the target goes into a longest match. */
743 NameInst *search = target->parent;
744 while ( search != 0 ) {
745 if ( search->isLongestMatch ) {
746 error(item->loc) << "cannot enter inside a longest "
747 "match construction as an entry point" << endl;
748 break;
750 search = search->parent;
753 /* Record the reference in the name. This will cause the
754 * entry point to survive to the end of the graph
755 * generating walk. */
756 target->numRefs += 1;
759 item->nameTarg = target;
760 break;
762 default:
763 break;
766 /* Some of the item types may have children. */
767 if ( item->children != 0 )
768 resolveNameRefs( item->children, action );
772 /* Resolve references to labels in actions. */
773 void ParseData::resolveActionNameRefs()
775 for ( ActionList::Iter act = actionList; act.lte(); act++ ) {
776 /* Only care about the actions that are referenced. */
777 if ( act->actionRefs.length() > 0 )
778 resolveNameRefs( act->inlineList, act );
782 /* Walk a name tree starting at from and fill the name index. */
783 void ParseData::fillNameIndex( NameInst *from )
785 /* Fill the value for from in the name index. */
786 nameIndex[from->id] = from;
788 /* Recurse on the implicit final state and then all children. */
789 if ( from->final != 0 )
790 fillNameIndex( from->final );
791 for ( NameVect::Iter name = from->childVect; name.lte(); name++ )
792 fillNameIndex( *name );
795 void ParseData::makeRootNames()
797 /* Create the root name. */
798 rootName = new NameInst( InputLoc(), 0, 0, nextNameId++, false );
799 exportsRootName = new NameInst( InputLoc(), 0, 0, nextNameId++, false );
802 /* Build the name tree and supporting data structures. */
803 void ParseData::makeNameTree( GraphDictEl *dictEl )
805 /* Set up curNameInst for the walk. */
806 initNameWalk();
808 if ( dictEl != 0 ) {
809 /* A start location has been specified. */
810 dictEl->value->makeNameTree( dictEl->loc, this );
812 else {
813 /* First make the name tree. */
814 for ( GraphList::Iter glel = instanceList; glel.lte(); glel++ ) {
815 /* Recurse on the instance. */
816 glel->value->makeNameTree( glel->loc, this );
820 /* The number of nodes in the tree can now be given by nextNameId */
821 nameIndex = new NameInst*[nextNameId];
822 memset( nameIndex, 0, sizeof(NameInst*)*nextNameId );
823 fillNameIndex( rootName );
824 fillNameIndex( exportsRootName );
828 void ParseData::createBuiltin( const char *name, BuiltinMachine builtin )
830 Expression *expression = new Expression( builtin );
831 Join *join = new Join( expression );
832 MachineDef *machineDef = new MachineDef( join );
833 VarDef *varDef = new VarDef( name, machineDef );
834 GraphDictEl *graphDictEl = new GraphDictEl( name, varDef );
835 graphDict.insert( graphDictEl );
838 /* Initialize the graph dict with builtin types. */
839 void ParseData::initGraphDict( )
841 createBuiltin( "any", BT_Any );
842 createBuiltin( "ascii", BT_Ascii );
843 createBuiltin( "extend", BT_Extend );
844 createBuiltin( "alpha", BT_Alpha );
845 createBuiltin( "digit", BT_Digit );
846 createBuiltin( "alnum", BT_Alnum );
847 createBuiltin( "lower", BT_Lower );
848 createBuiltin( "upper", BT_Upper );
849 createBuiltin( "cntrl", BT_Cntrl );
850 createBuiltin( "graph", BT_Graph );
851 createBuiltin( "print", BT_Print );
852 createBuiltin( "punct", BT_Punct );
853 createBuiltin( "space", BT_Space );
854 createBuiltin( "xdigit", BT_Xdigit );
855 createBuiltin( "null", BT_Lambda );
856 createBuiltin( "zlen", BT_Lambda );
857 createBuiltin( "empty", BT_Empty );
860 /* Set the alphabet type. If the types are not valid returns false. */
861 bool ParseData::setAlphType( const InputLoc &loc, char *s1, char *s2 )
863 alphTypeLoc = loc;
864 userAlphType = findAlphType( s1, s2 );
865 alphTypeSet = true;
866 return userAlphType != 0;
869 /* Set the alphabet type. If the types are not valid returns false. */
870 bool ParseData::setAlphType( const InputLoc &loc, char *s1 )
872 alphTypeLoc = loc;
873 userAlphType = findAlphType( s1 );
874 alphTypeSet = true;
875 return userAlphType != 0;
878 bool ParseData::setVariable( char *var, InlineList *inlineList )
880 bool set = true;
882 if ( strcmp( var, "p" ) == 0 )
883 pExpr = inlineList;
884 else if ( strcmp( var, "pe" ) == 0 )
885 peExpr = inlineList;
886 else if ( strcmp( var, "eof" ) == 0 )
887 eofExpr = inlineList;
888 else if ( strcmp( var, "cs" ) == 0 )
889 csExpr = inlineList;
890 else if ( strcmp( var, "data" ) == 0 )
891 dataExpr = inlineList;
892 else if ( strcmp( var, "top" ) == 0 )
893 topExpr = inlineList;
894 else if ( strcmp( var, "stack" ) == 0 )
895 stackExpr = inlineList;
896 else if ( strcmp( var, "act" ) == 0 )
897 actExpr = inlineList;
898 else if ( strcmp( var, "ts" ) == 0 )
899 tokstartExpr = inlineList;
900 else if ( strcmp( var, "te" ) == 0 )
901 tokendExpr = inlineList;
902 else
903 set = false;
905 return set;
908 /* Initialize the key operators object that will be referenced by all fsms
909 * created. */
910 void ParseData::initKeyOps( )
912 /* Signedness and bounds. */
913 HostType *alphType = alphTypeSet ? userAlphType : hostLang->defaultAlphType;
914 thisKeyOps.setAlphType( alphType );
916 if ( lowerNum != 0 ) {
917 /* If ranges are given then interpret the alphabet type. */
918 thisKeyOps.minKey = makeFsmKeyNum( lowerNum, rangeLowLoc, this );
919 thisKeyOps.maxKey = makeFsmKeyNum( upperNum, rangeHighLoc, this );
922 thisCondData.lastCondKey = thisKeyOps.maxKey;
925 void ParseData::printNameInst( NameInst *nameInst, int level )
927 for ( int i = 0; i < level; i++ )
928 cerr << " ";
929 cerr << (nameInst->name != 0 ? nameInst->name : "<ANON>") <<
930 " id: " << nameInst->id <<
931 " refs: " << nameInst->numRefs <<
932 " uses: " << nameInst->numUses << endl;
933 for ( NameVect::Iter name = nameInst->childVect; name.lte(); name++ )
934 printNameInst( *name, level+1 );
937 /* Remove duplicates of unique actions from an action table. */
938 void ParseData::removeDups( ActionTable &table )
940 /* Scan through the table looking for unique actions to
941 * remove duplicates of. */
942 for ( int i = 0; i < table.length(); i++ ) {
943 /* Remove any duplicates ahead of i. */
944 for ( int r = i+1; r < table.length(); ) {
945 if ( table[r].value == table[i].value )
946 table.vremove(r);
947 else
948 r += 1;
953 /* Remove duplicates from action lists. This operates only on transition and
954 * eof action lists and so should be called once all actions have been
955 * transfered to their final resting place. */
956 void ParseData::removeActionDups( FsmAp *graph )
958 /* Loop all states. */
959 for ( StateList::Iter state = graph->stateList; state.lte(); state++ ) {
960 /* Loop all transitions. */
961 for ( TransList::Iter trans = state->outList; trans.lte(); trans++ )
962 removeDups( trans->actionTable );
963 removeDups( state->toStateActionTable );
964 removeDups( state->fromStateActionTable );
965 removeDups( state->eofActionTable );
969 Action *ParseData::newAction( const char *name, InlineList *inlineList )
971 InputLoc loc;
972 loc.line = 1;
973 loc.col = 1;
974 loc.fileName = "NONE";
976 Action *action = new Action( loc, name, inlineList, nextCondId++ );
977 action->actionRefs.append( rootName );
978 actionList.append( action );
979 return action;
982 void ParseData::initLongestMatchData()
984 if ( lmList.length() > 0 ) {
985 /* The initTokStart action resets the token start. */
986 InlineList *il1 = new InlineList;
987 il1->append( new InlineItem( InputLoc(), InlineItem::LmInitTokStart ) );
988 initTokStart = newAction( "initts", il1 );
989 initTokStart->isLmAction = true;
991 /* The initActId action gives act a default value. */
992 InlineList *il4 = new InlineList;
993 il4->append( new InlineItem( InputLoc(), InlineItem::LmInitAct ) );
994 initActId = newAction( "initact", il4 );
995 initActId->isLmAction = true;
997 /* The setTokStart action sets tokstart. */
998 InlineList *il5 = new InlineList;
999 il5->append( new InlineItem( InputLoc(), InlineItem::LmSetTokStart ) );
1000 setTokStart = newAction( "ts", il5 );
1001 setTokStart->isLmAction = true;
1003 /* The setTokEnd action sets tokend. */
1004 InlineList *il3 = new InlineList;
1005 il3->append( new InlineItem( InputLoc(), InlineItem::LmSetTokEnd ) );
1006 setTokEnd = newAction( "te", il3 );
1007 setTokEnd->isLmAction = true;
1009 /* The action will also need an ordering: ahead of all user action
1010 * embeddings. */
1011 initTokStartOrd = curActionOrd++;
1012 initActIdOrd = curActionOrd++;
1013 setTokStartOrd = curActionOrd++;
1014 setTokEndOrd = curActionOrd++;
1018 /* After building the graph, do some extra processing to ensure the runtime
1019 * data of the longest mactch operators is consistent. */
1020 void ParseData::setLongestMatchData( FsmAp *graph )
1022 if ( lmList.length() > 0 ) {
1023 /* Make sure all entry points (targets of fgoto, fcall, fnext, fentry)
1024 * init the tokstart. */
1025 for ( EntryMap::Iter en = graph->entryPoints; en.lte(); en++ ) {
1026 /* This is run after duplicates are removed, we must guard against
1027 * inserting a duplicate. */
1028 ActionTable &actionTable = en->value->toStateActionTable;
1029 if ( ! actionTable.hasAction( initTokStart ) )
1030 actionTable.setAction( initTokStartOrd, initTokStart );
1033 /* Find the set of states that are the target of transitions with
1034 * actions that have calls. These states will be targeted by fret
1035 * statements. */
1036 StateSet states;
1037 for ( StateList::Iter state = graph->stateList; state.lte(); state++ ) {
1038 for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) {
1039 for ( ActionTable::Iter ati = trans->actionTable; ati.lte(); ati++ ) {
1040 if ( ati->value->anyCall && trans->toState != 0 )
1041 states.insert( trans->toState );
1047 /* Init tokstart upon entering the above collected states. */
1048 for ( StateSet::Iter ps = states; ps.lte(); ps++ ) {
1049 /* This is run after duplicates are removed, we must guard against
1050 * inserting a duplicate. */
1051 ActionTable &actionTable = (*ps)->toStateActionTable;
1052 if ( ! actionTable.hasAction( initTokStart ) )
1053 actionTable.setAction( initTokStartOrd, initTokStart );
1058 /* Make the graph from a graph dict node. Does minimization and state sorting. */
1059 FsmAp *ParseData::makeInstance( GraphDictEl *gdNode )
1061 /* Build the graph from a walk of the parse tree. */
1062 FsmAp *graph = gdNode->value->walk( this );
1064 /* Resolve any labels that point to multiple states. Any labels that are
1065 * still around are referenced only by gotos and calls and they need to be
1066 * made into deterministic entry points. */
1067 graph->deterministicEntry();
1070 * All state construction is now complete.
1073 /* Transfer actions from the out action tables to eof action tables. */
1074 for ( StateSet::Iter state = graph->finStateSet; state.lte(); state++ )
1075 graph->transferOutActions( *state );
1077 /* Transfer global error actions. */
1078 for ( StateList::Iter state = graph->stateList; state.lte(); state++ )
1079 graph->transferErrorActions( state, 0 );
1081 if ( ::wantDupsRemoved )
1082 removeActionDups( graph );
1084 /* Remove unreachable states. There should be no dead end states. The
1085 * subtract and intersection operators are the only places where they may
1086 * be created and those operators clean them up. */
1087 graph->removeUnreachableStates();
1089 /* No more fsm operations are to be done. Action ordering numbers are
1090 * no longer of use and will just hinder minimization. Clear them. */
1091 graph->nullActionKeys();
1093 /* Transition priorities are no longer of use. We can clear them
1094 * because they will just hinder minimization as well. Clear them. */
1095 graph->clearAllPriorities();
1097 if ( minimizeOpt != MinimizeNone ) {
1098 /* Minimize here even if we minimized at every op. Now that function
1099 * keys have been cleared we may get a more minimal fsm. */
1100 switch ( minimizeLevel ) {
1101 case MinimizeApprox:
1102 graph->minimizeApproximate();
1103 break;
1104 case MinimizeStable:
1105 graph->minimizeStable();
1106 break;
1107 case MinimizePartition1:
1108 graph->minimizePartition1();
1109 break;
1110 case MinimizePartition2:
1111 graph->minimizePartition2();
1112 break;
1116 graph->compressTransitions();
1118 return graph;
1121 void ParseData::printNameTree()
1123 /* Print the name instance map. */
1124 for ( NameVect::Iter name = rootName->childVect; name.lte(); name++ )
1125 printNameInst( *name, 0 );
1127 cerr << "name index:" << endl;
1128 /* Show that the name index is correct. */
1129 for ( int ni = 0; ni < nextNameId; ni++ ) {
1130 cerr << ni << ": ";
1131 const char *name = nameIndex[ni]->name;
1132 cerr << ( name != 0 ? name : "<ANON>" ) << endl;
1136 FsmAp *ParseData::makeSpecific( GraphDictEl *gdNode )
1138 /* Build the name tree and supporting data structures. */
1139 makeNameTree( gdNode );
1141 /* Resove name references from gdNode. */
1142 initNameWalk();
1143 gdNode->value->resolveNameRefs( this );
1145 /* Do not resolve action references. Since we are not building the entire
1146 * graph there's a good chance that many name references will fail. This
1147 * is okay since generating part of the graph is usually only done when
1148 * inspecting the compiled machine. */
1150 /* Same story for extern entry point references. */
1152 /* Flag this case so that the XML code generator is aware that we haven't
1153 * looked up name references in actions. It can then avoid segfaulting. */
1154 generatingSectionSubset = true;
1156 /* Just building the specified graph. */
1157 initNameWalk();
1158 FsmAp *mainGraph = makeInstance( gdNode );
1160 return mainGraph;
1163 FsmAp *ParseData::makeAll()
1165 /* Build the name tree and supporting data structures. */
1166 makeNameTree( 0 );
1168 /* Resove name references in the tree. */
1169 initNameWalk();
1170 for ( GraphList::Iter glel = instanceList; glel.lte(); glel++ )
1171 glel->value->resolveNameRefs( this );
1173 /* Resolve action code name references. */
1174 resolveActionNameRefs();
1176 /* Force name references to the top level instantiations. */
1177 for ( NameVect::Iter inst = rootName->childVect; inst.lte(); inst++ )
1178 (*inst)->numRefs += 1;
1180 FsmAp *mainGraph = 0;
1181 FsmAp **graphs = new FsmAp*[instanceList.length()];
1182 int numOthers = 0;
1184 /* Make all the instantiations, we know that main exists in this list. */
1185 initNameWalk();
1186 for ( GraphList::Iter glel = instanceList; glel.lte(); glel++ ) {
1187 if ( strcmp( glel->key, mainMachine ) == 0 ) {
1188 /* Main graph is always instantiated. */
1189 mainGraph = makeInstance( glel );
1191 else {
1192 /* Instantiate and store in others array. */
1193 graphs[numOthers++] = makeInstance( glel );
1197 if ( mainGraph == 0 )
1198 mainGraph = graphs[--numOthers];
1200 if ( numOthers > 0 ) {
1201 /* Add all the other graphs into main. */
1202 mainGraph->globOp( graphs, numOthers );
1205 delete[] graphs;
1206 return mainGraph;
1209 void ParseData::analyzeAction( Action *action, InlineList *inlineList )
1211 /* FIXME: Actions used as conditions should be very constrained. */
1212 for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) {
1213 if ( item->type == InlineItem::Call || item->type == InlineItem::CallExpr )
1214 action->anyCall = true;
1216 /* Need to recurse into longest match items. */
1217 if ( item->type == InlineItem::LmSwitch ) {
1218 LongestMatch *lm = item->longestMatch;
1219 for ( LmPartList::Iter lmi = *lm->longestMatchList; lmi.lte(); lmi++ ) {
1220 if ( lmi->action != 0 )
1221 analyzeAction( action, lmi->action->inlineList );
1225 if ( item->type == InlineItem::LmOnLast ||
1226 item->type == InlineItem::LmOnNext ||
1227 item->type == InlineItem::LmOnLagBehind )
1229 LongestMatchPart *lmi = item->longestMatchPart;
1230 if ( lmi->action != 0 )
1231 analyzeAction( action, lmi->action->inlineList );
1234 if ( item->children != 0 )
1235 analyzeAction( action, item->children );
1240 /* Check actions for bad uses of fsm directives. We don't go inside longest
1241 * match items in actions created by ragel, since we just want the user
1242 * actions. */
1243 void ParseData::checkInlineList( Action *act, InlineList *inlineList )
1245 for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) {
1246 /* EOF checks. */
1247 if ( act->numEofRefs > 0 ) {
1248 switch ( item->type ) {
1249 /* Currently no checks. */
1250 default:
1251 break;
1255 /* Recurse. */
1256 if ( item->children != 0 )
1257 checkInlineList( act, item->children );
1261 void ParseData::checkAction( Action *action )
1263 /* Check for actions with calls that are embedded within a longest match
1264 * machine. */
1265 if ( !action->isLmAction && action->numRefs() > 0 && action->anyCall ) {
1266 for ( ActionRefs::Iter ar = action->actionRefs; ar.lte(); ar++ ) {
1267 NameInst *check = *ar;
1268 while ( check != 0 ) {
1269 if ( check->isLongestMatch ) {
1270 error(action->loc) << "within a scanner, fcall is permitted"
1271 " only in pattern actions" << endl;
1272 break;
1274 check = check->parent;
1279 checkInlineList( action, action->inlineList );
1283 void ParseData::analyzeGraph( FsmAp *graph )
1285 for ( ActionList::Iter act = actionList; act.lte(); act++ )
1286 analyzeAction( act, act->inlineList );
1288 for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) {
1289 /* The transition list. */
1290 for ( TransList::Iter trans = st->outList; trans.lte(); trans++ ) {
1291 for ( ActionTable::Iter at = trans->actionTable; at.lte(); at++ )
1292 at->value->numTransRefs += 1;
1295 for ( ActionTable::Iter at = st->toStateActionTable; at.lte(); at++ )
1296 at->value->numToStateRefs += 1;
1298 for ( ActionTable::Iter at = st->fromStateActionTable; at.lte(); at++ )
1299 at->value->numFromStateRefs += 1;
1301 for ( ActionTable::Iter at = st->eofActionTable; at.lte(); at++ )
1302 at->value->numEofRefs += 1;
1304 for ( StateCondList::Iter sc = st->stateCondList; sc.lte(); sc++ ) {
1305 for ( CondSet::Iter sci = sc->condSpace->condSet; sci.lte(); sci++ )
1306 (*sci)->numCondRefs += 1;
1310 /* Checks for bad usage of directives in action code. */
1311 for ( ActionList::Iter act = actionList; act.lte(); act++ )
1312 checkAction( act );
1315 void ParseData::makeExportsNameTree()
1317 /* Make a name tree for the exports. */
1318 initExportsNameWalk();
1320 /* First make the name tree. */
1321 for ( GraphDict::Iter gdel = graphDict; gdel.lte(); gdel++ ) {
1322 if ( gdel->value->isExport ) {
1323 /* Recurse on the instance. */
1324 gdel->value->makeNameTree( gdel->loc, this );
1329 void ParseData::makeExports()
1331 makeExportsNameTree();
1333 /* Resove name references in the tree. */
1334 initExportsNameWalk();
1335 for ( GraphDict::Iter gdel = graphDict; gdel.lte(); gdel++ ) {
1336 if ( gdel->value->isExport )
1337 gdel->value->resolveNameRefs( this );
1340 /* Make all the instantiations, we know that main exists in this list. */
1341 initExportsNameWalk();
1342 for ( GraphDict::Iter gdel = graphDict; gdel.lte(); gdel++ ) {
1343 /* Check if this var def is an export. */
1344 if ( gdel->value->isExport ) {
1345 /* Build the graph from a walk of the parse tree. */
1346 FsmAp *graph = gdel->value->walk( this );
1348 /* Build the graph from a walk of the parse tree. */
1349 if ( !graph->checkSingleCharMachine() ) {
1350 error(gdel->loc) << "bad export machine, must define "
1351 "a single character" << endl;
1353 else {
1354 /* Safe to extract the key and declare the export. */
1355 Key exportKey = graph->startState->outList.head->lowKey;
1356 exportList.append( new Export( gdel->value->name, exportKey ) );
1363 /* Construct the machine and catch failures which can occur during
1364 * construction. */
1365 void ParseData::prepareMachineGen( GraphDictEl *graphDictEl )
1367 try {
1368 /* This machine construction can fail. */
1369 prepareMachineGenTBWrapped( graphDictEl );
1371 catch ( FsmConstructFail fail ) {
1372 switch ( fail.reason ) {
1373 case FsmConstructFail::CondNoKeySpace: {
1374 InputLoc &loc = alphTypeSet ? alphTypeLoc : sectionLoc;
1375 error(loc) << "sorry, no more characters are "
1376 "available in the alphabet space" << endl;
1377 error(loc) << " for conditions, please use a "
1378 "smaller alphtype or reduce" << endl;
1379 error(loc) << " the span of characters on which "
1380 "conditions are embedded" << endl;
1381 break;
1387 void ParseData::prepareMachineGenTBWrapped( GraphDictEl *graphDictEl )
1389 beginProcessing();
1390 initKeyOps();
1391 makeRootNames();
1392 initLongestMatchData();
1394 /* Make the graph, do minimization. */
1395 if ( graphDictEl == 0 )
1396 sectionGraph = makeAll();
1397 else
1398 sectionGraph = makeSpecific( graphDictEl );
1400 /* Compute exports from the export definitions. */
1401 makeExports();
1403 /* If any errors have occured in the input file then don't write anything. */
1404 if ( gblErrorCount > 0 )
1405 return;
1407 analyzeGraph( sectionGraph );
1409 /* Depends on the graph analysis. */
1410 setLongestMatchData( sectionGraph );
1412 /* Decide if an error state is necessary.
1413 * 1. There is an error transition
1414 * 2. There is a gap in the transitions
1415 * 3. The longest match operator requires it. */
1416 if ( lmRequiresErrorState || sectionGraph->hasErrorTrans() )
1417 sectionGraph->errState = sectionGraph->addState();
1419 /* State numbers need to be assigned such that all final states have a
1420 * larger state id number than all non-final states. This enables the
1421 * first_final mechanism to function correctly. We also want states to be
1422 * ordered in a predictable fashion. So we first apply a depth-first
1423 * search, then do a stable sort by final state status, then assign
1424 * numbers. */
1426 sectionGraph->depthFirstOrdering();
1427 sectionGraph->sortStatesByFinal();
1428 sectionGraph->setStateNumbers( 0 );
1431 void ParseData::generateReduced( InputData &inputData )
1433 beginProcessing();
1435 cgd = makeCodeGen( inputData.inputFileName, sectionName, *inputData.outStream );
1437 /* Make the generator. */
1438 BackendGen backendGen( sectionName, this, sectionGraph, cgd );
1440 /* Write out with it. */
1441 backendGen.makeBackend();
1443 if ( printStatistics ) {
1444 cerr << "fsm name : " << sectionName << endl;
1445 cerr << "num states: " << sectionGraph->stateList.length() << endl;
1446 cerr << endl;
1450 void ParseData::generateXML( ostream &out )
1452 beginProcessing();
1454 /* Make the generator. */
1455 XMLCodeGen codeGen( sectionName, this, sectionGraph, out );
1457 /* Write out with it. */
1458 codeGen.writeXML();
1460 if ( printStatistics ) {
1461 cerr << "fsm name : " << sectionName << endl;
1462 cerr << "num states: " << sectionGraph->stateList.length() << endl;
1463 cerr << endl;