2 * Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca>
5 /* This file is part of Ragel.
7 * Ragel is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * Ragel is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Ragel; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
30 #include "parsedata.h"
31 #include "parsetree.h"
32 #include "mergesort.h"
33 #include "xmlcodegen.h"
38 char mainMachine
[] = "main";
40 void Token::set( const char *str
, int len
)
43 data
= new char[len
+1];
44 memcpy( data
, str
, len
);
48 void Token::append( const Token
&other
)
50 int newLength
= length
+ other
.length
;
51 char *newString
= new char[newLength
+1];
52 memcpy( newString
, data
, length
);
53 memcpy( newString
+ length
, other
.data
, other
.length
);
54 newString
[newLength
] = 0;
59 /* Perform minimization after an operation according
60 * to the command line args. */
61 void afterOpMinimize( FsmAp
*fsm
, bool lastInSeq
)
63 /* Switch on the prefered minimization algorithm. */
64 if ( minimizeOpt
== MinimizeEveryOp
|| ( minimizeOpt
== MinimizeMostOps
&& lastInSeq
) ) {
65 /* First clean up the graph. FsmAp operations may leave these
66 * lying around. There should be no dead end states. The subtract
67 * intersection operators are the only places where they may be
68 * created and those operators clean them up. */
69 fsm
->removeUnreachableStates();
71 switch ( minimizeLevel
) {
73 fsm
->minimizeApproximate();
75 case MinimizePartition1
:
76 fsm
->minimizePartition1();
78 case MinimizePartition2
:
79 fsm
->minimizePartition2();
82 fsm
->minimizeStable();
88 /* Count the transitions in the fsm by walking the state list. */
89 int countTransitions( FsmAp
*fsm
)
92 StateAp
*state
= fsm
->stateList
.head
;
93 while ( state
!= 0 ) {
94 numTrans
+= state
->outList
.length();
100 Key
makeFsmKeyHex( char *str
, const InputLoc
&loc
, ParseData
*pd
)
102 /* Reset errno so we can check for overflow or underflow. In the event of
103 * an error, sets the return val to the upper or lower bound being tested
106 unsigned int size
= keyOps
->alphType
->size
;
107 bool unusedBits
= size
< sizeof(unsigned long);
109 unsigned long ul
= strtoul( str
, 0, 16 );
111 if ( errno
== ERANGE
|| ( unusedBits
&& ul
>> (size
* 8) ) ) {
112 error(loc
) << "literal " << str
<< " overflows the alphabet type" << endl
;
113 ul
= 1 << (size
* 8);
116 if ( unusedBits
&& keyOps
->alphType
->isSigned
&& ul
>> (size
* 8 - 1) )
117 ul
|= (0xffffffff >> (size
*8 ) ) << (size
*8);
119 return Key( (long)ul
);
122 Key
makeFsmKeyDec( char *str
, const InputLoc
&loc
, ParseData
*pd
)
124 /* Convert the number to a decimal. First reset errno so we can check
125 * for overflow or underflow. */
127 long long minVal
= keyOps
->alphType
->minVal
;
128 long long maxVal
= keyOps
->alphType
->maxVal
;
130 long long ll
= strtoll( str
, 0, 10 );
132 /* Check for underflow. */
133 if ( ( errno
== ERANGE
&& ll
< 0 ) || ll
< minVal
) {
134 error(loc
) << "literal " << str
<< " underflows the alphabet type" << endl
;
137 /* Check for overflow. */
138 else if ( ( errno
== ERANGE
&& ll
> 0 ) || ll
> maxVal
) {
139 error(loc
) << "literal " << str
<< " overflows the alphabet type" << endl
;
143 if ( keyOps
->alphType
->isSigned
)
144 return Key( (long)ll
);
146 return Key( (unsigned long)ll
);
149 /* Make an fsm key in int format (what the fsm graph uses) from an alphabet
150 * number returned by the parser. Validates that the number doesn't overflow
151 * the alphabet type. */
152 Key
makeFsmKeyNum( char *str
, const InputLoc
&loc
, ParseData
*pd
)
154 /* Switch on hex/decimal format. */
155 if ( str
[0] == '0' && str
[1] == 'x' )
156 return makeFsmKeyHex( str
, loc
, pd
);
158 return makeFsmKeyDec( str
, loc
, pd
);
161 /* Make an fsm int format (what the fsm graph uses) from a single character.
162 * Performs proper conversion depending on signed/unsigned property of the
164 Key
makeFsmKeyChar( char c
, ParseData
*pd
)
166 if ( keyOps
->isSigned
) {
167 /* Copy from a char type. */
171 /* Copy from an unsigned byte type. */
172 return Key( (unsigned char)c
);
176 /* Make an fsm key array in int format (what the fsm graph uses) from a string
177 * of characters. Performs proper conversion depending on signed/unsigned
178 * property of the alphabet. */
179 void makeFsmKeyArray( Key
*result
, char *data
, int len
, ParseData
*pd
)
181 if ( keyOps
->isSigned
) {
182 /* Copy from a char star type. */
184 for ( int i
= 0; i
< len
; i
++ )
185 result
[i
] = Key(src
[i
]);
188 /* Copy from an unsigned byte ptr type. */
189 unsigned char *src
= (unsigned char*) data
;
190 for ( int i
= 0; i
< len
; i
++ )
191 result
[i
] = Key(src
[i
]);
195 /* Like makeFsmKeyArray except the result has only unique keys. They ordering
196 * will be changed. */
197 void makeFsmUniqueKeyArray( KeySet
&result
, char *data
, int len
,
198 bool caseInsensitive
, ParseData
*pd
)
200 /* Use a transitions list for getting unique keys. */
201 if ( keyOps
->isSigned
) {
202 /* Copy from a char star type. */
204 for ( int si
= 0; si
< len
; si
++ ) {
206 result
.insert( key
);
207 if ( caseInsensitive
) {
209 result
.insert( key
.toUpper() );
210 else if ( key
.isUpper() )
211 result
.insert( key
.toLower() );
216 /* Copy from an unsigned byte ptr type. */
217 unsigned char *src
= (unsigned char*) data
;
218 for ( int si
= 0; si
< len
; si
++ ) {
220 result
.insert( key
);
221 if ( caseInsensitive
) {
223 result
.insert( key
.toUpper() );
224 else if ( key
.isUpper() )
225 result
.insert( key
.toLower() );
231 FsmAp
*dotFsm( ParseData
*pd
)
233 FsmAp
*retFsm
= new FsmAp();
234 retFsm
->rangeFsm( keyOps
->minKey
, keyOps
->maxKey
);
238 FsmAp
*dotStarFsm( ParseData
*pd
)
240 FsmAp
*retFsm
= new FsmAp();
241 retFsm
->rangeStarFsm( keyOps
->minKey
, keyOps
->maxKey
);
245 /* Make a builtin type. Depends on the signed nature of the alphabet type. */
246 FsmAp
*makeBuiltin( BuiltinMachine builtin
, ParseData
*pd
)
248 /* FsmAp created to return. */
250 bool isSigned
= keyOps
->isSigned
;
254 /* All characters. */
255 retFsm
= dotFsm( pd
);
259 /* Ascii characters 0 to 127. */
260 retFsm
= new FsmAp();
261 retFsm
->rangeFsm( 0, 127 );
265 /* Ascii extended characters. This is the full byte range. Dependent
266 * on signed, vs no signed. If the alphabet is one byte then just use
269 retFsm
= new FsmAp();
270 retFsm
->rangeFsm( -128, 127 );
273 retFsm
= new FsmAp();
274 retFsm
->rangeFsm( 0, 255 );
279 /* Alpha [A-Za-z]. */
280 FsmAp
*upper
= new FsmAp(), *lower
= new FsmAp();
281 upper
->rangeFsm( 'A', 'Z' );
282 lower
->rangeFsm( 'a', 'z' );
283 upper
->unionOp( lower
);
284 upper
->minimizePartition2();
290 retFsm
= new FsmAp();
291 retFsm
->rangeFsm( '0', '9' );
295 /* Alpha numerics [0-9A-Za-z]. */
296 FsmAp
*digit
= new FsmAp(), *lower
= new FsmAp();
297 FsmAp
*upper
= new FsmAp();
298 digit
->rangeFsm( '0', '9' );
299 upper
->rangeFsm( 'A', 'Z' );
300 lower
->rangeFsm( 'a', 'z' );
301 digit
->unionOp( upper
);
302 digit
->unionOp( lower
);
303 digit
->minimizePartition2();
308 /* Lower case characters. */
309 retFsm
= new FsmAp();
310 retFsm
->rangeFsm( 'a', 'z' );
314 /* Upper case characters. */
315 retFsm
= new FsmAp();
316 retFsm
->rangeFsm( 'A', 'Z' );
320 /* Control characters. */
321 FsmAp
*cntrl
= new FsmAp();
322 FsmAp
*highChar
= new FsmAp();
323 cntrl
->rangeFsm( 0, 31 );
324 highChar
->concatFsm( 127 );
325 cntrl
->unionOp( highChar
);
326 cntrl
->minimizePartition2();
331 /* Graphical ascii characters [!-~]. */
332 retFsm
= new FsmAp();
333 retFsm
->rangeFsm( '!', '~' );
337 /* Printable characters. Same as graph except includes space. */
338 retFsm
= new FsmAp();
339 retFsm
->rangeFsm( ' ', '~' );
344 FsmAp
*range1
= new FsmAp();
345 FsmAp
*range2
= new FsmAp();
346 FsmAp
*range3
= new FsmAp();
347 FsmAp
*range4
= new FsmAp();
348 range1
->rangeFsm( '!', '/' );
349 range2
->rangeFsm( ':', '@' );
350 range3
->rangeFsm( '[', '`' );
351 range4
->rangeFsm( '{', '~' );
352 range1
->unionOp( range2
);
353 range1
->unionOp( range3
);
354 range1
->unionOp( range4
);
355 range1
->minimizePartition2();
360 /* Whitespace: [\t\v\f\n\r ]. */
361 FsmAp
*cntrl
= new FsmAp();
362 FsmAp
*space
= new FsmAp();
363 cntrl
->rangeFsm( '\t', '\r' );
364 space
->concatFsm( ' ' );
365 cntrl
->unionOp( space
);
366 cntrl
->minimizePartition2();
371 /* Hex digits [0-9A-Fa-f]. */
372 FsmAp
*digit
= new FsmAp();
373 FsmAp
*upper
= new FsmAp();
374 FsmAp
*lower
= new FsmAp();
375 digit
->rangeFsm( '0', '9' );
376 upper
->rangeFsm( 'A', 'F' );
377 lower
->rangeFsm( 'a', 'f' );
378 digit
->unionOp( upper
);
379 digit
->unionOp( lower
);
380 digit
->minimizePartition2();
385 retFsm
= new FsmAp();
390 retFsm
= new FsmAp();
398 /* Check if this name inst or any name inst below is referenced. */
399 bool NameInst::anyRefsRec()
404 /* Recurse on children until true. */
405 for ( NameVect::Iter ch
= childVect
; ch
.lte(); ch
++ ) {
406 if ( (*ch
)->anyRefsRec() )
417 /* Initialize the structure that will collect info during the parse of a
419 ParseData::ParseData( char *fileName
, char *sectionName
,
420 const InputLoc
§ionLoc
)
423 generatingSectionSubset(false),
425 /* 0 is reserved for global error actions. */
447 sectionName(sectionName
),
448 sectionLoc(sectionLoc
),
453 nextEpsilonResolvedLink(0),
454 nextLongestMatchId(1),
455 lmRequiresErrorState(false)
457 /* Initialize the dictionary of graphs. This is our symbol table. The
458 * initialization needs to be done on construction which happens at the
459 * beginning of a machine spec so any assignment operators can reference
464 /* Clean up the data collected during a parse. */
465 ParseData::~ParseData()
467 /* Delete all the nodes in the action list. Will cause all the
468 * string data that represents the actions to be deallocated. */
472 /* Make a name id in the current name instantiation scope if it is not
474 NameInst
*ParseData::addNameInst( const InputLoc
&loc
, const char *data
, bool isLabel
)
476 /* Create the name instantitaion object and insert it. */
477 NameInst
*newNameInst
= new NameInst( loc
, curNameInst
, data
, nextNameId
++, isLabel
);
478 curNameInst
->childVect
.append( newNameInst
);
480 curNameInst
->children
.insertMulti( data
, newNameInst
);
484 void ParseData::initNameWalk()
486 curNameInst
= rootName
;
490 void ParseData::initExportsNameWalk()
492 curNameInst
= exportsRootName
;
496 /* Goes into the next child scope. The number of the child is already set up.
497 * We need this for the syncronous name tree and parse tree walk to work
498 * properly. It is reset on entry into a scope and advanced on poping of a
499 * scope. A call to enterNameScope should be accompanied by a corresponding
501 NameFrame
ParseData::enterNameScope( bool isLocal
, int numScopes
)
503 /* Save off the current data. */
505 retFrame
.prevNameInst
= curNameInst
;
506 retFrame
.prevNameChild
= curNameChild
;
507 retFrame
.prevLocalScope
= localNameScope
;
509 /* Enter into the new name scope. */
510 for ( int i
= 0; i
< numScopes
; i
++ ) {
511 curNameInst
= curNameInst
->childVect
[curNameChild
];
516 localNameScope
= curNameInst
;
521 /* Return from a child scope to a parent. The parent info must be specified as
522 * an argument and is obtained from the corresponding call to enterNameScope.
524 void ParseData::popNameScope( const NameFrame
&frame
)
526 /* Pop the name scope. */
527 curNameInst
= frame
.prevNameInst
;
528 curNameChild
= frame
.prevNameChild
+1;
529 localNameScope
= frame
.prevLocalScope
;
532 void ParseData::resetNameScope( const NameFrame
&frame
)
534 /* Pop the name scope. */
535 curNameInst
= frame
.prevNameInst
;
536 curNameChild
= frame
.prevNameChild
;
537 localNameScope
= frame
.prevLocalScope
;
541 void ParseData::unsetObsoleteEntries( FsmAp
*graph
)
543 /* Loop the reference names and increment the usage. Names that are no
544 * longer needed will be unset in graph. */
545 for ( NameVect::Iter ref
= curNameInst
->referencedNames
; ref
.lte(); ref
++ ) {
547 NameInst
*name
= *ref
;
550 /* If the name is no longer needed unset its corresponding entry. */
551 if ( name
->numUses
== name
->numRefs
) {
552 assert( graph
->entryPoints
.find( name
->id
) != 0 );
553 graph
->unsetEntry( name
->id
);
554 assert( graph
->entryPoints
.find( name
->id
) == 0 );
559 NameSet
ParseData::resolvePart( NameInst
*refFrom
, const char *data
, bool recLabelsOnly
)
561 /* Queue needed for breadth-first search, load it with the start node. */
562 NameInstList nameQueue
;
563 nameQueue
.append( refFrom
);
566 while ( nameQueue
.length() > 0 ) {
567 /* Pull the next from location off the queue. */
568 NameInst
*from
= nameQueue
.detachFirst();
570 /* Look for the name. */
571 NameMapEl
*low
, *high
;
572 if ( from
->children
.findMulti( data
, low
, high
) ) {
573 /* Record all instances of the name. */
574 for ( ; low
<= high
; low
++ )
575 result
.insert( low
->value
);
578 /* Name not there, do breadth-first operation of appending all
579 * childrent to the processing queue. */
580 for ( NameVect::Iter name
= from
->childVect
; name
.lte(); name
++ ) {
581 if ( !recLabelsOnly
|| (*name
)->isLabel
)
582 nameQueue
.append( *name
);
586 /* Queue exhausted and name never found. */
590 void ParseData::resolveFrom( NameSet
&result
, NameInst
*refFrom
,
591 const NameRef
&nameRef
, int namePos
)
593 /* Look for the name in the owning scope of the factor with aug. */
594 NameSet partResult
= resolvePart( refFrom
, nameRef
[namePos
], false );
596 /* If there are more parts to the name then continue on. */
597 if ( ++namePos
< nameRef
.length() ) {
598 /* There are more components to the name, search using all the part
599 * results as the base. */
600 for ( NameSet::Iter name
= partResult
; name
.lte(); name
++ )
601 resolveFrom( result
, *name
, nameRef
, namePos
);
604 /* This is the last component, append the part results to the final
606 result
.insert( partResult
);
610 /* Write out a name reference. */
611 ostream
&operator<<( ostream
&out
, const NameRef
&nameRef
)
614 if ( nameRef
[pos
] == 0 ) {
618 out
<< nameRef
[pos
++];
619 for ( ; pos
< nameRef
.length(); pos
++ )
620 out
<< "::" << nameRef
[pos
];
624 ostream
&operator<<( ostream
&out
, const NameInst
&nameInst
)
626 /* Count the number fully qualified name parts. */
628 NameInst
*curParent
= nameInst
.parent
;
629 while ( curParent
!= 0 ) {
631 curParent
= curParent
->parent
;
634 /* Make an array and fill it in. */
635 curParent
= nameInst
.parent
;
636 NameInst
**parents
= new NameInst
*[numParents
];
637 for ( int p
= numParents
-1; p
>= 0; p
-- ) {
638 parents
[p
] = curParent
;
639 curParent
= curParent
->parent
;
642 /* Write the parents out, skip the root. */
643 for ( int p
= 1; p
< numParents
; p
++ )
644 out
<< "::" << ( parents
[p
]->name
!= 0 ? parents
[p
]->name
: "<ANON>" );
646 /* Write the name and cleanup. */
647 out
<< "::" << ( nameInst
.name
!= 0 ? nameInst
.name
: "<ANON>" );
652 struct CmpNameInstLoc
654 static int compare( const NameInst
*ni1
, const NameInst
*ni2
)
656 if ( ni1
->loc
.line
< ni2
->loc
.line
)
658 else if ( ni1
->loc
.line
> ni2
->loc
.line
)
660 else if ( ni1
->loc
.col
< ni2
->loc
.col
)
662 else if ( ni1
->loc
.col
> ni2
->loc
.col
)
668 void errorStateLabels( const NameSet
&resolved
)
670 MergeSort
<NameInst
*, CmpNameInstLoc
> mergeSort
;
671 mergeSort
.sort( resolved
.data
, resolved
.length() );
672 for ( NameSet::Iter res
= resolved
; res
.lte(); res
++ )
673 error((*res
)->loc
) << " -> " << **res
<< endl
;
677 NameInst
*ParseData::resolveStateRef( const NameRef
&nameRef
, InputLoc
&loc
, Action
*action
)
679 NameInst
*nameInst
= 0;
681 /* Do the local search if the name is not strictly a root level name
683 if ( nameRef
[0] != 0 ) {
684 /* If the action is referenced, resolve all of them. */
685 if ( action
!= 0 && action
->actionRefs
.length() > 0 ) {
686 /* Look for the name in all referencing scopes. */
688 for ( ActionRefs::Iter actRef
= action
->actionRefs
; actRef
.lte(); actRef
++ )
689 resolveFrom( resolved
, *actRef
, nameRef
, 0 );
691 if ( resolved
.length() > 0 ) {
692 /* Take the first one. */
693 nameInst
= resolved
[0];
694 if ( resolved
.length() > 1 ) {
695 /* Complain about the multiple references. */
696 error(loc
) << "state reference " << nameRef
<<
697 " resolves to multiple entry points" << endl
;
698 errorStateLabels( resolved
);
704 /* If not found in the local scope, look in global. */
705 if ( nameInst
== 0 ) {
707 int fromPos
= nameRef
[0] != 0 ? 0 : 1;
708 resolveFrom( resolved
, rootName
, nameRef
, fromPos
);
710 if ( resolved
.length() > 0 ) {
711 /* Take the first. */
712 nameInst
= resolved
[0];
713 if ( resolved
.length() > 1 ) {
714 /* Complain about the multiple references. */
715 error(loc
) << "state reference " << nameRef
<<
716 " resolves to multiple entry points" << endl
;
717 errorStateLabels( resolved
);
722 if ( nameInst
== 0 ) {
723 /* If not found then complain. */
724 error(loc
) << "could not resolve state reference " << nameRef
<< endl
;
729 void ParseData::resolveNameRefs( InlineList
*inlineList
, Action
*action
)
731 for ( InlineList::Iter item
= *inlineList
; item
.lte(); item
++ ) {
732 switch ( item
->type
) {
733 case InlineItem::Entry
: case InlineItem::Goto
:
734 case InlineItem::Call
: case InlineItem::Next
: {
735 /* Resolve, pass action for local search. */
736 NameInst
*target
= resolveStateRef( *item
->nameRef
, item
->loc
, action
);
738 /* Check if the target goes into a longest match. */
739 NameInst
*search
= target
->parent
;
740 while ( search
!= 0 ) {
741 if ( search
->isLongestMatch
) {
742 error(item
->loc
) << "cannot enter inside a longest "
743 "match construction as an entry point" << endl
;
746 search
= search
->parent
;
749 /* Note the reference in the name. This will cause the entry
750 * point to survive to the end of the graph generating walk. */
752 target
->numRefs
+= 1;
753 item
->nameTarg
= target
;
760 /* Some of the item types may have children. */
761 if ( item
->children
!= 0 )
762 resolveNameRefs( item
->children
, action
);
766 /* Resolve references to labels in actions. */
767 void ParseData::resolveActionNameRefs()
769 for ( ActionList::Iter act
= actionList
; act
.lte(); act
++ ) {
770 /* Only care about the actions that are referenced. */
771 if ( act
->actionRefs
.length() > 0 )
772 resolveNameRefs( act
->inlineList
, act
);
776 /* Walk a name tree starting at from and fill the name index. */
777 void ParseData::fillNameIndex( NameInst
*from
)
779 /* Fill the value for from in the name index. */
780 nameIndex
[from
->id
] = from
;
782 /* Recurse on the implicit final state and then all children. */
783 if ( from
->final
!= 0 )
784 fillNameIndex( from
->final
);
785 for ( NameVect::Iter name
= from
->childVect
; name
.lte(); name
++ )
786 fillNameIndex( *name
);
789 void ParseData::makeRootNames()
791 /* Create the root name. */
792 rootName
= new NameInst( InputLoc(), 0, 0, nextNameId
++, false );
793 exportsRootName
= new NameInst( InputLoc(), 0, 0, nextNameId
++, false );
796 /* Build the name tree and supporting data structures. */
797 void ParseData::makeNameTree( GraphDictEl
*dictEl
)
799 /* Set up curNameInst for the walk. */
803 /* A start location has been specified. */
804 dictEl
->value
->makeNameTree( dictEl
->loc
, this );
807 /* First make the name tree. */
808 for ( GraphList::Iter glel
= instanceList
; glel
.lte(); glel
++ ) {
809 /* Recurse on the instance. */
810 glel
->value
->makeNameTree( glel
->loc
, this );
814 /* The number of nodes in the tree can now be given by nextNameId */
815 nameIndex
= new NameInst
*[nextNameId
];
816 memset( nameIndex
, 0, sizeof(NameInst
*)*nextNameId
);
817 fillNameIndex( rootName
);
818 fillNameIndex( exportsRootName
);
822 void ParseData::createBuiltin( const char *name
, BuiltinMachine builtin
)
824 Expression
*expression
= new Expression( builtin
);
825 Join
*join
= new Join( expression
);
826 JoinOrLm
*joinOrLm
= new JoinOrLm( join
);
827 VarDef
*varDef
= new VarDef( name
, joinOrLm
);
828 GraphDictEl
*graphDictEl
= new GraphDictEl( name
, varDef
);
829 graphDict
.insert( graphDictEl
);
832 /* Initialize the graph dict with builtin types. */
833 void ParseData::initGraphDict( )
835 createBuiltin( "any", BT_Any
);
836 createBuiltin( "ascii", BT_Ascii
);
837 createBuiltin( "extend", BT_Extend
);
838 createBuiltin( "alpha", BT_Alpha
);
839 createBuiltin( "digit", BT_Digit
);
840 createBuiltin( "alnum", BT_Alnum
);
841 createBuiltin( "lower", BT_Lower
);
842 createBuiltin( "upper", BT_Upper
);
843 createBuiltin( "cntrl", BT_Cntrl
);
844 createBuiltin( "graph", BT_Graph
);
845 createBuiltin( "print", BT_Print
);
846 createBuiltin( "punct", BT_Punct
);
847 createBuiltin( "space", BT_Space
);
848 createBuiltin( "xdigit", BT_Xdigit
);
849 createBuiltin( "null", BT_Lambda
);
850 createBuiltin( "zlen", BT_Lambda
);
851 createBuiltin( "empty", BT_Empty
);
854 /* Set the alphabet type. If the types are not valid returns false. */
855 bool ParseData::setAlphType( const InputLoc
&loc
, char *s1
, char *s2
)
858 userAlphType
= findAlphType( s1
, s2
);
860 return userAlphType
!= 0;
863 /* Set the alphabet type. If the types are not valid returns false. */
864 bool ParseData::setAlphType( const InputLoc
&loc
, char *s1
)
867 userAlphType
= findAlphType( s1
);
869 return userAlphType
!= 0;
872 bool ParseData::setVariable( char *var
, InlineList
*inlineList
)
876 if ( strcmp( var
, "p" ) == 0 )
878 else if ( strcmp( var
, "pe" ) == 0 )
880 else if ( strcmp( var
, "eof" ) == 0 )
881 eofExpr
= inlineList
;
882 else if ( strcmp( var
, "cs" ) == 0 )
884 else if ( strcmp( var
, "data" ) == 0 )
885 dataExpr
= inlineList
;
886 else if ( strcmp( var
, "top" ) == 0 )
887 topExpr
= inlineList
;
888 else if ( strcmp( var
, "stack" ) == 0 )
889 stackExpr
= inlineList
;
890 else if ( strcmp( var
, "act" ) == 0 )
891 actExpr
= inlineList
;
892 else if ( strcmp( var
, "ts" ) == 0 )
893 tokstartExpr
= inlineList
;
894 else if ( strcmp( var
, "te" ) == 0 )
895 tokendExpr
= inlineList
;
902 /* Initialize the key operators object that will be referenced by all fsms
904 void ParseData::initKeyOps( )
906 /* Signedness and bounds. */
907 HostType
*alphType
= alphTypeSet
? userAlphType
: hostLang
->defaultAlphType
;
908 thisKeyOps
.setAlphType( alphType
);
910 if ( lowerNum
!= 0 ) {
911 /* If ranges are given then interpret the alphabet type. */
912 thisKeyOps
.minKey
= makeFsmKeyNum( lowerNum
, rangeLowLoc
, this );
913 thisKeyOps
.maxKey
= makeFsmKeyNum( upperNum
, rangeHighLoc
, this );
916 thisCondData
.lastCondKey
= thisKeyOps
.maxKey
;
919 void ParseData::printNameInst( NameInst
*nameInst
, int level
)
921 for ( int i
= 0; i
< level
; i
++ )
923 cerr
<< (nameInst
->name
!= 0 ? nameInst
->name
: "<ANON>") <<
924 " id: " << nameInst
->id
<<
925 " refs: " << nameInst
->numRefs
<<
926 " uses: " << nameInst
->numUses
<< endl
;
927 for ( NameVect::Iter name
= nameInst
->childVect
; name
.lte(); name
++ )
928 printNameInst( *name
, level
+1 );
931 /* Remove duplicates of unique actions from an action table. */
932 void ParseData::removeDups( ActionTable
&table
)
934 /* Scan through the table looking for unique actions to
935 * remove duplicates of. */
936 for ( int i
= 0; i
< table
.length(); i
++ ) {
937 /* Remove any duplicates ahead of i. */
938 for ( int r
= i
+1; r
< table
.length(); ) {
939 if ( table
[r
].value
== table
[i
].value
)
947 /* Remove duplicates from action lists. This operates only on transition and
948 * eof action lists and so should be called once all actions have been
949 * transfered to their final resting place. */
950 void ParseData::removeActionDups( FsmAp
*graph
)
952 /* Loop all states. */
953 for ( StateList::Iter state
= graph
->stateList
; state
.lte(); state
++ ) {
954 /* Loop all transitions. */
955 for ( TransList::Iter trans
= state
->outList
; trans
.lte(); trans
++ )
956 removeDups( trans
->actionTable
);
957 removeDups( state
->toStateActionTable
);
958 removeDups( state
->fromStateActionTable
);
959 removeDups( state
->eofActionTable
);
963 Action
*ParseData::newAction( const char *name
, InlineList
*inlineList
)
968 loc
.fileName
= "<NONE>";
970 Action
*action
= new Action( loc
, name
, inlineList
, nextCondId
++ );
971 action
->actionRefs
.append( rootName
);
972 actionList
.append( action
);
976 void ParseData::initLongestMatchData()
978 if ( lmList
.length() > 0 ) {
979 /* The initTokStart action resets the token start. */
980 InlineList
*il1
= new InlineList
;
981 il1
->append( new InlineItem( InputLoc(), InlineItem::LmInitTokStart
) );
982 initTokStart
= newAction( "initts", il1
);
983 initTokStart
->isLmAction
= true;
985 /* The initActId action gives act a default value. */
986 InlineList
*il4
= new InlineList
;
987 il4
->append( new InlineItem( InputLoc(), InlineItem::LmInitAct
) );
988 initActId
= newAction( "initact", il4
);
989 initActId
->isLmAction
= true;
991 /* The setTokStart action sets tokstart. */
992 InlineList
*il5
= new InlineList
;
993 il5
->append( new InlineItem( InputLoc(), InlineItem::LmSetTokStart
) );
994 setTokStart
= newAction( "ts", il5
);
995 setTokStart
->isLmAction
= true;
997 /* The setTokEnd action sets tokend. */
998 InlineList
*il3
= new InlineList
;
999 il3
->append( new InlineItem( InputLoc(), InlineItem::LmSetTokEnd
) );
1000 setTokEnd
= newAction( "te", il3
);
1001 setTokEnd
->isLmAction
= true;
1003 /* The action will also need an ordering: ahead of all user action
1005 initTokStartOrd
= curActionOrd
++;
1006 initActIdOrd
= curActionOrd
++;
1007 setTokStartOrd
= curActionOrd
++;
1008 setTokEndOrd
= curActionOrd
++;
1012 /* After building the graph, do some extra processing to ensure the runtime
1013 * data of the longest mactch operators is consistent. */
1014 void ParseData::setLongestMatchData( FsmAp
*graph
)
1016 if ( lmList
.length() > 0 ) {
1017 /* Make sure all entry points (targets of fgoto, fcall, fnext, fentry)
1018 * init the tokstart. */
1019 for ( EntryMap::Iter en
= graph
->entryPoints
; en
.lte(); en
++ ) {
1020 /* This is run after duplicates are removed, we must guard against
1021 * inserting a duplicate. */
1022 ActionTable
&actionTable
= en
->value
->toStateActionTable
;
1023 if ( ! actionTable
.hasAction( initTokStart
) )
1024 actionTable
.setAction( initTokStartOrd
, initTokStart
);
1027 /* Find the set of states that are the target of transitions with
1028 * actions that have calls. These states will be targeted by fret
1031 for ( StateList::Iter state
= graph
->stateList
; state
.lte(); state
++ ) {
1032 for ( TransList::Iter trans
= state
->outList
; trans
.lte(); trans
++ ) {
1033 for ( ActionTable::Iter ati
= trans
->actionTable
; ati
.lte(); ati
++ ) {
1034 if ( ati
->value
->anyCall
&& trans
->toState
!= 0 )
1035 states
.insert( trans
->toState
);
1041 /* Init tokstart upon entering the above collected states. */
1042 for ( StateSet::Iter ps
= states
; ps
.lte(); ps
++ ) {
1043 /* This is run after duplicates are removed, we must guard against
1044 * inserting a duplicate. */
1045 ActionTable
&actionTable
= (*ps
)->toStateActionTable
;
1046 if ( ! actionTable
.hasAction( initTokStart
) )
1047 actionTable
.setAction( initTokStartOrd
, initTokStart
);
1052 /* Make the graph from a graph dict node. Does minimization and state sorting. */
1053 FsmAp
*ParseData::makeInstance( GraphDictEl
*gdNode
)
1055 /* Build the graph from a walk of the parse tree. */
1056 FsmAp
*graph
= gdNode
->value
->walk( this );
1058 /* Resolve any labels that point to multiple states. Any labels that are
1059 * still around are referenced only by gotos and calls and they need to be
1060 * made into deterministic entry points. */
1061 graph
->deterministicEntry();
1064 * All state construction is now complete.
1067 /* Transfer actions from the out action tables to eof action tables. */
1068 for ( StateSet::Iter state
= graph
->finStateSet
; state
.lte(); state
++ )
1069 graph
->transferOutActions( *state
);
1071 /* Transfer global error actions. */
1072 for ( StateList::Iter state
= graph
->stateList
; state
.lte(); state
++ )
1073 graph
->transferErrorActions( state
, 0 );
1075 if ( ::wantDupsRemoved
)
1076 removeActionDups( graph
);
1078 /* Remove unreachable states. There should be no dead end states. The
1079 * subtract and intersection operators are the only places where they may
1080 * be created and those operators clean them up. */
1081 graph
->removeUnreachableStates();
1083 /* No more fsm operations are to be done. Action ordering numbers are
1084 * no longer of use and will just hinder minimization. Clear them. */
1085 graph
->nullActionKeys();
1087 /* Transition priorities are no longer of use. We can clear them
1088 * because they will just hinder minimization as well. Clear them. */
1089 graph
->clearAllPriorities();
1091 if ( minimizeOpt
!= MinimizeNone
) {
1092 /* Minimize here even if we minimized at every op. Now that function
1093 * keys have been cleared we may get a more minimal fsm. */
1094 switch ( minimizeLevel
) {
1095 case MinimizeApprox
:
1096 graph
->minimizeApproximate();
1098 case MinimizeStable
:
1099 graph
->minimizeStable();
1101 case MinimizePartition1
:
1102 graph
->minimizePartition1();
1104 case MinimizePartition2
:
1105 graph
->minimizePartition2();
1110 graph
->compressTransitions();
1115 void ParseData::printNameTree()
1117 /* Print the name instance map. */
1118 for ( NameVect::Iter name
= rootName
->childVect
; name
.lte(); name
++ )
1119 printNameInst( *name
, 0 );
1121 cerr
<< "name index:" << endl
;
1122 /* Show that the name index is correct. */
1123 for ( int ni
= 0; ni
< nextNameId
; ni
++ ) {
1125 const char *name
= nameIndex
[ni
]->name
;
1126 cerr
<< ( name
!= 0 ? name
: "<ANON>" ) << endl
;
1130 FsmAp
*ParseData::makeSpecific( GraphDictEl
*gdNode
)
1132 /* Build the name tree and supporting data structures. */
1133 makeNameTree( gdNode
);
1135 /* Resove name references from gdNode. */
1137 gdNode
->value
->resolveNameRefs( this );
1139 /* Do not resolve action references. Since we are not building the entire
1140 * graph there's a good chance that many name references will fail. This
1141 * is okay since generating part of the graph is usually only done when
1142 * inspecting the compiled machine. */
1144 /* Same story for extern entry point references. */
1146 /* Flag this case so that the XML code generator is aware that we haven't
1147 * looked up name references in actions. It can then avoid segfaulting. */
1148 generatingSectionSubset
= true;
1150 /* Just building the specified graph. */
1152 FsmAp
*mainGraph
= makeInstance( gdNode
);
1157 FsmAp
*ParseData::makeAll()
1159 /* Build the name tree and supporting data structures. */
1162 /* Resove name references in the tree. */
1164 for ( GraphList::Iter glel
= instanceList
; glel
.lte(); glel
++ )
1165 glel
->value
->resolveNameRefs( this );
1167 /* Resolve action code name references. */
1168 resolveActionNameRefs();
1170 /* Force name references to the top level instantiations. */
1171 for ( NameVect::Iter inst
= rootName
->childVect
; inst
.lte(); inst
++ )
1172 (*inst
)->numRefs
+= 1;
1174 FsmAp
*mainGraph
= 0;
1175 FsmAp
**graphs
= new FsmAp
*[instanceList
.length()];
1178 /* Make all the instantiations, we know that main exists in this list. */
1180 for ( GraphList::Iter glel
= instanceList
; glel
.lte(); glel
++ ) {
1181 if ( strcmp( glel
->key
, mainMachine
) == 0 ) {
1182 /* Main graph is always instantiated. */
1183 mainGraph
= makeInstance( glel
);
1186 /* Instantiate and store in others array. */
1187 graphs
[numOthers
++] = makeInstance( glel
);
1191 if ( mainGraph
== 0 )
1192 mainGraph
= graphs
[--numOthers
];
1194 if ( numOthers
> 0 ) {
1195 /* Add all the other graphs into main. */
1196 mainGraph
->globOp( graphs
, numOthers
);
1203 void ParseData::analyzeAction( Action
*action
, InlineList
*inlineList
)
1205 /* FIXME: Actions used as conditions should be very constrained. */
1206 for ( InlineList::Iter item
= *inlineList
; item
.lte(); item
++ ) {
1207 if ( item
->type
== InlineItem::Call
|| item
->type
== InlineItem::CallExpr
)
1208 action
->anyCall
= true;
1210 /* Need to recurse into longest match items. */
1211 if ( item
->type
== InlineItem::LmSwitch
) {
1212 LongestMatch
*lm
= item
->longestMatch
;
1213 for ( LmPartList::Iter lmi
= *lm
->longestMatchList
; lmi
.lte(); lmi
++ ) {
1214 if ( lmi
->action
!= 0 )
1215 analyzeAction( action
, lmi
->action
->inlineList
);
1219 if ( item
->type
== InlineItem::LmOnLast
||
1220 item
->type
== InlineItem::LmOnNext
||
1221 item
->type
== InlineItem::LmOnLagBehind
)
1223 LongestMatchPart
*lmi
= item
->longestMatchPart
;
1224 if ( lmi
->action
!= 0 )
1225 analyzeAction( action
, lmi
->action
->inlineList
);
1228 if ( item
->children
!= 0 )
1229 analyzeAction( action
, item
->children
);
1234 /* Check actions for bad uses of fsm directives. We don't go inside longest
1235 * match items in actions created by ragel, since we just want the user
1237 void ParseData::checkInlineList( Action
*act
, InlineList
*inlineList
)
1239 for ( InlineList::Iter item
= *inlineList
; item
.lte(); item
++ ) {
1241 if ( act
->numEofRefs
> 0 ) {
1242 switch ( item
->type
) {
1243 /* Currently no checks. */
1250 if ( item
->children
!= 0 )
1251 checkInlineList( act
, item
->children
);
1255 void ParseData::checkAction( Action
*action
)
1257 /* Check for actions with calls that are embedded within a longest match
1259 if ( !action
->isLmAction
&& action
->numRefs() > 0 && action
->anyCall
) {
1260 for ( ActionRefs::Iter ar
= action
->actionRefs
; ar
.lte(); ar
++ ) {
1261 NameInst
*check
= *ar
;
1262 while ( check
!= 0 ) {
1263 if ( check
->isLongestMatch
) {
1264 error(action
->loc
) << "within a scanner, fcall is permitted"
1265 " only in pattern actions" << endl
;
1268 check
= check
->parent
;
1273 checkInlineList( action
, action
->inlineList
);
1277 void ParseData::analyzeGraph( FsmAp
*graph
)
1279 for ( ActionList::Iter act
= actionList
; act
.lte(); act
++ )
1280 analyzeAction( act
, act
->inlineList
);
1282 for ( StateList::Iter st
= graph
->stateList
; st
.lte(); st
++ ) {
1283 /* The transition list. */
1284 for ( TransList::Iter trans
= st
->outList
; trans
.lte(); trans
++ ) {
1285 for ( ActionTable::Iter at
= trans
->actionTable
; at
.lte(); at
++ )
1286 at
->value
->numTransRefs
+= 1;
1289 for ( ActionTable::Iter at
= st
->toStateActionTable
; at
.lte(); at
++ )
1290 at
->value
->numToStateRefs
+= 1;
1292 for ( ActionTable::Iter at
= st
->fromStateActionTable
; at
.lte(); at
++ )
1293 at
->value
->numFromStateRefs
+= 1;
1295 for ( ActionTable::Iter at
= st
->eofActionTable
; at
.lte(); at
++ )
1296 at
->value
->numEofRefs
+= 1;
1298 for ( StateCondList::Iter sc
= st
->stateCondList
; sc
.lte(); sc
++ ) {
1299 for ( CondSet::Iter sci
= sc
->condSpace
->condSet
; sci
.lte(); sci
++ )
1300 (*sci
)->numCondRefs
+= 1;
1304 /* Checks for bad usage of directives in action code. */
1305 for ( ActionList::Iter act
= actionList
; act
.lte(); act
++ )
1309 void ParseData::makeExportsNameTree()
1311 /* Make a name tree for the exports. */
1312 initExportsNameWalk();
1314 /* First make the name tree. */
1315 for ( GraphDict::Iter gdel
= graphDict
; gdel
.lte(); gdel
++ ) {
1316 if ( gdel
->value
->isExport
) {
1317 /* Recurse on the instance. */
1318 gdel
->value
->makeNameTree( gdel
->loc
, this );
1323 void ParseData::makeExports()
1325 makeExportsNameTree();
1327 /* Resove name references in the tree. */
1328 initExportsNameWalk();
1329 for ( GraphDict::Iter gdel
= graphDict
; gdel
.lte(); gdel
++ ) {
1330 if ( gdel
->value
->isExport
)
1331 gdel
->value
->resolveNameRefs( this );
1334 /* Make all the instantiations, we know that main exists in this list. */
1335 initExportsNameWalk();
1336 for ( GraphDict::Iter gdel
= graphDict
; gdel
.lte(); gdel
++ ) {
1337 /* Check if this var def is an export. */
1338 if ( gdel
->value
->isExport
) {
1339 /* Build the graph from a walk of the parse tree. */
1340 FsmAp
*graph
= gdel
->value
->walk( this );
1342 /* Build the graph from a walk of the parse tree. */
1343 if ( !graph
->checkSingleCharMachine() ) {
1344 error(gdel
->loc
) << "bad export machine, must define "
1345 "a single character" << endl
;
1348 /* Safe to extract the key and declare the export. */
1349 Key exportKey
= graph
->startState
->outList
.head
->lowKey
;
1350 exportList
.append( new Export( gdel
->value
->name
, exportKey
) );
1357 /* Construct the machine and catch failures which can occur during
1359 void ParseData::prepareMachineGen( GraphDictEl
*graphDictEl
)
1362 /* This machine construction can fail. */
1363 prepareMachineGenTBWrapped( graphDictEl
);
1365 catch ( FsmConstructFail fail
) {
1366 switch ( fail
.reason
) {
1367 case FsmConstructFail::CondNoKeySpace
: {
1368 InputLoc
&loc
= alphTypeSet
? alphTypeLoc
: sectionLoc
;
1369 error(loc
) << "sorry, no more characters are "
1370 "available in the alphabet space" << endl
;
1371 error(loc
) << " for conditions, please use a "
1372 "smaller alphtype or reduce" << endl
;
1373 error(loc
) << " the span of characters on which "
1374 "conditions are embedded" << endl
;
1381 void ParseData::prepareMachineGenTBWrapped( GraphDictEl
*graphDictEl
)
1386 initLongestMatchData();
1388 /* Make the graph, do minimization. */
1389 if ( graphDictEl
== 0 )
1390 sectionGraph
= makeAll();
1392 sectionGraph
= makeSpecific( graphDictEl
);
1394 /* Compute exports from the export definitions. */
1397 /* If any errors have occured in the input file then don't write anything. */
1398 if ( gblErrorCount
> 0 )
1401 analyzeGraph( sectionGraph
);
1403 /* Depends on the graph analysis. */
1404 setLongestMatchData( sectionGraph
);
1406 /* Decide if an error state is necessary.
1407 * 1. There is an error transition
1408 * 2. There is a gap in the transitions
1409 * 3. The longest match operator requires it. */
1410 if ( lmRequiresErrorState
|| sectionGraph
->hasErrorTrans() )
1411 sectionGraph
->errState
= sectionGraph
->addState();
1413 /* State numbers need to be assigned such that all final states have a
1414 * larger state id number than all non-final states. This enables the
1415 * first_final mechanism to function correctly. We also want states to be
1416 * ordered in a predictable fashion. So we first apply a depth-first
1417 * search, then do a stable sort by final state status, then assign
1420 sectionGraph
->depthFirstOrdering();
1421 sectionGraph
->sortStatesByFinal();
1422 sectionGraph
->setStateNumbers( 0 );
1425 void ParseData::generateXML( ostream
&out
)
1429 /* Make the generator. */
1430 XMLCodeGen
codeGen( sectionName
, this, sectionGraph
, out
);
1432 /* Write out with it. */
1435 if ( printStatistics
) {
1436 cerr
<< "fsm name : " << sectionName
<< endl
;
1437 cerr
<< "num states: " << sectionGraph
->stateList
.length() << endl
;
1442 /* Send eof to all parsers. */
1443 void terminateAllParsers( )
1445 /* FIXME: a proper token is needed here. Suppose we should use the
1446 * location of EOF in the last file that the parser was referenced in. */
1448 loc
.fileName
= "<EOF>";
1451 for ( ParserDict::Iter pdel
= parserDict
; pdel
.lte(); pdel
++ )
1452 pdel
->value
->token( loc
, _eof
, 0, 0 );
1455 void writeLanguage( std::ostream
&out
)
1458 switch ( hostLang
->lang
) {
1459 case HostLang::C
: out
<< "C"; break;
1460 case HostLang::D
: out
<< "D"; break;
1461 case HostLang::Java
: out
<< "Java"; break;
1462 case HostLang::Ruby
: out
<< "Ruby"; break;
1463 case HostLang::CSharp
: out
<< "C#"; break;
1469 void writeMachines( std::ostream
&out
, std::string hostData
, char *inputFileName
)
1471 if ( machineSpec
== 0 && machineName
== 0 ) {
1472 /* No machine spec or machine name given. Generate everything. */
1473 for ( ParserDict::Iter parser
= parserDict
; parser
.lte(); parser
++ ) {
1474 ParseData
*pd
= parser
->value
->pd
;
1475 if ( pd
->instanceList
.length() > 0 )
1476 pd
->prepareMachineGen( 0 );
1479 if ( gblErrorCount
== 0 ) {
1480 out
<< "<ragel version=\"" VERSION
"\" filename=\"" << inputFileName
<< "\"";
1481 writeLanguage( out
);
1483 for ( ParserDict::Iter parser
= parserDict
; parser
.lte(); parser
++ ) {
1484 ParseData
*pd
= parser
->value
->pd
;
1485 if ( pd
->instanceList
.length() > 0 )
1486 pd
->generateXML( out
);
1489 out
<< "</ragel>\n";
1492 else if ( parserDict
.length() > 0 ) {
1493 /* There is either a machine spec or machine name given. */
1494 ParseData
*parseData
= 0;
1495 GraphDictEl
*graphDictEl
= 0;
1497 /* Traverse the sections, break out when we find a section/machine
1498 * that matches the one specified. */
1499 for ( ParserDict::Iter parser
= parserDict
; parser
.lte(); parser
++ ) {
1500 ParseData
*checkPd
= parser
->value
->pd
;
1501 if ( machineSpec
== 0 || strcmp( checkPd
->sectionName
, machineSpec
) == 0 ) {
1502 GraphDictEl
*checkGdEl
= 0;
1503 if ( machineName
== 0 || (checkGdEl
=
1504 checkPd
->graphDict
.find( machineName
)) != 0 )
1506 /* Have a machine spec and/or machine name that matches
1507 * the -M/-S options. */
1508 parseData
= checkPd
;
1509 graphDictEl
= checkGdEl
;
1515 if ( parseData
== 0 )
1516 error() << "could not locate machine specified with -S and/or -M" << endl
;
1518 /* Section/Machine to emit was found. Prepare and emit it. */
1519 parseData
->prepareMachineGen( graphDictEl
);
1520 if ( gblErrorCount
== 0 ) {
1521 out
<< "<ragel version=\"" VERSION
"\" filename=\"" << inputFileName
<< "\"";
1522 writeLanguage( out
);
1524 parseData
->generateXML( out
);
1526 out
<< "</ragel>\n";