2 * Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca>
3 * 2004 Erich Ocean <eric.ocean@ampede.com>
4 * 2005 Alan West <alan@alanz.com>
7 /* This file is part of Ragel.
9 * Ragel is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
14 * Ragel is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with Ragel; if not, write to the Free Software
21 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24 #include "rlgen-csharp.h"
25 #include "fsmcodegen.h"
35 using std::ostringstream
;
40 void lineDirective( ostream
&out
, char *fileName
, int line
)
42 if ( noLineDirectives
)
45 /* Write the preprocessor line directive referring to the input file. */
46 out
<< "#line " << line
<< " \"";
47 for ( char *pc
= fileName
; *pc
!= 0; pc
++ ) {
55 if ( noLineDirectives
)
61 void genLineDirective( ostream
&out
)
63 std::streambuf
*sbuf
= out
.rdbuf();
64 output_filter
*filter
= static_cast<output_filter
*>(sbuf
);
65 lineDirective( out
, filter
->fileName
, filter
->line
+ 1 );
69 /* Initialize the code generator with its input parameters. */
70 FsmCodeGen::FsmCodeGen( ostream
&out
)
76 unsigned int FsmCodeGen::arrayTypeSize( unsigned long maxVal
)
78 long long maxValLL
= (long long) maxVal
;
79 HostType
*arrayType
= keyOps
->typeSubsumes( maxValLL
);
80 assert( arrayType
!= 0 );
81 return arrayType
->size
;
84 string
FsmCodeGen::ARRAY_TYPE( unsigned long maxVal
)
86 return ARRAY_TYPE( maxVal
, false );
89 string
FsmCodeGen::ARRAY_TYPE( unsigned long maxVal
, bool forceSigned
)
91 long long maxValLL
= (long long) maxVal
;
94 arrayType
= keyOps
->typeSubsumes(true, maxValLL
);
96 arrayType
= keyOps
->typeSubsumes( maxValLL
);
97 assert( arrayType
!= 0 );
99 string ret
= arrayType
->data1
;
100 if ( arrayType
->data2
!= 0 ) {
102 ret
+= arrayType
->data2
;
107 /* Write out the fsm name. */
108 string
FsmCodeGen::FSM_NAME()
113 /* Emit the offset of the start state as a decimal integer. */
114 string
FsmCodeGen::START_STATE_ID()
117 ret
<< redFsm
->startState
->id
;
121 /* Write out the array of actions. */
122 std::ostream
&FsmCodeGen::ACTIONS_ARRAY()
125 int totalActions
= 1;
126 for ( ActionTableMap::Iter act
= redFsm
->actionMap
; act
.lte(); act
++ ) {
127 /* Write out the length, which will never be the last character. */
128 out
<< act
->key
.length() << ", ";
129 /* Put in a line break every 8 */
130 if ( totalActions
++ % 8 == 7 )
133 for ( ActionTable::Iter item
= act
->key
; item
.lte(); item
++ ) {
134 out
<< item
->value
->actionId
;
135 if ( ! (act
.last() && item
.last()) )
138 /* Put in a line break every 8 */
139 if ( totalActions
++ % 8 == 7 )
148 string
FsmCodeGen::ACCESS()
151 if ( accessExpr
!= 0 )
152 INLINE_LIST( ret
, accessExpr
, 0, false );
157 string
FsmCodeGen::P()
164 INLINE_LIST( ret
, pExpr
, 0, false );
170 string
FsmCodeGen::PE()
177 INLINE_LIST( ret
, peExpr
, 0, false );
183 string
FsmCodeGen::EOFV()
190 INLINE_LIST( ret
, eofExpr
, 0, false );
196 string
FsmCodeGen::CS()
200 ret
<< ACCESS() << "cs";
202 /* Emit the user supplied cs expression. */
204 INLINE_LIST( ret
, csExpr
, 0, false );
210 string
FsmCodeGen::TOP()
214 ret
<< ACCESS() + "top";
217 INLINE_LIST( ret
, topExpr
, 0, false );
223 string
FsmCodeGen::STACK()
226 if ( stackExpr
== 0 )
227 ret
<< ACCESS() + "stack";
230 INLINE_LIST( ret
, stackExpr
, 0, false );
236 string
FsmCodeGen::ACT()
240 ret
<< ACCESS() + "act";
243 INLINE_LIST( ret
, actExpr
, 0, false );
249 string
FsmCodeGen::TOKSTART()
252 if ( tokstartExpr
== 0 )
253 ret
<< ACCESS() + "ts";
256 INLINE_LIST( ret
, tokstartExpr
, 0, false );
262 string
FsmCodeGen::TOKEND()
265 if ( tokendExpr
== 0 )
266 ret
<< ACCESS() + "te";
269 INLINE_LIST( ret
, tokendExpr
, 0, false );
275 string
FsmCodeGen::GET_WIDE_KEY()
277 if ( redFsm
->anyConditions() )
283 string
FsmCodeGen::GET_WIDE_KEY( RedStateAp
*state
)
285 if ( state
->stateCondList
.length() > 0 )
291 string
FsmCodeGen::GET_KEY()
294 if ( getKeyExpr
!= 0 ) {
295 /* Emit the user supplied method of retrieving the key. */
297 INLINE_LIST( ret
, getKeyExpr
, 0, false );
301 /* Expression for retrieving the key, use simple dereference. */
302 ret
<< "(*" << P() << ")";
307 /* Write out level number of tabs. Makes the nested binary search nice to read. */
309 string
FsmCodeGen::TABS( int level
)
312 while ( level
-- > 0 )
317 /* Write out a key from the fsm code gen. Depends on whether or not the key is signed. */
319 string
FsmCodeGen::KEY( Key key
)
322 if ( keyOps
->isSigned
|| !hostLang
->explicitUnsigned
)
325 ret
<< (unsigned long) key
.getVal() << 'u';
329 string
FsmCodeGen::ALPHA_KEY( Key key
)
332 if (key
.getVal() > 0xFFFF) {
335 ret
<< "'\\u" << std::hex
<< std::setw(4) << std::setfill('0') <<
338 //ret << "(char) " << key.getVal();
342 void FsmCodeGen::EXEC( ostream
&ret
, InlineItem
*item
, int targState
, int inFinish
)
344 /* The parser gives fexec two children. The double brackets are for D
345 * code. If the inline list is a single word it will get interpreted as a
346 * C-style cast by the D compiler. */
347 ret
<< "{" << P() << " = ((";
348 INLINE_LIST( ret
, item
->children
, targState
, inFinish
);
352 void FsmCodeGen::LM_SWITCH( ostream
&ret
, InlineItem
*item
,
353 int targState
, int inFinish
)
356 " switch( " << ACT() << " ) {\n";
358 for ( InlineList::Iter lma
= *item
->children
; lma
.lte(); lma
++ ) {
359 /* Write the case label, the action and the case break. */
361 ret
<< " default:\n";
363 ret
<< " case " << lma
->lmId
<< ":\n";
365 /* Write the block and close it off. */
367 INLINE_LIST( ret
, lma
->children
, targState
, inFinish
);
378 void FsmCodeGen::SET_ACT( ostream
&ret
, InlineItem
*item
)
380 ret
<< ACT() << " = " << item
->lmId
<< ";";
383 void FsmCodeGen::SET_TOKEND( ostream
&ret
, InlineItem
*item
)
385 /* The tokend action sets tokend. */
386 ret
<< TOKEND() << " = " << P();
387 if ( item
->offset
!= 0 )
388 out
<< "+" << item
->offset
;
392 void FsmCodeGen::GET_TOKEND( ostream
&ret
, InlineItem
*item
)
397 void FsmCodeGen::INIT_TOKSTART( ostream
&ret
, InlineItem
*item
)
399 ret
<< TOKSTART() << " = " << NULL_ITEM() << ";";
402 void FsmCodeGen::INIT_ACT( ostream
&ret
, InlineItem
*item
)
404 ret
<< ACT() << " = 0;";
407 void FsmCodeGen::SET_TOKSTART( ostream
&ret
, InlineItem
*item
)
409 ret
<< TOKSTART() << " = " << P() << ";";
412 void FsmCodeGen::SUB_ACTION( ostream
&ret
, InlineItem
*item
,
413 int targState
, bool inFinish
)
415 if ( item
->children
->length() > 0 ) {
416 /* Write the block and close it off. */
418 INLINE_LIST( ret
, item
->children
, targState
, inFinish
);
424 /* Write out an inline tree structure. Walks the list and possibly calls out
425 * to virtual functions that handle language specific items in the tree. */
426 void FsmCodeGen::INLINE_LIST( ostream
&ret
, InlineList
*inlineList
,
427 int targState
, bool inFinish
)
429 for ( InlineList::Iter item
= *inlineList
; item
.lte(); item
++ ) {
430 switch ( item
->type
) {
431 case InlineItem::Text
:
434 case InlineItem::Goto
:
435 GOTO( ret
, item
->targState
->id
, inFinish
);
437 case InlineItem::Call
:
438 CALL( ret
, item
->targState
->id
, targState
, inFinish
);
440 case InlineItem::Next
:
441 NEXT( ret
, item
->targState
->id
, inFinish
);
443 case InlineItem::Ret
:
444 RET( ret
, inFinish
);
446 case InlineItem::PChar
:
449 case InlineItem::Char
:
452 case InlineItem::Hold
:
455 case InlineItem::Exec
:
456 EXEC( ret
, item
, targState
, inFinish
);
458 case InlineItem::Curs
:
459 CURS( ret
, inFinish
);
461 case InlineItem::Targs
:
462 TARGS( ret
, inFinish
, targState
);
464 case InlineItem::Entry
:
465 ret
<< item
->targState
->id
;
467 case InlineItem::GotoExpr
:
468 GOTO_EXPR( ret
, item
, inFinish
);
470 case InlineItem::CallExpr
:
471 CALL_EXPR( ret
, item
, targState
, inFinish
);
473 case InlineItem::NextExpr
:
474 NEXT_EXPR( ret
, item
, inFinish
);
476 case InlineItem::LmSwitch
:
477 LM_SWITCH( ret
, item
, targState
, inFinish
);
479 case InlineItem::LmSetActId
:
480 SET_ACT( ret
, item
);
482 case InlineItem::LmSetTokEnd
:
483 SET_TOKEND( ret
, item
);
485 case InlineItem::LmGetTokEnd
:
486 GET_TOKEND( ret
, item
);
488 case InlineItem::LmInitTokStart
:
489 INIT_TOKSTART( ret
, item
);
491 case InlineItem::LmInitAct
:
492 INIT_ACT( ret
, item
);
494 case InlineItem::LmSetTokStart
:
495 SET_TOKSTART( ret
, item
);
497 case InlineItem::SubAction
:
498 SUB_ACTION( ret
, item
, targState
, inFinish
);
500 case InlineItem::Break
:
501 BREAK( ret
, targState
);
506 /* Write out paths in line directives. Escapes any special characters. */
507 string
FsmCodeGen::LDIR_PATH( char *path
)
510 for ( char *pc
= path
; *pc
!= 0; pc
++ ) {
519 void FsmCodeGen::ACTION( ostream
&ret
, Action
*action
, int targState
, bool inFinish
)
521 /* Write the preprocessor line info for going into the source file. */
522 lineDirective( ret
, sourceFileName
, action
->loc
.line
);
524 /* Write the block and close it off. */
526 INLINE_LIST( ret
, action
->inlineList
, targState
, inFinish
);
530 void FsmCodeGen::CONDITION( ostream
&ret
, Action
*condition
)
533 lineDirective( ret
, sourceFileName
, condition
->loc
.line
);
534 INLINE_LIST( ret
, condition
->inlineList
, 0, false );
537 string
FsmCodeGen::ERROR_STATE()
540 if ( redFsm
->errState
!= 0 )
541 ret
<< redFsm
->errState
->id
;
547 string
FsmCodeGen::FIRST_FINAL_STATE()
550 if ( redFsm
->firstFinState
!= 0 )
551 ret
<< redFsm
->firstFinState
->id
;
553 ret
<< redFsm
->nextStateId
;
557 void FsmCodeGen::writeInit()
562 out
<< "\t" << CS() << " = " << START() << ";\n";
564 /* If there are any calls, then the stack top needs initialization. */
565 if ( redFsm
->anyActionCalls() || redFsm
->anyActionRets() )
566 out
<< "\t" << TOP() << " = 0;\n";
568 if ( hasLongestMatch
) {
570 " " << TOKSTART() << " = " << NULL_ITEM() << ";\n"
571 " " << TOKEND() << " = " << NULL_ITEM() << ";\n"
572 " " << ACT() << " = 0;\n";
577 string
FsmCodeGen::DATA_PREFIX()
580 return FSM_NAME() + "_";
584 /* Emit the alphabet data type. */
585 string
FsmCodeGen::ALPH_TYPE()
587 string ret
= keyOps
->alphType
->data1
;
588 if ( keyOps
->alphType
->data2
!= 0 ) {
590 ret
+= + keyOps
->alphType
->data2
;
595 /* Emit the wide alphabet data type. */
596 string
FsmCodeGen::WIDE_ALPH_TYPE()
599 if ( redFsm
->maxKey
<= keyOps
->maxKey
)
602 long long maxKeyVal
= redFsm
->maxKey
.getLongLong();
603 HostType
*wideType
= keyOps
->typeSubsumes( keyOps
->isSigned
, maxKeyVal
);
604 assert( wideType
!= 0 );
606 ret
= wideType
->data1
;
607 if ( wideType
->data2
!= 0 ) {
609 ret
+= wideType
->data2
;
615 void FsmCodeGen::STATE_IDS()
617 if ( redFsm
->startState
!= 0 )
618 STATIC_VAR( "int", START() ) << " = " << START_STATE_ID() << ";\n";
620 if ( writeFirstFinal
)
621 STATIC_VAR( "int" , FIRST_FINAL() ) << " = " << FIRST_FINAL_STATE() << ";\n";
624 STATIC_VAR( "int", ERROR() ) << " = " << ERROR_STATE() << ";\n";
628 if ( entryPointNames
.length() > 0 ) {
629 for ( EntryNameVect::Iter en
= entryPointNames
; en
.lte(); en
++ ) {
630 STATIC_VAR( "int", DATA_PREFIX() + "en_" + *en
) <<
631 " = " << entryPointIds
[en
.pos()] << ";\n";
640 string
CSharpCodeGen::GET_KEY()
643 if ( getKeyExpr
!= 0 ) {
644 /* Emit the user supplied method of retrieving the key. */
646 INLINE_LIST( ret
, getKeyExpr
, 0, false );
650 /* Expression for retrieving the key, use simple dereference. */
651 ret
<< "data[" << P() << "]";
655 string
CSharpCodeGen::NULL_ITEM()
660 string
CSharpCodeGen::POINTER()
662 // XXX C# has no pointers
663 // multiple items separated by commas can also be pointer types.
667 string
CSharpCodeGen::PTR_CONST()
672 std::ostream
&CSharpCodeGen::OPEN_ARRAY( string type
, string name
)
674 out
<< "static readonly " << type
<< "[] " << name
<< " = ";
677 out << "Encoding.ASCII.Get";
679 out
<< "new " << type
<< " [] {\n";
683 std::ostream
&CSharpCodeGen::CLOSE_ARRAY()
685 return out
<< "};\n";
688 std::ostream
&CSharpCodeGen::STATIC_VAR( string type
, string name
)
690 out
<< "const " << type
<< " " << name
;
694 string
CSharpCodeGen::ARR_OFF( string ptr
, string offset
)
696 // XXX C# can't do pointer arithmetic
697 return "&" + ptr
+ "[" + offset
+ "]";
700 string
CSharpCodeGen::CAST( string type
)
702 return "(" + type
+ ")";
705 string
CSharpCodeGen::UINT( )
710 std::ostream
&CSharpCodeGen::SWITCH_DEFAULT()
712 out
<< " default: break;\n";
716 string
CSharpCodeGen::CTRL_FLOW()
721 void CSharpCodeGen::writeExports()
723 if ( exportList
.length() > 0 ) {
724 for ( ExportList::Iter ex
= exportList
; ex
.lte(); ex
++ ) {
725 out
<< "const " << ALPH_TYPE() << " " << DATA_PREFIX() <<
726 "ex_" << ex
->name
<< " = " << KEY(ex
->key
) << ";\n";
733 * End C#-specific code.
736 void FsmCodeGen::finishRagelDef()
738 if ( codeStyle
== GenGoto
|| codeStyle
== GenFGoto
||
739 codeStyle
== GenIpGoto
|| codeStyle
== GenSplit
)
741 /* For directly executable machines there is no required state
742 * ordering. Choose a depth-first ordering to increase the
743 * potential for fall-throughs. */
744 redFsm
->depthFirstOrdering();
747 /* The frontend will do this for us, but it may be a good idea to
748 * force it if the intermediate file is edited. */
749 redFsm
->sortByStateId();
752 /* Choose default transitions and the single transition. */
753 redFsm
->chooseDefaultSpan();
755 /* Maybe do flat expand, otherwise choose single. */
756 if ( codeStyle
== GenFlat
|| codeStyle
== GenFFlat
)
759 redFsm
->chooseSingle();
761 /* If any errors have occurred in the input file then don't write anything. */
762 if ( gblErrorCount
> 0 )
765 if ( codeStyle
== GenSplit
)
766 redFsm
->partitionFsm( numSplitPartitions
);
768 if ( codeStyle
== GenIpGoto
|| codeStyle
== GenSplit
)
769 redFsm
->setInTrans();
771 /* Analyze Machine will find the final action reference counts, among
772 * other things. We will use these in reporting the usage
773 * of fsm directives in action code. */
776 /* Determine if we should use indices. */
780 ostream
&FsmCodeGen::source_warning( const InputLoc
&loc
)
782 cerr
<< sourceFileName
<< ":" << loc
.line
<< ":" << loc
.col
<< ": warning: ";
786 ostream
&FsmCodeGen::source_error( const InputLoc
&loc
)
789 assert( sourceFileName
!= 0 );
790 cerr
<< sourceFileName
<< ":" << loc
.line
<< ":" << loc
.col
<< ": ";