lm_switch needs to set p from tokend when there is no user action.
[ragel.git] / rlgen-csharp / fsmcodegen.cpp
blobdafb4bc446edc4f4575f1859ce920f658f157d49
1 /*
2 * Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca>
3 * 2004 Erich Ocean <eric.ocean@ampede.com>
4 * 2005 Alan West <alan@alanz.com>
5 */
7 /* This file is part of Ragel.
9 * Ragel is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
14 * Ragel is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with Ragel; if not, write to the Free Software
21 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24 #include "rlgen-csharp.h"
25 #include "fsmcodegen.h"
26 #include "redfsm.h"
27 #include "gendata.h"
28 #include <sstream>
29 #include <iomanip>
30 #include <string>
31 #include <assert.h>
34 using std::ostream;
35 using std::ostringstream;
36 using std::string;
37 using std::cerr;
38 using std::endl;
40 void lineDirective( ostream &out, char *fileName, int line )
42 if ( noLineDirectives )
43 out << "/* ";
45 /* Write the preprocessor line info for to the input file. */
46 out << "#line " << line << " \"";
47 for ( char *pc = fileName; *pc != 0; pc++ ) {
48 if ( *pc == '\\' )
49 out << "\\\\";
50 else
51 out << *pc;
53 out << '"';
55 if ( noLineDirectives )
56 out << " */";
58 out << '\n';
61 void genLineDirective( ostream &out )
63 std::streambuf *sbuf = out.rdbuf();
64 output_filter *filter = static_cast<output_filter*>(sbuf);
65 lineDirective( out, filter->fileName, filter->line + 1 );
69 /* Init code gen with in parameters. */
70 FsmCodeGen::FsmCodeGen( ostream &out )
72 CodeGenData(out)
76 unsigned int FsmCodeGen::arrayTypeSize( unsigned long maxVal )
78 long long maxValLL = (long long) maxVal;
79 HostType *arrayType = keyOps->typeSubsumes( maxValLL );
80 assert( arrayType != 0 );
81 return arrayType->size;
84 string FsmCodeGen::ARRAY_TYPE( unsigned long maxVal )
86 return ARRAY_TYPE( maxVal, false );
89 string FsmCodeGen::ARRAY_TYPE( unsigned long maxVal, bool forceSigned )
91 long long maxValLL = (long long) maxVal;
92 HostType *arrayType;
93 if (forceSigned)
94 arrayType = keyOps->typeSubsumes(true, maxValLL);
95 else
96 arrayType = keyOps->typeSubsumes( maxValLL );
97 assert( arrayType != 0 );
99 string ret = arrayType->data1;
100 if ( arrayType->data2 != 0 ) {
101 ret += " ";
102 ret += arrayType->data2;
104 return ret;
107 /* Write out the fsm name. */
108 string FsmCodeGen::FSM_NAME()
110 return fsmName;
113 /* Emit the offset of the start state as a decimal integer. */
114 string FsmCodeGen::START_STATE_ID()
116 ostringstream ret;
117 ret << redFsm->startState->id;
118 return ret.str();
121 /* Write out the array of actions. */
122 std::ostream &FsmCodeGen::ACTIONS_ARRAY()
124 out << "\t0, ";
125 int totalActions = 1;
126 for ( ActionTableMap::Iter act = redFsm->actionMap; act.lte(); act++ ) {
127 /* Write out the length, which will never be the last character. */
128 out << act->key.length() << ", ";
129 /* Put in a line break every 8 */
130 if ( totalActions++ % 8 == 7 )
131 out << "\n\t";
133 for ( ActionTable::Iter item = act->key; item.lte(); item++ ) {
134 out << item->value->actionId;
135 if ( ! (act.last() && item.last()) )
136 out << ", ";
138 /* Put in a line break every 8 */
139 if ( totalActions++ % 8 == 7 )
140 out << "\n\t";
143 out << "\n";
144 return out;
148 string FsmCodeGen::ACCESS()
150 ostringstream ret;
151 if ( accessExpr != 0 )
152 INLINE_LIST( ret, accessExpr, 0, false );
153 return ret.str();
157 string FsmCodeGen::P()
159 ostringstream ret;
160 if ( pExpr == 0 )
161 ret << "p";
162 else {
163 ret << "(";
164 INLINE_LIST( ret, pExpr, 0, false );
165 ret << ")";
167 return ret.str();
170 string FsmCodeGen::PE()
172 ostringstream ret;
173 if ( peExpr == 0 )
174 ret << "pe";
175 else {
176 ret << "(";
177 INLINE_LIST( ret, peExpr, 0, false );
178 ret << ")";
180 return ret.str();
183 string FsmCodeGen::EOFV()
185 ostringstream ret;
186 if ( eofExpr == 0 )
187 ret << "eof";
188 else {
189 ret << "(";
190 INLINE_LIST( ret, eofExpr, 0, false );
191 ret << ")";
193 return ret.str();
196 string FsmCodeGen::CS()
198 ostringstream ret;
199 if ( csExpr == 0 )
200 ret << ACCESS() << "cs";
201 else {
202 /* Emit the user supplied method of retrieving the key. */
203 ret << "(";
204 INLINE_LIST( ret, csExpr, 0, false );
205 ret << ")";
207 return ret.str();
210 string FsmCodeGen::TOP()
212 ostringstream ret;
213 if ( topExpr == 0 )
214 ret << ACCESS() + "top";
215 else {
216 ret << "(";
217 INLINE_LIST( ret, topExpr, 0, false );
218 ret << ")";
220 return ret.str();
223 string FsmCodeGen::STACK()
225 ostringstream ret;
226 if ( stackExpr == 0 )
227 ret << ACCESS() + "stack";
228 else {
229 ret << "(";
230 INLINE_LIST( ret, stackExpr, 0, false );
231 ret << ")";
233 return ret.str();
236 string FsmCodeGen::ACT()
238 ostringstream ret;
239 if ( actExpr == 0 )
240 ret << ACCESS() + "act";
241 else {
242 ret << "(";
243 INLINE_LIST( ret, actExpr, 0, false );
244 ret << ")";
246 return ret.str();
249 string FsmCodeGen::TOKSTART()
251 ostringstream ret;
252 if ( tokstartExpr == 0 )
253 ret << ACCESS() + "ts";
254 else {
255 ret << "(";
256 INLINE_LIST( ret, tokstartExpr, 0, false );
257 ret << ")";
259 return ret.str();
262 string FsmCodeGen::TOKEND()
264 ostringstream ret;
265 if ( tokendExpr == 0 )
266 ret << ACCESS() + "te";
267 else {
268 ret << "(";
269 INLINE_LIST( ret, tokendExpr, 0, false );
270 ret << ")";
272 return ret.str();
275 string FsmCodeGen::GET_WIDE_KEY()
277 if ( redFsm->anyConditions() )
278 return "_widec";
279 else
280 return GET_KEY();
283 string FsmCodeGen::GET_WIDE_KEY( RedStateAp *state )
285 if ( state->stateCondList.length() > 0 )
286 return "_widec";
287 else
288 return GET_KEY();
291 string FsmCodeGen::GET_KEY()
293 ostringstream ret;
294 if ( getKeyExpr != 0 ) {
295 /* Emit the user supplied method of retrieving the key. */
296 ret << "(";
297 INLINE_LIST( ret, getKeyExpr, 0, false );
298 ret << ")";
300 else {
301 /* Expression for retrieving the key, use simple dereference. */
302 ret << "(*" << P() << ")";
304 return ret.str();
307 /* Write out level number of tabs. Makes the nested binary search nice
308 * looking. */
309 string FsmCodeGen::TABS( int level )
311 string result;
312 while ( level-- > 0 )
313 result += "\t";
314 return result;
317 /* Write out a key from the fsm code gen. Depends on wether or not the key is
318 * signed. */
319 string FsmCodeGen::KEY( Key key )
321 ostringstream ret;
322 if ( keyOps->isSigned || !hostLang->explicitUnsigned )
323 ret << key.getVal();
324 else
325 ret << (unsigned long) key.getVal() << 'u';
326 return ret.str();
329 string FsmCodeGen::ALPHA_KEY( Key key )
331 ostringstream ret;
332 if (key.getVal() > 0xFFFF) {
333 ret << key.getVal();
334 } else {
335 ret << "'\\u" << std::hex << std::setw(4) << std::setfill('0') <<
336 key.getVal() << "'";
338 //ret << "(char) " << key.getVal();
339 return ret.str();
342 void FsmCodeGen::EXEC( ostream &ret, InlineItem *item, int targState, int inFinish )
344 /* The parser gives fexec two children. The double brackets are for D
345 * code. If the inline list is a single word it will get interpreted as a
346 * C-style cast by the D compiler. */
347 ret << "{" << P() << " = ((";
348 INLINE_LIST( ret, item->children, targState, inFinish );
349 ret << "))-1;}";
352 void FsmCodeGen::LM_SWITCH( ostream &ret, InlineItem *item,
353 int targState, int inFinish )
355 ret <<
356 " switch( " << ACT() << " ) {\n";
358 for ( InlineList::Iter lma = *item->children; lma.lte(); lma++ ) {
359 /* Write the case label, the action and the case break. */
360 if ( lma->lmId < 0 )
361 ret << " default:\n";
362 else
363 ret << " case " << lma->lmId << ":\n";
365 /* Write the block and close it off. */
366 ret << " {";
367 INLINE_LIST( ret, lma->children, targState, inFinish );
368 ret << "}\n";
370 ret << " break;\n";
373 ret <<
374 " }\n"
375 "\t";
378 void FsmCodeGen::SET_ACT( ostream &ret, InlineItem *item )
380 ret << ACT() << " = " << item->lmId << ";";
383 void FsmCodeGen::SET_TOKEND( ostream &ret, InlineItem *item )
385 /* The tokend action sets tokend. */
386 ret << TOKEND() << " = " << P();
387 if ( item->offset != 0 )
388 out << "+" << item->offset;
389 out << ";";
392 void FsmCodeGen::GET_TOKEND( ostream &ret, InlineItem *item )
394 ret << TOKEND();
397 void FsmCodeGen::INIT_TOKSTART( ostream &ret, InlineItem *item )
399 ret << TOKSTART() << " = " << NULL_ITEM() << ";";
402 void FsmCodeGen::INIT_ACT( ostream &ret, InlineItem *item )
404 ret << ACT() << " = 0;";
407 void FsmCodeGen::SET_TOKSTART( ostream &ret, InlineItem *item )
409 ret << TOKSTART() << " = " << P() << ";";
412 void FsmCodeGen::SUB_ACTION( ostream &ret, InlineItem *item,
413 int targState, bool inFinish )
415 if ( item->children->length() > 0 ) {
416 /* Write the block and close it off. */
417 ret << "{";
418 INLINE_LIST( ret, item->children, targState, inFinish );
419 ret << "}";
424 /* Write out an inline tree structure. Walks the list and possibly calls out
425 * to virtual functions than handle language specific items in the tree. */
426 void FsmCodeGen::INLINE_LIST( ostream &ret, InlineList *inlineList,
427 int targState, bool inFinish )
429 for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) {
430 switch ( item->type ) {
431 case InlineItem::Text:
432 ret << item->data;
433 break;
434 case InlineItem::Goto:
435 GOTO( ret, item->targState->id, inFinish );
436 break;
437 case InlineItem::Call:
438 CALL( ret, item->targState->id, targState, inFinish );
439 break;
440 case InlineItem::Next:
441 NEXT( ret, item->targState->id, inFinish );
442 break;
443 case InlineItem::Ret:
444 RET( ret, inFinish );
445 break;
446 case InlineItem::PChar:
447 ret << P();
448 break;
449 case InlineItem::Char:
450 ret << GET_KEY();
451 break;
452 case InlineItem::Hold:
453 ret << P() << "--;";
454 break;
455 case InlineItem::Exec:
456 EXEC( ret, item, targState, inFinish );
457 break;
458 case InlineItem::Curs:
459 CURS( ret, inFinish );
460 break;
461 case InlineItem::Targs:
462 TARGS( ret, inFinish, targState );
463 break;
464 case InlineItem::Entry:
465 ret << item->targState->id;
466 break;
467 case InlineItem::GotoExpr:
468 GOTO_EXPR( ret, item, inFinish );
469 break;
470 case InlineItem::CallExpr:
471 CALL_EXPR( ret, item, targState, inFinish );
472 break;
473 case InlineItem::NextExpr:
474 NEXT_EXPR( ret, item, inFinish );
475 break;
476 case InlineItem::LmSwitch:
477 LM_SWITCH( ret, item, targState, inFinish );
478 break;
479 case InlineItem::LmSetActId:
480 SET_ACT( ret, item );
481 break;
482 case InlineItem::LmSetTokEnd:
483 SET_TOKEND( ret, item );
484 break;
485 case InlineItem::LmGetTokEnd:
486 GET_TOKEND( ret, item );
487 break;
488 case InlineItem::LmInitTokStart:
489 INIT_TOKSTART( ret, item );
490 break;
491 case InlineItem::LmInitAct:
492 INIT_ACT( ret, item );
493 break;
494 case InlineItem::LmSetTokStart:
495 SET_TOKSTART( ret, item );
496 break;
497 case InlineItem::SubAction:
498 SUB_ACTION( ret, item, targState, inFinish );
499 break;
500 case InlineItem::Break:
501 BREAK( ret, targState );
502 break;
506 /* Write out paths in line directives. Escapes any special characters. */
507 string FsmCodeGen::LDIR_PATH( char *path )
509 ostringstream ret;
510 for ( char *pc = path; *pc != 0; pc++ ) {
511 if ( *pc == '\\' )
512 ret << "\\\\";
513 else
514 ret << *pc;
516 return ret.str();
519 void FsmCodeGen::ACTION( ostream &ret, Action *action, int targState, bool inFinish )
521 /* Write the preprocessor line info for going into the source file. */
522 lineDirective( ret, sourceFileName, action->loc.line );
524 /* Write the block and close it off. */
525 ret << "\t{";
526 INLINE_LIST( ret, action->inlineList, targState, inFinish );
527 ret << "}\n";
530 void FsmCodeGen::CONDITION( ostream &ret, Action *condition )
532 ret << "\n";
533 lineDirective( ret, sourceFileName, condition->loc.line );
534 INLINE_LIST( ret, condition->inlineList, 0, false );
537 string FsmCodeGen::ERROR_STATE()
539 ostringstream ret;
540 if ( redFsm->errState != 0 )
541 ret << redFsm->errState->id;
542 else
543 ret << "-1";
544 return ret.str();
547 string FsmCodeGen::FIRST_FINAL_STATE()
549 ostringstream ret;
550 if ( redFsm->firstFinState != 0 )
551 ret << redFsm->firstFinState->id;
552 else
553 ret << redFsm->nextStateId;
554 return ret.str();
557 void FsmCodeGen::writeInit()
559 out << " {\n";
561 if ( writeCS )
562 out << "\t" << CS() << " = " << START() << ";\n";
564 /* If there are any calls, then the stack top needs initialization. */
565 if ( redFsm->anyActionCalls() || redFsm->anyActionRets() )
566 out << "\t" << TOP() << " = 0;\n";
568 if ( hasLongestMatch ) {
569 out <<
570 " " << TOKSTART() << " = " << NULL_ITEM() << ";\n"
571 " " << TOKEND() << " = " << NULL_ITEM() << ";\n"
572 " " << ACT() << " = 0;\n";
574 out << " }\n";
577 string FsmCodeGen::DATA_PREFIX()
579 if ( dataPrefix )
580 return FSM_NAME() + "_";
581 return "";
584 /* Emit the alphabet data type. */
585 string FsmCodeGen::ALPH_TYPE()
587 string ret = keyOps->alphType->data1;
588 if ( keyOps->alphType->data2 != 0 ) {
589 ret += " ";
590 ret += + keyOps->alphType->data2;
592 return ret;
595 /* Emit the alphabet data type. */
596 string FsmCodeGen::WIDE_ALPH_TYPE()
598 string ret;
599 if ( redFsm->maxKey <= keyOps->maxKey )
600 ret = ALPH_TYPE();
601 else {
602 long long maxKeyVal = redFsm->maxKey.getLongLong();
603 HostType *wideType = keyOps->typeSubsumes( keyOps->isSigned, maxKeyVal );
604 assert( wideType != 0 );
606 ret = wideType->data1;
607 if ( wideType->data2 != 0 ) {
608 ret += " ";
609 ret += wideType->data2;
612 return ret;
615 void FsmCodeGen::STATE_IDS()
617 if ( redFsm->startState != 0 )
618 STATIC_VAR( "int", START() ) << " = " << START_STATE_ID() << ";\n";
620 if ( writeFirstFinal )
621 STATIC_VAR( "int" , FIRST_FINAL() ) << " = " << FIRST_FINAL_STATE() << ";\n";
623 if ( writeErr )
624 STATIC_VAR( "int", ERROR() ) << " = " << ERROR_STATE() << ";\n";
626 out << "\n";
628 if ( entryPointNames.length() > 0 ) {
629 for ( EntryNameVect::Iter en = entryPointNames; en.lte(); en++ ) {
630 STATIC_VAR( "int", DATA_PREFIX() + "en_" + *en ) <<
631 " = " << entryPointIds[en.pos()] << ";\n";
633 out << "\n";
638 * C# Specific
640 string CSharpCodeGen::GET_KEY()
642 ostringstream ret;
643 if ( getKeyExpr != 0 ) {
644 /* Emit the user supplied method of retrieving the key. */
645 ret << "(";
646 INLINE_LIST( ret, getKeyExpr, 0, false );
647 ret << ")";
649 else {
650 /* Expression for retrieving the key, use simple dereference. */
651 ret << "data[" << P() << "]";
653 return ret.str();
655 string CSharpCodeGen::NULL_ITEM()
657 return "-1";
660 string CSharpCodeGen::POINTER()
662 // XXX C# has no pointers
663 // multiple items seperated by commas can also be pointer types.
664 return " ";
667 string CSharpCodeGen::PTR_CONST()
669 return "";
672 std::ostream &CSharpCodeGen::OPEN_ARRAY( string type, string name )
674 out << "static readonly " << type << "[] " << name << " = ";
676 if (type == "char")
677 out << "Encoding.ASCII.Get";
678 else */
679 out << "new " << type << " [] {\n";
680 return out;
683 std::ostream &CSharpCodeGen::CLOSE_ARRAY()
685 return out << "};\n";
688 std::ostream &CSharpCodeGen::STATIC_VAR( string type, string name )
690 out << "const " << type << " " << name;
691 return out;
694 string CSharpCodeGen::ARR_OFF( string ptr, string offset )
696 // XXX C# can't do pointer arithmetic
697 return "&" + ptr + "[" + offset + "]";
700 string CSharpCodeGen::CAST( string type )
702 return "(" + type + ")";
705 string CSharpCodeGen::UINT( )
707 return "uint";
710 std::ostream &CSharpCodeGen::SWITCH_DEFAULT()
712 out << " default: break;\n";
713 return out;
716 string CSharpCodeGen::CTRL_FLOW()
718 return "if (true) ";
721 void CSharpCodeGen::writeExports()
723 if ( exportList.length() > 0 ) {
724 for ( ExportList::Iter ex = exportList; ex.lte(); ex++ ) {
725 out << "const " << ALPH_TYPE() << " " << DATA_PREFIX() <<
726 "ex_" << ex->name << " = " << KEY(ex->key) << ";\n";
728 out << "\n";
733 * End C#-specific code.
736 void FsmCodeGen::finishRagelDef()
738 if ( codeStyle == GenGoto || codeStyle == GenFGoto ||
739 codeStyle == GenIpGoto || codeStyle == GenSplit )
741 /* For directly executable machines there is no required state
742 * ordering. Choose a depth-first ordering to increase the
743 * potential for fall-throughs. */
744 redFsm->depthFirstOrdering();
746 else {
747 /* The frontend will do this for us, but it may be a good idea to
748 * force it if the intermediate file is edited. */
749 redFsm->sortByStateId();
752 /* Choose default transitions and the single transition. */
753 redFsm->chooseDefaultSpan();
755 /* Maybe do flat expand, otherwise choose single. */
756 if ( codeStyle == GenFlat || codeStyle == GenFFlat )
757 redFsm->makeFlat();
758 else
759 redFsm->chooseSingle();
761 /* If any errors have occured in the input file then don't write anything. */
762 if ( gblErrorCount > 0 )
763 return;
765 if ( codeStyle == GenSplit )
766 redFsm->partitionFsm( numSplitPartitions );
768 if ( codeStyle == GenIpGoto || codeStyle == GenSplit )
769 redFsm->setInTrans();
771 /* Anlayze Machine will find the final action reference counts, among
772 * other things. We will use these in reporting the usage
773 * of fsm directives in action code. */
774 analyzeMachine();
776 /* Determine if we should use indicies. */
777 calcIndexSize();
780 ostream &FsmCodeGen::source_warning( const InputLoc &loc )
782 cerr << sourceFileName << ":" << loc.line << ":" << loc.col << ": warning: ";
783 return cerr;
786 ostream &FsmCodeGen::source_error( const InputLoc &loc )
788 gblErrorCount += 1;
789 assert( sourceFileName != 0 );
790 cerr << sourceFileName << ":" << loc.line << ":" << loc.col << ": ";
791 return cerr;