Now supplying the XmlParser to writeMachines.
[ragel.git] / ragel / main.cpp
blob1e7ccc0a9ec0e9c4ad1b2712db2b3a85f080996f
1 /*
2 * Copyright 2001-2007 Adrian Thurston <thurston@cs.queensu.ca>
3 */
5 /* This file is part of Ragel.
7 * Ragel is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * Ragel is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Ragel; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 #include <stdlib.h>
23 #include <string.h>
24 #include <stdio.h>
25 #include <iostream>
26 #include <fstream>
27 #include <unistd.h>
28 #include <sstream>
29 #include <unistd.h>
30 #include <sys/types.h>
31 #include <sys/stat.h>
32 #include <fcntl.h>
33 #include <errno.h>
35 #ifdef _WIN32
36 #include <windows.h>
37 #include <psapi.h>
38 #include <time.h>
39 #include <io.h>
40 #include <process.h>
42 #if _MSC_VER
43 #define S_IRUSR _S_IREAD
44 #define S_IWUSR _S_IWRITE
45 #endif
46 #endif
48 /* Parsing. */
49 #include "ragel.h"
50 #include "rlscan.h"
52 /* Parameters and output. */
53 #include "pcheck.h"
54 #include "vector.h"
55 #include "version.h"
56 #include "common.h"
57 #include "xmlparse.h"
59 using std::istream;
60 using std::ostream;
61 using std::ifstream;
62 using std::ofstream;
63 using std::cin;
64 using std::cout;
65 using std::cerr;
66 using std::endl;
67 using std::ios;
68 using std::streamsize;
70 /* Controls minimization. */
71 MinimizeLevel minimizeLevel = MinimizePartition2;
72 MinimizeOpt minimizeOpt = MinimizeMostOps;
74 /* Graphviz dot file generation. */
75 const char *machineSpec = 0, *machineName = 0;
76 bool machineSpecFound = false;
77 bool wantDupsRemoved = true;
79 bool printStatistics = false;
80 bool frontendOnly = false;
81 bool generateDot = false;
83 /* Target language and output style. */
84 CodeStyleEnum codeStyle = GenTables;
86 int numSplitPartitions = 0;
87 bool noLineDirectives = false;
89 bool displayPrintables = false;
90 bool graphvizDone = false;
92 /* Target ruby impl */
93 RubyImplEnum rubyImpl = MRI;
95 ArgsVector includePaths;
97 istream *inStream = 0;
98 ostream *outStream = 0;
99 output_filter *outFilter = 0;
100 const char *outputFileName = 0;
102 /* Print a summary of the options. */
103 void usage()
105 cout <<
106 "usage: ragel [options] file\n"
107 "general:\n"
108 " -h, -H, -?, --help Print this usage and exit\n"
109 " -v, --version Print version information and exit\n"
110 " -o <file> Write output to <file>\n"
111 " -s Print some statistics on stderr\n"
112 " -d Do not remove duplicates from action lists\n"
113 " -I <dir> Add <dir> to the list of directories to search\n"
114 " for included an imported files\n"
115 "error reporting format:\n"
116 " --error-format=gnu file:line:column: message (default)\n"
117 " --error-format=msvc file(line,column): message\n"
118 "fsm minimization:\n"
119 " -n Do not perform minimization\n"
120 " -m Minimize at the end of the compilation\n"
121 " -l Minimize after most operations (default)\n"
122 " -e Minimize after every operation\n"
123 "visualization:\n"
124 " -x Run the frontend only: emit XML intermediate format\n"
125 " -V Generate a dot file for Graphviz\n"
126 " -p Display printable characters on labels\n"
127 " -S <spec> FSM specification to output (for rlgen-dot)\n"
128 " -M <machine> Machine definition/instantiation to output (for rlgen-dot)\n"
129 "host language:\n"
130 " -C The host language is C, C++, Obj-C or Obj-C++ (default)\n"
131 " -D The host language is D\n"
132 " -J The host language is Java\n"
133 " -R The host language is Ruby\n"
134 " -A The host language is C#\n"
135 "line direcives: (C/D/C# only)\n"
136 " -L Inhibit writing of #line directives\n"
137 "code style: (C/Ruby/C# only)\n"
138 " -T0 Table driven FSM (default)\n"
139 " -T1 Faster table driven FSM\n"
140 " -F0 Flat table driven FSM\n"
141 " -F1 Faster flat table-driven FSM\n"
142 "code style: (C/C# only)\n"
143 " -G0 Goto-driven FSM\n"
144 " -G1 Faster goto-driven FSM\n"
145 "code style: (C only)\n"
146 " -G2 Really fast goto-driven FSM\n"
147 " -P<N> N-Way Split really fast goto-driven FSM\n"
150 exit(0);
153 /* Print version information and exit. */
154 void version()
156 cout << "Ragel State Machine Compiler version " VERSION << " " PUBDATE << endl <<
157 "Copyright (c) 2001-2007 by Adrian Thurston" << endl;
158 exit(0);
161 /* Error reporting format. */
162 ErrorFormat errorFormat = ErrorFormatGNU;
164 InputLoc makeInputLoc( const char *fileName, int line, int col)
166 InputLoc loc = { fileName, line, col };
167 return loc;
170 ostream &operator<<( ostream &out, const InputLoc &loc )
172 assert( loc.fileName != 0 );
173 switch ( errorFormat ) {
174 case ErrorFormatMSVC:
175 out << loc.fileName << "(" << loc.line;
176 if ( loc.col )
177 out << "," << loc.col;
178 out << ")";
179 break;
181 default:
182 out << loc.fileName << ":" << loc.line;
183 if ( loc.col )
184 out << ":" << loc.col;
185 break;
187 return out;
/* Running total of errors reported through error(). A non-zero value makes
 * the program exit with a failure status at the next check. */
int gblErrorCount = 0;
193 /* Print the opening to a warning in the input, then return the error ostream. */
194 ostream &warning( const InputLoc &loc )
196 cerr << loc << ": warning: ";
197 return cerr;
200 /* Print the opening to a program error, then return the error stream. */
201 ostream &error()
203 gblErrorCount += 1;
204 cerr << PROGNAME ": ";
205 return cerr;
208 ostream &error( const InputLoc &loc )
210 gblErrorCount += 1;
211 cerr << loc << ": ";
212 return cerr;
/* Write path to out with every backslash doubled. All other characters are
 * copied through unchanged; the terminating NUL is not written. */
void escapeLineDirectivePath( std::ostream &out, char *path )
{
    const char *cursor = path;
    while ( *cursor != 0 ) {
        const char ch = *cursor++;
        if ( ch != '\\' )
            out << ch;
        else
            out << "\\\\";
    }
}
225 void processArgs( int argc, const char **argv, const char *&inputFileName )
227 ParamCheck pc("xo:dnmleabjkS:M:I:CDJRAvHh?-:sT:F:G:P:LpV", argc, argv);
229 /* FIXME: Need to check code styles VS langauge. */
231 while ( pc.check() ) {
232 switch ( pc.state ) {
233 case ParamCheck::match:
234 switch ( pc.parameter ) {
235 case 'V':
236 generateDot = true;
237 break;
239 case 'x':
240 frontendOnly = true;
241 break;
243 /* Output. */
244 case 'o':
245 if ( *pc.paramArg == 0 )
246 error() << "a zero length output file name was given" << endl;
247 else if ( outputFileName != 0 )
248 error() << "more than one output file name was given" << endl;
249 else {
250 /* Ok, remember the output file name. */
251 outputFileName = pc.paramArg;
253 break;
255 /* Flag for turning off duplicate action removal. */
256 case 'd':
257 wantDupsRemoved = false;
258 break;
260 /* Minimization, mostly hidden options. */
261 case 'n':
262 minimizeOpt = MinimizeNone;
263 break;
264 case 'm':
265 minimizeOpt = MinimizeEnd;
266 break;
267 case 'l':
268 minimizeOpt = MinimizeMostOps;
269 break;
270 case 'e':
271 minimizeOpt = MinimizeEveryOp;
272 break;
273 case 'a':
274 minimizeLevel = MinimizeApprox;
275 break;
276 case 'b':
277 minimizeLevel = MinimizeStable;
278 break;
279 case 'j':
280 minimizeLevel = MinimizePartition1;
281 break;
282 case 'k':
283 minimizeLevel = MinimizePartition2;
284 break;
286 /* Machine spec. */
287 case 'S':
288 if ( *pc.paramArg == 0 )
289 error() << "please specify an argument to -S" << endl;
290 else if ( machineSpec != 0 )
291 error() << "more than one -S argument was given" << endl;
292 else {
293 /* Ok, remember the path to the machine to generate. */
294 machineSpec = pc.paramArg;
296 break;
298 /* Machine path. */
299 case 'M':
300 if ( *pc.paramArg == 0 )
301 error() << "please specify an argument to -M" << endl;
302 else if ( machineName != 0 )
303 error() << "more than one -M argument was given" << endl;
304 else {
305 /* Ok, remember the machine name to generate. */
306 machineName = pc.paramArg;
308 break;
310 case 'I':
311 if ( *pc.paramArg == 0 )
312 error() << "please specify an argument to -I" << endl;
313 else {
314 includePaths.append( pc.paramArg );
316 break;
318 /* Host language types. */
319 case 'C':
320 hostLang = &hostLangC;
321 break;
322 case 'D':
323 hostLang = &hostLangD;
324 break;
325 case 'J':
326 hostLang = &hostLangJava;
327 break;
328 case 'R':
329 hostLang = &hostLangRuby;
330 break;
331 case 'A':
332 hostLang = &hostLangCSharp;
333 break;
335 /* Version and help. */
336 case 'v':
337 version();
338 break;
339 case 'H': case 'h': case '?':
340 usage();
341 break;
342 case 's':
343 printStatistics = true;
344 break;
345 case '-': {
346 char *eq = strchr( pc.paramArg, '=' );
348 if ( eq != 0 )
349 *eq++ = 0;
351 if ( strcmp( pc.paramArg, "help" ) == 0 )
352 usage();
353 else if ( strcmp( pc.paramArg, "version" ) == 0 )
354 version();
355 else if ( strcmp( pc.paramArg, "error-format" ) == 0 ) {
356 if ( eq == 0 )
357 error() << "expecting '=value' for error-format" << endl;
358 else if ( strcmp( eq, "gnu" ) == 0 )
359 errorFormat = ErrorFormatGNU;
360 else if ( strcmp( eq, "msvc" ) == 0 )
361 errorFormat = ErrorFormatMSVC;
362 else
363 error() << "invalid value for error-format" << endl;
365 else if ( strcmp( pc.paramArg, "rbx" ) == 0 )
366 rubyImpl = Rubinius;
367 else {
368 error() << "--" << pc.paramArg <<
369 " is an invalid argument" << endl;
371 break;
374 /* Passthrough args. */
375 case 'T':
376 if ( pc.paramArg[0] == '0' )
377 codeStyle = GenTables;
378 else if ( pc.paramArg[0] == '1' )
379 codeStyle = GenFTables;
380 else {
381 error() << "-T" << pc.paramArg[0] <<
382 " is an invalid argument" << endl;
383 exit(1);
385 break;
386 case 'F':
387 if ( pc.paramArg[0] == '0' )
388 codeStyle = GenFlat;
389 else if ( pc.paramArg[0] == '1' )
390 codeStyle = GenFFlat;
391 else {
392 error() << "-F" << pc.paramArg[0] <<
393 " is an invalid argument" << endl;
394 exit(1);
396 break;
397 case 'G':
398 if ( pc.paramArg[0] == '0' )
399 codeStyle = GenGoto;
400 else if ( pc.paramArg[0] == '1' )
401 codeStyle = GenFGoto;
402 else if ( pc.paramArg[0] == '2' )
403 codeStyle = GenIpGoto;
404 else {
405 error() << "-G" << pc.paramArg[0] <<
406 " is an invalid argument" << endl;
407 exit(1);
409 break;
410 case 'P':
411 codeStyle = GenSplit;
412 numSplitPartitions = atoi( pc.paramArg );
413 break;
415 case 'p':
416 displayPrintables = true;
417 break;
419 case 'L':
420 noLineDirectives = true;
421 break;
423 break;
425 case ParamCheck::invalid:
426 error() << "-" << pc.parameter << " is an invalid argument" << endl;
427 break;
429 case ParamCheck::noparam:
430 /* It is interpreted as an input file. */
431 if ( *pc.curArg == 0 )
432 error() << "a zero length input file name was given" << endl;
433 else if ( inputFileName != 0 )
434 error() << "more than one input file name was given" << endl;
435 else {
436 /* OK, Remember the filename. */
437 inputFileName = pc.curArg;
439 break;
444 void process( const char *inputFileName, const char *intermed )
446 const char *xmlFileName = intermed;
447 bool wantComplete = true;
448 bool outputActive = true;
450 /* Open the input file for reading. */
451 assert( inputFileName != 0 );
452 ifstream *inFile = new ifstream( inputFileName );
453 if ( ! inFile->is_open() )
454 error() << "could not open " << inputFileName << " for reading" << endp;
456 /* Used for just a few things. */
457 std::ostringstream hostData;
459 if ( machineSpec == 0 && machineName == 0 )
460 hostData << "<host line=\"1\" col=\"1\">";
462 Scanner scanner( inputFileName, *inFile, hostData, 0, 0, 0, false );
463 scanner.do_scan();
465 /* Finished, final check for errors.. */
466 if ( gblErrorCount > 0 )
467 exit(1);
469 /* Now send EOF to all parsers. */
470 terminateAllParsers();
472 /* Finished, final check for errors.. */
473 if ( gblErrorCount > 0 )
474 exit(1);
476 if ( machineSpec == 0 && machineName == 0 )
477 hostData << "</host>\n";
479 if ( gblErrorCount > 0 )
480 exit(1);
482 /* Open the XML file for writing. */
483 ostream *xmlOutFile = new ofstream( xmlFileName );
485 /* Open the XML file for reading. */
486 ifstream *xmlInFile = new ifstream( xmlFileName );
487 if ( ! xmlInFile->is_open() )
488 error() << "could not open " << xmlFileName << " for reading" << endl;
490 /* Bail on above error. */
491 if ( gblErrorCount > 0 )
492 exit(1);
494 /* Locate the backend program */
495 if ( generateDot ) {
496 wantComplete = false;
497 outputActive = false;
500 XmlScanner xmlScanner( xmlFileName, *xmlInFile );
501 XmlParser xmlParser( xmlFileName, outputActive, wantComplete );
502 xmlParser.init();
504 /* Write the machines, then the surrounding code. */
505 writeMachines( *xmlOutFile, hostData.str(), inputFileName, xmlParser );
507 /* Close the input and the intermediate file. */
508 delete xmlOutFile;
509 delete inFile;
511 /* Bail on above error. */
512 if ( gblErrorCount > 0 )
513 exit(1);
515 xml_parse( *xmlInFile, xmlFileName,
516 outputActive, wantComplete,
517 xmlScanner, xmlParser );
519 /* If writing to a file, delete the ostream, causing it to flush.
520 * Standard out is flushed automatically. */
521 if ( outputFileName != 0 ) {
522 delete outStream;
523 delete outFilter;
526 /* Finished, final check for errors.. */
527 if ( gblErrorCount > 0 ) {
528 /* If we opened an output file, remove it. */
529 if ( outputFileName != 0 )
530 unlink( outputFileName );
531 exit(1);
/* Build the name template for the intermediate file: the directory part of
 * baseFileName (everything up to and including the last '/', or nothing if
 * there is no '/') followed by "ragel-XXXXXX.xml".
 *
 * Returns a NUL-terminated string allocated with new[]; the caller owns it
 * and releases it with delete[]. baseFileName must not be null. */
char *makeIntermedTemplate( const char *baseFileName )
{
    static const char templ[] = "ragel-XXXXXX.xml";

    /* The input is const, so the search result has to be const as well. */
    const char *lastSlash = strrchr( baseFileName, '/' );
    const size_t dirLen = lastSlash != 0 ?
            (size_t)( lastSlash - baseFileName ) + 1 : 0;

    /* sizeof(templ) counts the terminating NUL. */
    char *result = new char[dirLen + sizeof(templ)];
    memcpy( result, baseFileName, dirLen );
    memcpy( result + dirLen, templ, sizeof(templ) );
    return result;
}
553 const char *openIntermed( const char *inputFileName, const char *outputFileName )
555 srand(time(0));
556 const char *result = 0;
558 /* Which filename do we use as the base? */
559 const char *baseFileName = outputFileName != 0 ? outputFileName : inputFileName;
561 /* The template for the intermediate file name. */
562 const char *intermedFileName = makeIntermedTemplate( baseFileName );
564 /* Randomize the name and try to open. */
565 char fnChars[] = "abcdefghijklmnopqrstuvwxyz0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
566 char *firstX = strrchr( intermedFileName, 'X' ) - 5;
567 for ( int tries = 0; tries < 20; tries++ ) {
568 /* Choose a random name. */
569 for ( int x = 0; x < 6; x++ )
570 firstX[x] = fnChars[rand() % 52];
572 /* Try to open the file. */
573 int fd = ::open( intermedFileName, O_WRONLY|O_EXCL|O_CREAT, S_IRUSR|S_IWUSR );
575 if ( fd > 0 ) {
576 /* Success. Close the file immediately and return the name for use
577 * by the child processes. */
578 ::close( fd );
579 result = intermedFileName;
580 break;
583 if ( errno == EACCES ) {
584 error() << "failed to open temp file " << intermedFileName <<
585 ", access denied" << endp;
589 if ( result == 0 )
590 error() << "abnormal error: cannot find unique name for temp file" << endp;
592 return result;
/* Remove the intermediate file, then terminate the program with the given
 * exit status. Does not return. */
void cleanExit( const char *intermed, int status )
{
    /* The result of unlink is deliberately ignored; we are exiting anyway. */
    unlink( intermed );

    exit( status );
}
602 /* Main, process args and call yyparse to start scanning input. */
603 int main( int argc, const char **argv )
605 const char *inputFileName = 0;
606 processArgs( argc, argv, inputFileName );
608 /* If -M or -S are given and we're not generating a dot file then invoke
609 * the frontend. These options are not useful with code generators. */
610 if ( machineName != 0 || machineSpec != 0 ) {
611 if ( !generateDot )
612 frontendOnly = true;
615 /* Require an input file. If we use standard in then we won't have a file
616 * name on which to base the output. */
617 if ( inputFileName == 0 )
618 error() << "no input file given" << endl;
620 /* Bail on argument processing errors. */
621 if ( gblErrorCount > 0 )
622 exit(1);
624 /* Make sure we are not writing to the same file as the input file. */
625 if ( inputFileName != 0 && outputFileName != 0 &&
626 strcmp( inputFileName, outputFileName ) == 0 )
628 error() << "output file \"" << outputFileName <<
629 "\" is the same as the input file" << endp;
632 const char *intermed = openIntermed( inputFileName, outputFileName );
633 process( inputFileName, intermed );
635 /* Clean up the intermediate. */
636 cleanExit( intermed, 0 );
638 return 0;