Scanners now ensure that a pattern's leaving actions are executed.
[ragel.git] / examples / rlscan.rl
blobd4d4bf9712f2d04e3e5bdfcc993356b6088ab94d
1 /*
2  * Lexes Ragel input files.
3  */
5 #include <iostream>
6 #include <stdlib.h>
7 #include <stdio.h>
8 #include <string.h>
10 using namespace std;
/*
 * Writes the NUL-terminated string `data` to standard output, replacing the
 * XML metacharacters '<', '>' and '&' with "&lt;", "&gt;" and "&amp;".
 * Every other character is copied through unchanged.
 *
 * The text is only read, so the parameter is a pointer to const; existing
 * callers passing a plain char* keep working. A null pointer writes nothing.
 */
void escapeXML( const char *data )
{
        if ( data == 0 )
                return;

        for ( ; *data != 0; ++data ) {
                switch ( *data ) {
                        case '<': std::cout << "&lt;"; break;
                        case '>': std::cout << "&gt;"; break;
                        case '&': std::cout << "&amp;"; break;
                        default: std::cout << *data; break;
                }
        }
}
/*
 * Writes the single character `c` to standard output. The XML
 * metacharacters '<', '>' and '&' are written as "&lt;", "&gt;" and
 * "&amp;"; any other character is written as is.
 */
void escapeXML( char c )
{
        switch ( c ) {
                case '<': std::cout << "&lt;"; break;
                case '>': std::cout << "&gt;"; break;
                case '&': std::cout << "&amp;"; break;
                default: std::cout << c; break;
        }
}
/*
 * Writes exactly `len` characters starting at `data` to standard output,
 * replacing '<', '>' and '&' with "&lt;", "&gt;" and "&amp;". The range is
 * length-delimited, so it does not need to be NUL-terminated.
 *
 * The text is only read, so the parameter is a pointer to const. The loop
 * counts up to `len` instead of comparing against an end pointer, so a zero
 * or negative length writes nothing rather than running past the buffer.
 */
void escapeXML( const char *data, int len )
{
        for ( int i = 0; i < len; ++i ) {
                switch ( data[i] ) {
                        case '<': std::cout << "&lt;"; break;
                        case '>': std::cout << "&gt;"; break;
                        case '&': std::cout << "&amp;"; break;
                        default: std::cout << data[i]; break;
                }
        }
}
/* Writes the NUL-terminated string `data` to standard output, unescaped. */
inline void write( const char *data )
{
        std::cout << data;
}
/* Writes the single character `c` to standard output, unescaped. */
inline void write( char c )
{
        std::cout << c;
}
/*
 * Writes `len` characters starting at `data` to standard output, unescaped.
 * The buffer is only read, so the parameter is a pointer to const; callers
 * passing a plain char* are unaffected.
 */
inline void write( const char *data, int len )
{
        std::cout.write( data, len );
}
63 %%{
           # Names the machine. Generated identifiers carry this name as a
           # prefix, for example the RagelScan_error constant tested in main().
64         machine RagelScan;
           # Identifier: a letter or underscore, then letters, digits or underscores.
66         word = [a-zA-Z_][a-zA-Z_0-9]*;
           # One or more decimal digits.
67         integer = [0-9]+;
           # The prefix 0x followed by one or more hexadecimal digits.
68         hex = '0x' [0-9a-fA-F] [0-9a-fA-F]*;
           # Any character except the NUL character.
70         default = ^0;
           # The NUL character, which the outside-code scanner matches as EOF.
71         EOF = 0;
73         # Handles comments in outside code and inline blocks.
           # The calling scanner enters this machine with fcall once it has
           # already matched the comment opener. Every character consumed
           # here, including the two characters of the terminator, is echoed
           # through escapeXML. On the final character of the terminator fret
           # returns control to the scanner that made the call.
74         c_comment := 
75                 ( default* :>> '*/' )
76                 ${ escapeXML( fc ); }
77                 @{ fret; };
79         action emit {
                   /* Echo the text of the current token, from ts up to (not
                    * including) te, with XML metacharacters escaped. */
80                 escapeXML( ts, te-ts );
81         }
83         #
84         # Inline action code
           #
           # Scanner for host code inside a brace-delimited block of a Ragel
           # section. The text is echoed to the output, and inline_depth counts
           # brace nesting so the scanner knows which closing brace ends the
           # block and hands control back to rlscan.
85         #
87         ilscan := |*
                   # Quoted literals are matched as whole tokens and echoed, so
                   # braces inside them never reach the brace patterns below.
89                 "'" ( [^'\\] | /\\./ )* "'" => emit;
90                 '"' ( [^"\\] | /\\./ )* '"' => emit;
                   # Block comment: write the opener, then let c_comment echo
                   # the remainder and return here.
91                 '/*' {
92                         write( "/*" );
93                         fcall c_comment;
94                 };
                   # Line comment, echoed together with its newline.
95                 '//' [^\n]* '\n' => emit;
                   # A nested opening brace is echoed and deepens the nesting.
97                 '{' {
98                         write( '{' );
99                         inline_depth += 1; 
100                 };
102                 '}' {
103                         write( '}' );
104                         /* If dropping down to the last } then return 
105                          * to ragel code. */
106                         if ( --inline_depth == 0 ) {
107                                 write( "</inline>\n" );
108                                 fgoto rlscan;
109                         }
110                 };
                   # Any other single character is echoed with escaping.
112                 default => { escapeXML( *ts ); };
113         *|;
115         #
116         # Ragel Tokens
            #
            # Scanner for the Ragel specification text inside a section. Each
            # recognised token is written as one XML element on its own line.
            # The section ends, and control returns to the main scanner, at the
            # closing marker of a multi-line section or at the newline of a
            # single-line section.
117         #
119         rlscan := |*
                    # Closing marker of a multi-line section. In a single-line
                    # section the token is matched but nothing is written.
120                 '}%%' {
121                         if ( !single_line ) {
122                                 write( "</section>\n" );
123                                 fgoto main;
124                         }
125                 };
                    # A newline ends a single-line section. In a multi-line
                    # section it is matched and produces no output.
127                 '\n' {
128                         if ( single_line ) {
129                                 write( "</section>\n" );
130                                 fgoto main;
131                         }
132                 };
134                 # Word
135                 word {
136                         write( "<word>" );
137                         write( ts, te-ts );
138                         write( "</word>\n" );
139                 };
141                 # Decimal integer.
142                 integer {
143                         write( "<int>" );
144                         write( ts, te-ts );
145                         write( "</int>\n" );
146                 };
148                 # Hexadecimal integer.
149                 hex {
150                         write( "<hex>" );
151                         write( ts, te-ts );
152                         write( "</hex>\n" );
153                 };
155                 # Consume comments.
156                 '#' [^\n]* '\n';
158                 # Single literal string.
159                 "'" ( [^'\\] | /\\./ )* "'" {
160                         write( "<single_lit>" );
161                         escapeXML( ts, te-ts );
162                         write( "</single_lit>\n" );
163                 };
165                 # Double literal string.
166                 '"' ( [^"\\] | /\\./ )* '"' {
167                         write( "<double_lit>" );
168                         escapeXML( ts, te-ts );
169                         write( "</double_lit>\n" );
170                 };
172                 # Or literal.
173                 '[' ( [^\]\\] | /\\./ )* ']' {
174                         write( "<or_lit>" );
175                         escapeXML( ts, te-ts );
176                         write( "</or_lit>\n" );
177                 };
179                 # Regex Literal.
180                 '/' ( [^/\\] | /\\./ ) * '/' {
181                         write( "<re_lit>" );
182                         escapeXML( ts, te-ts );
183                         write( "</re_lit>\n" );
184                 };
186                 # Open an inline block
187                 '{' {
188                         inline_depth = 1;
189                         write( "<inline>{" );
190                         fgoto ilscan;
191                 };
                    # A punctuation character becomes a symbol element.
193                 punct {
194                         write( "<symbol>" );
195                         escapeXML( fc );
196                         write( "</symbol>\n" );
197                 };
198                 
                    # Any other character is matched and discarded without output.
199                 default;
200         *|;
202         #
203         # Outside code.
            #
            # Scanner for host code outside any Ragel section. The text is
            # echoed with XML escaping until a section opener is found, at
            # which point a section element is opened and control passes to
            # rlscan.
204         #
206         main := |*
                    # Quoted literals are matched as whole tokens and echoed, so
                    # a section opener inside them is not acted upon.
208                 "'" ( [^'\\] | /\\./ )* "'" => emit;
209                 '"' ( [^"\\] | /\\./ )* '"' => emit;
                    # Block comment: echo the opener, then let c_comment echo
                    # the remainder and return here.
211                 '/*' {
212                         escapeXML( ts, te-ts );
213                         fcall c_comment;
214                 };
                    # Line comment, echoed together with its newline.
216                 '//' [^\n]* '\n' => emit;
                    # Opener of a multi-line section.
218                 '%%{' { 
219                         write( "<section>\n" );
220                         single_line = false;
221                         fgoto rlscan;
222                 };
                    # Opener of a single-line section, which rlscan ends at the
                    # next newline.
224                 '%%' {
225                         write( "<section>\n" ); 
226                         single_line = true; 
227                         fgoto rlscan;
228                 };
                    # Any other single character is echoed with escaping.
230                 default { 
231                         escapeXML( *ts );
232                 };
234                 # EOF.
                    # A NUL character is matched and produces no output.
235                 EOF;
236         *|;
/* Ragel directive: emits the machine's static data at this point. The
 * nofinal option presumably omits the final-state constant, which this
 * program never reads -- confirm against the Ragel guide. */
239 %% write data nofinal;
/* Size in bytes of the input buffer used by main(). A single token that
 * fills the whole buffer is reported there as "TOKEN TOO BIG". */
241 #define BUFSIZE 2048
/*
 * Reads standard input in blocks of at most BUFSIZE bytes, runs the
 * RagelScan machine over each block and lets the scanner actions write the
 * XML result to standard output. An unfinished token at the end of a block
 * is moved to the front of the buffer before the next read. Exits with
 * status 1 if a token fills the whole buffer or the machine enters its
 * error state; otherwise returns 0 once the input is exhausted.
 */
243 int main()
245         std::ios::sync_with_stdio(false);
            /* Variables referenced by the Ragel-generated code: current
             * state, scanner action id, token start and end, and the call
             * stack used by fcall/fret. One stack slot is enough because
             * c_comment is the only fcall target and makes no calls itself. */
247         int cs, act;
248         char *ts, *te;
249         int stack[1], top;
251         static char inbuf[BUFSIZE];
            /* State read and written by the scanner actions. */
252         bool single_line = false;
253         int inline_depth = 0;
            /* Ragel directive: emits initialisation of the machine variables. */
255         %% write init;
257         bool done = false;
            /* Bytes of an unfinished token carried over at the start of inbuf. */
258         int have = 0;
259         while ( !done ) {
260                 /* How much space is in the buffer? */
261                 int space = BUFSIZE - have;
262                 if ( space == 0 ) {
263                         /* Buffer is full. */
264                         cerr << "TOKEN TOO BIG" << endl;
265                         exit(1);
266                 }
268                 /* Read in a block. */
269                 char *p = inbuf + have;
270                 cin.read( p, space );
271                 int len = cin.gcount();
272                 char *pe = p + len;
                    /* Stays null until a read returns no data. */
273                 char *eof = 0;
275                 /* Check for EOF. */
276                 if ( len == 0 ) {
277                         eof = pe;
278                         done = true;
279                 }
                    /* Ragel directive: emits the code that runs the machine
                     * over the bytes from p up to pe. */
281                 %% write exec;
283                 if ( cs == RagelScan_error ) {
284                         /* Machine failed before finding a token. */
285                         cerr << "PARSE ERROR" << endl;
286                         exit(1);
287                 }
                    /* A null ts means no token is in progress, so nothing
                     * needs to be kept for the next pass. */
289                 if ( ts == 0 )
290                         have = 0;
291                 else {
292                         /* There is a prefix to preserve, shift it over. */
293                         have = pe - ts;
294                         memmove( inbuf, ts, have );
                            /* Rebase te first, while ts still holds the old start. */
295                         te = inbuf + (te-ts);
296                         ts = inbuf;
297                 }
298         }
299         return 0;