2 * A C++ scanner. Uses the longest match construction.
3 * The compound operators << <= <<= >> >= >>= are deliberately not scanned as single tokens, since angle brackets are also used as template delimiters.
14 #define TK_NameSep 260 /* :: (TK_ codes start above 255, presumably so they cannot collide with the raw character codes passed to token() for single-character symbols; confirm) */
16 #define TK_PlusPlus 262 /* ++ */
17 #define TK_MinusMinus 263 /* -- */
18 #define TK_ArrowStar 264 /* ->* */
19 #define TK_DotStar 265 /* .* */
20 #define TK_ShiftLeft 266 /* not emitted by any rule visible here; the file header says shifts are left out */
21 #define TK_ShiftRight 267 /* not emitted by any rule visible here; the file header says shifts are left out */
22 #define TK_IntegerDecimal 268 /* decimal integer literal */
23 #define TK_IntegerOctal 269 /* integer literal with a leading 0 */
24 #define TK_IntegerHex 270 /* hexadecimal integer literal */
25 #define TK_EqualsEquals 271 /* == */
26 #define TK_NotEquals 272 /* != */
29 #define TK_MultAssign 275 /* *= */
30 #define TK_DivAssign 276 /* /= */
31 #define TK_PercentAssign 277 /* %= */
32 #define TK_PlusAssign 278 /* += */
33 #define TK_MinusAssign 279 /* -= */
34 #define TK_AmpAssign 280 /* &= */
35 #define TK_CaretAssign 281 /* ^= */
36 #define TK_BarAssign 282 /* |= */
37 #define TK_DotDotDot 283 /* ... */
38 #define TK_Whitespace 284 /* not emitted by any rule visible here */
39 #define TK_Comment 285 /* not emitted by any rule visible here */
43 /* EOF char used to flush out the last token. This should be a whitespace character. */
53 static char buf[BUFSIZE]; /* input buffer: each read lands at buf + have, and an unfinished token is moved back to its start */
54 static int line = 1, col = 1; /* position counters, both starting at 1; presumably advanced by the newline/column loop in token() */
55 static char *tokstart, *tokend; /* bounds of the current token: token() emits the tokend - tokstart bytes starting at tokstart */
56 static int act, have = 0; /* have: bytes already held at the front of buf before the next read. act: not referenced in the visible code, presumably Ragel scanner state (confirm) */
64 fract_const = digit* '.' digit+ | digit+ '.'; # digits around a decimal point; digits are required on at least one side of the point
65 exponent = [eE] [+\-]? digit+; # e or E, an optional sign, then one or more digits
66 float_suffix = [flFL]; # a single f, l, F or L
74 # Single-quoted and double-quoted literals, each with an optional L prefix.
75 ( 'L'? "'" ( [^'\\\n] | /\\./ )* "'" )
77 ( 'L'? '"' ( [^"\\\n] | /\\./ )* '"' )
81 ( [a-zA-Z_] [a-zA-Z0-9_]* )
85 ( fract_const exponent? float_suffix? | digit+ exponent float_suffix? )
88 # Integer decimal: a lone 0, or a nonzero digit followed by more digits, then up to three u/l suffix characters (enough for ull or LLU). Leading part buffered by float.
89 ( ( '0' | [1-9] [0-9]* ) [ulUL]{0,3} )
90 {token( TK_IntegerDecimal );};
92 # Integer octal: a 0 followed by one or more digits. The digits 8 and 9 are still accepted so that a malformed literal such as 09 stays one token instead of splitting in two. Suffix length matches the decimal rule. Leading part buffered by float.
93 ( '0' [0-9]+ [ulUL]{0,3} )
94 {token( TK_IntegerOctal );};
96 # Integer hex: 0x or 0X followed by one or more hex digits. The uppercase X prefix is accepted so that 0XFF is not split into 0 and an identifier. Suffix length matches the decimal rule. Leading 0 buffered by float.
97 ( '0' ( [xX] [0-9a-fA-F]+ [ulUL]{0,3} ) )
98 {token( TK_IntegerHex );};
100 # Compound operators of two or more characters; each is reported with its own TK_ code. Only buffer the second item, first buffered by symbol.
101 '::' {token( TK_NameSep );};
102 '==' {token( TK_EqualsEquals );};
103 '!=' {token( TK_NotEquals );};
104 '&&' {token( TK_AndAnd );};
105 '||' {token( TK_OrOr );};
106 '*=' {token( TK_MultAssign );};
107 '/=' {token( TK_DivAssign );};
108 '%=' {token( TK_PercentAssign );};
109 '+=' {token( TK_PlusAssign );};
110 '-=' {token( TK_MinusAssign );};
111 '&=' {token( TK_AmpAssign );};
112 '^=' {token( TK_CaretAssign );};
113 '|=' {token( TK_BarAssign );};
114 '++' {token( TK_PlusPlus );};
115 '--' {token( TK_MinusMinus );};
116 '->' {token( TK_Arrow );}; # prefix of the next rule; the longest-match scanner picks the longer operator when a star follows
117 '->*' {token( TK_ArrowStar );};
118 '.*' {token( TK_DotStar );};
120 # Three char compounds, first item already buffered.
121 '...' {token( TK_DotDotDot );};
123 # Single char symbols: any punct character except underscore and the two quote characters. The token code passed to token() is the character itself.
124 ( punct - [_"'] ) {token( tokstart[0] );};
126 # Comments and whitespace.
127 '/*' { fgoto c_comment; }; # on a block-comment opener, transfer control to c_comment via fgoto (c_comment is not defined in the visible lines)
134 void token( int tok )
136 char *data = tokstart;
137 int len = tokend - tokstart;
139 cout << '<' << tok << "> ";
140 cout.write( data, len );
143 /* Count newlines and columns. This code is here mainly for having some
144 * code in the token routine when commenting out the above output during
145 * performance testing. */
146 for ( int i = 0; i < len; i ++ ) {
147 if ( data[i] == '\n' ) {
159 std::ios::sync_with_stdio(false);
163 /* Do the first read. */
166 char *p = buf + have;
167 int space = BUFSIZE - have;
170 /* We filled up the buffer trying to scan a token. */
171 cerr << "OUT OF BUFFER SPACE" << endl;
175 cin.read( p, space );
176 int len = cin.gcount();
180 /* If we see eof then append the EOF char. */
188 /* Check if we failed. */
189 if ( cs == Scanner_error ) {
190 /* Machine failed before finding a token. */
191 cerr << "PARSE ERROR" << endl;
195 /* Now set up the prefix. */
199 /* There is data that needs to be shifted over. */
200 have = pe - tokstart;
201 memmove( buf, tokstart, have );
202 tokend -= (tokstart-buf);