1 /***********************************************************************
3 * This software is part of the ast package *
4 * Copyright (c) 1986-2009 AT&T Intellectual Property *
5 * and is licensed under the *
6 * Common Public License, Version 1.0 *
7 * by AT&T Intellectual Property *
9 * A copy of the License is available at *
10 * http://www.opensource.org/licenses/cpl1.0.txt *
11 * (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) *
13 * Information and Software Systems Research *
17 * Glenn Fowler <gsf@research.att.com> *
19 ***********************************************************************/
25 * preprocessor and proto lexical analyzer fsm
26 * define PROTOMAIN for standalone proto
33 * lexical FSM encoding
34 * derived from a standalone ansi cpp by Dennis Ritchie
35 * modified for libpp by Glenn Fowler
37 * fsm[] is initialized from fsminit[]. The encoding is blown out into
38 * fsm[] for time efficiency. When in state `state', and one of the
39 * characters in `ch' arrives, enter `nextstate'. States >= TERMINAL are
40 * either final, or at least require special action. In fsminit[] there
41 * is a line for each <state,charset,nextstate>. Early entries are
42 * overwritten by later ones. C_XXX is the universal set and should
43 * always be first. Some of the fsminit[] entries are templates for
44 * groups of states. The OP entries trigger the state copies. States
45 * above TERMINAL are represented in fsm[] as negative values. S_TOK and
46 * S_TOKB encode the resulting token type in the upper bits. These actions
47 * differ in that S_TOKB has a lookahead char.
49 * fsm[] has three start states:
51 * PROTO proto (ANSI -> K&R,C++,ANSI)
52 * QUICK standalone ppcpp()
53 * TOKEN tokenizing pplex()
55 * If the next state remains the same then the fsm[] transition value is 0.
56 * MAX+1 is a power of 2 so that fsm[state][EOF==MAX+1] actually accesses
57 * fsm[state+1][0] which is ~S_EOB for all states. This preserves the
58 * power of 2 fsm[] row size for efficient array indexing. Thanks to
59 * D. G. Korn for the last two observations. The pseudo non-terminal state
60 * fsm[TERMINAL][state+1] is used to differentiate EOB from EOF.
64 * TERM arg SPLICE next
69 * NOTE: these must be `control' characters for all native codesets
70 * currently ok for {ascii,ebcdic1,ebcdic2,ebcdic3}
/*
 * copy(t,f): duplicate the transitions of fsm[] row f into row t.
 * The memcpy starts at column 1 and moves MAX+1 shorts, so column 0
 * of row t is left alone and the last short copied lands in column 0
 * of the row that follows t. The per-state slot kept in the pseudo
 * row fsm[TERMINAL][state+1] is copied along with the row.
 */
84 #define copy(t,f) (memcpy(&fsm[t][1],&fsm[f][1],(MAX+1)*sizeof(short)),fsm[TERMINAL][(t)+1]=fsm[TERMINAL][(f)+1])
/*
 * One <state,charset,nextstate> triple of the compact FSM description.
 * ppfsm() expands an array of these into the fsm[] table.
 */
86 struct fsminit
/* fsm initialization row */
88 int state
; /* if in this state */
89 unsigned char ch
[4]; /* and see one of these (at most 4 per row; the expansion loop stops at a 0 entry) */
90 int nextstate
; /* enter this state if <TERMINAL*/
/*
 * FSM description table. Each row is { state, { chars }, nextstate }.
 * Rows are applied in order and later rows for the same state override
 * earlier ones, so the C_XXX (universal set) row of a state comes first
 * and acts as that state's default transition.
 */
93 static struct fsminit fsminit
[] =
95 /* proto start state */
96 { PROTO
, { C_XXX
}, S_CHR
, },
97 { PROTO
, { C_EOF
}, S_EOF
, },
98 { PROTO
, { C_DEC
}, BAD1
, },
99 { PROTO
, { '.' }, DOT
, },
100 { PROTO
, { C_LET
}, NID
, },
/* 'L' overrides the C_LET row above: it may introduce a wide literal */
101 { PROTO
, { 'L' }, LIT
, },
/* first letters of the proto reserved words listed at RES1 */
102 { PROTO
, { 'd', 'e', 'f', 'i' }, RES1
, },
103 { PROTO
, { 'r', 's', 't', 'v' }, RES1
, },
104 { PROTO
, { 'w', 'N' }, RES1
, },
105 { PROTO
, { '"', '\'' }, S_LITBEG
, },
106 { PROTO
, { '/' }, COM1
, },
107 { PROTO
, { '\n' }, S_NL
, },
108 { PROTO
, { ' ','\t','\f','\v' }, WS1
, },
/*
 * Reserved word recognition for proto. RES1 is entered after the first
 * letter; the second letter selects one of the RES1x states below.
 * Each RES1x state loops on the remaining letters of its word(s), falls
 * back to NID on any other letter or digit, and yields S_RESERVED on
 * the default row. The states only check set membership, not letter
 * order -- presumably the S_RESERVED action does the exact match
 * (TODO confirm against the lexer).
 */
110 /* proto {do,else,extern,for,if,inline,return,static,typedef,va_start,void,while,NoN} */
111 { RES1
, { C_XXX
}, S_MACRO
, },
112 { RES1
, { C_LET
, C_DEC
}, NID
, },
113 { RES1
, { 'a' }, RES1a
, },
114 { RES1
, { 'e' }, RES1e
, },
115 { RES1
, { 'f' }, RES1f
, },
116 { RES1
, { 'h' }, RES1h
, },
117 { RES1
, { 'l' }, RES1l
, },
118 { RES1
, { 'n' }, RES1n
, },
119 { RES1
, { 'o' }, RES1o
, },
120 { RES1
, { 't' }, RES1t
, },
121 { RES1
, { 'x' }, RES1x
, },
122 { RES1
, { 'y' }, RES1y
, },
124 /* proto reserved {va_start} */
125 { RES1a
, { C_XXX
}, S_RESERVED
, },
126 { RES1a
, { C_LET
, C_DEC
}, NID
, },
/* five letters remain after "va", so the set needs two rows of ch[4] */
127 { RES1a
, { '_','s','t','a' }, RES1a
, },
128 { RES1a
, { 'r' }, RES1a
, },
130 /* proto reserved {return} */
131 { RES1e
, { C_XXX
}, S_RESERVED
, },
132 { RES1e
, { C_LET
, C_DEC
}, NID
, },
133 { RES1e
, { 't','u','r','n' }, RES1e
, },
135 /* proto reserved {if} */
136 { RES1f
, { C_XXX
}, S_RESERVED
, },
137 { RES1f
, { C_LET
, C_DEC
}, NID
, },
139 /* proto reserved {while} */
140 { RES1h
, { C_XXX
}, S_RESERVED
, },
141 { RES1h
, { C_LET
, C_DEC
}, NID
, },
142 { RES1h
, { 'i','l','e' }, RES1h
, },
144 /* proto reserved {else} */
145 { RES1l
, { C_XXX
}, S_RESERVED
, },
146 { RES1l
, { C_LET
, C_DEC
}, NID
, },
147 { RES1l
, { 's','e' }, RES1l
, },
149 /* proto reserved {inline} */
150 { RES1n
, { C_XXX
}, S_RESERVED
, },
151 { RES1n
, { C_LET
, C_DEC
}, NID
, },
152 { RES1n
, { 'l','i','n','e' }, RES1n
, },
154 /* proto reserved {do,for,void} */
155 { RES1o
, { C_XXX
}, S_RESERVED
, },
156 { RES1o
, { C_LET
, C_DEC
}, NID
, },
/* 'N' also lets the NoN entry of the RES1 list pass through this state */
157 { RES1o
, { 'r','i','d','N' }, RES1o
, },
159 /* proto reserved {static} */
160 { RES1t
, { C_XXX
}, S_RESERVED
, },
161 { RES1t
, { C_LET
, C_DEC
}, NID
, },
162 { RES1t
, { 'a','t','i','c' }, RES1t
, },
164 /* proto reserved {extern} */
165 { RES1x
, { C_XXX
}, S_RESERVED
, },
166 { RES1x
, { C_LET
, C_DEC
}, NID
, },
167 { RES1x
, { 't','e','r','n' }, RES1x
, },
169 /* proto reserved {typedef} */
170 { RES1y
, { C_XXX
}, S_RESERVED
, },
171 { RES1y
, { C_LET
, C_DEC
}, NID
, },
172 { RES1y
, { 'p','e','d','f' }, RES1y
, },
/*
 * Comment states. COM2-COM4 follow a slash-star comment, COM5-COM7 a
 * slash-slash comment. Every exit goes through the S_COMMENT action,
 * including newline and C_EOF inside either kind of comment.
 * NOTE(review): ppfsm() also assigns fsm[COM1]['/'] at runtime, so the
 * COM1 '/' row here is not necessarily the final word.
 */
174 /* saw /, perhaps start of comment */
175 { COM1
, { C_XXX
}, S_CHRB
, },
176 { COM1
, { '*' }, COM2
, },
178 { COM1
, { '/' }, COM5
, },
181 /* saw / *, start of comment */
182 { COM2
, { C_XXX
}, COM2
, },
183 { COM2
, { '\n', C_EOF
}, S_COMMENT
, },
184 { COM2
, { '/' }, COM4
, },
185 { COM2
, { '*' }, COM3
, },
186 { COM2
, { '#', ';', ')' }, QUAL(COM2
), },
188 /* saw the * possibly ending a comment */
189 { COM3
, { C_XXX
}, COM2
, },
190 { COM3
, { '\n', C_EOF
}, S_COMMENT
, },
191 { COM3
, { '#', ';', ')' }, QUAL(COM2
), },
/* a run of stars stays in COM3 so that a following / still closes */
192 { COM3
, { '*' }, COM3
, },
193 { COM3
, { '/' }, S_COMMENT
, },
195 /* saw / in / * comment, possible malformed nest */
196 { COM4
, { C_XXX
}, COM2
, },
197 { COM4
, { '*', '\n', C_EOF
}, S_COMMENT
, },
198 { COM4
, { '/' }, COM4
, },
200 /* saw / /, start of comment */
201 { COM5
, { C_XXX
}, COM5
, },
202 { COM5
, { '\n', C_EOF
}, S_COMMENT
, },
203 { COM5
, { '/' }, COM6
, },
204 { COM5
, { '*' }, COM7
, },
206 /* saw / in / / comment, possible malformed nest */
207 { COM6
, { C_XXX
}, COM5
, },
208 { COM6
, { '*', '\n', C_EOF
}, S_COMMENT
, },
209 { COM6
, { '/' }, COM6
, },
211 /* saw * in / /, possible malformed nest */
212 { COM7
, { C_XXX
}, COM5
, },
213 { COM7
, { '\n', C_EOF
}, S_COMMENT
, },
214 { COM7
, { '*' }, COM7
, },
215 { COM7
, { '/' }, S_COMMENT
, },
217 /* normal identifier -- always a macro candidate */
218 { NID
, { C_XXX
}, S_MACRO
, },
219 { NID
, { C_LET
, C_DEC
}, NID
, },
221 /* saw ., operator or dbl constant */
222 { DOT
, { C_XXX
}, S_CHRB
, },
223 { DOT
, { '.' }, DOT2
, },
224 { DOT
, { C_DEC
}, BAD1
, },
226 /* saw .., possible ... */
/*
 * a third dot completes T_VARIADIC; anything else yields T_INVALID.
 * BACK vs KEEP presumably differ in whether the lookahead char is
 * left unread or consumed -- TODO confirm against the macro definitions
 */
227 { DOT2
, { C_XXX
}, BACK(T_INVALID
), },
228 { DOT2
, { '.' }, KEEP(T_VARIADIC
), },
230 /* saw L (possible start of normal wide literal) */
231 { LIT
, { C_XXX
}, S_MACRO
, },
232 { LIT
, { C_LET
, C_DEC
}, NID
, },
233 { LIT
, { '"', '\'' }, QUAL(LIT1
), },
235 /* saw " or ' beginning literal */
/*
 * either quote character triggers S_LITEND here; presumably the action
 * code checks it against the opening quote -- TODO confirm.
 * newline and C_EOF also end the literal
 */
236 { LIT1
, { C_XXX
}, LIT1
, },
237 { LIT1
, { '"', '\'' }, S_LITEND
, },
238 { LIT1
, { '\n', C_EOF
}, S_LITEND
, },
239 { LIT1
, { '\\' }, LIT2
, },
241 /* saw \ in literal */
242 { LIT2
, { C_XXX
}, S_LITESC
, },
243 { LIT2
, { '\n', C_EOF
}, S_LITEND
, },
245 /* eat malformed numeric constant */
246 { BAD1
, { C_XXX
}, BACK(T_INVALID
), },
247 { BAD1
, { C_LET
, C_DEC
, '.' }, BAD1
, },
248 { BAD1
, { 'e', 'E' }, BAD2
, },
250 /* eat malformed numeric fraction|exponent */
/* BAD2 follows e|E: a sign here is taken as part of the constant */
251 { BAD2
, { C_XXX
}, BACK(T_INVALID
), },
252 { BAD2
, { C_LET
, C_DEC
, '.' }, BAD1
, },
253 { BAD2
, { '+', '-' }, BAD1
, },
255 /* saw white space, eat it up */
/* only space and tab continue the run; \f and \v get the S_VS action */
256 { WS1
, { C_XXX
}, S_WS
, },
257 { WS1
, { ' ', '\t' }, WS1
, },
258 { WS1
, { '\f', '\v' }, S_VS
, },
/*
 * QUICK template rows. The OP/COPY row further down replicates the
 * QUICK row built so far into states QUICK+1 through MAC0+1; the
 * "quick start state" rows after it then override entries for QUICK
 * itself without affecting the copies.
 */
263 { QUICK
, { C_XXX
}, QTOK
, },
264 { QUICK
, { C_EOF
, MARK
}, S_CHRB
, },
265 { QUICK
, { C_LET
, C_DEC
}, QID
, },
266 { QUICK
, { 'L' }, LIT0
, },
267 { QUICK
, { '"', '\'' }, S_LITBEG
, },
268 { QUICK
, { '/' }, S_CHRB
, },
269 { QUICK
, { '*' }, QCOM
, },
270 { QUICK
, { '#' }, SHARP1
, },
271 { QUICK
, { '\n' }, S_NL
, },
272 { QUICK
, { '\f', '\v' }, S_VS
, },
274 /* copy QUICK to QUICK+1 through MAC0+1 */
/* OP rows are directives: ch[] holds { source, first dest, last dest } */
275 { OP
, {QUICK
,QUICK
+1,MAC0
+1}, COPY
, },
277 /* quick start state */
278 { QUICK
, { C_EOF
}, S_EOF
, },
279 { QUICK
, { C_DEC
}, QNUM
, },
280 { QUICK
, { MARK
}, QTOK
, },
281 { QUICK
, { '/' }, COM1
, },
282 { QUICK
, { ' ', '\t' }, QUICK
, },
284 /* grab non-macro tokens */
285 { QTOK
, { C_DEC
}, QNUM
, },
287 /* grab numeric and invalid tokens */
288 { QNUM
, { C_LET
, C_DEC
, '.' }, QNUM
, },
289 { QNUM
, { 'e', 'E' }, QEXP
, },
291 /* grab exponent token */
292 { QEXP
, { C_LET
, C_DEC
, '.' }, QNUM
, },
293 { QEXP
, { '+', '-' }, QNUM
, },
295 /* saw *, grab possible bad comment terminator */
296 { QCOM
, { C_DEC
}, QNUM
, },
297 { QCOM
, { '/' }, S_COMMENT
, },
299 /* saw L (possible start of wide string or first macro char) */
300 { MAC0
, { 'L' }, QID
, },
301 { MAC0
, { '"', '\'' }, QUAL(LIT1
), },
303 /* macro candidate template */
304 { MAC0
+1, { 'L' }, QID
, },
306 /* copy MAC0+1 to MAC0+2 through MACN */
307 { OP
, {MAC0
+1,MAC0
+2,MACN
}, COPY
},
309 /* saw L (possible start of wide string or macro L) */
310 { HIT0
, { C_XXX
}, S_MACRO
, },
311 { HIT0
, { C_LET
, C_DEC
}, QID
, },
312 { HIT0
, { '"', '\'' }, QUAL(LIT1
), },
314 /* macro hit template */
315 { HIT0
+1, { C_XXX
}, S_MACRO
, },
316 { HIT0
+1, { C_LET
, C_DEC
}, QID
, },
318 /* copy HIT0+1 to HIT0+2 through HITN */
319 { OP
, {HIT0
+1,HIT0
+2,HITN
}, COPY
},
321 /* saw L (possible start of wide literal) */
322 { LIT0
, { C_XXX
}, S_MACRO
, },
323 { LIT0
, { C_LET
, C_DEC
}, QID
, },
324 { LIT0
, { '"', '\'' }, QUAL(LIT1
), },
326 /* (!PROTOMAIN COM1) saw /, perhaps start of comment or /= */
327 { COM1
, { '=' }, KEEP(T_DIVEQ
), },
/*
 * TOKEN start state. The default row leaves every unclaimed character
 * at S_HUH; ppfsm() later tests fsm[TOKEN][c] == ~S_HUH to decide
 * whether c is still free to join the identifier or quote set.
 * Row order matters: '0' overrides C_DEC and 'L' overrides C_LET.
 */
329 /* normal start state */
330 { TOKEN
, { C_XXX
}, S_HUH
, },
331 { TOKEN
, { C_EOF
}, S_EOF
, },
332 { TOKEN
, { C_DEC
}, DEC1
, },
333 { TOKEN
, { '0' }, OCT1
, },
334 { TOKEN
, { '.' }, DOT1
, },
335 { TOKEN
, { C_LET
}, NID
, },
336 { TOKEN
, { 'L' }, LIT
, },
337 { TOKEN
, { '"', '\'', '<' }, S_LITBEG
, },
338 { TOKEN
, { '/' }, COM1
, },
339 { TOKEN
, { '\n' }, S_NL
, },
340 { TOKEN
, { ' ', '\t' }, WS1
, },
341 { TOKEN
, { '\f', '\v' }, S_VS
, },
/* operator lead characters: each gets its own lookahead state */
342 { TOKEN
, { '#' }, SHARP1
, },
343 { TOKEN
, { ':' }, COLON1
, },
344 { TOKEN
, { '%' }, PCT1
, },
345 { TOKEN
, { '&' }, AND1
, },
346 { TOKEN
, { '*' }, STAR1
, },
347 { TOKEN
, { '+' }, PLUS1
, },
348 { TOKEN
, { '-' }, MINUS1
, },
349 { TOKEN
, { '=' }, EQ1
, },
350 { TOKEN
, { '!' }, NOT1
, },
351 { TOKEN
, { '>' }, GT1
, },
352 { TOKEN
, { '^' }, CIRC1
, },
353 { TOKEN
, { '|' }, OR1
, },
/* punctuation that never combines with a following character */
354 { TOKEN
, { '(', ')', '[', ']' }, S_CHR
, },
355 { TOKEN
, { '{', '}', ',', ';' }, S_CHR
, },
356 { TOKEN
, { '~', '?' }, S_CHR
, },
/*
 * Numeric constant states. In each state the default row ends the
 * constant with the token type accumulated so far, the C_LET row sends
 * unexpected letters to BAD1, and the more specific rows that follow
 * re-admit the letters and digits that are legal at that point.
 */
358 /* saw 0, possible oct|hex|dec|dbl constant */
359 { OCT1
, { C_XXX
}, BACK(T_DECIMAL
), },
360 { OCT1
, { C_LET
, C_DEC
}, BAD1
, },
361 { OCT1
, { C_OCT
}, OCT2
, },
362 { OCT1
, { 'e', 'E' }, DBL2
, },
363 { OCT1
, { 'l', 'L', 'u', 'U' }, QUAL(DEC2
), },
364 { OCT1
, { 'x', 'X' }, HEX1
, },
365 { OCT1
, { '.' }, DBL1
, },
367 /* saw 0<oct>, oct constant */
368 { OCT2
, { C_XXX
}, BACK(T_OCTAL
), },
369 { OCT2
, { C_LET
, C_DEC
}, BAD1
, },
370 { OCT2
, { C_OCT
}, OCT2
, },
371 { OCT2
, { 'e', 'E' }, DBL2
, },
372 { OCT2
, { 'l', 'L', 'u', 'U' }, QUAL(OCT3
), },
373 { OCT2
, { '.' }, DBL1
, },
375 /* oct constant qualifier */
376 { OCT3
, { C_XXX
}, BACK(T_OCTAL
), },
377 { OCT3
, { C_LET
, C_DEC
, '.' }, BAD1
, },
378 { OCT3
, { 'l', 'L', 'u', 'U' }, QUAL(OCT3
), },
380 /* saw 0 [xX], hex constant */
/* NOTE(review): ppfsm(FSM_COMPATIBILITY) rewrites the 'e' and 'E' entries of HEX1 at runtime */
381 { HEX1
, { C_XXX
}, BACK(T_HEXADECIMAL
), },
382 { HEX1
, { C_LET
}, BAD1
, },
383 { HEX1
, { C_HEX
}, HEX1
, },
384 { HEX1
, { 'e', 'E' }, HEX3
, },
385 { HEX1
, { 'l', 'L', 'u', 'U' }, QUAL(HEX2
), },
386 { HEX1
, { '.' }, HEX4
, },
387 { HEX1
, { 'p', 'P' }, HEX5
, },
389 /* hex constant qualifier */
390 { HEX2
, { C_XXX
}, BACK(T_HEXADECIMAL
), },
391 { HEX2
, { C_LET
, C_DEC
, '.' }, BAD1
, },
392 { HEX2
, { 'l', 'L', 'u', 'U' }, QUAL(HEX2
), },
394 /* hex [eE][-+] botch */
395 { HEX3
, { C_XXX
}, BACK(T_HEXADECIMAL
), },
396 { HEX3
, { C_LET
, '.', '-', '+'},BAD1
, },
397 { HEX3
, { C_HEX
}, HEX1
, },
398 { HEX3
, { 'e', 'E' }, HEX3
, },
399 { HEX3
, { 'l', 'L', 'u', 'U' }, QUAL(HEX2
), },
401 /* hex dbl fraction */
402 { HEX4
, { C_XXX
}, BACK(T_HEXDOUBLE
), },
403 { HEX4
, { C_LET
, '.' }, BAD1
, },
404 { HEX4
, { C_HEX
}, HEX4
, },
405 { HEX4
, { 'p', 'P' }, HEX5
, },
406 { HEX4
, { 'f', 'F', 'l', 'L' }, QUAL(HEX8
), },
408 /* optional hex dbl exponent sign */
409 { HEX5
, { C_XXX
}, BACK(T_INVALID
), },
410 { HEX5
, { C_LET
, '.' }, BAD1
, },
411 { HEX5
, { '+', '-' }, HEX6
, },
412 { HEX5
, { C_DEC
}, HEX7
, },
414 /* mandatory hex dbl exponent first digit */
415 { HEX6
, { C_XXX
}, BACK(T_INVALID
), },
416 { HEX6
, { C_LET
, '.' }, BAD1
, },
417 { HEX6
, { C_DEC
}, HEX7
, },
419 /* hex dbl exponent digits */
420 { HEX7
, { C_XXX
}, BACK(T_HEXDOUBLE
), },
421 { HEX7
, { C_LET
, '.' }, BAD1
, },
422 { HEX7
, { C_DEC
}, HEX7
, },
423 { HEX7
, { 'f', 'F', 'l', 'L' }, QUAL(HEX8
), },
425 /* hex dbl constant qualifier */
426 { HEX8
, { C_XXX
}, BACK(T_HEXDOUBLE
), },
427 { HEX8
, { C_LET
, '.' }, BAD1
, },
428 { HEX8
, { 'f', 'F', 'l', 'L' }, QUAL(HEX8
), },
430 /* saw <dec>, dec constant */
431 { DEC1
, { C_XXX
}, BACK(T_DECIMAL
), },
432 { DEC1
, { C_LET
}, BAD1
, },
433 { DEC1
, { C_DEC
}, DEC1
, },
434 { DEC1
, { 'e', 'E' }, DBL2
, },
435 { DEC1
, { 'l', 'L', 'u', 'U' }, QUAL(DEC2
), },
436 { DEC1
, { '.' }, DBL1
, },
438 /* dec constant qualifier */
439 { DEC2
, { C_XXX
}, BACK(T_DECIMAL
), },
440 { DEC2
, { C_LET
, C_DEC
}, BAD1
, },
441 { DEC2
, { 'l', 'L', 'u', 'U' }, QUAL(DEC2
), },
443 /* saw ., operator or dbl constant */
/* NOTE(review): ppfsm() assigns fsm[DOT1]['*'] at runtime depending on PLUSPLUS */
444 { DOT1
, { C_XXX
}, S_CHRB
, },
445 { DOT1
, { '.' }, DOT2
, },
446 { DOT1
, { C_DEC
}, DBL1
, },
/* dbl fraction digits: reached from <digits>. and from .<digit> */
449 { DBL1
, { C_XXX
}, BACK(T_DOUBLE
), },
450 { DBL1
, { C_LET
, '.' }, BAD1
, },
451 { DBL1
, { C_DEC
}, DBL1
, },
452 { DBL1
, { 'e', 'E' }, DBL2
, },
453 { DBL1
, { 'f', 'F', 'l', 'L' }, QUAL(DBL5
), },
455 /* optional dbl exponent sign */
456 { DBL2
, { C_XXX
}, BACK(T_INVALID
), },
457 { DBL2
, { C_LET
, '.' }, BAD1
, },
458 { DBL2
, { '+', '-' }, DBL3
, },
459 { DBL2
, { C_DEC
}, DBL4
, },
461 /* mandatory dbl exponent first digit */
462 { DBL3
, { C_XXX
}, BACK(T_INVALID
), },
463 { DBL3
, { C_LET
, '.' }, BAD1
, },
464 { DBL3
, { C_DEC
}, DBL4
, },
466 /* dbl exponent digits */
467 { DBL4
, { C_XXX
}, BACK(T_DOUBLE
), },
468 { DBL4
, { C_LET
, '.' }, BAD1
, },
469 { DBL4
, { C_DEC
}, DBL4
, },
470 { DBL4
, { 'f', 'F', 'l', 'L' }, QUAL(DBL5
), },
472 /* dbl constant qualifier */
473 { DBL5
, { C_XXX
}, BACK(T_DOUBLE
), },
474 { DBL5
, { C_LET
, '.' }, BAD1
, },
475 { DBL5
, { 'f', 'F', 'l', 'L' }, QUAL(DBL5
), },
477 /* saw < starting include header */
/* no row visible in this table enters HDR1; presumably the lexer selects it directly -- TODO confirm */
478 { HDR1
, { C_XXX
}, HDR1
, },
479 { HDR1
, { '>', '\n', C_EOF
}, S_LITEND
, },
481 /* saw <binop><space> expecting = */
/* BIN1 is only reachable through the space and tab entries that ppfsm() patches into the operator states */
482 { BIN1
, { C_XXX
}, S_HUH
, },
483 { BIN1
, { ' ', '\t' }, BIN1
, },
/* saw #: whatever follows triggers the S_SHARP action */
487 { SHARP1
, { C_XXX
}, S_SHARP
, },
/*
 * Operator lookahead states, entered from the lead characters in the
 * start-state rows. The default row is S_CHRB (presumably: return the
 * single char operator and keep the lookahead -- TODO confirm); the
 * specific rows complete a two char operator with KEEP or move on to a
 * further lookahead state.
 */
489 { PCT1
, { C_XXX
}, S_CHRB
, },
490 { PCT1
, { '=' }, KEEP(T_MODEQ
), },
492 { AND1
, { C_XXX
}, S_CHRB
, },
493 { AND1
, { '=' }, KEEP(T_ANDEQ
), },
494 { AND1
, { '&' }, KEEP(T_ANDAND
), },
496 { STAR1
, { C_XXX
}, S_CHRB
, },
497 { STAR1
, { '=' }, KEEP(T_MPYEQ
), },
/* star followed by slash outside a comment still goes to S_COMMENT */
498 { STAR1
, { '/' }, S_COMMENT
, },
500 { PLUS1
, { C_XXX
}, S_CHRB
, },
501 { PLUS1
, { '=' }, KEEP(T_ADDEQ
), },
502 { PLUS1
, { '+' }, KEEP(T_ADDADD
), },
504 { MINUS1
, { C_XXX
}, S_CHRB
, },
505 { MINUS1
, { '=' }, KEEP(T_SUBEQ
), },
506 { MINUS1
, { '-' }, KEEP(T_SUBSUB
), },
507 { MINUS1
, { '>' }, KEEP(T_PTRMEM
), },
/* ppfsm() assigns fsm[COLON1][':'] at runtime depending on PLUSPLUS */
509 { COLON1
, { C_XXX
}, S_CHRB
, },
510 { COLON1
, { '=', '>' }, S_HUH
, },
/* no row visible in this table enters LT1: the TOKEN row sends '<' to S_LITBEG */
512 { LT1
, { C_XXX
}, S_CHRB
, },
513 { LT1
, { '=' }, KEEP(T_LE
), },
514 { LT1
, { '<' }, LSH1
, },
516 { EQ1
, { C_XXX
}, S_CHRB
, },
517 { EQ1
, { '=' }, KEEP(T_EQ
), },
519 { NOT1
, { C_XXX
}, S_CHRB
, },
520 { NOT1
, { '=' }, KEEP(T_NE
), },
522 { GT1
, { C_XXX
}, S_CHRB
, },
523 { GT1
, { '=' }, KEEP(T_GE
), },
524 { GT1
, { '>' }, RSH1
, },
526 { CIRC1
, { C_XXX
}, S_CHRB
, },
527 { CIRC1
, { '=' }, KEEP(T_XOREQ
), },
529 { OR1
, { C_XXX
}, S_CHRB
, },
530 { OR1
, { '=' }, KEEP(T_OREQ
), },
531 { OR1
, { '|' }, KEEP(T_OROR
), },
/* saw ->; only entered when ppfsm() points fsm[MINUS1]['>'] here under PLUSPLUS */
535 { ARROW1
, { C_XXX
}, BACK(T_PTRMEM
), },
536 { ARROW1
, { '*' }, KEEP(T_PTRMEMREF
), },
/* saw <<, possible <<= */
538 { LSH1
, { C_XXX
}, BACK(T_LSHIFT
), },
539 { LSH1
, { '=' }, KEEP(T_LSHIFTEQ
), },
/* saw >>, possible >>= */
541 { RSH1
, { C_XXX
}, BACK(T_RSHIFT
), },
542 { RSH1
, { '=' }, KEEP(T_RSHIFTEQ
), },
/*
 * Expanded transition table: one row per state plus the pseudo row
 * TERMINAL, MAX+1 columns per row.
 */
550 short fsm
[TERMINAL
+1][MAX
+1];
/* indexed by character; ppfsm() installs entries such as trigraph['/'] = '\\' */
552 char trigraph
[MAX
+1];
/*
 * NOTE(review): spl[] and aln[] are each defined twice below. The two
 * versions are presumably alternatives selected by a preprocessor
 * conditional that is not visible here -- confirm before editing either.
 * spl[] is a 0 terminated list of characters; aln[] is the digits,
 * then the letters, then the extra identifier characters.
 */
555 static char spl
[] = { '\\', '\r', 0 };
556 static char aln
[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_$@";
558 static char spl
[] = { MARK
, '?', '\\', '\r', CC_sub
, 0 };
559 static char aln
[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_";
/* aln[] with the 10 leading digits skipped */
561 static char* let
= &aln
[10];
/* hex digits ordered so that the decimal and octal sets are suffixes */
562 static char hex
[] = "fedcbaFEDCBA9876543210";
/* "9876543210" */
563 static char* dec
= &hex
[12];
/* "76543210" */
564 static char* oct
= &hex
[14];
567 * runtime FSM modifications
568 * ppfsm(FSM_INIT,0) must be called first
/*
 * ppfsm: apply the runtime modification selected by op to fsm[].
 * s is the op specific operand: some ops walk it as a string of
 * characters, one only tests it for null versus non-null.
 * NOTE(review): many statements of this function (declarations, the
 * switch and case labels, braces) are not visible in this listing;
 * the comments below describe only the statements that are.
 */
572 ppfsm(int op
, register char* s
)
578 register struct fsminit
* fp
;
/*
 * add c to the identifier set: allowed only while the TOKEN start
 * state still maps c to ~S_HUH; c then gets IDSTATE() of the '_'
 * transition in every state below TERMINAL
 */
593 if (fsm
[TOKEN
][c
] == ~S_HUH
)
596 for (i
= 0; i
< TERMINAL
; i
++)
597 fsm
[i
][c
] = IDSTATE(fsm
[i
]['_']);
599 else error(2, "%c: cannot add to identifier set", c
);
608 for (i
= 0; i
< TERMINAL
; i
++)
/* expand fsminit[] into fsm[]; the loop exit is on a line not shown */
616 for (fp
= fsminit
;; fp
++)
/* next states >= TERMINAL are stored complemented, i.e. negative */
618 if ((n
= fp
->nextstate
) >= TERMINAL
) n
= ~n
;
/* presumably the OP/COPY expansion: ch[1] is the first destination state -- TODO confirm */
627 for (i
= fp
->ch
[1]; i
<= n
; i
++)
/* visit the row's characters; a 0 entry ends the list early */
637 for (i
= 0; i
< sizeof(fp
->ch
) && (c
= fp
->ch
[i
]); i
++)
642 for (c
= 0; c
<= MAX
; c
++)
/* remember the row's next state, as a non-negative value, in the pseudo row */
647 fsm
[TERMINAL
][fp
->state
+1] = n
< 0 ? ~n
: n
;
676 * install splice special cases
677 * and same non-terminal transitions
680 for (i
= 0; i
< TERMINAL
; i
++)
685 if (c
!= MARK
|| !INCOMMENT(rp
))
/* force a negative (special action) entry unless it already is one */
687 if (rp
[c
] >= 0) rp
[c
] = ~rp
[c
];
691 for (c
= 0; c
<= MAX
; c
++)
/* column 0 of the pseudo row gets the same ~S_EOB marker */
695 fsm
[TERMINAL
][0] = ~S_EOB
;
700 * default character types
/* do-while form: the terminating 0 of s is passed to setsplice() too */
710 do setsplice(c
= *s
++); while (c
);
718 trigraph
['/'] = '\\';
720 trigraph
['\''] = '^';
/* C++ mode: enable ::, .* and ->* recognition and slash-slash comments */
731 if (pp
.option
& PLUSPLUS
)
733 fsm
[COLON1
][':'] = ~KEEP(T_SCOPE
);
734 fsm
[DOT1
]['*'] = ~KEEP(T_DOTREF
);
735 fsm
[MINUS1
]['>'] = ARROW1
;
736 fsm
[COM1
]['/'] = COM5
;
738 for (i
= 0; i
< TERMINAL
; i
++)
/*
 * outside comment and quote states make c a special action:
 * a positive next state is complemented, and a 0 entry (meaning
 * "stay in the same state") becomes ~i
 */
741 if (!INCOMMENT(rp
) && !INQUOTE(rp
))
746 if (rp
[c
] > 0) rp
[c
] = ~rp
[c
];
747 else if (!rp
[c
]) rp
[c
] = ~i
;
/* while form: stops before the terminating 0 of s */
753 while (c
= *s
++) setsplice(c
);
/*
 * presumably the non-PLUSPLUS branch (the else is not visible):
 * :: and .* are not tokens, -> is T_PTRMEM, and slash-slash starts a
 * comment only when PLUSCOMMENT is set
 */
757 fsm
[COLON1
][':'] = ~S_CHRB
;
758 fsm
[DOT1
]['*'] = ~S_CHRB
;
759 fsm
[MINUS1
]['>'] = ~KEEP(T_PTRMEM
);
760 fsm
[COM1
]['/'] = (pp
.option
& PLUSCOMMENT
) ? COM5
: ~S_CHRB
;
766 case FSM_COMPATIBILITY
:
/* compatibility mode: e|E do not start an exponent in HEX1 or QNUM, and u|U qualify QNUM */
767 if (pp
.state
& COMPATIBILITY
)
769 fsm
[HEX1
]['e'] = HEX1
;
770 fsm
[HEX1
]['E'] = HEX1
;
771 fsm
[QNUM
]['e'] = QNUM
;
772 fsm
[QNUM
]['E'] = QNUM
;
773 fsm
[QNUM
]['u'] = ~QUAL(QNUM
);
774 fsm
[QNUM
]['U'] = ~QUAL(QNUM
);
/* presumably the else branch: the same entries as the fsminit[] rows for HEX1 and QNUM build them */
778 fsm
[HEX1
]['e'] = HEX3
;
779 fsm
[HEX1
]['E'] = HEX3
;
780 fsm
[QNUM
]['e'] = QEXP
;
781 fsm
[QNUM
]['E'] = QEXP
;
782 fsm
[QNUM
]['u'] = QNUM
;
783 fsm
[QNUM
]['U'] = QNUM
;
/* add c to the quote set: an unassigned c takes over the double-quote transitions in every state */
791 if (fsm
[TOKEN
][c
] == ~S_HUH
)
792 for (i
= 0; i
< TERMINAL
; i
++)
793 fsm
[i
][c
] = fsm
[i
]['"'];
794 else error(2, "%c: cannot add to quote set", c
);
/*
 * a char other than the double-quote that currently shares its TOKEN
 * transition is given the '_' transitions in every state
 */
799 if (c
!= '"' && fsm
[TOKEN
][c
] == fsm
[TOKEN
]['"'])
800 for (i
= 0; i
< TERMINAL
; i
++)
801 fsm
[i
][c
] = fsm
[i
]['_'];
/*
 * s non-null: space and tab after a binary operator lead to BIN1;
 * s null: they end the operator as usual (~S_CHRB, or the BACK token
 * for the two shift states)
 */
805 n
= s
? BIN1
: ~S_CHRB
;
806 fsm
[COM1
][' '] = fsm
[COM1
]['\t'] = n
;
807 fsm
[AND1
][' '] = fsm
[AND1
]['\t'] = n
;
808 fsm
[STAR1
][' '] = fsm
[STAR1
]['\t'] = n
;
809 fsm
[PCT1
][' '] = fsm
[PCT1
]['\t'] = n
;
810 fsm
[PLUS1
][' '] = fsm
[PLUS1
]['\t'] = n
;
811 fsm
[MINUS1
][' '] = fsm
[MINUS1
]['\t'] = n
;
812 fsm
[CIRC1
][' '] = fsm
[CIRC1
]['\t'] = n
;
813 fsm
[OR1
][' '] = fsm
[OR1
]['\t'] = n
;
814 fsm
[LSH1
][' '] = fsm
[LSH1
]['\t'] = s
? BIN1
: ~BACK(T_LSHIFT
);
815 fsm
[RSH1
][' '] = fsm
[RSH1
]['\t'] = s
? BIN1
: ~BACK(T_RSHIFT
);
/* names at least pp.truncate chars long get separate handling (body not visible) */
819 if (pp
.truncate
&& strlen(s
) >= pp
.truncate
)
/* a name starting with 'L' starts in MAC0, any other name in MAC0+1 */
825 i
= MAC0
+ ((c
= *s
++) != 'L');
/* NOTE(review): i + NMAC looks like the HIT state paired with MAC state i -- confirm against the state definitions */
826 if ((n
= fsm
[QUICK
][c
]) != (i
+ NMAC
))
/* QUICK, QCOM and QTOK are kept in step for the first character */
831 if (fsm
[QUICK
][c
] != n
)
832 fsm
[QUICK
][c
] = fsm
[QCOM
][c
] = fsm
[QTOK
][c
] = n
;
845 if (fsm
[i
][c
] < HIT0
)
847 if (fsm
[i
+ NMAC
][c
] < HIT0
)
848 fsm
[i
+ NMAC
][c
] = n
;
854 if (fsm
[i
][c
] < HIT0
)
865 for (n
= CHAR_MIN
; n
<= CHAR_MAX
; n
++)
872 if (i
< HIT0
&& fsm
[i
+ NMAC
][c
] < n
)
873 fsm
[i
+ NMAC
][c
] = n
;
886 * c is current input char
890 refill(register int c
)
892 if (pp
.in
->flags
& IN_eof
)
899 *((pp
.in
->nextchr
= pp
.in
->buffer
+ PPBAKSIZ
) - 1) = c
;
902 (pp
.in
->flags
& IN_prototype
) ? pppread(pp
.in
->nextchr
) :
904 read(pp
.in
->fd
, pp
.in
->nextchr
, PPBUFSIZ
);
908 if (pp
.in
->nextchr
[c
- 1] == '\n') pp
.in
->flags
|= IN_newline
;
909 else pp
.in
->flags
&= ~IN_newline
;
911 if (!(pp
.in
->flags
& IN_prototype
))
913 if (c
< PPBUFSIZ
&& (pp
.in
->flags
& IN_regular
))
915 pp
.in
->flags
|= IN_eof
;
924 error(ERROR_SYSTEM
|3, "read error");
927 else if ((pp
.in
->flags
^ pp
.in
->prev
->flags
) & IN_c
)
929 static char ket
[] = { 0, '}', '\n', 0 };
931 pp
.in
->flags
^= IN_c
;
932 pp
.in
->nextchr
= ket
+ 1;
935 pp
.in
->flags
|= IN_eof
;
940 pp
.in
->nextchr
[c
] = 0;
941 debug((-7, "refill(\"%s\") = %d = \"%-.*s%s\"", error_info
.file
, c
, (c
> 32 ? 32 : c
), pp
.in
->nextchr
, c
> 32 ? "..." : ""));
942 if (pp
.test
& 0x0080)
943 sfprintf(sfstderr
, "===== refill(\"%s\") = %d =====\n%s\n===== eob(\"%s\") =====\n", error_info
.file
, c
, pp
.in
->nextchr
, error_info
.file
);