1 /* $Id: token.c,v 1.48.2.2 2011/03/12 17:08:26 ragge Exp $ */
4 * Copyright (c) 2004,2009 Anders Magnusson. All rights reserved.
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 * Tokenizer for the C preprocessor.
29 * There are three main routines:
30 * - fastscan() loops over the input stream searching for magic
31 * characters that may require actions.
32 * - sloscan() tokenizes the input stream and returns tokens.
33 * It may recurse into itself during expansion.
34 * - yylex() returns something from the input stream that
35 * is suitable for yacc.
37 * Other functions of common use:
38 * - inpch() returns a raw character from the current input stream.
39 * - inch() is like inpch but \\n and trigraphs are expanded.
40 * - unch() pushes back a character to the input stream.
/*
 * Forward declarations.  Everything marked static has internal linkage
 * and is private to this file.
 */
/* Numeric constant conversion; rad is the radix (callers pass 10, 16 or 8). */
58 static void cvtdig(int rad
);
/* Value of a character constant; called on yytext when it starts with '\''. */
59 static int charcon(usch
*);
/*
 * Handlers for individual preprocessor directives.  Those visible in the
 * directive table near the end of the file are stored there next to the
 * directive name they serve ("else", "ifdef", "ifndef", "endif", "error",
 * "pragma", "undef", "warning", "elif").
 * NOTE(review): ifstmt is presumably the "if" handler -- confirm against
 * the table.
 */
60 static void elsestmt(void);
61 static void ifdefstmt(void);
62 static void ifndefstmt(void);
63 static void endifstmt(void);
64 static void ifstmt(void);
65 static void cpperror(void);
66 static void pragmastmt(void);
67 static void undefstmt(void);
68 static void cppwarning(void);
69 static void elifstmt(void);
/* Reports "invalid operator in preprocessor expression" through error(). */
70 static void badop(const char *);
/* NOTE(review): presumably the trigraph check/convert routine -- confirm. */
71 static int chktg(void);
/* NOTE(review): presumably the preprocessor-directive dispatcher -- confirm. */
72 static void ppdir(void);
/* Not static: also listed in the directive table under "include_next". */
74 void include_next(void);
/* Returns a raw character from the current input stream. */
76 static int inpch(void);
/* Declared extern: the definitions live outside this file. */
78 extern int yyget_lineno (void);
79 extern void yyset_lineno (int);
/* Like inpch(), but \\n and trigraphs are expanded. */
81 static int inch(void);
/*
 * Emit ch through putch() unless flslvl is non-zero.
 *
 * The body is wrapped in do { } while (0) so the macro always expands to
 * exactly one statement.  With a bare `if` expansion, a call site such as
 *	if (x) PUTCH(c); else ...
 * would silently attach the `else` to the macro's hidden `if`.
 * Use it as a statement followed by a semicolon.
 */
#define PUTCH(ch) do { if (!flslvl) putch(ch); } while (0)
87 /*
 * protection against recursion in #include: pushfile() calls error()
 * once the include nesting counter exceeds this value
 */
88 #define MAX_INCLEVEL 100
91 /*
 * get next character unaltered: while the current file's read pointer is
 * still below maxread, take the byte at curptr and advance curptr;
 * otherwise fall back to inpch().
 */
92 #define NXTCH() (ifiles->curptr < ifiles->maxread ? *ifiles->curptr++ : inpch())
97 0, 0, 0, 0, C_SPEC
, C_SPEC
, 0, 0,
98 0, C_WSNL
, C_SPEC
|C_WSNL
, 0,
100 0, 0, 0, 0, 0, 0, 0, 0,
101 0, 0, 0, 0, 0, 0, 0, 0,
103 C_WSNL
, C_2
, C_SPEC
, 0, 0, 0, C_2
, C_SPEC
,
104 0, 0, 0, C_2
, 0, C_2
, 0, C_SPEC
|C_2
,
105 C_I
, C_I
, C_I
, C_I
, C_I
, C_I
, C_I
, C_I
,
106 C_I
, C_I
, 0, 0, C_2
, C_2
, C_2
, C_SPEC
,
108 0, C_I
, C_I
, C_I
, C_I
, C_I
|C_EP
, C_I
, C_I
,
109 C_I
, C_I
, C_I
, C_I
, C_I
, C_I
, C_I
, C_I
,
110 C_I
|C_EP
, C_I
, C_I
, C_I
, C_I
, C_I
, C_I
, C_I
,
111 C_I
, C_I
, C_I
, 0, C_I
, 0, 0, C_I
,
113 0, C_I
, C_I
, C_I
, C_I
, C_I
|C_EP
, C_I
, C_I
,
114 C_I
, C_I
, C_I
, C_I
, C_I
, C_I
, C_I
, C_I
,
115 C_I
|C_EP
, C_I
, C_I
, C_I
, C_I
, C_I
, C_I
, C_I
,
116 C_I
, C_I
, C_I
, 0, C_2
, 0, 0, 0,
121 * No-replacement array. If a macro is found and exists in this array
122 * then no replacement shall occur. This is a stack.
124 struct symtab
*norep
[RECMAX
]; /* Stack of pointers to struct symtab, RECMAX slots */
125 int norepptr
= 1; /* Top of the norep stack; starts at 1 */
126 unsigned short bptr
[RECMAX
]; /* currently active noexpand macro stack */
127 int bidx
; /* Top of bptr stack */
134 if (ifiles
->curptr
< ifiles
->bbuf
)
135 error("pushback buffer full");
136 *ifiles
->curptr
= (usch
)c
;
144 if (Cflag
) { PUTCH('/'); PUTCH('*'); }
166 if (Cflag
) PUTCH(ch
);
172 * Quickly scan the input file, searching for:
174 * - keywords (if not flslvl)
177 * Handle strings, numbers and trigraphs with care.
178 * Only data from pp files are scanned here, never any rescans.
179 * TODO: Only print out strings before calling other functions.
185 int ch
, i
, ccnt
, onemore
;
194 printf("fastscan ch %d (%c)\n", ch
, ch
> 31 ? ch
: '@');
195 if ((spechr
[ch
] & C_SPEC
) == 0) {
203 error("bad char passed");
206 case '/': /* Comments */
207 if ((ch
= inch()) == '/') {
208 cppcmt
: if (Cflag
) { PUTCH(ch
); } else { PUTCH(' '); }
210 if (Cflag
) PUTCH(ch
);
212 } while (ch
!= -1 && ch
!= '\n');
214 } else if (ch
== '*') {
223 case '?': /* trigraphs */
230 if ((ch
= NXTCH()) == '\n') {
238 case '\n': /* newlines, for pp directives */
239 run2
: ifiles
->lineno
++;
255 } while (ch
== ' ' || ch
== '\t');
266 } else if (ch
== '%') {
278 case '\"': /* strings */
280 while ((ch
= inch()) != '\"') {
292 case '.': /* for pp-number */
295 if (ch
< '0' || ch
> '9')
298 case '0': case '1': case '2': case '3': case '4':
299 case '5': case '6': case '7': case '8': case '9':
312 if (spechr
[ch
] & C_EP
) {
315 if (ch
== '-' || ch
== '+')
318 } while ((spechr
[ch
] & C_ID
) || (ch
== '.'));
321 case '\'': /* character literal */
324 continue; /* character constants ignored */
325 while ((ch
= NXTCH()) != '\'') {
352 if ((spechr
[ch
] & C_ID
) == 0)
355 while (spechr
[ch
] & C_ID
)
359 onemore
= i
= ccnt
= 0;
361 yytext
[i
++] = (usch
)ch
;
375 } while (spechr
[ch
] & C_ID
);
381 if ((nl
= lookup((usch
*)yytext
, FIND
)) && kfind(nl
)) {
384 putstr((usch
*)yytext
);
401 yytext
[yyp
++] = (usch
)ch
;
406 /* sloscan() never passes \n, that's up to fastscan() */
410 case '\r': /* Ignore CR's */
414 case '0': case '1': case '2': case '3': case '4': case '5':
415 case '6': case '7': case '8': case '9':
416 /* read in a "pp-number" */
419 if (spechr
[ch
] & C_EP
) {
420 yytext
[yyp
++] = (usch
)ch
;
422 if (ch
== '-' || ch
== '+') {
423 yytext
[yyp
++] = (usch
)ch
;
428 if ((spechr
[ch
] & C_ID
) || ch
== '.') {
429 yytext
[yyp
++] = (usch
)ch
;
442 if ((ch
= inch()) == '\\') {
443 yytext
[yyp
++] = (usch
)ch
;
444 yytext
[yyp
++] = (usch
)inch();
446 } else if (ch
== '\n') {
453 yytext
[yyp
++] = (usch
)ch
;
463 while ((ch
= inch()) == ' ' || ch
== '\t')
464 yytext
[yyp
++] = (usch
)ch
;
470 if ((ch
= inch()) == '/') {
472 yytext
[yyp
++] = (usch
)ch
;
474 } while (ch
&& ch
!= '\n');
478 } else if (ch
== '*') {
482 if (Cflag
&& !flslvl
&& readmac
) {
489 more
: while ((c
= inch()) && c
!= '*') {
491 putch(c
), ifiles
->lineno
++;
492 else if (c
== EBLOCK
) {
495 } else if (c
== 1) /* WARN */
500 if ((c
= inch()) && c
!= '/') {
506 if (!tflag
&& !Cflag
&& !flslvl
)
519 yytext
[yyp
++] = (usch
)ch
;
532 if ((ch
= inch()) == '\\') {
533 yytext
[yyp
++] = (usch
)ch
;
534 yytext
[yyp
++] = (usch
)inch();
537 yytext
[yyp
++] = (usch
)ch
;
545 if ((ch
= inch()) == '\"' && !tflag
) {
546 yytext
[yyp
++] = (usch
)ch
;
548 } else if (ch
== '\'' && !tflag
) {
549 yytext
[yyp
++] = (usch
)ch
;
555 /* Yetch, all identifiers */
556 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
557 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
558 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
559 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
561 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
562 case 'G': case 'H': case 'I': case 'J': case 'K':
563 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
564 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
566 case '_': /* {L}({L}|{D})* */
569 for (;;) { /* get chars */
571 if (isalpha(ch
) || isdigit(ch
) || ch
== '_') {
572 yytext
[yyp
++] = (usch
)ch
;
578 yytext
[yyp
] = 0; /* need already string */
579 /* end special hacks */
598 static int ifdef
, noex
;
602 while ((ch
= sloscan()) == WSPACE
)
604 if (ch
< 128 && spechr
[ch
] & C_2
)
609 #define C2(a,b,c) case a: if (c2 == b) return c; break
614 C2('&', '&', ANDAND
);
616 if (c2
== '<') return LS
;
617 if (c2
== '=') return LE
;
620 if (c2
== '>') return RS
;
621 if (c2
== '=') return GE
;
630 if (Cflag
== 0 || c2
!= '*')
632 /* Found a comment that needs to be skipped */
637 if ((ch
= inpch()) == '/')
644 if (yytext
[0] == '\'') {
645 yylval
.node
.op
= NUMBER
;
646 yylval
.node
.nd_val
= charcon((usch
*)yytext
);
648 cvtdig(yytext
[0] != '0' ? 10 :
649 yytext
[1] == 'x' || yytext
[1] == 'X' ? 16 : 8);
653 if (strcmp((char *)yytext
, "defined") == 0) {
657 nl
= lookup((usch
*)yytext
, FIND
);
659 yylval
.node
.nd_val
= nl
!= NULL
;
661 } else if (nl
&& noex
== 0) {
662 usch
*och
= stringbuf
;
675 yylval
.node
.nd_val
= 0;
677 yylval
.node
.op
= NUMBER
;
689 usch
*yyp
, yybuf
[CPPBUF
];
698 if (ifiles
->curptr
< ifiles
->maxread
)
699 return *ifiles
->curptr
++;
701 if (ifiles
->infil
== -1)
703 if ((len
= read(ifiles
->infil
, ifiles
->buffer
, CPPBUF
)) < 0)
704 error("read error on file %s", ifiles
->orgfn
);
707 ifiles
->curptr
= ifiles
->buffer
;
708 ifiles
->maxread
= ifiles
->buffer
+ len
;
717 again
: switch (c
= inpch()) {
718 case '\\': /* continued lines */
719 msdos
: if ((c
= inpch()) == '\n') {
722 } else if (c
== '\r')
726 case '?': /* trigraphs */
738 * Let the command-line args be faked defines at beginning of file.
741 prinit(struct initar
*it
, struct includ
*ic
)
743 const char *pre
, *post
;
747 prinit(it
->next
, ic
);
748 pre
= post
= NULL
; /* XXX gcc */
752 if ((a
= strchr(it
->str
, '=')) != NULL
) {
769 strlcat((char *)ic
->buffer
, pre
, CPPBUF
+1);
770 strlcat((char *)ic
->buffer
, it
->str
, CPPBUF
+1);
771 if (strlcat((char *)ic
->buffer
, post
, CPPBUF
+1) >= CPPBUF
+1)
772 error("line exceeds buffer size");
780 * A new file included.
781 * If ifiles == NULL, this is the first file and already opened (stdin).
782 * Return 0 on success, -1 if file to be included is not found.
785 pushfile(const usch
*file
, const usch
*fn
, int idx
, void *incs
)
787 extern struct initar
*initar
;
796 if ((ic
->infil
= open((const char *)file
, O_RDONLY
)) < 0)
798 ic
->orgfn
= ic
->fname
= file
;
799 if (++inclevel
> MAX_INCLEVEL
)
800 error("Limit for nested includes exceeded");
803 ic
->orgfn
= ic
->fname
= (const usch
*)"<stdin>";
806 ic
->bbuf
= malloc(BBUFSZ
);
808 ic
->buffer
= ic
->bbuf
+NAMEMAX
;
809 ic
->curptr
= ic
->buffer
;
812 ic
->maxread
= ic
->curptr
;
824 write(ofd
, ic
->buffer
, strlen((char *)ic
->buffer
));
834 if (otrulvl
!= trulvl
|| flslvl
)
835 error("unterminated conditional");
847 * Print current position to output file.
852 usch
*s
, *os
= stringbuf
;
856 return; /* no output */
857 if (ifiles
->lineno
== 1) {
858 s
= sheap("%s: %s\n", Mfile
, ifiles
->fname
);
859 write(ofd
, s
, strlen((char *)s
));
862 putstr(sheap("\n# %d \"%s\"\n", ifiles
->lineno
, ifiles
->fname
));
871 // if (dflag)printf(": '%c'(%d)\n", c > 31 ? c : ' ', c);
875 printf("c == 10!!!\n");
/*
 * End-of-input hook for the lex-style scanner interface.
 * Unconditionally returns 1; by lex convention a non-zero result means
 * there is no further input to continue with.
 */
int
yywrap(void)
{
	return 1;
}
896 * Convert string numbers to unsigned long long and check overflow.
901 unsigned long long rv
= 0;
902 unsigned long long rv2
= 0;
909 while (isxdigit(c
)) {
910 rv
= rv
* rad
+ dig2num(c
);
913 error("Constant \"%s\" is out of range", yytext
);
918 while (*y
== 'l' || *y
== 'L')
920 yylval
.node
.op
= *y
== 'u' || *y
== 'U' ? UNUMBER
: NUMBER
;
921 yylval
.node
.nd_uval
= rv
;
922 if ((rad
== 8 || rad
== 16) && yylval
.node
.nd_val
< 0)
923 yylval
.node
.op
= UNUMBER
;
924 if (yylval
.node
.op
== NUMBER
&& yylval
.node
.nd_val
< 0)
925 /* too large for signed, see 6.4.4.1 */
926 error("Constant \"%s\" is out of range", yytext
);
934 p
++; /* skip first ' */
938 case 'a': val
= '\a'; break;
939 case 'b': val
= '\b'; break;
940 case 'f': val
= '\f'; break;
941 case 'n': val
= '\n'; break;
942 case 'r': val
= '\r'; break;
943 case 't': val
= '\t'; break;
944 case 'v': val
= '\v'; break;
945 case '\"': val
= '\"'; break;
946 case '\'': val
= '\''; break;
947 case '\\': val
= '\\'; break;
949 while (isxdigit(c
= *p
)) {
950 val
= val
* 16 + dig2num(c
);
954 case '0': case '1': case '2': case '3': case '4':
955 case '5': case '6': case '7':
957 while (isdigit(c
= *p
)) {
958 val
= val
* 8 + (c
- '0');
962 default: val
= p
[-1];
975 while ((t
= sloscan()) == WSPACE
)
979 warning("newline expected, got \"%s\"", yytext
);
980 /* ignore rest of line */
981 while ((t
= sloscan()) && t
!= '\n')
985 error("newline expected, got \"%s\"", yytext
);
995 else if (--flslvl
!=0) {
1001 } else if (trulvl
) {
1005 error("If-less else");
1006 if (elslvl
==trulvl
+flslvl
)
1007 error("Too many else");
1008 elslvl
=trulvl
+flslvl
;
1015 /* just ignore the rest of the line */
1016 while (inch() != '\n')
1033 while (t
== WSPACE
);
1036 if (lookup((usch
*)yytext
, FIND
) == 0) {
1055 while (t
== WSPACE
);
1057 error("bad ifndef");
1058 if (lookup((usch
*)yytext
, FIND
) != 0) {
1078 error("If-less endif");
1089 if (yyparse() == 0) {
1104 if (elflvl
> trulvl
)
1106 else if (--flslvl
!=0)
1117 } else if (trulvl
) {
1121 error("If-less elif");
1128 usch
*cp
= stringbuf
;
1130 while ((c
= inch()) && c
!= '\n')
1146 if (c
!= WSPACE
&& c
!= '\n')
1164 if (c
!= WSPACE
&& c
!= '\n')
1165 error("bad warning");
1167 /* svinp() adds an unwanted \n */
1169 while ((c
= inch()) && c
!= '\n')
1176 warning("#warning %s", cp
);
1186 if (sloscan() != WSPACE
|| sloscan() != IDENT
)
1188 if (flslvl
== 0 && (np
= lookup((usch
*)yytext
, FIND
)))
1198 if (sloscan() != WSPACE
)
1199 error("bad pragma");
1201 putstr((const usch
*)"\n#pragma ");
1205 putch(c
); /* Do arg expansion instead? */
1206 } while (c
&& c
!= '\n');
1213 badop(const char *op
)
1215 error("invalid operator in preprocessor expression: %s", op
);
1225 * Check for (and convert) trigraphs.
1232 if ((c
= inpch()) != '?') {
1236 switch (c
= inpch()) {
1237 case '=': c
= '#'; break;
1238 case '(': c
= '['; break;
1239 case ')': c
= ']'; break;
1240 case '<': c
= '{'; break;
1241 case '>': c
= '}'; break;
1242 case '/': c
= '\\'; break;
1243 case '\'': c
= '^'; break;
1244 case '!': c
= '|'; break;
1245 case '-': c
= '~'; break;
1258 { "ifndef", ifndefstmt
},
1259 { "ifdef", ifdefstmt
},
1261 { "include", include
},
1262 { "else", elsestmt
},
1263 { "endif", endifstmt
},
1264 { "error", cpperror
},
1265 { "warning", cppwarning
},
1266 { "define", define
},
1267 { "undef", undefstmt
},
1269 { "pragma", pragmastmt
},
1270 { "elif", elifstmt
},
1272 { "include_next", include_next
},
1277 * Handle a preprocessor directive.
1285 while ((ch
= inch()) == ' ' || ch
== '\t')
1287 if (ch
== '\n') { /* empty directive */
1291 if (ch
< 'a' || ch
> 'z')
1292 goto out
; /* something else, ignore */
1296 if (i
== sizeof(bp
)-1)
1297 goto out
; /* too long */
1299 } while ((ch
>= 'a' && ch
<= 'z') || (ch
== '_'));
1304 #define SZ (int)(sizeof(ppd)/sizeof(ppd[0]))
1305 for (i
= 0; i
< SZ
; i
++)
1306 if (bp
[0] == ppd
[i
].name
[0] && strcmp(bp
, ppd
[i
].name
) == 0)
1311 /* Found matching keyword */
1315 out
: while ((ch
= inch()) != '\n' && ch
!= -1)