1 /* SPDX-License-Identifier: GPL-2.0-or-later */
3 * Lexical analysis for genksyms.
4 * Copyright 1996, 1997 Linux International.
6 * New implementation contributed by Richard Henderson <rth@tamu.edu>
7 * Based on original work by Bjorn Ekwall <bj0rn@blox.se>
9 * Taken from Linux modutils 2.4.22.
20 #include "parse.tab.h"
22 /* We've got a two-level lexer here. We let flex do basic tokenization
23 and then we categorize those basic tokens in the second stage. */
24 #define YY_DECL static int yylex1(void)
28 IDENT [A-Za-z_\$][A-Za-z0-9_\$]*
32 X_INT 0[Xx][0-9A-Fa-f]+
33 I_SUF [Uu]|[Ll]|[Uu][Ll]|[Ll][Uu]
34 INT ({O_INT}|{D_INT}|{X_INT}){I_SUF}?
36 FRAC ([0-9]*\.[0-9]+)|([0-9]+\.)
39 REAL ({FRAC}{EXP}?{F_SUF}?)|([0-9]+{EXP}{F_SUF}?)
41 STRING L?\"([^\\\"]*\\.)*[^\\\"]*\"
42 CHAR L?\'([^\\\']*\\.)*[^\\\']*\'
44 MC_TOKEN ([~%^&*+=|<>/-]=)|(&&)|("||")|(->)|(<<)|(>>)
46 /* We don't do multiple input files. */
54 /* Keep track of our location in the original source files. */
55 ^#[ \t]+{INT}[ \t]+\"[^\"\n]+\".*\n return FILENAME;
59 /* Ignore all other whitespace. */
63 {STRING} return STRING;
67 /* The Pedant requires that the other C multi-character tokens be
68 recognized as tokens. We don't actually use them since we don't
69 parse expressions, but we do want whitespace to be arranged
70 around them properly. */
71 {MC_TOKEN} return OTHER;
77 /* All other tokens are single characters. */
83 /* Bring in the keyword recognizer. */
88 /* Macros to append to our phrase collection list. */
91 * We mark any token that equals a known enumerator as
92 * SYM_ENUM_CONST. The parser will change this for struct and union tags later,
93 * the only problem is struct and union members:
94 * enum e { a, b }; struct s { int a, b; }
95 * but in this case the only effect will be that the ABI checksums become
96 * more volatile, which is acceptable. Also, such collisions are quite rare;
97 * so far one has only been observed in include/linux/telephony.h.
99 #define _APP(T,L) do { \
100 cur_node = next_node; \
101 next_node = xmalloc(sizeof(*next_node)); \
102 next_node->next = cur_node; \
103 cur_node->string = memcpy(xmalloc(L+1), T, L+1); \
105 find_symbol(cur_node->string, SYM_ENUM_CONST, 1)?\
106 SYM_ENUM_CONST : SYM_NORMAL ; \
107 cur_node->in_source_file = in_source_file; \
110 #define APP _APP(yytext, yyleng)
113 /* The second stage lexer. Here we incorporate knowledge of the state
114 of the parser to tailor the tokens that are returned. */
120 ST_NOTSTARTED, ST_NORMAL, ST_ATTRIBUTE, ST_ASM, ST_TYPEOF, ST_TYPEOF_1,
121 ST_BRACKET, ST_BRACE, ST_EXPRESSION, ST_STATIC_ASSERT,
122 } lexstate = ST_NOTSTARTED;
124 static int suppress_type_lookup, dont_want_brace_phrase;
125 static struct string_list *next_node;
126 static char *source_file;
128 int token, count = 0;
129 struct string_list *cur_node;
131 if (lexstate == ST_NOTSTARTED)
133 next_node = xmalloc(sizeof(*next_node));
134 next_node->next = NULL;
135 lexstate = ST_NORMAL;
143 else if (token == FILENAME)
147 /* Save the filename and line number for later error messages. */
152 file = strchr(yytext, '\"')+1;
153 e = strchr(file, '\"');
155 cur_filename = memcpy(xmalloc(e-file+1), file, e-file+1);
156 cur_line = atoi(yytext+2);
159 source_file = xstrdup(cur_filename);
162 in_source_file = (strcmp(cur_filename, source_file) == 0);
176 int r = is_reserved_word(yytext, yyleng);
182 lexstate = ST_ATTRIBUTE;
190 lexstate = ST_TYPEOF;
197 dont_want_brace_phrase = 3;
198 suppress_type_lookup = 2;
201 case EXPORT_SYMBOL_KEYW:
204 case STATIC_ASSERT_KEYW:
205 lexstate = ST_STATIC_ASSERT;
210 if (!suppress_type_lookup)
212 if (find_symbol(yytext, SYM_TYPEDEF, 1))
220 lexstate = ST_BRACKET;
226 if (dont_want_brace_phrase)
234 lexstate = ST_EXPRESSION;
253 lexstate = ST_NORMAL;
254 token = ATTRIBUTE_PHRASE;
273 lexstate = ST_NORMAL;
286 if (is_reserved_word(yytext, yyleng) >= 0
287 || find_symbol(yytext, SYM_TYPEDEF, 1))
291 lexstate = ST_NORMAL;
297 lexstate = ST_TYPEOF;
305 lexstate = ST_TYPEOF_1;
313 lexstate = ST_NORMAL;
314 token = TYPEOF_PHRASE;
334 lexstate = ST_NORMAL;
335 token = BRACKET_PHRASE;
354 lexstate = ST_NORMAL;
355 token = BRACE_PHRASE;
367 case '(': case '[': case '{':
372 /* is this the last line of an enum declaration? */
375 /* Put back the token we just read so that we can find it again
376 after registering the expression. */
379 lexstate = ST_NORMAL;
380 token = EXPRESSION_PHRASE;
391 /* Put back the token we just read so that we can find it again
392 after registering the expression. */
395 lexstate = ST_NORMAL;
396 token = EXPRESSION_PHRASE;
407 case ST_STATIC_ASSERT:
417 lexstate = ST_NORMAL;
418 token = STATIC_ASSERT_PHRASE;
432 if (suppress_type_lookup > 0)
433 --suppress_type_lookup;
434 if (dont_want_brace_phrase > 0)
435 --dont_want_brace_phrase;
437 yylval = &next_node->next;