scripts/genksyms/lex.l

   1 /* SPDX-License-Identifier: GPL-2.0-or-later */
   2 /*
   3  * Lexical analysis for genksyms.
   4  * Copyright 1996, 1997 Linux International.
   5  *
   6  * New implementation contributed by Richard Henderson <rth@tamu.edu>
   7  * Based on original work by Bjorn Ekwall <bj0rn@blox.se>
   8  *
   9  * Taken from Linux modutils 2.4.22.
  10  */
  11
  12 %{
  13
  14 #include <limits.h>
  15 #include <stdlib.h>
  16 #include <string.h>
  17 #include <ctype.h>
  18
  19 #include "genksyms.h"
  20 #include "parse.tab.h"
  21
   22 /* We've got a two-level lexer here.  We let flex do basic tokenization
   23    and then we categorize those basic tokens in the second stage.
         YY_DECL gives the flex-generated scanner the file-local name
         yylex1(); the yylex() defined after the rules section is the
         second stage and calls yylex1() for every raw token.  */
   24 #define YY_DECL         static int yylex1(void)
  25
  26 %}
   27
      /* Named patterns used by the rules section below.  */

      /* Identifier: a letter, an underscore or a dollar sign, followed by
         any number of letters, digits, underscores or dollar signs.  */
   28 IDENT                   [A-Za-z_\$][A-Za-z0-9_\$]*
   29
      /* Integer constants: octal (including a bare 0), decimal and hex,
         followed by an optional suffix.  I_SUF lists a single U or L and
         the UL and LU pairs only.
         NOTE(review): a double-L suffix is not covered by I_SUF, so it is
         not matched as part of the constant -- confirm whether that matters
         for the preprocessed input this tool reads.  */
   30 O_INT                   0[0-7]*
   31 D_INT                   [1-9][0-9]*
   32 X_INT                   0[Xx][0-9A-Fa-f]+
   33 I_SUF                   [Uu]|[Ll]|[Uu][Ll]|[Ll][Uu]
   34 INT                     ({O_INT}|{D_INT}|{X_INT}){I_SUF}?
   35
      /* Floating constants: either a fraction with an optional exponent, or
         plain digits with a mandatory exponent.  Both forms accept an
         optional F or L suffix.  */
   36 FRAC                    ([0-9]*\.[0-9]+)|([0-9]+\.)
   37 EXP                     [Ee][+-]?[0-9]+
   38 F_SUF                   [FfLl]
   39 REAL                    ({FRAC}{EXP}?{F_SUF}?)|([0-9]+{EXP}{F_SUF}?)
   40
      /* STRING and CHAR: string and character literals with an optional L
         prefix; a backslash escapes whatever character follows it.
         MC_TOKEN: the two-character operators, i.e. one of
         ~ % ^ & * + = | < > / - followed by =, plus && || -> << and >>.  */
   41 STRING                  L?\"([^\\\"]*\\.)*[^\\\"]*\"
   42 CHAR                    L?\'([^\\\']*\\.)*[^\\\']*\'
   43
   44 MC_TOKEN                ([~%^&*+=|<>/-]=)|(&&)|("||")|(->)|(<<)|(>>)
  45
  46 /* We don't do multiple input files.  */
  47 %option noyywrap
  48
  49 %option noinput
  50
  51 %%
  52
  53
   54  /* Keep track of our location in the original source files.
          A line marker of the form # <number> <quoted file name> ... is
          returned as FILENAME and does not touch cur_line here; yylex()
          consumes that token and sets cur_filename and cur_line from its
          text.  Any other line that starts with # is dropped and, like a
          bare newline, only advances cur_line.  */
   55 ^#[ \t]+{INT}[ \t]+\"[^\"\n]+\".*\n     return FILENAME;
   56 ^#.*\n                                  cur_line++;
   57 \n                                      cur_line++;
   58
   59  /* Ignore all other whitespace.  */
   60 [ \t\f\v\r]+                            ;
   61
   62
   63 {STRING}                                return STRING;
   64 {CHAR}                                  return CHAR;
   65 {IDENT}                                 return IDENT;
   66
   67  /* The Pedant requires that the other C multi-character tokens be
   68     recognized as tokens.  We don't actually use them since we don't
   69     parse expressions, but we do want whitespace to be arranged
   70     around them properly.  */
   71 {MC_TOKEN}                              return OTHER;
   72 {INT}                                   return INT;
   73 {REAL}                                  return REAL;
   74
   75 "..."                                   return DOTS;
   76
   77  /* All other tokens are single characters.  */
   78 .                                       return yytext[0];
  79
  80
  81 %%
  82
  83 /* Bring in the keyword recognizer.  */
  84
  85 #include "keywords.c"
  86
  87
  88 /* Macros to append to our phrase collection list.  */
  89
  90 /*
  91  * We mark any token, that that equals to a known enumerator, as
  92  * SYM_ENUM_CONST. The parser will change this for struct and union tags later,
  93  * the only problem is struct and union members:
  94  *    enum e { a, b }; struct s { int a, b; }
  95  * but in this case, the only effect will be, that the ABI checksums become
  96  * more volatile, which is acceptable. Also, such collisions are quite rare,
  97  * so far it was only observed in include/linux/telephony.h.
  98  */
  99 #define _APP(T,L)       do {                                               \
 100                           cur_node = next_node;                            \
 101                           next_node = xmalloc(sizeof(*next_node));         \
 102                           next_node->next = cur_node;                      \
 103                           cur_node->string = memcpy(xmalloc(L+1), T, L+1); \
 104                           cur_node->tag =                                  \
 105                             find_symbol(cur_node->string, SYM_ENUM_CONST, 1)?\
 106                             SYM_ENUM_CONST : SYM_NORMAL ;                  \
 107                           cur_node->in_source_file = in_source_file;       \
 108                         } while (0)
 109
 110 #define APP             _APP(yytext, yyleng)
 111
 112
  113 /* The second stage lexer.  Here we incorporate knowledge of the state
  114    of the parser to tailor the tokens that are returned.

         Raw tokens come from yylex1().  Depending on lexstate a token is
         either returned to the caller (possibly reclassified) or appended
         to the node list, after which scanning continues at the repeat
         label until a whole phrase has been collected.  A collected phrase
         is returned as one token (ATTRIBUTE_PHRASE, ASM_PHRASE,
         TYPEOF_PHRASE, BRACKET_PHRASE, BRACE_PHRASE, EXPRESSION_PHRASE or
         STATIC_ASSERT_PHRASE).

         Returns 0 when yylex1() returns 0, otherwise the token code.  On
         every non-zero return yylval is set to &next_node->next, that is
         the address of the link from the still empty tail node to the
         node that was appended last.  */
  115
  116 int
  117 yylex(void)
  118 {
  119   static enum {
  120     ST_NOTSTARTED, ST_NORMAL, ST_ATTRIBUTE, ST_ASM, ST_TYPEOF, ST_TYPEOF_1,
  121     ST_BRACKET, ST_BRACE, ST_EXPRESSION, ST_STATIC_ASSERT,
  122   } lexstate = ST_NOTSTARTED;
  123
        /* Everything declared static here survives between calls.
           suppress_type_lookup and dont_want_brace_phrase are set after a
           struct, union or enum keyword and count down by one per returned
           token.  next_node is the empty tail node that the next APP fills
           in.  source_file is the first file name seen in a line marker.  */
  124   static int suppress_type_lookup, dont_want_brace_phrase;
  125   static struct string_list *next_node;
  126   static char *source_file;
  127
        /* count is the nesting depth inside the phrase being collected.  It
           is an automatic variable, so it starts at 0 on every call.  */
  128   int token, count = 0;
  129   struct string_list *cur_node;
  130
        /* First call only: allocate the initial empty tail node.  */
  131   if (lexstate == ST_NOTSTARTED)
  132     {
  133       next_node = xmalloc(sizeof(*next_node));
  134       next_node->next = NULL;
  135       lexstate = ST_NORMAL;
  136     }
  137
  138 repeat:
  139   token = yylex1();
  140
        /* A 0 from the first stage is passed straight through.  */
  141   if (token == 0)
  142     return 0;
  143   else if (token == FILENAME)
  144     {
  145       char *file, *e;
  146
  147       /* Save the filename and line number for later error messages.  */
  148
  149       if (cur_filename)
  150         free(cur_filename);
  151
            /* yytext holds the whole line marker.  file points just past
               the opening quote and e at the closing quote, which is
               overwritten with a NUL so that the copy is terminated.  */
  152       file = strchr(yytext, '\"')+1;
  153       e = strchr(file, '\"');
  154       *e = '\0';
  155       cur_filename = memcpy(xmalloc(e-file+1), file, e-file+1);
            /* yytext + 2 skips the # and the character after it; atoi()
               skips any further leading white space before the number.  */
  156       cur_line = atoi(yytext+2);
  157
            /* The first file name seen is remembered as source_file.  From
               then on in_source_file is 1 exactly while the current file
               name equals it.  */
  158       if (!source_file) {
  159         source_file = xstrdup(cur_filename);
  160         in_source_file = 1;
  161       } else {
  162         in_source_file = (strcmp(cur_filename, source_file) == 0);
  163       }
  164
            /* Line markers are never returned to the caller.  */
  165       goto repeat;
  166     }
  167
  168   switch (lexstate)
  169     {
  170     case ST_NORMAL:
  171       switch (token)
  172         {
  173         case IDENT:
              /* The text is appended before it is classified, so a keyword
                 that opens a phrase is the first node of that phrase.  */
  174           APP;
  175           {
  176             int r = is_reserved_word(yytext, yyleng);
  177             if (r >= 0)
  178               {
  179                 switch (token = r)
  180                   {
                      /* These keywords open a phrase: switch state and keep
                         scanning without returning the keyword itself.  */
  181                   case ATTRIBUTE_KEYW:
  182                     lexstate = ST_ATTRIBUTE;
  183                     count = 0;
  184                     goto repeat;
  185                   case ASM_KEYW:
  186                     lexstate = ST_ASM;
  187                     count = 0;
  188                     goto repeat;
  189                   case TYPEOF_KEYW:
  190                     lexstate = ST_TYPEOF;
  191                     count = 0;
  192                     goto repeat;
  193
                      /* Both counters are decremented once at fini before
                         this keyword is returned.  As a result the token
                         right after the keyword is not looked up as a
                         typedef, and an opening brace among the next two
                         tokens is returned as an ordinary token instead of
                         starting a brace phrase.  */
  194                   case STRUCT_KEYW:
  195                   case UNION_KEYW:
  196                   case ENUM_KEYW:
  197                     dont_want_brace_phrase = 3;
  198                     suppress_type_lookup = 2;
  199                     goto fini;
  200
                      /* Returned as is, skipping the typedef lookup below.  */
  201                   case EXPORT_SYMBOL_KEYW:
  202                       goto fini;
  203
  204                   case STATIC_ASSERT_KEYW:
  205                     lexstate = ST_STATIC_ASSERT;
  206                     count = 0;
  207                     goto repeat;
  208                   }
  209               }
                  /* Reached for plain identifiers and for keywords not
                     handled above: a name for which find_symbol() finds a
                     SYM_TYPEDEF entry is returned as TYPE.  */
  210             if (!suppress_type_lookup)
  211               {
  212                 if (find_symbol(yytext, SYM_TYPEDEF, 1))
  213                   token = TYPE;
  214               }
  215           }
  216           break;
  217
              /* Collect everything up to the matching closing bracket and
                 return it as one BRACKET_PHRASE.  */
  218         case '[':
  219           APP;
  220           lexstate = ST_BRACKET;
  221           count = 1;
  222           goto repeat;
  223
              /* While dont_want_brace_phrase is non-zero the brace is
                 returned as an ordinary token.  Otherwise the whole braced
                 block is collected and returned as one BRACE_PHRASE.  */
  224         case '{':
  225           APP;
  226           if (dont_want_brace_phrase)
  227             break;
  228           lexstate = ST_BRACE;
  229           count = 1;
  230           goto repeat;
  231
              /* The token itself is returned now; what follows it is
                 collected as an expression on the next call.  */
  232         case '=': case ':':
  233           APP;
  234           lexstate = ST_EXPRESSION;
  235           break;
  236
  237         default:
  238           APP;
  239           break;
  240         }
  241       break;
  242
          /* Every token is appended.  Parentheses are counted and the
             phrase ends with the closing parenthesis that brings count
             back to 0.  */
  243     case ST_ATTRIBUTE:
  244       APP;
  245       switch (token)
  246         {
  247         case '(':
  248           ++count;
  249           goto repeat;
  250         case ')':
  251           if (--count == 0)
  252             {
  253               lexstate = ST_NORMAL;
  254               token = ATTRIBUTE_PHRASE;
  255               break;
  256             }
  257           goto repeat;
  258         default:
  259           goto repeat;
  260         }
  261       break;
  262
          /* Same scheme as ST_ATTRIBUTE, ending in ASM_PHRASE.  */
  263     case ST_ASM:
  264       APP;
  265       switch (token)
  266         {
  267         case '(':
  268           ++count;
  269           goto repeat;
  270         case ')':
  271           if (--count == 0)
  272             {
  273               lexstate = ST_NORMAL;
  274               token = ASM_PHRASE;
  275               break;
  276             }
  277           goto repeat;
  278         default:
  279           goto repeat;
  280         }
  281       break;
  282
          /* Reached for the token right after the first opening parenthesis
             of a typeof; ST_TYPEOF has not appended that parenthesis.  */
  283     case ST_TYPEOF_1:
  284       if (token == IDENT)
  285         {
  286           if (is_reserved_word(yytext, yyleng) >= 0
  287               || find_symbol(yytext, SYM_TYPEDEF, 1))
  288             {
                  /* The identifier is a reserved word or has a SYM_TYPEDEF
                     entry.  Push the identifier back (yyless) and then the
                     parenthesis in front of it (unput), and return only
                     TYPEOF_KEYW so that both are scanned again in
                     ST_NORMAL.  */
  289               yyless(0);
  290               unput('(');
  291               lexstate = ST_NORMAL;
  292               token = TYPEOF_KEYW;
  293               break;
  294             }
              /* Any other identifier: append the withheld parenthesis now.  */
  295           _APP("(", 1);
  296         }
            /* NOTE(review): for a token that is not an identifier nothing
               above appends the withheld parenthesis, so it appears to be
               missing from the collected phrase -- confirm whether that is
               intended.  */
  297         lexstate = ST_TYPEOF;
  298         /* FALLTHRU */
  299
          /* The first opening parenthesis is withheld and handled by
             ST_TYPEOF_1; all other tokens are appended.  The phrase ends
             with the closing parenthesis that brings count back to 0.  */
  300     case ST_TYPEOF:
  301       switch (token)
  302         {
  303         case '(':
  304           if ( ++count == 1 )
  305             lexstate = ST_TYPEOF_1;
  306           else
  307             APP;
  308           goto repeat;
  309         case ')':
  310           APP;
  311           if (--count == 0)
  312             {
  313               lexstate = ST_NORMAL;
  314               token = TYPEOF_PHRASE;
  315               break;
  316             }
  317           goto repeat;
  318         default:
  319           APP;
  320           goto repeat;
  321         }
  322       break;
  323
          /* Entered with count == 1 for the bracket already appended in
             ST_NORMAL; ends at the matching closing bracket.  */
  324     case ST_BRACKET:
  325       APP;
  326       switch (token)
  327         {
  328         case '[':
  329           ++count;
  330           goto repeat;
  331         case ']':
  332           if (--count == 0)
  333             {
  334               lexstate = ST_NORMAL;
  335               token = BRACKET_PHRASE;
  336               break;
  337             }
  338           goto repeat;
  339         default:
  340           goto repeat;
  341         }
  342       break;
  343
          /* Entered with count == 1 for the brace already appended in
             ST_NORMAL; ends at the matching closing brace.  */
  344     case ST_BRACE:
  345       APP;
  346       switch (token)
  347         {
  348         case '{':
  349           ++count;
  350           goto repeat;
  351         case '}':
  352           if (--count == 0)
  353             {
  354               lexstate = ST_NORMAL;
  355               token = BRACE_PHRASE;
  356               break;
  357             }
  358           goto repeat;
  359         default:
  360           goto repeat;
  361         }
  362       break;
  363
          /* Tokens are appended until a comma, a semicolon or a closing
             brace is seen at nesting depth 0.  That terminator is not
             appended; it is pushed back so that it is scanned again in
             ST_NORMAL after EXPRESSION_PHRASE has been returned.  */
  364     case ST_EXPRESSION:
  365       switch (token)
  366         {
  367         case '(': case '[': case '{':
  368           ++count;
  369           APP;
  370           goto repeat;
  371         case '}':
  372           /* is this the last line of an enum declaration? */
  373           if (count == 0)
  374             {
  375               /* Put back the token we just read so's we can find it again
  376                  after registering the expression.  */
  377               unput(token);
  378
  379               lexstate = ST_NORMAL;
  380               token = EXPRESSION_PHRASE;
  381               break;
  382             }
  383           /* FALLTHRU */
  384         case ')': case ']':
  385           --count;
  386           APP;
  387           goto repeat;
  388         case ',': case ';':
  389           if (count == 0)
  390             {
  391               /* Put back the token we just read so's we can find it again
  392                  after registering the expression.  */
  393               unput(token);
  394
  395               lexstate = ST_NORMAL;
  396               token = EXPRESSION_PHRASE;
  397               break;
  398             }
  399           APP;
  400           goto repeat;
  401         default:
  402           APP;
  403           goto repeat;
  404         }
  405       break;
  406
          /* Same scheme as ST_ATTRIBUTE, ending in STATIC_ASSERT_PHRASE.  */
  407     case ST_STATIC_ASSERT:
  408       APP;
  409       switch (token)
  410         {
  411         case '(':
  412           ++count;
  413           goto repeat;
  414         case ')':
  415           if (--count == 0)
  416             {
  417               lexstate = ST_NORMAL;
  418               token = STATIC_ASSERT_PHRASE;
  419               break;
  420             }
  421           goto repeat;
  422         default:
  423           goto repeat;
  424         }
  425       break;
  426
          /* ST_NOTSTARTED is replaced on the first call and every other
             state has a case above, so this is a safety net only.  */
  427     default:
  428       exit(1);
  429     }
  430 fini:
  431
        /* One step of the countdown started by struct, union or enum.  */
  432   if (suppress_type_lookup > 0)
  433     --suppress_type_lookup;
  434   if (dont_want_brace_phrase > 0)
  435     --dont_want_brace_phrase;
  436
        /* Hand the caller the link from the empty tail node to the node
           appended last.  */
  437   yylval = &next_node->next;
  438
  439   return token;
  440 }