src/config/lxrgmr-code/lxr/script.l

   1
   2 /**********************************************************************
   3 @ read the chapter "1.lex define for general txt" in lxr-doc.md.
   4  **********************************************************************/
   5
   6 /* INITIAL */
   7 %x DUALQUOTE DOLLR_QUOTE C_CMNT_STATE SH_CMNT_STATE
   8 %x HELP_TXT COMMAND PARAM
   9 %x C_PREPROC ENV_VAR THIS_DOC
  10
  11 %{
  12 // flex -o script.lex.c script.l
  13 #include "../token_id.h"
  14 #include "../gmr/parser.tab.h"
  15
  16
  17 enum EN_CMNT_STATE {                /* values stored in g_cmnt_state by the C89 comment rules */
  18         EN_CMNT_NONE = 0,           /* initial value of g_cmnt_state */
  19         EN_CMNT_BEGIN,              /* set by the rule that matches the comment opener */
  20         EN_CMNT,                    /* set when plain comment text is matched while still in EN_CMNT_BEGIN */
  21         EN_CMNT_EXTEND,             /* set when a run of '*' is matched while still in EN_CMNT_BEGIN */
  22 };
  23
  24 int g_cpreproc_flag = 0;            /* not referenced by the rules in this file; presumably used by the included lexproc code -- TODO confirm */
  25 int g_cmnt_state = EN_CMNT_NONE;    /* current EN_CMNT_STATE value */
  26
  27 int cmnt_idx=0;                     /* write offset into cmnt_buff */
  28 char cmnt_buff[1024]={0};           /* text of the C89 comment currently being collected */
  29
  30 int retval = 0;                     /* scratch: holds a helper's result so an action can test and return it */
  31
  32 /** set the content flag, then return the token; do/while(0) makes the macro a single statement (safe after an unbraced if). **/
  33 #define RETURN(token)                   do { g_content_before_cmnt = 1; return token; } while (0)
  34
  35 /**
  36  ** script.lexXXX.c is extension for general script.l.
  37  ** config.lexXXX.c is extension for script.l with menuconfig
  38  **/
  39
  40 //#include "script.lexdecl.c"
  41
  42 #include "config.lexdecl.c"
  43 //#include "../lexer.lexdecl.c"
  44
  45 %}
  46
  47 /**********************************************************************
  48 @ read the chapter "2.char set" in lxr-doc.md
  49  **********************************************************************/
  50
  51 /** char set definition **/
  52 g               [^[:space:]\\]
  53 a               [[:alpha:]_]
  54 b               [A-Za-z_]
  55 n               [A-Za-z0-9_]
  56 s               [[:punct:]]
  57
  58 S               [^\'\\[:space:]\`\<\>\&\|\;]
  59
  60 O               [0-7]
  61 D               [[:digit:]]
  62 L               [[:alpha:]_]
  63 H               [[:xdigit:]]
  64 E               [Ee][+-]?{D}+
  65
  66 FS              [FfLl]
  67 IS              [UuLl]
  68
  69 /**
  70  ** lex proc:
  71  ** @ BEGIN(<STATE>); sets the start condition. use BEGIN(INITIAL); and return XXX; to go back to the INITIAL state.
  72  ** @ return <TOKEN_ID>; returns a non-zero value: the token id handed to the grammar program.
  73  ** @ without a return, control stays in yylex() and scanning continues.
  74  **/
  75
  76 %%
  77
  78
  79         /*****************************************************
  80          ** mask, quoted string
  81          **
  82          ** trans-byte char used as a normal string char. it will
  83          ** be combined to T_WORD and T_WORD_QUOTE.
  84          *****************************************************/
  85         /** '\', mask **/
  86 \\.                                                             { append_string(&strbuff, &yytext[1], 1); }
  87
  88         /** '', single quote string **/
  89 \$\'                                                    { append_string(&strbuff, &yytext[1], 1); BEGIN(DOLLR_QUOTE); }
  90 <DOLLR_QUOTE>{
  91         [^\\\']*                                        { append_string(&strbuff, yytext, yyleng); }
  92         \\.                                                     { append_string(&strbuff, yytext, yyleng); /* TBD: append trans-byte func */ }
  93         \'                                                      { append_string(&strbuff, yytext, yyleng); BEGIN(INITIAL); return T_WORD_QUOTE; }
  94 }
  95 \'[^\']*\'                                              { append_string(&strbuff, yytext, yyleng); return T_WORD_QUOTE; }
  96         /**
  97          ** $``, append. get executing script text, after string expanded, append
  98          ** '$' before $1. or append '\$' before string expanded.
  99          **/
 100 \$\`[^\`]*\`                                    {
 101         printf(":: string content:\n%s\n", yytext);
 102
 103         // TBD:
 104         // str=$(echo "$yytext" | bash)
 105         // append_string "$str" ${#str}
 106         ;
 107 }
 108         /** ``, command executing string quote. **/
 109 \`[^\`]*\`                                              {
 110         printf(":: string content:\n%s\n", yytext);
 111
 112         // TBD:
 113         // str=$(echo "$yytext" | bash)
 114         // append_string "$str" ${#str}
 115         ;
 116 }
 117
 118         /*****************************************************
 119          ** comment
 120          ** @ C-PREPROC-CMD begin with '#', with token matching.
 121          ** @ SH-CMNT begin with '#'.
 122          ** @ C99-CMNT.
 123          ** @ C89-CMNT, and compatible with sh cmdline.
 124          ** @ C89-EXT-CMNT, compatible with sh cmdline, and it
 125          **   can include C89-CMNT in it.
 126          ** @ comment token proc. eg: CmntStub, apidoc-gen.
 127          *****************************************************/
 128         /**
 129          ** c pre-proc define
 130          ** [ \t]*#.*
 131          ** TBD:
 132          ** @ C_PREPROC_CMD token recognizing after '#', to compatible with c-lang(#include or #if).
 133          ** @ modify comment expr, and do not use SH_CMNT_STATE state.
 134          ** @ append OnCPreProc() invoking.
 135          ** @ process C_PREPROC_CMD in C_PREPROC state.
 136          **/
 137 #[[:alnum:]_]+                                  { if (retval = lex_c_preproc_hdr ()) return retval; }
 138         /* this state should be defined in grammar extend. */
 139 <C_PREPROC>{
 140         /* XXX: use this expr for tmp. */
 141         [[:blank:]]*[^\r\n]*            { if (retval = lex_c_preproc_param ()) return retval; }
 142 }
 143         /**
 144          **     script comment
 145          ** to compatible with c-lang, insert a ' ' space after comment char.
 146          **
 147          **/
 148 [#]                                                             { printf("SH-CMNT:\n%s", yytext); BEGIN(SH_CMNT_STATE); }
 149 <SH_CMNT_STATE>{
 150         [^\n\r]*                                        { /* printf("%s", yytext); */ }
 151         \n|\r                                           { if (retval = lex_sh_cmnt ()) return retval; }
 152 }
 153         /**
 154          ** c99 comment
 155          ** TBD:
 156          ** this will be mixed with cmd dir. append a ' ' after '//'.
 157          ** or, char before '//' is a newline signature. eg: '\n', ',', ';'
 158          **/
 159 \/\/\ [^\n\r]*  {
 160 //      printf("last_token=%d; g_content_before_cmnt=%d;\n", last_token, g_content_before_cmnt);
 161 //      if (last_token == 0 || g_content_before_cmnt == 0)
 162         /* TBD: '//' will be mixed with command line parameter. */
 163         {
 164                 printf("C99-CMNT:\n%s\n", yytext);
 165         //      return T_EOL;
 166         //      return T_CMNT_STR;
 167         }
 168 }
 169         /**
 170          ** C89 normal style comment.
 171          ** TBD: append a C_CMNT_STATE2, process comment without punct and token
 172          **      processing, in order to decreas cpu cost.
 173          ** TBD: use xxx_string() as a string for comment.
 174          **/
 175 \/\*    {
 176         g_cmnt_state=EN_CMNT_BEGIN;
 177         cmnt_idx=0;
 178         strcpy(cmnt_buff, "/*");
 179         cmnt_idx=2;
 180         BEGIN(C_CMNT_STATE);
 181 //      return T_CMNT_STR;
 182 }
 183 <C_CMNT_STATE>{
 184         /* only the newline mask is interpreted here; every append is clamped to sizeof(cmnt_buff), extra text is dropped */
 185         \\\\                                                    {
 186                 /* a doubled backslash is stored as a single backslash */
 187                 if (cmnt_idx < (int)sizeof(cmnt_buff) - 1)
 188                         cmnt_buff[cmnt_idx++]='\\';
 189                 cmnt_buff[cmnt_idx]=0;
 190         }
 191         \\\n                                                    {
 192                 if (cmnt_idx < (int)sizeof(cmnt_buff) - 1)
 193                         cmnt_buff[cmnt_idx++]='\n';
 194                 cmnt_buff[cmnt_idx]=0;
 195         }
 196         [\*]+                                                   {
 197                 /*
 198                  * a run of '*' directly after the comment opener selects the extended
 199                  * comment state, where '\n' is a normal char in the comment. otherwise,
 200                  * for a standard comment after content, '\n' terminates the comment.
 201                  */
 202                 if (g_cmnt_state == EN_CMNT_BEGIN)
 203                         g_cmnt_state=EN_CMNT_EXTEND;
 204                 /* copy only what still fits, always leaving one byte for the terminator */
 205                 int room = (int)sizeof(cmnt_buff) - 1 - cmnt_idx;
 206                 int take = ((int)yyleng < room) ? (int)yyleng : (room > 0 ? room : 0);
 207                 memcpy(&cmnt_buff[cmnt_idx], yytext, (size_t)take); cmnt_idx+=take;
 208                 cmnt_buff[cmnt_idx]=0;
 209                 /*
 210                  * C89 extend comment. use /@ instead of "/**" in cmdline.
 211                  * or use '/**' as '/*', it's compatible with c comment in shell script,
 212                  * and does not conflict with the string wildcard feature in CLI.
 213                  */
 214         }
 215         [^\\\*\/\n\r]*                                  {
 216                 int room = (int)sizeof(cmnt_buff) - 1 - cmnt_idx;
 217                 int take = ((int)yyleng < room) ? (int)yyleng : (room > 0 ? room : 0);
 218                 if (g_cmnt_state == EN_CMNT_BEGIN) g_cmnt_state=EN_CMNT;
 219                 memcpy(&cmnt_buff[cmnt_idx], yytext, (size_t)take); cmnt_idx+=take;
 220                 cmnt_buff[cmnt_idx]=0;
 221         }
 222         \/                                                              {
 223                 if (cmnt_idx < (int)sizeof(cmnt_buff) - 1)
 224                         cmnt_buff[cmnt_idx++]='/';
 225                 cmnt_buff[cmnt_idx]=0;
 226         }
 227         \n                                                              {
 228                 lex_c89_cmnt_nl ();
 229         }
 230         [\*]+\/                                                 {
 231                 lex_c89_cmnt_tail ();
 232         }
 233 }
 234         /*
 235          * TBD: append this code for comment grammar.
 236         // punct proc in comment state
 237         [[:punct:]]             {
 238                 const struct token_id *id = punct_look_up_token(yytext, yyleng);
 239                 printf(": %s\n", yytext);
 240                 RETURN( 1 );
 241         }
 242         // token proc in comment state
 243         {n}+                    {
 244                 const struct token_id *id = token_id_lookup(yytext, yyleng);
 245
 246                 if (id && id->flags & TF_COMMAND)
 247                 {
 248                         yylval.id = id;
 249                         RETURN( id->token );
 250                 }
 251                 alloc_string(&strbuff, yytext, yyleng);
 252                 yylval.string = strbuff.text;
 253                 RETURN( T_WORD );
 254         }
 255          */
 256
 257
 258         /*****************************************************
 259          ** blank & newline
 260          *****************************************************/
 261 \\[\n\r]                {
 262         /* do nothing */
 263 //      printf("NL-CONTINUE:\n");
 264 }
 265 [[:blank:]]*            {
 266         if (!is_unallocated_string(&strbuff))
 267         {
 268                 last_token = 0;
 269
 270                 yylval.string = strbuff.text;
 271 //              printf(".WORD: %s\n", strbuff.text);
 272                 RETURN( T_WORD );
 273         }
 274 }
 275         /*
 276          * newline token
 277          * TBD: multi-nl can be combined into one nl.
 278          *      it is processed by grammar.
 279          */
 280 [[:blank:]]*[\n\r]      {
 281         if (!is_unallocated_string(&strbuff))
 282         {
 283 //              printf("^WORD: %s\n", strbuff.text);
 284                 yylval.string = strbuff.text;
 285         }
 286         g_content_before_cmnt=0;
 287 //      printf("NL:\n");
 288         return T_EOL;
 289 }
 290
 291
 292         /*****************************************************
 293          ** quoted string.
 294          *****************************************************/
 295         /** ", dual quote string. **/
 296 \"                      {
 297         if (is_unallocated_string(&strbuff))
 298                 new_string(&strbuff);
 299
 300         BEGIN(DUALQUOTE);
 301 }
 302         /**
 303          ** DUALQUOTE
 304          ** TBD:
 305          ** append nest implement with $() and ``.
 306          **/
 307 <DUALQUOTE>{
 308         \\.                                             {
 309                 // c=translation(yytext[1]);
 310                 // if (c == 0 && yytext[1] != 0)
 311                 //      unput(yytext[1]);
 312                 // else
 313                 //      append_string(&strbuff, &c, 1);
 314                 append_string(&strbuff, &yytext[1], 1);
 315         }
 316         `                                               { append_string(&strbuff, yytext, yyleng); /* keep the backquote itself (yytext[1] is only the terminator); BEGIN(QUOTE_EXEC) is still TBD */ }
 317         \"                                              {
 318 //              printf("T_WORD_QUOTE\n");
 319                 // state=pop_state();
 320                 yylval.string = strbuff.text;
 321                 BEGIN(INITIAL);
 322                 return T_WORD_QUOTE;
 323         }
 324         [^\`\"\\[:punct:]]*              { append_string(&strbuff, yytext, yyleng); }
 325         {s}             {
 326 //              printf("%s", yytext);
 327                 append_string(&strbuff, yytext, yyleng);
 328                 /*
 329                  * '\', '$', '@', '`', process.
 330                  */
 331         }
 332 }
 333
 334
 335         /*****************************************************
 336          ** punct and env var.
 337          *****************************************************/
 338         /**
 339          ** [[:punct:]] char filter, get special char like $, ", entering relative
 340          ** state. as the special char can be used nestly, use stack to store it.
 341          **   for others, unput() back to buffer.
 342          **/
 343         /**
 344          ** do not recognize punct in INITIAL state.
 345          ** recognize it in received word state.
 346          **/
 347 [[:punct:]]                                                                                     { if (retval = lex_punct(INITIAL))      return retval; }
 348
 349         /** put this content into the state of 'DOLLAR' **/
 350 \${b}{n}*                                                                                       { if (retval = const_string(&strbuff, T_ENVAR)) { g_content_before_cmnt=1; return retval; } }
 351 \$\{(.*(\\\\)*\\\})*[^\}]*[^\\](\\\\)*\}                        { if (retval = const_string(&strbuff, T_ENVAR)) { g_content_before_cmnt=1; return retval; } }
 352 <ENV_VAR>{
 353         {b}{n}*                         {
 354                 // name
 355                 ;
 356         }
 357         [[:punct:]]                     {
 358                 //
 359                 if (yytext[0] == '{')
 360                 {
 361                         env_state = ENV_BRACE;
 362                 }
 363                 else if (yytext[0] == '}')
 364                 {
 365                         env_state = ENV_NONE;
 366                 }
 367                 else if (yytext[0] == '(')
 368                 {
 369                         env_state = ENV_PAREN;
 370                 }
 371                 else if (yytext[0] == ')')
 372                 {
 373                         env_state = ENV_NONE;
 374                 }
 375                 else if (yytext[0] == '[')
 376                 {
 377                         env_state = ENV_BRACKET;
 378                 }
 379                 else if (yytext[0] == ']')
 380                 {
 381                         env_state = ENV_NONE;
 382                 }
 383         }
 384         /* [[]] */
 385         \[\[                    {
 386                 env_state = ENV_DBRACKET;
 387         }
 388         \]\]                    {
 389                 //
 390                 env_state = ENV_NONE;
 391         }
 392         /* (()) */
 393         \(\(                    {
 394                 env_state = ENV_DPAREN;
 395         }
 396         \)\)                    {
 397                 //
 398                 env_state = ENV_NONE;
 399         }
 400 }
 401
 402         /*****************************************************
 403          ** const value word.
 404          *****************************************************/
 405         /**
 406          * const word.
 407          * hex, digit, oct, bin, float
 408          * @ the difference from c-lang is, it append signature
 409          * before digit/oct/float as a const word.
 410          * @ l/L sfx at the float value means it's a double flout,
 411          * here append d/D for the same meaning.
 412          * @ it append binary const word.
 413          **/
 414 0[xX]{H}+{IS}?                                  { if (retval = const_string(&strbuff, T_HEX_WORD)) { g_content_before_cmnt=1; return retval; } }
 415 [+-]?(0|[1-9]{D}*){IS}?                 { if (retval = const_string(&strbuff, T_DIGIT_WORD)) { g_content_before_cmnt=1; return retval; } }
 416         /*
 417          * oct string recognized as digital string.
 418          */
 419 [+-]?0{D}+{IS}?                                 { if (retval = const_string(&strbuff, T_OCT_WORD)) { g_content_before_cmnt=1; return retval; } }
 420         /* binary const */
 421 [01]+[Bb]                                               { if (retval = const_string(&strbuff, T_BIN_WORD)) { g_content_before_cmnt=1; return retval; } }
 422         /* float const */
 423 [+-]?{D}+(\.{D}+)?({E})?{FS}?   { if (retval = const_string(&strbuff, T_FLOAT_WORD)) { g_content_before_cmnt=1; return retval; } }
 424         /** string literal
 425 [+-]?{D}+\.{D}+({E})?{FS}?              {
 426         printf("FLOAT: %s\n", yytext);
 427         RETURN( 1 );
 428         // alloc_string(&strbuff, yytext, yyleng);
 429         // yylval.string = strbuff.text;
 430         // last="T_STR_LITERAL";
 431         // return T_STR_LITERAL;
 432 }
 433         **/
 434
 435         /*****************************************************
 436          ** WORD
 437          *****************************************************/
 438         /**
 439          ** symbol word or rsv-word.
 440          ** if rsv-word not matching, it's a symbol
 441          **/
 442 {a}{n}*                                 { if (retval = lex_token_word()) return retval; }
 443         /**
 444          ** normal word.
 445          ** this stage can be combined with symbol.
 446          **/
 447 {n}+                                    {
 448 //      printf("WORD: %s\n", yytext);
 449
 450         append_string(&strbuff, yytext, yyleng);
 451
 452         yylval.string = strbuff.text;
 453         RETURN( T_WORD );
 454 }
 455         /**
 456          ** string word.
 457          ** text without blanks, and include none-blank display char.
 458          ** it's a WORD with SIGNATURES. normally, it can be a regex string.
 459          **/
 460         /**
 461          ** STRWORD is not used in INITIAL state.
 462 {g}+                                    {
 463         printf("STRWORD: %s\n", yytext);
 464         RETURN( 1 );
 465 }
 466         **/
 467
 468         /*****************************************************
 469          ** help doc
 470          ** this state is invoked in 'help' reserved-word proc.
 471          ** help
 472          *****************************************************/
 473 <HELP_TXT>{
 474         /** general string append. **/
 475         [^\ \r\n][^\r\n]*                                                       { append_string(&strbuff, yytext, strlen(yytext)); }
 476         /** ignore string after reserved-word 'help'. **/
 477         [[:blank:]][^\r\n]*                                                     { /* do nothing here. */ }
 478
 479         /** comment info in help txt, it will be treated as a new line **/
 480         [\r\n][[:blank:]]*\#[^\r\n]*                            { if (strbuff.text_size) append_string(&strbuff, "\n", 1); }
 481         /** multiple new lines treated as one new line **/
 482         [\r\n][[:blank:]]*                                                      {
 483                 if (strbuff.text_size)
 484                         append_string(&strbuff, "\n", 1);
 485                 else
 486                 {
 487                         /* at the beginning of help_text doc */
 488                         return T_EOL;
 489                 }
 490         }
 491
 492         /** new line with a word char. exit help doc state. **/
 493         ([\r\n][[:blank:]]*)+[\r\n][[:alnum:]_]         {
 494                 unput(yytext[yyleng-1]);
 495                 //append_string(&strbuff, "\n", 1);
 496                 ln_string(&strbuff);
 497                 yylval.string = strbuff.text;
 498                 BEGIN(INITIAL);
 499                 return T_HELPTEXT;
 500         }
 501         <<EOF>> {                       /* input ended while help text was still being collected */
 502                 ln_string(&strbuff);
 503                 yylval.string = strbuff.text;   /* hand the collected text to the parser */
 504                 if (current_file) {
 505                         zconf_endfile();        /* defined outside this file; presumably resumes the including file -- TODO confirm */
 506                 }
 507                 else
 508                 {
 509                         fclose(yyin);
 510                         yyterminate();          /* NOTE(review): yyterminate() returns from yylex(), so on this path the T_HELPTEXT below is never delivered -- confirm this is intended */
 511                 }
 512
 513                 BEGIN(INITIAL);
 514                 return T_HELPTEXT;
 515         }
 516 }
 517 <THIS_DOC>{
 518         /** general string append. **/
 519         [^\ \r\n][^\r\n]*                                                       {
 520                 /* first word is used for ending token. */
 521                 //if (1)
 522                 append_string(&strbuff, yytext, strlen(yytext));
 523         }
 524         /** ignore string after reserved-word 'help'. **/
 525         [[:blank:]][^\r\n]*                                                     { /* do nothing here. */ }
 526
 527         /** comment info in help txt, it will be treated as a new line **/
 528         [\r\n][[:blank:]]*\#[^\r\n]*                            { if (strbuff.text_size) append_string(&strbuff, "\n", 1); }
 529         /** multiple new lines treated as one new line **/
 530         [\r\n][[:blank:]]*                                                      { if (strbuff.text_size) append_string(&strbuff, "\n", 1); }
 531
 532         /** new line with a word char. exit help doc state. **/
 533         ([\r\n][[:blank:]]*)+[\r\n][[:alnum:]_]         {
 534                 unput(yytext[yyleng-1]);
 535                 append_string(&strbuff, "\n", 1);
 536                 yylval.string = strbuff.text;
 537                 BEGIN(INITIAL);
 538                 return T_HELPTEXT;
 539         }
 540         <<EOF>> {
 541                 /* publish the collected text and leave THIS_DOC first: returning at EOF while staying in this state would hand the parser T_HELPTEXT again on every later yylex() call */
 542                 yylval.string = strbuff.text; BEGIN(INITIAL); return T_HELPTEXT;
 543         }
 544 }
 545
 546         /*****************************************************
 547          ** EOF
 548          *****************************************************/
 549 <<EOF>> {                       /* no start-condition prefix: used by every start condition that has no <<EOF>> rule of its own */
 550         if (current_file) {
 551                 zconf_endfile();        /* defined outside this file; presumably pops back to the including file -- TODO confirm */
 552                 return 0;               /* 0 is the end-of-input token value */
 553         }
 554         fclose(yyin);                   /* no current file: close the input stream and stop scanning */
 555         yyterminate();
 556 }
 557
 558
 559         /*****************************************************
 560          ** misc
 561          *****************************************************/
 562 . {} /** For others, do nothing **/
 563
 564 %%
 565
 566 #include "config.lexproc.c"
 567 //#include "../lexer.lexproc.c"
 568
 569 #include "script.lexproc.c"
 570
 571
 572
 573         /*****************************************************
 574          ** COMMAND state
 575          ** this state is invoked in reserved-word proc.
 576          *****************************************************/
 577         /**
 578 <COMMAND>{
 579         {n}+    {
 580                 const struct token_id *id = token_id_lookup(yytext, yyleng);
 581                 BEGIN(PARAM);
 582                 current_pos.file = current_file;
 583                 current_pos.lineno = yylineno;
 584                 if (id && id->flags & TF_COMMAND) {
 585                         yylval.id = id;
 586                         return id->token;
 587                 }
 588                 alloc_string(&strbuff, yytext, yyleng);
 589                 yylval.string = text;
 590                 return T_WORD;
 591         }
 592         .       {
 593                 // warn_ignored_character(*yytext);
 594         }
 595         \n      {
 596                 BEGIN(INITIAL);
 597                 return T_EOL;
 598         }
 599 }
 600
 601 <PARAM>{
 602         "&&"    return T_AND;
 603         "||"    return T_OR;
 604         "="     return T_EQUAL;
 605         "!="    return T_UNEQUAL;
 606         "<"     return T_LESS;
 607         "<="    return T_LESS_EQUAL;
 608         ">"     return T_GREATER;
 609         ">="    return T_GREATER_EQUAL;
 610         "!"     return T_NOT;
 611         "("     return T_OPEN_PAREN;
 612         ")"     return T_CLOSE_PAREN;
 613         ":="    { return T_COLON_EQUAL; }
 614         "+="    { return T_PLUS_EQUAL; }
 615         \"|\'   {
 616 //              char str = yytext[0];
 617                 new_string(&strbuff);
 618                 BEGIN(DUALQUOTE);
 619         }
 620         \n      BEGIN(INITIAL); return T_EOL;
 621         ({n}|[/.])+     {
 622                 const struct token_id *id = token_id_lookup(yytext, yyleng);
 623                 if (id && id->flags & TF_PARAM) {
 624                         yylval.id = id;
 625                         return id->token;
 626                 }
 627                 alloc_string(&strbuff, yytext, yyleng);
 628                 yylval.string = text;
 629                 return T_WORD;
 630         }
 631         #.*     // comment
 632         \\\n    ;
 633         [[:blank:]]+
 634         .       warn_ignored_character(*yytext);
 635         <<EOF>> {
 636                 BEGIN(INITIAL);
 637         }
 638 }
 639          **/