use strbuff instead of str_buff & str_idx
[build-config.git] / src / config / lxrgmr-code / lxr / script.l
blob7195b8a0eac07dd9363b840a7caedec6d31437e9
2 /**********************************************************************
3 @ read the chapter "1.lex define for general txt" in lxr-doc.md.
4  **********************************************************************/
6 /* INITIAL */
7 %x DUALQUOTE DOLLR_QUOTE C_CMNT_STATE SH_CMNT_STATE
8 %x HELP_TXT COMMAND PARAM
9 %x C_PREPROC ENV_VAR THIS_DOC
12 // flex -o script.lex.c script.l
13 #include "../token_id.h"
14 #include "../gmr/parser.tab.h"
17 enum EN_CMNT_STATE {
18         EN_CMNT_NONE = 0,
19         EN_CMNT_BEGIN,
20         EN_CMNT,
21         EN_CMNT_EXTEND,
24 int g_cpreproc_flag = 0;
25 int g_cmnt_state = EN_CMNT_NONE;
27 int cmnt_idx=0;
28 char cmnt_buff[1024]={0};
30 int retval = 0;
32 /** set the content-before-comment flag, then return the token. expands to two statements: use it only inside a braced block. **/
33 #define RETURN(token)                   g_content_before_cmnt=1; return token;
35 /**
36  ** script.lexXXX.c is extension for general script.l.
37  ** config.lexXXX.c is extension for script.l with menuconfig
38  **/
40 //#include "script.lexdecl.c"
42 #include "config.lexdecl.c"
43 //#include "../lexer.lexdecl.c"
47 /**********************************************************************
48 @ read the chapter "2.char set" in lxr-doc.md
49  **********************************************************************/
51 /** char set definition **/
52 g               [^[:space:]\\]
53 a               [[:alpha:]_]
54 b               [A-Za-z_]
55 n               [A-Za-z0-9_]
56 s               [[:punct:]]
58 S               [^\'\\[:space:]\`\<\>\&\|\;]
60 O               [0-7]
61 D               [[:digit:]]
62 L               [[:alpha:]_]
63 H               [[:xdigit:]]
64 E               [Ee][+-]?{D}+
66 FS              [FfLl]
67 IS              [UuLl]
69 /**
70  ** lex proc:
71  ** @ BEGIN(<STATE>);, set state. use BEGIN(INITIAL); and return XXX; to recover to previous state.
72  ** @ return <TOKEN_ID>;, return a non-zero value, it's the token id output to grammar program.
73  ** @ without a return, the scanner stays in yylex() and keeps analyzing the text.
74  **/
79         /*****************************************************
80          ** mask, quoted string
81          **
82          ** trans-byte char used as a normal string char. it will
83          ** be combined to T_WORD and T_WORD_QUOTE.
84          *****************************************************/
85         /** '\', mask **/
86 \\.                                                             { append_string(&strbuff, &yytext[1], 1); }
88         /** '', single quote string **/
89 \$\'                                                    { append_string(&strbuff, &yytext[1], 1); BEGIN(DOLLR_QUOTE); }
90 <DOLLR_QUOTE>{
91         [^\\\']*                                        { append_string(&strbuff, yytext, yyleng); }
92         \\.                                                     { append_string(&strbuff, yytext, yyleng); /* TBD: append trans-byte func */ }
93         \'                                                      { append_string(&strbuff, yytext, yyleng); BEGIN(INITIAL); return T_WORD_QUOTE; }
95 \'[^\']*\'                                              { append_string(&strbuff, yytext, yyleng); return T_WORD_QUOTE; }
96         /**
97          ** $``, append. get executing script text, after string expanded, append
98          ** '$' before $1. or append '\$' before string expanded.
99          **/
100 \$\`[^\`]*\`                                    {
101         printf(":: string content:\n%s\n", yytext);
102         
103         // TBD:
104         // str=$(echo "$yytext" | bash)
105         // append_string "$str" ${#str}
106         ;
108         /** ``, command executing string quote. **/
109 \`[^\`]*\`                                              {
110         printf(":: string content:\n%s\n", yytext);
111         
112         // TBD:
113         // str=$(echo "$yytext" | bash)
114         // append_string "$str" ${#str}
115         ;
118         /*****************************************************
119          ** comment
120          ** @ C-PREPROC-CMD begin with '#', with token matching.
121          ** @ SH-CMNT begin with '#'.
122          ** @ C99-CMNT.
123          ** @ C89-CMNT, and compatible with sh cmdline.
124          ** @ C89-EXT-CMNT, compatible with sh cmdline, and it
125          **   can include C89-CMNT in it.
126          ** @ comment token proc. eg: CmntStub, apidoc-gen.
127          *****************************************************/
128         /**
129          ** c pre-proc define
130          ** [ \t]*#.* 
131          ** TBD: 
132          ** @ C_PREPROC_CMD token recognizing after '#', to compatible with c-lang(#include or #if).
133          ** @ modify comment expr, and do not use SH_CMNT_STATE state.
134          ** @ append OnCPreProc() invoking.
135          ** @ process C_PREPROC_CMD in C_PREPROC state.
136          **/
137 #[[:alnum:]_]+                                  { if (retval = lex_c_preproc_hdr ()) return retval; }
138         /* this state should be defined in grammar extend. */
139 <C_PREPROC>{
140         /* XXX: use this expr for tmp. */
141         [[:blank:]]*[^\r\n]*            { if (retval = lex_c_preproc_param ()) return retval; }
143         /**
144          **     script comment
145          ** to compatible with c-lang, insert a ' ' space after comment char.
146          **
147          **/
148 [#]                                                             { printf("SH-CMNT:\n%s", yytext); BEGIN(SH_CMNT_STATE); }
149 <SH_CMNT_STATE>{
150         [^\n\r]*                                        { /* printf("%s", yytext); */ }
151         \n|\r                                           { if (retval = lex_sh_cmnt ()) return retval; }
153         /**
154          ** c99 comment
155          ** TBD:
156          ** this will be mixed with cmd dir. append a ' ' after '//'.
157          ** or, char before '//' is a newline signature. eg: '\n', ',', ';'
158          **/
159 \/\/\ [^\n\r]*  {
160 //      printf("last_token=%d; g_content_before_cmnt=%d;\n", last_token, g_content_before_cmnt);
161 //      if (last_token == 0 || g_content_before_cmnt == 0)
162         /* TBD: '//' will be mixed with command line parameter. */
163         {
164                 printf("C99-CMNT:\n%s\n", yytext);
165         //      return T_EOL;
166         //      return T_CMNT_STR;
167         }
169         /**
170          ** C89 normal style comment.
171          ** TBD: append a C_CMNT_STATE2, process comment without punct and token
172          **      processing, in order to decreas cpu cost.
173          ** TBD: use xxx_string() as a string for comment.
174          **/
175 \/\*    {
176         g_cmnt_state=EN_CMNT_BEGIN;
177         cmnt_idx=0;
178         strcpy(cmnt_buff, "/*");
179         cmnt_idx=2;
180         BEGIN(C_CMNT_STATE);
181 //      return T_CMNT_STR;
183 <C_CMNT_STATE>{
184         /* only process newline mask */
185         \\\\                                                    {
186                 /* a doubled backslash is stored as one backslash; it is dropped when cmnt_buff is full */
187                 if (cmnt_idx + 1 < (int)sizeof(cmnt_buff)) {
188                         cmnt_buff[cmnt_idx++]='\\'; cmnt_buff[cmnt_idx]=0;
189                 }
190         }
191         \\\n                                                    {
192                 /* backslash-newline is stored as a plain newline; it is dropped when cmnt_buff is full */
193                 if (cmnt_idx + 1 < (int)sizeof(cmnt_buff))
194                         { cmnt_buff[cmnt_idx++]='\n'; cmnt_buff[cmnt_idx]=0; }
195         }
196         [\*]+                                                   {
197                 /*
198                  * a '*' met while the state is still EN_CMNT_BEGIN selects the extended comment state,
199                  * where '\n' is processed as a normal char in comment.
200                  * or, if it's a standard comment after content, '\n' is a termination char for comment.
201                  */
202                 if (g_cmnt_state == EN_CMNT_BEGIN)
203                         g_cmnt_state=EN_CMNT_EXTEND;
204                 
205                 /* copy the run of '*' only while it fits (terminator included); otherwise drop it */
206                 if (cmnt_idx + (int)yyleng < (int)sizeof(cmnt_buff)) {
207                         memcpy(&cmnt_buff[cmnt_idx], yytext, yyleng); cmnt_idx+=yyleng; cmnt_buff[cmnt_idx]=0;
208                 }
209                 /*
210                  * C89 extend comment. use /@ instead of "/**" in cmdline.
211                  * or use '/**' as '/*', it's compatible with c comment in shell script.
212                  * and does not conflict with the string wildcard feature in CLI.
213                  */
214         }
215         [^\\\*\/\n\r]*                                  {
216                 if (g_cmnt_state == EN_CMNT_BEGIN)
217                         g_cmnt_state=EN_CMNT;
218                 if (cmnt_idx + (int)yyleng < (int)sizeof(cmnt_buff)) {  /* drop text that would overrun cmnt_buff */
219                         memcpy(&cmnt_buff[cmnt_idx], yytext, yyleng); cmnt_idx+=yyleng; cmnt_buff[cmnt_idx]=0;
220                 }
221         }
222         \/                                                              {
223                 /* a lone '/' is kept as comment text; it is dropped when cmnt_buff is full */
224                 if (cmnt_idx + 1 < (int)sizeof(cmnt_buff))
225                         { cmnt_buff[cmnt_idx++]='/'; cmnt_buff[cmnt_idx]=0; }
226         }
227         \n                                                              {
228                 lex_c89_cmnt_nl ();
229         }
230         [\*]+\/                                                 {
231                 lex_c89_cmnt_tail ();
232         }
234         /* 
235          * TBD: append this code for comment grammar.
236         // punct proc in comment state
237         [[:punct:]]             {
238                 const struct token_id *id = punct_look_up_token(yytext, yyleng);
239                 printf(": %s\n", yytext);
240                 RETURN( 1 );
241         }
242         // token proc in comment state
243         {n}+                    {
244                 const struct token_id *id = token_id_lookup(yytext, yyleng);
246                 if (id && id->flags & TF_COMMAND)
247                 {
248                         yylval.id = id;
249                         RETURN( id->token );
250                 }
251                 alloc_string(&strbuff, yytext, yyleng);
252                 yylval.string = strbuff.text;
253                 RETURN( T_WORD );
254         }
255          */
258         /*****************************************************
259          ** blank & newline
260          *****************************************************/
261 \\[\n\r]                {
262         /* do nothing */
263 //      printf("NL-CONTINUE:\n");
265 [[:blank:]]*            {
266         if (!is_unallocated_string(&strbuff))
267         {
268                 last_token = 0;
269                 
270                 yylval.string = strbuff.text;
271 //              printf(".WORD: %s\n", strbuff.text);
272                 RETURN( T_WORD );
273         }
275         /*
276          * newline token
277          * TBD: multi-nl can be combined into one nl.
278          *      it is processed by grammar.
279          */
280 [[:blank:]]*[\n\r]      {
281         if (!is_unallocated_string(&strbuff))
282         {
283 //              printf("^WORD: %s\n", strbuff.text);
284                 yylval.string = strbuff.text;
285         }
286         g_content_before_cmnt=0;
287 //      printf("NL:\n");
288         return T_EOL;
292         /*****************************************************
293          ** quoted string.
294          *****************************************************/
295         /** ", dual quote string. **/
296 \"                      {
297         if (is_unallocated_string(&strbuff))
298                 new_string(&strbuff);
299         
300         BEGIN(DUALQUOTE);
302         /**
303          ** DUALQUOTE
304          ** TBD:
305          ** append nest implement with $() and ``.
306          **/
307 <DUALQUOTE>{
308         \\.                                             {
309                 // c=translation(yytext[1]);
310                 // if (c == 0 && yytext[1] != 0)
311                 //      unput(yytext[1]);
312                 // else
313                 //      append_string(&strbuff, &c, 1);
314                 append_string(&strbuff, &yytext[1], 1);
315         }
316         `                                               { append_string(&strbuff, yytext, yyleng); /* keep the backquote itself; yytext[1] is the terminator. TBD: BEGIN(QUOTE_EXEC); */ }
317         \"                                              {
318 //              printf("T_WORD_QUOTE\n");
319                 // state=pop_state();
320                 yylval.string = strbuff.text;
321                 BEGIN(INITIAL);
322                 return T_WORD_QUOTE;
323         }
324         [^\`\"\\[:punct:]]*              { append_string(&strbuff, yytext, yyleng); }
325         {s}             {
326 //              printf("%s", yytext);
327                 append_string(&strbuff, yytext, yyleng);
328                 /*
329                  * '\', '$', '@', '`', process.
330                  */
331         }
335         /*****************************************************
336          ** punct and env var.
337          *****************************************************/
338         /**
339          ** [[:punct:]] char filter, get special char like $, ", entering relative
340          ** state. as the special char can be used nestly, use stack to store it.
341          **   for others, unput() back to buffer.
342          **/
343         /**
344          ** do not recognize punct in INITIAL state.
345          ** recognize it in received word state.
346          **/
347 [[:punct:]]                                                                                     { if (retval = lex_punct(INITIAL))      return retval; }
349         /** put this content into the 'DOLLAR' state **/
350 \${b}{n}*                                                                                       { if (retval = const_string(&strbuff, T_ENVAR)) { g_content_before_cmnt=1; return retval; } }
351 \$\{(.*(\\\\)*\\\})*[^\}]*[^\\](\\\\)*\}                        { if (retval = const_string(&strbuff, T_ENVAR)) { g_content_before_cmnt=1; return retval; } }
352 <ENV_VAR>{
353         {b}{n}*                         {
354                 // name
355                 ;
356         }
357         [[:punct:]]                     {
358                 /* an opening bracket char selects the matching env_state; a closing one resets it */
359                 switch (yytext[0]) {
360                 case '{':
361                         env_state = ENV_BRACE;
362                         break;
363                 case '(':
364                         env_state = ENV_PAREN;
365                         break;
366                 case '[':
367                         env_state = ENV_BRACKET;
368                         break;
369                 case '}':
370                 case ')':
371                 case ']':
372                         env_state = ENV_NONE;
373                         break;
374                 default:
375                         /* any other punctuation leaves env_state untouched */
376                         break;
377                 }
378         }
384         /* [[]] */
385         \[\[                    {
386                 env_state = ENV_DBRACKET;
387         }
388         \]\]                    {
389                 //
390                 env_state = ENV_NONE;
391         }
392         /* (()) */
393         \(\(                    {
394                 env_state = ENV_DPAREN;
395         }
396         \)\)                    {
397                 //
398                 env_state = ENV_NONE;
399         }
402         /*****************************************************
403          ** const value word.
404          *****************************************************/
405         /**
406          * const word.
407          * hex, digit, oct, bin, float
408          * @ the difference from c-lang is, it append signature
409          * before digit/oct/float as a const word.
410          * @ an l/L suffix on a float value means it's a double float,
411          * here append d/D for the same meaning.
412          * @ it append binary const word.
413          **/
414 0[xX]{H}+{IS}?                                  { if (retval = const_string(&strbuff, T_HEX_WORD)) { g_content_before_cmnt=1; return retval; } }
415 [+-]?(0|[1-9]{D}*){IS}?                 { if (retval = const_string(&strbuff, T_DIGIT_WORD)) { g_content_before_cmnt=1; return retval; } }
416         /*
417          * oct string recognized as digital string.
418          */
419 [+-]?0{D}+{IS}?                                 { if (retval = const_string(&strbuff, T_OCT_WORD)) { g_content_before_cmnt=1; return retval; } }
420         /* binary const */
421 [01]+[Bb]                                               { if (retval = const_string(&strbuff, T_BIN_WORD)) { g_content_before_cmnt=1; return retval; } }
422         /* float const */
423 [+-]?{D}+(\.{D}+)?({E})?{FS}?   { if (retval = const_string(&strbuff, T_FLOAT_WORD)) { g_content_before_cmnt=1; return retval; } }
424         /** string literal
425 [+-]?{D}+\.{D}+({E})?{FS}?              {
426         printf("FLOAT: %s\n", yytext);
427         RETURN( 1 );
428         // alloc_string(&strbuff, yytext, yyleng);
429         // yylval.string = strbuff.text;
430         // last="T_STR_LITERAL";
431         // return T_STR_LITERAL;
433         **/
435         /*****************************************************
436          ** WORD
437          *****************************************************/
438         /**
439          ** symbol word or rsv-word.
440          ** if rsv-word not matching, it's a symbol
441          **/
442 {a}{n}*                                 { if (retval = lex_token_word()) return retval; }
443         /**
444          ** normal word.
445          ** this stage can be combined with symbol.
446          **/
447 {n}+                                    {
448 //      printf("WORD: %s\n", yytext);
449         
450         append_string(&strbuff, yytext, yyleng);
451         
452         yylval.string = strbuff.text;
453         RETURN( T_WORD );
455         /**
456          ** string word.
457          ** text without blanks, and include none-blank display char.
458          ** it's a WORD with SIGNATURES. normally, it can be a regex string.
459          **/
460         /**
461          ** STRWORD is not used in INITIAL state.
462 {g}+                                    {
463         printf("STRWORD: %s\n", yytext);
464         RETURN( 1 );
466         **/
468         /*****************************************************
469          ** help doc
470          ** this state is invoked in 'help' reserved-word proc.
471          ** help
472          *****************************************************/
473 <HELP_TXT>{
474         /** general string append. **/
475         [^\ \r\n][^\r\n]*                                                       { append_string(&strbuff, yytext, strlen(yytext)); }
476         /** ignore string after reserved-word 'help'. **/
477         [[:blank:]][^\r\n]*                                                     { /* do nothing here. */ }
478         
479         /** comment info in help txt, it will be treated as a new line **/
480         [\r\n][[:blank:]]*\#[^\r\n]*                            { if (strbuff.text_size) append_string(&strbuff, "\n", 1); }
481         /** multiple new lines treated as one new line **/
482         [\r\n][[:blank:]]*                                                      {
483                 if (strbuff.text_size)
484                         append_string(&strbuff, "\n", 1);
485                 else
486                 {
487                         /* at the beginning of help_text doc */
488                         return T_EOL;
489                 }
490         }
492         /** new line with a word char. exit help doc state. **/
493         ([\r\n][[:blank:]]*)+[\r\n][[:alnum:]_]         {
494                 unput(yytext[yyleng-1]);
495                 /* the last matched char (the word char) was pushed back above; ln_string() presumably ends the text with a newline - TODO confirm */
496                 ln_string(&strbuff);
497                 yylval.string = strbuff.text;
498                 BEGIN(INITIAL);
499                 return T_HELPTEXT;
500         }
501         <<EOF>> {
502                 /* input ended inside a help text: finish the collected text and publish it */
503                 ln_string(&strbuff);
504                 yylval.string = strbuff.text;
505                 /*
506                  * when current_file is set, zconf_endfile() is called here.
507                  * otherwise nothing is closed or terminated in this rule: calling
508                  * yyterminate() here would return 0 and T_HELPTEXT would never be
509                  * returned. the INITIAL <<EOF>> rule does fclose()/yyterminate().
510                  */
511                 if (current_file)
512                         zconf_endfile();
513                 BEGIN(INITIAL);
514                 return T_HELPTEXT;
515         }
517 <THIS_DOC>{
518         /** general string append. **/
519         [^\ \r\n][^\r\n]*                                                       {
520                 /* first word is used for ending token. */
521                 //if (1)
522                 append_string(&strbuff, yytext, strlen(yytext));
523         }
524         /** ignore string after reserved-word 'help'. **/
525         [[:blank:]][^\r\n]*                                                     { /* do nothing here. */ }
526         
527         /** comment info in help txt, it will be treated as a new line **/
528         [\r\n][[:blank:]]*\#[^\r\n]*                            { if (strbuff.text_size) append_string(&strbuff, "\n", 1); }
529         /** multiple new lines treated as one new line **/
530         [\r\n][[:blank:]]*                                                      { if (strbuff.text_size) append_string(&strbuff, "\n", 1); }
532         /** new line with a word char. exit help doc state. **/
533         ([\r\n][[:blank:]]*)+[\r\n][[:alnum:]_]         {
534                 unput(yytext[yyleng-1]);
535                 append_string(&strbuff, "\n", 1);
536                 yylval.string = strbuff.text;
537                 BEGIN(INITIAL);
538                 return T_HELPTEXT;
539         }
540         <<EOF>> {
541                 /* publish the collected text and leave THIS_DOC, so that end of input is handled by the INITIAL <<EOF>> rule afterwards */
542                 yylval.string = strbuff.text; BEGIN(INITIAL); return T_HELPTEXT;
543         }
546         /*****************************************************
547          ** EOF
548          *****************************************************/
549 <<EOF>> {
550         if (current_file) {
551                 zconf_endfile();
552                 return 0;
553         }
554         fclose(yyin);
555         yyterminate();
557          
558          
559         /*****************************************************
560          ** misc
561          *****************************************************/
562 . {} /** For others, do nothing **/
566 #include "config.lexproc.c"
567 //#include "../lexer.lexproc.c"
569 #include "script.lexproc.c"
573         /*****************************************************
574          ** COMMAND state
575          ** this state is invoked in reserved-word proc.
576          *****************************************************/
577         /**
578 <COMMAND>{
579         {n}+    {
580                 const struct token_id *id = token_id_lookup(yytext, yyleng);
581                 BEGIN(PARAM);
582                 current_pos.file = current_file;
583                 current_pos.lineno = yylineno;
584                 if (id && id->flags & TF_COMMAND) {
585                         yylval.id = id;
586                         return id->token;
587                 }
588                 alloc_string(&strbuff, yytext, yyleng);
589                 yylval.string = text;
590                 return T_WORD;
591         }
592         .       {
593                 // warn_ignored_character(*yytext);
594         }
595         \n      {
596                 BEGIN(INITIAL);
597                 return T_EOL;
598         }
601 <PARAM>{
602         "&&"    return T_AND;
603         "||"    return T_OR;
604         "="     return T_EQUAL;
605         "!="    return T_UNEQUAL;
606         "<"     return T_LESS;
607         "<="    return T_LESS_EQUAL;
608         ">"     return T_GREATER;
609         ">="    return T_GREATER_EQUAL;
610         "!"     return T_NOT;
611         "("     return T_OPEN_PAREN;
612         ")"     return T_CLOSE_PAREN;
613         ":="    { return T_COLON_EQUAL; }
614         "+="    { return T_PLUS_EQUAL; }
615         \"|\'   {
616 //              char str = yytext[0];
617                 new_string(&strbuff);
618                 BEGIN(DUALQUOTE);
619         }
620         \n      BEGIN(INITIAL); return T_EOL;
621         ({n}|[/.])+     {
622                 const struct token_id *id = token_id_lookup(yytext, yyleng);
623                 if (id && id->flags & TF_PARAM) {
624                         yylval.id = id;
625                         return id->token;
626                 }
627                 alloc_string(&strbuff, yytext, yyleng);
628                 yylval.string = text;
629                 return T_WORD;
630         }
631         #.*     // comment
632         \\\n    ;
633         [[:blank:]]+
634         .       warn_ignored_character(*yytext);
635         <<EOF>> {
636                 BEGIN(INITIAL);
637         }
639          **/