2 /**********************************************************************
@ see the chapter "1.lex define for general txt" in lxr-doc.md.
4 **********************************************************************/
7 %x DUALQUOTE DOLLR_QUOTE C_CMNT_STATE SH_CMNT_STATE
8 %x HELP_TXT COMMAND PARAM
9 %x C_PREPROC ENV_VAR THIS_DOC
12 // flex -o script.lex.c script.l
13 #include "../token_id.h"
14 #include "../gmr/parser.tab.h"
24 int g_cpreproc_flag = 0;
25 int g_cmnt_state = EN_CMNT_NONE;
28 char cmnt_buff[1024]={0};
/**
 * Set g_content_before_cmnt to 1, then return `token` from the enclosing
 * function.
 *
 * The expansion is one brace-enclosed statement so that both steps stay under
 * a guarding `if`: with the former two-statement expansion,
 * `if (cond) RETURN(x);` executed the `return` unconditionally.
 * A plain block is used instead of do { } while (0) so that call sites written
 * without a trailing semicolon keep compiling.
 */
#define RETURN(token) { g_content_before_cmnt = 1; return (token); }
36 ** script.lexXXX.c is extension for general script.l.
37 ** config.lexXXX.c is extension for script.l with menuconfig
40 //#include "script.lexdecl.c"
42 #include "config.lexdecl.c"
43 //#include "../lexer.lexdecl.c"
47 /**********************************************************************
@ see the chapter "2.char set" in lxr-doc.md
49 **********************************************************************/
/** char set definition **/
/* S: any single character except a single quote, backslash, whitespace,
 * backquote, '<', '>', '&', '|' or ';'.
 * The POSIX class is written bare as [:space:] inside the bracket
 * expression; writing it with its own brackets ended the expression at the
 * first closing bracket and turned the remaining characters into a literal
 * suffix that had to follow the matched character. */
S [^\'\\[:space:]\`\<\>\&\|\;]
** @ BEGIN(<STATE>); sets the state. Use BEGIN(INITIAL); and return XXX; to go back to the INITIAL state.
** @ return <TOKEN_ID>; returns a non-zero value: the token id handed to the grammar program.
** @ without a return, control stays in yylex() and the text keeps being analyzed.
79 /*****************************************************
80 ** mask, quoted string
** an escaped (backslash-prefixed) char is used as a normal string char. it will
** be combined into T_WORD and T_WORD_QUOTE.
84 *****************************************************/
\\. {
		/* backslash escape: keep only the character that follows the backslash */
		append_string(&strbuff, yytext + 1, 1);
	}
88 /** '', single quote string **/
\$\' {
		/* start of a $'...' string: append the quote (the '$' is skipped) and switch state */
		append_string(&strbuff, yytext + 1, 1);
		BEGIN(DOLLR_QUOTE);
	}
[^\\\']+ {
		/* run of ordinary characters. '+' rather than '*': an empty match
		 * consumes no input, so the scanner would re-run this rule forever
		 * when the next character is matched by no other rule. */
		append_string(&strbuff, yytext, yyleng);
	}
\\(.|\n) {
		/* backslash escape, appended verbatim. '.' never matches a newline,
		 * so backslash+newline is accepted explicitly.
		 * TBD: translate the escape sequence. */
		append_string(&strbuff, yytext, yyleng);
	}
\' {
		/* closing quote: append it, leave the quote state and report the token */
		append_string(&strbuff, yytext, yyleng);
		BEGIN(INITIAL);
		return T_WORD_QUOTE;
	}
\'[^\']*\' {
		/* complete '...' string, appended together with both quote characters */
		append_string(&strbuff, yytext, yyleng);
		return T_WORD_QUOTE;
	}
97 ** $``, append. get executing script text, after string expanded, append
98 ** '$' before $1. or append '\$' before string expanded.
101 printf(":: string content:\n%s\n", yytext);
104 // str=$(echo "$yytext" | bash)
105 // append_string "$str" ${#str}
108 /** ``, command executing string quote. **/
110 printf(":: string content:\n%s\n", yytext);
113 // str=$(echo "$yytext" | bash)
114 // append_string "$str" ${#str}
118 /*****************************************************
120 ** @ C-PREPROC-CMD begin with '#', with token matching.
121 ** @ SH-CMNT begin with '#'.
123 ** @ C89-CMNT, and compatible with sh cmdline.
124 ** @ C89-EXT-CMNT, compatible with sh cmdline, and it
125 ** can include C89-CMNT in it.
126 ** @ comment token proc. eg: CmntStub, apidoc-gen.
127 *****************************************************/
** @ recognize the C_PREPROC_CMD token after '#', to be compatible with c-lang (#include or #if).
133 ** @ modify comment expr, and do not use SH_CMNT_STATE state.
134 ** @ append OnCPreProc() invoking.
135 ** @ process C_PREPROC_CMD in C_PREPROC state.
#[[:alnum:]_]+ {
		/* '#' directly followed by a word: a non-zero result from the handler is returned as the token id */
		retval = lex_c_preproc_hdr();
		if (retval != 0)
			return retval;
	}
138 /* this state should be defined in grammar extend. */
140 /* XXX: use this expr for tmp. */
[[:blank:]]*[^\r\n]* {
		/* rest of the line; a non-zero result from the handler is returned as the token id */
		retval = lex_c_preproc_param();
		if (retval != 0)
			return retval;
	}
** to be compatible with c-lang, insert a ' ' space after the comment char.
[#] {
		/* '#' opens a shell comment: trace it and switch to the comment state */
		printf("SH-CMNT:\n%s", yytext);
		BEGIN(SH_CMNT_STATE);
	}
[^\n\r]* {
		/* text up to the end of the line is discarded */
	}
\n|\r {
		/* line end: a non-zero result from the handler is returned as the token id */
		retval = lex_sh_cmnt();
		if (retval != 0)
			return retval;
	}
156 ** this will be mixed with cmd dir. append a ' ' after '//'.
157 ** or, char before '//' is a newline signature. eg: '\n', ',', ';'
160 // printf("last_token=%d; g_content_before_cmnt=%d;\n", last_token, g_content_before_cmnt);
161 // if (last_token == 0 || g_content_before_cmnt == 0)
162 /* TBD: '//' will be mixed with command line parameter. */
164 printf("C99-CMNT:\n%s\n", yytext);
166 // return T_CMNT_STR;
170 ** C89 normal style comment.
** TBD: add a C_CMNT_STATE2 that processes the comment without punct and token
** processing, in order to decrease cpu cost.
** TBD: use xxx_string() to hold the comment text.
176 g_cmnt_state=EN_CMNT_BEGIN;
178 strcpy(cmnt_buff, "/*");
181 // return T_CMNT_STR;
184 /* only process newline mask */
/* a doubled backslash is treated as one backslash */
187 cmnt_buff[cmnt_idx]='\\';
189 cmnt_buff[cmnt_idx]=0;
192 cmnt_buff[cmnt_idx]='\n';
194 cmnt_buff[cmnt_idx]=0;
198 * this means a char after /*, set with extended comment state.
199 * process '\n' as a normal char in comment.
200 * or, if it's a standard comment after content, '\n' is a termination char for comment.
202 if (g_cmnt_state == EN_CMNT_BEGIN)
203 g_cmnt_state=EN_CMNT_EXTEND;
205 memcpy(&cmnt_buff[cmnt_idx], yytext, yyleng);
207 cmnt_buff[cmnt_idx]=0;
* C89 extended comment. use /@ instead of "/**" in cmdline.
* or use '/**' as '/*'; it's compatible with c comments in shell script,
* and does not conflict with the string wildcard feature in CLI.
216 if (g_cmnt_state == EN_CMNT_BEGIN)
217 g_cmnt_state=EN_CMNT;
218 strcat(&cmnt_buff[cmnt_idx], yytext);
220 /* append_string(&strbuff, yytext, yyleng); */
223 cmnt_buff[cmnt_idx]='/';
225 cmnt_buff[cmnt_idx]=0;
231 lex_c89_cmnt_tail ();
235 * TBD: append this code for comment grammar.
236 // punct proc in comment state
238 const struct token_id *id = punct_look_up_token(yytext, yyleng);
239 printf(": %s\n", yytext);
242 // token proc in comment state
244 const struct token_id *id = token_id_lookup(yytext, yyleng);
246 if (id && id->flags & TF_COMMAND)
251 alloc_string(&strbuff, yytext, yyleng);
252 yylval.string = strbuff.text;
258 /*****************************************************
260 *****************************************************/
263 // printf("NL-CONTINUE:\n");
266 if (!is_unallocated_string(&strbuff))
270 yylval.string = strbuff.text;
271 // printf(".WORD: %s\n", strbuff.text);
277 * TBD: multi-nl can be combined into one nl.
278 * it is processed by grammar.
281 if (!is_unallocated_string(&strbuff))
283 // printf("^WORD: %s\n", strbuff.text);
284 yylval.string = strbuff.text;
286 g_content_before_cmnt=0;
292 /*****************************************************
294 *****************************************************/
295 /** ", dual quote string. **/
297 if (is_unallocated_string(&strbuff))
298 new_string(&strbuff);
305 ** append nest implement with $() and ``.
309 // c=translation(yytext[1]);
310 // if (c == 0 && yytext[1] != 0)
313 // append_string(&strbuff, &c, 1);
314 append_string(&strbuff, &yytext[1], 1);
` {
		/* The match is the single backquote, so yytext[1] is the terminating
		 * NUL; append the matched character itself instead of that NUL. */
		append_string(&strbuff, yytext, yyleng);
		/* BEGIN(QUOTE_EXEC); */
	}
318 // printf("T_WORD_QUOTE\n");
319 // state=pop_state();
320 yylval.string = strbuff.text;
[^\`\"\\[:punct:]]* {
		/* run of characters containing no punctuation: appended unchanged */
		append_string(&strbuff, yytext, yyleng);
	}
326 // printf("%s", yytext);
327 append_string(&strbuff, yytext, yyleng);
329 * '\', '$', '@', '`', process.
335 /*****************************************************
336 ** punct and env var.
337 *****************************************************/
** [[:punct:]] char filter: on a special char like $ or ", enter the related
** state. as the special chars can be nested, use a stack to store the state.
** for the others, unput() them back to the buffer.
344 ** do not recognize punct in INITIAL state.
345 ** recognize it in received word state.
[[:punct:]] {
		/* single punctuation char; a non-zero result from lex_punct() is the token id */
		retval = lex_punct(INITIAL);
		if (retval != 0)
			return retval;
	}
/** put this content into the 'DOLLAR' state **/
\${b}{n}* {
		/* '$' followed by a name: reported as T_ENVAR when const_string() returns non-zero */
		retval = const_string(&strbuff, T_ENVAR);
		if (retval != 0) {
			g_content_before_cmnt = 1;
			return retval;
		}
	}
\$\{(.*(\\\\)*\\\})*[^\}]*[^\\](\\\\)*\} {
		/* '${' ... '}' form, where an escaped closing brace does not end the expression */
		retval = const_string(&strbuff, T_ENVAR);
		if (retval != 0) {
			g_content_before_cmnt = 1;
			return retval;
		}
	}
359 if (yytext[0] == '{')
361 env_state = ENV_BRACE;
363 else if (yytext[0] == '}')
365 env_state = ENV_NONE;
367 else if (yytext[0] == '(')
369 env_state = ENV_PAREN;
371 else if (yytext[0] == ')')
373 env_state = ENV_NONE;
375 else if (yytext[0] == '[')
377 env_state = ENV_BRACKET;
379 else if (yytext[0] == ']')
381 env_state = ENV_NONE;
386 env_state = ENV_DBRACKET;
390 env_state = ENV_NONE;
394 env_state = ENV_DPAREN;
398 env_state = ENV_NONE;
402 /*****************************************************
404 *****************************************************/
407 * hex, digit, oct, bin, float
* @ the difference from c-lang is that a sign may be prepended
* to a digit/oct/float const word.
* @ an l/L suffix on a float value means it's a double float;
* here d/D is added with the same meaning.
* @ a binary const word is added.
0[xX]{H}+{IS}? {
		/* constant introduced by 0x / 0X */
		retval = const_string(&strbuff, T_HEX_WORD);
		if (retval != 0) {
			g_content_before_cmnt = 1;
			return retval;
		}
	}
[+-]?(0|[1-9]{D}*){IS}? {
		/* optionally signed constant: a lone 0, or digits not starting with 0 */
		retval = const_string(&strbuff, T_DIGIT_WORD);
		if (retval != 0) {
			g_content_before_cmnt = 1;
			return retval;
		}
	}
417 * oct string recognized as digital string.
[+-]?0{D}+{IS}? {
		/* optionally signed constant with a leading 0 followed by more digits */
		retval = const_string(&strbuff, T_OCT_WORD);
		if (retval != 0) {
			g_content_before_cmnt = 1;
			return retval;
		}
	}
[01]+[Bb] {
		/* run of 0/1 digits terminated by b or B */
		retval = const_string(&strbuff, T_BIN_WORD);
		if (retval != 0) {
			g_content_before_cmnt = 1;
			return retval;
		}
	}
[+-]?{D}+(\.{D}+)?({E})?{FS}? {
		/* float constant; the fraction and exponent are both optional here, so
		 * this pattern also covers every input of the stricter
		 * [+-]?{D}+\.{D}+({E})?{FS}? rule that comes after it. */
		retval = const_string(&strbuff, T_FLOAT_WORD);
		if (retval != 0) {
			g_content_before_cmnt = 1;
			return retval;
		}
	}
425 [+-]?{D}+\.{D}+({E})?{FS}? {
426 printf("FLOAT: %s\n", yytext);
428 // alloc_string(&strbuff, yytext, yyleng);
429 // yylval.string = strbuff.text;
430 // last="T_STR_LITERAL";
431 // return T_STR_LITERAL;
435 /*****************************************************
437 *****************************************************/
439 ** symbol word or rsv-word.
440 ** if rsv-word not matching, it's a symbol
{a}{n}* {
		/* symbol or reserved word; a non-zero result from lex_token_word() is the token id */
		retval = lex_token_word();
		if (retval != 0)
			return retval;
	}
445 ** this stage can be combined with symbol.
448 // printf("WORD: %s\n", yytext);
450 append_string(&strbuff, yytext, yyleng);
452 yylval.string = strbuff.text;
** text without blanks, made of non-blank printable chars.
** it's a WORD with punctuation signs. normally, it can be a regex string.
461 ** STRWORD is not used in INITIAL state.
463 printf("STRWORD: %s\n", yytext);
468 /*****************************************************
470 ** this state is invoked in 'help' reserved-word proc.
472 *****************************************************/
[^[:blank:]\r\n][^\r\n]* {
		/* general string append: text that does not start with a blank.
		 * The first character excludes tab as well as space; otherwise
		 * tab-led text tied with the blank-led rule below and, being listed
		 * first, was appended instead of ignored.
		 * yyleng is the length flex already computed for yytext. */
		append_string(&strbuff, yytext, yyleng);
	}
[[:blank:]][^\r\n]* {
		/* ignore the string after the reserved-word 'help': do nothing here. */
	}
479 /** comment info in help txt, it will be treated as a new line **/
[\r\n][[:blank:]]*\#[^\r\n]* {
		/* a '#' comment line counts as one line break, but only once some text has been collected */
		if (strbuff.text_size != 0)
			append_string(&strbuff, "\n", 1);
	}
481 /** multiple new lines treated as one new line **/
483 if (strbuff.text_size)
484 append_string(&strbuff, "\n", 1);
487 /* at the beginning of help_text doc */
492 /** new line with a word char. exit help doc state. **/
493 ([\r\n][[:blank:]]*)+[\r\n][[:alnum:]_] {
494 unput(yytext[yyleng-1]);
495 //append_string(&strbuff, "\n", 1);
497 yylval.string = strbuff.text;
503 yylval.string = strbuff.text;
518 /** general string append. **/
520 /* first word is used for ending token. */
522 append_string(&strbuff, yytext, strlen(yytext));
524 /** ignore string after reserved-word 'help'. **/
525 [[:blank:]][^\r\n]* { /* do nothing here. */ }
[\r\n][[:blank:]]*\#[^\r\n]* {
		/* a '#' comment line counts as one line break, but only once some text has been collected */
		if (strbuff.text_size != 0)
			append_string(&strbuff, "\n", 1);
	}
[\r\n][[:blank:]]* {
		/* line end plus the blanks after it: append one line break unless the buffer is still empty */
		if (strbuff.text_size != 0)
			append_string(&strbuff, "\n", 1);
	}
532 /** new line with a word char. exit help doc state. **/
533 ([\r\n][[:blank:]]*)+[\r\n][[:alnum:]_] {
534 unput(yytext[yyleng-1]);
535 append_string(&strbuff, "\n", 1);
536 yylval.string = strbuff.text;
546 /*****************************************************
548 *****************************************************/
559 /*****************************************************
561 *****************************************************/
. { /* any other character is dropped without action */ }
566 #include "config.lexproc.c"
567 //#include "../lexer.lexproc.c"
569 #include "script.lexproc.c"
573 /*****************************************************
575 ** this state is invoked in reserved-word proc.
576 *****************************************************/
580 const struct token_id *id = token_id_lookup(yytext, yyleng);
582 current_pos.file = current_file;
583 current_pos.lineno = yylineno;
584 if (id && id->flags & TF_COMMAND) {
588 alloc_string(&strbuff, yytext, yyleng);
589 yylval.string = text;
593 // warn_ignored_character(*yytext);
"!=" { return T_UNEQUAL; }
"<=" { return T_LESS_EQUAL; }
">" { return T_GREATER; }
">=" { return T_GREATER_EQUAL; }
"(" { return T_OPEN_PAREN; }
")" { return T_CLOSE_PAREN; }
":=" { return T_COLON_EQUAL; }
"+=" { return T_PLUS_EQUAL; }
616 // char str = yytext[0];
617 new_string(&strbuff);
\n {
		/* end of line: back to the INITIAL state and report T_EOL */
		BEGIN(INITIAL);
		return T_EOL;
	}
622 const struct token_id *id = token_id_lookup(yytext, yyleng);
623 if (id && id->flags & TF_PARAM) {
627 alloc_string(&strbuff, yytext, yyleng);
628 yylval.string = text;
. { warn_ignored_character(yytext[0]); }