Sync usage with man page.
[netbsd-mini2440.git] / usr.bin / lex / scan.l
blob2b4c2321a6afee72726ba1a493b8b02c43e0c28b
1 /* scan.l - scanner for flex input */
3 %{
4 /*-
5  * Copyright (c) 1990 The Regents of the University of California.
6  * All rights reserved.
7  *
8  * This code is derived from software contributed to Berkeley by
9  * Vern Paxson.
10  * 
11  * The United States Government has rights in this work pursuant
12  * to contract no. DE-AC03-76SF00098 between the United States
13  * Department of Energy and the University of California.
14  *
15  * Redistribution and use in source and binary forms are permitted provided
16  * that: (1) source distributions retain this entire copyright notice and
17  * comment, and (2) distributions including binaries display the following
18  * acknowledgement:  ``This product includes software developed by the
19  * University of California, Berkeley and its contributors'' in the
20  * documentation or other materials provided with the distribution and in
21  * all advertising materials mentioning features or use of this software.
22  * Neither the name of the University nor the names of its contributors may
23  * be used to endorse or promote products derived from this software without
24  * specific prior written permission.
25  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
26  * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
27  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
28  */
30 /* $NetBSD: scan.l,v 1.12 2003/07/14 11:36:49 itojun Exp $ */
32 #include "flexdef.h"
33 #include "parse.h"
/* Hand the text of the current match (yytext) to add_action(). */
35 #define ACTION_ECHO add_action( yytext )
/* Call action_define( def, 1 ) only when should_define is nonzero. */
36 #define ACTION_IFDEF(def, should_define) \
37         { \
38         if ( should_define ) \
39                 action_define( def, 1 ); \
40         }
/* Call mark_prolog(); the expansion already ends in a semicolon. */
42 #define MARK_END_OF_PROLOG mark_prolog();
/* Declare the generated scanner function as int flexscan(). */
44 #define YY_DECL \
45         int flexscan()
/* Store the first matched character, as an unsigned char, in yylval and
 * return the CHAR token.  Expands to two statements, so it is only safe
 * where a statement sequence is allowed.
 */
47 #define RETURNCHAR \
48         yylval = (unsigned char) yytext[0]; \
49         return CHAR;
/* Copy the matched text into nmstr, bounded by sizeof(nmstr), and return
 * the NAME token.  Also expands to two statements.
 */
51 #define RETURNNAME \
52         strlcpy(nmstr, yytext, sizeof(nmstr)); \
53         return NAME;
/* unput() the characters of str from the last one down to index start.
 * Uses a variable named i from the enclosing scope as the loop index.
 */
55 #define PUT_BACK_STRING(str, start) \
56         for ( i = strlen( str ) - 1; i >= start; --i ) \
57                 unput((str)[i])
/* Set reject when all_upper( str ) is nonzero.  The expansion is a bare if
 * statement, so an else following a use would bind to it.
 * NOTE(review): presumably the case test is needed because the scanner is
 * built caseless - confirm against all_upper().
 */
59 #define CHECK_REJECT(str) \
60         if ( all_upper( str ) ) \
61                 reject = true;
/* Set yymore_used when all_lower( str ) is nonzero; same bare-if caveat. */
63 #define CHECK_YYMORE(str) \
64         if ( all_lower( str ) ) \
65                 yymore_used = true;
/* Options for building this scanner itself: caseless matching, no default
 * rule, output file scan.c, start condition stack without yy_top_state,
 * and no stdin/stdout initialisation.
 */
68 %option caseless nodefault outfile="scan.c" stack noyy_top_state
69 %option nostdinit
/* Start conditions declared with %x, one for each input context that has
 * its own group of rules below.
 */
71 %x SECT2 SECT2PROLOG SECT3 CODEBLOCK PICKUPDEF SC CARETISBOL NUM QUOTE
72 %x FIRSTCCL CCL ACTION RECOVER COMMENT ACTION_STRING PERCENT_BRACE_ACTION
73 %x OPTION LINEDIR
/* WS is one or more blanks, OPTWS zero or more, NOT_WS a single character
 * that is neither a blank nor a newline.
 */
75 WS              [[:blank:]]+
76 OPTWS           [[:blank:]]*
77 NOT_WS          [^[:blank:]\n]
/* A newline, optionally preceded by a carriage return. */
79 NL              \r?\n
/* NAME starts with a letter or underscore and continues with letters,
 * digits, underscores or hyphens.  NOT_NAME is a run of characters that
 * cannot start a name, excluding asterisk and newline.
 */
81 NAME            ([[:alpha:]_][[:alnum:]_-]*)
82 NOT_NAME        [^[:alpha:]_*\n]+
/* Start condition names use the same syntax as NAME. */
84 SCNAME          {NAME}
/* Backslash followed by any non-newline character, by one to three octal
 * digits, or by x and one or two hex digits.
 */
86 ESCSEQ          (\\([^\n]|[0-7]{1,3}|x[[:xdigit:]]{1,2}))
/* Members of a character class: the first one may be a closing bracket,
 * later ones may not.  CCL_EXPR is a bracket-colon class name.
 */
88 FIRST_CCL_CHAR  ([^\\\n]|{ESCSEQ})
89 CCL_CHAR        ([^\\\n\]]|{ESCSEQ})
90 CCL_EXPR        ("[:"[[:alpha:]]+":]")
/* Letters of the one-letter percent directives that the INITIAL rules
 * accept and ignore.
 */
92 LEXOPT          [aceknopr]
95         static int bracelevel, didadef, indented_code; /* brace depth in code; a definition was installed for the current name; code block was started by indentation */
96         static int doing_rule_action = false; /* set when the action of a rule is being collected; makes the action end with YY_BREAK */
97         static int option_sense; /* value the next %option keyword sets; flipped by a no prefix */
99         int doing_codeblock = false; /* set when %{ opens a block at the start of a section 2 line */
100         int i; /* index used by PUT_BACK_STRING and by the trailing-blank trim in PICKUPDEF */
101         Char nmdef[MAXLINE]; /* text of the name definition handed to ndinstal() */
104 <INITIAL>{
        /* Section 1 of the input: code blocks, directives and name definitions. */
105         ^{WS}           indented_code = true; BEGIN(CODEBLOCK);
106         ^"/*"           ACTION_ECHO; yy_push_state( COMMENT );
107         ^#{OPTWS}line{WS}       yy_push_state( LINEDIR );
108         ^"%s"{NAME}?    return SCDECL;
109         ^"%x"{NAME}?    return XSCDECL;
110         ^"%{".*{NL}     {
111                         ++linenum;
112                         line_directive_out( (FILE *) 0, 1 );
113                         indented_code = false;
114                         BEGIN(CODEBLOCK);
115                         }
117         {WS}            /* discard */
119         ^"%%".*         {
                        /* End of section 1: reset the brace depth, move
                         * to the section 2 prolog and return SECTEND.
                         */
120                         sectnum = 2;
121                         bracelevel = 0;
122                         mark_defs1();
123                         line_directive_out( (FILE *) 0, 1 );
124                         BEGIN(SECT2PROLOG);
125                         return SECTEND;
126                         }
128         ^"%pointer".*{NL}       yytext_is_array = false; ++linenum;
129         ^"%array".*{NL}         yytext_is_array = true; ++linenum;
131         ^"%option"      BEGIN(OPTION); return OPTION_OP;
133         ^"%"{LEXOPT}{OPTWS}[[:digit:]]*{OPTWS}{NL}      ++linenum; /* ignore */
134         ^"%"{LEXOPT}{WS}.*{NL}  ++linenum;      /* ignore */
136         ^"%"[^sxaceknopr{}].*   synerr( _( "unrecognized '%' directive" ) );
138         ^{NAME}         {
                        /* A name at the start of a line begins a
                         * definition; the PICKUPDEF rules collect the
                         * rest of the line.
                         */
139                         strlcpy(nmstr, yytext, sizeof(nmstr));
140                         didadef = false;
141                         BEGIN(PICKUPDEF);
142                         }
144         {SCNAME}        RETURNNAME;
145         ^{OPTWS}{NL}    ++linenum; /* allows blank lines in section 1 */
146         {OPTWS}{NL}     ACTION_ECHO; ++linenum; /* maybe end of comment line */
150 <COMMENT>{
        /* Inside a C comment: echo all of it and pop back to the previous
         * state at the closing delimiter.
         */
151         "*/"            ACTION_ECHO; yy_pop_state();
152         "*"             ACTION_ECHO;
153         [^*\n]+         ACTION_ECHO;
154         [^*\n]*{NL}     ++linenum; ACTION_ECHO;
157 <LINEDIR>{
        /* Rest of a #line directive: the number replaces linenum and the
         * quoted name, with its quotes removed, replaces infilename (the
         * old name is released with flex_free first).  The newline pops
         * back to the state that saw the directive.
         */
158         \n              yy_pop_state();
159         [[:digit:]]+    linenum = myctoi( yytext );
161         \"[^"\n]*\"     {
162                         flex_free( (void *) infilename );
163                         infilename = copy_string( yytext + 1 );
164                         infilename[strlen( infilename ) - 1] = '\0';
165                         }
166         .               /* ignore spurious characters */
169 <CODEBLOCK>{
        /* Code in section 1, entered either by %{ or by an indented line;
         * the text is echoed through ACTION_ECHO.
         */
170         ^"%}".*{NL}     ++linenum; BEGIN(INITIAL);
172         {NAME}|{NOT_NAME}|.     ACTION_ECHO;
174         {NL}            {
175                         ++linenum;
176                         ACTION_ECHO;
                        /* An indented code line ends at its newline. */
177                         if ( indented_code )
178                                 BEGIN(INITIAL);
179                         }
183 <PICKUPDEF>{
        /* After a name at the start of a section 1 line: the rest of the
         * line is the definition installed under nmstr.
         */
184         {WS}            /* separates name and definition */
186         {NOT_WS}.*      {
187                         strlcpy((char *)nmdef, yytext, sizeof(nmdef));
189                         /* Skip trailing whitespace. */
190                         for ( i = strlen( (char *) nmdef ) - 1;
191                               i >= 0 && (nmdef[i] == ' ' || nmdef[i] == '\t');
192                               --i )
193                                 ;
                        /* i now indexes the last non-blank character, or
                         * is -1 when there is none.
                         */
195                         nmdef[i + 1] = '\0';
197                         ndinstal(nmstr, nmdef);
198                         didadef = true;
199                         }
201         {NL}            {
                        /* A newline with nothing installed means the
                         * name had no definition text.
                         */
202                         if ( ! didadef )
203                                 synerr( _( "incomplete name definition" ) );
204                         BEGIN(INITIAL);
205                         ++linenum;
206                         }
210 <OPTION>{
        /* Keywords following %option.  Blanks reset option_sense to true
         * and a no prefix inverts it, so each keyword rule below stores
         * either option_sense or its negation.
         */
211         {NL}            ++linenum; BEGIN(INITIAL);
212         {WS}            option_sense = true;
214         "="             return '=';
216         no              option_sense = ! option_sense;
218         7bit            csize = option_sense ? 128 : 256;
219         8bit            csize = option_sense ? 256 : 128;
221         align           long_align = option_sense;
222         always-interactive      {
223                         action_define( "YY_ALWAYS_INTERACTIVE", option_sense );
224                         }
225         array           yytext_is_array = option_sense;
226         backup          backing_up_report = option_sense;
227         batch           interactive = ! option_sense;
228         "c++"           C_plus_plus = option_sense;
229         caseful|case-sensitive          caseins = ! option_sense;
230         caseless|case-insensitive       caseins = option_sense;
231         debug           ddebug = option_sense;
232         default         spprdflt = ! option_sense;
233         ecs             useecs = option_sense;
234         fast            {
                        /* NOTE(review): fast and full store constants and
                         * never read option_sense, so a no prefix has no
                         * effect on them - confirm this is intended.
                         */
235                         useecs = usemecs = false;
236                         use_read = fullspd = true;
237                         }
238         full            {
239                         useecs = usemecs = false;
240                         use_read = fulltbl = true;
241                         }
242         input           ACTION_IFDEF("YY_NO_INPUT", ! option_sense);
243         interactive     interactive = option_sense;
244         lex-compat      lex_compat = option_sense;
245         main            {
                        /* main also turns yywrap off, or on when negated. */
246                         action_define( "YY_MAIN", option_sense );
247                         do_yywrap = ! option_sense;
248                         }
249         meta-ecs        usemecs = option_sense;
250         never-interactive       {
251                         action_define( "YY_NEVER_INTERACTIVE", option_sense );
252                         }
253         perf-report     performance_report += option_sense ? 1 : -1;
254         pointer         yytext_is_array = ! option_sense;
255         read            use_read = option_sense;
256         reject          reject_really_used = option_sense;
257         stack           action_define( "YY_STACK_USED", option_sense );
258         stdinit         do_stdinit = option_sense;
259         stdout          use_stdout = option_sense;
260         unput           ACTION_IFDEF("YY_NO_UNPUT", ! option_sense);
261         verbose         printstats = option_sense;
262         warn            nowarn = ! option_sense;
263         yylineno        do_yylineno = option_sense;
264         yymore          yymore_really_used = option_sense;
265         yywrap          do_yywrap = option_sense;
267         yy_push_state   ACTION_IFDEF("YY_NO_PUSH_STATE", ! option_sense);
268         yy_pop_state    ACTION_IFDEF("YY_NO_POP_STATE", ! option_sense);
269         yy_top_state    ACTION_IFDEF("YY_NO_TOP_STATE", ! option_sense);
271         yy_scan_buffer  ACTION_IFDEF("YY_NO_SCAN_BUFFER", ! option_sense);
272         yy_scan_bytes   ACTION_IFDEF("YY_NO_SCAN_BYTES", ! option_sense);
273         yy_scan_string  ACTION_IFDEF("YY_NO_SCAN_STRING", ! option_sense);
        /* The three keywords below take a value and are returned as
         * tokens rather than handled here.
         */
275         outfile         return OPT_OUTFILE;
276         prefix          return OPT_PREFIX;
277         yyclass         return OPT_YYCLASS;
279         \"[^"\n]*\"     {
280                         strlcpy(nmstr, yytext + 1, sizeof(nmstr));
281                         nmstr[strlen( nmstr ) - 1] = '\0';
282                         return NAME;
283                         }
285         (([a-mo-z]|n[a-np-z])[[:alpha:]\-+]*)|. {
286                         format_synerr( _( "unrecognized %%option: %s" ),
287                                 yytext );
288                         BEGIN(RECOVER);
289                         }
292 <RECOVER>.*{NL}         ++linenum; BEGIN(INITIAL); /* skip the rest of the line after a bad %option keyword */
295 <SECT2PROLOG>{
        /* Prolog of section 2: code ahead of the first rule is echoed.
         * bracelevel counts %{ ... %} nesting; the first unindented line
         * outside such a block is put back and starts the rules.
         */
296         ^"%{".* ++bracelevel; yyless( 2 );      /* eat only %{ */
297         ^"%}".* --bracelevel; yyless( 2 );      /* eat only %} */
299         ^{WS}.* ACTION_ECHO;    /* indented code in prolog */
301         ^{NOT_WS}.*     {       /* non-indented code */
302                         if ( bracelevel <= 0 )
303                                 { /* not in %{ ... %} */
304                                 yyless( 0 );    /* put it all back */
305                                 yy_set_bol( 1 );
306                                 mark_prolog();
307                                 BEGIN(SECT2);
308                                 }
309                         else
310                                 ACTION_ECHO;
311                         }
313         .*              ACTION_ECHO;
314         {NL}    ++linenum; ACTION_ECHO;
316         <<EOF>>         {
                        /* Input ended before any rule was seen. */
317                         mark_prolog();
318                         sectnum = 0;
319                         yyterminate(); /* to stop the parser */
320                         }
323 <SECT2>{
        /* Section 2 patterns: return the tokens that make up a rule and
         * switch to the action states once the pattern is finished.
         */
324         ^{OPTWS}{NL}    ++linenum; /* allow blank lines in section 2 */
326         ^{OPTWS}"%{"    {
                        /* A %{ at the start of a line opens a code block
                         * that is not the action of a rule.
                         */
327                         indented_code = false;
328                         doing_codeblock = true;
329                         bracelevel = 1;
330                         BEGIN(PERCENT_BRACE_ACTION);
331                         }
333         ^{OPTWS}"<"     BEGIN(SC); return '<';
334         ^{OPTWS}"^"     return '^';
335         \"              BEGIN(QUOTE); return '"';
336         "{"/[[:digit:]] BEGIN(NUM); return '{';
337         "$"/([[:blank:]]|{NL})  return '$';
339         {WS}"%{"                {
340                         bracelevel = 1;
341                         BEGIN(PERCENT_BRACE_ACTION);
                        /* If a pattern was in progress, this %{ begins its
                         * action: end the pattern with a newline token.
                         */
343                         if ( in_rule )
344                                 {
345                                 doing_rule_action = true;
346                                 in_rule = false;
347                                 return '\n';
348                                 }
349                         }
350         {WS}"|".*{NL}   continued_action = true; ++linenum; return '\n';
352         ^{WS}"/*"       {
353                         yyless( yyleng - 2 );   /* put back '/', '*' */
354                         bracelevel = 0;
355                         continued_action = false;
356                         BEGIN(ACTION);
357                         }
359         ^{WS}           /* allow indented rules */
361         {WS}            {
362                         /* This rule is separate from the one below because
363                          * otherwise we get variable trailing context, so
364                          * we can't build the scanner using -{f,F}.
365                          */
366                         bracelevel = 0;
367                         continued_action = false;
368                         BEGIN(ACTION);
370                         if ( in_rule )
371                                 {
372                                 doing_rule_action = true;
373                                 in_rule = false;
374                                 return '\n';
375                                 }
376                         }
378         {OPTWS}{NL}     {
379                         bracelevel = 0;
380                         continued_action = false;
381                         BEGIN(ACTION);
382                         unput( '\n' );  /* so <ACTION> sees it */
384                         if ( in_rule )
385                                 {
386                                 doing_rule_action = true;
387                                 in_rule = false;
388                                 return '\n';
389                                 }
390                         }
392         ^{OPTWS}"<<EOF>>"       |
393         "<<EOF>>"       return EOF_OP;
395         ^"%%".*         {
                        /* The second %% ends the rules. */
396                         sectnum = 3;
397                         BEGIN(SECT3);
398                         yyterminate(); /* to stop the parser */
399                         }
401         "["({FIRST_CCL_CHAR}|{CCL_EXPR})({CCL_CHAR}|{CCL_EXPR})*        {
                        /* A character class, matched up to but not
                         * including its closing bracket.  The text is the
                         * key under which classes are remembered.
                         */
402                         int cclval;
404                         strlcpy(nmstr, yytext, sizeof(nmstr));
406                         /* Check to see if we've already encountered this
407                          * ccl.
408                          */
409                         if ( (cclval = ccllookup( (Char *) nmstr )) != 0 )
410                                 {
                                /* Read the character after the class; it
                                 * must be the closing bracket.
                                 */
411                                 if ( input() != ']' )
412                                         synerr( _( "bad character class" ) );
414                                 yylval = cclval;
415                                 ++cclreuse;
416                                 return PREVCCL;
417                                 }
418                         else
419                                 {
420                                 /* We fudge a bit.  We know that this ccl will
421                                  * soon be numbered as lastccl + 1 by cclinit.
422                                  */
423                                 cclinstal( (Char *) nmstr, lastccl + 1 );
425                                 /* Push back everything but the leading bracket
426                                  * so the ccl can be rescanned.
427                                  */
428                                 yyless( 1 );
430                                 BEGIN(FIRSTCCL);
431                                 return '[';
432                                 }
433                         }
435         "{"{NAME}"}"    {
                        /* Reference to a named definition: look the name
                         * up and push its text back so it is rescanned in
                         * place of the reference.
                         */
436                         register Char *nmdefptr;
438                         strlcpy(nmstr, yytext + 1, sizeof(nmstr));
                        /* Chop the trailing brace.  When the name is too
                         * long for nmstr, strlcpy has already truncated and
                         * terminated the copy and yyleng - 2 would index
                         * past the end of the array, so only store when the
                         * index is inside nmstr.
                         */
439                         if ( yyleng - 2 < (int) sizeof(nmstr) )
                                nmstr[yyleng - 2] = '\0';
441                         if ( (nmdefptr = ndlookup( nmstr )) == 0 )
442                                 format_synerr(
443                                         _( "undefined definition {%s}" ),
444                                                 nmstr );
446                         else
447                                 { /* push back name surrounded by ()'s */
448                                 int len = strlen( (char *) nmdefptr );
450                                 if ( lex_compat || nmdefptr[0] == '^' ||
451                                      (len > 0 && nmdefptr[len - 1] == '$') )
452                                         { /* don't use ()'s after all */
453                                         PUT_BACK_STRING((char *) nmdefptr, 0);
                                        /* The pushed-back caret must be read
                                         * as a beginning-of-line anchor.
                                         */
455                                         if ( nmdefptr[0] == '^' )
456                                                 BEGIN(CARETISBOL);
457                                         }
459                                 else
460                                         {
                                        /* Pushed back in reverse order so
                                         * the rescan sees ( text ).
                                         */
461                                         unput(')');
462                                         PUT_BACK_STRING((char *) nmdefptr, 0);
463                                         unput('(');
464                                         }
465                                 }
466                         }
468         [/|*+?.(){}]    return (unsigned char) yytext[0]; /* pattern operators are returned as their own character code */
469         .               RETURNCHAR; /* any other character is returned as a CHAR token */
473 <SC>{
        /* Inside the angle brackets that prefix a rule with start
         * condition names.  The closing bracket returns to SECT2, or to
         * CARETISBOL when a caret follows it.
         */
474         [,*]            return (unsigned char) yytext[0];
475         ">"             BEGIN(SECT2); return '>';
476         ">"/^           BEGIN(CARETISBOL); return '>';
477         {SCNAME}        RETURNNAME;
478         .               {
479                         format_synerr( _( "bad <start condition>: %s" ),
480                                 yytext );
481                         }
484 <CARETISBOL>"^"         BEGIN(SECT2); return '^'; /* return the caret as the operator token and resume section 2 */
487 <QUOTE>{
        /* Inside a double-quoted string in a pattern: each character is
         * returned as a CHAR token until the closing quote.  A newline
         * first is reported and treated as if the quote had been closed.
         */
488         [^"\n]          RETURNCHAR;
489         \"              BEGIN(SECT2); return '"';
491         {NL}            {
492                         synerr( _( "missing quote" ) );
493                         BEGIN(SECT2);
494                         ++linenum;
495                         return '"';
496                         }
500 <FIRSTCCL>{
        /* First character after the opening bracket of a character class.
         * A caret is returned as the operator token; when a hyphen or
         * closing bracket follows it the state stays FIRSTCCL.
         */
501         "^"/[^-\]\n]    BEGIN(CCL); return '^';
502         "^"/("-"|"]")   return '^';
503         .               BEGIN(CCL); RETURNCHAR;
506 <CCL>{
        /* Remaining members of a character class.  A hyphen is the range
         * operator only when something other than the closing bracket or
         * a newline follows it.
         */
507         -/[^\]\n]       return '-';
508         [^\]\n]         RETURNCHAR;
509         "]"             BEGIN(SECT2); return ']';
510         .|{NL}          {
                        /* NOTE(review): this can match a newline but does
                         * not increment linenum - confirm intended.
                         */
511                         synerr( _( "bad character class" ) );
512                         BEGIN(SECT2);
513                         return ']';
514                         }
517 <FIRSTCCL,CCL>{
        /* Bracket-colon class names inside a character class; each known
         * name returns its CCE_ token and continues in CCL.
         */
518         "[:alnum:]"     BEGIN(CCL); return CCE_ALNUM;
519         "[:alpha:]"     BEGIN(CCL); return CCE_ALPHA;
520         "[:blank:]"     BEGIN(CCL); return CCE_BLANK;
521         "[:cntrl:]"     BEGIN(CCL); return CCE_CNTRL;
522         "[:digit:]"     BEGIN(CCL); return CCE_DIGIT;
523         "[:graph:]"     BEGIN(CCL); return CCE_GRAPH;
524         "[:lower:]"     BEGIN(CCL); return CCE_LOWER;
525         "[:print:]"     BEGIN(CCL); return CCE_PRINT;
526         "[:punct:]"     BEGIN(CCL); return CCE_PUNCT;
527         "[:space:]"     BEGIN(CCL); return CCE_SPACE;
528         "[:upper:]"     BEGIN(CCL); return CCE_UPPER;
529         "[:xdigit:]"    BEGIN(CCL); return CCE_XDIGIT;
530         {CCL_EXPR}      {
                        /* Unknown class name: report it, then return
                         * CCE_ALNUM so the caller still gets a token.
                         */
531                         format_synerr(
532                                 _( "bad character class expression: %s" ),
533                                         yytext );
534                         BEGIN(CCL); return CCE_ALNUM;
535                         }
538 <NUM>{
        /* Inside a brace-enclosed repeat count, entered when a digit
         * follows the opening brace.  Errors are reported and then
         * treated as if the closing brace had been seen.
         */
539         [[:digit:]]+    {
540                         yylval = myctoi( yytext );
541                         return NUMBER;
542                         }
544         ","             return ',';
545         "}"             BEGIN(SECT2); return '}';
547         .               {
548                         synerr( _( "bad character inside {}'s" ) );
549                         BEGIN(SECT2);
550                         return '}';
551                         }
553         {NL}            {
554                         synerr( _( "missing }" ) );
555                         BEGIN(SECT2);
556                         ++linenum;
557                         return '}';
558                         }
562 <PERCENT_BRACE_ACTION>{
        /* Code inside a %{ ... %} block in section 2.  The closing %}
         * sets bracelevel to 0 so that the newline rule at the end of this
         * group leaves the state.  Rules carrying a further start
         * condition prefix are written here so they can be shared.
         * NOTE(review): presumably the nested prefixes add ACTION and
         * CODEBLOCK to this state rather than replace it - confirm.
         */
563         {OPTWS}"%}".*           bracelevel = 0;
565         <ACTION>"/*"            ACTION_ECHO; yy_push_state( COMMENT );
567         <CODEBLOCK,ACTION>{
568                 "reject"        {
569                         ACTION_ECHO;
570                         CHECK_REJECT(yytext);
571                         }
572                 "yymore"        {
573                         ACTION_ECHO;
574                         CHECK_YYMORE(yytext);
575                         }
576         }
578         {NAME}|{NOT_NAME}|.     ACTION_ECHO;
579         {NL}            {
580                         ++linenum;
581                         ACTION_ECHO;
582                         if ( bracelevel == 0 ||
583                              (doing_codeblock && indented_code) )
584                                 {
585                                 if ( doing_rule_action )
586                                         add_action( "\tYY_BREAK\n" );
588                                 doing_rule_action = doing_codeblock = false;
589                                 BEGIN(SECT2);
590                                 }
591                         }
595         /* Reject and YYmore() are checked for above, in PERCENT_BRACE_ACTION */
596 <ACTION>{
        /* Action code of a rule: echoed through ACTION_ECHO while
         * bracelevel tracks open braces.  A newline seen at brace depth 0
         * ends the action and returns to SECT2.
         */
597         "{"             ACTION_ECHO; ++bracelevel;
598         "}"             ACTION_ECHO; --bracelevel;
599         [^[:alpha:]_{}"'/\n]+   ACTION_ECHO;
600         {NAME}          ACTION_ECHO;
601         "'"([^'\\\n]|\\.)*"'"   ACTION_ECHO; /* character constant */
602         \"              ACTION_ECHO; BEGIN(ACTION_STRING);
603         {NL}            {
604                         ++linenum;
605                         ACTION_ECHO;
606                         if ( bracelevel == 0 )
607                                 {
608                                 if ( doing_rule_action )
609                                         add_action( "\tYY_BREAK\n" );
611                                 doing_rule_action = false;
612                                 BEGIN(SECT2);
613                                 }
614                         }
615         .               ACTION_ECHO;
618 <ACTION_STRING>{
        /* Inside a string literal within an action: everything is echoed,
         * escaped characters included, and the closing quote returns to
         * ACTION.
         */
619         [^"\\\n]+       ACTION_ECHO;
620         \\.             ACTION_ECHO;
621         {NL}            ++linenum; ACTION_ECHO;
622         \"              ACTION_ECHO; BEGIN(ACTION);
623         .               ACTION_ECHO;
626 <COMMENT,ACTION,ACTION_STRING><<EOF>>   {
                        /* Input ended inside a comment, an action or a
                         * string in an action: report it and stop scanning.
                         */
627                         synerr( _( "EOF encountered inside an action" ) );
628                         yyterminate();
629                         }
632 <SECT2,QUOTE,FIRSTCCL,CCL>{ESCSEQ}      {
                        /* Escape sequence in a pattern: myesc() supplies
                         * the value returned with the CHAR token.
                         */
633                         yylval = myesc( (Char *) yytext );
                        /* An escape as the first class member still moves
                         * on to the rest of the class.
                         */
635                         if ( YY_START == FIRSTCCL )
636                                 BEGIN(CCL);
638                         return CHAR;
639                         }
642 <SECT3>{
        /* Section 3: every line is passed through ECHO unchanged; at end
         * of input sectnum is reset to 0 and scanning stops.
         */
643         .*(\n?)         ECHO;
644         <<EOF>>         sectnum = 0; yyterminate();
647 <*>.|\n                 format_synerr( _( "bad character: %s" ), yytext ); /* applies in every start condition to any character no other rule matched */
652 int yywrap()
653         {
654         if ( --num_input_files > 0 )
655                 {
656                 set_input_file( *++input_files );
657                 return 0;
658                 }
660         else
661                 return 1;
662         }
665 /* set_input_file - open the given file (if NULL, stdin) for scanning */
667 void set_input_file( file )
668 char *file;
669         {
670         if ( file && strcmp( file, "-" ) )
671                 {
672                 infilename = copy_string( file );
673                 yyin = fopen( infilename, "r" );
675                 if ( yyin == NULL )
676                         lerrsf( _( "can't open %s" ), file );
677                 }
679         else
680                 {
681                 yyin = stdin;
682                 infilename = copy_string( "<stdin>" );
683                 }
685         linenum = 1;
686         }
689 /* Wrapper routines for accessing the scanner's malloc routines. */
/* flex_alloc - allocate size bytes for the scanner
 *
 * Thin wrapper around malloc(); returns the new block, or NULL when the
 * allocation fails.  The block is released with flex_free().
 */
void *flex_alloc( size_t size )
        {
        return malloc( size );
        }
/* flex_realloc - resize a block obtained from flex_alloc()
 *
 * Thin wrapper around realloc(): ptr is the existing block (NULL acts like
 * a fresh allocation) and size the new length in bytes.  Returns the
 * possibly moved block, or NULL on failure, in which case ptr is still
 * valid.
 */
void *flex_realloc( void *ptr, size_t size )
        {
        return realloc( ptr, size );
        }
/* flex_free - release a block obtained from flex_alloc() or flex_realloc()
 *
 * ptr may be NULL, in which case nothing happens: free() already treats a
 * null pointer as a no-op, so no separate check is needed.
 */
void flex_free( void *ptr )
        {
        free( ptr );
        }