Fix up mix of man(7)/mdoc(7).
[netbsd-mini2440.git] / usr.bin / lex / parse.y
blobf25dd0cd5f399080fac8b8d6cd476f3b86bf0c6f
1 /* parse.y - parser for flex input */
3 %token CHAR NUMBER SECTEND SCDECL XSCDECL NAME PREVCCL EOF_OP
4 %token OPTION_OP OPT_OUTFILE OPT_PREFIX OPT_YYCLASS
6 %token CCE_ALNUM CCE_ALPHA CCE_BLANK CCE_CNTRL CCE_DIGIT CCE_GRAPH
7 %token CCE_LOWER CCE_PRINT CCE_PUNCT CCE_SPACE CCE_UPPER CCE_XDIGIT
9 %{
10 /*-
11 * Copyright (c) 1990 The Regents of the University of California.
12 * All rights reserved.
14 * This code is derived from software contributed to Berkeley by
15 * Vern Paxson.
17 * The United States Government has rights in this work pursuant
18 * to contract no. DE-AC03-76SF00098 between the United States
19 * Department of Energy and the University of California.
21 * Redistribution and use in source and binary forms are permitted provided
22 * that: (1) source distributions retain this entire copyright notice and
23 * comment, and (2) distributions including binaries display the following
24 * acknowledgement: ``This product includes software developed by the
25 * University of California, Berkeley and its contributors'' in the
26 * documentation or other materials provided with the distribution and in
27 * all advertising materials mentioning features or use of this software.
28 * Neither the name of the University nor the names of its contributors may
29 * be used to endorse or promote products derived from this software without
30 * specific prior written permission.
31 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
32 * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
33 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
36 /* $NetBSD: parse.y,v 1.10 2002/01/29 10:20:34 tv Exp $ */
39 /* Some versions of bison are broken in that they use alloca() but don't
40 * declare it properly. The following is the patented (just kidding!)
41 * #ifdef chud to fix the problem, courtesy of Francois Pinard.
43 #ifdef YYBISON
44 /* AIX requires this to be the first thing in the file. What a piece. */
45 # ifdef _AIX
46 #pragma alloca
47 # endif
48 #endif
50 #include "flexdef.h"
52 /* The remainder of the alloca() cruft has to come after including flexdef.h,
53 * so HAVE_ALLOCA_H is (possibly) defined.
55 #ifdef YYBISON
56 # ifdef __GNUC__
57 # ifndef alloca
58 # define alloca __builtin_alloca
59 # endif
60 # else
61 # if HAVE_ALLOCA_H
62 # include <alloca.h>
63 # else
64 # ifdef __hpux
65 void *alloca ();
66 # else
67 # ifdef __TURBOC__
68 # include <malloc.h>
69 # else
70 char *alloca ();
71 # endif
72 # endif
73 # endif
74 # endif
75 #endif
77 /* Bletch, ^^^^ that was ugly! */
80 int pat, scnum, eps, headcnt, trailcnt, anyccl, lastchar, i, rulelen;
81 int trlcontxt, xcluflg, currccl, cclsorted, varlength, variable_trail_rule;
83 int *scon_stk;
84 int scon_stk_ptr;
86 static int madeany = false; /* whether we've made the '.' character class */
87 int previous_continued_action; /* whether the previous rule's action was '|' */
/* Expand a POSIX character class expression.
 *
 * Expands to a brace-enclosed block (so no trailing semicolon is needed
 * at the point of use) which adds to currccl every character code in
 * the range [0, csize) for which both isascii() and func() are true.
 */
#define CCL_EXPR(func) \
	{ \
	int ch; \
	for ( ch = 0; ch < csize; ++ch ) \
		{ \
		if ( isascii(ch) && func(ch) ) \
			ccladd( currccl, ch ); \
		} \
	}
/* POSIX defines isblank(), but it is not part of ANSI C, so test for
 * the two blank characters (space and horizontal tab) directly.
 */
#define IS_BLANK(ch) ((ch) == ' ' || (ch) == '\t')
101 /* On some over-ambitious machines, such as DEC Alphas, the default
102 * token type is "long" instead of "int"; this leads to problems with
103 * declaring yylval in flexdef.h. But so far, all the yacc's I've seen
104 * wrap their definitions of YYSTYPE with "#ifndef YYSTYPE"'s, so the
105 * following should ensure that the default token type is "int".
107 #define YYSTYPE int
112 goal : initlex sect1 sect1end sect2 initforrule
113 { /* add default rule */
114 int def_rule;
116 pat = cclinit();
117 cclnegate( pat );
119 def_rule = mkstate( -pat );
121 /* Remember the number of the default rule so we
122 * don't generate "can't match" warnings for it.
124 default_rule = num_rules;
126 finish_rule( def_rule, false, 0, 0 );
128 for ( i = 1; i <= lastsc; ++i )
129 scset[i] = mkbranch( scset[i], def_rule );
131 if ( spprdflt )
132 add_action(
133 "YY_FATAL_ERROR( \"flex scanner jammed\" )" );
134 else
135 add_action( "ECHO" );
137 add_action( ";\n\tYY_BREAK\n" );
141 initlex :
142 { /* initialize for processing rules */
144 /* Create default DFA start condition. */
145 scinstal( "INITIAL", false );
149 sect1 : sect1 startconddecl namelist1
150 | sect1 options
152 | error
153 { synerr( "unknown error processing section 1" ); }
156 sect1end : SECTEND
158 check_options();
159 scon_stk = allocate_integer_array( lastsc + 1 );
160 scon_stk_ptr = 0;
164 startconddecl : SCDECL
165 { xcluflg = false; }
167 | XSCDECL
168 { xcluflg = true; }
171 namelist1 : namelist1 NAME
172 { scinstal( nmstr, xcluflg ); }
174 | NAME
175 { scinstal( nmstr, xcluflg ); }
177 | error
178 { synerr( "bad start condition list" ); }
181 options : OPTION_OP optionlist
184 optionlist : optionlist option
188 option : OPT_OUTFILE '=' NAME
190 outfilename = copy_string( nmstr );
191 did_outfilename = 1;
193 | OPT_PREFIX '=' NAME
194 { prefix = copy_string( nmstr ); }
195 | OPT_YYCLASS '=' NAME
196 { yyclass = copy_string( nmstr ); }
199 sect2 : sect2 scon initforrule flexrule '\n'
200 { scon_stk_ptr = $2; }
201 | sect2 scon '{' sect2 '}'
202 { scon_stk_ptr = $2; }
206 initforrule :
208 /* Initialize for a parse of one rule. */
209 trlcontxt = variable_trail_rule = varlength = false;
210 trailcnt = headcnt = rulelen = 0;
211 current_state_type = STATE_NORMAL;
212 previous_continued_action = continued_action;
213 in_rule = true;
215 new_rule();
219 flexrule : '^' rule
221 pat = $2;
222 finish_rule( pat, variable_trail_rule,
223 headcnt, trailcnt );
225 if ( scon_stk_ptr > 0 )
227 for ( i = 1; i <= scon_stk_ptr; ++i )
228 scbol[scon_stk[i]] =
229 mkbranch( scbol[scon_stk[i]],
230 pat );
233 else
235 /* Add to all non-exclusive start conditions,
236 * including the default (0) start condition.
239 for ( i = 1; i <= lastsc; ++i )
240 if ( ! scxclu[i] )
241 scbol[i] = mkbranch( scbol[i],
242 pat );
245 if ( ! bol_needed )
247 bol_needed = true;
249 if ( performance_report > 1 )
250 pinpoint_message(
251 "'^' operator results in sub-optimal performance" );
255 | rule
257 pat = $1;
258 finish_rule( pat, variable_trail_rule,
259 headcnt, trailcnt );
261 if ( scon_stk_ptr > 0 )
263 for ( i = 1; i <= scon_stk_ptr; ++i )
264 scset[scon_stk[i]] =
265 mkbranch( scset[scon_stk[i]],
266 pat );
269 else
271 for ( i = 1; i <= lastsc; ++i )
272 if ( ! scxclu[i] )
273 scset[i] =
274 mkbranch( scset[i],
275 pat );
279 | EOF_OP
281 if ( scon_stk_ptr > 0 )
282 build_eof_action();
284 else
286 /* This EOF applies to all start conditions
287 * which don't already have EOF actions.
289 for ( i = 1; i <= lastsc; ++i )
290 if ( ! sceof[i] )
291 scon_stk[++scon_stk_ptr] = i;
293 if ( scon_stk_ptr == 0 )
294 lwarn(
295 "all start conditions already have <<EOF>> rules" );
297 else
298 build_eof_action();
302 | error
303 { synerr( "unrecognized rule" ); }
306 scon_stk_ptr :
307 { $$ = scon_stk_ptr; }
310 scon : '<' scon_stk_ptr namelist2 '>'
311 { $$ = $2; }
313 | '<' '*' '>'
315 $$ = scon_stk_ptr;
317 for ( i = 1; i <= lastsc; ++i )
319 int j;
321 for ( j = 1; j <= scon_stk_ptr; ++j )
322 if ( scon_stk[j] == i )
323 break;
325 if ( j > scon_stk_ptr )
326 scon_stk[++scon_stk_ptr] = i;
331 { $$ = scon_stk_ptr; }
334 namelist2 : namelist2 ',' sconname
336 | sconname
338 | error
339 { synerr( "bad start condition list" ); }
342 sconname : NAME
344 if ( (scnum = sclookup( nmstr )) == 0 )
345 format_pinpoint_message(
346 "undeclared start condition %s",
347 nmstr );
348 else
350 for ( i = 1; i <= scon_stk_ptr; ++i )
351 if ( scon_stk[i] == scnum )
353 format_warn(
354 "<%s> specified twice",
355 scname[scnum] );
356 break;
359 if ( i > scon_stk_ptr )
360 scon_stk[++scon_stk_ptr] = scnum;
365 rule : re2 re
367 if ( transchar[lastst[$2]] != SYM_EPSILON )
368 /* Provide final transition \now/ so it
369 * will be marked as a trailing context
370 * state.
372 $2 = link_machines( $2,
373 mkstate( SYM_EPSILON ) );
375 mark_beginning_as_normal( $2 );
376 current_state_type = STATE_NORMAL;
378 if ( previous_continued_action )
380 /* We need to treat this as variable trailing
381 * context so that the backup does not happen
382 * in the action but before the action switch
383 * statement. If the backup happens in the
384 * action, then the rules "falling into" this
385 * one's action will *also* do the backup,
386 * erroneously.
388 if ( ! varlength || headcnt != 0 )
389 lwarn(
390 "trailing context made variable due to preceding '|' action" );
392 /* Mark as variable. */
393 varlength = true;
394 headcnt = 0;
397 if ( lex_compat || (varlength && headcnt == 0) )
398 { /* variable trailing context rule */
399 /* Mark the first part of the rule as the
400 * accepting "head" part of a trailing
401 * context rule.
403 * By the way, we didn't do this at the
404 * beginning of this production because back
405 * then current_state_type was set up for a
406 * trail rule, and add_accept() can create
407 * a new state ...
409 add_accept( $1,
410 num_rules | YY_TRAILING_HEAD_MASK );
411 variable_trail_rule = true;
414 else
415 trailcnt = rulelen;
417 $$ = link_machines( $1, $2 );
420 | re2 re '$'
421 { synerr( "trailing context used twice" ); }
423 | re '$'
425 headcnt = 0;
426 trailcnt = 1;
427 rulelen = 1;
428 varlength = false;
430 current_state_type = STATE_TRAILING_CONTEXT;
432 if ( trlcontxt )
434 synerr( "trailing context used twice" );
435 $$ = mkstate( SYM_EPSILON );
438 else if ( previous_continued_action )
440 /* See the comment in the rule for "re2 re"
441 * above.
443 lwarn(
444 "trailing context made variable due to preceding '|' action" );
446 varlength = true;
449 if ( lex_compat || varlength )
451 /* Again, see the comment in the rule for
452 * "re2 re" above.
454 add_accept( $1,
455 num_rules | YY_TRAILING_HEAD_MASK );
456 variable_trail_rule = true;
459 trlcontxt = true;
461 eps = mkstate( SYM_EPSILON );
462 $$ = link_machines( $1,
463 link_machines( eps, mkstate( '\n' ) ) );
466 | re
468 $$ = $1;
470 if ( trlcontxt )
472 if ( lex_compat || (varlength && headcnt == 0) )
473 /* Both head and trail are
474 * variable-length.
476 variable_trail_rule = true;
477 else
478 trailcnt = rulelen;
484 re : re '|' series
486 varlength = true;
487 $$ = mkor( $1, $3 );
490 | series
491 { $$ = $1; }
495 re2 : re '/'
497 /* This rule is written separately so the
498 * reduction will occur before the trailing
499 * series is parsed.
502 if ( trlcontxt )
503 synerr( "trailing context used twice" );
504 else
505 trlcontxt = true;
507 if ( varlength )
508 /* We hope the trailing context is
509 * fixed-length.
511 varlength = false;
512 else
513 headcnt = rulelen;
515 rulelen = 0;
517 current_state_type = STATE_TRAILING_CONTEXT;
518 $$ = $1;
522 series : series singleton
524 /* This is where concatenation of adjacent patterns
525 * gets done.
527 $$ = link_machines( $1, $2 );
530 | singleton
531 { $$ = $1; }
534 singleton : singleton '*'
536 varlength = true;
538 $$ = mkclos( $1 );
541 | singleton '+'
543 varlength = true;
544 $$ = mkposcl( $1 );
547 | singleton '?'
549 varlength = true;
550 $$ = mkopt( $1 );
553 | singleton '{' NUMBER ',' NUMBER '}'
555 varlength = true;
557 if ( $3 > $5 || $3 < 0 )
559 synerr( "bad iteration values" );
560 $$ = $1;
562 else
564 if ( $3 == 0 )
566 if ( $5 <= 0 )
568 synerr(
569 "bad iteration values" );
570 $$ = $1;
572 else
573 $$ = mkopt(
574 mkrep( $1, 1, $5 ) );
576 else
577 $$ = mkrep( $1, $3, $5 );
581 | singleton '{' NUMBER ',' '}'
583 varlength = true;
585 if ( $3 <= 0 )
587 synerr( "iteration value must be positive" );
588 $$ = $1;
591 else
592 $$ = mkrep( $1, $3, INFINITY );
595 | singleton '{' NUMBER '}'
597 /* The singleton could be something like "(foo)",
598 * in which case we have no idea what its length
599 * is, so we punt here.
601 varlength = true;
603 if ( $3 <= 0 )
605 synerr( "iteration value must be positive" );
606 $$ = $1;
609 else
610 $$ = link_machines( $1,
611 copysingl( $1, $3 - 1 ) );
614 | '.'
616 if ( ! madeany )
618 /* Create the '.' character class. */
619 anyccl = cclinit();
620 ccladd( anyccl, '\n' );
621 cclnegate( anyccl );
623 if ( useecs )
624 mkeccl( ccltbl + cclmap[anyccl],
625 ccllen[anyccl], nextecm,
626 ecgroup, csize, csize );
628 madeany = true;
631 ++rulelen;
633 $$ = mkstate( -anyccl );
636 | fullccl
638 if ( ! cclsorted )
639 /* Sort characters for fast searching. We
640 * use a shell sort since this list could
641 * be large.
643 cshell( ccltbl + cclmap[$1], ccllen[$1], true );
645 if ( useecs )
646 mkeccl( ccltbl + cclmap[$1], ccllen[$1],
647 nextecm, ecgroup, csize, csize );
649 ++rulelen;
651 $$ = mkstate( -$1 );
654 | PREVCCL
656 ++rulelen;
658 $$ = mkstate( -$1 );
661 | '"' string '"'
662 { $$ = $2; }
664 | '(' re ')'
665 { $$ = $2; }
667 | CHAR
669 ++rulelen;
671 if ( caseins && $1 >= 'A' && $1 <= 'Z' )
672 $1 = clower( $1 );
674 $$ = mkstate( $1 );
678 fullccl : '[' ccl ']'
679 { $$ = $2; }
681 | '[' '^' ccl ']'
683 cclnegate( $3 );
684 $$ = $3;
688 ccl : ccl CHAR '-' CHAR
690 if ( caseins )
692 if ( $2 >= 'A' && $2 <= 'Z' )
693 $2 = clower( $2 );
694 if ( $4 >= 'A' && $4 <= 'Z' )
695 $4 = clower( $4 );
698 if ( $2 > $4 )
699 synerr( "negative range in character class" );
701 else
703 for ( i = $2; i <= $4; ++i )
704 ccladd( $1, i );
706 /* Keep track if this ccl is staying in
707 * alphabetical order.
709 cclsorted = cclsorted && ($2 > lastchar);
710 lastchar = $4;
713 $$ = $1;
716 | ccl CHAR
718 if ( caseins && $2 >= 'A' && $2 <= 'Z' )
719 $2 = clower( $2 );
721 ccladd( $1, $2 );
722 cclsorted = cclsorted && ($2 > lastchar);
723 lastchar = $2;
724 $$ = $1;
727 | ccl ccl_expr
729 /* Too hard to properly maintain cclsorted. */
730 cclsorted = false;
731 $$ = $1;
736 cclsorted = true;
737 lastchar = 0;
738 currccl = $$ = cclinit();
742 ccl_expr: CCE_ALNUM { CCL_EXPR(isalnum) }
743 | CCE_ALPHA { CCL_EXPR(isalpha) }
744 | CCE_BLANK { CCL_EXPR(IS_BLANK) }
745 | CCE_CNTRL { CCL_EXPR(iscntrl) }
746 | CCE_DIGIT { CCL_EXPR(isdigit) }
747 | CCE_GRAPH { CCL_EXPR(isgraph) }
748 | CCE_LOWER { CCL_EXPR(islower) }
749 | CCE_PRINT { CCL_EXPR(isprint) }
750 | CCE_PUNCT { CCL_EXPR(ispunct) }
751 | CCE_SPACE { CCL_EXPR(isspace) }
752 | CCE_UPPER {
753 if ( caseins )
754 CCL_EXPR(islower)
755 else
756 CCL_EXPR(isupper)
758 | CCE_XDIGIT { CCL_EXPR(isxdigit) }
761 string : string CHAR
763 if ( caseins && $2 >= 'A' && $2 <= 'Z' )
764 $2 = clower( $2 );
766 ++rulelen;
768 $$ = link_machines( $1, mkstate( $2 ) );
772 { $$ = mkstate( SYM_EPSILON ); }
778 /* build_eof_action - build the "<<EOF>>" action for the active start
779 * conditions
782 void build_eof_action()
784 register int i;
785 char action_text[MAXLINE];
787 for ( i = 1; i <= scon_stk_ptr; ++i )
789 if ( sceof[scon_stk[i]] )
790 format_pinpoint_message(
791 "multiple <<EOF>> rules for start condition %s",
792 scname[scon_stk[i]] );
794 else
796 sceof[scon_stk[i]] = true;
797 snprintf(action_text, sizeof(action_text),
798 "case YY_STATE_EOF(%s):\n", scname[scon_stk[i]]);
799 add_action( action_text );
803 line_directive_out( (FILE *) 0, 1 );
805 /* This isn't a normal rule after all - don't count it as
806 * such, so we don't have any holes in the rule numbering
807 * (which make generating "rule can never match" warnings
808 * more difficult.
810 --num_rules;
811 ++num_eof_rules;
815 /* format_synerr - write out formatted syntax error */
817 void format_synerr( msg, arg )
818 char msg[], arg[];
820 char errmsg[MAXLINE];
822 (void) snprintf(errmsg, sizeof(errmsg), msg, arg);
823 synerr( errmsg );
827 /* synerr - report a syntax error */
829 void synerr( str )
830 char str[];
832 syntaxerror = true;
833 pinpoint_message( str );
837 /* format_warn - write out formatted warning */
839 void format_warn( msg, arg )
840 char msg[], arg[];
842 char warn_msg[MAXLINE];
844 (void) snprintf(warn_msg, sizeof(warn_msg), msg, arg);
845 lwarn( warn_msg );
849 /* lwarn - report a warning, unless -w was given */
851 void lwarn( str )
852 char str[];
854 line_warning( str, linenum );
857 /* format_pinpoint_message - write out a message formatted with one string,
858 * pinpointing its location
861 void format_pinpoint_message( msg, arg )
862 char msg[], arg[];
864 char errmsg[MAXLINE];
866 (void) snprintf(errmsg, sizeof(errmsg), msg, arg);
867 pinpoint_message( errmsg );
871 /* pinpoint_message - write out a message, pinpointing its location */
873 void pinpoint_message( str )
874 char str[];
876 line_pinpoint( str, linenum );
880 /* line_warning - report a warning at a given line, unless -w was given */
882 void line_warning( str, line )
883 char str[];
884 int line;
886 char warning[MAXLINE];
888 if ( ! nowarn )
890 snprintf(warning, sizeof(warning), "warning, %s", str);
891 line_pinpoint( warning, line );
896 /* line_pinpoint - write out a message, pinpointing it at the given line */
898 void line_pinpoint( str, line )
899 char str[];
900 int line;
902 fprintf( stderr, "\"%s\", line %d: %s\n", infilename, line, str );
906 /* yyerror - eat up an error message from the parser;
907 * currently, messages are ignored
910 void yyerror( msg )
911 char msg[];