1 /* $NetBSD: parse.y,v 1.5 2014/10/30 18:44:05 christos Exp $ */
3 /* parse.y - parser for flex input */
5 %token CHAR NUMBER SECTEND SCDECL XSCDECL NAME PREVCCL EOF_OP
6 %token OPTION_OP OPT_OUTFILE OPT_PREFIX OPT_YYCLASS OPT_HEADER OPT_EXTRA_TYPE
9 %token CCE_ALNUM CCE_ALPHA CCE_BLANK CCE_CNTRL CCE_DIGIT CCE_GRAPH
10 %token CCE_LOWER CCE_PRINT CCE_PUNCT CCE_SPACE CCE_UPPER CCE_XDIGIT
12 %token CCE_NEG_ALNUM CCE_NEG_ALPHA CCE_NEG_BLANK CCE_NEG_CNTRL CCE_NEG_DIGIT CCE_NEG_GRAPH
13 %token CCE_NEG_LOWER CCE_NEG_PRINT CCE_NEG_PUNCT CCE_NEG_SPACE CCE_NEG_UPPER CCE_NEG_XDIGIT
15 %left CCL_OP_DIFF CCL_OP_UNION
18 * POSIX and AT&T lex place the
19 * precedence of the repeat operator, {}, below that of concatenation.
20 * Thus, ab{3} is ababab. Most other POSIX utilities use an Extended
21 * Regular Expression (ERE) precedence that has the repeat operator
22 * higher than concatenation. This causes ab{3} to yield abbb.
24 * In order to support the POSIX and AT&T precedence and the flex
25 * precedence we define two token sets for the begin and end tokens of
26 * the repeat operator, '{' and '}'. The lexical scanner chooses
27 * which tokens to return based on whether posix_compat or lex_compat
28 * are specified. Specifying either posix_compat or lex_compat will
29 * cause flex to parse scanner files as per the AT&T and
30 * POSIX-mandated behavior.
33 %token BEGIN_REPEAT_POSIX END_REPEAT_POSIX BEGIN_REPEAT_FLEX END_REPEAT_FLEX
37 /* Copyright (c) 1990 The Regents of the University of California. */
38 /* All rights reserved. */
40 /* This code is derived from software contributed to Berkeley by */
43 /* The United States Government has rights in this work pursuant */
44 /* to contract no. DE-AC03-76SF00098 between the United States */
45 /* Department of Energy and the University of California. */
47 /* This file is part of flex. */
49 /* Redistribution and use in source and binary forms, with or without */
50 /* modification, are permitted provided that the following conditions */
53 /* 1. Redistributions of source code must retain the above copyright */
54 /* notice, this list of conditions and the following disclaimer. */
55 /* 2. Redistributions in binary form must reproduce the above copyright */
56 /* notice, this list of conditions and the following disclaimer in the */
57 /* documentation and/or other materials provided with the distribution. */
59 /* Neither the name of the University nor the names of its contributors */
60 /* may be used to endorse or promote products derived from this software */
61 /* without specific prior written permission. */
63 /* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */
64 /* IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */
65 /* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
68 __RCSID
("$NetBSD: parse.y,v 1.5 2014/10/30 18:44:05 christos Exp $");
/* Parser-wide state shared by the grammar actions in this file.
 *
 * pat, variable_trail_rule, headcnt and trailcnt are handed to
 * finish_rule() when a rule is completed; trlcontxt, variable_trail_rule,
 * varlength, trailcnt, headcnt and rulelen are reset at the start of each
 * rule.  scnum receives the result of sclookup(), eps holds a
 * mkstate( SYM_EPSILON ) state, and i is the loop index used by the
 * for-loops in the actions.
 */
72 int pat
, scnum
, eps
, headcnt
, trailcnt
, lastchar
, i
, rulelen
;
/* xcluflg is passed to scinstal() for each declared start condition;
 * currccl is the class being filled by ccladd() and cclsorted tracks
 * whether that class is still in alphabetical order.
 */
73 int trlcontxt
, xcluflg
, currccl
, cclsorted
, varlength
, variable_trail_rule
;
78 static int madeany
= false
; /* whether we've made the '.' character class */
/* ccldot is the class built for '.', cclany the one built for (?s:'.'). */
79 static int ccldot
, cclany
;
80 int previous_continued_action
; /* whether the previous rule's action was '|' */
82 #define format_warn3(fmt, a1, a2) \
84 char fw3_msg
[MAXLINE
];\
85 snprintf
( fw3_msg
, MAXLINE
,(fmt
), (a1
), (a2
) );\
89 /* Expand a POSIX character class expression. */
90 #define CCL_EXPR(func) \
93 for
( c
= 0; c
< csize
; ++c
) \
94 if
( isascii
(c
) && func
(c
) ) \
95 ccladd
( currccl
, c
); \
99 #define CCL_NEG_EXPR(func) \
102 for
( c
= 0; c
< csize
; ++c
) \
104 ccladd
( currccl
, c
); \
107 /* While POSIX defines isblank(), it's not ANSI C.
 *
 * IS_BLANK(c) is true only for a space or a horizontal tab.  The
 * argument is expanded twice, so it must be free of side effects.
 */
108 #define IS_BLANK(c) ((c) == ' ' || (c) == '\t')
110 /* On some over-ambitious machines, such as DEC Alphas, the default
111 * token type is "long" instead of "int"; this leads to problems with
112 * declaring yylval in flexdef.h. But so far, all the yacc's I've seen
113 * wrap their definitions of YYSTYPE with "#ifndef YYSTYPE"'s, so the
114 * following should ensure that the default token type is "int".
121 goal
: initlex sect1 sect1end sect2 initforrule
122 { /* add default rule */
128 def_rule
= mkstate
( -pat
);
130 /* Remember the number of the default rule so we
131 * don't generate "can't match" warnings for it.
133 default_rule
= num_rules
;
135 finish_rule
( def_rule
, false
, 0, 0, 0);
137 for
( i
= 1; i
<= lastsc
; ++i
)
138 scset
[i
] = mkbranch
( scset
[i
], def_rule
);
142 "YY_FATAL_ERROR( \"flex scanner jammed\" )" );
144 add_action
( "ECHO" );
146 add_action
( ";\n\tYY_BREAK\n" );
151 { /* initialize for processing rules */
153 /* Create default DFA start condition. */
154 scinstal
( "INITIAL", false
);
158 sect1
: sect1 startconddecl namelist1
162 { synerr
( _
("unknown error processing section 1") ); }
168 scon_stk
= allocate_integer_array
( lastsc
+ 1 );
173 startconddecl
: SCDECL
180 namelist1
: namelist1 NAME
181 { scinstal
( nmstr
, xcluflg
); }
184 { scinstal
( nmstr
, xcluflg
); }
187 { synerr
( _
("bad start condition list") ); }
190 options
: OPTION_OP optionlist
193 optionlist
: optionlist option
197 option
: OPT_OUTFILE
'=' NAME
199 outfilename
= copy_string
( nmstr
);
202 | OPT_EXTRA_TYPE
'=' NAME
203 { extra_type
= copy_string
( nmstr
); }
204 | OPT_PREFIX
'=' NAME
205 { prefix
= copy_string
( nmstr
); }
206 | OPT_YYCLASS
'=' NAME
207 { yyclass
= copy_string
( nmstr
); }
208 | OPT_HEADER
'=' NAME
209 { headerfilename
= copy_string
( nmstr
); }
210 | OPT_TABLES
'=' NAME
211 { tablesext
= true
; tablesfilename
= copy_string
( nmstr
); }
214 sect2
: sect2 scon initforrule flexrule
'\n'
215 { scon_stk_ptr
= $2; }
216 | sect2 scon
'{' sect2
'}'
217 { scon_stk_ptr
= $2; }
223 /* Initialize for a parse of one rule. */
224 trlcontxt
= variable_trail_rule
= varlength
= false
;
225 trailcnt
= headcnt
= rulelen
= 0;
226 current_state_type
= STATE_NORMAL
;
227 previous_continued_action
= continued_action
;
237 finish_rule
( pat
, variable_trail_rule
,
238 headcnt
, trailcnt
, previous_continued_action
);
240 if
( scon_stk_ptr
> 0 )
242 for
( i
= 1; i
<= scon_stk_ptr
; ++i
)
244 mkbranch
( scbol
[scon_stk
[i
]],
250 /* Add to all non-exclusive start conditions,
251 * including the default (0) start condition.
254 for
( i
= 1; i
<= lastsc
; ++i
)
256 scbol
[i
] = mkbranch
( scbol
[i
],
264 if
( performance_report
> 1 )
266 "'^' operator results in sub-optimal performance" );
273 finish_rule
( pat
, variable_trail_rule
,
274 headcnt
, trailcnt
, previous_continued_action
);
276 if
( scon_stk_ptr
> 0 )
278 for
( i
= 1; i
<= scon_stk_ptr
; ++i
)
280 mkbranch
( scset
[scon_stk
[i
]],
286 for
( i
= 1; i
<= lastsc
; ++i
)
296 if
( scon_stk_ptr
> 0 )
301 /* This EOF applies to all start conditions
302 * which don't already have EOF actions.
304 for
( i
= 1; i
<= lastsc
; ++i
)
306 scon_stk
[++scon_stk_ptr
] = i
;
308 if
( scon_stk_ptr
== 0 )
310 "all start conditions already have <<EOF>> rules" );
318 { synerr
( _
("unrecognized rule") ); }
322 { $$
= scon_stk_ptr
; }
325 scon
: '<' scon_stk_ptr namelist2
'>'
332 for
( i
= 1; i
<= lastsc
; ++i
)
336 for
( j
= 1; j
<= scon_stk_ptr
; ++j
)
337 if
( scon_stk
[j
] == i
)
340 if
( j
> scon_stk_ptr
)
341 scon_stk
[++scon_stk_ptr
] = i
;
346 { $$
= scon_stk_ptr
; }
349 namelist2
: namelist2
',' sconname
354 { synerr
( _
("bad start condition list") ); }
359 if
( (scnum
= sclookup
( nmstr
)) == 0 )
360 format_pinpoint_message
(
361 "undeclared start condition %s",
365 for
( i
= 1; i
<= scon_stk_ptr
; ++i
)
366 if
( scon_stk
[i
] == scnum
)
369 "<%s> specified twice",
374 if
( i
> scon_stk_ptr
)
375 scon_stk
[++scon_stk_ptr
] = scnum
;
382 if
( transchar
[lastst
[$2]] != SYM_EPSILON
)
383 /* Provide final transition \now/ so it
384 * will be marked as a trailing context
387 $2 = link_machines
( $2,
388 mkstate
( SYM_EPSILON
) );
390 mark_beginning_as_normal
( $2 );
391 current_state_type
= STATE_NORMAL
;
393 if
( previous_continued_action
)
395 /* We need to treat this as variable trailing
396 * context so that the backup does not happen
397 * in the action but before the action switch
398 * statement. If the backup happens in the
399 * action, then the rules "falling into" this
400 * one's action will *also* do the backup,
403 if
( ! varlength || headcnt
!= 0 )
405 "trailing context made variable due to preceding '|' action" );
407 /* Mark as variable. */
413 if
( lex_compat ||
(varlength
&& headcnt
== 0) )
414 { /* variable trailing context rule */
415 /* Mark the first part of the rule as the
416 * accepting "head" part of a trailing
419 * By the way, we didn't do this at the
420 * beginning of this production because back
421 * then current_state_type was set up for a
422 * trail rule, and add_accept() can create
426 num_rules | YY_TRAILING_HEAD_MASK
);
427 variable_trail_rule
= true
;
433 $$
= link_machines
( $1, $2 );
437 { synerr
( _
("trailing context used twice") ); }
446 current_state_type
= STATE_TRAILING_CONTEXT
;
450 synerr
( _
("trailing context used twice") );
451 $$
= mkstate
( SYM_EPSILON
);
454 else if
( previous_continued_action
)
456 /* See the comment in the rule for "re2 re"
460 "trailing context made variable due to preceding '|' action" );
465 if
( lex_compat || varlength
)
467 /* Again, see the comment in the rule for
471 num_rules | YY_TRAILING_HEAD_MASK
);
472 variable_trail_rule
= true
;
477 eps
= mkstate
( SYM_EPSILON
);
478 $$
= link_machines
( $1,
479 link_machines
( eps
, mkstate
( '\n' ) ) );
488 if
( lex_compat ||
(varlength
&& headcnt
== 0) )
489 /* Both head and trail are
492 variable_trail_rule
= true
;
513 /* This rule is written separately so the
514 * reduction will occur before the trailing
519 synerr
( _
("trailing context used twice") );
524 /* We hope the trailing context is
533 current_state_type
= STATE_TRAILING_CONTEXT
;
538 series
: series singleton
540 /* This is where concatenation of adjacent patterns
543 $$
= link_machines
( $1, $2 );
549 | series BEGIN_REPEAT_POSIX NUMBER
',' NUMBER END_REPEAT_POSIX
553 if
( $3 > $5 ||
$3 < 0 )
555 synerr
( _
("bad iteration values") );
565 _
("bad iteration values") );
570 mkrep
( $1, 1, $5 ) );
573 $$
= mkrep
( $1, $3, $5 );
577 | series BEGIN_REPEAT_POSIX NUMBER
',' END_REPEAT_POSIX
583 synerr
( _
("iteration value must be positive") );
588 $$
= mkrep
( $1, $3, INFINITE_REPEAT
);
591 | series BEGIN_REPEAT_POSIX NUMBER END_REPEAT_POSIX
593 /* The series could be something like "(foo)",
594 * in which case we have no idea what its length
595 * is, so we punt here.
601 synerr
( _
("iteration value must be positive")
607 $$
= link_machines
( $1,
608 copysingl
( $1, $3 - 1 ) );
613 singleton
: singleton
'*'
632 | singleton BEGIN_REPEAT_FLEX NUMBER
',' NUMBER END_REPEAT_FLEX
636 if
( $3 > $5 ||
$3 < 0 )
638 synerr
( _
("bad iteration values") );
648 _
("bad iteration values") );
653 mkrep
( $1, 1, $5 ) );
656 $$
= mkrep
( $1, $3, $5 );
660 | singleton BEGIN_REPEAT_FLEX NUMBER
',' END_REPEAT_FLEX
666 synerr
( _
("iteration value must be positive") );
671 $$
= mkrep
( $1, $3, INFINITE_REPEAT
);
674 | singleton BEGIN_REPEAT_FLEX NUMBER END_REPEAT_FLEX
676 /* The singleton could be something like "(foo)",
677 * in which case we have no idea what its length
678 * is, so we punt here.
684 synerr
( _
("iteration value must be positive") );
689 $$
= link_machines
( $1,
690 copysingl
( $1, $3 - 1 ) );
697 /* Create the '.' character class. */
699 ccladd
( ccldot
, '\n' );
703 mkeccl
( ccltbl
+ cclmap
[ccldot
],
704 ccllen
[ccldot
], nextecm
,
705 ecgroup
, csize
, csize
);
707 /* Create the (?s:'.') character class. */
712 mkeccl
( ccltbl
+ cclmap
[cclany
],
713 ccllen
[cclany
], nextecm
,
714 ecgroup
, csize
, csize
);
722 $$
= mkstate
( -cclany
);
724 $$
= mkstate
( -ccldot
);
729 /* Sort characters for fast searching.
731 qsort
( ccltbl
+ cclmap
[$1], ccllen
[$1], sizeof
(*ccltbl
), cclcmp
);
734 mkeccl
( ccltbl
+ cclmap
[$1], ccllen
[$1],
735 nextecm
, ecgroup
, csize
, csize
);
740 rule_has_nl
[num_rules
] = true
;
750 rule_has_nl
[num_rules
] = true
;
766 rule_has_nl
[num_rules
] = true
;
768 if
(sf_case_ins
() && has_case
($1))
769 /* create an alternation, as in (a|A) */
770 $$
= mkor
(mkstate
($1), mkstate
(reverse_case
($1)));
776 fullccl CCL_OP_DIFF braceccl
{ $$
= ccl_set_diff
($1, $3); }
777 | fullccl CCL_OP_UNION braceccl
{ $$
= ccl_set_union
($1, $3); }
783 '[' ccl
']' { $$
= $2; }
792 ccl
: ccl CHAR
'-' CHAR
798 /* If one end of the range has case and the other
799 * does not, or the cases are different, then we're not
800 * sure what range the user is trying to express.
801 * Examples: [@-z] or [S-t]
803 if
(has_case
($2) != has_case
($4)
804 ||
(has_case
($2) && (b_islower
($2) != b_islower
($4)))
805 ||
(has_case
($2) && (b_isupper
($2) != b_isupper
($4))))
807 _
("the character range [%c-%c] is ambiguous in a case-insensitive scanner"),
810 /* If the range spans uppercase characters but not
811 * lowercase (or vice-versa), then should we automatically
812 * include lowercase characters in the range?
813 * Example: [@-_] spans [a-z] but not [A-Z]
815 else if
(!has_case
($2) && !has_case
($4) && !range_covers_case
($2, $4))
817 _
("the character range [%c-%c] is ambiguous in a case-insensitive scanner"),
822 synerr
( _
("negative range in character class") );
826 for
( i
= $2; i
<= $4; ++i
)
829 /* Keep track if this ccl is staying in
830 * alphabetical order.
832 cclsorted
= cclsorted
&& ($2 > lastchar
);
835 /* Do it again for upper/lowercase */
836 if
(sf_case_ins
() && has_case
($2) && has_case
($4)){
837 $2 = reverse_case
($2);
838 $4 = reverse_case
($4);
840 for
( i
= $2; i
<= $4; ++i
)
843 cclsorted
= cclsorted
&& ($2 > lastchar
);
855 cclsorted
= cclsorted
&& ($2 > lastchar
);
858 /* Do it again for upper/lowercase */
859 if
(sf_case_ins
() && has_case
($2)){
860 $2 = reverse_case
($2);
863 cclsorted
= cclsorted
&& ($2 > lastchar
);
872 /* Too hard to properly maintain cclsorted. */
881 currccl
= $$
= cclinit
();
886 CCE_ALNUM
{ CCL_EXPR
(isalnum
); }
887 | CCE_ALPHA
{ CCL_EXPR
(isalpha
); }
888 | CCE_BLANK
{ CCL_EXPR
(IS_BLANK
); }
889 | CCE_CNTRL
{ CCL_EXPR
(iscntrl
); }
890 | CCE_DIGIT
{ CCL_EXPR
(isdigit
); }
891 | CCE_GRAPH
{ CCL_EXPR
(isgraph
); }
897 | CCE_PRINT
{ CCL_EXPR
(isprint
); }
898 | CCE_PUNCT
{ CCL_EXPR
(ispunct
); }
899 | CCE_SPACE
{ CCL_EXPR
(isspace
); }
900 | CCE_XDIGIT
{ CCL_EXPR
(isxdigit
); }
907 | CCE_NEG_ALNUM
{ CCL_NEG_EXPR
(isalnum
); }
908 | CCE_NEG_ALPHA
{ CCL_NEG_EXPR
(isalpha
); }
909 | CCE_NEG_BLANK
{ CCL_NEG_EXPR
(IS_BLANK
); }
910 | CCE_NEG_CNTRL
{ CCL_NEG_EXPR
(iscntrl
); }
911 | CCE_NEG_DIGIT
{ CCL_NEG_EXPR
(isdigit
); }
912 | CCE_NEG_GRAPH
{ CCL_NEG_EXPR
(isgraph
); }
913 | CCE_NEG_PRINT
{ CCL_NEG_EXPR
(isprint
); }
914 | CCE_NEG_PUNCT
{ CCL_NEG_EXPR
(ispunct
); }
915 | CCE_NEG_SPACE
{ CCL_NEG_EXPR
(isspace
); }
916 | CCE_NEG_XDIGIT
{ CCL_NEG_EXPR
(isxdigit
); }
919 lwarn
(_
("[:^lower:] is ambiguous in case insensitive scanner"));
921 CCL_NEG_EXPR
(islower
);
925 lwarn
(_
("[:^upper:] ambiguous in case insensitive scanner"));
927 CCL_NEG_EXPR
(isupper
);
934 rule_has_nl
[num_rules
] = true
;
938 if
(sf_case_ins
() && has_case
($2))
939 $$
= mkor
(mkstate
($2), mkstate
(reverse_case
($2)));
943 $$
= link_machines
( $1, $$
);
947 { $$
= mkstate
( SYM_EPSILON
); }
953 /* build_eof_action - build the "<<EOF>>" action for the active start
957 void build_eof_action
()
960 char action_text
[MAXLINE
];
962 for
( i
= 1; i
<= scon_stk_ptr
; ++i
)
964 if
( sceof
[scon_stk
[i
]] )
965 format_pinpoint_message
(
966 "multiple <<EOF>> rules for start condition %s",
967 scname
[scon_stk
[i
]] );
971 sceof
[scon_stk
[i
]] = true
;
973 if
(previous_continued_action
/* && previous action was regular */)
974 add_action
("YY_RULE_SETUP\n");
976 snprintf
( action_text
, sizeof
(action_text
), "case YY_STATE_EOF(%s):\n",
977 scname
[scon_stk
[i
]] );
978 add_action
( action_text
);
982 line_directive_out
( (FILE *) 0, 1 );
984 /* This isn't a normal rule after all - don't count it as
985 * such, so we don't have any holes in the rule numbering
986 * (which make generating "rule can never match" warnings
994 /* format_synerr - write out formatted syntax error
 *
 * msg is used directly as the snprintf() format string and arg is its
 * only argument; the text is built in errmsg, a MAXLINE-byte buffer, so
 * longer output is truncated by snprintf().
 * NOTE(review): msg is presumably expected to contain exactly one %s
 * conversion - confirm at the call sites.
 */
996 void format_synerr
( msg
, arg
)
/* K&R-style parameter declarations: both parameters are const char *. */
997 const char *msg
, arg
[];
999 char errmsg
[MAXLINE
];
1001 (void) snprintf
( errmsg
, sizeof
(errmsg
), msg
, arg
);
1006 /* synerr - report a syntax error */
1012 pinpoint_message
( str
);
1016 /* format_warn - write out formatted warning
 *
 * msg is used directly as the snprintf() format string and arg is its
 * only argument; the text is built in warn_msg, a MAXLINE-byte buffer,
 * so longer output is truncated by snprintf().
 * NOTE(review): msg is presumably expected to contain exactly one %s
 * conversion - confirm at the call sites.
 */
1018 void format_warn
( msg
, arg
)
/* K&R-style parameter declarations: both parameters are const char *. */
1019 const char *msg
, arg
[];
1021 char warn_msg
[MAXLINE
];
1023 snprintf
( warn_msg
, sizeof
(warn_msg
), msg
, arg
);
1028 /* lwarn - report a warning, unless -w was given */
1033 line_warning
( str
, linenum
);
1036 /* format_pinpoint_message - write out a message formatted with one string,
1037 * pinpointing its location
1040 void format_pinpoint_message
( msg
, arg
)
1041 const char *msg
, arg
[];
1043 char errmsg
[MAXLINE
];
1045 snprintf
( errmsg
, sizeof
(errmsg
), msg
, arg
);
1046 pinpoint_message
( errmsg
);
1050 /* pinpoint_message - write out a message, pinpointing its location
 *
 * Forwards str to line_pinpoint() together with linenum, so the message
 * is reported against that line number.
 */
1052 void pinpoint_message
( str
)
1055 line_pinpoint
( str
, linenum
);
1059 /* line_warning - report a warning at a given line, unless -w was given
 *
 * Prefixes str with "warning, " in a MAXLINE-byte buffer (longer text is
 * truncated by snprintf()) and passes the result to line_pinpoint()
 * along with line.
 * NOTE(review): the -w test mentioned above is not visible in this
 * excerpt - confirm where it is applied.
 */
1061 void line_warning
( str
, line
)
1065 char warning
[MAXLINE
];
1069 snprintf
( warning
, sizeof
(warning
), "warning, %s", str
);
1070 line_pinpoint
( warning
, line
);
1075 /* line_pinpoint - write out a message, pinpointing it at the given line
 *
 * Prints "<infilename>:<line>: <str>" followed by a newline on stderr.
 */
1077 void line_pinpoint
( str
, line
)
1081 fprintf
( stderr
, "%s:%d: %s\n", infilename
, line
, str
);
1085 /* yyerror - eat up an error message from the parser;
1086 * currently, messages are ignored