1 /* $NetBSD: parse.y,v 1.1.1.1 2009/10/26 00:26:41 christos Exp $ */
3 /* parse.y - parser for flex input */
5 %token CHAR NUMBER SECTEND SCDECL XSCDECL NAME PREVCCL EOF_OP
6 %token OPTION_OP OPT_OUTFILE OPT_PREFIX OPT_YYCLASS OPT_HEADER OPT_EXTRA_TYPE
9 %token CCE_ALNUM CCE_ALPHA CCE_BLANK CCE_CNTRL CCE_DIGIT CCE_GRAPH
10 %token CCE_LOWER CCE_PRINT CCE_PUNCT CCE_SPACE CCE_UPPER CCE_XDIGIT
12 %token CCE_NEG_ALNUM CCE_NEG_ALPHA CCE_NEG_BLANK CCE_NEG_CNTRL CCE_NEG_DIGIT CCE_NEG_GRAPH
13 %token CCE_NEG_LOWER CCE_NEG_PRINT CCE_NEG_PUNCT CCE_NEG_SPACE CCE_NEG_UPPER CCE_NEG_XDIGIT
15 %left CCL_OP_DIFF CCL_OP_UNION
18 * POSIX and AT&T lex place the
19 * precedence of the repeat operator, {}, below that of concatenation.
20 * Thus, ab{3} is ababab. Most other POSIX utilities use an Extended
21 * Regular Expression (ERE) precedence that has the repeat operator
22 * higher than concatenation. This causes ab{3} to yield abbb.
24 * In order to support the POSIX and AT&T precedence and the flex
25 * precedence we define two token sets for the begin and end tokens of
26 * the repeat operator, '{' and '}'. The lexical scanner chooses
27 * which tokens to return based on whether posix_compat or lex_compat
28 * are specified. Specifying either posix_compat or lex_compat will
29 * cause flex to parse scanner files as per the AT&T and
30 * POSIX-mandated behavior.
33 %token BEGIN_REPEAT_POSIX END_REPEAT_POSIX BEGIN_REPEAT_FLEX END_REPEAT_FLEX
37 /* Copyright (c) 1990 The Regents of the University of California. */
38 /* All rights reserved. */
40 /* This code is derived from software contributed to Berkeley by */
43 /* The United States Government has rights in this work pursuant */
44 /* to contract no. DE-AC03-76SF00098 between the United States */
45 /* Department of Energy and the University of California. */
47 /* This file is part of flex. */
49 /* Redistribution and use in source and binary forms, with or without */
50 /* modification, are permitted provided that the following conditions */
53 /* 1. Redistributions of source code must retain the above copyright */
54 /* notice, this list of conditions and the following disclaimer. */
55 /* 2. Redistributions in binary form must reproduce the above copyright */
56 /* notice, this list of conditions and the following disclaimer in the */
57 /* documentation and/or other materials provided with the distribution. */
59 /* Neither the name of the University nor the names of its contributors */
60 /* may be used to endorse or promote products derived from this software */
61 /* without specific prior written permission. */
63 /* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */
64 /* IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */
65 /* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
/* Scratch variables shared by the grammar actions. */
int pat;
int scnum;
int eps;
int headcnt;
int trailcnt;
int lastchar;
int i;
int rulelen;

int trlcontxt;
int xcluflg;
int currccl;
int cclsorted;
int varlength;
int variable_trail_rule;

/* Whether we've made the '.' character class. */
static int madeany = false;

/* Character class numbers for '.' and for (?s:'.'); the actions hand them
 * to mkstate() negated.
 */
static int ccldot;
static int cclany;

/* Whether the previous rule's action was '|'. */
int previous_continued_action;
81 #define format_warn3(fmt, a1, a2) \
83 char fw3_msg
[MAXLINE
];\
84 snprintf
( fw3_msg
, MAXLINE
,(fmt
), (a1
), (a2
) );\
88 /* Expand a POSIX character class expression. */
89 #define CCL_EXPR(func) \
92 for
( c
= 0; c
< csize
; ++c
) \
93 if
( isascii
(c
) && func
(c
) ) \
94 ccladd
( currccl
, c
); \
98 #define CCL_NEG_EXPR(func) \
101 for
( c
= 0; c
< csize
; ++c
) \
103 ccladd
( currccl
, c
); \
/* IS_BLANK - test whether a character is a space or a horizontal tab.
 *
 * While POSIX defines isblank(), it's not ANSI C, so the test is done
 * locally.  The macro forwards to a helper function so that its argument
 * is evaluated exactly once, which keeps it safe to use with expressions
 * that have side effects.
 */
static int is_blank_char( int ch )
	{
	return ch == ' ' || ch == '\t';
	}

#define IS_BLANK(c) is_blank_char(c)
109 /* On some over-ambitious machines, such as DEC Alpha's, the default
110 * token type is "long" instead of "int"; this leads to problems with
111 * declaring yylval in flexdef.h. But so far, all the yacc's I've seen
112 * wrap their definitions of YYSTYPE with "#ifndef YYSTYPE"'s, so the
113 * following should ensure that the default token type is "int".
120 goal
: initlex sect1 sect1end sect2 initforrule
121 { /* add default rule */
127 def_rule
= mkstate
( -pat
);
129 /* Remember the number of the default rule so we
130 * don't generate "can't match" warnings for it.
132 default_rule
= num_rules
;
134 finish_rule
( def_rule
, false
, 0, 0, 0);
136 for
( i
= 1; i
<= lastsc
; ++i
)
137 scset
[i
] = mkbranch
( scset
[i
], def_rule
);
141 "YY_FATAL_ERROR( \"flex scanner jammed\" )" );
143 add_action
( "ECHO" );
145 add_action
( ";\n\tYY_BREAK\n" );
150 { /* initialize for processing rules */
152 /* Create default DFA start condition. */
153 scinstal
( "INITIAL", false
);
157 sect1
: sect1 startconddecl namelist1
161 { synerr
( _
("unknown error processing section 1") ); }
167 scon_stk
= allocate_integer_array
( lastsc
+ 1 );
172 startconddecl
: SCDECL
179 namelist1
: namelist1 NAME
180 { scinstal
( nmstr
, xcluflg
); }
183 { scinstal
( nmstr
, xcluflg
); }
186 { synerr
( _
("bad start condition list") ); }
189 options
: OPTION_OP optionlist
192 optionlist
: optionlist option
196 option
: OPT_OUTFILE
'=' NAME
198 outfilename
= copy_string
( nmstr
);
201 | OPT_EXTRA_TYPE
'=' NAME
202 { extra_type
= copy_string
( nmstr
); }
203 | OPT_PREFIX
'=' NAME
204 { prefix
= copy_string
( nmstr
); }
205 | OPT_YYCLASS
'=' NAME
206 { yyclass
= copy_string
( nmstr
); }
207 | OPT_HEADER
'=' NAME
208 { headerfilename
= copy_string
( nmstr
); }
209 | OPT_TABLES
'=' NAME
210 { tablesext
= true
; tablesfilename
= copy_string
( nmstr
); }
213 sect2
: sect2 scon initforrule flexrule
'\n'
214 { scon_stk_ptr
= $2; }
215 | sect2 scon
'{' sect2
'}'
216 { scon_stk_ptr
= $2; }
222 /* Initialize for a parse of one rule. */
223 trlcontxt
= variable_trail_rule
= varlength
= false
;
224 trailcnt
= headcnt
= rulelen
= 0;
225 current_state_type
= STATE_NORMAL
;
226 previous_continued_action
= continued_action
;
236 finish_rule
( pat
, variable_trail_rule
,
237 headcnt
, trailcnt
, previous_continued_action
);
239 if
( scon_stk_ptr
> 0 )
241 for
( i
= 1; i
<= scon_stk_ptr
; ++i
)
243 mkbranch
( scbol
[scon_stk
[i
]],
249 /* Add to all non-exclusive start conditions,
250 * including the default (0) start condition.
253 for
( i
= 1; i
<= lastsc
; ++i
)
255 scbol
[i
] = mkbranch
( scbol
[i
],
263 if
( performance_report
> 1 )
265 "'^' operator results in sub-optimal performance" );
272 finish_rule
( pat
, variable_trail_rule
,
273 headcnt
, trailcnt
, previous_continued_action
);
275 if
( scon_stk_ptr
> 0 )
277 for
( i
= 1; i
<= scon_stk_ptr
; ++i
)
279 mkbranch
( scset
[scon_stk
[i
]],
285 for
( i
= 1; i
<= lastsc
; ++i
)
295 if
( scon_stk_ptr
> 0 )
300 /* This EOF applies to all start conditions
301 * which don't already have EOF actions.
303 for
( i
= 1; i
<= lastsc
; ++i
)
305 scon_stk
[++scon_stk_ptr
] = i
;
307 if
( scon_stk_ptr
== 0 )
309 "all start conditions already have <<EOF>> rules" );
317 { synerr
( _
("unrecognized rule") ); }
321 { $$
= scon_stk_ptr
; }
324 scon
: '<' scon_stk_ptr namelist2
'>'
331 for
( i
= 1; i
<= lastsc
; ++i
)
335 for
( j
= 1; j
<= scon_stk_ptr
; ++j
)
336 if
( scon_stk
[j
] == i
)
339 if
( j
> scon_stk_ptr
)
340 scon_stk
[++scon_stk_ptr
] = i
;
345 { $$
= scon_stk_ptr
; }
348 namelist2
: namelist2
',' sconname
353 { synerr
( _
("bad start condition list") ); }
358 if
( (scnum
= sclookup
( nmstr
)) == 0 )
359 format_pinpoint_message
(
360 "undeclared start condition %s",
364 for
( i
= 1; i
<= scon_stk_ptr
; ++i
)
365 if
( scon_stk
[i
] == scnum
)
368 "<%s> specified twice",
373 if
( i
> scon_stk_ptr
)
374 scon_stk
[++scon_stk_ptr
] = scnum
;
381 if
( transchar
[lastst
[$2]] != SYM_EPSILON
)
382 /* Provide final transition \now/ so it
383 * will be marked as a trailing context
386 $2 = link_machines
( $2,
387 mkstate
( SYM_EPSILON
) );
389 mark_beginning_as_normal
( $2 );
390 current_state_type
= STATE_NORMAL
;
392 if
( previous_continued_action
)
394 /* We need to treat this as variable trailing
395 * context so that the backup does not happen
396 * in the action but before the action switch
397 * statement. If the backup happens in the
398 * action, then the rules "falling into" this
399 * one's action will *also* do the backup,
402 if
( ! varlength || headcnt
!= 0 )
404 "trailing context made variable due to preceding '|' action" );
406 /* Mark as variable. */
412 if
( lex_compat ||
(varlength
&& headcnt
== 0) )
413 { /* variable trailing context rule */
414 /* Mark the first part of the rule as the
415 * accepting "head" part of a trailing
418 * By the way, we didn't do this at the
419 * beginning of this production because back
420 * then current_state_type was set up for a
421 * trail rule, and add_accept() can create
425 num_rules | YY_TRAILING_HEAD_MASK
);
426 variable_trail_rule
= true
;
432 $$
= link_machines
( $1, $2 );
436 { synerr
( _
("trailing context used twice") ); }
445 current_state_type
= STATE_TRAILING_CONTEXT
;
449 synerr
( _
("trailing context used twice") );
450 $$
= mkstate
( SYM_EPSILON
);
453 else if
( previous_continued_action
)
455 /* See the comment in the rule for "re2 re"
459 "trailing context made variable due to preceding '|' action" );
464 if
( lex_compat || varlength
)
466 /* Again, see the comment in the rule for
470 num_rules | YY_TRAILING_HEAD_MASK
);
471 variable_trail_rule
= true
;
476 eps
= mkstate
( SYM_EPSILON
);
477 $$
= link_machines
( $1,
478 link_machines
( eps
, mkstate
( '\n' ) ) );
487 if
( lex_compat ||
(varlength
&& headcnt
== 0) )
488 /* Both head and trail are
491 variable_trail_rule
= true
;
512 /* This rule is written separately so the
513 * reduction will occur before the trailing
518 synerr
( _
("trailing context used twice") );
523 /* We hope the trailing context is
532 current_state_type
= STATE_TRAILING_CONTEXT
;
537 series
: series singleton
539 /* This is where concatenation of adjacent patterns
542 $$
= link_machines
( $1, $2 );
548 | series BEGIN_REPEAT_POSIX NUMBER
',' NUMBER END_REPEAT_POSIX
552 if
( $3 > $5 ||
$3 < 0 )
554 synerr
( _
("bad iteration values") );
564 _
("bad iteration values") );
569 mkrep
( $1, 1, $5 ) );
572 $$
= mkrep
( $1, $3, $5 );
576 | series BEGIN_REPEAT_POSIX NUMBER
',' END_REPEAT_POSIX
582 synerr
( _
("iteration value must be positive") );
587 $$
= mkrep
( $1, $3, INFINITE_REPEAT
);
590 | series BEGIN_REPEAT_POSIX NUMBER END_REPEAT_POSIX
592 /* The series could be something like "(foo)",
593 * in which case we have no idea what its length
594 * is, so we punt here.
600 synerr
( _
("iteration value must be positive")
606 $$
= link_machines
( $1,
607 copysingl
( $1, $3 - 1 ) );
612 singleton
: singleton
'*'
631 | singleton BEGIN_REPEAT_FLEX NUMBER
',' NUMBER END_REPEAT_FLEX
635 if
( $3 > $5 ||
$3 < 0 )
637 synerr
( _
("bad iteration values") );
647 _
("bad iteration values") );
652 mkrep
( $1, 1, $5 ) );
655 $$
= mkrep
( $1, $3, $5 );
659 | singleton BEGIN_REPEAT_FLEX NUMBER
',' END_REPEAT_FLEX
665 synerr
( _
("iteration value must be positive") );
670 $$
= mkrep
( $1, $3, INFINITE_REPEAT
);
673 | singleton BEGIN_REPEAT_FLEX NUMBER END_REPEAT_FLEX
675 /* The singleton could be something like "(foo)",
676 * in which case we have no idea what its length
677 * is, so we punt here.
683 synerr
( _
("iteration value must be positive") );
688 $$
= link_machines
( $1,
689 copysingl
( $1, $3 - 1 ) );
696 /* Create the '.' character class. */
698 ccladd
( ccldot
, '\n' );
702 mkeccl
( ccltbl
+ cclmap
[ccldot
],
703 ccllen
[ccldot
], nextecm
,
704 ecgroup
, csize
, csize
);
706 /* Create the (?s:'.') character class. */
711 mkeccl
( ccltbl
+ cclmap
[cclany
],
712 ccllen
[cclany
], nextecm
,
713 ecgroup
, csize
, csize
);
721 $$
= mkstate
( -cclany
);
723 $$
= mkstate
( -ccldot
);
728 /* Sort characters for fast searching. We
729 * use a shell sort since this list could
732 cshell
( ccltbl
+ cclmap
[$1], ccllen
[$1], true
);
735 mkeccl
( ccltbl
+ cclmap
[$1], ccllen
[$1],
736 nextecm
, ecgroup
, csize
, csize
);
741 rule_has_nl
[num_rules
] = true
;
751 rule_has_nl
[num_rules
] = true
;
767 rule_has_nl
[num_rules
] = true
;
769 if
(sf_case_ins
() && has_case
($1))
770 /* create an alternation, as in (a|A) */
771 $$
= mkor
(mkstate
($1), mkstate
(reverse_case
($1)));
777 fullccl CCL_OP_DIFF braceccl
{ $$
= ccl_set_diff
($1, $3); }
778 | fullccl CCL_OP_UNION braceccl
{ $$
= ccl_set_union
($1, $3); }
784 '[' ccl
']' { $$
= $2; }
793 ccl
: ccl CHAR
'-' CHAR
799 /* If one end of the range has case and the other
800 * does not, or the cases are different, then we're not
801 * sure what range the user is trying to express.
802 * Examples: [@-z] or [S-t]
804 if
(has_case
($2) != has_case
($4)
805 ||
(has_case
($2) && (b_islower
($2) != b_islower
($4)))
806 ||
(has_case
($2) && (b_isupper
($2) != b_isupper
($4))))
808 _
("the character range [%c-%c] is ambiguous in a case-insensitive scanner"),
811 /* If the range spans uppercase characters but not
812 * lowercase (or vice-versa), then should we automatically
813 * include lowercase characters in the range?
814 * Example: [@-_] spans [a-z] but not [A-Z]
816 else if
(!has_case
($2) && !has_case
($4) && !range_covers_case
($2, $4))
818 _
("the character range [%c-%c] is ambiguous in a case-insensitive scanner"),
823 synerr
( _
("negative range in character class") );
827 for
( i
= $2; i
<= $4; ++i
)
830 /* Keep track if this ccl is staying in
831 * alphabetical order.
833 cclsorted
= cclsorted
&& ($2 > lastchar
);
836 /* Do it again for upper/lowercase */
837 if
(sf_case_ins
() && has_case
($2) && has_case
($4)){
838 $2 = reverse_case
($2);
839 $4 = reverse_case
($4);
841 for
( i
= $2; i
<= $4; ++i
)
844 cclsorted
= cclsorted
&& ($2 > lastchar
);
856 cclsorted
= cclsorted
&& ($2 > lastchar
);
859 /* Do it again for upper/lowercase */
860 if
(sf_case_ins
() && has_case
($2)){
861 $2 = reverse_case
($2);
864 cclsorted
= cclsorted
&& ($2 > lastchar
);
873 /* Too hard to properly maintain cclsorted. */
882 currccl
= $$
= cclinit
();
887 CCE_ALNUM
{ CCL_EXPR
(isalnum
); }
888 | CCE_ALPHA
{ CCL_EXPR
(isalpha
); }
889 | CCE_BLANK
{ CCL_EXPR
(IS_BLANK
); }
890 | CCE_CNTRL
{ CCL_EXPR
(iscntrl
); }
891 | CCE_DIGIT
{ CCL_EXPR
(isdigit
); }
892 | CCE_GRAPH
{ CCL_EXPR
(isgraph
); }
898 | CCE_PRINT
{ CCL_EXPR
(isprint
); }
899 | CCE_PUNCT
{ CCL_EXPR
(ispunct
); }
900 | CCE_SPACE
{ CCL_EXPR
(isspace
); }
901 | CCE_XDIGIT
{ CCL_EXPR
(isxdigit
); }
908 | CCE_NEG_ALNUM
{ CCL_NEG_EXPR
(isalnum
); }
909 | CCE_NEG_ALPHA
{ CCL_NEG_EXPR
(isalpha
); }
910 | CCE_NEG_BLANK
{ CCL_NEG_EXPR
(IS_BLANK
); }
911 | CCE_NEG_CNTRL
{ CCL_NEG_EXPR
(iscntrl
); }
912 | CCE_NEG_DIGIT
{ CCL_NEG_EXPR
(isdigit
); }
913 | CCE_NEG_GRAPH
{ CCL_NEG_EXPR
(isgraph
); }
914 | CCE_NEG_PRINT
{ CCL_NEG_EXPR
(isprint
); }
915 | CCE_NEG_PUNCT
{ CCL_NEG_EXPR
(ispunct
); }
916 | CCE_NEG_SPACE
{ CCL_NEG_EXPR
(isspace
); }
917 | CCE_NEG_XDIGIT
{ CCL_NEG_EXPR
(isxdigit
); }
920 lwarn
(_
("[:^lower:] is ambiguous in case insensitive scanner"));
922 CCL_NEG_EXPR
(islower
);
926 lwarn
(_
("[:^upper:] ambiguous in case insensitive scanner"));
928 CCL_NEG_EXPR
(isupper
);
935 rule_has_nl
[num_rules
] = true
;
939 if
(sf_case_ins
() && has_case
($2))
940 $$
= mkor
(mkstate
($2), mkstate
(reverse_case
($2)));
944 $$
= link_machines
( $1, $$
);
948 { $$
= mkstate
( SYM_EPSILON
); }
954 /* build_eof_action - build the "<<EOF>>" action for the active start
958 void build_eof_action
()
961 char action_text
[MAXLINE
];
963 for
( i
= 1; i
<= scon_stk_ptr
; ++i
)
965 if
( sceof
[scon_stk
[i
]] )
966 format_pinpoint_message
(
967 "multiple <<EOF>> rules for start condition %s",
968 scname
[scon_stk
[i
]] );
972 sceof
[scon_stk
[i
]] = true
;
973 snprintf
( action_text
, sizeof
(action_text
), "case YY_STATE_EOF(%s):\n",
974 scname
[scon_stk
[i
]] );
975 add_action
( action_text
);
979 line_directive_out
( (FILE *) 0, 1 );
981 /* This isn't a normal rule after all - don't count it as
982 * such, so we don't have any holes in the rule numbering
983 * (which make generating "rule can never match" warnings
991 /* format_synerr - write out formatted syntax error */
/* Parameters (old-style definition; the declaration list follows the
 * parameter names):
 *   msg - used as the snprintf() format string
 *   arg - the single string argument supplied to that format
 */
993 void format_synerr
( msg
, arg
)
994 const char *msg
, arg
[];
/* Holds the expanded message.  The snprintf() below is bounded by
 * sizeof(errmsg), so text longer than MAXLINE bytes is truncated instead
 * of overrunning the buffer.
 */
996 char errmsg
[MAXLINE
];
998 (void) snprintf
( errmsg
, sizeof
(errmsg
), msg
, arg
);
/* NOTE(review): the statement that hands errmsg on is not visible in this
 * view of the file - confirm against the complete source.
 */
1003 /* synerr - report a syntax error */
1009 pinpoint_message
( str
);
1013 /* format_warn - write out formatted warning */
/* Parameters (old-style definition; the declaration list follows the
 * parameter names):
 *   msg - used as the snprintf() format string
 *   arg - the single string argument supplied to that format
 */
1015 void format_warn
( msg
, arg
)
1016 const char *msg
, arg
[];
/* Holds the expanded warning text; the snprintf() below cannot write more
 * than sizeof(warn_msg) bytes into it.
 */
1018 char warn_msg
[MAXLINE
];
1020 snprintf
( warn_msg
, sizeof
(warn_msg
), msg
, arg
);
/* NOTE(review): the statement that reports warn_msg is not visible in this
 * view of the file - confirm against the complete source.
 */
1025 /* lwarn - report a warning, unless -w was given */
1030 line_warning
( str
, linenum
);
1033 /* format_pinpoint_message - write out a message formatted with one string,
1034 * pinpointing its location
1037 void format_pinpoint_message
( msg
, arg
)
1038 const char *msg
, arg
[];
1040 char errmsg
[MAXLINE
];
1042 snprintf
( errmsg
, sizeof
(errmsg
), msg
, arg
);
1043 pinpoint_message
( errmsg
);
1047 /* pinpoint_message - write out a message, pinpointing its location */
1049 void pinpoint_message
( str
)
1052 line_pinpoint
( str
, linenum
);
1056 /* line_warning - report a warning at a given line, unless -w was given */
/* Prefixes the text in str with "warning, " and passes the result, along
 * with line, to line_pinpoint().
 *
 * NOTE(review): no declarations for str and line, and no check matching
 * the "unless -w was given" wording of the header comment, are visible in
 * this view of the file - confirm against the complete source.
 */
1058 void line_warning
( str
, line
)
/* Holds the prefixed message; the snprintf() below is bounded by
 * sizeof(warning).
 */
1062 char warning
[MAXLINE
];
1066 snprintf
( warning
, sizeof
(warning
), "warning, %s", str
);
1067 line_pinpoint
( warning
, line
);
1072 /* line_pinpoint - write out a message, pinpointing it at the given line */
1074 void line_pinpoint
( str
, line
)
1078 fprintf
( stderr
, "%s:%d: %s\n", infilename
, line
, str
);
1082 /* yyerror - eat up an error message from the parser;
1083 * currently, messages are ignore