Added spec:commit task to commit changes to spec/ruby sources.
[rbx.git] / shotgun / lib / grammar.y
blobfccda09323ab73892b264c057a2351d2146d58d8
1 /**********************************************************************
3 parse.y -
5 $Author: matz $
6 $Date: 2004/11/29 06:13:51 $
7 created at: Fri May 28 18:02:42 JST 1993
9 Copyright (C) 1993-2003 Yukihiro Matsumoto
11 **********************************************************************/
15 #define YYDEBUG 1
16 #define YYERROR_VERBOSE 1
18 #include <stdio.h>
19 #include <errno.h>
20 #include <ctype.h>
21 #include <string.h>
22 #include <stdbool.h>
24 #include "shotgun/lib/grammar_internal.h"
25 #include "shotgun/lib/grammar_runtime.h"
26 #include "shotgun/lib/array.h"
28 static NODE *syd_node_newnode(rb_parse_state*, enum node_type, OBJECT, OBJECT, OBJECT);
30 #undef VALUE
/* Character-class helpers: thin aliases over the <ctype.h> classification functions. */
32 #ifndef isnumber
33 #define isnumber isdigit /* used only when no isnumber macro is already defined */
34 #endif
36 #define ISALPHA isalpha
37 #define ISSPACE isspace
38 #define ISALNUM(x) (isalpha(x) || isnumber(x)) /* note: x may be evaluated twice */
39 #define ISDIGIT isdigit
40 #define ISXDIGIT isxdigit
41 #define ISUPPER isupper
43 #define ismbchar(c) (0) /* multibyte detection is stubbed out: always false */
44 #define mbclen(c) (1) /* ...and every character is reported as one byte long */
46 #define ID2SYM(i) ((OBJECT)(i)) /* cast an ID to OBJECT; argument and result are parenthesized so the macro is safe inside larger expressions */
48 #define string_new(ptr, len) blk2bstr(ptr, len)
49 #define string_new2(ptr) cstr2bstr(ptr)
51 intptr_t syd_sourceline;
52 static char *syd_sourcefile;
54 #define ruby_sourceline syd_sourceline
55 #define ruby_sourcefile syd_sourcefile
/* Error reporter for the parser; receives the parse state explicitly. */
57 static int
58 syd_yyerror(const char *, rb_parse_state*);
/* Give the generated parser's symbols a syd_ prefix. */
59 #define yyparse syd_yyparse
60 #define yylex syd_yylex
61 #define yyerror(str) syd_yyerror(str, parse_state) /* relies on a `parse_state` variable being in scope at the point of use */
62 #define yylval syd_yylval
63 #define yychar syd_yychar
64 #define yydebug syd_yydebug
/* Both the parser and the lexer entry points are given an extra argument named parse_state. */
66 #define YYPARSE_PARAM parse_state
67 #define YYLEX_PARAM parse_state
/* Identifier classification: the bits of an ID selected by ID_SCOPE_MASK hold a scope tag. */
69 #define ID_SCOPE_SHIFT 3 /* NOTE(review): presumably the width of the tag field; not used in the visible code — confirm */
70 #define ID_SCOPE_MASK 0x07
71 #define ID_LOCAL 0x01
72 #define ID_INSTANCE 0x02
73 #define ID_GLOBAL 0x03
74 #define ID_ATTRSET 0x04
75 #define ID_CONST 0x05
76 #define ID_CLASS 0x06
77 #define ID_JUNK 0x07
78 #define ID_INTERNAL ID_JUNK /* alias: same tag value as ID_JUNK */
80 #define is_notop_id(id) ((id)>tLAST_TOKEN) /* true when id is numerically greater than tLAST_TOKEN */
/* Each predicate below requires is_notop_id(id) and then compares the scope tag. */
81 #define is_local_id(id) (is_notop_id(id)&&((id)&ID_SCOPE_MASK)==ID_LOCAL)
82 #define is_global_id(id) (is_notop_id(id)&&((id)&ID_SCOPE_MASK)==ID_GLOBAL)
83 #define is_instance_id(id) (is_notop_id(id)&&((id)&ID_SCOPE_MASK)==ID_INSTANCE)
84 #define is_attrset_id(id) (is_notop_id(id)&&((id)&ID_SCOPE_MASK)==ID_ATTRSET)
85 #define is_const_id(id) (is_notop_id(id)&&((id)&ID_SCOPE_MASK)==ID_CONST)
86 #define is_class_id(id) (is_notop_id(id)&&((id)&ID_SCOPE_MASK)==ID_CLASS)
87 #define is_junk_id(id) (is_notop_id(id)&&((id)&ID_SCOPE_MASK)==ID_JUNK)
/* True only for ids tagged global, instance or class; the `||=` actions use it to decide whether to record nd_aid. */
89 #define is_asgn_or_id(id) ((is_notop_id(id)) && \
90 (((id)&ID_SCOPE_MASK) == ID_GLOBAL || \
91 ((id)&ID_SCOPE_MASK) == ID_INSTANCE || \
92 ((id)&ID_SCOPE_MASK) == ID_CLASS))
95 /* FIXME these went into the ruby_state instead of parse_state
96 because a ton of other crap depends on it
97 char *ruby_sourcefile; current source file
98 int ruby_sourceline; current line no.
100 static int yylex();
/*
 * Bit-stack helpers: an integer is used as a stack of one-bit flags with the
 * top of the stack in bit 0.  Every use of the `stack` parameter is
 * parenthesized so any lvalue expression can be passed safely.
 */
103 #define BITSTACK_PUSH(stack, n) ((stack) = ((stack)<<1)|((n)&1)) /* push the low bit of n */
104 #define BITSTACK_POP(stack) ((stack) >>= 1) /* discard the top flag */
105 #define BITSTACK_LEXPOP(stack) ((stack) = ((stack) >> 1) | ((stack) & 1)) /* pop, OR-ing the old top flag into the new top */
106 #define BITSTACK_SET_P(stack) ((stack)&1) /* non-zero when the top flag is set */
/* The two stacks kept in the parse state; these expand `vps`, so `parse_state` must be in scope. */
108 #define COND_PUSH(n) BITSTACK_PUSH(vps->cond_stack, n)
109 #define COND_POP() BITSTACK_POP(vps->cond_stack)
110 #define COND_LEXPOP() BITSTACK_LEXPOP(vps->cond_stack)
111 #define COND_P() BITSTACK_SET_P(vps->cond_stack)
113 #define CMDARG_PUSH(n) BITSTACK_PUSH(vps->cmdarg_stack, n)
114 #define CMDARG_POP() BITSTACK_POP(vps->cmdarg_stack)
115 #define CMDARG_LEXPOP() BITSTACK_LEXPOP(vps->cmdarg_stack)
116 #define CMDARG_P() BITSTACK_SET_P(vps->cmdarg_stack)
/*
 * Parser bookkeeping kept in file-scope statics rather than in rb_parse_state.
 * NOTE(review): the grammar requests %pure-parser, yet this state is shared by
 * every parse in the process — confirm that overlapping parses are not expected.
 */
119 static int class_nest = 0; /* reset to 0 at the start and end of the program rule */
120 static int in_single = 0; /* checked together with in_def, e.g. for "BEGIN in method" */
121 static int in_def = 0;
122 static int compile_for_eval = 0; /* when non-zero, the program rule skips its last-expression check */
123 static ID cur_mid = 0;
126 static NODE *cond(NODE*,rb_parse_state*);
127 static NODE *logop(enum node_type,NODE*,NODE*,rb_parse_state*);
128 static int cond_negative(NODE**);
130 static NODE *newline_node(rb_parse_state*,NODE*);
131 static void fixpos(NODE*,NODE*);
133 static int value_expr0(NODE*,rb_parse_state*);
134 static void void_expr0(NODE *);
135 static void void_stmts(NODE*,rb_parse_state*);
136 static NODE *remove_begin(NODE*);
/*
 * Both wrappers first overwrite their argument with remove_begin(argument),
 * so the argument must be an assignable lvalue and is modified as a side
 * effect.  value_expr additionally needs `parse_state` in scope.
 */
137 #define value_expr(node) value_expr0((node) = remove_begin(node), parse_state)
138 #define void_expr(node) void_expr0((node) = remove_begin(node))
140 static NODE *block_append(rb_parse_state*,NODE*,NODE*);
141 static NODE *list_append(rb_parse_state*,NODE*,NODE*);
142 static NODE *list_concat(NODE*,NODE*);
143 static NODE *arg_concat(rb_parse_state*,NODE*,NODE*);
144 static NODE *arg_prepend(rb_parse_state*,NODE*,NODE*);
145 static NODE *literal_concat(rb_parse_state*,NODE*,NODE*);
146 static NODE *new_evstr(rb_parse_state*,NODE*);
147 static NODE *evstr2dstr(rb_parse_state*,NODE*);
148 static NODE *call_op(NODE*,ID,int,NODE*,rb_parse_state*);
150 /* static NODE *negate_lit(NODE*); */
151 static NODE *ret_args(rb_parse_state*,NODE*);
152 static NODE *arg_blk_pass(NODE*,NODE*);
153 static NODE *new_call(rb_parse_state*,NODE*,ID,NODE*);
154 static NODE *new_fcall(rb_parse_state*,ID,NODE*);
155 static NODE *new_super(rb_parse_state*,NODE*);
156 static NODE *new_yield(rb_parse_state*,NODE*);
158 static NODE *syd_gettable(rb_parse_state*,ID);
159 #define gettable(i) syd_gettable(parse_state, i)
160 static NODE *assignable(ID,NODE*,rb_parse_state*);
161 static NODE *aryset(NODE*,NODE*,rb_parse_state*);
162 static NODE *attrset(NODE*,ID,rb_parse_state*);
163 static void rb_backref_error(NODE*);
164 static NODE *node_assign(NODE*,NODE*,rb_parse_state*);
166 static NODE *match_gen(NODE*,NODE*,rb_parse_state*);
/* Local-variable table helpers.  Each short macro forwards to its syd_ function and supplies the parse state through `vps`. */
167 static void syd_local_push(rb_parse_state*, int cnt);
168 #define local_push(cnt) syd_local_push(vps, cnt)
169 static void syd_local_pop(rb_parse_state*);
170 #define local_pop() syd_local_pop(vps)
171 static intptr_t syd_local_cnt(rb_parse_state*,ID);
172 #define local_cnt(i) syd_local_cnt(vps, i)
173 static int syd_local_id(rb_parse_state*,ID);
174 #define local_id(i) syd_local_id(vps, i)
175 static ID *syd_local_tbl();
176 static ID convert_op();
178 static void tokadd(char c, rb_parse_state *parse_state);
179 static int tokadd_string(int, int, int, int *, rb_parse_state*);
181 #define SHOW_PARSER_WARNS 0 /* set to 1 to have parser diagnostics printed */
/*
 * printf-style diagnostic sink.  With SHOW_PARSER_WARNS enabled it forwards
 * the arguments to vprintf and returns vprintf's result; otherwise the message
 * is discarded and 0 is returned.
 * NOTE(review): the enabled branch uses va_list/va_start, but <stdarg.h> is not
 * among the headers this file includes directly — confirm a project header
 * provides it before turning the flag on.
 */
183 static int _debug_print(const char *fmt, ...) {
184 #if SHOW_PARSER_WARNS
185 va_list ar;
186 int i;
188 va_start(ar, fmt);
189 i = vprintf(fmt, ar);
190 va_end(ar);
191 return i;
192 #else
193 return 0;
194 #endif
197 #define rb_warn _debug_print /* all three reporting names share the sink above... */
198 #define rb_warning _debug_print
199 #define rb_compile_error _debug_print /* ...so with the flag off, compile errors reported this way are silent too */
201 static ID rb_intern(const char *name);
202 static ID rb_id_attrset(ID);
204 rb_parse_state *alloc_parse_state();
205 static unsigned long scan_oct(const char *start, int len, int *retlen);
206 static unsigned long scan_hex(const char *start, int len, int *retlen);
208 static void reset_block(rb_parse_state *parse_state);
209 static NODE *extract_block_vars(rb_parse_state *parse_state, NODE* node, var_table vars);
211 #define ruby_verbose 0 /* verbose mode is hard-wired off here */
212 #define RE_OPTION_ONCE 0x80
/* Regexp flag bits: starting from 1, each constant is the previous one shifted left by one. */
213 #define RE_OPTION_IGNORECASE (1L)
214 #define RE_OPTION_EXTENDED (RE_OPTION_IGNORECASE<<1)
215 #define RE_OPTION_MULTILINE (RE_OPTION_EXTENDED<<1)
216 #define RE_OPTION_SINGLELINE (RE_OPTION_MULTILINE<<1)
217 #define RE_OPTION_LONGEST (RE_OPTION_SINGLELINE<<1)
218 #define RE_MAY_IGNORECASE (RE_OPTION_LONGEST<<1)
219 #define RE_OPTIMIZE_ANCHOR (RE_MAY_IGNORECASE<<1)
220 #define RE_OPTIMIZE_EXACTN (RE_OPTIMIZE_ANCHOR<<1) /* NOTE(review): evaluates to 0x80, the same value as RE_OPTION_ONCE — confirm the two never share a flag word */
221 #define RE_OPTIMIZE_NO_BM (RE_OPTIMIZE_EXACTN<<1)
222 #define RE_OPTIMIZE_BMATCH (RE_OPTIMIZE_NO_BM<<1)
/* String-terminator and heredoc bookkeeping reuses existing node types and their u1/u2/u3 slots. */
224 #define NODE_STRTERM NODE_ZARRAY /* nothing to gc */
225 #define NODE_HEREDOC NODE_ARRAY /* 1, 3 to gc */
/*
 * SIGN_EXTEND(x,n): treat the low n bits of x as a signed n-bit value.
 * NOTE(review): `~0<<(n)` left-shifts a negative int, which ISO C leaves
 * undefined — confirm the supported compilers handle it as intended.
 */
226 #define SIGN_EXTEND(x,n) (((1<<((n)-1))^((x)&~(~0<<(n))))-(1<<((n)-1)))
227 #define nd_func u1.id
228 #if SIZEOF_SHORT != 2
229 #define nd_term(node) SIGN_EXTEND((node)->u2.id, (CHAR_BIT*2)) /* low 2*CHAR_BIT bits of u2.id, sign-extended */
230 #else
231 #define nd_term(node) ((signed short)(node)->u2.id) /* same field, read through a cast when short is two bytes */
232 #endif
233 #define nd_paren(node) (char)((node)->u2.id >> (CHAR_BIT*2)) /* the bits of u2.id above the nd_term field, narrowed to char */
234 #define nd_nest u3.id
236 /* Older versions of Yacc set YYMAXDEPTH to a very low value by default (150,
237 for instance). This is too low for Ruby to parse some files, such as
238 date/format.rb, therefore bump the value up to at least Bison's default. */
239 #ifdef OLD_YACC
240 #ifndef YYMAXDEPTH
241 #define YYMAXDEPTH 10000
242 #endif
243 #endif
245 #define vps ((rb_parse_state*)parse_state)
249 %pure-parser
251 %union {
252 NODE *node;
253 ID id;
254 int num;
255 var_table vars;
258 %token kCLASS
259 kMODULE
260 kDEF
261 kUNDEF
262 kBEGIN
263 kRESCUE
264 kENSURE
265 kEND
267 kUNLESS
268 kTHEN
269 kELSIF
270 kELSE
271 kCASE
272 kWHEN
273 kWHILE
274 kUNTIL
275 kFOR
276 kBREAK
277 kNEXT
278 kREDO
279 kRETRY
282 kDO_COND
283 kDO_BLOCK
284 kRETURN
285 kYIELD
286 kSUPER
287 kSELF
288 kNIL
289 kTRUE
290 kFALSE
291 kAND
293 kNOT
294 kIF_MOD
295 kUNLESS_MOD
296 kWHILE_MOD
297 kUNTIL_MOD
298 kRESCUE_MOD
299 kALIAS
300 kDEFINED
301 klBEGIN
302 klEND
303 k__LINE__
304 k__FILE__
306 %token <id> tIDENTIFIER tFID tGVAR tIVAR tCONSTANT tCVAR tXSTRING_BEG
307 %token <node> tINTEGER tFLOAT tSTRING_CONTENT
308 %token <node> tNTH_REF tBACK_REF
309 %token <num> tREGEXP_END
310 %type <node> singleton strings string string1 xstring regexp
311 %type <node> string_contents xstring_contents string_content
312 %type <node> words qwords word_list qword_list word
313 %type <node> literal numeric dsym cpath
314 %type <node> bodystmt compstmt stmts stmt expr arg primary command command_call method_call
315 %type <node> expr_value arg_value primary_value
316 %type <node> if_tail opt_else case_body cases opt_rescue exc_list exc_var opt_ensure
317 %type <node> args when_args call_args call_args2 open_args paren_args opt_paren_args
318 %type <node> command_args aref_args opt_block_arg block_arg var_ref var_lhs
319 %type <node> mrhs superclass block_call block_command
320 %type <node> f_arglist f_args f_optarg f_opt f_block_arg opt_f_block_arg
321 %type <node> assoc_list assocs assoc undef_list backref string_dvar
322 %type <node> block_var opt_block_var brace_block cmd_brace_block do_block lhs none
323 %type <node> mlhs mlhs_head mlhs_basic mlhs_entry mlhs_item mlhs_node
324 %type <id> fitem variable sym symbol operation operation2 operation3
325 %type <id> cname fname op f_rest_arg
326 %type <num> f_norm_arg f_arg
327 %token tUPLUS /* unary+ */
328 %token tUMINUS /* unary- */
329 %token tUBS /* unary\ */
330 %token tPOW /* ** */
331 %token tCMP /* <=> */
332 %token tEQ /* == */
333 %token tEQQ /* === */
334 %token tNEQ /* != */
335 %token tGEQ /* >= */
336 %token tLEQ /* <= */
337 %token tANDOP tOROP /* && and || */
338 %token tMATCH tNMATCH /* =~ and !~ */
339 %token tDOT2 tDOT3 /* .. and ... */
340 %token tAREF tASET /* [] and []= */
341 %token tLSHFT tRSHFT /* << and >> */
342 %token tCOLON2 /* :: */
343 %token tCOLON3 /* :: at EXPR_BEG */
344 %token <id> tOP_ASGN /* +=, -= etc. */
345 %token tASSOC /* => */
346 %token tLPAREN /* ( */
347 %token tLPAREN_ARG /* ( */
348 %token tRPAREN /* ) */
349 %token tLBRACK /* [ */
350 %token tLBRACE /* { */
351 %token tLBRACE_ARG /* { */
352 %token tSTAR /* * */
353 %token tAMPER /* & */
354 %token tSYMBEG tSTRING_BEG tREGEXP_BEG tWORDS_BEG tQWORDS_BEG
355 %token tSTRING_DBEG tSTRING_DVAR tSTRING_END
358 * precedence table
361 %nonassoc tLOWEST
362 %nonassoc tLBRACE_ARG
364 %nonassoc kIF_MOD kUNLESS_MOD kWHILE_MOD kUNTIL_MOD
365 %left kOR kAND
366 %right kNOT
367 %nonassoc kDEFINED
368 %right '=' tOP_ASGN
369 %left kRESCUE_MOD
370 %right '?' ':'
371 %nonassoc tDOT2 tDOT3
372 %left tOROP
373 %left tANDOP
374 %nonassoc tCMP tEQ tEQQ tNEQ tMATCH tNMATCH
375 %left '>' tGEQ '<' tLEQ
376 %left '|' '^'
377 %left '&'
378 %left tLSHFT tRSHFT
379 %left '+' '-'
380 %left '*' '/' '%'
381 %right tUMINUS_NUM tUMINUS
382 %right tPOW
383 %right '!' '~' tUPLUS
385 %token tLAST_TOKEN
/* Start rule: initialise lexer/parser state, parse the whole input, then attach the result to vps->top. */
388 program : {
389 vps->lex_state = EXPR_BEG;
390 vps->variables = var_table_create(); /* fresh variable table for this parse */
391 class_nest = 0;
393 compstmt
395 if ($2 && !compile_for_eval) { /* skipped entirely when compiling for eval */
396 /* last expression should not be void */
397 if (nd_type($2) != NODE_BLOCK) void_expr($2);
398 else {
399 NODE *node = $2;
400 while (node->nd_next) { /* walk to the final entry of the block chain */
401 node = node->nd_next;
403 void_expr(node->nd_head); /* apply the check to the last statement only */
406 vps->top = block_append(parse_state, vps->top, $2); /* append to whatever vps->top already holds */
407 class_nest = 0;
/* A statement body with optional rescue, else and ensure parts ($1..$4 in that order). */
411 bodystmt : compstmt
412 opt_rescue
413 opt_else
414 opt_ensure
416 $$ = $1;
417 if ($2) {
418 $$ = NEW_RESCUE($1, $2, $3); /* the else part travels inside the rescue node */
420 else if ($3) {
421 rb_warn("else without rescue is useless");
422 $$ = block_append(parse_state, $$, $3); /* no rescue: the else body simply runs after the main body */
424 if ($4) {
425 $$ = NEW_ENSURE($$, $4); /* ensure wraps whatever was built above */
427 fixpos($$, $1); /* NOTE(review): presumably copies source position from $1 — fixpos is defined outside this view */
431 compstmt : stmts opt_terms
433 void_stmts($1, parse_state);
434 $$ = $1;
438 stmts : none
439 | stmt
441 $$ = newline_node(parse_state, $1);
443 | stmts terms stmt
445 $$ = block_append(parse_state, $1, newline_node(parse_state, $3));
447 | error stmt
449 $$ = $2;
453 stmt : kALIAS fitem {vps->lex_state = EXPR_FNAME;} fitem
455 $$ = NEW_ALIAS($2, $4);
457 | kALIAS tGVAR tGVAR
459 $$ = NEW_VALIAS($2, $3);
461 | kALIAS tGVAR tBACK_REF
463 char buf[3];
465 snprintf(buf, sizeof(buf), "$%c", (char)$3->nd_nth);
466 $$ = NEW_VALIAS($2, rb_intern(buf));
468 | kALIAS tGVAR tNTH_REF
470 yyerror("can't make alias for the number variables");
471 $$ = 0;
473 | kUNDEF undef_list
475 $$ = $2;
/*
 * Statement modifiers (`stmt if c`, `stmt unless c`, `stmt while c`, `stmt until c`).
 * NOTE(review): cond_negative is defined outside this view; from its use here it
 * appears to report that the condition was a negation (and to rewrite it in
 * place through the pointer) — confirm.
 */
477 | stmt kIF_MOD expr_value
479 $$ = NEW_IF(cond($3, parse_state), $1, 0);
480 fixpos($$, $3);
481 if (cond_negative(&$$->nd_cond)) {
482 $$->nd_else = $$->nd_body; /* move the statement to the else branch */
483 $$->nd_body = 0;
486 | stmt kUNLESS_MOD expr_value
488 $$ = NEW_UNLESS(cond($3, parse_state), $1, 0);
489 fixpos($$, $3);
490 if (cond_negative(&$$->nd_cond)) {
491 $$->nd_body = $$->nd_else;
492 $$->nd_else = 0;
495 | stmt kWHILE_MOD expr_value
497 if ($1 && nd_type($1) == NODE_BEGIN) { /* body written as begin...end: unwrap it and pass 0 as the last argument */
498 $$ = NEW_WHILE(cond($3, parse_state), $1->nd_body, 0);
500 else {
501 $$ = NEW_WHILE(cond($3, parse_state), $1, 1);
503 if (cond_negative(&$$->nd_cond)) {
504 nd_set_type($$, NODE_UNTIL); /* flip the loop kind instead of keeping the negation */
507 | stmt kUNTIL_MOD expr_value
509 if ($1 && nd_type($1) == NODE_BEGIN) { /* same begin...end special case as the while form */
510 $$ = NEW_UNTIL(cond($3, parse_state), $1->nd_body, 0);
512 else {
513 $$ = NEW_UNTIL(cond($3, parse_state), $1, 1);
515 if (cond_negative(&$$->nd_cond)) {
516 nd_set_type($$, NODE_WHILE);
519 | stmt kRESCUE_MOD stmt
521 $$ = NEW_RESCUE($1, NEW_RESBODY(0,$3,0), 0);
523 | klBEGIN
525 if (in_def || in_single) {
526 yyerror("BEGIN in method");
528 local_push(0);
530 '{' compstmt '}'
533 ruby_eval_tree_begin = block_append(ruby_eval_tree_begin,
534 NEW_PREEXE($4));
536 local_pop();
537 $$ = 0;
539 | klEND '{' compstmt '}'
541 if (in_def || in_single) {
542 rb_warn("END in method; use at_exit");
545 $$ = NEW_ITER(0, NEW_POSTEXE(), $3);
547 | lhs '=' command_call
549 $$ = node_assign($1, $3, parse_state);
551 | mlhs '=' command_call
553 value_expr($3);
554 $1->nd_value = ($1->nd_head) ? NEW_TO_ARY($3) : NEW_ARRAY($3);
555 $$ = $1;
/* Operator assignment to a plain variable with a command call on the right (`v op= cmd args`). */
557 | var_lhs tOP_ASGN command_call
559 value_expr($3);
560 if ($1) {
561 ID vid = $1->nd_vid;
562 if ($2 == tOROP) { /* v ||= rhs */
563 $1->nd_value = $3;
564 $$ = NEW_OP_ASGN_OR(gettable(vid), $1);
565 if (is_asgn_or_id(vid)) { /* global, instance or class variable: remember its id on the node */
566 $$->nd_aid = vid;
569 else if ($2 == tANDOP) { /* v &&= rhs */
570 $1->nd_value = $3;
571 $$ = NEW_OP_ASGN_AND(gettable(vid), $1);
573 else { /* any other operator: assign the result of calling the operator on (v, rhs) */
574 $$ = $1;
575 $$->nd_value = call_op(gettable(vid),$2,1,$3, parse_state);
578 else { /* no usable left-hand side node */
579 $$ = 0;
582 | primary_value '[' aref_args ']' tOP_ASGN command_call
584 NODE *args;
586 value_expr($6);
587 args = NEW_LIST($6);
588 if ($3 && nd_type($3) != NODE_ARRAY)
589 $3 = NEW_LIST($3);
590 $3 = list_append(parse_state, $3, NEW_NIL());
591 list_concat(args, $3);
592 if ($5 == tOROP) {
593 $5 = 0;
595 else if ($5 == tANDOP) {
596 $5 = 1;
598 $$ = NEW_OP_ASGN1($1, $5, args);
599 fixpos($$, $1);
601 | primary_value '.' tIDENTIFIER tOP_ASGN command_call
603 value_expr($5);
604 if ($4 == tOROP) {
605 $4 = 0;
607 else if ($4 == tANDOP) {
608 $4 = 1;
610 $$ = NEW_OP_ASGN2($1, $3, $4, $5);
611 fixpos($$, $1);
613 | primary_value '.' tCONSTANT tOP_ASGN command_call
615 value_expr($5);
616 if ($4 == tOROP) {
617 $4 = 0;
619 else if ($4 == tANDOP) {
620 $4 = 1;
622 $$ = NEW_OP_ASGN2($1, $3, $4, $5);
623 fixpos($$, $1);
625 | primary_value tCOLON2 tIDENTIFIER tOP_ASGN command_call
627 value_expr($5);
628 if ($4 == tOROP) {
629 $4 = 0;
631 else if ($4 == tANDOP) {
632 $4 = 1;
634 $$ = NEW_OP_ASGN2($1, $3, $4, $5);
635 fixpos($$, $1);
637 | backref tOP_ASGN command_call
639 rb_backref_error($1);
640 $$ = 0;
642 | lhs '=' mrhs
644 $$ = node_assign($1, NEW_SVALUE($3), parse_state);
646 | mlhs '=' arg_value
648 $1->nd_value = ($1->nd_head) ? NEW_TO_ARY($3) : NEW_ARRAY($3);
649 $$ = $1;
651 | mlhs '=' mrhs
653 $1->nd_value = $3;
654 $$ = $1;
656 | expr
659 expr : command_call
660 | expr kAND expr
662 $$ = logop(NODE_AND, $1, $3, parse_state);
664 | expr kOR expr
666 $$ = logop(NODE_OR, $1, $3, parse_state);
668 | kNOT expr
670 $$ = NEW_NOT(cond($2, parse_state));
672 | '!' command_call
674 $$ = NEW_NOT(cond($2, parse_state));
676 | arg
679 expr_value : expr /* an expression in a position where its value is required */
681 value_expr($1); /* check $1 itself, as arg_value does: the macro stores remove_begin()'s result back into its argument, so that rewrite is what the next line returns */
682 $$ = $1;
686 command_call : command
687 | block_command
688 | kRETURN call_args
690 $$ = NEW_RETURN(ret_args(vps, $2));
692 | kBREAK call_args
694 $$ = NEW_BREAK(ret_args(vps, $2));
696 | kNEXT call_args
698 $$ = NEW_NEXT(ret_args(vps, $2));
702 block_command : block_call
703 | block_call '.' operation2 command_args
705 $$ = new_call(parse_state, $1, $3, $4);
707 | block_call tCOLON2 operation2 command_args
709 $$ = new_call(parse_state, $1, $3, $4);
/* Brace block attached to a command call.  Symbol positions: $1 tLBRACE_ARG, $2 first mid-rule action, $3 opt_block_var, $4 second mid-rule action, $5 compstmt. */
713 cmd_brace_block : tLBRACE_ARG
715 $<num>1 = ruby_sourceline; /* remember the line of the opening brace in the token's own value slot */
716 reset_block(vps);
718 opt_block_var { $<vars>$ = vps->block_vars; } /* capture the block variable table as this action's value ($4) */
719 compstmt
722 $$ = NEW_ITER($3, 0, extract_block_vars(vps, $5, $<vars>4));
723 nd_set_line($$, $<num>1); /* stamp the iterator node with the saved line */
727 command : operation command_args %prec tLOWEST
729 $$ = new_fcall(parse_state, $1, $2);
730 fixpos($$, $2);
732 | operation command_args cmd_brace_block
734 $$ = new_fcall(parse_state, $1, $2);
735 if ($3) {
736 if (nd_type($$) == NODE_BLOCK_PASS) {
737 rb_compile_error("both block arg and actual block given");
739 $3->nd_iter = $$;
740 $$ = $3;
742 fixpos($$, $2);
744 | primary_value '.' operation2 command_args %prec tLOWEST
746 $$ = new_call(parse_state, $1, $3, $4);
747 fixpos($$, $1);
749 | primary_value '.' operation2 command_args cmd_brace_block
751 $$ = new_call(parse_state, $1, $3, $4);
752 if ($5) {
753 if (nd_type($$) == NODE_BLOCK_PASS) {
754 rb_compile_error("both block arg and actual block given");
756 $5->nd_iter = $$;
757 $$ = $5;
759 fixpos($$, $1);
761 | primary_value tCOLON2 operation2 command_args %prec tLOWEST
763 $$ = new_call(parse_state, $1, $3, $4);
764 fixpos($$, $1);
766 | primary_value tCOLON2 operation2 command_args cmd_brace_block
768 $$ = new_call(parse_state, $1, $3, $4);
769 if ($5) {
770 if (nd_type($$) == NODE_BLOCK_PASS) {
771 rb_compile_error("both block arg and actual block given");
773 $5->nd_iter = $$;
774 $$ = $5;
776 fixpos($$, $1);
778 | kSUPER command_args
780 $$ = new_super(parse_state, $2);
781 fixpos($$, $2);
783 | kYIELD command_args
785 $$ = new_yield(parse_state, $2);
786 fixpos($$, $2);
790 mlhs : mlhs_basic
791 | tLPAREN mlhs_entry ')'
793 $$ = $2;
797 mlhs_entry : mlhs_basic
798 | tLPAREN mlhs_entry ')'
800 $$ = NEW_MASGN(NEW_LIST($2), 0);
804 mlhs_basic : mlhs_head
806 $$ = NEW_MASGN($1, 0);
808 | mlhs_head mlhs_item
810 $$ = NEW_MASGN(list_append(parse_state, $1,$2), 0);
812 | mlhs_head tSTAR mlhs_node
814 $$ = NEW_MASGN($1, $3);
816 | mlhs_head tSTAR
818 $$ = NEW_MASGN($1, -1);
820 | tSTAR mlhs_node
822 $$ = NEW_MASGN(0, $2);
824 | tSTAR
826 $$ = NEW_MASGN(0, -1);
830 mlhs_item : mlhs_node
831 | tLPAREN mlhs_entry ')'
833 $$ = $2;
837 mlhs_head : mlhs_item ','
839 $$ = NEW_LIST($1);
841 | mlhs_head mlhs_item ','
843 $$ = list_append(parse_state, $1, $2);
847 mlhs_node : variable
849 $$ = assignable($1, 0, parse_state);
851 | primary_value '[' aref_args ']'
853 $$ = aryset($1, $3, parse_state);
855 | primary_value '.' tIDENTIFIER
857 $$ = attrset($1, $3, parse_state);
859 | primary_value tCOLON2 tIDENTIFIER
861 $$ = attrset($1, $3, parse_state);
863 | primary_value '.' tCONSTANT
865 $$ = attrset($1, $3, parse_state);
867 | primary_value tCOLON2 tCONSTANT
869 if (in_def || in_single)
870 yyerror("dynamic constant assignment");
871 $$ = NEW_CDECL(0, 0, NEW_COLON2($1, $3));
873 | tCOLON3 tCONSTANT
875 if (in_def || in_single)
876 yyerror("dynamic constant assignment");
877 $$ = NEW_CDECL(0, 0, NEW_COLON3($2));
879 | backref
881 rb_backref_error($1);
882 $$ = 0;
/* Left-hand side of a single assignment; each alternative builds the matching assignment target node. */
886 lhs : variable
888 $$ = assignable($1, 0, parse_state);
890 | primary_value '[' aref_args ']'
892 $$ = aryset($1, $3, parse_state); /* indexed assignment: recv[args] = ... */
894 | primary_value '.' tIDENTIFIER
896 $$ = attrset($1, $3, parse_state); /* attribute assignment: recv.name = ... */
898 | primary_value tCOLON2 tIDENTIFIER
900 $$ = attrset($1, $3, parse_state); /* recv::name is handled the same way as recv.name */
902 | primary_value '.' tCONSTANT
904 $$ = attrset($1, $3, parse_state);
906 | primary_value tCOLON2 tCONSTANT
908 if (in_def || in_single) /* scoped constant assignment is reported as an error inside a method body */
909 yyerror("dynamic constant assignment");
910 $$ = NEW_CDECL(0, 0, NEW_COLON2($1, $3));
912 | tCOLON3 tCONSTANT
914 if (in_def || in_single)
915 yyerror("dynamic constant assignment");
916 $$ = NEW_CDECL(0, 0, NEW_COLON3($2));
918 | backref
920 rb_backref_error($1); /* back-references are not assignable: report it and yield no node */
921 $$ = 0;
925 cname : tIDENTIFIER
927 yyerror("class/module name must be CONSTANT");
929 | tCONSTANT
/* Class/module path in a definition header: `::Name`, `Name`, or `expr::Name`. */
932 cpath : tCOLON3 cname
934 $$ = NEW_COLON3($2);
936 | cname
938 $$ = NEW_COLON2(0, $1); /* use the cname id ($1) directly; $$ is typed <node> and has not been assigned yet */
940 | primary_value tCOLON2 cname
942 $$ = NEW_COLON2($1, $3);
946 fname : tIDENTIFIER
947 | tCONSTANT
948 | tFID
949 | op
951 vps->lex_state = EXPR_END;
952 $$ = convert_op($1);
954 | reswords
956 vps->lex_state = EXPR_END;
957 $$ = $<id>1;
961 fitem : fname
962 | symbol
965 undef_list : fitem
967 $$ = NEW_UNDEF($1);
969 | undef_list ',' {vps->lex_state = EXPR_FNAME;} fitem
971 $$ = block_append(parse_state, $1, NEW_UNDEF($4));
975 op : '|' { $$ = '|'; }
976 | '^' { $$ = '^'; }
977 | '&' { $$ = '&'; }
978 | tCMP { $$ = tCMP; }
979 | tEQ { $$ = tEQ; }
980 | tEQQ { $$ = tEQQ; }
981 | tMATCH { $$ = tMATCH; }
982 | '>' { $$ = '>'; }
983 | tGEQ { $$ = tGEQ; }
984 | '<' { $$ = '<'; }
985 | tLEQ { $$ = tLEQ; }
986 | tLSHFT { $$ = tLSHFT; }
987 | tRSHFT { $$ = tRSHFT; }
988 | '+' { $$ = '+'; }
989 | '-' { $$ = '-'; }
990 | '*' { $$ = '*'; }
991 | tSTAR { $$ = '*'; }
992 | '/' { $$ = '/'; }
993 | '%' { $$ = '%'; }
994 | tPOW { $$ = tPOW; }
995 | '~' { $$ = '~'; }
996 | tUPLUS { $$ = tUPLUS; }
997 | tUMINUS { $$ = tUMINUS; }
998 | tAREF { $$ = tAREF; }
999 | tASET { $$ = tASET; }
1000 | '`' { $$ = '`'; }
1003 reswords : k__LINE__ | k__FILE__ | klBEGIN | klEND
1004 | kALIAS | kAND | kBEGIN | kBREAK | kCASE | kCLASS | kDEF
1005 | kDEFINED | kDO | kELSE | kELSIF | kEND | kENSURE | kFALSE
1006 | kFOR | kIN | kMODULE | kNEXT | kNIL | kNOT
1007 | kOR | kREDO | kRESCUE | kRETRY | kRETURN | kSELF | kSUPER
1008 | kTHEN | kTRUE | kUNDEF | kWHEN | kYIELD
1009 | kIF_MOD | kUNLESS_MOD | kWHILE_MOD | kUNTIL_MOD | kRESCUE_MOD
1012 arg : lhs '=' arg
1014 $$ = node_assign($1, $3, parse_state);
1016 | lhs '=' arg kRESCUE_MOD arg
1018 $$ = node_assign($1, NEW_RESCUE($3, NEW_RESBODY(0,$5,0), 0), parse_state);
1020 | var_lhs tOP_ASGN arg
1022 value_expr($3);
1023 if ($1) {
1024 ID vid = $1->nd_vid;
1025 if ($2 == tOROP) {
1026 $1->nd_value = $3;
1027 $$ = NEW_OP_ASGN_OR(gettable(vid), $1);
1028 if (is_asgn_or_id(vid)) {
1029 $$->nd_aid = vid;
1032 else if ($2 == tANDOP) {
1033 $1->nd_value = $3;
1034 $$ = NEW_OP_ASGN_AND(gettable(vid), $1);
1036 else {
1037 $$ = $1;
1038 $$->nd_value = call_op(gettable(vid),$2,1,$3, parse_state);
1041 else {
1042 $$ = 0;
1045 | primary_value '[' aref_args ']' tOP_ASGN arg
1047 NODE *args;
1049 value_expr($6);
1050 args = NEW_LIST($6);
1051 if ($3 && nd_type($3) != NODE_ARRAY)
1052 $3 = NEW_LIST($3);
1053 $3 = list_append(parse_state, $3, NEW_NIL());
1054 list_concat(args, $3);
1055 if ($5 == tOROP) {
1056 $5 = 0;
1058 else if ($5 == tANDOP) {
1059 $5 = 1;
1061 $$ = NEW_OP_ASGN1($1, $5, args);
1062 fixpos($$, $1);
1064 | primary_value '.' tIDENTIFIER tOP_ASGN arg
1066 value_expr($5);
1067 if ($4 == tOROP) {
1068 $4 = 0;
1070 else if ($4 == tANDOP) {
1071 $4 = 1;
1073 $$ = NEW_OP_ASGN2($1, $3, $4, $5);
1074 fixpos($$, $1);
1076 | primary_value '.' tCONSTANT tOP_ASGN arg
1078 value_expr($5);
1079 if ($4 == tOROP) {
1080 $4 = 0;
1082 else if ($4 == tANDOP) {
1083 $4 = 1;
1085 $$ = NEW_OP_ASGN2($1, $3, $4, $5);
1086 fixpos($$, $1);
1088 | primary_value tCOLON2 tIDENTIFIER tOP_ASGN arg
1090 value_expr($5);
1091 if ($4 == tOROP) {
1092 $4 = 0;
1094 else if ($4 == tANDOP) {
1095 $4 = 1;
1097 $$ = NEW_OP_ASGN2($1, $3, $4, $5);
1098 fixpos($$, $1);
1100 | primary_value tCOLON2 tCONSTANT tOP_ASGN arg
1102 yyerror("constant re-assignment");
1103 $$ = 0;
1105 | tCOLON3 tCONSTANT tOP_ASGN arg
1107 yyerror("constant re-assignment");
1108 $$ = 0;
1110 | backref tOP_ASGN arg
1112 rb_backref_error($1);
1113 $$ = 0;
1115 | arg tDOT2 arg
1117 value_expr($1);
1118 value_expr($3);
1119 $$ = NEW_DOT2($1, $3);
1121 | arg tDOT3 arg
1123 value_expr($1);
1124 value_expr($3);
1125 $$ = NEW_DOT3($1, $3);
1127 | arg '+' arg
1129 $$ = call_op($1, '+', 1, $3, parse_state);
1131 | arg '-' arg
1133 $$ = call_op($1, '-', 1, $3, parse_state);
1135 | arg '*' arg
1137 $$ = call_op($1, '*', 1, $3, parse_state);
1139 | arg '/' arg
1141 $$ = call_op($1, '/', 1, $3, parse_state);
1143 | arg '%' arg
1145 $$ = call_op($1, '%', 1, $3, parse_state);
/* Power and unary sign operators, all lowered to operator calls through call_op. */
1147 | arg tPOW arg
1149 $$ = call_op($1, tPOW, 1, $3, parse_state);
1151 | tUMINUS_NUM tINTEGER tPOW arg
1153 $$ = call_op(call_op($2, tPOW, 1, $4, parse_state), tUMINUS, 0, 0, parse_state); /* minus on a numeric literal applies after the power: -(lit ** arg) */
1155 | tUMINUS_NUM tFLOAT tPOW arg
1157 $$ = call_op(call_op($2, tPOW, 1, $4, parse_state), tUMINUS, 0, 0, parse_state); /* same shape for float literals */
1159 | tUPLUS arg
1161 $$ = call_op($2, tUPLUS, 0, 0, parse_state); /* unary operators pass an argument count of 0 and no argument */
1163 | tUMINUS arg
1165 $$ = call_op($2, tUMINUS, 0, 0, parse_state);
1167 | arg '|' arg
1169 $$ = call_op($1, '|', 1, $3, parse_state);
1171 | arg '^' arg
1173 $$ = call_op($1, '^', 1, $3, parse_state);
1175 | arg '&' arg
1177 $$ = call_op($1, '&', 1, $3, parse_state);
1179 | arg tCMP arg
1181 $$ = call_op($1, tCMP, 1, $3, parse_state);
1183 | arg '>' arg
1185 $$ = call_op($1, '>', 1, $3, parse_state);
1187 | arg tGEQ arg
1189 $$ = call_op($1, tGEQ, 1, $3, parse_state);
1191 | arg '<' arg
1193 $$ = call_op($1, '<', 1, $3, parse_state);
1195 | arg tLEQ arg
1197 $$ = call_op($1, tLEQ, 1, $3, parse_state);
/* Equality and pattern-match operators; the negated forms have no operator of their own. */
1199 | arg tEQ arg
1201 $$ = call_op($1, tEQ, 1, $3, parse_state);
1203 | arg tEQQ arg
1205 $$ = call_op($1, tEQQ, 1, $3, parse_state);
1207 | arg tNEQ arg
1209 $$ = NEW_NOT(call_op($1, tEQ, 1, $3, parse_state)); /* a != b is built as !(a == b) */
1211 | arg tMATCH arg
1213 $$ = match_gen($1, $3, parse_state);
1215 | arg tNMATCH arg
1217 $$ = NEW_NOT(match_gen($1, $3, parse_state)); /* a !~ b is built as !(a =~ b) */
1219 | '!' arg
1221 $$ = NEW_NOT(cond($2, parse_state));
1223 | '~' arg
1225 $$ = call_op($2, '~', 0, 0, parse_state);
1227 | arg tLSHFT arg
1229 $$ = call_op($1, tLSHFT, 1, $3, parse_state);
1231 | arg tRSHFT arg
1233 $$ = call_op($1, tRSHFT, 1, $3, parse_state);
1235 | arg tANDOP arg
1237 $$ = logop(NODE_AND, $1, $3, parse_state);
1239 | arg tOROP arg
1241 $$ = logop(NODE_OR, $1, $3, parse_state);
1243 | kDEFINED opt_nl {vps->in_defined = 1;} arg
1245 vps->in_defined = 0;
1246 $$ = NEW_DEFINED($4);
1248 | arg '?' {vps->ternary_colon++;} arg ':' arg
1250 $$ = NEW_IF(cond($1, parse_state), $4, $6);
1251 fixpos($$, $1);
1252 vps->ternary_colon--;
1254 | primary
1256 $$ = $1;
1260 arg_value : arg
1262 value_expr($1);
1263 $$ = $1;
1267 aref_args : none
1268 | command opt_nl
1270 rb_warn("parenthesize argument(s) for future version");
1271 $$ = NEW_LIST($1);
1273 | args trailer
1275 $$ = $1;
1277 | args ',' tSTAR arg opt_nl
1279 value_expr($4);
1280 $$ = arg_concat(parse_state, $1, $4);
1282 | assocs trailer
1284 $$ = NEW_LIST(NEW_HASH($1));
1286 | tSTAR arg opt_nl
1288 value_expr($2);
1289 $$ = NEW_NEWLINE(NEW_SPLAT($2));
1293 paren_args : '(' none ')'
1295 $$ = $2;
1297 | '(' call_args opt_nl ')'
1299 $$ = $2;
1301 | '(' block_call opt_nl ')'
1303 rb_warn("parenthesize argument for future version");
1304 $$ = NEW_LIST($2);
1306 | '(' args ',' block_call opt_nl ')'
1308 rb_warn("parenthesize argument for future version");
1309 $$ = list_append(parse_state, $2, $4);
1313 opt_paren_args : none
1314 | paren_args
1317 call_args : command
1319 rb_warn("parenthesize argument(s) for future version");
1320 $$ = NEW_LIST($1);
1322 | args opt_block_arg
1324 $$ = arg_blk_pass($1, $2);
1326 | args ',' tSTAR arg_value opt_block_arg
1328 $$ = arg_concat(parse_state, $1, $4);
1329 $$ = arg_blk_pass($$, $5);
1331 | assocs opt_block_arg
1333 $$ = NEW_LIST(NEW_POSITIONAL($1));
1334 $$ = arg_blk_pass($$, $2);
1336 | assocs ',' tSTAR arg_value opt_block_arg
1338 $$ = arg_concat(parse_state, NEW_LIST(NEW_POSITIONAL($1)), $4);
1339 $$ = arg_blk_pass($$, $5);
1341 | args ',' assocs opt_block_arg
1343 $$ = list_append(parse_state, $1, NEW_POSITIONAL($3));
1344 $$ = arg_blk_pass($$, $4);
1346 | args ',' assocs ',' tSTAR arg opt_block_arg
1348 value_expr($6);
1349 $$ = arg_concat(parse_state, list_append(parse_state, $1, NEW_POSITIONAL($3)), $6);
1350 $$ = arg_blk_pass($$, $7);
1352 | tSTAR arg_value opt_block_arg
1354 $$ = arg_blk_pass(NEW_SPLAT($2), $3);
1356 | block_arg
1359 call_args2 : arg_value ',' args opt_block_arg
1361 $$ = arg_blk_pass(list_concat(NEW_LIST($1),$3), $4);
1363 | arg_value ',' block_arg /* one positional argument followed by &block */
1365 $$ = arg_blk_pass(NEW_LIST($1), $3); /* wrap the lone argument in a list, as every other arg_value-led alternative of this rule does */
1367 | arg_value ',' tSTAR arg_value opt_block_arg
1369 $$ = arg_concat(parse_state, NEW_LIST($1), $4);
1370 $$ = arg_blk_pass($$, $5);
1372 | arg_value ',' args ',' tSTAR arg_value opt_block_arg
1374 $$ = arg_concat(parse_state, list_concat(NEW_LIST($1),$3), $6);
1375 $$ = arg_blk_pass($$, $7);
1377 | assocs opt_block_arg
1379 $$ = NEW_LIST(NEW_POSITIONAL($1));
1380 $$ = arg_blk_pass($$, $2);
1382 | assocs ',' tSTAR arg_value opt_block_arg
1384 $$ = arg_concat(parse_state, NEW_LIST(NEW_POSITIONAL($1)), $4);
1385 $$ = arg_blk_pass($$, $5);
1387 | arg_value ',' assocs opt_block_arg
1389 $$ = list_append(parse_state, NEW_LIST($1), NEW_POSITIONAL($3));
1390 $$ = arg_blk_pass($$, $4);
1392 | arg_value ',' args ',' assocs opt_block_arg
1394 $$ = list_append(parse_state, list_concat(NEW_LIST($1),$3), NEW_POSITIONAL($5));
1395 $$ = arg_blk_pass($$, $6);
1397 | arg_value ',' assocs ',' tSTAR arg_value opt_block_arg
1399 $$ = arg_concat(parse_state, list_append(parse_state, NEW_LIST($1), NEW_POSITIONAL($3)), $6);
1400 $$ = arg_blk_pass($$, $7);
1402 | arg_value ',' args ',' assocs ',' tSTAR arg_value opt_block_arg
1404 $$ = arg_concat(parse_state, list_append(parse_state, list_concat(NEW_LIST($1), $3), NEW_POSITIONAL($5)), $8);
1405 $$ = arg_blk_pass($$, $9);
1407 | tSTAR arg_value opt_block_arg
1409 $$ = arg_blk_pass(NEW_SPLAT($2), $3);
1411 | block_arg
/* Arguments of a parenthesis-less command call, parsed with the cmdarg flag pushed. */
1414 command_args : {
1415 $<num>$ = vps->cmdarg_stack; /* save the whole stack as this mid-rule action's value ($1) */
1416 CMDARG_PUSH(1);
1418 open_args
1420 /* CMDARG_POP() */
1421 vps->cmdarg_stack = $<num>1; /* restore the saved stack wholesale instead of popping one bit */
1422 $$ = $2;
1426 open_args : call_args
1427 | tLPAREN_ARG {vps->lex_state = EXPR_ENDARG;} ')'
1429 rb_warn("don't put space before argument parentheses");
1430 $$ = 0;
1432 | tLPAREN_ARG call_args2 {vps->lex_state = EXPR_ENDARG;} ')'
1434 rb_warn("don't put space before argument parentheses");
1435 $$ = $2;
1439 block_arg : tAMPER arg_value
1441 $$ = NEW_BLOCK_PASS($2);
1445 opt_block_arg : ',' block_arg
1447 $$ = $2;
1449 | none
1452 args : arg_value
1454 $$ = NEW_LIST($1);
1456 | args ',' arg_value
1458 $$ = list_append(parse_state, $1, $3);
1462 mrhs : args ',' arg_value
1464 $$ = list_append(parse_state, $1, $3);
1466 | args ',' tSTAR arg_value
1468 $$ = arg_concat(parse_state, $1, $4);
1470 | tSTAR arg_value
1472 $$ = NEW_SPLAT($2);
/* primary: the alternatives for a primary expression. The first eight
   alternatives pass their value through unchanged; the rest build nodes
   as noted below. */
1476 primary : literal
1477 | strings
1478 | xstring
1479 | regexp
1480 | words
1481 | qwords
1482 | var_ref
1483 | backref
1484 | tFID
1486 $$ = NEW_FCALL($1, 0);
/* begin ... end: the kBEGIN slot is reused to remember the starting line;
   an empty body becomes NEW_NIL(). */
1488 | kBEGIN
1490 $<num>1 = ruby_sourceline;
1492 bodystmt
1493 kEND
1495 if ($3 == NULL)
1496 $$ = NEW_NIL();
1497 else
1498 $$ = NEW_BEGIN($3);
1499 nd_set_line($$, $<num>1);
1501 | tLPAREN_ARG expr {vps->lex_state = EXPR_ENDARG;} opt_nl ')'
1503 rb_warning("(...) interpreted as grouped expression");
1504 $$ = $2;
1506 | tLPAREN compstmt ')'
1508 $$ = $2;
1510 | primary_value tCOLON2 tCONSTANT
1512 $$ = NEW_COLON2($1, $3);
1514 | tCOLON3 tCONSTANT
1516 $$ = NEW_COLON3($2);
/* Indexing: when the receiver node is NODE_SELF an FCALL is built instead
   of a CALL. */
1518 | primary_value '[' aref_args ']'
1520 if ($1 && nd_type($1) == NODE_SELF) {
1521 $$ = NEW_FCALL(convert_op(tAREF), $3);
1522 } else {
1523 $$ = NEW_CALL($1, convert_op(tAREF), $3);
1525 fixpos($$, $1);
1527 | tLBRACK aref_args ']'
1529 if ($2 == 0) {
1530 $$ = NEW_ZARRAY(); /* zero length array*/
1532 else {
1533 $$ = $2;
1536 | tLBRACE assoc_list '}'
1538 $$ = NEW_HASH($2);
1540 | kRETURN
1542 $$ = NEW_RETURN(0);
1544 | kYIELD '(' call_args ')'
1546 $$ = new_yield(parse_state, $3);
1548 | kYIELD '(' ')'
1550 $$ = NEW_YIELD(0, Qfalse);
1552 | kYIELD
1554 $$ = NEW_YIELD(0, Qfalse);
/* defined?(expr): in_defined is set for the duration of the inner expr. */
1556 | kDEFINED opt_nl '(' {vps->in_defined = 1;} expr ')'
1558 vps->in_defined = 0;
1559 $$ = NEW_DEFINED($5);
/* operation followed by a brace block: the block node ($2) is the result
   and the call is attached as its nd_iter. */
1561 | operation brace_block
1563 $2->nd_iter = NEW_FCALL($1, 0);
1564 $$ = $2;
1565 fixpos($2->nd_iter, $2);
1567 | method_call
1568 | method_call brace_block
1570 if ($1 && nd_type($1) == NODE_BLOCK_PASS) {
1571 rb_compile_error("both block arg and actual block given");
1573 $2->nd_iter = $1;
1574 $$ = $2;
1575 fixpos($$, $1);
/* if / unless: after the node is built, the body and else branches are
   swapped whenever cond_negative() returns true for the condition. */
1577 | kIF expr_value then
1578 compstmt
1579 if_tail
1580 kEND
1582 $$ = NEW_IF(cond($2, parse_state), $4, $5);
1583 fixpos($$, $2);
1584 if (cond_negative(&$$->nd_cond)) {
1585 NODE *tmp = $$->nd_body;
1586 $$->nd_body = $$->nd_else;
1587 $$->nd_else = tmp;
1590 | kUNLESS expr_value then
1591 compstmt
1592 opt_else
1593 kEND
1595 $$ = NEW_UNLESS(cond($2, parse_state), $4, $5);
1596 fixpos($$, $2);
1597 if (cond_negative(&$$->nd_cond)) {
1598 NODE *tmp = $$->nd_body;
1599 $$->nd_body = $$->nd_else;
1600 $$->nd_else = tmp;
/* while / until: COND_PUSH(1) is active while the condition is parsed and
   popped after `do`; the node type is flipped to the opposite loop when
   cond_negative() returns true. */
1603 | kWHILE {COND_PUSH(1);} expr_value do {COND_POP();}
1604 compstmt
1605 kEND
1607 $$ = NEW_WHILE(cond($3, parse_state), $6, 1);
1608 fixpos($$, $3);
1609 if (cond_negative(&$$->nd_cond)) {
1610 nd_set_type($$, NODE_UNTIL);
1613 | kUNTIL {COND_PUSH(1);} expr_value do {COND_POP();}
1614 compstmt
1615 kEND
1617 $$ = NEW_UNTIL(cond($3, parse_state), $6, 1);
1618 fixpos($$, $3);
1619 if (cond_negative(&$$->nd_cond)) {
1620 nd_set_type($$, NODE_WHILE);
/* case: with a subject a NEW_CASE node is built; without one the
   case_body (or the lone else body) is returned directly. */
1623 | kCASE expr_value opt_terms
1624 case_body
1625 kEND
1627 $$ = NEW_CASE($2, $4);
1628 fixpos($$, $2);
1630 | kCASE opt_terms case_body kEND
1632 $$ = $3;
1634 | kCASE opt_terms kELSE compstmt kEND
1636 $$ = $4;
1638 | kFOR block_var kIN {COND_PUSH(1);} expr_value do {COND_POP();}
1639 compstmt
1640 kEND
1642 $$ = NEW_FOR($2, $5, $8);
1643 fixpos($$, $2);
/* class: rejected inside a method body (in_def / in_single); the mid-rule
   action bumps class_nest, opens a local scope and records the line, all
   of which the final action undoes or uses. */
1645 | kCLASS cpath superclass
1647 if (in_def || in_single)
1648 yyerror("class definition in method body");
1649 class_nest++;
1650 local_push(0);
1651 $<num>$ = ruby_sourceline;
1653 bodystmt
1654 kEND
1656 $$ = NEW_CLASS($2, $5, $3);
1657 nd_set_line($$, $<num>4);
1658 local_pop();
1659 class_nest--;
/* class << expr: in_def and in_single are saved in the two mid-rule
   values ($4 and $6), cleared for the body, and restored at the end. */
1661 | kCLASS tLSHFT expr
1663 $<num>$ = in_def;
1664 in_def = 0;
1666 term
1668 $<num>$ = in_single;
1669 in_single = 0;
1670 class_nest++;
1671 local_push(0);
1673 bodystmt
1674 kEND
1676 $$ = NEW_SCLASS($3, $7);
1677 fixpos($$, $3);
1678 local_pop();
1679 class_nest--;
1680 in_def = $<num>4;
1681 in_single = $<num>6;
1683 | kMODULE cpath
1685 if (in_def || in_single)
1686 yyerror("module definition in method body");
1687 class_nest++;
1688 local_push(0);
1689 $<num>$ = ruby_sourceline;
1691 bodystmt
1692 kEND
1694 $$ = NEW_MODULE($2, $4);
1695 nd_set_line($$, $<num>3);
1696 local_pop();
1697 class_nest--;
/* def name: the previous cur_mid is kept in the mid-rule value ($3) and
   restored afterwards; an empty body is replaced by NEW_NIL(). */
1699 | kDEF fname
1701 $<id>$ = cur_mid;
1702 cur_mid = $2;
1703 in_def++;
1704 local_push(0);
1706 f_arglist
1707 bodystmt
1708 kEND
1710 if (!$5) $5 = NEW_NIL();
1711 $$ = NEW_DEFN($2, $4, $5, NOEX_PRIVATE);
1712 fixpos($$, $4);
1713 local_pop();
1714 in_def--;
1715 cur_mid = $<id>3;
/* def singleton.name: counted via in_single rather than in_def. */
1717 | kDEF singleton dot_or_colon {vps->lex_state = EXPR_FNAME;} fname
1719 in_single++;
1720 local_push(0);
1721 vps->lex_state = EXPR_END; /* force for args */
1723 f_arglist
1724 bodystmt
1725 kEND
1727 $$ = NEW_DEFS($2, $5, $7, $8);
1728 fixpos($$, $2);
1729 local_pop();
1730 in_single--;
1732 | kBREAK
1734 $$ = NEW_BREAK(0);
1736 | kNEXT
1738 $$ = NEW_NEXT(0);
1740 | kREDO
1742 $$ = NEW_REDO();
1744 | kRETRY
1746 $$ = NEW_RETRY();
/* primary_value: a primary passed through value_expr() before use. */
1750 primary_value : primary
1752 value_expr($1);
1753 $$ = $1;
/* then / do: the separators accepted after a condition. */
1757 then : term
1758 | ':'
1759 | kTHEN
1760 | term kTHEN
1763 do : term
1764 | ':'
1765 | kDO_COND
/* if_tail: an optional else, or an elsif chain built as nested NEW_IF
   nodes (the recursive if_tail becomes the else branch). */
1768 if_tail : opt_else
1769 | kELSIF expr_value then
1770 compstmt
1771 if_tail
1773 $$ = NEW_IF(cond($2, parse_state), $4, $5);
1774 fixpos($$, $2);
1778 opt_else : none
1779 | kELSE compstmt
1781 $$ = $2;
1785 block_var : lhs
1786 | mlhs
/* opt_block_var: block parameter list. An explicitly empty list ("||",
   either as two '|' tokens or as tOROP) yields the sentinel (NODE*)1,
   which differs from the 0 produced by the `none` alternative. */
1789 opt_block_var : none
1790 | '|' /* none */ '|'
1792 $$ = (NODE*)1;
1794 | tOROP
1796 $$ = (NODE*)1;
1798 | '|' block_var '|'
1800 $$ = $2;
/* do_block: kDO_BLOCK ... kEND. The keyword's slot is overwritten with
   the current line and reset_block(vps) is called; after opt_block_var a
   second mid-rule action captures vps->block_vars ($4). The result is a
   NEW_ITER whose body comes from extract_block_vars(vps, $5, $4), with
   its line set to the recorded one. */
1804 do_block : kDO_BLOCK
1806 $<num>1 = ruby_sourceline;
1807 reset_block(vps);
1809 opt_block_var
1811 $<vars>$ = vps->block_vars;
1813 compstmt
1814 kEND
1816 $$ = NEW_ITER($3, 0, extract_block_vars(vps, $5, $<vars>4));
1817 nd_set_line($$, $<num>1);
/* block_call: a command with a do-block attached (the block node is the
   result and the command becomes its nd_iter), optionally followed by
   chained calls. A command that is already a NODE_BLOCK_PASS triggers a
   compile error. */
1821 block_call : command do_block
1823 if ($1 && nd_type($1) == NODE_BLOCK_PASS) {
1824 rb_compile_error("both block arg and actual block given");
1826 $2->nd_iter = $1;
1827 $$ = $2;
1828 fixpos($$, $1);
1830 | block_call '.' operation2 opt_paren_args
1832 $$ = new_call(parse_state, $1, $3, $4);
1834 | block_call tCOLON2 operation2 opt_paren_args
1836 $$ = new_call(parse_state, $1, $3, $4);
/* method_call: function-style calls, receiver calls via '.' or tCOLON2,
   and super. The '\\' and tUBS alternatives build a call to
   "get_reference" with the operation name passed as a symbol literal. */
1840 method_call : operation paren_args
1842 $$ = new_fcall(parse_state, $1, $2);
1843 fixpos($$, $2);
1845 | primary_value '.' operation2 opt_paren_args
1847 $$ = new_call(parse_state, $1, $3, $4);
1848 fixpos($$, $1);
1850 | primary_value tCOLON2 operation2 paren_args
1852 $$ = new_call(parse_state, $1, $3, $4);
1853 fixpos($$, $1);
1855 | primary_value tCOLON2 operation3
1857 $$ = new_call(parse_state, $1, $3, 0);
1859 | primary_value '\\' operation2
1861 $$ = NEW_CALL($1, rb_intern("get_reference"), NEW_LIST(NEW_LIT(ID2SYM($3))));
1863 | tUBS operation2
1865 $$ = NEW_FCALL(rb_intern("get_reference"), NEW_LIST(NEW_LIT(ID2SYM($2))));
1867 | kSUPER paren_args
1869 $$ = new_super(parse_state, $2);
1871 | kSUPER
1873 $$ = NEW_ZSUPER();
/* brace_block: a block written with '{' ... '}' or kDO ... kEND. Both
   alternatives record the starting line in the opener's slot, call
   reset_block(vps), capture vps->block_vars after the parameter list, and
   build a NEW_ITER from extract_block_vars(). */
1877 brace_block : '{'
1879 $<num>1 = ruby_sourceline;
1880 reset_block(vps);
1882 opt_block_var { $<vars>$ = vps->block_vars; }
1883 compstmt '}'
1885 $$ = NEW_ITER($3, 0, extract_block_vars(vps, $5, $<vars>4));
1886 nd_set_line($$, $<num>1);
1888 | kDO
1890 $<num>1 = ruby_sourceline;
1891 reset_block(vps);
1893 opt_block_var { $<vars>$ = vps->block_vars; }
1894 compstmt kEND
1896 $$ = NEW_ITER($3, 0, extract_block_vars(vps, $5, $<vars>4));
1897 nd_set_line($$, $<num>1);
/* case_body: one `when` clause; the following clauses (or the else body)
   arrive through `cases` and become the third NEW_WHEN operand. */
1901 case_body : kWHEN when_args then
1902 compstmt
1903 cases
1905 $$ = NEW_WHEN($2, $4, $5);
/* when_args: a tSTAR value is represented as a nested NEW_WHEN($value, 0, 0)
   list element. */
1908 when_args : args
1909 | args ',' tSTAR arg_value
1911 $$ = list_append(parse_state, $1, NEW_WHEN($4, 0, 0));
1913 | tSTAR arg_value
1915 $$ = NEW_LIST(NEW_WHEN($2, 0, 0));
1919 cases : opt_else
1920 | case_body
/* opt_rescue: a chain of rescue clauses. When an exception variable is
   given, an assignment from the global "$!" is prepended to the clause
   body. fixpos() uses the exception list when present, else the body. */
1923 opt_rescue : kRESCUE exc_list exc_var then
1924 compstmt
1925 opt_rescue
1927 if ($3) {
1928 $3 = node_assign($3, NEW_GVAR(rb_intern("$!")), parse_state);
1929 $5 = block_append(parse_state, $3, $5);
1931 $$ = NEW_RESBODY($2, $5, $6);
1932 fixpos($$, $2?$2:$5);
1934 | none
/* exc_list: a single value is wrapped in a list; mrhs is used as is. */
1937 exc_list : arg_value
1939 $$ = NEW_LIST($1);
1941 | mrhs
1942 | none
1945 exc_var : tASSOC lhs
1947 $$ = $2;
1949 | none
/* opt_ensure: an empty ensure body is replaced by NEW_NIL() so that the
   result is non-zero whenever the keyword was present. */
1952 opt_ensure : kENSURE compstmt
1954 if ($2)
1955 $$ = $2;
1956 else
1957 /* place holder */
1958 $$ = NEW_NIL();
1960 | none
/* literal: numerics and dsym pass through; a plain symbol id is wrapped
   in NEW_LIT. */
1963 literal : numeric
1964 | symbol
1966 $$ = NEW_LIT(ID2SYM($1));
1968 | dsym
/* strings: a completely empty string yields a NEW_STR over an empty
   buffer; anything else goes through evstr2dstr(). */
1971 strings : string
1973 NODE *node = $1;
1974 if (!node) {
1975 node = NEW_STR(string_new(0, 0));
1977 else {
1978 node = evstr2dstr(parse_state, node);
1980 $$ = node;
/* string: adjacent string1 pieces are merged with literal_concat(). */
1984 string : string1
1985 | string string1
1987 $$ = literal_concat(parse_state, $1, $2);
1991 string1 : tSTRING_BEG string_contents tSTRING_END
1993 $$ = $2;
/* xstring: turns the collected contents into an XSTR / DXSTR node.
   Empty contents give a NEW_XSTR over an empty buffer; a STR or DSTR node
   is retagged in place; any other node is wrapped in a new DXSTR node.
   The value carried by tXSTRING_BEG is then stored in node->u2.id (both
   branches of the final `if` store the same value that `code` holds). */
1997 xstring : tXSTRING_BEG xstring_contents tSTRING_END
1999 ID code = $1;
2000 NODE *node = $2;
2001 if (!node) {
2002 node = NEW_XSTR(string_new(0, 0));
2004 else {
2005 switch (nd_type(node)) {
2006 case NODE_STR:
2007 nd_set_type(node, NODE_XSTR);
2008 break;
2009 case NODE_DSTR:
2010 nd_set_type(node, NODE_DXSTR);
2011 break;
2012 default:
2013 node = NEW_NODE(NODE_DXSTR, string_new(0, 0), 1, NEW_LIST(node));
2014 break;
2017 if(code) {
2018 node->u2.id = code;
2019 } else {
2020 node->u2.id = 0;
2022 $$ = node;
/* regexp: builds a regex node from the contents and the option bits
   delivered with tREGEXP_END. RE_OPTION_ONCE is masked out of the stored
   options and instead selects NODE_DREGX_ONCE over NODE_DREGX for dynamic
   regexps. The default case wraps the node in a DSTR and, with no break
   in between, continues into the NODE_DSTR handling.
   NOTE(review): the NODE_STR case refers to `src`, whose declaration is
   not visible in this listing -- confirm whether the rb_reg_new() call is
   live code or disabled. */
2026 regexp : tREGEXP_BEG xstring_contents tREGEXP_END
2028 intptr_t options = $3;
2029 NODE *node = $2;
2030 if (!node) {
2031 node = NEW_REGEX(string_new2(""), options & ~RE_OPTION_ONCE);
2033 else switch (nd_type(node)) {
2034 case NODE_STR:
2036 nd_set_type(node, NODE_REGEX);
2037 node->nd_cnt = options & ~RE_OPTION_ONCE;
2039 node->nd_lit = rb_reg_new(RSTRING(src)->ptr,
2040 RSTRING(src)->len,
2041 options & ~RE_OPTION_ONCE);
2044 break;
2045 default:
2046 node = NEW_NODE(NODE_DSTR, string_new(0, 0), 1, NEW_LIST(node));
2047 case NODE_DSTR:
2048 if (options & RE_OPTION_ONCE) {
2049 nd_set_type(node, NODE_DREGX_ONCE);
2051 else {
2052 nd_set_type(node, NODE_DREGX);
2054 node->nd_cflag = options & ~RE_OPTION_ONCE;
2055 break;
2057 $$ = node;
/* words / qwords: word-list literals. The ' ' token is what the lexer's
   parse_string() returns for separators in STR_FUNC_QWORDS mode. An empty
   literal yields NEW_ZARRAY(). */
2061 words : tWORDS_BEG ' ' tSTRING_END
2063 $$ = NEW_ZARRAY();
2065 | tWORDS_BEG word_list tSTRING_END
2067 $$ = $2;
2071 word_list : /* none */
2073 $$ = 0;
2075 | word_list word ' '
2077 $$ = list_append(parse_state, $1, evstr2dstr(parse_state, $2));
/* word: the pieces of one word are merged with literal_concat(). */
2081 word : string_content
2082 | word string_content
2084 $$ = literal_concat(parse_state, $1, $2);
2088 qwords : tQWORDS_BEG ' ' tSTRING_END
2090 $$ = NEW_ZARRAY();
2092 | tQWORDS_BEG qword_list tSTRING_END
2094 $$ = $2;
2098 qword_list : /* none */
2100 $$ = 0;
2102 | qword_list tSTRING_CONTENT ' '
2104 $$ = list_append(parse_state, $1, $2);
/* string_contents / xstring_contents: zero or more string_content
   pieces, folded together with literal_concat(); empty yields 0. */
2108 string_contents : /* none */
2110 $$ = 0;
2112 | string_contents string_content
2114 $$ = literal_concat(parse_state, $1, $2);
2118 xstring_contents: /* none */
2120 $$ = 0;
2122 | xstring_contents string_content
2124 $$ = literal_concat(parse_state, $1, $2);
/* string_content: literal text, or an interpolation. For both
   interpolation forms the current lex_strterm is parked in the mid-rule
   value and cleared (with lex_state set to EXPR_BEG) while the embedded
   code is parsed, then put back. The tSTRING_DBEG form additionally
   pushes 0 on the COND and CMDARG stacks and pops them afterwards, and
   strips a leading NODE_NEWLINE wrapper before calling new_evstr(). */
2128 string_content : tSTRING_CONTENT
2129 | tSTRING_DVAR
2131 $<node>$ = lex_strterm;
2132 lex_strterm = 0;
2133 vps->lex_state = EXPR_BEG;
2135 string_dvar
2137 lex_strterm = $<node>2;
2138 $$ = NEW_EVSTR($3);
2140 | tSTRING_DBEG
2142 $<node>$ = lex_strterm;
2143 lex_strterm = 0;
2144 vps->lex_state = EXPR_BEG;
2145 COND_PUSH(0);
2146 CMDARG_PUSH(0);
2148 compstmt '}'
2150 lex_strterm = $<node>2;
2151 COND_LEXPOP();
2152 CMDARG_LEXPOP();
2153 if (($$ = $3) && nd_type($$) == NODE_NEWLINE) {
2154 $$ = $$->nd_next;
2156 $$ = new_evstr(parse_state, $$);
/* string_dvar: the variable kinds accepted after a tSTRING_DVAR token. */
2160 string_dvar : tGVAR {$$ = NEW_GVAR($1);}
2161 | tIVAR {$$ = NEW_IVAR($1);}
2162 | tCVAR {$$ = NEW_CVAR($1);}
2163 | backref
/* symbol: tSYMBEG followed by a name; lex_state becomes EXPR_END. */
2166 symbol : tSYMBEG sym
2168 vps->lex_state = EXPR_END;
2169 $$ = $2;
2173 sym : fname
2174 | tIVAR
2175 | tGVAR
2176 | tCVAR
/* dsym: a symbol written with string contents. Empty contents are an
   error. A DSTR node is retagged as DSYM. A STR node whose C-string
   length equals its stored length is interned and turned into a NODE_LIT;
   otherwise control falls into the default case, which wraps $$ in a new
   DSYM node.
   NOTE(review): on that fall-through path nd_str has just been passed to
   bdestroy() while $$ is still placed into the new list -- confirm that
   nothing reads nd_str afterwards. */
2179 dsym : tSYMBEG xstring_contents tSTRING_END
2181 vps->lex_state = EXPR_END;
2182 if (!($$ = $2)) {
2183 yyerror("empty symbol literal");
2185 else {
2186 switch (nd_type($$)) {
2187 case NODE_DSTR:
2188 nd_set_type($$, NODE_DSYM);
2189 break;
2190 case NODE_STR:
2191 /* TODO: this line should never fail unless nd_str is binary */
2192 if (strlen(bdatae($$->nd_str,"")) == blength($$->nd_str)) {
2193 ID tmp = rb_intern(bdata($$->nd_str));
2194 bdestroy($$->nd_str);
2195 $$->nd_lit = ID2SYM(tmp);
2196 nd_set_type($$, NODE_LIT);
2197 break;
2198 } else {
2199 bdestroy($$->nd_str);
2201 /* fall through */
2202 default:
2203 $$ = NEW_NODE(NODE_DSYM, string_new(0, 0), 1, NEW_LIST($$));
2204 break;
/* numeric: a negative literal (tUMINUS_NUM prefix) is wrapped in
   NEW_NEGATE. */
2210 numeric : tINTEGER
2211 | tFLOAT
2212 | tUMINUS_NUM tINTEGER %prec tLOWEST
2214 $$ = NEW_NEGATE($2);
2216 | tUMINUS_NUM tFLOAT %prec tLOWEST
2218 $$ = NEW_NEGATE($2);
/* variable: identifier-like tokens pass their value through; for the
   keyword alternatives the semantic value is the keyword's token code. */
2222 variable : tIDENTIFIER
2223 | tIVAR
2224 | tGVAR
2225 | tCONSTANT
2226 | tCVAR
2227 | kNIL {$$ = kNIL;}
2228 | kSELF {$$ = kSELF;}
2229 | kTRUE {$$ = kTRUE;}
2230 | kFALSE {$$ = kFALSE;}
2231 | k__FILE__ {$$ = k__FILE__;}
2232 | k__LINE__ {$$ = k__LINE__;}
/* var_ref / var_lhs: the same `variable` used for reading (gettable) or
   as an assignment target (assignable with a 0 value). */
2235 var_ref : variable
2237 $$ = gettable($1);
2241 var_lhs : variable
2243 $$ = assignable($1, 0, parse_state);
2247 backref : tNTH_REF
2248 | tBACK_REF
/* superclass: nothing (yields 0), or '<' followed by an expression; the
   error alternative resynchronises at the next term via yyerrok and also
   yields 0. */
2251 superclass : term
2253 $$ = 0;
2255 | '<'
2257 vps->lex_state = EXPR_BEG;
2259 expr_value term
2261 $$ = $3;
2263 | error term {yyerrok; $$ = 0;}
/* f_arglist: formal arguments with or without parentheses; the
   parenthesised form sets lex_state to EXPR_BEG afterwards. */
2266 f_arglist : '(' f_args opt_nl ')'
2268 $$ = $2;
2269 vps->lex_state = EXPR_BEG;
2271 | f_args term
2273 $$ = $1;
/* f_args: every accepted combination of required (f_arg), optional
   (f_optarg), rest (f_rest_arg) and block (f_block_arg) parameters.
   NEW_ARGS receives the required count, the optional-argument block and
   the rest value; 0 stands for "no required/optional part" and -1 for
   "no rest argument". A block argument, when present, is attached with
   block_append(). */
2277 f_args : f_arg ',' f_optarg ',' f_rest_arg opt_f_block_arg
2279 $$ = block_append(parse_state, NEW_ARGS((intptr_t)$1, $3, $5), $6);
2281 | f_arg ',' f_optarg opt_f_block_arg
2283 $$ = block_append(parse_state, NEW_ARGS((intptr_t)$1, $3, -1), $4);
2285 | f_arg ',' f_rest_arg opt_f_block_arg
2287 $$ = block_append(parse_state, NEW_ARGS((intptr_t)$1, 0, $3), $4);
2289 | f_arg opt_f_block_arg
2291 $$ = block_append(parse_state, NEW_ARGS((intptr_t)$1, 0, -1), $2);
2293 | f_optarg ',' f_rest_arg opt_f_block_arg
2295 $$ = block_append(parse_state, NEW_ARGS(0, $1, $3), $4);
2297 | f_optarg opt_f_block_arg
2299 $$ = block_append(parse_state, NEW_ARGS(0, $1, -1), $2);
2301 | f_rest_arg opt_f_block_arg
2303 $$ = block_append(parse_state, NEW_ARGS(0, 0, $1), $2);
2305 | f_block_arg
2307 $$ = block_append(parse_state, NEW_ARGS(0, 0, -1), $1);
2309 | /* none */
2311 $$ = NEW_ARGS(0, 0, -1);
/* f_norm_arg: one required parameter. Only a local identifier is
   accepted; the other token kinds exist purely to report a specific
   error. A valid name is registered with local_cnt() and the rule's
   value is the count 1. */
2315 f_norm_arg : tCONSTANT
2317 yyerror("formal argument cannot be a constant");
2319 | tIVAR
2321 yyerror("formal argument cannot be an instance variable");
2323 | tGVAR
2325 yyerror("formal argument cannot be a global variable");
2327 | tCVAR
2329 yyerror("formal argument cannot be a class variable");
2331 | tIDENTIFIER
2333 if (!is_local_id($1))
2334 yyerror("formal argument must be local variable");
2335 else if (local_id($1))
2336 yyerror("duplicate argument name");
2337 local_cnt($1);
2338 $$ = 1;
/* f_arg: the running count of required parameters. The recursive
   alternative adds one to $$, which the parser generator pre-loads with
   $1 (the count so far). */
2342 f_arg : f_norm_arg
2343 | f_arg ',' f_norm_arg
2345 $$ += 1;
/* f_opt: one optional parameter, represented as an assignment node of
   the default value to the name. */
2349 f_opt : tIDENTIFIER '=' arg_value
2351 if (!is_local_id($1))
2352 yyerror("formal argument must be local variable");
2353 else if (local_id($1))
2354 yyerror("duplicate optional argument name");
2355 $$ = assignable($1, $3, parse_state);
/* f_optarg: the optional parameters collected into a block; the first
   element's nd_end is pointed at itself before further elements are
   appended with block_append(). */
2359 f_optarg : f_opt
2361 $$ = NEW_BLOCK($1);
2362 $$->nd_end = $$;
2364 | f_optarg ',' f_opt
2366 $$ = block_append(parse_state, $1, $3);
2370 restarg_mark : '*'
2371 | tSTAR
/* f_rest_arg: a named rest parameter yields local_cnt(name) + 1, an
   anonymous one yields 0 (f_args uses -1 when there is none at all). */
2374 f_rest_arg : restarg_mark tIDENTIFIER
2376 if (!is_local_id($2))
2377 yyerror("rest argument must be local variable");
2378 else if (local_id($2))
2379 yyerror("duplicate rest argument name");
2380 $$ = local_cnt($2) + 1;
2382 | restarg_mark
2384 $$ = 0;
2388 blkarg_mark : '&'
2389 | tAMPER
/* f_block_arg: the '&name' parameter, as a NEW_BLOCK_ARG node. */
2392 f_block_arg : blkarg_mark tIDENTIFIER
2394 if (!is_local_id($2))
2395 yyerror("block argument must be local variable");
2396 else if (local_id($2))
2397 yyerror("duplicate block argument name");
2398 $$ = NEW_BLOCK_ARG($2);
2402 opt_f_block_arg : ',' f_block_arg
2404 $$ = $2;
2406 | none
/* singleton: the receiver in a singleton method definition. A var_ref
   that is NODE_SELF is replaced by a fresh NEW_SELF(); any other var_ref
   is checked with value_expr(). For the parenthesised form an empty
   expression and the listed literal node types are reported as errors;
   with no break after the literal error, control continues into the
   default case, so value_expr() runs for every non-empty expression. */
2409 singleton : var_ref
2411 if (nd_type($1) == NODE_SELF) {
2412 $$ = NEW_SELF();
2414 else {
2415 $$ = $1;
2416 value_expr($$);
2419 | '(' {vps->lex_state = EXPR_BEG;} expr opt_nl ')'
2421 if ($3 == 0) {
2422 yyerror("can't define singleton method for ().");
2424 else {
2425 switch (nd_type($3)) {
2426 case NODE_STR:
2427 case NODE_DSTR:
2428 case NODE_XSTR:
2429 case NODE_DXSTR:
2430 case NODE_DREGX:
2431 case NODE_LIT:
2432 case NODE_ARRAY:
2433 case NODE_ZARRAY:
2434 yyerror("can't define singleton method for literals");
2435 default:
2436 value_expr($3);
2437 break;
2440 $$ = $3;
/* assoc_list: the contents of a hash literal: empty, key => value pairs,
   or a flat argument list, which must have an even number of elements. */
2444 assoc_list : none
2445 | assocs trailer
2447 $$ = $1;
2449 | args trailer
2451 if ($1->nd_alen%2 != 0) {
2452 yyerror("odd number list for Hash");
2454 $$ = $1;
/* assocs: pairs are concatenated into one flat list. */
2458 assocs : assoc
2459 | assocs ',' assoc
2461 $$ = list_concat($1, $3);
/* assoc: a single pair as a two-element list (key, then value). */
2465 assoc : arg_value tASSOC arg_value
2467 $$ = list_append(parse_state, NEW_LIST($1), $3);
/* operation / operation2 / operation3: the token kinds accepted as a
   method name in different call positions; operation2 and operation3
   also allow `op`, and operation3 excludes tCONSTANT. */
2471 operation : tIDENTIFIER
2472 | tCONSTANT
2473 | tFID
2476 operation2 : tIDENTIFIER
2477 | tCONSTANT
2478 | tFID
2479 | op
2482 operation3 : tIDENTIFIER
2483 | tFID
2484 | op
2487 dot_or_colon : '.'
2488 | tCOLON2
/* Optional terminators and separators. */
2491 opt_terms : /* none */
2492 | terms
2495 opt_nl : /* none */
2496 | '\n'
2499 trailer : /* none */
2500 | '\n'
2501 | ','
/* term / terms: a ';' additionally clears the parser's error state via
   yyerrok. */
2504 term : ';' {yyerrok;}
2505 | '\n'
2508 terms : term
2509 | terms ';' {yyerrok;}
/* none: the empty production; its value is 0. */
2512 none : /* none */ {$$ = 0;}
2516 /* We remove any previous definition of `SIGN_EXTEND_CHAR',
2517 since ours (we hope) works properly with all combinations of
2518 machines, compilers, `char' and `unsigned char' argument types.
2519 (Per Bothner suggested the basic approach.) */
2520 #undef SIGN_EXTEND_CHAR
2521 #if __STDC__
2522 # define SIGN_EXTEND_CHAR(c) ((signed char)(c))
2523 #else /* not __STDC__ */
2524 /* As in Harbison and Steele. */
2525 # define SIGN_EXTEND_CHAR(c) ((((unsigned char)(c)) ^ 128) - 128)
2526 #endif
/* is_identchar(c): true for an alphanumeric character or '_'. Any value
   whose low byte sign-extends to -1 is rejected first, which covers the
   -1 that nextc() returns at end of input. ismbchar() is defined as 0 in
   this file, so that term never contributes. */
2527 #define is_identchar(c) (SIGN_EXTEND_CHAR(c)!=-1&&(ISALNUM(c) || (c) == '_' || ismbchar(c)))
/* NOTE(review): LEAVE_BS is not referenced in the visible part of the
   file -- confirm it is still needed. */
2529 #define LEAVE_BS 1
2531 static int
2532 syd_yyerror(msg, parse_state)
2533 const char *msg;
2534 rb_parse_state *parse_state;
2536 create_error(parse_state, (char *)msg);
2538 return 1;
2541 static int
2542 yycompile(parse_state, f, line)
2543 rb_parse_state *parse_state;
2544 char *f;
2545 int line;
2547 int n;
2548 /* Setup an initial empty scope. */
2549 heredoc_end = 0;
2550 lex_strterm = 0;
2551 ruby_sourcefile = f;
2552 n = yyparse(parse_state);
2553 ruby_debug_lines = 0;
2554 compile_for_eval = 0;
2555 parse_state->cond_stack = 0;
2556 parse_state->cmdarg_stack = 0;
2557 command_start = TRUE;
2558 class_nest = 0;
2559 in_single = 0;
2560 in_def = 0;
2561 cur_mid = 0;
2563 lex_strterm = 0;
2565 return n;
2568 static bool
2569 lex_get_str(rb_parse_state *parse_state)
2571 const char *str;
2572 const char *beg, *end, *pend;
2573 int sz;
2575 str = bdata(parse_state->lex_string);
2576 beg = str;
2578 if (parse_state->lex_str_used) {
2579 if (blength(parse_state->lex_string) == parse_state->lex_str_used) {
2580 return false;
2583 beg += parse_state->lex_str_used;
2586 pend = str + blength(parse_state->lex_string);
2587 end = beg;
2589 while(end < pend) {
2590 if(*end++ == '\n') break;
2593 sz = end - beg;
2594 bcatblk(parse_state->line_buffer, beg, sz);
2595 parse_state->lex_str_used += sz;
2597 return TRUE;
2600 void syd_add_to_parse_tree(STATE, OBJECT ary,
2601 NODE * n, int newlines, ID * locals, int line_numbers);
2603 static OBJECT convert_to_sexp(STATE, NODE *node, int newlines) {
2604 OBJECT ary;
2605 ary = array_new(state, 1);
2606 syd_add_to_parse_tree(state, ary, node, newlines, NULL, FALSE);
2607 return array_get(state, ary, 0);
2610 static bool
2611 lex_getline(rb_parse_state *parse_state)
2613 if(!parse_state->line_buffer) {
2614 parse_state->line_buffer = cstr2bstr("");
2615 } else {
2616 btrunc(parse_state->line_buffer, 0);
2619 return parse_state->lex_gets(parse_state);
2622 OBJECT
2623 syd_compile_string(STATE, const char *f, bstring s, int line, int newlines)
2625 int n;
2626 rb_parse_state *parse_state;
2627 OBJECT ret;
2628 parse_state = alloc_parse_state();
2629 parse_state->state = state;
2630 parse_state->lex_string = s;
2631 parse_state->lex_gets = lex_get_str;
2632 parse_state->lex_pbeg = 0;
2633 parse_state->lex_p = 0;
2634 parse_state->lex_pend = 0;
2635 parse_state->error = Qfalse;
2636 ruby_sourceline = line - 1;
2637 compile_for_eval = 1;
2639 n = yycompile(parse_state, f, line);
2641 if(parse_state->error == Qfalse) {
2642 ret = convert_to_sexp(state, parse_state->top, newlines);
2643 } else {
2644 ret = parse_state->error;
2646 pt_free(parse_state);
2647 free(parse_state);
2648 return ret;
2651 static bool parse_io_gets(rb_parse_state *parse_state) {
2652 if(feof(parse_state->lex_io)) {
2653 return false;
2656 while(TRUE) {
2657 char *ptr, buf[1024];
2658 int read;
2660 ptr = fgets(buf, sizeof(buf), parse_state->lex_io);
2661 if(!ptr) {
2662 return false;
2665 read = strlen(ptr);
2666 bcatblk(parse_state->line_buffer, ptr, read);
2668 /* check whether we read a full line */
2669 if(!(read == (sizeof(buf) - 1) && ptr[read] != '\n')) {
2670 break;
2674 return TRUE;
2677 OBJECT
2678 syd_compile_file(STATE, const char *f, FILE *file, int start, int newlines)
2680 int n;
2681 OBJECT ret;
2682 rb_parse_state *parse_state;
2683 parse_state = alloc_parse_state();
2684 parse_state->state = state;
2685 parse_state->lex_io = file;
2686 parse_state->lex_gets = parse_io_gets;
2687 parse_state->lex_pbeg = 0;
2688 parse_state->lex_p = 0;
2689 parse_state->lex_pend = 0;
2690 parse_state->error = Qfalse;
2691 ruby_sourceline = start - 1;
2693 n = yycompile(parse_state, f, start);
2695 if(parse_state->error == Qfalse) {
2696 ret = convert_to_sexp(state, parse_state->top, newlines);
2697 } else {
2698 ret = parse_state->error;
2701 pt_free(parse_state);
2702 free(parse_state);
2703 return ret;
2706 #define nextc() ps_nextc(parse_state)
2708 static inline int
2709 ps_nextc(rb_parse_state *parse_state)
2711 int c;
2713 if (parse_state->lex_p == parse_state->lex_pend) {
2714 bstring v;
2716 if (!lex_getline(parse_state)) return -1;
2717 v = parse_state->line_buffer;
2719 if (heredoc_end > 0) {
2720 ruby_sourceline = heredoc_end;
2721 heredoc_end = 0;
2723 ruby_sourceline++;
2725 /* This code is setup so that lex_pend can be compared to
2726 the data in lex_lastline. Thats important, otherwise
2727 the heredoc code breaks. */
2728 if(parse_state->lex_lastline) {
2729 bassign(parse_state->lex_lastline, v);
2730 } else {
2731 parse_state->lex_lastline = bstrcpy(v);
2734 v = parse_state->lex_lastline;
2736 parse_state->lex_pbeg = parse_state->lex_p = bdata(v);
2737 parse_state->lex_pend = parse_state->lex_p + blength(v);
2739 c = (unsigned char)*(parse_state->lex_p++);
2740 if (c == '\r' && parse_state->lex_p < parse_state->lex_pend && *(parse_state->lex_p) == '\n') {
2741 parse_state->lex_p++;
2742 c = '\n';
2743 parse_state->column = 0;
2744 } else if(c == '\n') {
2745 parse_state->column = 0;
2746 } else {
2747 parse_state->column++;
2750 return c;
2753 static void
2754 pushback(c, parse_state)
2755 int c;
2756 rb_parse_state *parse_state;
2758 if (c == -1) return;
2759 parse_state->lex_p--;
2762 /* Indicates if we're currently at the beginning of a line. */
/* (True when exactly one character of the current line has been consumed,
   i.e. the character just read was the first one on its line.) */
2763 #define was_bol() (parse_state->lex_p == parse_state->lex_pbeg + 1)
/* peek(c): true when the next unread character of the current line is c;
   false at the end of the line. Does not consume anything. */
2764 #define peek(c) (parse_state->lex_p != parse_state->lex_pend && (c) == *(parse_state->lex_p))
2766 /* The token buffer. It's just a global string that has
2767 functions to build up the string easily. */
/* tokfix() NUL-terminates the token text, tok() is the text itself,
   toklen() its length so far, and toklast() its last character (0 when
   the token is empty). */
2769 #define tokfix() (tokenbuf[tokidx]='\0')
2770 #define tok() tokenbuf
2771 #define toklen() tokidx
2772 #define toklast() (tokidx>0?tokenbuf[tokidx-1]:0)
2774 static char*
2775 newtok(rb_parse_state *parse_state)
2777 tokidx = 0;
2778 if (!tokenbuf) {
2779 toksiz = 60;
2780 tokenbuf = ALLOC_N(char, 60);
2782 if (toksiz > 4096) {
2783 toksiz = 60;
2784 REALLOC_N(tokenbuf, char, 60);
2786 return tokenbuf;
2789 static void tokadd(char c, rb_parse_state *parse_state)
2791 assert(tokidx < toksiz && tokidx >= 0);
2792 tokenbuf[tokidx++] = c;
2793 if (tokidx >= toksiz) {
2794 toksiz *= 2;
2795 REALLOC_N(tokenbuf, char, toksiz);
2799 static int
2800 read_escape(rb_parse_state *parse_state)
2802 int c;
2804 switch (c = nextc()) {
2805 case '\\': /* Backslash */
2806 return c;
2808 case 'n': /* newline */
2809 return '\n';
2811 case 't': /* horizontal tab */
2812 return '\t';
2814 case 'r': /* carriage-return */
2815 return '\r';
2817 case 'f': /* form-feed */
2818 return '\f';
2820 case 'v': /* vertical tab */
2821 return '\13';
2823 case 'a': /* alarm(bell) */
2824 return '\007';
2826 case 'e': /* escape */
2827 return 033;
2829 case '0': case '1': case '2': case '3': /* octal constant */
2830 case '4': case '5': case '6': case '7':
2832 int numlen;
2834 pushback(c, parse_state);
2835 c = scan_oct(parse_state->lex_p, 3, &numlen);
2836 parse_state->lex_p += numlen;
2838 return c;
2840 case 'x': /* hex constant */
2842 int numlen;
2844 c = scan_hex(parse_state->lex_p, 2, &numlen);
2845 if (numlen == 0) {
2846 yyerror("Invalid escape character syntax");
2847 return 0;
2849 parse_state->lex_p += numlen;
2851 return c;
2853 case 'b': /* backspace */
2854 return '\010';
2856 case 's': /* space */
2857 return ' ';
2859 case 'M':
2860 if ((c = nextc()) != '-') {
2861 yyerror("Invalid escape character syntax");
2862 pushback(c, parse_state);
2863 return '\0';
2865 if ((c = nextc()) == '\\') {
2866 return read_escape(parse_state) | 0x80;
2868 else if (c == -1) goto eof;
2869 else {
2870 return ((c & 0xff) | 0x80);
2873 case 'C':
2874 if ((c = nextc()) != '-') {
2875 yyerror("Invalid escape character syntax");
2876 pushback(c, parse_state);
2877 return '\0';
2879 case 'c':
2880 if ((c = nextc())== '\\') {
2881 c = read_escape(parse_state);
2883 else if (c == '?')
2884 return 0177;
2885 else if (c == -1) goto eof;
2886 return c & 0x9f;
2888 eof:
2889 case -1:
2890 yyerror("Invalid escape character syntax");
2891 return '\0';
2893 default:
2894 return c;
2898 static int
2899 tokadd_escape(term, parse_state)
2900 int term;
2901 rb_parse_state *parse_state;
2903 int c;
2905 switch (c = nextc()) {
2906 case '\n':
2907 return 0; /* just ignore */
2909 case '0': case '1': case '2': case '3': /* octal constant */
2910 case '4': case '5': case '6': case '7':
2912 int i;
2914 tokadd((char)'\\', parse_state);
2915 tokadd((char)c, parse_state);
2916 for (i=0; i<2; i++) {
2917 c = nextc();
2918 if (c == -1) goto eof;
2919 if (c < '0' || '7' < c) {
2920 pushback(c, parse_state);
2921 break;
2923 tokadd((char)c, parse_state);
2926 return 0;
2928 case 'x': /* hex constant */
2930 int numlen;
2932 tokadd('\\', parse_state);
2933 tokadd((char)c, parse_state);
2934 scan_hex(parse_state->lex_p, 2, &numlen);
2935 if (numlen == 0) {
2936 yyerror("Invalid escape character syntax");
2937 return -1;
2939 while (numlen--)
2940 tokadd((char)nextc(), parse_state);
2942 return 0;
2944 case 'M':
2945 if ((c = nextc()) != '-') {
2946 yyerror("Invalid escape character syntax");
2947 pushback(c, parse_state);
2948 return 0;
2950 tokadd('\\',parse_state);
2951 tokadd('M', parse_state);
2952 tokadd('-', parse_state);
2953 goto escaped;
2955 case 'C':
2956 if ((c = nextc()) != '-') {
2957 yyerror("Invalid escape character syntax");
2958 pushback(c, parse_state);
2959 return 0;
2961 tokadd('\\', parse_state);
2962 tokadd('C', parse_state);
2963 tokadd('-', parse_state);
2964 goto escaped;
2966 case 'c':
2967 tokadd('\\', parse_state);
2968 tokadd('c', parse_state);
2969 escaped:
2970 if ((c = nextc()) == '\\') {
2971 return tokadd_escape(term, parse_state);
2973 else if (c == -1) goto eof;
2974 tokadd((char)c, parse_state);
2975 return 0;
2977 eof:
2978 case -1:
2979 yyerror("Invalid escape character syntax");
2980 return -1;
2982 default:
2983 if (c != '\\' || c != term)
2984 tokadd('\\', parse_state);
2985 tokadd((char)c, parse_state);
2987 return 0;
2990 static int
2991 regx_options(rb_parse_state *parse_state)
2993 char kcode = 0;
2994 int options = 0;
2995 int c;
2997 newtok(parse_state);
2998 while (c = nextc(), ISALPHA(c)) {
2999 switch (c) {
3000 case 'i':
3001 options |= RE_OPTION_IGNORECASE;
3002 break;
3003 case 'x':
3004 options |= RE_OPTION_EXTENDED;
3005 break;
3006 case 'm':
3007 options |= RE_OPTION_MULTILINE;
3008 break;
3009 case 'o':
3010 options |= RE_OPTION_ONCE;
3011 break;
3012 case 'n':
3013 kcode = 16;
3014 break;
3015 case 'e':
3016 kcode = 32;
3017 break;
3018 case 's':
3019 kcode = 48;
3020 break;
3021 case 'u':
3022 kcode = 64;
3023 break;
3024 default:
3025 tokadd((char)c, parse_state);
3026 break;
3029 pushback(c, parse_state);
3030 if (toklen()) {
3031 tokfix();
3032 rb_compile_error("unknown regexp option%s - %s",
3033 toklen() > 1 ? "s" : "", tok());
3035 return options | kcode;
/* Flag bits describing how the contents of a string-like literal are
   lexed; tokadd_string() and parse_string() test them individually. */
3038 #define STR_FUNC_ESCAPE 0x01
3039 #define STR_FUNC_EXPAND 0x02
3040 #define STR_FUNC_REGEXP 0x04
3041 #define STR_FUNC_QWORDS 0x08
3042 #define STR_FUNC_SYMBOL 0x10
3043 #define STR_FUNC_INDENT 0x20
/* The flag combinations used for each kind of literal. str_dquote and
   str_xquote carry the same flags. */
3045 enum string_type {
3046 str_squote = (0),
3047 str_dquote = (STR_FUNC_EXPAND),
3048 str_xquote = (STR_FUNC_EXPAND),
3049 str_regexp = (STR_FUNC_REGEXP|STR_FUNC_ESCAPE|STR_FUNC_EXPAND),
3050 str_sword = (STR_FUNC_QWORDS),
3051 str_dword = (STR_FUNC_QWORDS|STR_FUNC_EXPAND),
3052 str_ssym = (STR_FUNC_SYMBOL),
3053 str_dsym = (STR_FUNC_SYMBOL|STR_FUNC_EXPAND),
/*
 * Copy string contents into the token buffer until something stops the
 * run: the unnested terminator, the start of an interpolation, an
 * unescaped space in word-list mode, or end of input.
 *
 * func  - STR_FUNC_* flags of the literal
 * term  - closing delimiter
 * paren - opening delimiter when it differs and nests, otherwise 0
 * nest  - nesting depth counter, updated in place
 *
 * Returns the last character read: -1 at end of input, otherwise the
 * character that ended the run (which has been pushed back).
 */
3056 static int tokadd_string(int func, int term, int paren, int *nest, rb_parse_state *parse_state)
3058 int c;
3060 while ((c = nextc()) != -1) {
/* an opening delimiter deepens the nesting */
3061 if (paren && c == paren) {
3062 ++*nest;
/* a closing delimiter either ends the literal (depth 0) or closes one
   nesting level */
3064 else if (c == term) {
3065 if (!nest || !*nest) {
3066 pushback(c, parse_state);
3067 break;
3069 --*nest;
/* '#' followed by '$', '@' or '{' starts an interpolation: stop here and
   let the caller deal with it */
3071 else if ((func & STR_FUNC_EXPAND) && c == '#' && parse_state->lex_p < parse_state->lex_pend) {
3072 int c2 = *(parse_state->lex_p);
3073 if (c2 == '$' || c2 == '@' || c2 == '{') {
3074 pushback(c, parse_state);
3075 break;
3078 else if (c == '\\') {
3079 c = nextc();
3080 switch (c) {
/* backslash-newline: kept as a plain newline in word lists, dropped
   entirely in expanding strings, otherwise kept with its backslash */
3081 case '\n':
3082 if (func & STR_FUNC_QWORDS) break;
3083 if (func & STR_FUNC_EXPAND) continue;
3084 tokadd('\\', parse_state);
3085 break;
/* double backslash: with STR_FUNC_ESCAPE both are kept (one here, one by
   the tokadd at the bottom of the loop), otherwise only one */
3087 case '\\':
3088 if (func & STR_FUNC_ESCAPE) tokadd((char)c, parse_state);
3089 break;
3091 default:
/* regexp: the escape is copied undecoded */
3092 if (func & STR_FUNC_REGEXP) {
3093 pushback(c, parse_state);
3094 if (tokadd_escape(term, parse_state) < 0)
3095 return -1;
3096 continue;
/* expanding string: the escape is decoded by read_escape() */
3098 else if (func & STR_FUNC_EXPAND) {
3099 pushback(c, parse_state);
3100 if (func & STR_FUNC_ESCAPE) tokadd('\\', parse_state);
3101 c = read_escape(parse_state);
3103 else if ((func & STR_FUNC_QWORDS) && ISSPACE(c)) {
3104 /* ignore backslashed spaces in %w */
/* non-expanding string: the backslash is kept unless it escapes one of
   the delimiters */
3106 else if (c != term && !(paren && c == paren)) {
3107 tokadd('\\', parse_state);
/* ismbchar() is defined as 0 in this file, so this branch is never taken */
3111 else if (ismbchar(c)) {
3112 int i, len = mbclen(c)-1;
3114 for (i = 0; i < len; i++) {
3115 tokadd((char)c, parse_state);
3116 c = nextc();
/* an unescaped space ends the current word in word-list mode */
3119 else if ((func & STR_FUNC_QWORDS) && ISSPACE(c)) {
3120 pushback(c, parse_state);
3121 break;
/* a NUL inside a symbol is reported once; the flag is cleared so later
   NULs are copied without further errors */
3123 if (!c && (func & STR_FUNC_SYMBOL)) {
3124 func &= ~STR_FUNC_SYMBOL;
3125 rb_compile_error("symbol cannot contain '\\0'");
3126 continue;
3128 tokadd((char)c, parse_state);
3130 return c;
3133 #define NEW_STRTERM(func, term, paren) \
3134 syd_node_newnode(parse_state, NODE_STRTERM, (OBJECT)(func), (OBJECT)((term) | ((paren) << (CHAR_BIT * 2))), NULL)
3135 #define pslval ((YYSTYPE *)parse_state->lval)
3136 static int
3137 parse_string(quote, parse_state)
3138 NODE *quote;
3139 rb_parse_state *parse_state;
3141 int func = quote->nd_func;
3142 int term = nd_term(quote);
3143 int paren = nd_paren(quote);
3144 int c, space = 0;
3146 if (func == -1) return tSTRING_END;
3147 c = nextc();
3148 if ((func & STR_FUNC_QWORDS) && ISSPACE(c)) {
3149 do {c = nextc();} while (ISSPACE(c));
3150 space = 1;
3152 if (c == term && !quote->nd_nest) {
3153 if (func & STR_FUNC_QWORDS) {
3154 quote->nd_func = -1;
3155 return ' ';
3157 if (!(func & STR_FUNC_REGEXP)) return tSTRING_END;
3158 pslval->num = regx_options(parse_state);
3159 return tREGEXP_END;
3161 if (space) {
3162 pushback(c, parse_state);
3163 return ' ';
3165 newtok(parse_state);
3166 if ((func & STR_FUNC_EXPAND) && c == '#') {
3167 switch (c = nextc()) {
3168 case '$':
3169 case '@':
3170 pushback(c, parse_state);
3171 return tSTRING_DVAR;
3172 case '{':
3173 return tSTRING_DBEG;
3175 tokadd('#', parse_state);
3177 pushback(c, parse_state);
3178 if (tokadd_string(func, term, paren, (int *)&quote->nd_nest, parse_state) == -1) {
3179 ruby_sourceline = nd_line(quote);
3180 rb_compile_error("unterminated string meets end of file");
3181 return tSTRING_END;
3184 tokfix();
3185 pslval->node = NEW_STR(string_new(tok(), toklen()));
3186 return tSTRING_CONTENT;
3189 /* Called when the lexer detects a heredoc is beginning. This pulls
3190 in more characters and detects what kind of heredoc it is. */
3191 static int
3192 heredoc_identifier(rb_parse_state *parse_state)
3194 int c = nextc(), term, func = 0;
3195 size_t len;
3197 if (c == '-') {
3198 c = nextc();
3199 func = STR_FUNC_INDENT;
3201 switch (c) {
3202 case '\'':
3203 func |= str_squote; goto quoted;
3204 case '"':
3205 func |= str_dquote; goto quoted;
3206 case '`':
3207 func |= str_xquote;
3208 quoted:
3209 /* The heredoc indent is quoted, so its easy to find, we just
3210 continue to consume characters into the token buffer until
3211 we hit the terminating character. */
3213 newtok(parse_state);
3214 tokadd((char)func, parse_state);
3215 term = c;
3217 /* Where of where has the term gone.. */
3218 while ((c = nextc()) != -1 && c != term) {
3219 len = mbclen(c);
3220 do {
3221 tokadd((char)c, parse_state);
3222 } while (--len > 0 && (c = nextc()) != -1);
3224 /* Ack! end of file or end of string. */
3225 if (c == -1) {
3226 rb_compile_error("unterminated here document identifier");
3227 return 0;
3230 break;
3232 default:
3233 /* Ok, this is an unquoted heredoc ident. We just consume
3234 until we hit a non-ident character. */
3236 /* Do a quick check that first character is actually valid.
3237 if it's not, then this isn't actually a heredoc at all!
3238 It sucks that it's way down here in this function that in
3239 finally bails with this not being a heredoc.*/
3241 if (!is_identchar(c)) {
3242 pushback(c, parse_state);
3243 if (func & STR_FUNC_INDENT) {
3244 pushback('-', parse_state);
3246 return 0;
3249 /* Finally, setup the token buffer and begin to fill it. */
3250 newtok(parse_state);
3251 term = '"';
3252 tokadd((char)(func |= str_dquote), parse_state);
3253 do {
3254 len = mbclen(c);
3255 do { tokadd((char)c, parse_state); } while (--len > 0 && (c = nextc()) != -1);
3256 } while ((c = nextc()) != -1 && is_identchar(c));
3257 pushback(c, parse_state);
3258 break;
3262 /* Fixup the token buffer, ie set the last character to null. */
3263 tokfix();
3264 len = parse_state->lex_p - parse_state->lex_pbeg;
3265 parse_state->lex_p = parse_state->lex_pend;
3266 pslval->id = 0;
3268 /* Tell the lexer that we're inside a string now. nd_lit is
3269 the heredoc identifier that we watch the stream for to
3270 detect the end of the heredoc. */
3271 bstring str = bstrcpy(parse_state->lex_lastline);
3272 lex_strterm = syd_node_newnode(parse_state, NODE_HEREDOC,
3273 (OBJECT)string_new(tok(), toklen()), /* nd_lit */
3274 (OBJECT)len, /* nd_nth */
3275 (OBJECT)str); /* nd_orig */
3276 return term == '`' ? tXSTRING_BEG : tSTRING_BEG;
3279 static void
3280 heredoc_restore(here, parse_state)
3281 NODE *here;
3282 rb_parse_state *parse_state;
3284 bstring line = here->nd_orig;
3286 bdestroy(parse_state->lex_lastline);
3288 parse_state->lex_lastline = line;
3289 parse_state->lex_pbeg = bdata(line);
3290 parse_state->lex_pend = parse_state->lex_pbeg + blength(line);
3291 parse_state->lex_p = parse_state->lex_pbeg + here->nd_nth;
3292 heredoc_end = ruby_sourceline;
3293 ruby_sourceline = nd_line(here);
3294 bdestroy((bstring)here->nd_lit);
3297 static int
3298 whole_match_p(eos, len, indent, parse_state)
3299 char *eos;
3300 int len, indent;
3301 rb_parse_state *parse_state;
3303 char *p = parse_state->lex_pbeg;
3304 int n;
3306 if (indent) {
3307 while (*p && ISSPACE(*p)) p++;
3309 n = parse_state->lex_pend - (p + len);
3310 if (n < 0 || (n > 0 && p[len] != '\n' && p[len] != '\r')) return FALSE;
3311 if (strncmp(eos, p, len) == 0) return TRUE;
3312 return FALSE;
3315 /* Called when the lexer knows it's inside a heredoc. This function
3316 is responsible for detecting an expandions (ie #{}) in the heredoc
3317 and emitting a lex token and also detecting the end of the heredoc. */
3319 static int
3320 here_document(here, parse_state)
3321 NODE *here;
3322 rb_parse_state *parse_state;
3324 int c, func, indent = 0;
3325 char *eos, *p, *pend;
3326 long len;
3327 bstring str = NULL;
3329 /* eos == the heredoc ident that we found when the heredoc started */
3330 eos = bdata(here->nd_str);
3331 len = blength(here->nd_str) - 1;
3333 /* indicates if we should search for expansions. */
3334 indent = (func = *eos++) & STR_FUNC_INDENT;
3336 /* Ack! EOF or end of input string! */
3337 if ((c = nextc()) == -1) {
3338 error:
3339 rb_compile_error("can't find string \"%s\" anywhere before EOF", eos);
3340 heredoc_restore(lex_strterm, parse_state);
3341 lex_strterm = 0;
3342 return 0;
3344 /* Gr. not yet sure what was_bol() means other than it seems like
3345 it means only 1 character has been consumed. */
3347 if (was_bol() && whole_match_p(eos, len, indent, parse_state)) {
3348 heredoc_restore(lex_strterm, parse_state);
3349 return tSTRING_END;
3352 /* If aren't doing expansions, we can just scan until
3353 we find the identifier. */
3355 if ((func & STR_FUNC_EXPAND) == 0) {
3356 do {
3357 p = bdata(parse_state->lex_lastline);
3358 pend = parse_state->lex_pend;
3359 if (pend > p) {
3360 switch (pend[-1]) {
3361 case '\n':
3362 if (--pend == p || pend[-1] != '\r') {
3363 pend++;
3364 break;
3366 case '\r':
3367 --pend;
3370 if (str) {
3371 bcatblk(str, p, pend - p);
3372 } else {
3373 str = blk2bstr(p, pend - p);
3375 if (pend < parse_state->lex_pend) bcatblk(str, "\n", 1);
3376 parse_state->lex_p = parse_state->lex_pend;
3377 if (nextc() == -1) {
3378 if (str) bdestroy(str);
3379 goto error;
3381 } while (!whole_match_p(eos, len, indent, parse_state));
3383 else {
3384 newtok(parse_state);
3385 if (c == '#') {
3386 switch (c = nextc()) {
3387 case '$':
3388 case '@':
3389 pushback(c, parse_state);
3390 return tSTRING_DVAR;
3391 case '{':
3392 return tSTRING_DBEG;
3394 tokadd('#', parse_state);
3397 /* Loop while we haven't found a the heredoc ident. */
3398 do {
3399 pushback(c, parse_state);
3400 /* Scan up until a \n and fill in the token buffer. */
3401 if ((c = tokadd_string(func, '\n', 0, NULL, parse_state)) == -1) goto error;
3403 /* We finished scanning, but didn't find a \n, so we setup the node
3404 and have the lexer file in more. */
3405 if (c != '\n') {
3406 pslval->node = NEW_STR(string_new(tok(), toklen()));
3407 return tSTRING_CONTENT;
3410 /* I think this consumes the \n */
3411 tokadd((char)nextc(), parse_state);
3412 if ((c = nextc()) == -1) goto error;
3413 } while (!whole_match_p(eos, len, indent, parse_state));
3414 str = string_new(tok(), toklen());
3416 heredoc_restore(lex_strterm, parse_state);
3417 lex_strterm = NEW_STRTERM(-1, 0, 0);
3418 pslval->node = NEW_STR(str);
3419 return tSTRING_CONTENT;
3422 #include "shotgun/lib/grammar_lex.c.tab"
/* Emit the "ambiguous first argument" warning.  yylex calls this when a
   '+', '-' or '/' follows whitespace in an argument position and is being
   treated as the start of an argument rather than as a binary operator. */
static void
arg_ambiguous(void)
{
    rb_warning("ambiguous first argument; put parentheses or even spaces");
}
/* Non-zero when the lexer state is EXPR_ARG or EXPR_CMDARG.  Uses a variable
   named `parse_state`, which must be in scope at the point of use. */
#define IS_ARG() \
    (parse_state->lex_state == EXPR_ARG || parse_state->lex_state == EXPR_CMDARG)
3432 static int
3433 yylex(YYSTYPE *yylval, void *vstate)
3435 register int c;
3436 int space_seen = 0;
3437 int cmd_state, comment_column;
3438 struct rb_parse_state *parse_state;
3439 bstring cur_line;
3440 parse_state = (struct rb_parse_state*)vstate;
3442 parse_state->lval = (void *)yylval;
3445 c = nextc();
3446 printf("lex char: %c\n", c);
3447 pushback(c, parse_state);
3450 if (lex_strterm) {
3451 int token;
3452 if (nd_type(lex_strterm) == NODE_HEREDOC) {
3453 token = here_document(lex_strterm, parse_state);
3454 if (token == tSTRING_END) {
3455 lex_strterm = 0;
3456 parse_state->lex_state = EXPR_END;
3459 else {
3460 token = parse_string(lex_strterm, parse_state);
3461 if (token == tSTRING_END || token == tREGEXP_END) {
3462 lex_strterm = 0;
3463 parse_state->lex_state = EXPR_END;
3466 return token;
3468 cmd_state = command_start;
3469 command_start = FALSE;
3470 retry:
3471 switch (c = nextc()) {
3472 case '\0': /* NUL */
3473 case '\004': /* ^D */
3474 case '\032': /* ^Z */
3475 case -1: /* end of script. */
3476 return 0;
3478 /* white spaces */
3479 case ' ': case '\t': case '\f': case '\r':
3480 case '\13': /* '\v' */
3481 space_seen++;
3482 goto retry;
3484 case '#': /* it's a comment */
3485 if(parse_state->comments) {
3486 comment_column = parse_state->column;
3487 cur_line = bfromcstralloc(50, "");
3489 while((c = nextc()) != '\n' && c != -1) {
3490 bconchar(cur_line, c);
3493 // FIXME: used to have the file and column too, but took it out.
3494 ptr_array_append(parse_state->comments, cur_line);
3496 if(c == -1) {
3497 return 0;
3499 } else {
3500 while ((c = nextc()) != '\n') {
3501 if (c == -1)
3502 return 0;
3505 /* fall through */
3506 case '\n':
3507 switch (parse_state->lex_state) {
3508 case EXPR_BEG:
3509 case EXPR_FNAME:
3510 case EXPR_DOT:
3511 case EXPR_CLASS:
3512 goto retry;
3513 default:
3514 break;
3516 command_start = TRUE;
3517 parse_state->lex_state = EXPR_BEG;
3518 return '\n';
3520 case '*':
3521 if ((c = nextc()) == '*') {
3522 if ((c = nextc()) == '=') {
3523 pslval->id = tPOW;
3524 parse_state->lex_state = EXPR_BEG;
3525 return tOP_ASGN;
3527 pushback(c, parse_state);
3528 c = tPOW;
3530 else {
3531 if (c == '=') {
3532 pslval->id = '*';
3533 parse_state->lex_state = EXPR_BEG;
3534 return tOP_ASGN;
3536 pushback(c, parse_state);
3537 if (IS_ARG() && space_seen && !ISSPACE(c)){
3538 rb_warning("`*' interpreted as argument prefix");
3539 c = tSTAR;
3541 else if (parse_state->lex_state == EXPR_BEG || parse_state->lex_state == EXPR_MID) {
3542 c = tSTAR;
3544 else {
3545 c = '*';
3548 switch (parse_state->lex_state) {
3549 case EXPR_FNAME: case EXPR_DOT:
3550 parse_state->lex_state = EXPR_ARG; break;
3551 default:
3552 parse_state->lex_state = EXPR_BEG; break;
3554 return c;
3556 case '!':
3557 parse_state->lex_state = EXPR_BEG;
3558 if ((c = nextc()) == '=') {
3559 return tNEQ;
3561 if (c == '~') {
3562 return tNMATCH;
3564 pushback(c, parse_state);
3565 return '!';
3567 case '=':
3568 if (was_bol()) {
3569 /* skip embedded rd document */
3570 if (strncmp(parse_state->lex_p, "begin", 5) == 0 && ISSPACE(parse_state->lex_p[5])) {
3571 for (;;) {
3572 parse_state->lex_p = parse_state->lex_pend;
3573 c = nextc();
3574 if (c == -1) {
3575 rb_compile_error("embedded document meets end of file");
3576 return 0;
3578 if (c != '=') continue;
3579 if (strncmp(parse_state->lex_p, "end", 3) == 0 &&
3580 (parse_state->lex_p + 3 == parse_state->lex_pend || ISSPACE(parse_state->lex_p[3]))) {
3581 break;
3584 parse_state->lex_p = parse_state->lex_pend;
3585 goto retry;
3589 switch (parse_state->lex_state) {
3590 case EXPR_FNAME: case EXPR_DOT:
3591 parse_state->lex_state = EXPR_ARG; break;
3592 default:
3593 parse_state->lex_state = EXPR_BEG; break;
3595 if ((c = nextc()) == '=') {
3596 if ((c = nextc()) == '=') {
3597 return tEQQ;
3599 pushback(c, parse_state);
3600 return tEQ;
3602 if (c == '~') {
3603 return tMATCH;
3605 else if (c == '>') {
3606 return tASSOC;
3608 pushback(c, parse_state);
3609 return '=';
3611 case '<':
3612 c = nextc();
3613 if (c == '<' &&
3614 parse_state->lex_state != EXPR_END &&
3615 parse_state->lex_state != EXPR_DOT &&
3616 parse_state->lex_state != EXPR_ENDARG &&
3617 parse_state->lex_state != EXPR_CLASS &&
3618 (!IS_ARG() || space_seen)) {
3619 int token = heredoc_identifier(parse_state);
3620 if (token) return token;
3622 switch (parse_state->lex_state) {
3623 case EXPR_FNAME: case EXPR_DOT:
3624 parse_state->lex_state = EXPR_ARG; break;
3625 default:
3626 parse_state->lex_state = EXPR_BEG; break;
3628 if (c == '=') {
3629 if ((c = nextc()) == '>') {
3630 return tCMP;
3632 pushback(c, parse_state);
3633 return tLEQ;
3635 if (c == '<') {
3636 if ((c = nextc()) == '=') {
3637 pslval->id = tLSHFT;
3638 parse_state->lex_state = EXPR_BEG;
3639 return tOP_ASGN;
3641 pushback(c, parse_state);
3642 return tLSHFT;
3644 pushback(c, parse_state);
3645 return '<';
3647 case '>':
3648 switch (parse_state->lex_state) {
3649 case EXPR_FNAME: case EXPR_DOT:
3650 parse_state->lex_state = EXPR_ARG; break;
3651 default:
3652 parse_state->lex_state = EXPR_BEG; break;
3654 if ((c = nextc()) == '=') {
3655 return tGEQ;
3657 if (c == '>') {
3658 if ((c = nextc()) == '=') {
3659 pslval->id = tRSHFT;
3660 parse_state->lex_state = EXPR_BEG;
3661 return tOP_ASGN;
3663 pushback(c, parse_state);
3664 return tRSHFT;
3666 pushback(c, parse_state);
3667 return '>';
3669 case '"':
3670 lex_strterm = NEW_STRTERM(str_dquote, '"', 0);
3671 return tSTRING_BEG;
3673 case '`':
3674 if (parse_state->lex_state == EXPR_FNAME) {
3675 parse_state->lex_state = EXPR_END;
3676 return c;
3678 if (parse_state->lex_state == EXPR_DOT) {
3679 if (cmd_state)
3680 parse_state->lex_state = EXPR_CMDARG;
3681 else
3682 parse_state->lex_state = EXPR_ARG;
3683 return c;
3685 lex_strterm = NEW_STRTERM(str_xquote, '`', 0);
3686 pslval->id = 0; /* so that xstring gets used normally */
3687 return tXSTRING_BEG;
3689 case '\'':
3690 lex_strterm = NEW_STRTERM(str_squote, '\'', 0);
3691 pslval->id = 0; /* so that xstring gets used normally */
3692 return tSTRING_BEG;
3694 case '?':
3695 if (parse_state->lex_state == EXPR_END || parse_state->lex_state == EXPR_ENDARG) {
3696 parse_state->lex_state = EXPR_BEG;
3697 return '?';
3699 c = nextc();
3700 if (c == -1) {
3701 rb_compile_error("incomplete character syntax");
3702 return 0;
3704 if (ISSPACE(c)){
3705 if (!IS_ARG()){
3706 int c2 = 0;
3707 switch (c) {
3708 case ' ':
3709 c2 = 's';
3710 break;
3711 case '\n':
3712 c2 = 'n';
3713 break;
3714 case '\t':
3715 c2 = 't';
3716 break;
3717 case '\v':
3718 c2 = 'v';
3719 break;
3720 case '\r':
3721 c2 = 'r';
3722 break;
3723 case '\f':
3724 c2 = 'f';
3725 break;
3727 if (c2) {
3728 rb_warn("invalid character syntax; use ?\\%c", c2);
3731 ternary:
3732 pushback(c, parse_state);
3733 parse_state->lex_state = EXPR_BEG;
3734 parse_state->ternary_colon = 1;
3735 return '?';
3737 else if (ismbchar(c)) {
3738 rb_warn("multibyte character literal not supported yet; use ?\\%.3o", c);
3739 goto ternary;
3741 else if ((ISALNUM(c) || c == '_') && parse_state->lex_p < parse_state->lex_pend && is_identchar(*(parse_state->lex_p))) {
3742 goto ternary;
3744 else if (c == '\\') {
3745 c = read_escape(parse_state);
3747 c &= 0xff;
3748 parse_state->lex_state = EXPR_END;
3749 pslval->node = NEW_FIXNUM((intptr_t)c);
3750 return tINTEGER;
3752 case '&':
3753 if ((c = nextc()) == '&') {
3754 parse_state->lex_state = EXPR_BEG;
3755 if ((c = nextc()) == '=') {
3756 pslval->id = tANDOP;
3757 parse_state->lex_state = EXPR_BEG;
3758 return tOP_ASGN;
3760 pushback(c, parse_state);
3761 return tANDOP;
3763 else if (c == '=') {
3764 pslval->id = '&';
3765 parse_state->lex_state = EXPR_BEG;
3766 return tOP_ASGN;
3768 pushback(c, parse_state);
3769 if (IS_ARG() && space_seen && !ISSPACE(c)){
3770 rb_warning("`&' interpreted as argument prefix");
3771 c = tAMPER;
3773 else if (parse_state->lex_state == EXPR_BEG || parse_state->lex_state == EXPR_MID) {
3774 c = tAMPER;
3776 else {
3777 c = '&';
3779 switch (parse_state->lex_state) {
3780 case EXPR_FNAME: case EXPR_DOT:
3781 parse_state->lex_state = EXPR_ARG; break;
3782 default:
3783 parse_state->lex_state = EXPR_BEG;
3785 return c;
3787 case '|':
3788 if ((c = nextc()) == '|') {
3789 parse_state->lex_state = EXPR_BEG;
3790 if ((c = nextc()) == '=') {
3791 pslval->id = tOROP;
3792 parse_state->lex_state = EXPR_BEG;
3793 return tOP_ASGN;
3795 pushback(c, parse_state);
3796 return tOROP;
3798 if (c == '=') {
3799 pslval->id = '|';
3800 parse_state->lex_state = EXPR_BEG;
3801 return tOP_ASGN;
3803 if (parse_state->lex_state == EXPR_FNAME || parse_state->lex_state == EXPR_DOT) {
3804 parse_state->lex_state = EXPR_ARG;
3806 else {
3807 parse_state->lex_state = EXPR_BEG;
3809 pushback(c, parse_state);
3810 return '|';
3812 case '+':
3813 c = nextc();
3814 if (parse_state->lex_state == EXPR_FNAME || parse_state->lex_state == EXPR_DOT) {
3815 parse_state->lex_state = EXPR_ARG;
3816 if (c == '@') {
3817 return tUPLUS;
3819 pushback(c, parse_state);
3820 return '+';
3822 if (c == '=') {
3823 pslval->id = '+';
3824 parse_state->lex_state = EXPR_BEG;
3825 return tOP_ASGN;
3827 if (parse_state->lex_state == EXPR_BEG || parse_state->lex_state == EXPR_MID ||
3828 (IS_ARG() && space_seen && !ISSPACE(c))) {
3829 if (IS_ARG()) arg_ambiguous();
3830 parse_state->lex_state = EXPR_BEG;
3831 pushback(c, parse_state);
3832 if (ISDIGIT(c)) {
3833 c = '+';
3834 goto start_num;
3836 return tUPLUS;
3838 parse_state->lex_state = EXPR_BEG;
3839 pushback(c, parse_state);
3840 return '+';
3842 case '-':
3843 c = nextc();
3844 if (parse_state->lex_state == EXPR_FNAME || parse_state->lex_state == EXPR_DOT) {
3845 parse_state->lex_state = EXPR_ARG;
3846 if (c == '@') {
3847 return tUMINUS;
3849 pushback(c, parse_state);
3850 return '-';
3852 if (c == '=') {
3853 pslval->id = '-';
3854 parse_state->lex_state = EXPR_BEG;
3855 return tOP_ASGN;
3857 if (parse_state->lex_state == EXPR_BEG || parse_state->lex_state == EXPR_MID ||
3858 (IS_ARG() && space_seen && !ISSPACE(c))) {
3859 if (IS_ARG()) arg_ambiguous();
3860 parse_state->lex_state = EXPR_BEG;
3861 pushback(c, parse_state);
3862 if (ISDIGIT(c)) {
3863 return tUMINUS_NUM;
3865 return tUMINUS;
3867 parse_state->lex_state = EXPR_BEG;
3868 pushback(c, parse_state);
3869 return '-';
3871 case '.':
3872 parse_state->lex_state = EXPR_BEG;
3873 if ((c = nextc()) == '.') {
3874 if ((c = nextc()) == '.') {
3875 return tDOT3;
3877 pushback(c, parse_state);
3878 return tDOT2;
3880 pushback(c, parse_state);
3881 if (ISDIGIT(c)) {
3882 yyerror("no .<digit> floating literal anymore; put 0 before dot");
3884 parse_state->lex_state = EXPR_DOT;
3885 return '.';
3887 start_num:
3888 case '0': case '1': case '2': case '3': case '4':
3889 case '5': case '6': case '7': case '8': case '9':
3891 int is_float, seen_point, seen_e, nondigit;
3893 is_float = seen_point = seen_e = nondigit = 0;
3894 parse_state->lex_state = EXPR_END;
3895 newtok(parse_state);
3896 if (c == '-' || c == '+') {
3897 tokadd((char)c,parse_state);
3898 c = nextc();
3900 if (c == '0') {
3901 int start = toklen();
3902 c = nextc();
3903 if (c == 'x' || c == 'X') {
3904 /* hexadecimal */
3905 c = nextc();
3906 if (ISXDIGIT(c)) {
3907 do {
3908 if (c == '_') {
3909 if (nondigit) break;
3910 nondigit = c;
3911 continue;
3913 if (!ISXDIGIT(c)) break;
3914 nondigit = 0;
3915 tokadd((char)c,parse_state);
3916 } while ((c = nextc()) != -1);
3918 pushback(c, parse_state);
3919 tokfix();
3920 if (toklen() == start) {
3921 yyerror("numeric literal without digits");
3923 else if (nondigit) goto trailing_uc;
3924 pslval->node = NEW_HEXNUM(string_new2(tok()));
3925 return tINTEGER;
3927 if (c == 'b' || c == 'B') {
3928 /* binary */
3929 c = nextc();
3930 if (c == '0' || c == '1') {
3931 do {
3932 if (c == '_') {
3933 if (nondigit) break;
3934 nondigit = c;
3935 continue;
3937 if (c != '0' && c != '1') break;
3938 nondigit = 0;
3939 tokadd((char)c, parse_state);
3940 } while ((c = nextc()) != -1);
3942 pushback(c, parse_state);
3943 tokfix();
3944 if (toklen() == start) {
3945 yyerror("numeric literal without digits");
3947 else if (nondigit) goto trailing_uc;
3948 pslval->node = NEW_BINNUM(string_new2(tok()));
3949 return tINTEGER;
3951 if (c == 'd' || c == 'D') {
3952 /* decimal */
3953 c = nextc();
3954 if (ISDIGIT(c)) {
3955 do {
3956 if (c == '_') {
3957 if (nondigit) break;
3958 nondigit = c;
3959 continue;
3961 if (!ISDIGIT(c)) break;
3962 nondigit = 0;
3963 tokadd((char)c, parse_state);
3964 } while ((c = nextc()) != -1);
3966 pushback(c, parse_state);
3967 tokfix();
3968 if (toklen() == start) {
3969 yyerror("numeric literal without digits");
3971 else if (nondigit) goto trailing_uc;
3972 pslval->node = NEW_NUMBER(string_new2(tok()));
3973 return tINTEGER;
3975 if (c == '_') {
3976 /* 0_0 */
3977 goto octal_number;
3979 if (c == 'o' || c == 'O') {
3980 /* prefixed octal */
3981 c = nextc();
3982 if (c == '_') {
3983 yyerror("numeric literal without digits");
3986 if (c >= '0' && c <= '7') {
3987 /* octal */
3988 octal_number:
3989 do {
3990 if (c == '_') {
3991 if (nondigit) break;
3992 nondigit = c;
3993 continue;
3995 if (c < '0' || c > '7') break;
3996 nondigit = 0;
3997 tokadd((char)c, parse_state);
3998 } while ((c = nextc()) != -1);
3999 if (toklen() > start) {
4000 pushback(c, parse_state);
4001 tokfix();
4002 if (nondigit) goto trailing_uc;
4003 pslval->node = NEW_OCTNUM(string_new2(tok()));
4004 return tINTEGER;
4006 if (nondigit) {
4007 pushback(c, parse_state);
4008 goto trailing_uc;
4011 if (c > '7' && c <= '9') {
4012 yyerror("Illegal octal digit");
4014 else if (c == '.' || c == 'e' || c == 'E') {
4015 tokadd('0', parse_state);
4017 else {
4018 pushback(c, parse_state);
4019 pslval->node = NEW_FIXNUM(0);
4020 return tINTEGER;
4024 for (;;) {
4025 switch (c) {
4026 case '0': case '1': case '2': case '3': case '4':
4027 case '5': case '6': case '7': case '8': case '9':
4028 nondigit = 0;
4029 tokadd((char)c, parse_state);
4030 break;
4032 case '.':
4033 if (nondigit) goto trailing_uc;
4034 if (seen_point || seen_e) {
4035 goto decode_num;
4037 else {
4038 int c0 = nextc();
4039 if (!ISDIGIT(c0)) {
4040 pushback(c0, parse_state);
4041 goto decode_num;
4043 c = c0;
4045 tokadd('.', parse_state);
4046 tokadd((char)c, parse_state);
4047 is_float++;
4048 seen_point++;
4049 nondigit = 0;
4050 break;
4052 case 'e':
4053 case 'E':
4054 if (nondigit) {
4055 pushback(c, parse_state);
4056 c = nondigit;
4057 goto decode_num;
4059 if (seen_e) {
4060 goto decode_num;
4062 tokadd((char)c, parse_state);
4063 seen_e++;
4064 is_float++;
4065 nondigit = c;
4066 c = nextc();
4067 if (c != '-' && c != '+') continue;
4068 tokadd((char)c, parse_state);
4069 nondigit = c;
4070 break;
4072 case '_': /* `_' in number just ignored */
4073 if (nondigit) goto decode_num;
4074 nondigit = c;
4075 break;
4077 default:
4078 goto decode_num;
4080 c = nextc();
4083 decode_num:
4084 pushback(c, parse_state);
4085 tokfix();
4086 if (nondigit) {
4087 char tmp[30];
4088 trailing_uc:
4089 snprintf(tmp, sizeof(tmp), "trailing `%c' in number", nondigit);
4090 yyerror(tmp);
4092 if (is_float) {
4093 /* Some implementations of strtod() don't guarantee to
4094 * set errno, so we need to reset it ourselves.
4096 errno = 0;
4098 strtod(tok(), 0);
4099 if (errno == ERANGE) {
4100 rb_warn("Float %s out of range", tok());
4101 errno = 0;
4103 pslval->node = NEW_FLOAT(string_new2(tok()));
4104 return tFLOAT;
4106 pslval->node = NEW_NUMBER(string_new2(tok()));
4107 return tINTEGER;
4110 case ']':
4111 case '}':
4112 case ')':
4113 COND_LEXPOP();
4114 CMDARG_LEXPOP();
4115 parse_state->lex_state = EXPR_END;
4116 return c;
4118 case ':':
4119 c = nextc();
4120 if (c == ':') {
4121 if (parse_state->lex_state == EXPR_BEG || parse_state->lex_state == EXPR_MID ||
4122 parse_state->lex_state == EXPR_CLASS || (IS_ARG() && space_seen)) {
4123 parse_state->lex_state = EXPR_BEG;
4124 return tCOLON3;
4126 parse_state->lex_state = EXPR_DOT;
4127 return tCOLON2;
4129 if (parse_state->lex_state == EXPR_END || parse_state->lex_state == EXPR_ENDARG || ISSPACE(c)) {
4130 pushback(c, parse_state);
4131 parse_state->lex_state = EXPR_BEG;
4132 return ':';
4134 switch (c) {
4135 case '\'':
4136 lex_strterm = NEW_STRTERM(str_ssym, (intptr_t)c, 0);
4137 break;
4138 case '"':
4139 lex_strterm = NEW_STRTERM(str_dsym, (intptr_t)c, 0);
4140 break;
4141 default:
4142 pushback(c, parse_state);
4143 break;
4145 parse_state->lex_state = EXPR_FNAME;
4146 return tSYMBEG;
4148 case '/':
4149 if (parse_state->lex_state == EXPR_BEG || parse_state->lex_state == EXPR_MID) {
4150 lex_strterm = NEW_STRTERM(str_regexp, '/', 0);
4151 return tREGEXP_BEG;
4153 if ((c = nextc()) == '=') {
4154 pslval->id = '/';
4155 parse_state->lex_state = EXPR_BEG;
4156 return tOP_ASGN;
4158 pushback(c, parse_state);
4159 if (IS_ARG() && space_seen) {
4160 if (!ISSPACE(c)) {
4161 arg_ambiguous();
4162 lex_strterm = NEW_STRTERM(str_regexp, '/', 0);
4163 return tREGEXP_BEG;
4166 switch (parse_state->lex_state) {
4167 case EXPR_FNAME: case EXPR_DOT:
4168 parse_state->lex_state = EXPR_ARG; break;
4169 default:
4170 parse_state->lex_state = EXPR_BEG; break;
4172 return '/';
4174 case '^':
4175 if ((c = nextc()) == '=') {
4176 pslval->id = '^';
4177 parse_state->lex_state = EXPR_BEG;
4178 return tOP_ASGN;
4180 switch (parse_state->lex_state) {
4181 case EXPR_FNAME: case EXPR_DOT:
4182 parse_state->lex_state = EXPR_ARG; break;
4183 default:
4184 parse_state->lex_state = EXPR_BEG; break;
4186 pushback(c, parse_state);
4187 return '^';
4189 case ';':
4190 command_start = TRUE;
4191 case ',':
4192 parse_state->lex_state = EXPR_BEG;
4193 return c;
4195 case '~':
4196 if (parse_state->lex_state == EXPR_FNAME || parse_state->lex_state == EXPR_DOT) {
4197 if ((c = nextc()) != '@') {
4198 pushback(c, parse_state);
4201 switch (parse_state->lex_state) {
4202 case EXPR_FNAME: case EXPR_DOT:
4203 parse_state->lex_state = EXPR_ARG; break;
4204 default:
4205 parse_state->lex_state = EXPR_BEG; break;
4207 return '~';
4209 case '(':
4210 command_start = TRUE;
4211 if (parse_state->lex_state == EXPR_BEG || parse_state->lex_state == EXPR_MID) {
4212 c = tLPAREN;
4214 else if (space_seen) {
4215 if (parse_state->lex_state == EXPR_CMDARG) {
4216 c = tLPAREN_ARG;
4218 else if (parse_state->lex_state == EXPR_ARG) {
4219 rb_warn("don't put space before argument parentheses");
4220 c = '(';
4223 COND_PUSH(0);
4224 CMDARG_PUSH(0);
4225 parse_state->lex_state = EXPR_BEG;
4226 return c;
4228 case '[':
4229 if (parse_state->lex_state == EXPR_FNAME || parse_state->lex_state == EXPR_DOT) {
4230 parse_state->lex_state = EXPR_ARG;
4231 if ((c = nextc()) == ']') {
4232 if ((c = nextc()) == '=') {
4233 return tASET;
4235 pushback(c, parse_state);
4236 return tAREF;
4238 pushback(c, parse_state);
4239 return '[';
4241 else if (parse_state->lex_state == EXPR_BEG || parse_state->lex_state == EXPR_MID) {
4242 c = tLBRACK;
4244 else if (IS_ARG() && space_seen) {
4245 c = tLBRACK;
4247 parse_state->lex_state = EXPR_BEG;
4248 COND_PUSH(0);
4249 CMDARG_PUSH(0);
4250 return c;
4252 case '{':
4253 if (IS_ARG() || parse_state->lex_state == EXPR_END)
4254 c = '{'; /* block (primary) */
4255 else if (parse_state->lex_state == EXPR_ENDARG)
4256 c = tLBRACE_ARG; /* block (expr) */
4257 else
4258 c = tLBRACE; /* hash */
4259 COND_PUSH(0);
4260 CMDARG_PUSH(0);
4261 parse_state->lex_state = EXPR_BEG;
4262 return c;
4264 case '\\':
4265 c = nextc();
4266 if (c == '\n') {
4267 space_seen = 1;
4268 goto retry; /* skip \\n */
4270 pushback(c, parse_state);
4271 if(parse_state->lex_state == EXPR_BEG
4272 || parse_state->lex_state == EXPR_MID || space_seen) {
4273 parse_state->lex_state = EXPR_DOT;
4274 return tUBS;
4276 parse_state->lex_state = EXPR_DOT;
4277 return '\\';
4279 case '%':
4280 if (parse_state->lex_state == EXPR_BEG || parse_state->lex_state == EXPR_MID) {
4281 intptr_t term;
4282 intptr_t paren;
4283 char tmpstr[256];
4284 char *cur;
4286 c = nextc();
4287 quotation:
4288 if (!ISALNUM(c)) {
4289 term = c;
4290 c = 'Q';
4292 else {
4293 term = nextc();
4294 if (ISALNUM(term) || ismbchar(term)) {
4295 cur = tmpstr;
4296 *cur++ = c;
4297 while(ISALNUM(term) || ismbchar(term)) {
4298 *cur++ = term;
4299 term = nextc();
4301 *cur = 0;
4302 c = 1;
4306 if (c == -1 || term == -1) {
4307 rb_compile_error("unterminated quoted string meets end of file");
4308 return 0;
4310 paren = term;
4311 if (term == '(') term = ')';
4312 else if (term == '[') term = ']';
4313 else if (term == '{') term = '}';
4314 else if (term == '<') term = '>';
4315 else paren = 0;
4317 switch (c) {
4318 case 'Q':
4319 lex_strterm = NEW_STRTERM(str_dquote, term, paren);
4320 return tSTRING_BEG;
4322 case 'q':
4323 lex_strterm = NEW_STRTERM(str_squote, term, paren);
4324 return tSTRING_BEG;
4326 case 'W':
4327 lex_strterm = NEW_STRTERM(str_dquote | STR_FUNC_QWORDS, term, paren);
4328 do {c = nextc();} while (ISSPACE(c));
4329 pushback(c, parse_state);
4330 return tWORDS_BEG;
4332 case 'w':
4333 lex_strterm = NEW_STRTERM(str_squote | STR_FUNC_QWORDS, term, paren);
4334 do {c = nextc();} while (ISSPACE(c));
4335 pushback(c, parse_state);
4336 return tQWORDS_BEG;
4338 case 'x':
4339 lex_strterm = NEW_STRTERM(str_xquote, term, paren);
4340 pslval->id = 0;
4341 return tXSTRING_BEG;
4343 case 'r':
4344 lex_strterm = NEW_STRTERM(str_regexp, term, paren);
4345 return tREGEXP_BEG;
4347 case 's':
4348 lex_strterm = NEW_STRTERM(str_ssym, term, paren);
4349 parse_state->lex_state = EXPR_FNAME;
4350 return tSYMBEG;
4352 case 1:
4353 lex_strterm = NEW_STRTERM(str_xquote, term, paren);
4354 pslval->id = rb_intern(tmpstr);
4355 return tXSTRING_BEG;
4357 default:
4358 lex_strterm = NEW_STRTERM(str_xquote, term, paren);
4359 tmpstr[0] = c;
4360 tmpstr[1] = 0;
4361 pslval->id = rb_intern(tmpstr);
4362 return tXSTRING_BEG;
4365 if ((c = nextc()) == '=') {
4366 pslval->id = '%';
4367 parse_state->lex_state = EXPR_BEG;
4368 return tOP_ASGN;
4370 if (IS_ARG() && space_seen && !ISSPACE(c)) {
4371 goto quotation;
4373 switch (parse_state->lex_state) {
4374 case EXPR_FNAME: case EXPR_DOT:
4375 parse_state->lex_state = EXPR_ARG; break;
4376 default:
4377 parse_state->lex_state = EXPR_BEG; break;
4379 pushback(c, parse_state);
4380 return '%';
4382 case '$':
4383 parse_state->lex_state = EXPR_END;
4384 newtok(parse_state);
4385 c = nextc();
4386 switch (c) {
4387 case '_': /* $_: last read line string */
4388 c = nextc();
4389 if (is_identchar(c)) {
4390 tokadd('$', parse_state);
4391 tokadd('_', parse_state);
4392 break;
4394 pushback(c, parse_state);
4395 c = '_';
4396 /* fall through */
4397 case '~': /* $~: match-data */
4398 local_cnt(c);
4399 /* fall through */
4400 case '*': /* $*: argv */
4401 case '$': /* $$: pid */
4402 case '?': /* $?: last status */
4403 case '!': /* $!: error string */
4404 case '@': /* $@: error position */
4405 case '/': /* $/: input record separator */
4406 case '\\': /* $\: output record separator */
4407 case ';': /* $;: field separator */
4408 case ',': /* $,: output field separator */
4409 case '.': /* $.: last read line number */
4410 case '=': /* $=: ignorecase */
4411 case ':': /* $:: load path */
4412 case '<': /* $<: reading filename */
4413 case '>': /* $>: default output handle */
4414 case '\"': /* $": already loaded files */
4415 tokadd('$', parse_state);
4416 tokadd((char)c, parse_state);
4417 tokfix();
4418 pslval->id = rb_intern(tok());
4419 return tGVAR;
4421 case '-':
4422 tokadd('$', parse_state);
4423 tokadd((char)c, parse_state);
4424 c = nextc();
4425 tokadd((char)c, parse_state);
4426 tokfix();
4427 pslval->id = rb_intern(tok());
4428 /* xxx shouldn't check if valid option variable */
4429 return tGVAR;
4431 case '&': /* $&: last match */
4432 case '`': /* $`: string before last match */
4433 case '\'': /* $': string after last match */
4434 case '+': /* $+: string matches last paren. */
4435 pslval->node = NEW_BACK_REF((intptr_t)c);
4436 return tBACK_REF;
4438 case '1': case '2': case '3':
4439 case '4': case '5': case '6':
4440 case '7': case '8': case '9':
4441 tokadd('$', parse_state);
4442 do {
4443 tokadd((char)c, parse_state);
4444 c = nextc();
4445 } while (ISDIGIT(c));
4446 pushback(c, parse_state);
4447 tokfix();
4448 pslval->node = NEW_NTH_REF((intptr_t)atoi(tok()+1));
4449 return tNTH_REF;
4451 default:
4452 if (!is_identchar(c)) {
4453 pushback(c, parse_state);
4454 return '$';
4456 case '0':
4457 tokadd('$', parse_state);
4459 break;
4461 case '@':
4462 c = nextc();
4463 newtok(parse_state);
4464 tokadd('@', parse_state);
4465 if (c == '@') {
4466 tokadd('@', parse_state);
4467 c = nextc();
4469 if (ISDIGIT(c)) {
4470 if (tokidx == 1) {
4471 rb_compile_error("`@%c' is not allowed as an instance variable name", c);
4473 else {
4474 rb_compile_error("`@@%c' is not allowed as a class variable name", c);
4477 if (!is_identchar(c)) {
4478 pushback(c, parse_state);
4479 return '@';
4481 break;
4483 case '_':
4484 if (was_bol() && whole_match_p("__END__", 7, 0, parse_state)) {
4485 parse_state->lex_lastline = 0;
4486 return -1;
4488 newtok(parse_state);
4489 break;
4491 default:
4492 if (!is_identchar(c)) {
4493 rb_compile_error("Invalid char `\\%03o' in expression", c);
4494 goto retry;
4497 newtok(parse_state);
4498 break;
4501 do {
4502 tokadd((char)c, parse_state);
4503 if (ismbchar(c)) {
4504 int i, len = mbclen(c)-1;
4506 for (i = 0; i < len; i++) {
4507 c = nextc();
4508 tokadd((char)c, parse_state);
4511 c = nextc();
4512 } while (is_identchar(c));
4513 if ((c == '!' || c == '?') && is_identchar(tok()[0]) && !peek('=')) {
4514 tokadd((char)c, parse_state);
4516 else {
4517 pushback(c, parse_state);
4519 tokfix();
4522 int result = 0;
4524 switch (tok()[0]) {
4525 case '$':
4526 parse_state->lex_state = EXPR_END;
4527 result = tGVAR;
4528 break;
4529 case '@':
4530 parse_state->lex_state = EXPR_END;
4531 if (tok()[1] == '@')
4532 result = tCVAR;
4533 else
4534 result = tIVAR;
4535 break;
4537 default:
4538 if (toklast() == '!' || toklast() == '?') {
4539 result = tFID;
4541 else {
4542 if (parse_state->lex_state == EXPR_FNAME) {
4543 if ((c = nextc()) == '=' && !peek('~') && !peek('>') &&
4544 (!peek('=') || (parse_state->lex_p + 1 < parse_state->lex_pend && (parse_state->lex_p)[1] == '>'))) {
4545 result = tIDENTIFIER;
4546 tokadd((char)c, parse_state);
4547 tokfix();
4549 else {
4550 pushback(c, parse_state);
4553 if (result == 0 && ISUPPER(tok()[0])) {
4554 result = tCONSTANT;
4556 else {
4557 result = tIDENTIFIER;
4561 if (parse_state->lex_state != EXPR_DOT) {
4562 const struct kwtable *kw;
4564 /* See if it is a reserved word. */
4565 kw = syd_reserved_word(tok(), toklen());
4566 if (kw) {
4567 enum lex_state state = parse_state->lex_state;
4568 parse_state->lex_state = kw->state;
4569 if (state == EXPR_FNAME) {
4570 pslval->id = rb_intern(kw->name);
4572 if (kw->id[0] == kDO) {
4573 if (COND_P()) return kDO_COND;
4574 if (CMDARG_P() && state != EXPR_CMDARG)
4575 return kDO_BLOCK;
4576 if (state == EXPR_ENDARG)
4577 return kDO_BLOCK;
4578 return kDO;
4580 if (state == EXPR_BEG)
4581 return kw->id[0];
4582 else {
4583 if (kw->id[0] != kw->id[1])
4584 parse_state->lex_state = EXPR_BEG;
4585 return kw->id[1];
4590 if (parse_state->lex_state == EXPR_BEG ||
4591 parse_state->lex_state == EXPR_MID ||
4592 parse_state->lex_state == EXPR_DOT ||
4593 parse_state->lex_state == EXPR_ARG ||
4594 parse_state->lex_state == EXPR_CMDARG) {
4595 if (cmd_state) {
4596 parse_state->lex_state = EXPR_CMDARG;
4598 else {
4599 parse_state->lex_state = EXPR_ARG;
4602 else {
4603 parse_state->lex_state = EXPR_END;
4606 pslval->id = rb_intern(tok());
4609 FIXME: NO clue what this does.
4611 if (is_local_id(pslval->id) &&
4612 ((syd_dyna_in_block(parse_state) &&
4613 rb_dvar_defined(pslval->id)) || local_id(pslval->id))) {
4614 parse_state->lex_state = EXPR_END;
4617 return result;
4622 static NODE*
4623 syd_node_newnode(rb_parse_state *st, enum node_type type,
4624 OBJECT a0, OBJECT a1, OBJECT a2)
4626 NODE *n = (NODE*)pt_allocate(st, sizeof(NODE));
4628 n->flags = 0;
4629 nd_set_type(n, type);
4630 nd_set_line(n, ruby_sourceline);
4631 n->nd_file = ruby_sourcefile;
4633 n->u1.value = a0;
4634 n->u2.value = a1;
4635 n->u3.value = a2;
4637 return n;
4640 static NODE*
4641 newline_node(parse_state, node)
4642 rb_parse_state *parse_state;
4643 NODE *node;
4645 NODE *nl = 0;
4646 if (node) {
4647 if (nd_type(node) == NODE_NEWLINE) return node;
4648 nl = NEW_NEWLINE(node);
4649 fixpos(nl, node);
4650 nl->nd_nth = nd_line(node);
4652 return nl;
4655 static void
4656 fixpos(node, orig)
4657 NODE *node, *orig;
4659 if (!node) return;
4660 if (!orig) return;
4661 if (orig == (NODE*)1) return;
4662 node->nd_file = orig->nd_file;
4663 nd_set_line(node, nd_line(orig));
4666 static void
4667 parser_warning(rb_parse_state *parse_state, NODE *node, const char *mesg)
4669 int line = ruby_sourceline;
4670 if(parse_state->emit_warnings) {
4671 ruby_sourceline = nd_line(node);
4672 printf("%s:%zi: warning: %s\n", ruby_sourcefile, ruby_sourceline, mesg);
4673 ruby_sourceline = line;
4677 static NODE*
4678 block_append(parse_state, head, tail)
4679 rb_parse_state *parse_state;
4680 NODE *head, *tail;
4682 NODE *end, *h = head;
4684 if (tail == 0) return head;
4686 again:
4687 if (h == 0) return tail;
4688 switch (nd_type(h)) {
4689 case NODE_NEWLINE:
4690 h = h->nd_next;
4691 goto again;
4692 case NODE_STR:
4693 case NODE_LIT:
4694 parser_warning(parse_state, h, "unused literal ignored");
4695 default:
4696 h = end = NEW_BLOCK(head);
4697 end->nd_end = end;
4698 fixpos(end, head);
4699 head = end;
4700 break;
4701 case NODE_BLOCK:
4702 end = h->nd_end;
4703 break;
4706 if (RTEST(ruby_verbose)) {
4707 NODE *nd = end->nd_head;
4708 newline:
4709 switch (nd_type(nd)) {
4710 case NODE_RETURN:
4711 case NODE_BREAK:
4712 case NODE_NEXT:
4713 case NODE_REDO:
4714 case NODE_RETRY:
4715 parser_warning(parse_state, nd, "statement not reached");
4716 break;
4718 case NODE_NEWLINE:
4719 nd = nd->nd_next;
4720 goto newline;
4722 default:
4723 break;
4727 if (nd_type(tail) != NODE_BLOCK) {
4728 tail = NEW_BLOCK(tail);
4729 tail->nd_end = tail;
4731 end->nd_next = tail;
4732 h->nd_end = tail->nd_end;
4733 return head;
4736 /* append item to the list */
4737 static NODE*
4738 list_append(parse_state, list, item)
4739 rb_parse_state *parse_state;
4740 NODE *list, *item;
4742 NODE *last;
4744 if (list == 0) return NEW_LIST(item);
4745 if (list->nd_next) {
4746 last = list->nd_next->nd_end;
4748 else {
4749 last = list;
4752 list->nd_alen += 1;
4753 last->nd_next = NEW_LIST(item);
4754 list->nd_next->nd_end = last->nd_next;
4755 return list;
4758 /* concat two lists */
4759 static NODE*
4760 list_concat(head, tail)
4761 NODE *head, *tail;
4763 NODE *last;
4765 if (head->nd_next) {
4766 last = head->nd_next->nd_end;
4768 else {
4769 last = head;
4772 head->nd_alen += tail->nd_alen;
4773 last->nd_next = tail;
4774 if (tail->nd_next) {
4775 head->nd_next->nd_end = tail->nd_next->nd_end;
4777 else {
4778 head->nd_next->nd_end = tail;
4781 return head;
4784 /* concat two string literals */
4785 static NODE *
4786 literal_concat(parse_state, head, tail)
4787 rb_parse_state *parse_state;
4788 NODE *head, *tail;
4790 enum node_type htype;
4792 if (!head) return tail;
4793 if (!tail) return head;
4795 htype = nd_type(head);
4796 if (htype == NODE_EVSTR) {
4797 NODE *node = NEW_DSTR(string_new(0, 0));
4798 head = list_append(parse_state, node, head);
4800 switch (nd_type(tail)) {
4801 case NODE_STR:
4802 if (htype == NODE_STR) {
4803 bconcat(head->nd_str, tail->nd_str);
4804 bdestroy(tail->nd_str);
4806 else {
4807 list_append(parse_state, head, tail);
4809 break;
4811 case NODE_DSTR:
4812 if (htype == NODE_STR) {
4813 bconcat(head->nd_str, tail->nd_str);
4814 bdestroy(tail->nd_str);
4816 tail->nd_lit = head->nd_lit;
4817 head = tail;
4819 else {
4820 nd_set_type(tail, NODE_ARRAY);
4821 tail->nd_head = NEW_STR(tail->nd_lit);
4822 list_concat(head, tail);
4824 break;
4826 case NODE_EVSTR:
4827 if (htype == NODE_STR) {
4828 nd_set_type(head, NODE_DSTR);
4829 head->nd_alen = 1;
4831 list_append(parse_state, head, tail);
4832 break;
4834 return head;
4837 static NODE *
4838 evstr2dstr(parse_state, node)
4839 rb_parse_state *parse_state;
4840 NODE *node;
4842 if (nd_type(node) == NODE_EVSTR) {
4843 node = list_append(parse_state, NEW_DSTR(string_new(0, 0)), node);
4845 return node;
4848 static NODE *
4849 new_evstr(parse_state, node)
4850 rb_parse_state *parse_state;
4851 NODE *node;
4853 NODE *head = node;
4855 again:
4856 if (node) {
4857 switch (nd_type(node)) {
4858 case NODE_STR: case NODE_DSTR: case NODE_EVSTR:
4859 return node;
4860 case NODE_NEWLINE:
4861 node = node->nd_next;
4862 goto again;
4865 return NEW_EVSTR(head);
4868 static const struct {
4869 ID token;
4870 const char name[12];
4871 } op_tbl[] = {
4872 {tDOT2, ".."},
4873 {tDOT3, "..."},
4874 {'+', "+"},
4875 {'-', "-"},
4876 {'+', "+(binary)"},
4877 {'-', "-(binary)"},
4878 {'*', "*"},
4879 {'/', "/"},
4880 {'%', "%"},
4881 {tPOW, "**"},
4882 {tUPLUS, "+@"},
4883 {tUMINUS, "-@"},
4884 {tUPLUS, "+(unary)"},
4885 {tUMINUS, "-(unary)"},
4886 {'|', "|"},
4887 {'^', "^"},
4888 {'&', "&"},
4889 {tCMP, "<=>"},
4890 {'>', ">"},
4891 {tGEQ, ">="},
4892 {'<', "<"},
4893 {tLEQ, "<="},
4894 {tEQ, "=="},
4895 {tEQQ, "==="},
4896 {tNEQ, "!="},
4897 {tMATCH, "=~"},
4898 {tNMATCH, "!~"},
4899 {'!', "!"},
4900 {'~', "~"},
4901 {'!', "!(unary)"},
4902 {'~', "~(unary)"},
4903 {'!', "!@"},
4904 {'~', "~@"},
4905 {tAREF, "[]"},
4906 {tASET, "[]="},
4907 {tLSHFT, "<<"},
4908 {tRSHFT, ">>"},
4909 {tCOLON2, "::"},
4910 {'`', "`"},
4911 {0, ""}
4914 static ID convert_op(ID id) {
4915 int i;
4916 for(i = 0; op_tbl[i].token; i++) {
4917 if(op_tbl[i].token == id) {
4918 return rb_intern(op_tbl[i].name);
4921 return id;
4924 static NODE *
4925 call_op(recv, id, narg, arg1, parse_state)
4926 NODE *recv;
4927 ID id;
4928 int narg;
4929 NODE *arg1;
4930 rb_parse_state *parse_state;
4932 value_expr(recv);
4933 if (narg == 1) {
4934 value_expr(arg1);
4935 arg1 = NEW_LIST(arg1);
4937 else {
4938 arg1 = 0;
4941 id = convert_op(id);
4944 return NEW_CALL(recv, id, arg1);
4947 static NODE*
4948 match_gen(node1, node2, parse_state)
4949 NODE *node1;
4950 NODE *node2;
4951 rb_parse_state *parse_state;
4953 local_cnt('~');
4955 value_expr(node1);
4956 value_expr(node2);
4957 if (node1) {
4958 switch (nd_type(node1)) {
4959 case NODE_DREGX:
4960 case NODE_DREGX_ONCE:
4961 return NEW_MATCH2(node1, node2);
4963 case NODE_REGEX:
4964 return NEW_MATCH2(node1, node2);
4968 if (node2) {
4969 switch (nd_type(node2)) {
4970 case NODE_DREGX:
4971 case NODE_DREGX_ONCE:
4972 return NEW_MATCH3(node2, node1);
4974 case NODE_REGEX:
4975 return NEW_MATCH3(node2, node1);
4979 return NEW_CALL(node1, convert_op(tMATCH), NEW_LIST(node2));
4982 static NODE*
4983 syd_gettable(parse_state, id)
4984 rb_parse_state *parse_state;
4985 ID id;
4987 if (id == kSELF) {
4988 return NEW_SELF();
4990 else if (id == kNIL) {
4991 return NEW_NIL();
4993 else if (id == kTRUE) {
4994 return NEW_TRUE();
4996 else if (id == kFALSE) {
4997 return NEW_FALSE();
4999 else if (id == k__FILE__) {
5000 return NEW_FILE();
5002 else if (id == k__LINE__) {
5003 return NEW_FIXNUM(ruby_sourceline);
5005 else if (is_local_id(id)) {
5006 if (local_id(id)) return NEW_LVAR(id);
5007 /* method call without arguments */
5008 return NEW_VCALL(id);
5010 else if (is_global_id(id)) {
5011 return NEW_GVAR(id);
5013 else if (is_instance_id(id)) {
5014 return NEW_IVAR(id);
5016 else if (is_const_id(id)) {
5017 return NEW_CONST(id);
5019 else if (is_class_id(id)) {
5020 return NEW_CVAR(id);
5022 /* FIXME: indicate which identifier. */
5023 rb_compile_error("identifier is not valid 1\n");
5024 return 0;
5027 static void
5028 reset_block(rb_parse_state *parse_state) {
5029 if(!parse_state->block_vars) {
5030 parse_state->block_vars = var_table_create();
5031 } else {
5032 parse_state->block_vars = var_table_push(parse_state->block_vars);
5036 static NODE *
5037 extract_block_vars(rb_parse_state *parse_state, NODE* node, var_table vars)
5039 int i;
5040 NODE *var, *out = node;
5042 if (!node) goto out;
5043 if(var_table_size(vars) == 0) goto out;
5045 var = NULL;
5046 for(i = 0; i < var_table_size(vars); i++) {
5047 var = NEW_DASGN_CURR(var_table_get(vars, i), var);
5049 out = block_append(parse_state, var, node);
5051 out:
5052 assert(vars == parse_state->block_vars);
5053 parse_state->block_vars = var_table_pop(parse_state->block_vars);
5055 return out;
5058 static NODE*
5059 assignable(id, val, parse_state)
5060 ID id;
5061 NODE *val;
5062 rb_parse_state *parse_state;
5064 value_expr(val);
5065 if (id == kSELF) {
5066 yyerror("Can't change the value of self");
5068 else if (id == kNIL) {
5069 yyerror("Can't assign to nil");
5071 else if (id == kTRUE) {
5072 yyerror("Can't assign to true");
5074 else if (id == kFALSE) {
5075 yyerror("Can't assign to false");
5077 else if (id == k__FILE__) {
5078 yyerror("Can't assign to __FILE__");
5080 else if (id == k__LINE__) {
5081 yyerror("Can't assign to __LINE__");
5083 else if (is_local_id(id)) {
5084 if(parse_state->block_vars) {
5085 var_table_add(parse_state->block_vars, id);
5087 return NEW_LASGN(id, val);
5089 else if (is_global_id(id)) {
5090 return NEW_GASGN(id, val);
5092 else if (is_instance_id(id)) {
5093 return NEW_IASGN(id, val);
5095 else if (is_const_id(id)) {
5096 if (in_def || in_single)
5097 yyerror("dynamic constant assignment");
5098 return NEW_CDECL(id, val, 0);
5100 else if (is_class_id(id)) {
5101 if (in_def || in_single) return NEW_CVASGN(id, val);
5102 return NEW_CVDECL(id, val);
5104 else {
5105 /* FIXME: indicate which identifier. */
5106 rb_compile_error("identifier is not valid 2 (%d)\n", id);
5108 return 0;
5111 static NODE *
5112 aryset(recv, idx, parse_state)
5113 NODE *recv, *idx;
5114 rb_parse_state *parse_state;
5116 if (recv && nd_type(recv) == NODE_SELF)
5117 recv = (NODE *)1;
5118 else
5119 value_expr(recv);
5120 return NEW_ATTRASGN(recv, convert_op(tASET), idx);
5124 static ID
5125 rb_id_attrset(id)
5126 ID id;
5128 id &= ~ID_SCOPE_MASK;
5129 id |= ID_ATTRSET;
5130 return id;
5133 static NODE *
5134 attrset(recv, id, parse_state)
5135 NODE *recv;
5136 ID id;
5137 rb_parse_state *parse_state;
5139 if (recv && nd_type(recv) == NODE_SELF)
5140 recv = (NODE *)1;
5141 else
5142 value_expr(recv);
5143 return NEW_ATTRASGN(recv, rb_id_attrset(id), 0);
5146 static void
5147 rb_backref_error(node)
5148 NODE *node;
5150 switch (nd_type(node)) {
5151 case NODE_NTH_REF:
5152 rb_compile_error("Can't set variable $%u", node->nd_nth);
5153 break;
5154 case NODE_BACK_REF:
5155 rb_compile_error("Can't set variable $%c", (int)node->nd_nth);
5156 break;
5160 static NODE *
5161 arg_concat(parse_state, node1, node2)
5162 rb_parse_state *parse_state;
5163 NODE *node1;
5164 NODE *node2;
5166 if (!node2) return node1;
5167 return NEW_ARGSCAT(node1, node2);
5170 static NODE *
5171 arg_add(parse_state, node1, node2)
5172 rb_parse_state *parse_state;
5173 NODE *node1;
5174 NODE *node2;
5176 if (!node1) return NEW_LIST(node2);
5177 if (nd_type(node1) == NODE_ARRAY) {
5178 return list_append(parse_state, node1, node2);
5180 else {
5181 return NEW_ARGSPUSH(node1, node2);
5185 static NODE*
5186 node_assign(lhs, rhs, parse_state)
5187 NODE *lhs, *rhs;
5188 rb_parse_state *parse_state;
5190 if (!lhs) return 0;
5192 value_expr(rhs);
5193 switch (nd_type(lhs)) {
5194 case NODE_GASGN:
5195 case NODE_IASGN:
5196 case NODE_LASGN:
5197 case NODE_DASGN:
5198 case NODE_DASGN_CURR:
5199 case NODE_MASGN:
5200 case NODE_CDECL:
5201 case NODE_CVDECL:
5202 case NODE_CVASGN:
5203 lhs->nd_value = rhs;
5204 break;
5206 case NODE_ATTRASGN:
5207 case NODE_CALL:
5208 lhs->nd_args = arg_add(parse_state, lhs->nd_args, rhs);
5209 break;
5211 default:
5212 /* should not happen */
5213 break;
5216 return lhs;
5219 static int
5220 value_expr0(node, parse_state)
5221 NODE *node;
5222 rb_parse_state *parse_state;
5224 int cond = 0;
5226 while (node) {
5227 switch (nd_type(node)) {
5228 case NODE_DEFN:
5229 case NODE_DEFS:
5230 parser_warning(parse_state, node, "void value expression");
5231 return FALSE;
5233 case NODE_RETURN:
5234 case NODE_BREAK:
5235 case NODE_NEXT:
5236 case NODE_REDO:
5237 case NODE_RETRY:
5238 if (!cond) yyerror("void value expression");
5239 /* or "control never reach"? */
5240 return FALSE;
5242 case NODE_BLOCK:
5243 while (node->nd_next) {
5244 node = node->nd_next;
5246 node = node->nd_head;
5247 break;
5249 case NODE_BEGIN:
5250 node = node->nd_body;
5251 break;
5253 case NODE_IF:
5254 if (!value_expr(node->nd_body)) return FALSE;
5255 node = node->nd_else;
5256 break;
5258 case NODE_AND:
5259 case NODE_OR:
5260 cond = 1;
5261 node = node->nd_2nd;
5262 break;
5264 case NODE_NEWLINE:
5265 node = node->nd_next;
5266 break;
5268 default:
5269 return TRUE;
5273 return TRUE;
5276 static void
5277 void_expr0(node)
5278 NODE *node;
5280 const char *useless = NULL;
5282 if (!RTEST(ruby_verbose)) return;
5284 again:
5285 if (!node) return;
5286 switch (nd_type(node)) {
5287 case NODE_NEWLINE:
5288 node = node->nd_next;
5289 goto again;
5291 case NODE_CALL:
5292 switch (node->nd_mid) {
5293 case '+':
5294 case '-':
5295 case '*':
5296 case '/':
5297 case '%':
5298 case tPOW:
5299 case tUPLUS:
5300 case tUMINUS:
5301 case '|':
5302 case '^':
5303 case '&':
5304 case tCMP:
5305 case '>':
5306 case tGEQ:
5307 case '<':
5308 case tLEQ:
5309 case tEQ:
5310 case tNEQ:
5311 useless = "";
5312 break;
5314 break;
5316 case NODE_LVAR:
5317 case NODE_DVAR:
5318 case NODE_GVAR:
5319 case NODE_IVAR:
5320 case NODE_CVAR:
5321 case NODE_NTH_REF:
5322 case NODE_BACK_REF:
5323 useless = "a variable";
5324 break;
5325 case NODE_CONST:
5326 case NODE_CREF:
5327 useless = "a constant";
5328 break;
5329 case NODE_LIT:
5330 case NODE_STR:
5331 case NODE_DSTR:
5332 case NODE_DREGX:
5333 case NODE_DREGX_ONCE:
5334 useless = "a literal";
5335 break;
5336 case NODE_COLON2:
5337 case NODE_COLON3:
5338 useless = "::";
5339 break;
5340 case NODE_DOT2:
5341 useless = "..";
5342 break;
5343 case NODE_DOT3:
5344 useless = "...";
5345 break;
5346 case NODE_SELF:
5347 useless = "self";
5348 break;
5349 case NODE_NIL:
5350 useless = "nil";
5351 break;
5352 case NODE_TRUE:
5353 useless = "true";
5354 break;
5355 case NODE_FALSE:
5356 useless = "false";
5357 break;
5358 case NODE_DEFINED:
5359 useless = "defined?";
5360 break;
5363 if (useless) {
5364 int line = ruby_sourceline;
5366 ruby_sourceline = nd_line(node);
5367 rb_warn("useless use of %s in void context", useless);
5368 ruby_sourceline = line;
5372 static void
5373 void_stmts(node, parse_state)
5374 NODE *node;
5375 rb_parse_state *parse_state;
5377 if (!RTEST(ruby_verbose)) return;
5378 if (!node) return;
5379 if (nd_type(node) != NODE_BLOCK) return;
5381 for (;;) {
5382 if (!node->nd_next) return;
5383 void_expr(node->nd_head);
5384 node = node->nd_next;
5388 static NODE *
5389 remove_begin(node)
5390 NODE *node;
5392 NODE **n = &node;
5393 while (*n) {
5394 switch (nd_type(*n)) {
5395 case NODE_NEWLINE:
5396 n = &(*n)->nd_next;
5397 continue;
5398 case NODE_BEGIN:
5399 *n = (*n)->nd_body;
5400 default:
5401 return node;
5404 return node;
5407 static int
5408 assign_in_cond(node, parse_state)
5409 NODE *node;
5410 rb_parse_state *parse_state;
5412 switch (nd_type(node)) {
5413 case NODE_MASGN:
5414 yyerror("multiple assignment in conditional");
5415 return 1;
5417 case NODE_LASGN:
5418 case NODE_DASGN:
5419 case NODE_GASGN:
5420 case NODE_IASGN:
5421 break;
5423 case NODE_NEWLINE:
5424 default:
5425 return 0;
5428 switch (nd_type(node->nd_value)) {
5429 case NODE_LIT:
5430 case NODE_STR:
5431 case NODE_NIL:
5432 case NODE_TRUE:
5433 case NODE_FALSE:
5434 return 1;
5436 case NODE_DSTR:
5437 case NODE_XSTR:
5438 case NODE_DXSTR:
5439 case NODE_EVSTR:
5440 case NODE_DREGX:
5441 default:
5442 break;
5444 return 1;
5447 static int
5448 e_option_supplied()
5450 if (strcmp(ruby_sourcefile, "-e") == 0)
5451 return TRUE;
5452 return FALSE;
5455 static void
5456 warn_unless_e_option(ps, node, str)
5457 rb_parse_state *ps;
5458 NODE *node;
5459 const char *str;
5461 if (!e_option_supplied()) parser_warning(ps, node, str);
5464 static NODE *cond0();
5466 static NODE*
5467 range_op(node, parse_state)
5468 NODE *node;
5469 rb_parse_state *parse_state;
5471 enum node_type type;
5473 if (!e_option_supplied()) return node;
5474 if (node == 0) return 0;
5476 value_expr(node);
5477 node = cond0(node, parse_state);
5478 type = nd_type(node);
5479 if (type == NODE_NEWLINE) {
5480 node = node->nd_next;
5481 type = nd_type(node);
5483 if (type == NODE_LIT && FIXNUM_P(node->nd_lit)) {
5484 warn_unless_e_option(parse_state, node, "integer literal in conditional range");
5485 return call_op(node,tEQ,1,NEW_GVAR(rb_intern("$.")), parse_state);
5487 return node;
5490 static int
5491 literal_node(node)
5492 NODE *node;
5494 if (!node) return 1; /* same as NODE_NIL */
5495 switch (nd_type(node)) {
5496 case NODE_LIT:
5497 case NODE_STR:
5498 case NODE_DSTR:
5499 case NODE_EVSTR:
5500 case NODE_DREGX:
5501 case NODE_DREGX_ONCE:
5502 case NODE_DSYM:
5503 return 2;
5504 case NODE_TRUE:
5505 case NODE_FALSE:
5506 case NODE_NIL:
5507 return 1;
5509 return 0;
5512 static NODE*
5513 cond0(node, parse_state)
5514 NODE *node;
5515 rb_parse_state *parse_state;
5517 if (node == 0) return 0;
5518 assign_in_cond(node, parse_state);
5520 switch (nd_type(node)) {
5521 case NODE_DSTR:
5522 case NODE_EVSTR:
5523 case NODE_STR:
5524 break;
5526 case NODE_DREGX:
5527 case NODE_DREGX_ONCE:
5528 local_cnt('_');
5529 local_cnt('~');
5530 return NEW_MATCH2(node, NEW_GVAR(rb_intern("$_")));
5532 case NODE_AND:
5533 case NODE_OR:
5534 node->nd_1st = cond0(node->nd_1st, parse_state);
5535 node->nd_2nd = cond0(node->nd_2nd, parse_state);
5536 break;
5538 case NODE_DOT2:
5539 case NODE_DOT3:
5540 node->nd_beg = range_op(node->nd_beg, parse_state);
5541 node->nd_end = range_op(node->nd_end, parse_state);
5542 if (nd_type(node) == NODE_DOT2) nd_set_type(node,NODE_FLIP2);
5543 else if (nd_type(node) == NODE_DOT3) nd_set_type(node, NODE_FLIP3);
5544 if (!e_option_supplied()) {
5545 int b = literal_node(node->nd_beg);
5546 int e = literal_node(node->nd_end);
5547 if ((b == 1 && e == 1) || (b + e >= 2 && RTEST(ruby_verbose))) {
5550 break;
5552 case NODE_DSYM:
5553 break;
5555 case NODE_REGEX:
5556 nd_set_type(node, NODE_MATCH);
5557 local_cnt('_');
5558 local_cnt('~');
5559 default:
5560 break;
5562 return node;
5565 static NODE*
5566 cond(node, parse_state)
5567 NODE *node;
5568 rb_parse_state *parse_state;
5570 if (node == 0) return 0;
5571 value_expr(node);
5572 if (nd_type(node) == NODE_NEWLINE){
5573 node->nd_next = cond0(node->nd_next, parse_state);
5574 return node;
5576 return cond0(node, parse_state);
5579 static NODE*
5580 logop(type, left, right, parse_state)
5581 enum node_type type;
5582 NODE *left, *right;
5583 rb_parse_state *parse_state;
5585 value_expr(left);
5586 if (left && nd_type(left) == type) {
5587 NODE *node = left, *second;
5588 while ((second = node->nd_2nd) != 0 && nd_type(second) == type) {
5589 node = second;
5591 node->nd_2nd = NEW_NODE(type, second, right, 0);
5592 return left;
5594 return NEW_NODE(type, left, right, 0);
5597 static int
5598 cond_negative(nodep)
5599 NODE **nodep;
5601 NODE *c = *nodep;
5603 if (!c) return 0;
5604 switch (nd_type(c)) {
5605 case NODE_NOT:
5606 *nodep = c->nd_body;
5607 return 1;
5608 case NODE_NEWLINE:
5609 if (c->nd_next && nd_type(c->nd_next) == NODE_NOT) {
5610 c->nd_next = c->nd_next->nd_body;
5611 return 1;
5614 return 0;
5617 static void
5618 no_blockarg(node)
5619 NODE *node;
5621 if (node && nd_type(node) == NODE_BLOCK_PASS) {
5622 rb_compile_error("block argument should not be given");
5626 static NODE *
5627 ret_args(parse_state, node)
5628 rb_parse_state *parse_state;
5629 NODE *node;
5631 if (node) {
5632 no_blockarg(node);
5633 if (nd_type(node) == NODE_ARRAY && node->nd_next == 0) {
5634 node = node->nd_head;
5636 if (node && nd_type(node) == NODE_SPLAT) {
5637 node = NEW_SVALUE(node);
5640 return node;
5643 static NODE *
5644 new_yield(parse_state, node)
5645 rb_parse_state *parse_state;
5646 NODE *node;
5648 OBJECT state = Qtrue;
5650 if (node) {
5651 no_blockarg(node);
5652 if (nd_type(node) == NODE_ARRAY && node->nd_next == 0) {
5653 node = node->nd_head;
5654 state = Qfalse;
5656 if (node && nd_type(node) == NODE_SPLAT) {
5657 state = Qtrue;
5660 else {
5661 state = Qfalse;
5663 return NEW_YIELD(node, state);
5666 static NODE *
5667 arg_blk_pass(node1, node2)
5668 NODE *node1;
5669 NODE *node2;
5671 if (node2) {
5672 node2->nd_head = node1;
5673 return node2;
5675 return node1;
5678 static NODE*
5679 arg_prepend(parse_state, node1, node2)
5680 rb_parse_state *parse_state;
5681 NODE *node1, *node2;
5683 switch (nd_type(node2)) {
5684 case NODE_ARRAY:
5685 return list_concat(NEW_LIST(node1), node2);
5687 case NODE_SPLAT:
5688 return arg_concat(parse_state, node1, node2->nd_head);
5690 case NODE_BLOCK_PASS:
5691 node2->nd_body = arg_prepend(parse_state, node1, node2->nd_body);
5692 return node2;
5694 default:
5695 printf("unknown nodetype(%d) for arg_prepend", nd_type(node2));
5696 abort();
5698 return 0; /* not reached */
5701 static NODE*
5702 new_call(parse_state, r,m,a)
5703 rb_parse_state *parse_state;
5704 NODE *r;
5705 ID m;
5706 NODE *a;
5708 if (a && nd_type(a) == NODE_BLOCK_PASS) {
5709 a->nd_iter = NEW_CALL(r,convert_op(m),a->nd_head);
5710 return a;
5712 return NEW_CALL(r,convert_op(m),a);
5715 static NODE*
5716 new_fcall(parse_state, m,a)
5717 rb_parse_state *parse_state;
5718 ID m;
5719 NODE *a;
5721 if (a && nd_type(a) == NODE_BLOCK_PASS) {
5722 a->nd_iter = NEW_FCALL(m,a->nd_head);
5723 return a;
5725 return NEW_FCALL(m,a);
5728 static NODE*
5729 new_super(parse_state,a)
5730 rb_parse_state *parse_state;
5731 NODE *a;
5733 if (a && nd_type(a) == NODE_BLOCK_PASS) {
5734 a->nd_iter = NEW_SUPER(a->nd_head);
5735 return a;
5737 return NEW_SUPER(a);
5741 static void
5742 syd_local_push(rb_parse_state *st, int top)
5744 st->variables = var_table_push(st->variables);
5747 static void
5748 syd_local_pop(rb_parse_state *st)
5750 st->variables = var_table_pop(st->variables);
5754 static ID*
5755 syd_local_tbl(rb_parse_state *st)
5757 ID *lcl_tbl;
5758 var_table tbl;
5759 int i, len;
5760 tbl = st->variables;
5761 len = var_table_size(tbl);
5762 lcl_tbl = pt_allocate(st, sizeof(ID) * (len + 3));
5763 lcl_tbl[0] = (ID)len;
5764 lcl_tbl[1] = '_';
5765 lcl_tbl[2] = '~';
5766 for(i = 0; i < len; i++) {
5767 lcl_tbl[i + 3] = var_table_get(tbl, i);
5769 return lcl_tbl;
5772 static intptr_t
5773 syd_local_cnt(rb_parse_state *st, ID id)
5775 int idx;
5776 /* Leave these hardcoded here because they arne't REALLY ids at all. */
5777 if(id == '_') {
5778 return 0;
5779 } else if(id == '~') {
5780 return 1;
5783 idx = var_table_find(st->variables, id);
5784 if(idx >= 0) return idx + 2;
5786 return var_table_add(st->variables, id);
5789 static int
5790 syd_local_id(rb_parse_state *st, ID id)
5792 if(var_table_find(st->variables, id) >= 0) return 1;
5793 return 0;
5796 static ID
5797 rb_intern(const char *name)
5799 const char *m = name;
5800 ID id, pre, qrk, bef;
5801 int last;
5803 id = 0;
5804 last = strlen(name)-1;
5805 switch (*name) {
5806 case '$':
5807 id |= ID_GLOBAL;
5808 m++;
5809 if (!is_identchar(*m)) m++;
5810 break;
5811 case '@':
5812 if (name[1] == '@') {
5813 m++;
5814 id |= ID_CLASS;
5816 else {
5817 id |= ID_INSTANCE;
5819 m++;
5820 break;
5821 default:
5822 if (name[0] != '_' && !ISALPHA(name[0]) && !ismbchar(name[0])) {
5823 int i;
5825 for (i=0; op_tbl[i].token; i++) {
5826 if (*op_tbl[i].name == *name &&
5827 strcmp(op_tbl[i].name, name) == 0) {
5828 id = op_tbl[i].token;
5829 return id;
5834 if (name[last] == '=') {
5835 id = ID_ATTRSET;
5837 else if (ISUPPER(name[0])) {
5838 id = ID_CONST;
5840 else {
5841 id = ID_LOCAL;
5843 break;
5845 while (m <= name + last && is_identchar(*m)) {
5846 m += mbclen(*m);
5848 if (*m) id = ID_JUNK;
5849 qrk = (ID)quark_from_string(name);
5850 pre = qrk + tLAST_TOKEN;
5851 bef = id;
5852 id |= ( pre << ID_SCOPE_SHIFT );
5853 return id;
5856 quark id_to_quark(ID id) {
5857 quark qrk;
5859 qrk = (quark)((id >> ID_SCOPE_SHIFT) - tLAST_TOKEN);
5860 return qrk;
/*
 * Parse up to len octal digits starting at start.
 * Stops at the first character outside '0'..'7'.  Stores the number of
 * characters consumed in *retlen and returns the parsed value.
 */
static unsigned long
scan_oct(const char *start, int len, int *retlen)
{
    const char *cursor = start;
    unsigned long value = 0;

    for (; len-- && *cursor >= '0' && *cursor <= '7'; cursor++) {
        value = (value << 3) | (unsigned long)(*cursor - '0');
    }
    *retlen = (int)(cursor - start);
    return value;
}
/*
 * Parse up to len hexadecimal digits (either case) starting at start.
 * Stops at the first non-hex character or at the end of the string.
 * Stores the number of characters consumed in *retlen and returns the
 * parsed value.
 *
 * The lookup result is kept in a pointer-to-const, since it points into
 * the constant digit table, and the consumed length is narrowed to int
 * explicitly.
 */
static unsigned long
scan_hex(const char *start, int len, int *retlen)
{
    /* Lower- and upper-case digits share the low four index bits. */
    static const char hexdigit[] = "0123456789abcdef0123456789ABCDEF";
    const char *s = start;
    const char *hit;
    unsigned long retval = 0;

    /* The *s test keeps strchr() from matching the table's terminator. */
    while (len-- && *s && (hit = strchr(hexdigit, *s)) != NULL) {
        retval <<= 4;
        retval |= (unsigned long)((hit - hexdigit) & 15);
        s++;
    }
    *retlen = (int)(s - start);
    return retval;
}
5894 const char *op_to_name(ID id) {
5895 if(id < tLAST_TOKEN) {
5896 int i = 0;
5898 for (i=0; op_tbl[i].token; i++) {
5899 if (op_tbl[i].token == id)
5900 return op_tbl[i].name;
5903 return NULL;