1 /* $Id: parse.y,v 1.29 2007/01/12 16:17:42 tringali Exp $ */
11 #include "interpret.h"
17 #include <X11/Intrinsic.h>
20 #include "../util/VMSparam.h"
23 #include <sys/param.h>
31 /* Macros to add error processing to AddOp and AddSym calls */
/* Each ADD_* macro expands to a bare `if (!AddXxx(arg, &ErrMsg)) return 1`
   statement: when the wrapped call yields a false value, the function in
   which the macro is used returns 1.  ErrMsg is passed by address to every
   call.  NOTE: the expansion is an unbraced `if`, so using one of these
   macros as the un-braced body of an if/else would capture the `else`. */
32 #define ADD_OP(op) if (!AddOp(op, &ErrMsg)) return 1
33 #define ADD_SYM(sym) if (!AddSym(sym, &ErrMsg)) return 1
34 #define ADD_IMMED(val) if (!AddImmediate(val, &ErrMsg)) return 1
35 #define ADD_BR_OFF(to) if (!AddBranchOffset(to, &ErrMsg)) return 1
/* Store in (from)->value the offset from `from` to `to`, computed as a
   pointer difference after casting both operands to Inst *. */
36 #define SET_BR_OFF(from, to) ((from)->value) = ((Inst *)(to)) - ((Inst *)(from))
38 /* Max. length for a string constant (... there shouldn't be a maximum) */
39 #define MAX_STRING_CONST_LEN 5000
41 static const char CVSID
[] = "$Id: parse.y,v 1.29 2007/01/12 16:17:42 tringali Exp $";
42 static int yyerror(char *s
);
43 static int yylex(void);
45 static int follow
(char expect
, int yes
, int no
);
46 static int follow2
(char expect1
, int yes1
, char expect2
, int yes2
, int no
);
47 static int follow_non_whitespace
(char expect
, int yes
, int no
);
48 static Symbol
*matchesActionRoutine
(char **inPtr
);
52 extern Inst
*LoopStack
[]; /* addresses of break, cont stmts */
53 extern Inst
**LoopStackPtr
; /* to fill at the end of a loop */
62 %token
<sym
> NUMBER STRING SYMBOL
63 %token DELETE ARG_LOOKUP
64 %token IF WHILE ELSE FOR BREAK CONTINUE RETURN
66 %type
<inst
> cond comastmts for while else and or arrayexpr
72 %nonassoc SYMBOL ARG_LOOKUP
73 %right
'=' ADDEQ SUBEQ MULEQ DIVEQ MODEQ ANDEQ OREQ
79 %left GT GE LT LE EQ NE IN
82 %nonassoc UNARY_MINUS NOT
91 program: blank stmts
{
92 ADD_OP
(OP_RETURN_NO_VAL
); return
0;
94 | blank
'{' blank stmts
'}' {
95 ADD_OP
(OP_RETURN_NO_VAL
); return
0;
97 | blank
'{' blank
'}' {
98 ADD_OP
(OP_RETURN_NO_VAL
); return
0;
104 block: '{' blank stmts
'}' blank
105 |
'{' blank
'}' blank
111 stmt: simpstmt
'\n' blank
112 | IF
'(' cond
')' blank block %prec IF_NO_ELSE
{
113 SET_BR_OFF
($3, GetPC
());
115 | IF
'(' cond
')' blank block else blank block %prec ELSE
{
116 SET_BR_OFF
($3, ($7+1)); SET_BR_OFF
($7, GetPC
());
118 | while
'(' cond
')' blank block
{
119 ADD_OP
(OP_BRANCH
); ADD_BR_OFF
($1);
120 SET_BR_OFF
($3, GetPC
()); FillLoopAddrs
(GetPC
(), $1);
122 | for
'(' comastmts
';' cond
';' comastmts
')' blank block
{
123 FillLoopAddrs
(GetPC
()+2+($7-($5+1)), GetPC
());
124 SwapCode
($5+1, $7, GetPC
());
125 ADD_OP
(OP_BRANCH
); ADD_BR_OFF
($3); SET_BR_OFF
($5, GetPC
());
127 | for
'(' SYMBOL IN arrayexpr
')' {
128 Symbol
*iterSym
= InstallIteratorSymbol
();
129 ADD_OP
(OP_BEGIN_ARRAY_ITER
); ADD_SYM
(iterSym
);
130 ADD_OP
(OP_ARRAY_ITER
); ADD_SYM
($3); ADD_SYM
(iterSym
); ADD_BR_OFF
(0);
133 ADD_OP
(OP_BRANCH
); ADD_BR_OFF
($5+2);
134 SET_BR_OFF
($5+5, GetPC
());
135 FillLoopAddrs
(GetPC
(), $5+2);
138 ADD_OP
(OP_BRANCH
); ADD_BR_OFF
(0);
139 if
(AddBreakAddr
(GetPC
()-1)) {
140 yyerror("break outside loop"); YYERROR;
143 | CONTINUE
'\n' blank
{
144 ADD_OP
(OP_BRANCH
); ADD_BR_OFF
(0);
145 if
(AddContinueAddr
(GetPC
()-1)) {
146 yyerror("continue outside loop"); YYERROR;
149 | RETURN expr
'\n' blank
{
152 | RETURN
'\n' blank
{
153 ADD_OP
(OP_RETURN_NO_VAL
);
156 simpstmt: SYMBOL
'=' expr
{
157 ADD_OP
(OP_ASSIGN
); ADD_SYM
($1);
159 | evalsym ADDEQ expr
{
160 ADD_OP
(OP_ADD
); ADD_OP
(OP_ASSIGN
); ADD_SYM
($1);
162 | evalsym SUBEQ expr
{
163 ADD_OP
(OP_SUB
); ADD_OP
(OP_ASSIGN
); ADD_SYM
($1);
165 | evalsym MULEQ expr
{
166 ADD_OP
(OP_MUL
); ADD_OP
(OP_ASSIGN
); ADD_SYM
($1);
168 | evalsym DIVEQ expr
{
169 ADD_OP
(OP_DIV
); ADD_OP
(OP_ASSIGN
); ADD_SYM
($1);
171 | evalsym MODEQ expr
{
172 ADD_OP
(OP_MOD
); ADD_OP
(OP_ASSIGN
); ADD_SYM
($1);
174 | evalsym ANDEQ expr
{
175 ADD_OP
(OP_BIT_AND
); ADD_OP
(OP_ASSIGN
); ADD_SYM
($1);
177 | evalsym OREQ expr
{
178 ADD_OP
(OP_BIT_OR
); ADD_OP
(OP_ASSIGN
); ADD_SYM
($1);
180 | DELETE arraylv
'[' arglist
']' {
181 ADD_OP
(OP_ARRAY_DELETE
); ADD_IMMED
($4);
183 | initarraylv
'[' arglist
']' '=' expr
{
184 ADD_OP
(OP_ARRAY_ASSIGN
); ADD_IMMED
($3);
186 | initarraylv
'[' arglist
']' ADDEQ expr
{
187 ADD_OP
(OP_ARRAY_REF_ASSIGN_SETUP
); ADD_IMMED
(1); ADD_IMMED
($3);
189 ADD_OP
(OP_ARRAY_ASSIGN
); ADD_IMMED
($3);
191 | initarraylv
'[' arglist
']' SUBEQ expr
{
192 ADD_OP
(OP_ARRAY_REF_ASSIGN_SETUP
); ADD_IMMED
(1); ADD_IMMED
($3);
194 ADD_OP
(OP_ARRAY_ASSIGN
); ADD_IMMED
($3);
196 | initarraylv
'[' arglist
']' MULEQ expr
{
197 ADD_OP
(OP_ARRAY_REF_ASSIGN_SETUP
); ADD_IMMED
(1); ADD_IMMED
($3);
199 ADD_OP
(OP_ARRAY_ASSIGN
); ADD_IMMED
($3);
201 | initarraylv
'[' arglist
']' DIVEQ expr
{
202 ADD_OP
(OP_ARRAY_REF_ASSIGN_SETUP
); ADD_IMMED
(1); ADD_IMMED
($3);
204 ADD_OP
(OP_ARRAY_ASSIGN
); ADD_IMMED
($3);
206 | initarraylv
'[' arglist
']' MODEQ expr
{
207 ADD_OP
(OP_ARRAY_REF_ASSIGN_SETUP
); ADD_IMMED
(1); ADD_IMMED
($3);
209 ADD_OP
(OP_ARRAY_ASSIGN
); ADD_IMMED
($3);
211 | initarraylv
'[' arglist
']' ANDEQ expr
{
212 ADD_OP
(OP_ARRAY_REF_ASSIGN_SETUP
); ADD_IMMED
(1); ADD_IMMED
($3);
214 ADD_OP
(OP_ARRAY_ASSIGN
); ADD_IMMED
($3);
216 | initarraylv
'[' arglist
']' OREQ expr
{
217 ADD_OP
(OP_ARRAY_REF_ASSIGN_SETUP
); ADD_IMMED
(1); ADD_IMMED
($3);
219 ADD_OP
(OP_ARRAY_ASSIGN
); ADD_IMMED
($3);
221 | initarraylv
'[' arglist
']' INCR
{
222 ADD_OP
(OP_ARRAY_REF_ASSIGN_SETUP
); ADD_IMMED
(0); ADD_IMMED
($3);
224 ADD_OP
(OP_ARRAY_ASSIGN
); ADD_IMMED
($3);
226 | initarraylv
'[' arglist
']' DECR
{
227 ADD_OP
(OP_ARRAY_REF_ASSIGN_SETUP
); ADD_IMMED
(0); ADD_IMMED
($3);
229 ADD_OP
(OP_ARRAY_ASSIGN
); ADD_IMMED
($3);
231 | INCR initarraylv
'[' arglist
']' {
232 ADD_OP
(OP_ARRAY_REF_ASSIGN_SETUP
); ADD_IMMED
(0); ADD_IMMED
($4);
234 ADD_OP
(OP_ARRAY_ASSIGN
); ADD_IMMED
($4);
236 | DECR initarraylv
'[' arglist
']' {
237 ADD_OP
(OP_ARRAY_REF_ASSIGN_SETUP
); ADD_IMMED
(0); ADD_IMMED
($4);
239 ADD_OP
(OP_ARRAY_ASSIGN
); ADD_IMMED
($4);
241 | SYMBOL
'(' arglist
')' {
242 ADD_OP
(OP_SUBR_CALL
);
243 ADD_SYM
(PromoteToGlobal
($1)); ADD_IMMED
($3);
246 ADD_OP
(OP_PUSH_SYM
); ADD_SYM
($2); ADD_OP
(OP_INCR
);
247 ADD_OP
(OP_ASSIGN
); ADD_SYM
($2);
250 ADD_OP
(OP_PUSH_SYM
); ADD_SYM
($1); ADD_OP
(OP_INCR
);
251 ADD_OP
(OP_ASSIGN
); ADD_SYM
($1);
254 ADD_OP
(OP_PUSH_SYM
); ADD_SYM
($2); ADD_OP
(OP_DECR
);
255 ADD_OP
(OP_ASSIGN
); ADD_SYM
($2);
258 ADD_OP
(OP_PUSH_SYM
); ADD_SYM
($1); ADD_OP
(OP_DECR
);
259 ADD_OP
(OP_ASSIGN
); ADD_SYM
($1);
263 $$
= $1; ADD_OP
(OP_PUSH_SYM
); ADD_SYM
($1);
266 comastmts: /* nothing */ {
272 | comastmts
',' simpstmt
{
276 arglist: /* nothing */ {
286 expr: numexpr %prec CONCAT
287 | expr numexpr %prec CONCAT
{
291 initarraylv: SYMBOL
{
292 ADD_OP
(OP_PUSH_ARRAY_SYM
); ADD_SYM
($1); ADD_IMMED
(1);
294 | initarraylv
'[' arglist
']' {
295 ADD_OP
(OP_ARRAY_REF
); ADD_IMMED
($3);
299 ADD_OP
(OP_PUSH_ARRAY_SYM
); ADD_SYM
($1); ADD_IMMED
(0);
301 | arraylv
'[' arglist
']' {
302 ADD_OP
(OP_ARRAY_REF
); ADD_IMMED
($3);
310 ADD_OP
(OP_PUSH_SYM
); ADD_SYM
($1);
313 ADD_OP
(OP_PUSH_SYM
); ADD_SYM
($1);
316 ADD_OP
(OP_PUSH_SYM
); ADD_SYM
($1);
318 | SYMBOL
'(' arglist
')' {
319 ADD_OP
(OP_SUBR_CALL
);
320 ADD_SYM
(PromoteToGlobal
($1)); ADD_IMMED
($3);
321 ADD_OP
(OP_FETCH_RET_VAL
);
324 | ARG_LOOKUP
'[' numexpr
']' {
327 | ARG_LOOKUP
'[' ']' {
328 ADD_OP
(OP_PUSH_ARG_COUNT
);
331 ADD_OP
(OP_PUSH_ARG_ARRAY
);
333 | numexpr
'[' arglist
']' {
334 ADD_OP
(OP_ARRAY_REF
); ADD_IMMED
($3);
336 | numexpr
'+' numexpr
{
339 | numexpr
'-' numexpr
{
342 | numexpr
'*' numexpr
{
345 | numexpr
'/' numexpr
{
348 | numexpr
'%' numexpr
{
351 | numexpr POW numexpr
{
354 |
'-' numexpr %prec UNARY_MINUS
{
357 | numexpr GT numexpr
{
360 | numexpr GE numexpr
{
363 | numexpr LT numexpr
{
366 | numexpr LE numexpr
{
369 | numexpr EQ numexpr
{
372 | numexpr NE numexpr
{
375 | numexpr
'&' numexpr
{
378 | numexpr
'|' numexpr
{
381 | numexpr and numexpr %prec AND
{
382 ADD_OP
(OP_AND
); SET_BR_OFF
($2, GetPC
());
384 | numexpr or numexpr %prec OR
{
385 ADD_OP
(OP_OR
); SET_BR_OFF
($2, GetPC
());
391 ADD_OP
(OP_PUSH_SYM
); ADD_SYM
($2); ADD_OP
(OP_INCR
);
392 ADD_OP
(OP_DUP
); ADD_OP
(OP_ASSIGN
); ADD_SYM
($2);
395 ADD_OP
(OP_PUSH_SYM
); ADD_SYM
($1); ADD_OP
(OP_DUP
);
396 ADD_OP
(OP_INCR
); ADD_OP
(OP_ASSIGN
); ADD_SYM
($1);
399 ADD_OP
(OP_PUSH_SYM
); ADD_SYM
($2); ADD_OP
(OP_DECR
);
400 ADD_OP
(OP_DUP
); ADD_OP
(OP_ASSIGN
); ADD_SYM
($2);
403 ADD_OP
(OP_PUSH_SYM
); ADD_SYM
($1); ADD_OP
(OP_DUP
);
404 ADD_OP
(OP_DECR
); ADD_OP
(OP_ASSIGN
); ADD_SYM
($1);
406 | numexpr IN numexpr
{
411 $$
= GetPC
(); StartLoopAddrList
();
415 StartLoopAddrList
(); $$
= GetPC
();
419 ADD_OP
(OP_BRANCH
); $$
= GetPC
(); ADD_BR_OFF
(0);
422 cond: /* nothing */ {
423 ADD_OP
(OP_BRANCH_NEVER
); $$
= GetPC
(); ADD_BR_OFF
(0);
426 ADD_OP
(OP_BRANCH_FALSE
); $$
= GetPC
(); ADD_BR_OFF
(0);
430 ADD_OP
(OP_DUP
); ADD_OP
(OP_BRANCH_FALSE
); $$
= GetPC
();
435 ADD_OP
(OP_DUP
); ADD_OP
(OP_BRANCH_TRUE
); $$
= GetPC
();
443 %%
/* User Subroutines Section */
447 ** Parse a null terminated string and create a program from it (this is the
448 ** parser entry point). The program created by this routine can be
449 ** executed using ExecuteProgram. Returns program on success, or NULL
450 ** on failure. If the command failed, the error message is returned
451 ** as a pointer to a static string in msg, and a pointer to the position in
452 ** the string where parsing failed in stoppedAt.
454 Program
*ParseMacro
(char *expr
, char **msg
, char **stoppedAt
)
458 BeginCreatingProgram
();
460 /* call yyparse to parse the string and check for success. If the parse
461 failed, return the error message and string index (the grammar aborts
462 parsing at the first error) */
467 FreeProgram
(FinishCreatingProgram
());
471 /* get the newly created program */
472 prog
= FinishCreatingProgram
();
474 /* parse succeeded */
481 static int yylex(void)
485 static DataValue value
= {NO_TAG
, {0}};
486 static char escape
[] = "\\\"ntbrfave";
487 #ifdef EBCDIC_CHARSET
488 static char replace
[] = "\\\"\n\t\b\r\f\a\v\x27"; /* EBCDIC escape */
490 static char replace
[] = "\\\"\n\t\b\r\f\a\v\x1B"; /* ASCII escape */
493 /* skip whitespace, backslash-newline combinations, and comments, which are
494 all considered whitespace */
496 if
(*InPtr
== '\\' && *(InPtr
+ 1) == '\n')
498 else if
(*InPtr
== ' ' ||
*InPtr
== '\t')
500 else if
(*InPtr
== '#')
501 while
(*InPtr
!= '\n' && *InPtr
!= '\0') {
502 /* Comments stop at escaped newlines */
503 if
(*InPtr
== '\\' && *(InPtr
+ 1) == '\n') {
513 /* return end of input at the end of the string */
514 if
(*InPtr
== '\0') {
518 /* process number tokens */
519 if
(isdigit
((unsigned char)*InPtr
)) { /* number */
521 sscanf
(InPtr
, "%d%n", &value.val.n
, &len
);
522 sprintf
(name
, "const %d", value.val.n
);
525 if
((yylval.sym
=LookupSymbol
(name
)) == NULL
)
526 yylval.sym
= InstallSymbol
(name
, CONST_SYM
, value
);
530 /* process symbol tokens. "define" is a special case not handled
531 by this parser, considered end of input. Another special case
532 is action routine names which are allowed to contain '-' despite
533 the ambiguity, handled in matchesActionRoutine. */
534 if
(isalpha
((unsigned char)*InPtr
) ||
*InPtr
== '$') {
535 if
((s
=matchesActionRoutine
(&InPtr
)) == NULL
) {
536 char symName
[MAX_SYM_LEN
+1], *p
= symName
;
538 while
(isalnum
((unsigned char)*InPtr
) ||
*InPtr
=='_') {
539 if
(p
>= symName
+ MAX_SYM_LEN
)
545 if
(!strcmp
(symName
, "while")) return WHILE
;
546 if
(!strcmp
(symName
, "if")) return IF
;
547 if
(!strcmp
(symName
, "else")) return ELSE
;
548 if
(!strcmp
(symName
, "for")) return FOR
;
549 if
(!strcmp
(symName
, "break")) return BREAK
;
550 if
(!strcmp
(symName
, "continue")) return CONTINUE
;
551 if
(!strcmp
(symName
, "return")) return RETURN
;
552 if
(!strcmp
(symName
, "in")) return IN
;
553 if
(!strcmp
(symName
, "$args")) return ARG_LOOKUP
;
554 if
(!strcmp
(symName
, "delete") && follow_non_whitespace
('(', SYMBOL
, DELETE
) == DELETE
) return DELETE
;
555 if
(!strcmp
(symName
, "define")) {
559 if
((s
=LookupSymbol
(symName
)) == NULL
) {
560 s
= InstallSymbol
(symName
, symName
[0]=='$' ?
561 (((symName
[1] > '0' && symName
[1] <= '9') && symName
[2] == 0) ?
562 ARG_SYM
: GLOBAL_SYM
) : LOCAL_SYM
, value
);
563 s
->value.tag
= NO_TAG
;
570 /* Process quoted strings with embedded escape sequences:
571 For backslashes we recognise hexadecimal values with initial 'x' such
572 as "\x1B"; octal values (up to 3 octal digits with a possible leading zero)
573 such as "\33", "\033" or "\0033", and the C escapes: \", \', \n, \t, \b,
574 \r, \f, \a, \v, and the added \e for the escape character, as for REs.
575 Disallow hex/octal zero values (NUL): instead ignore the introductory
576 backslash, eg "\x0xyz" becomes "x0xyz" and "\0000hello" becomes
579 if
(*InPtr
== '\"') {
580 char string[MAX_STRING_CONST_LEN
], *p
= string;
583 while
(*InPtr
!= '\0' && *InPtr
!= '\"' && *InPtr
!= '\n') {
584 if
(p
>= string + MAX_STRING_CONST_LEN
) {
588 if
(*InPtr
== '\\') {
591 if
(*InPtr
== '\n') {
596 /* a hex introducer */
598 const char *hexDigits
= "0123456789abcdef";
601 if
(*InPtr
== '\0' ||
602 (hexD
= strchr
(hexDigits
, tolower
(*InPtr
))) == NULL
) {
606 hexValue
= hexD
- hexDigits
;
608 /* now do we have another digit? only accept one more */
609 if
(*InPtr
!= '\0' &&
610 (hexD
= strchr
(hexDigits
,tolower
(*InPtr
))) != NULL
){
611 hexValue
= hexD
- hexDigits
+ (hexValue
<< 4);
615 *p
++ = (char)hexValue
;
618 InPtr
= backslash
+ 1; /* just skip the backslash */
623 /* the RE documentation requires \0 as the octal introducer;
624 here you can start with any octal digit, but you are only
625 allowed up to three (or four if the first is '0'). */
626 if
('0' <= *InPtr
&& *InPtr
<= '7') {
628 InPtr
++; /* octal introducer: don't count this digit */
630 if
('0' <= *InPtr
&& *InPtr
<= '7') {
631 /* treat as octal - first digit */
632 char octD
= *InPtr
++;
633 int octValue
= octD
- '0';
634 if
('0' <= *InPtr
&& *InPtr
<= '7') {
637 octValue
= (octValue
<< 3) + octD
- '0';
638 /* now do we have another digit? can we add it?
639 if value is going to be too big for char (greater
640 than 0377), stop converting now before adding the
642 if
('0' <= *InPtr
&& *InPtr
<= '7' &&
644 /* third digit is acceptable */
646 octValue
= (octValue
<< 3) + octD
- '0';
650 *p
++ = (char)octValue
;
653 InPtr
= backslash
+ 1; /* just skip the backslash */
656 else
{ /* \0 followed by non-digits: go back to 0 */
657 InPtr
= backslash
+ 1; /* just skip the backslash */
661 for
(i
=0; escape
[i
]!='\0'; i
++) {
662 if
(escape
[i
] == *InPtr
) {
668 /* if we get here, we didn't recognise the character after
669 the backslash: just copy it next time round the loop */
677 yylval.sym
= InstallStringConstSymbol
(string);
681 /* process remaining two character tokens or return single char as token */
683 case
'>': return follow
('=', GE
, GT
);
684 case
'<': return follow
('=', LE
, LT
);
685 case
'=': return follow
('=', EQ
, '=');
686 case
'!': return follow
('=', NE
, NOT
);
687 case
'+': return follow2
('+', INCR
, '=', ADDEQ
, '+');
688 case
'-': return follow2
('-', DECR
, '=', SUBEQ
, '-');
689 case
'|': return follow2
('|', OR
, '=', OREQ
, '|');
690 case
'&': return follow2
('&', AND
, '=', ANDEQ
, '&');
691 case
'*': return follow2
('*', POW
, '=', MULEQ
, '*');
692 case
'/': return follow
('=', DIVEQ
, '/');
693 case
'%': return follow
('=', MODEQ
, '%');
694 case
'^': return POW
;
695 default
: return
*(InPtr
-1);
700 ** look ahead for >=, etc.
702 static int follow
(char expect
, int yes
, int no
)
704 if
(*InPtr
++ == expect
)
709 static int follow2
(char expect1
, int yes1
, char expect2
, int yes2
, int no
)
711 char next
= *InPtr
++;
720 static int follow_non_whitespace
(char expect
, int yes
, int no
)
722 char *localInPtr
= InPtr
;
725 if
(*localInPtr
== ' ' ||
*localInPtr
== '\t') {
728 else if
(*localInPtr
== '\\' && *(localInPtr
+ 1) == '\n') {
731 else if
(*localInPtr
== expect
) {
741 ** Look (way) ahead for hyphenated routine names which begin at inPtr. A
742 ** hyphenated name is allowed if it is pre-defined in the global symbol
743 ** table. If a matching name exists, returns the symbol and updates "inPtr".
745 ** I know this is horrible language design, but existing nedit action routine
746 ** names contain hyphens. Handling them here in the lexical analysis process
747 ** is much easier than trying to deal with it in the parser itself. (sorry)
749 static Symbol
*matchesActionRoutine
(char **inPtr
)
753 char symbolName
[MAX_SYM_LEN
+1];
757 for
(c
= *inPtr
; isalnum
((unsigned char)*c
) ||
*c
=='_' ||
758 ( *c
=='-' && isalnum
((unsigned char)(*(c
+1)))); c
++) {
766 s
= LookupSymbol
(symbolName
);
773 ** Called by yacc to report errors (just stores for returning when
774 ** parsing is aborted. The error token action is to immediately abort
775 ** parsing, so this message is immediately reported to the caller
778 static int yyerror(char *s
)