1 /* $NetBSD: syn.c,v 1.9 2006/10/16 00:07:32 christos Exp $ */
4 * shell parser (C version)
9 __RCSID("$NetBSD: syn.c,v 1.9 2006/10/16 00:07:32 christos Exp $");
17 struct nesting_state
{
18 int start_token
; /* token that began nesting (e.g., FOR) */
19 int start_line
; /* line nesting began on */
22 static void yyparse
ARGS((void));
23 static struct op
*pipeline
ARGS((int cf
));
24 static struct op
*andor
ARGS((void));
25 static struct op
*c_list
ARGS((int multi
));
26 static struct ioword
*synio
ARGS((int cf
));
27 static void musthave
ARGS((int c
, int cf
));
28 static struct op
*nested
ARGS((int type
, int smark
, int emark
));
29 static struct op
*get_command
ARGS((int cf
));
30 static struct op
*dogroup
ARGS((void));
31 static struct op
*thenpart
ARGS((void));
32 static struct op
*elsepart
ARGS((void));
33 static struct op
*caselist
ARGS((void));
34 static struct op
*casepart
ARGS((int endtok
));
35 static struct op
*function_body
ARGS((char *name
, int ksh_func
));
36 static char ** wordlist
ARGS((void));
37 static struct op
*block
ARGS((int type
, struct op
*t1
, struct op
*t2
,
39 static struct op
*newtp
ARGS((int type
));
40 static void syntaxerr
ARGS((const char *what
))
41 GCC_FUNC_ATTR(noreturn
);
42 static void nesting_push
ARGS((struct nesting_state
*save
, int tok
));
43 static void nesting_pop
ARGS((struct nesting_state
*saved
));
44 static int assign_command
ARGS((char *s
));
45 static int inalias
ARGS((struct source
*s
));
47 static int dbtestp_isa
ARGS((Test_env
*te
, Test_meta meta
));
48 static const char *dbtestp_getopnd
ARGS((Test_env
*te
, Test_op op
,
50 static int dbtestp_eval
ARGS((Test_env
*te
, Test_op op
, const char *opnd1
,
51 const char *opnd2
, int do_eval
));
52 static void dbtestp_error
ARGS((Test_env
*te
, int offset
, const char *msg
));
55 static struct op
*outtree
; /* yyparse output */
57 static struct nesting_state nesting
; /* \n changed to ; */
59 static int reject
; /* token(cf) gets symbol again */
60 static int symbol
; /* yylex value */
62 #define REJECT (reject = 1)
63 #define ACCEPT (reject = 0)
65 ((reject) ? (ACCEPT, symbol) : (symbol = yylex(cf)))
67 ((reject) ? (symbol) : (REJECT, symbol = yylex(cf)))
76 outtree
= c_list(source
->type
== SSTRING
);
78 if (c
== 0 && !outtree
)
79 outtree
= newtp(TEOF
);
80 else if (c
!= '\n' && c
!= 0)
81 syntaxerr((char *) 0);
88 register struct op
*t
, *p
, *tl
= NULL
;
92 while (token(0) == '|') {
93 if ((p
= get_command(CONTIN
)) == NULL
)
94 syntaxerr((char *) 0);
96 t
= tl
= block(TPIPE
, t
, p
, NOWORDS
);
98 tl
= tl
->right
= block(TPIPE
, tl
->right
, p
, NOWORDS
);
108 register struct op
*t
, *p
;
113 while ((c
= token(0)) == LOGAND
|| c
== LOGOR
) {
114 if ((p
= pipeline(CONTIN
)) == NULL
)
115 syntaxerr((char *) 0);
116 t
= block(c
== LOGAND
? TAND
: TOR
, t
, p
, NOWORDS
);
127 register struct op
*t
= NULL
, *p
, *tl
= NULL
;
133 /* Token has always been read/rejected at this point, so
134 * we don't worry about what flags to pass token()
138 if (c
== '\n' && (multi
|| inalias(source
))) {
139 if (!p
) /* ignore blank lines */
143 else if (c
== '&' || c
== COPROC
)
144 p
= block(c
== '&' ? TASYNC
: TCOPROC
,
145 p
, NOBLOCK
, NOWORDS
);
151 t
= tl
= block(TLIST
, t
, p
, NOWORDS
);
153 tl
= tl
->right
= block(TLIST
, tl
->right
, p
, NOWORDS
);
161 static struct ioword
*
165 register struct ioword
*iop
;
168 if (tpeek(cf
) != REDIR
)
172 ishere
= (iop
->flag
&IOTYPE
) == IOHERE
;
173 musthave(LWORD
, ishere
? HEREDELIM
: 0);
175 iop
->delim
= yylval
.cp
;
176 if (*ident
!= 0) /* unquoted */
178 if (herep
>= &heres
[HERES
])
179 yyerror("too many <<'s\n");
182 iop
->name
= yylval
.cp
;
190 if ((token(cf
)) != c
)
191 syntaxerr((char *) 0);
195 nested(type
, smark
, emark
)
196 int type
, smark
, emark
;
198 register struct op
*t
;
199 struct nesting_state old_nesting
;
201 nesting_push(&old_nesting
, smark
);
203 musthave(emark
, KEYWORD
|ALIAS
);
204 nesting_pop(&old_nesting
);
205 return (block(type
, t
, NOBLOCK
, NOWORDS
));
212 register struct op
*t
;
213 register int c
, iopn
= 0, syniocf
;
214 struct ioword
*iop
, **iops
;
216 struct nesting_state old_nesting
;
218 iops
= (struct ioword
**) alloc(sizeofN(struct ioword
*, NUFILE
+1),
223 syniocf
= KEYWORD
|ALIAS
;
224 switch (c
= token(cf
|KEYWORD
|ALIAS
|VARASN
)) {
227 afree((void*) iops
, ATEMP
);
230 return NULL
; /* empty line */
235 syniocf
&= ~(KEYWORD
|ALIAS
);
237 t
->lineno
= source
->line
;
239 cf
= (t
->u
.evalflags
? ARRAYVAR
: 0)
240 | (XPsize(args
) == 0 ? ALIAS
|VARASN
: CMDWORD
);
244 yyerror("too many redirections\n");
245 iops
[iopn
++] = synio(cf
);
250 /* the iopn == 0 and XPsize(vars) == 0 are
251 * dubious but at&t ksh acts this way
253 if (iopn
== 0 && XPsize(vars
) == 0
255 && assign_command(ident
))
256 t
->u
.evalflags
= DOVACHECK
;
257 if ((XPsize(args
) == 0 || Flag(FKEYWORD
))
258 && is_wdvarassign(yylval
.cp
))
259 XPput(vars
, yylval
.cp
);
261 XPput(args
, yylval
.cp
);
265 /* Check for "> foo (echo hi)", which at&t ksh
266 * allows (not POSIX, but not disallowed)
269 if (XPsize(args
) == 0 && XPsize(vars
) == 0) {
273 /* Must be a function */
274 if (iopn
!= 0 || XPsize(args
) != 1
275 || XPsize(vars
) != 0)
276 syntaxerr((char *) 0);
280 t
= function_body(XPptrv(args
)[0], FALSE
);
292 t
= nested(TPAREN
, '(', ')');
296 t
= nested(TBRACE
, '{', '}');
302 static const char let_cmd
[] = { CHAR
, 'l', CHAR
, 'e',
304 /* Leave KEYWORD in syniocf (allow if (( 1 )) then ...) */
306 t
->lineno
= source
->line
;
308 XPput(args
, wdcopy(let_cmd
, ATEMP
));
309 musthave(LWORD
,LETEXPR
);
310 XPput(args
, yylval
.cp
);
316 case DBRACKET
: /* [[ .. ]] */
317 /* Leave KEYWORD in syniocf (allow if [[ -n 1 ]] then ...) */
318 t
= newtp(TDBRACKET
);
323 te
.flags
= TEF_DBRACKET
;
325 te
.isa
= dbtestp_isa
;
326 te
.getopnd
= dbtestp_getopnd
;
327 te
.eval
= dbtestp_eval
;
328 te
.error
= dbtestp_error
;
337 t
= newtp((c
== FOR
) ? TFOR
: TSELECT
);
338 musthave(LWORD
, ARRAYVAR
);
339 if (!is_wdvarname(yylval
.cp
, TRUE
))
340 yyerror("%s: bad identifier\n",
341 c
== FOR
? "for" : "select");
342 t
->str
= str_save(ident
, ATEMP
);
343 nesting_push(&old_nesting
, c
);
344 t
->vars
= wordlist();
346 nesting_pop(&old_nesting
);
351 nesting_push(&old_nesting
, c
);
352 t
= newtp((c
== WHILE
) ? TWHILE
: TUNTIL
);
353 t
->left
= c_list(TRUE
);
354 t
->right
= dogroup();
355 nesting_pop(&old_nesting
);
362 nesting_push(&old_nesting
, c
);
363 t
->left
= caselist();
364 nesting_pop(&old_nesting
);
368 nesting_push(&old_nesting
, c
);
370 t
->left
= c_list(TRUE
);
371 t
->right
= thenpart();
372 musthave(FI
, KEYWORD
|ALIAS
);
373 nesting_pop(&old_nesting
);
377 syniocf
&= ~(KEYWORD
|ALIAS
);
379 if (t
== (struct op
*) 0)
380 syntaxerr((char *) 0);
381 t
= block(TBANG
, NOBLOCK
, t
, NOWORDS
);
385 syniocf
&= ~(KEYWORD
|ALIAS
);
387 t
= block(TTIME
, t
, NOBLOCK
, NOWORDS
);
392 t
= function_body(yylval
.cp
, TRUE
);
396 while ((iop
= synio(syniocf
)) != NULL
) {
398 yyerror("too many redirections\n");
403 afree((void*) iops
, ATEMP
);
407 iops
= (struct ioword
**) aresize((void*) iops
,
408 sizeofN(struct ioword
*, iopn
), ATEMP
);
412 if (t
->type
== TCOM
|| t
->type
== TDBRACKET
) {
414 t
->args
= (char **) XPclose(args
);
416 t
->vars
= (char **) XPclose(vars
);
429 register struct op
*list
;
431 c
= token(CONTIN
|KEYWORD
|ALIAS
);
432 /* A {...} can be used instead of do...done for for/select loops
433 * but not for while/until loops - we don't need to check if it
434 * is a while loop because it would have been parsed as part of
435 * the conditional command list...
442 syntaxerr((char *) 0);
444 musthave(c
, KEYWORD
|ALIAS
);
451 register struct op
*t
;
453 musthave(THEN
, KEYWORD
|ALIAS
);
455 t
->left
= c_list(TRUE
);
457 syntaxerr((char *) 0);
458 t
->right
= elsepart();
465 register struct op
*t
;
467 switch (token(KEYWORD
|ALIAS
|VARASN
)) {
469 if ((t
= c_list(TRUE
)) == NULL
)
470 syntaxerr((char *) 0);
475 t
->left
= c_list(TRUE
);
476 t
->right
= thenpart();
488 register struct op
*t
, *tl
;
491 c
= token(CONTIN
|KEYWORD
|ALIAS
);
492 /* A {...} can be used instead of in...esac for case statements */
498 syntaxerr((char *) 0);
500 while ((tpeek(CONTIN
|KEYWORD
|ESACONLY
)) != c
) { /* no ALIAS here */
501 struct op
*tc
= casepart(c
);
503 t
= tl
= tc
, tl
->right
= NULL
;
505 tl
->right
= tc
, tl
= tc
;
507 musthave(c
, KEYWORD
|ALIAS
);
515 register struct op
*t
;
521 c
= token(CONTIN
|KEYWORD
); /* no ALIAS here */
526 XPput(ptns
, yylval
.cp
);
527 } while ((c
= token(0)) == '|');
530 t
->vars
= (char **) XPclose(ptns
);
533 t
->left
= c_list(TRUE
);
534 /* Note: Posix requires the ;; */
535 if ((tpeek(CONTIN
|KEYWORD
|ALIAS
)) != endtok
)
536 musthave(BREAK
, CONTIN
|KEYWORD
|ALIAS
);
541 function_body(name
, ksh_func
)
543 int ksh_func
; /* function foo { ... } vs foo() { .. } */
549 sname
= wdstrip(name
);
550 /* Check for valid characters in name. posix and ksh93 say only
551 * allow [a-zA-Z_0-9] but this allows more as old pdksh's have
552 * allowed more (the following were never allowed:
553 * nul space nl tab $ ' " \ ` ( ) & | ; = < >
554 * C_QUOTE covers all but = and adds # [ ? *)
556 for (p
= sname
; *p
; p
++)
557 if (ctype(*p
, C_QUOTE
) || *p
== '=')
558 yyerror("%s: invalid function name\n", sname
);
562 t
->u
.ksh_func
= ksh_func
;
563 t
->lineno
= source
->line
;
565 /* Note that POSIX allows only compound statements after foo(), sh and
566 * at&t ksh allow any command, go with the latter since it shouldn't
567 * break anything. However, for function foo, at&t ksh only accepts
571 musthave('{', CONTIN
|KEYWORD
|ALIAS
); /* } */
575 old_func_parse
= e
->flags
& EF_FUNC_PARSE
;
576 e
->flags
|= EF_FUNC_PARSE
;
577 if ((t
->left
= get_command(CONTIN
)) == (struct op
*) 0) {
579 * Probably something like foo() followed by eof or ;.
580 * This is accepted by sh and ksh88.
581 * To make "typeset -f foo" work reliably (so its output can
582 * be used as input), we pretend there is a colon here.
584 t
->left
= newtp(TCOM
);
585 t
->left
->args
= (char **) alloc(sizeof(char *) * 2, ATEMP
);
586 t
->left
->args
[0] = alloc(sizeof(char) * 3, ATEMP
);
587 t
->left
->args
[0][0] = CHAR
;
588 t
->left
->args
[0][1] = ':';
589 t
->left
->args
[0][2] = EOS
;
590 t
->left
->args
[1] = (char *) 0;
591 t
->left
->vars
= (char **) alloc(sizeof(char *), ATEMP
);
592 t
->left
->vars
[0] = (char *) 0;
596 e
->flags
&= ~EF_FUNC_PARSE
;
608 /* Posix does not do alias expansion here... */
609 if ((c
= token(CONTIN
|KEYWORD
|ALIAS
)) != IN
) {
610 if (c
!= ';') /* non-POSIX, but at&t ksh accepts a ; here */
614 while ((c
= token(0)) == LWORD
)
615 XPput(args
, yylval
.cp
);
616 if (c
!= '\n' && c
!= ';')
617 syntaxerr((char *) 0);
618 if (XPsize(args
) == 0) {
623 return (char **) XPclose(args
);
628 * supporting functions
632 block(type
, t1
, t2
, wp
)
637 register struct op
*t
;
646 const struct tokeninfo
{
653 { "then", THEN
, TRUE
},
654 { "else", ELSE
, TRUE
},
655 { "elif", ELIF
, TRUE
},
657 { "case", CASE
, TRUE
},
658 { "esac", ESAC
, TRUE
},
659 { "for", FOR
, TRUE
},
661 { "select", SELECT
, TRUE
},
663 { "while", WHILE
, TRUE
},
664 { "until", UNTIL
, TRUE
},
666 { "done", DONE
, TRUE
},
668 { "function", FUNCTION
, TRUE
},
669 { "time", TIME
, TRUE
},
674 { "[[", DBRACKET
, TRUE
},
676 /* Lexical tokens (0[EOF], LWORD and REDIR handled specially) */
677 { "&&", LOGAND
, FALSE
},
678 { "||", LOGOR
, FALSE
},
679 { ";;", BREAK
, FALSE
},
681 { "((", MDPAREN
, FALSE
},
682 { "|&", COPROC
, FALSE
},
684 /* and some special cases... */
685 { "newline", '\n', FALSE
},
692 register struct tokeninfo
const *tt
;
693 register struct tbl
*p
;
695 tinit(&keywords
, APERM
, 32); /* must be 2^n (currently 20 keywords) */
696 for (tt
= tokentab
; tt
->name
; tt
++) {
698 p
= tenter(&keywords
, tt
->name
, hash(tt
->name
));
699 p
->flag
|= DEFINED
|ISSET
;
710 char redir
[6]; /* 2<<- is the longest redirection, I think */
712 struct tokeninfo
const *tt
;
722 if (nesting
.start_token
) {
723 c
= nesting
.start_token
;
724 source
->errline
= nesting
.start_line
;
728 /* don't quote the EOF */
729 yyerror("syntax error: unexpected EOF\n");
733 s
= snptreef((char *) 0, 32, "%S", yylval
.cp
);
737 s
= snptreef(redir
, sizeof(redir
), "%R", yylval
.iop
);
741 for (tt
= tokentab
; tt
->name
; tt
++)
747 if (c
> 0 && c
< 256) {
751 shf_snprintf(redir
, sizeof(redir
),
756 yyerror("syntax error: `%s' %s\n", s
, what
);
760 nesting_push(save
, tok
)
761 struct nesting_state
*save
;
765 nesting
.start_token
= tok
;
766 nesting
.start_line
= source
->line
;
771 struct nesting_state
*saved
;
780 register struct op
*t
;
782 t
= (struct op
*) alloc(sizeof(*t
), ATEMP
);
785 t
->args
= t
->vars
= NULL
;
787 t
->left
= t
->right
= NULL
;
796 nesting
.start_token
= 0;
797 nesting
.start_line
= 0;
804 /* This kludge exists to take care of sh/at&t ksh oddity in which
805 * the arguments of alias/export/readonly/typeset have no field
806 * splitting, file globbing, or (normal) tilde expansion done.
807 * at&t ksh seems to do something similar to this since
808 * $ touch a=a; typeset a=[ab]; echo "$a"
810 * $ x=typeset; $x a=[ab]; echo "$a"
820 if (Flag(FPOSIX
) || !*s
)
822 return (c
== 'a' && strcmp(s
, "alias") == 0)
823 || (c
== 'e' && strcmp(s
, "export") == 0)
824 || (c
== 'r' && strcmp(s
, "readonly") == 0)
825 || (c
== 't' && strcmp(s
, "typeset") == 0);
828 /* Check if we are in the middle of reading an alias */
833 for (; s
&& s
->type
== SALIAS
; s
= s
->next
)
834 if (!(s
->flags
& SF_ALIASEND
))
841 /* Order important - indexed by Test_meta values
842 * Note that ||, &&, ( and ) can't appear as unquoted strings
843 * in normal shell input, so these can be interpreted unambiguously
844 * in the evaluation pass.
846 static const char dbtest_or
[] = { CHAR
, '|', CHAR
, '|', EOS
};
847 static const char dbtest_and
[] = { CHAR
, '&', CHAR
, '&', EOS
};
848 static const char dbtest_not
[] = { CHAR
, '!', EOS
};
849 static const char dbtest_oparen
[] = { CHAR
, '(', EOS
};
850 static const char dbtest_cparen
[] = { CHAR
, ')', EOS
};
851 const char *const dbtest_tokens
[] = {
852 dbtest_or
, dbtest_and
, dbtest_not
,
853 dbtest_oparen
, dbtest_cparen
855 const char db_close
[] = { CHAR
, ']', CHAR
, ']', EOS
};
856 const char db_lthan
[] = { CHAR
, '<', EOS
};
857 const char db_gthan
[] = { CHAR
, '>', EOS
};
859 /* Test if the current token is a whatever. Accepts the current token if
860 * it is. Returns 0 if it is not, non-zero if it is (in the case of
861 * TM_UNOP and TM_BINOP, the returned value is a Test_op).
864 dbtestp_isa(te
, meta
)
868 int c
= tpeek(ARRAYVAR
| (meta
== TM_BINOP
? 0 : CONTIN
));
870 char *save
= (char *) 0;
874 uqword
= c
== LWORD
&& *ident
;
878 else if (meta
== TM_AND
)
880 else if (meta
== TM_NOT
)
881 ret
= uqword
&& strcmp(yylval
.cp
, dbtest_tokens
[(int) TM_NOT
]) == 0;
882 else if (meta
== TM_OPAREN
)
883 ret
= c
== '(' /*)*/;
884 else if (meta
== TM_CPAREN
)
885 ret
= c
== /*(*/ ')';
886 else if (meta
== TM_UNOP
|| meta
== TM_BINOP
) {
887 if (meta
== TM_BINOP
&& c
== REDIR
888 && (yylval
.iop
->flag
== IOREAD
889 || yylval
.iop
->flag
== IOWRITE
))
892 save
= wdcopy(yylval
.iop
->flag
== IOREAD
?
893 db_lthan
: db_gthan
, ATEMP
);
894 } else if (uqword
&& (ret
= (int) test_isop(te
, meta
, ident
)))
896 } else /* meta == TM_END */
897 ret
= uqword
&& strcmp(yylval
.cp
, db_close
) == 0;
900 if (meta
!= TM_END
) {
902 assert(/* meta >= 0 && */
903 meta
< sizeof(dbtest_tokens
) /
904 sizeof(dbtest_tokens
[0]));
905 save
= wdcopy(dbtest_tokens
[(int) meta
], ATEMP
);
907 XPput(*te
->pos
.av
, save
);
914 dbtestp_getopnd(te
, op
, do_eval
)
919 int c
= tpeek(ARRAYVAR
);
922 return (const char *) 0;
925 XPput(*te
->pos
.av
, yylval
.cp
);
931 dbtestp_eval(te
, op
, opnd1
, opnd2
, do_eval
)
942 dbtestp_error(te
, offset
, msg
)
947 te
->flags
|= TEF_ERROR
;
951 /* Kludgy to say the least... */
953 yylval
.cp
= *(XPptrv(*te
->pos
.av
) + XPsize(*te
->pos
.av
)