1 /* $OpenBSD: syn.c,v 1.29 2013/06/03 18:40:05 jca Exp $ */
4 * shell parser (C version)
10 struct nesting_state
{
11 int start_token
; /* token that began nesting (e.g., FOR) */
12 int start_line
; /* line nesting began on */
15 static void yyparse(void);
16 static struct op
*pipeline(int);
17 static struct op
*andor(void);
18 static struct op
*c_list(int);
19 static struct ioword
*synio(int);
20 static void musthave(int, int);
21 static struct op
*nested(int, int, int);
22 static struct op
*get_command(int);
23 static struct op
*dogroup(void);
24 static struct op
*thenpart(void);
25 static struct op
*elsepart(void);
26 static struct op
*caselist(void);
27 static struct op
*casepart(int);
28 static struct op
*function_body(char *, int);
29 static char ** wordlist(void);
30 static struct op
*block(int, struct op
*, struct op
*, char **);
31 static struct op
*newtp(int);
32 static void syntaxerr(const char *) __attribute__((__noreturn__
));
33 static void nesting_push(struct nesting_state
*, int);
34 static void nesting_pop(struct nesting_state
*);
35 static int assign_command(char *);
36 static int inalias(struct source
*);
37 static int dbtestp_isa(Test_env
*, Test_meta
);
38 static const char *dbtestp_getopnd(Test_env
*, Test_op
, int);
39 static int dbtestp_eval(Test_env
*, Test_op
, const char *, const char *,
41 static void dbtestp_error(Test_env
*, int, const char *);
43 static struct op
*outtree
; /* yyparse output */
45 static struct nesting_state nesting
; /* \n changed to ; */
47 static int reject
; /* token(cf) gets symbol again */
48 static int symbol
; /* yylex value */
50 #define REJECT (reject = 1)
51 #define ACCEPT (reject = 0)
53 ((reject) ? (ACCEPT, symbol) : (symbol = yylex(cf)))
55 ((reject) ? (symbol) : (REJECT, symbol = yylex(cf)))
64 outtree
= c_list(source
->type
== SSTRING
);
66 if (c
== 0 && !outtree
)
67 outtree
= newtp(TEOF
);
68 else if (c
!= '\n' && c
!= 0)
69 syntaxerr((char *) 0);
75 struct op
*t
, *p
, *tl
= NULL
;
79 while (token(0) == '|') {
80 if ((p
= get_command(CONTIN
)) == NULL
)
81 syntaxerr((char *) 0);
83 t
= tl
= block(TPIPE
, t
, p
, NOWORDS
);
85 tl
= tl
->right
= block(TPIPE
, tl
->right
, p
, NOWORDS
);
100 while ((c
= token(0)) == LOGAND
|| c
== LOGOR
) {
101 if ((p
= pipeline(CONTIN
)) == NULL
)
102 syntaxerr((char *) 0);
103 t
= block(c
== LOGAND
? TAND
: TOR
, t
, p
, NOWORDS
);
113 struct op
*t
= NULL
, *p
, *tl
= NULL
;
119 /* Token has always been read/rejected at this point, so
120 * we don't worry about what flags to pass token()
124 if (c
== '\n' && (multi
|| inalias(source
))) {
125 if (!p
) /* ignore blank lines */
129 else if (c
== '&' || c
== COPROC
)
130 p
= block(c
== '&' ? TASYNC
: TCOPROC
,
131 p
, NOBLOCK
, NOWORDS
);
137 t
= tl
= block(TLIST
, t
, p
, NOWORDS
);
139 tl
= tl
->right
= block(TLIST
, tl
->right
, p
, NOWORDS
);
147 static struct ioword
*
153 if (tpeek(cf
) != REDIR
)
157 ishere
= (iop
->flag
&IOTYPE
) == IOHERE
;
158 musthave(LWORD
, ishere
? HEREDELIM
: 0);
160 iop
->delim
= yylval
.cp
;
161 if (*ident
!= 0) /* unquoted */
163 if (herep
>= &heres
[HERES
])
164 yyerror("too many <<'s\n");
167 iop
->name
= yylval
.cp
;
172 musthave(int c
, int cf
)
174 if ((token(cf
)) != c
)
175 syntaxerr((char *) 0);
179 nested(int type
, int smark
, int emark
)
182 struct nesting_state old_nesting
;
184 nesting_push(&old_nesting
, smark
);
186 musthave(emark
, KEYWORD
|ALIAS
);
187 nesting_pop(&old_nesting
);
188 return (block(type
, t
, NOBLOCK
, NOWORDS
));
195 int c
, iopn
= 0, syniocf
;
196 struct ioword
*iop
, **iops
;
198 struct nesting_state old_nesting
;
200 iops
= (struct ioword
**) alloc(sizeofN(struct ioword
*, NUFILE
+1),
205 syniocf
= KEYWORD
|ALIAS
;
206 switch (c
= token(cf
|KEYWORD
|ALIAS
|VARASN
)) {
209 afree((void*) iops
, ATEMP
);
212 return NULL
; /* empty line */
217 syniocf
&= ~(KEYWORD
|ALIAS
);
219 t
->lineno
= source
->line
;
221 cf
= (t
->u
.evalflags
? ARRAYVAR
: 0) |
222 (XPsize(args
) == 0 ? ALIAS
|VARASN
: CMDWORD
);
226 yyerror("too many redirections\n");
227 iops
[iopn
++] = synio(cf
);
232 /* the iopn == 0 and XPsize(vars) == 0 are
233 * dubious but at&t ksh acts this way
235 if (iopn
== 0 && XPsize(vars
) == 0 &&
237 assign_command(ident
))
238 t
->u
.evalflags
= DOVACHECK
;
239 if ((XPsize(args
) == 0 || Flag(FKEYWORD
)) &&
240 is_wdvarassign(yylval
.cp
))
241 XPput(vars
, yylval
.cp
);
243 XPput(args
, yylval
.cp
);
247 /* Check for "> foo (echo hi)", which at&t ksh
248 * allows (not POSIX, but not disallowed)
251 if (XPsize(args
) == 0 && XPsize(vars
) == 0) {
255 /* Must be a function */
256 if (iopn
!= 0 || XPsize(args
) != 1 ||
258 syntaxerr((char *) 0);
262 t
= function_body(XPptrv(args
)[0], false);
274 t
= nested(TPAREN
, '(', ')');
278 t
= nested(TBRACE
, '{', '}');
283 static const char let_cmd
[] = {
284 CHAR
, 'l', CHAR
, 'e',
287 /* Leave KEYWORD in syniocf (allow if (( 1 )) then ...) */
289 t
->lineno
= source
->line
;
291 XPput(args
, wdcopy(let_cmd
, ATEMP
));
292 musthave(LWORD
,LETEXPR
);
293 XPput(args
, yylval
.cp
);
297 case DBRACKET
: /* [[ .. ]] */
298 /* Leave KEYWORD in syniocf (allow if [[ -n 1 ]] then ...) */
299 t
= newtp(TDBRACKET
);
304 te
.flags
= TEF_DBRACKET
;
306 te
.isa
= dbtestp_isa
;
307 te
.getopnd
= dbtestp_getopnd
;
308 te
.eval
= dbtestp_eval
;
309 te
.error
= dbtestp_error
;
317 t
= newtp((c
== FOR
) ? TFOR
: TSELECT
);
318 musthave(LWORD
, ARRAYVAR
);
319 if (!is_wdvarname(yylval
.cp
, true))
320 yyerror("%s: bad identifier\n",
321 c
== FOR
? "for" : "select");
322 t
->str
= str_save(ident
, ATEMP
);
323 nesting_push(&old_nesting
, c
);
324 t
->vars
= wordlist();
326 nesting_pop(&old_nesting
);
331 nesting_push(&old_nesting
, c
);
332 t
= newtp((c
== WHILE
) ? TWHILE
: TUNTIL
);
333 t
->left
= c_list(true);
334 t
->right
= dogroup();
335 nesting_pop(&old_nesting
);
342 nesting_push(&old_nesting
, c
);
343 t
->left
= caselist();
344 nesting_pop(&old_nesting
);
348 nesting_push(&old_nesting
, c
);
350 t
->left
= c_list(true);
351 t
->right
= thenpart();
352 musthave(FI
, KEYWORD
|ALIAS
);
353 nesting_pop(&old_nesting
);
357 syniocf
&= ~(KEYWORD
|ALIAS
);
359 if (t
== (struct op
*) 0)
360 syntaxerr((char *) 0);
361 t
= block(TBANG
, NOBLOCK
, t
, NOWORDS
);
365 syniocf
&= ~(KEYWORD
|ALIAS
);
368 t
->str
= alloc(2, ATEMP
);
369 t
->str
[0] = '\0'; /* TF_* flags */
372 t
= block(TTIME
, t
, NOBLOCK
, NOWORDS
);
377 t
= function_body(yylval
.cp
, true);
381 while ((iop
= synio(syniocf
)) != NULL
) {
383 yyerror("too many redirections\n");
388 afree((void*) iops
, ATEMP
);
392 iops
= (struct ioword
**) aresize((void*) iops
,
393 sizeofN(struct ioword
*, iopn
), ATEMP
);
397 if (t
->type
== TCOM
|| t
->type
== TDBRACKET
) {
399 t
->args
= (char **) XPclose(args
);
401 t
->vars
= (char **) XPclose(vars
);
416 c
= token(CONTIN
|KEYWORD
|ALIAS
);
417 /* A {...} can be used instead of do...done for for/select loops
418 * but not for while/until loops - we don't need to check if it
419 * is a while loop because it would have been parsed as part of
420 * the conditional command list...
427 syntaxerr((char *) 0);
429 musthave(c
, KEYWORD
|ALIAS
);
438 musthave(THEN
, KEYWORD
|ALIAS
);
440 t
->left
= c_list(true);
442 syntaxerr((char *) 0);
443 t
->right
= elsepart();
452 switch (token(KEYWORD
|ALIAS
|VARASN
)) {
454 if ((t
= c_list(true)) == NULL
)
455 syntaxerr((char *) 0);
460 t
->left
= c_list(true);
461 t
->right
= thenpart();
476 c
= token(CONTIN
|KEYWORD
|ALIAS
);
477 /* A {...} can be used instead of in...esac for case statements */
483 syntaxerr((char *) 0);
485 while ((tpeek(CONTIN
|KEYWORD
|ESACONLY
)) != c
) { /* no ALIAS here */
486 struct op
*tc
= casepart(c
);
488 t
= tl
= tc
, tl
->right
= NULL
;
490 tl
->right
= tc
, tl
= tc
;
492 musthave(c
, KEYWORD
|ALIAS
);
505 c
= token(CONTIN
|KEYWORD
); /* no ALIAS here */
510 XPput(ptns
, yylval
.cp
);
511 } while ((c
= token(0)) == '|');
514 t
->vars
= (char **) XPclose(ptns
);
517 t
->left
= c_list(true);
518 /* Note: Posix requires the ;; */
519 if ((tpeek(CONTIN
|KEYWORD
|ALIAS
)) != endtok
)
520 musthave(BREAK
, CONTIN
|KEYWORD
|ALIAS
);
525 function_body(char *name
,
526 int ksh_func
) /* function foo { ... } vs foo() { .. } */
532 sname
= wdstrip(name
);
533 /* Check for valid characters in name. posix and ksh93 say only
534 * allow [a-zA-Z_0-9] but this allows more as old pdksh's have
535 * allowed more (the following were never allowed:
536 * nul space nl tab $ ' " \ ` ( ) & | ; = < >
537 * C_QUOTE covers all but = and adds # [ ? *)
539 for (p
= sname
; *p
; p
++)
540 if (ctype(*p
, C_QUOTE
) || *p
== '=')
541 yyerror("%s: invalid function name\n", sname
);
545 t
->u
.ksh_func
= ksh_func
;
546 t
->lineno
= source
->line
;
548 /* Note that POSIX allows only compound statements after foo(), sh and
549 * at&t ksh allow any command, go with the latter since it shouldn't
550 * break anything. However, for function foo, at&t ksh only accepts
554 musthave('{', CONTIN
|KEYWORD
|ALIAS
); /* } */
558 old_func_parse
= e
->flags
& EF_FUNC_PARSE
;
559 e
->flags
|= EF_FUNC_PARSE
;
560 if ((t
->left
= get_command(CONTIN
)) == (struct op
*) 0) {
562 * Probably something like foo() followed by eof or ;.
563 * This is accepted by sh and ksh88.
564 * To make "typeset -f foo" work reliably (so its output can
565 * be used as input), we pretend there is a colon here.
567 t
->left
= newtp(TCOM
);
568 t
->left
->args
= (char **) alloc(sizeof(char *) * 2, ATEMP
);
569 t
->left
->args
[0] = alloc(sizeof(char) * 3, ATEMP
);
570 t
->left
->args
[0][0] = CHAR
;
571 t
->left
->args
[0][1] = ':';
572 t
->left
->args
[0][2] = EOS
;
573 t
->left
->args
[1] = (char *) 0;
574 t
->left
->vars
= (char **) alloc(sizeof(char *), ATEMP
);
575 t
->left
->vars
[0] = (char *) 0;
579 e
->flags
&= ~EF_FUNC_PARSE
;
591 /* Posix does not do alias expansion here... */
592 if ((c
= token(CONTIN
|KEYWORD
|ALIAS
)) != IN
) {
593 if (c
!= ';') /* non-POSIX, but at&t ksh accepts a ; here */
597 while ((c
= token(0)) == LWORD
)
598 XPput(args
, yylval
.cp
);
599 if (c
!= '\n' && c
!= ';')
600 syntaxerr((char *) 0);
602 return (char **) XPclose(args
);
606 * supporting functions
610 block(int type
, struct op
*t1
, struct op
*t2
, char **wp
)
621 const struct tokeninfo
{
628 { "then", THEN
, true },
629 { "else", ELSE
, true },
630 { "elif", ELIF
, true },
632 { "case", CASE
, true },
633 { "esac", ESAC
, true },
634 { "for", FOR
, true },
635 { "select", SELECT
, true },
636 { "while", WHILE
, true },
637 { "until", UNTIL
, true },
639 { "done", DONE
, true },
641 { "function", FUNCTION
, true },
642 { "time", TIME
, true },
646 { "[[", DBRACKET
, true },
647 /* Lexical tokens (0[EOF], LWORD and REDIR handled specially) */
648 { "&&", LOGAND
, false },
649 { "||", LOGOR
, false },
650 { ";;", BREAK
, false },
651 { "((", MDPAREN
, false },
652 { "|&", COPROC
, false },
653 /* and some special cases... */
654 { "newline", '\n', false },
661 struct tokeninfo
const *tt
;
664 ktinit(&keywords
, APERM
, 32); /* must be 2^n (currently 20 keywords) */
665 for (tt
= tokentab
; tt
->name
; tt
++) {
667 p
= ktenter(&keywords
, tt
->name
, hash(tt
->name
));
668 p
->flag
|= DEFINED
|ISSET
;
676 syntaxerr(const char *what
)
678 char redir
[6]; /* 2<<- is the longest redirection, I think */
680 struct tokeninfo
const *tt
;
690 if (nesting
.start_token
) {
691 c
= nesting
.start_token
;
692 source
->errline
= nesting
.start_line
;
696 /* don't quote the EOF */
697 yyerror("syntax error: unexpected EOF\n");
701 s
= snptreef((char *) 0, 32, "%S", yylval
.cp
);
705 s
= snptreef(redir
, sizeof(redir
), "%R", yylval
.iop
);
709 for (tt
= tokentab
; tt
->name
; tt
++)
715 if (c
> 0 && c
< 256) {
719 shf_snprintf(redir
, sizeof(redir
),
724 yyerror("syntax error: `%s' %s\n", s
, what
);
728 nesting_push(struct nesting_state
*save
, int tok
)
731 nesting
.start_token
= tok
;
732 nesting
.start_line
= source
->line
;
736 nesting_pop(struct nesting_state
*saved
)
746 t
= (struct op
*) alloc(sizeof(*t
), ATEMP
);
749 t
->args
= t
->vars
= NULL
;
751 t
->left
= t
->right
= NULL
;
759 nesting
.start_token
= 0;
760 nesting
.start_line
= 0;
767 /* This kludge exists to take care of sh/at&t ksh oddity in which
768 * the arguments of alias/export/readonly/typeset have no field
769 * splitting, file globbing, or (normal) tilde expansion done.
770 * at&t ksh seems to do something similar to this since
771 * $ touch a=a; typeset a=[ab]; echo "$a"
773 * $ x=typeset; $x a=[ab]; echo "$a"
778 assign_command(char *s
)
780 if (Flag(FPOSIX
) || !*s
)
782 return (strcmp(s
, "alias") == 0) ||
783 (strcmp(s
, "export") == 0) ||
784 (strcmp(s
, "readonly") == 0) ||
785 (strcmp(s
, "typeset") == 0);
788 /* Check if we are in the middle of reading an alias */
790 inalias(struct source
*s
)
792 for (; s
&& s
->type
== SALIAS
; s
= s
->next
)
793 if (!(s
->flags
& SF_ALIASEND
))
799 /* Order important - indexed by Test_meta values
800 * Note that ||, &&, ( and ) can't appear as unquoted strings
801 * in normal shell input, so these can be interpreted unambiguously
802 * in the evaluation pass.
804 static const char dbtest_or
[] = { CHAR
, '|', CHAR
, '|', EOS
};
805 static const char dbtest_and
[] = { CHAR
, '&', CHAR
, '&', EOS
};
806 static const char dbtest_not
[] = { CHAR
, '!', EOS
};
807 static const char dbtest_oparen
[] = { CHAR
, '(', EOS
};
808 static const char dbtest_cparen
[] = { CHAR
, ')', EOS
};
809 const char *const dbtest_tokens
[] = {
810 dbtest_or
, dbtest_and
, dbtest_not
,
811 dbtest_oparen
, dbtest_cparen
813 const char db_close
[] = { CHAR
, ']', CHAR
, ']', EOS
};
814 const char db_lthan
[] = { CHAR
, '<', EOS
};
815 const char db_gthan
[] = { CHAR
, '>', EOS
};
817 /* Test if the current token is a whatever. Accepts the current token if
818 * it is. Returns 0 if it is not, non-zero if it is (in the case of
819 * TM_UNOP and TM_BINOP, the returned value is a Test_op).
822 dbtestp_isa(Test_env
*te
, Test_meta meta
)
824 int c
= tpeek(ARRAYVAR
| (meta
== TM_BINOP
? 0 : CONTIN
));
826 char *save
= (char *) 0;
830 uqword
= c
== LWORD
&& *ident
;
834 else if (meta
== TM_AND
)
836 else if (meta
== TM_NOT
)
837 ret
= uqword
&& strcmp(yylval
.cp
, dbtest_tokens
[(int) TM_NOT
]) == 0;
838 else if (meta
== TM_OPAREN
)
839 ret
= c
== '(' /*)*/;
840 else if (meta
== TM_CPAREN
)
841 ret
= c
== /*(*/ ')';
842 else if (meta
== TM_UNOP
|| meta
== TM_BINOP
) {
843 if (meta
== TM_BINOP
&& c
== REDIR
&&
844 (yylval
.iop
->flag
== IOREAD
|| yylval
.iop
->flag
== IOWRITE
)) {
846 save
= wdcopy(yylval
.iop
->flag
== IOREAD
?
847 db_lthan
: db_gthan
, ATEMP
);
848 } else if (uqword
&& (ret
= (int) test_isop(te
, meta
, ident
)))
850 } else /* meta == TM_END */
851 ret
= uqword
&& strcmp(yylval
.cp
, db_close
) == 0;
854 if (meta
!= TM_END
) {
856 save
= wdcopy(dbtest_tokens
[(int) meta
], ATEMP
);
857 XPput(*te
->pos
.av
, save
);
864 dbtestp_getopnd(Test_env
*te
, Test_op op
, int do_eval
)
866 int c
= tpeek(ARRAYVAR
);
869 return (const char *) 0;
872 XPput(*te
->pos
.av
, yylval
.cp
);
878 dbtestp_eval(Test_env
*te
, Test_op op
, const char *opnd1
, const char *opnd2
,
885 dbtestp_error(Test_env
*te
, int offset
, const char *msg
)
887 te
->flags
|= TEF_ERROR
;
891 /* Kludgy to say the least... */
893 yylval
.cp
= *(XPptrv(*te
->pos
.av
) + XPsize(*te
->pos
.av
) +