sync
[bitrig.git] / bin / ksh / lex.c
blob01aa6863c3b2ae08e9268fe988be46ebf85550ab
1 /* $OpenBSD: lex.c,v 1.47 2013/03/03 19:11:34 guenther Exp $ */
3 /*
4 * lexical analysis and source input
5 */
7 #include "sh.h"
8 #include <libgen.h>
9 #include <ctype.h>
/*
 * Structure to keep track of the lexing state and the various pieces of
 * info needed for each particular state.
 *
 * One Lex_state is one entry of the lexer's state stack.  ls_state holds
 * the state code; ls_info holds the per-state bookkeeping, of which only
 * the member matching ls_state is meaningful.  The ls_* accessor macros
 * below give short names for the union members.
 */
typedef struct lex_state Lex_state;
struct lex_state {
	int ls_state;
	union {
		/* $(...) */
		struct scsparen_info {
			int nparen;	/* count open parenthesis */
			/*
			 * Quoting sub-state used while scanning $(...):
			 * 0 normal, 1 backslash in normal mode,
			 * 2 double quotes, 3 backslash in double quotes,
			 * 4 single quotes.
			 */
			int csstate;	/* XXX remove */
		} u_scsparen;

		/* $((...)) */
		struct sasparen_info {
			int nparen;	/* count open parenthesis */
			int start;	/* marks start of $(( in output str */
		} u_sasparen;

		/* ((...)) */
		struct sletparen_info {
			int nparen;	/* count open parenthesis */
		} u_sletparen;

		/* `...` */
		struct sbquote_info {
			int indquotes;	/* true if in double quotes: "`...`" */
		} u_sbquote;

		Lex_state *base; /* used to point to next state block */
	} ls_info;
};

/* Shorthand accessors for the per-state members of ls_info. */
#define ls_scsparen	ls_info.u_scsparen
#define ls_sasparen	ls_info.u_sasparen
#define ls_sletparen	ls_info.u_sletparen
#define ls_sbquote	ls_info.u_sbquote

/* Bounds of the state block currently in use by the lexer. */
typedef struct State_info State_info;
struct State_info {
	Lex_state	*base;
	Lex_state	*end;
};
56 static void readhere(struct ioword *);
57 static int getsc__(void);
58 static void getsc_line(Source *);
59 static int getsc_bn(void);
60 static char *get_brace_var(XString *, char *);
61 static int arraysub(char **);
62 static const char *ungetsc(int);
63 static void gethere(void);
64 static Lex_state *push_state_(State_info *, Lex_state *);
65 static Lex_state *pop_state_(State_info *, Lex_state *);
66 static char *special_prompt_expand(char *);
67 static int dopprompt(const char *, int, const char **, int);
69 static int backslash_skip;
70 static int ignore_backslash_newline;
/*
 * optimized getsc_bn(): take the next byte straight from the current
 * source string unless it is the terminator, a backslash, or a
 * backslash skip is pending; in those cases defer to getsc_bn().
 */
#define getsc()		((*source->str == '\0' || *source->str == '\\' \
			|| backslash_skip) ? getsc_bn() : *source->str++)
/* optimized getsc__(): only call getsc__() when the string is exhausted */
#define getsc_()	((*source->str == '\0') ? getsc__() : *source->str++)

/* Number of Lex_state entries in one state block */
#define STATE_BSIZE	32

/*
 * Enter lexer state (s).  Expects `statep', `state_info' and `state' to
 * be in scope at the point of use; push_state_() is called when the
 * current block is full.
 */
#define PUSH_STATE(s)	do {						\
		statep++;						\
		if (statep == state_info.end)				\
			statep = push_state_(&state_info, statep);	\
		statep->ls_state = (s);					\
		state = statep->ls_state;				\
	} while (0)

/*
 * Leave the current lexer state and reload `state' from the entry below
 * it; pop_state_() is called when the bottom of the block is reached.
 */
#define POP_STATE()	do {						\
		statep--;						\
		if (statep == state_info.base)				\
			statep = pop_state_(&state_info, statep);	\
		state = statep->ls_state;				\
	} while (0)
95 * Lexical analyzer
97 * tokens are not regular expressions, they are LL(1).
98 * for example, "${var:-${PWD}}", and "$(size $(whence ksh))".
99 * hence the state stack.
103 yylex(int cf)
105 Lex_state states[STATE_BSIZE], *statep;
106 State_info state_info;
107 int c, state;
108 XString ws; /* expandable output word */
109 char *wp; /* output word pointer */
110 char *sp, *dp;
111 int c2;
114 Again:
115 states[0].ls_state = -1;
116 states[0].ls_info.base = NULL;
117 statep = &states[1];
118 state_info.base = states;
119 state_info.end = &states[STATE_BSIZE];
121 Xinit(ws, wp, 64, ATEMP);
123 backslash_skip = 0;
124 ignore_backslash_newline = 0;
126 if (cf&ONEWORD)
127 state = SWORD;
128 else if (cf&LETEXPR) {
129 *wp++ = OQUOTE; /* enclose arguments in (double) quotes */
130 state = SLETPAREN;
131 statep->ls_sletparen.nparen = 0;
132 } else { /* normal lexing */
133 state = (cf & HEREDELIM) ? SHEREDELIM : SBASE;
134 while ((c = getsc()) == ' ' || c == '\t')
136 if (c == '#') {
137 ignore_backslash_newline++;
138 while ((c = getsc()) != '\0' && c != '\n')
140 ignore_backslash_newline--;
142 ungetsc(c);
144 if (source->flags & SF_ALIAS) { /* trailing ' ' in alias definition */
145 source->flags &= ~SF_ALIAS;
146 /* In POSIX mode, a trailing space only counts if we are
147 * parsing a simple command
149 if (!Flag(FPOSIX) || (cf & CMDWORD))
150 cf |= ALIAS;
153 /* Initial state: one of SBASE SHEREDELIM SWORD SASPAREN */
154 statep->ls_state = state;
156 /* collect non-special or quoted characters to form word */
157 while (!((c = getsc()) == 0 ||
158 ((state == SBASE || state == SHEREDELIM) && ctype(c, C_LEX1)))) {
159 Xcheck(ws, wp);
160 switch (state) {
161 case SBASE:
162 if (Flag(FCSHHISTORY) && (source->flags & SF_TTY) &&
163 c == '!') {
164 char **replace = NULL;
166 c2 = getsc();
167 if (c2 == '\0' || c2 == ' ' || c2 == '\t')
169 else if (c2 == '!')
170 replace = hist_get_newest(0);
171 else if (isdigit(c2) || c2 == '-' ||
172 isalpha(c2)) {
173 int get = !isalpha(c2);
174 char match[200], *str = match;
176 *str++ = c2;
177 do {
178 if ((c2 = getsc()) == '\0')
179 break;
180 if (c2 == '\t' || c2 == ' ' ||
181 c2 == '\n') {
182 ungetsc(c2);
183 break;
185 *str++ = c2;
186 } while (str < &match[sizeof(match)-1]);
187 *str = '\0';
189 if (get) {
190 int h = findhistrel(match);
191 if (h >= 0)
192 replace = &history[h];
193 } else {
194 int h = findhist(-1, 0, match, true);
195 if (h >= 0)
196 replace = &history[h];
201 * XXX ksh history buffer saves un-expanded
202 * commands. Until the history buffer code is
203 * changed to contain expanded commands, we
204 * ignore the bad commands (spinning sucks)
206 if (replace && **replace == '!')
207 ungetsc(c2);
208 else if (replace) {
209 Source *s;
211 /* do not strdup replacement via alloc */
212 s = pushs(SREREAD, source->areap);
213 s->start = s->str = *replace;
214 s->next = source;
215 s->u.freeme = NULL;
216 source = s;
217 continue;
218 } else
219 ungetsc(c2);
221 if (c == '[' && (cf & (VARASN|ARRAYVAR))) {
222 *wp = EOS; /* temporary */
223 if (is_wdvarname(Xstring(ws, wp), false)) {
224 char *p, *tmp;
226 if (arraysub(&tmp)) {
227 *wp++ = CHAR;
228 *wp++ = c;
229 for (p = tmp; *p; ) {
230 Xcheck(ws, wp);
231 *wp++ = CHAR;
232 *wp++ = *p++;
234 afree(tmp, ATEMP);
235 break;
236 } else {
237 Source *s;
239 s = pushs(SREREAD,
240 source->areap);
241 s->start = s->str
242 = s->u.freeme = tmp;
243 s->next = source;
244 source = s;
247 *wp++ = CHAR;
248 *wp++ = c;
249 break;
251 /* FALLTHROUGH */
252 Sbase1: /* includes *(...|...) pattern (*+?@!) */
253 if (c == '*' || c == '@' || c == '+' || c == '?' ||
254 c == '!') {
255 c2 = getsc();
256 if (c2 == '(' /*)*/ ) {
257 *wp++ = OPAT;
258 *wp++ = c;
259 PUSH_STATE(SPATTERN);
260 break;
262 ungetsc(c2);
264 /* FALLTHROUGH */
265 Sbase2: /* doesn't include *(...|...) pattern (*+?@!) */
266 switch (c) {
267 case '\\':
268 c = getsc();
269 if (c) /* trailing \ is lost */
270 *wp++ = QCHAR, *wp++ = c;
271 break;
272 case '\'':
273 if ((cf & HEREDOC) || state == SBRACEQ) {
274 *wp++ = CHAR, *wp++ = c;
275 break;
277 *wp++ = OQUOTE;
278 ignore_backslash_newline++;
279 PUSH_STATE(SSQUOTE);
280 break;
281 case '"':
282 *wp++ = OQUOTE;
283 PUSH_STATE(SDQUOTE);
284 break;
285 default:
286 goto Subst;
288 break;
290 Subst:
291 switch (c) {
292 case '\\':
293 c = getsc();
294 switch (c) {
295 case '\\':
296 case '$': case '`':
297 *wp++ = QCHAR, *wp++ = c;
298 break;
299 case '"':
300 if ((cf & HEREDOC) == 0) {
301 *wp++ = QCHAR, *wp++ = c;
302 break;
304 /* FALLTHROUGH */
305 default:
306 if (cf & UNESCAPE) {
307 *wp++ = QCHAR, *wp++ = c;
308 break;
310 Xcheck(ws, wp);
311 if (c) { /* trailing \ is lost */
312 *wp++ = CHAR, *wp++ = '\\';
313 *wp++ = CHAR, *wp++ = c;
315 break;
317 break;
318 case '$':
319 c = getsc();
320 if (c == '(') /*)*/ {
321 c = getsc();
322 if (c == '(') /*)*/ {
323 PUSH_STATE(SASPAREN);
324 statep->ls_sasparen.nparen = 2;
325 statep->ls_sasparen.start =
326 Xsavepos(ws, wp);
327 *wp++ = EXPRSUB;
328 } else {
329 ungetsc(c);
330 PUSH_STATE(SCSPAREN);
331 statep->ls_scsparen.nparen = 1;
332 statep->ls_scsparen.csstate = 0;
333 *wp++ = COMSUB;
335 } else if (c == '{') /*}*/ {
336 *wp++ = OSUBST;
337 *wp++ = '{'; /*}*/
338 wp = get_brace_var(&ws, wp);
339 c = getsc();
340 /* allow :# and :% (ksh88 compat) */
341 if (c == ':') {
342 *wp++ = CHAR, *wp++ = c;
343 c = getsc();
345 /* If this is a trim operation,
346 * treat (,|,) specially in STBRACE.
348 if (c == '#' || c == '%') {
349 ungetsc(c);
350 PUSH_STATE(STBRACE);
351 } else {
352 ungetsc(c);
353 if (state == SDQUOTE ||
354 state == SBRACEQ)
355 PUSH_STATE(SBRACEQ);
356 else
357 PUSH_STATE(SBRACE);
359 } else if (ctype(c, C_ALPHA)) {
360 *wp++ = OSUBST;
361 *wp++ = 'X';
362 do {
363 Xcheck(ws, wp);
364 *wp++ = c;
365 c = getsc();
366 } while (ctype(c, C_ALPHA|C_DIGIT));
367 *wp++ = '\0';
368 *wp++ = CSUBST;
369 *wp++ = 'X';
370 ungetsc(c);
371 } else if (ctype(c, C_DIGIT|C_VAR1)) {
372 Xcheck(ws, wp);
373 *wp++ = OSUBST;
374 *wp++ = 'X';
375 *wp++ = c;
376 *wp++ = '\0';
377 *wp++ = CSUBST;
378 *wp++ = 'X';
379 } else {
380 *wp++ = CHAR, *wp++ = '$';
381 ungetsc(c);
383 break;
384 case '`':
385 PUSH_STATE(SBQUOTE);
386 *wp++ = COMSUB;
387 /* Need to know if we are inside double quotes
388 * since sh/at&t-ksh translate the \" to " in
389 * "`..\"..`".
390 * This is not done in posix mode (section
391 * 3.2.3, Double Quotes: "The backquote shall
392 * retain its special meaning introducing the
393 * other form of command substitution (see
394 * 3.6.3). The portion of the quoted string
395 * from the initial backquote and the
396 * characters up to the next backquote that
397 * is not preceded by a backslash (having
398 * escape characters removed) defines that
399 * command whose output replaces `...` when
400 * the word is expanded."
401 * Section 3.6.3, Command Substitution:
402 * "Within the backquoted style of command
403 * substitution, backslash shall retain its
404 * literal meaning, except when followed by
405 * $ ` \.").
407 statep->ls_sbquote.indquotes = 0;
408 if (!Flag(FPOSIX)) {
409 Lex_state *s = statep;
410 Lex_state *base = state_info.base;
411 while (1) {
412 for (; s != base; s--) {
413 if (s->ls_state == SDQUOTE) {
414 statep->ls_sbquote.indquotes = 1;
415 break;
418 if (s != base)
419 break;
420 if (!(s = s->ls_info.base))
421 break;
422 base = s-- - STATE_BSIZE;
425 break;
426 default:
427 *wp++ = CHAR, *wp++ = c;
429 break;
431 case SSQUOTE:
432 if (c == '\'') {
433 POP_STATE();
434 if (state == SBRACEQ) {
435 *wp++ = CHAR, *wp++ = c;
436 break;
438 *wp++ = CQUOTE;
439 ignore_backslash_newline--;
440 } else
441 *wp++ = QCHAR, *wp++ = c;
442 break;
444 case SDQUOTE:
445 if (c == '"') {
446 POP_STATE();
447 *wp++ = CQUOTE;
448 } else
449 goto Subst;
450 break;
452 case SCSPAREN: /* $( .. ) */
453 /* todo: deal with $(...) quoting properly
454 * kludge to partly fake quoting inside $(..): doesn't
455 * really work because nested $(..) or ${..} inside
456 * double quotes aren't dealt with.
458 switch (statep->ls_scsparen.csstate) {
459 case 0: /* normal */
460 switch (c) {
461 case '(':
462 statep->ls_scsparen.nparen++;
463 break;
464 case ')':
465 statep->ls_scsparen.nparen--;
466 break;
467 case '\\':
468 statep->ls_scsparen.csstate = 1;
469 break;
470 case '"':
471 statep->ls_scsparen.csstate = 2;
472 break;
473 case '\'':
474 statep->ls_scsparen.csstate = 4;
475 ignore_backslash_newline++;
476 break;
478 break;
480 case 1: /* backslash in normal mode */
481 case 3: /* backslash in double quotes */
482 --statep->ls_scsparen.csstate;
483 break;
485 case 2: /* double quotes */
486 if (c == '"')
487 statep->ls_scsparen.csstate = 0;
488 else if (c == '\\')
489 statep->ls_scsparen.csstate = 3;
490 break;
492 case 4: /* single quotes */
493 if (c == '\'') {
494 statep->ls_scsparen.csstate = 0;
495 ignore_backslash_newline--;
497 break;
499 if (statep->ls_scsparen.nparen == 0) {
500 POP_STATE();
501 *wp++ = 0; /* end of COMSUB */
502 } else
503 *wp++ = c;
504 break;
506 case SASPAREN: /* $(( .. )) */
507 /* todo: deal with $((...); (...)) properly */
508 /* XXX should nest using existing state machine
509 * (embed "..", $(...), etc.) */
510 if (c == '(')
511 statep->ls_sasparen.nparen++;
512 else if (c == ')') {
513 statep->ls_sasparen.nparen--;
514 if (statep->ls_sasparen.nparen == 1) {
515 /*(*/
516 if ((c2 = getsc()) == ')') {
517 POP_STATE();
518 *wp++ = 0; /* end of EXPRSUB */
519 break;
520 } else {
521 char *s;
523 ungetsc(c2);
524 /* mismatched parenthesis -
525 * assume we were really
526 * parsing a $(..) expression
528 s = Xrestpos(ws, wp,
529 statep->ls_sasparen.start);
530 memmove(s + 1, s, wp - s);
531 *s++ = COMSUB;
532 *s = '('; /*)*/
533 wp++;
534 statep->ls_scsparen.nparen = 1;
535 statep->ls_scsparen.csstate = 0;
536 state = statep->ls_state =
537 SCSPAREN;
541 *wp++ = c;
542 break;
544 case SBRACEQ:
545 case SBRACE:
546 /*{*/
547 if (c == '}') {
548 POP_STATE();
549 *wp++ = CSUBST;
550 *wp++ = /*{*/ '}';
551 } else
552 goto Sbase1;
553 break;
555 case STBRACE:
556 /* Same as SBRACE, except (,|,) treated specially */
557 /*{*/
558 if (c == '}') {
559 POP_STATE();
560 *wp++ = CSUBST;
561 *wp++ = /*{*/ '}';
562 } else if (c == '|') {
563 *wp++ = SPAT;
564 } else if (c == '(') {
565 *wp++ = OPAT;
566 *wp++ = ' '; /* simile for @ */
567 PUSH_STATE(SPATTERN);
568 } else
569 goto Sbase1;
570 break;
572 case SBQUOTE:
573 if (c == '`') {
574 *wp++ = 0;
575 POP_STATE();
576 } else if (c == '\\') {
577 switch (c = getsc()) {
578 case '\\':
579 case '$': case '`':
580 *wp++ = c;
581 break;
582 case '"':
583 if (statep->ls_sbquote.indquotes) {
584 *wp++ = c;
585 break;
587 /* FALLTHROUGH */
588 default:
589 if (c) { /* trailing \ is lost */
590 *wp++ = '\\';
591 *wp++ = c;
593 break;
595 } else
596 *wp++ = c;
597 break;
599 case SWORD: /* ONEWORD */
600 goto Subst;
602 case SLETPAREN: /* LETEXPR: (( ... )) */
603 /*(*/
604 if (c == ')') {
605 if (statep->ls_sletparen.nparen > 0)
606 --statep->ls_sletparen.nparen;
607 /*(*/
608 else if ((c2 = getsc()) == ')') {
609 c = 0;
610 *wp++ = CQUOTE;
611 goto Done;
612 } else
613 ungetsc(c2);
614 } else if (c == '(')
615 /* parenthesis inside quotes and backslashes
616 * are lost, but at&t ksh doesn't count them
617 * either
619 ++statep->ls_sletparen.nparen;
620 goto Sbase2;
622 case SHEREDELIM: /* <<,<<- delimiter */
623 /* XXX chuck this state (and the next) - use
624 * the existing states ($ and \`..` should be
625 * stripped of their specialness after the
626 * fact).
628 /* here delimiters need a special case since
629 * $ and `..` are not to be treated specially
631 if (c == '\\') {
632 c = getsc();
633 if (c) { /* trailing \ is lost */
634 *wp++ = QCHAR;
635 *wp++ = c;
637 } else if (c == '\'') {
638 PUSH_STATE(SSQUOTE);
639 *wp++ = OQUOTE;
640 ignore_backslash_newline++;
641 } else if (c == '"') {
642 state = statep->ls_state = SHEREDQUOTE;
643 *wp++ = OQUOTE;
644 } else {
645 *wp++ = CHAR;
646 *wp++ = c;
648 break;
650 case SHEREDQUOTE: /* " in <<,<<- delimiter */
651 if (c == '"') {
652 *wp++ = CQUOTE;
653 state = statep->ls_state = SHEREDELIM;
654 } else {
655 if (c == '\\') {
656 switch (c = getsc()) {
657 case '\\': case '"':
658 case '$': case '`':
659 break;
660 default:
661 if (c) { /* trailing \ lost */
662 *wp++ = CHAR;
663 *wp++ = '\\';
665 break;
668 *wp++ = CHAR;
669 *wp++ = c;
671 break;
673 case SPATTERN: /* in *(...|...) pattern (*+?@!) */
674 if ( /*(*/ c == ')') {
675 *wp++ = CPAT;
676 POP_STATE();
677 } else if (c == '|') {
678 *wp++ = SPAT;
679 } else if (c == '(') {
680 *wp++ = OPAT;
681 *wp++ = ' '; /* simile for @ */
682 PUSH_STATE(SPATTERN);
683 } else
684 goto Sbase1;
685 break;
688 Done:
689 Xcheck(ws, wp);
690 if (statep != &states[1])
691 /* XXX figure out what is missing */
692 yyerror("no closing quote\n");
694 /* This done to avoid tests for SHEREDELIM wherever SBASE tested */
695 if (state == SHEREDELIM)
696 state = SBASE;
698 dp = Xstring(ws, wp);
699 if ((c == '<' || c == '>') && state == SBASE &&
700 ((c2 = Xlength(ws, wp)) == 0 ||
701 (c2 == 2 && dp[0] == CHAR && digit(dp[1])))) {
702 struct ioword *iop = (struct ioword *) alloc(sizeof(*iop), ATEMP);
704 if (c2 == 2)
705 iop->unit = dp[1] - '0';
706 else
707 iop->unit = c == '>'; /* 0 for <, 1 for > */
709 c2 = getsc();
710 /* <<, >>, <> are ok, >< is not */
711 if (c == c2 || (c == '<' && c2 == '>')) {
712 iop->flag = c == c2 ?
713 (c == '>' ? IOCAT : IOHERE) : IORDWR;
714 if (iop->flag == IOHERE) {
715 if ((c2 = getsc()) == '-')
716 iop->flag |= IOSKIP;
717 else
718 ungetsc(c2);
720 } else if (c2 == '&')
721 iop->flag = IODUP | (c == '<' ? IORDUP : 0);
722 else {
723 iop->flag = c == '>' ? IOWRITE : IOREAD;
724 if (c == '>' && c2 == '|')
725 iop->flag |= IOCLOB;
726 else
727 ungetsc(c2);
730 iop->name = (char *) 0;
731 iop->delim = (char *) 0;
732 iop->heredoc = (char *) 0;
733 Xfree(ws, wp); /* free word */
734 yylval.iop = iop;
735 return REDIR;
738 if (wp == dp && state == SBASE) {
739 Xfree(ws, wp); /* free word */
740 /* no word, process LEX1 character */
741 switch (c) {
742 default:
743 return c;
745 case '|':
746 case '&':
747 case ';':
748 if ((c2 = getsc()) == c)
749 c = (c == ';') ? BREAK :
750 (c == '|') ? LOGOR :
751 (c == '&') ? LOGAND :
752 YYERRCODE;
753 else if (c == '|' && c2 == '&')
754 c = COPROC;
755 else
756 ungetsc(c2);
757 return c;
759 case '\n':
760 gethere();
761 if (cf & CONTIN)
762 goto Again;
763 return c;
765 case '(': /*)*/
766 if (!Flag(FSH)) {
767 if ((c2 = getsc()) == '(') /*)*/
768 /* XXX need to handle ((...); (...)) */
769 c = MDPAREN;
770 else
771 ungetsc(c2);
773 return c;
774 /*(*/
775 case ')':
776 return c;
780 *wp++ = EOS; /* terminate word */
781 yylval.cp = Xclose(ws, wp);
782 if (state == SWORD || state == SLETPAREN) /* ONEWORD? */
783 return LWORD;
784 ungetsc(c); /* unget terminator */
786 /* copy word to unprefixed string ident */
787 for (sp = yylval.cp, dp = ident; dp < ident+IDENT && (c = *sp++) == CHAR; )
788 *dp++ = *sp++;
789 /* Make sure the ident array stays '\0' padded */
790 memset(dp, 0, (ident+IDENT) - dp + 1);
791 if (c != EOS)
792 *ident = '\0'; /* word is not unquoted */
794 if (*ident != '\0' && (cf&(KEYWORD|ALIAS))) {
795 struct tbl *p;
796 int h = hash(ident);
798 /* { */
799 if ((cf & KEYWORD) && (p = ktsearch(&keywords, ident, h)) &&
800 (!(cf & ESACONLY) || p->val.i == ESAC || p->val.i == '}')) {
801 afree(yylval.cp, ATEMP);
802 return p->val.i;
804 if ((cf & ALIAS) && (p = ktsearch(&aliases, ident, h)) &&
805 (p->flag & ISSET)) {
806 Source *s;
808 for (s = source; s->type == SALIAS; s = s->next)
809 if (s->u.tblp == p)
810 return LWORD;
811 /* push alias expansion */
812 s = pushs(SALIAS, source->areap);
813 s->start = s->str = p->val.s;
814 s->u.tblp = p;
815 s->next = source;
816 source = s;
817 afree(yylval.cp, ATEMP);
818 goto Again;
822 return LWORD;
825 static void
826 gethere(void)
828 struct ioword **p;
830 for (p = heres; p < herep; p++)
831 readhere(*p);
832 herep = heres;
836 * read "<<word" text into temp file
839 static void
840 readhere(struct ioword *iop)
842 int c;
843 char *volatile eof;
844 char *eofp;
845 int skiptabs;
846 XString xs;
847 char *xp;
848 int xpos;
850 eof = evalstr(iop->delim, 0);
852 if (!(iop->flag & IOEVAL))
853 ignore_backslash_newline++;
855 Xinit(xs, xp, 256, ATEMP);
857 for (;;) {
858 eofp = eof;
859 skiptabs = iop->flag & IOSKIP;
860 xpos = Xsavepos(xs, xp);
861 while ((c = getsc()) != 0) {
862 if (skiptabs) {
863 if (c == '\t')
864 continue;
865 skiptabs = 0;
867 if (c != *eofp)
868 break;
869 Xcheck(xs, xp);
870 Xput(xs, xp, c);
871 eofp++;
873 /* Allow EOF here so commands with out trailing newlines
874 * will work (eg, ksh -c '...', $(...), etc).
876 if (*eofp == '\0' && (c == 0 || c == '\n')) {
877 xp = Xrestpos(xs, xp, xpos);
878 break;
880 ungetsc(c);
881 while ((c = getsc()) != '\n') {
882 if (c == 0)
883 yyerror("here document `%s' unclosed\n", eof);
884 Xcheck(xs, xp);
885 Xput(xs, xp, c);
887 Xcheck(xs, xp);
888 Xput(xs, xp, c);
890 Xput(xs, xp, '\0');
891 iop->heredoc = Xclose(xs, xp);
893 if (!(iop->flag & IOEVAL))
894 ignore_backslash_newline--;
897 void
898 yyerror(const char *fmt, ...)
900 va_list va;
902 /* pop aliases and re-reads */
903 while (source->type == SALIAS || source->type == SREREAD)
904 source = source->next;
905 source->str = null; /* zap pending input */
907 error_prefix(true);
908 va_start(va, fmt);
909 shf_vfprintf(shl_out, fmt, va);
910 va_end(va);
911 errorf(null);
915 * input for yylex with alias expansion
918 Source *
919 pushs(int type, Area *areap)
921 Source *s;
923 s = (Source *) alloc(sizeof(Source), areap);
924 s->type = type;
925 s->str = null;
926 s->start = NULL;
927 s->line = 0;
928 s->cmd_offset = 0;
929 s->errline = 0;
930 s->file = NULL;
931 s->flags = 0;
932 s->next = NULL;
933 s->areap = areap;
934 if (type == SFILE || type == SSTDIN) {
935 char *dummy;
936 Xinit(s->xs, dummy, 256, s->areap);
937 } else
938 memset(&s->xs, 0, sizeof(s->xs));
939 return s;
942 static int
943 getsc__(void)
945 Source *s = source;
946 int c;
948 while ((c = *s->str++) == 0) {
949 s->str = NULL; /* return 0 for EOF by default */
950 switch (s->type) {
951 case SEOF:
952 s->str = null;
953 return 0;
955 case SSTDIN:
956 case SFILE:
957 getsc_line(s);
958 break;
960 case SWSTR:
961 break;
963 case SSTRING:
964 break;
966 case SWORDS:
967 s->start = s->str = *s->u.strv++;
968 s->type = SWORDSEP;
969 break;
971 case SWORDSEP:
972 if (*s->u.strv == NULL) {
973 s->start = s->str = newline;
974 s->type = SEOF;
975 } else {
976 s->start = s->str = space;
977 s->type = SWORDS;
979 break;
981 case SALIAS:
982 if (s->flags & SF_ALIASEND) {
983 /* pass on an unused SF_ALIAS flag */
984 source = s->next;
985 source->flags |= s->flags & SF_ALIAS;
986 s = source;
987 } else if (*s->u.tblp->val.s &&
988 isspace(strchr(s->u.tblp->val.s, 0)[-1])) {
989 source = s = s->next; /* pop source stack */
990 /* Note that this alias ended with a space,
991 * enabling alias expansion on the following
992 * word.
994 s->flags |= SF_ALIAS;
995 } else {
996 /* At this point, we need to keep the current
997 * alias in the source list so recursive
998 * aliases can be detected and we also need
999 * to return the next character. Do this
1000 * by temporarily popping the alias to get
1001 * the next character and then put it back
1002 * in the source list with the SF_ALIASEND
1003 * flag set.
1005 source = s->next; /* pop source stack */
1006 source->flags |= s->flags & SF_ALIAS;
1007 c = getsc__();
1008 if (c) {
1009 s->flags |= SF_ALIASEND;
1010 s->ugbuf[0] = c; s->ugbuf[1] = '\0';
1011 s->start = s->str = s->ugbuf;
1012 s->next = source;
1013 source = s;
1014 } else {
1015 s = source;
1016 /* avoid reading eof twice */
1017 s->str = NULL;
1018 break;
1021 continue;
1023 case SREREAD:
1024 if (s->start != s->ugbuf) /* yuck */
1025 afree(s->u.freeme, ATEMP);
1026 source = s = s->next;
1027 continue;
1029 if (s->str == NULL) {
1030 s->type = SEOF;
1031 s->start = s->str = null;
1032 return '\0';
1034 if (s->flags & SF_ECHO) {
1035 shf_puts(s->str, shl_out);
1036 shf_flush(shl_out);
1039 return c;
1042 static void
1043 getsc_line(Source *s)
1045 char *xp = Xstring(s->xs, xp);
1046 int interactive = Flag(FTALKING) && s->type == SSTDIN;
1047 int have_tty = interactive && (s->flags & SF_TTY);
1049 /* Done here to ensure nothing odd happens when a timeout occurs */
1050 XcheckN(s->xs, xp, LINE);
1051 *xp = '\0';
1052 s->start = s->str = xp;
1054 if (have_tty && ksh_tmout) {
1055 ksh_tmout_state = TMOUT_READING;
1056 alarm(ksh_tmout);
1058 #ifdef EDIT
1059 if (have_tty && (0
1060 # ifdef VI
1061 || Flag(FVI)
1062 # endif /* VI */
1063 # ifdef EMACS
1064 || Flag(FEMACS) || Flag(FGMACS)
1065 # endif /* EMACS */
1066 )) {
1067 int nread;
1069 nread = x_read(xp, LINE);
1070 if (nread < 0) /* read error */
1071 nread = 0;
1072 xp[nread] = '\0';
1073 xp += nread;
1075 else
1076 #endif /* EDIT */
1078 if (interactive) {
1079 pprompt(prompt, 0);
1080 } else
1081 s->line++;
1083 while (1) {
1084 char *p = shf_getse(xp, Xnleft(s->xs, xp), s->u.shf);
1086 if (!p && shf_error(s->u.shf) &&
1087 shf_errno(s->u.shf) == EINTR) {
1088 shf_clearerr(s->u.shf);
1089 if (trap)
1090 runtraps(0);
1091 continue;
1093 if (!p || (xp = p, xp[-1] == '\n'))
1094 break;
1095 /* double buffer size */
1096 xp++; /* move past null so doubling works... */
1097 XcheckN(s->xs, xp, Xlength(s->xs, xp));
1098 xp--; /* ...and move back again */
1100 /* flush any unwanted input so other programs/builtins
1101 * can read it. Not very optimal, but less error prone
1102 * than flushing else where, dealing with redirections,
1103 * etc..
1104 * todo: reduce size of shf buffer (~128?) if SSTDIN
1106 if (s->type == SSTDIN)
1107 shf_flush(s->u.shf);
1109 /* XXX: temporary kludge to restore source after a
1110 * trap may have been executed.
1112 source = s;
1113 if (have_tty && ksh_tmout) {
1114 ksh_tmout_state = TMOUT_EXECUTING;
1115 alarm(0);
1117 s->start = s->str = Xstring(s->xs, xp);
1118 strip_nuls(Xstring(s->xs, xp), Xlength(s->xs, xp));
1119 /* Note: if input is all nulls, this is not eof */
1120 if (Xlength(s->xs, xp) == 0) { /* EOF */
1121 if (s->type == SFILE)
1122 shf_fdclose(s->u.shf);
1123 s->str = NULL;
1124 } else if (interactive) {
1125 #ifdef HISTORY
1126 char *p = Xstring(s->xs, xp);
1127 if (cur_prompt == PS1)
1128 while (*p && ctype(*p, C_IFS) && ctype(*p, C_IFSWS))
1129 p++;
1130 if (*p) {
1131 s->line++;
1132 histsave(s->line, s->str, 1);
1134 #endif /* HISTORY */
1136 if (interactive)
1137 set_prompt(PS2, (Source *) 0);
/*
 * Rewrite every backslash-dollar pair in str to backslash-'p', in
 * place.  Returns str.
 */
static char *
special_prompt_expand(char *str)
{
	char *hit;

	for (hit = strstr(str, "\\$"); hit != NULL; hit = strstr(hit, "\\$"))
		hit[1] = 'p';
	return str;
}
1151 void
1152 set_prompt(int to, Source *s)
1154 char *ps1;
1155 Area *saved_atemp;
1157 cur_prompt = to;
1159 switch (to) {
1160 case PS1: /* command */
1161 ps1 = str_save(str_val(global("PS1")), ATEMP);
1162 saved_atemp = ATEMP; /* ps1 is freed by substitute() */
1163 newenv(E_ERRH);
1164 if (sigsetjmp(e->jbuf, 0)) {
1165 prompt = safe_prompt;
1166 /* Don't print an error - assume it has already
1167 * been printed. Reason is we may have forked
1168 * to run a command and the child may be
1169 * unwinding its stack through this code as it
1170 * exits.
1172 } else {
1173 /* expand \$ before other substitutions are done */
1174 char *tmp = special_prompt_expand(ps1);
1175 prompt = str_save(substitute(tmp, 0), saved_atemp);
1177 quitenv(NULL);
1178 break;
1179 case PS2: /* command continuation */
1180 prompt = str_val(global("PS2"));
1181 break;
1185 static int
1186 dopprompt(const char *sp, int ntruncate, const char **spp, int doprint)
1188 char strbuf[1024], tmpbuf[1024], *p, *str, nbuf[32], delimiter = '\0';
1189 int len, c, n, totlen = 0, indelimit = 0, counting = 1, delimitthis;
1190 const char *cp = sp;
1191 struct tm *tm;
1192 time_t t;
1194 if (*cp && cp[1] == '\r') {
1195 delimiter = *cp;
1196 cp += 2;
1199 while (*cp != 0) {
1200 delimitthis = 0;
1201 if (indelimit && *cp != delimiter)
1203 else if (*cp == '\n' || *cp == '\r') {
1204 totlen = 0;
1205 sp = cp + 1;
1206 } else if (*cp == '\t') {
1207 if (counting)
1208 totlen = (totlen | 7) + 1;
1209 } else if (*cp == delimiter) {
1210 indelimit = !indelimit;
1211 delimitthis = 1;
1214 if (*cp == '\\') {
1215 cp++;
1216 if (!*cp)
1217 break;
1218 if (Flag(FSH))
1219 snprintf(strbuf, sizeof strbuf, "\\%c", *cp);
1220 else switch (*cp) {
1221 case 'a': /* '\' 'a' bell */
1222 strbuf[0] = '\007';
1223 strbuf[1] = '\0';
1224 break;
1225 case 'd': /* '\' 'd' Dow Mon DD */
1226 time(&t);
1227 tm = localtime(&t);
1228 strftime(strbuf, sizeof strbuf, "%a %b %d", tm);
1229 break;
1230 case 'D': /* '\' 'D' '{' strftime format '}' */
1231 p = strchr(cp + 2, '}');
1232 if (cp[1] != '{' || p == NULL) {
1233 snprintf(strbuf, sizeof strbuf,
1234 "\\%c", *cp);
1235 break;
1237 strlcpy(tmpbuf, cp + 2, sizeof tmpbuf);
1238 p = strchr(tmpbuf, '}');
1239 if (p)
1240 *p = '\0';
1241 time(&t);
1242 tm = localtime(&t);
1243 strftime(strbuf, sizeof strbuf, tmpbuf, tm);
1244 cp = strchr(cp + 2, '}');
1245 break;
1246 case 'e': /* '\' 'e' escape */
1247 strbuf[0] = '\033';
1248 strbuf[1] = '\0';
1249 break;
1250 case 'h': /* '\' 'h' shortened hostname */
1251 gethostname(strbuf, sizeof strbuf);
1252 p = strchr(strbuf, '.');
1253 if (p)
1254 *p = '\0';
1255 break;
1256 case 'H': /* '\' 'H' full hostname */
1257 gethostname(strbuf, sizeof strbuf);
1258 break;
1259 case 'j': /* '\' 'j' number of jobs */
1260 snprintf(strbuf, sizeof strbuf, "%d",
1261 j_njobs());
1262 break;
1263 case 'l': /* '\' 'l' basename of tty */
1264 p = ttyname(0);
1265 if (p)
1266 p = basename(p);
1267 if (p)
1268 strlcpy(strbuf, p, sizeof strbuf);
1269 break;
1270 case 'n': /* '\' 'n' newline */
1271 strbuf[0] = '\n';
1272 strbuf[1] = '\0';
1273 totlen = 0; /* reset for prompt re-print */
1274 sp = cp + 1;
1275 break;
1276 case 'p': /* '\' '$' $ or # */
1277 strbuf[0] = ksheuid ? '$' : '#';
1278 strbuf[1] = '\0';
1279 break;
1280 case 'r': /* '\' 'r' return */
1281 strbuf[0] = '\r';
1282 strbuf[1] = '\0';
1283 totlen = 0; /* reset for prompt re-print */
1284 sp = cp + 1;
1285 break;
1286 case 's': /* '\' 's' basename $0 */
1287 strlcpy(strbuf, kshname, sizeof strbuf);
1288 break;
1289 case 't': /* '\' 't' 24 hour HH:MM:SS */
1290 time(&t);
1291 tm = localtime(&t);
1292 strftime(strbuf, sizeof strbuf, "%T", tm);
1293 break;
1294 case 'T': /* '\' 'T' 12 hour HH:MM:SS */
1295 time(&t);
1296 tm = localtime(&t);
1297 strftime(strbuf, sizeof strbuf, "%l:%M:%S", tm);
1298 break;
1299 case '@': /* '\' '@' 12 hour am/pm format */
1300 time(&t);
1301 tm = localtime(&t);
1302 strftime(strbuf, sizeof strbuf, "%r", tm);
1303 break;
1304 case 'A': /* '\' 'A' 24 hour HH:MM */
1305 time(&t);
1306 tm = localtime(&t);
1307 strftime(strbuf, sizeof strbuf, "%R", tm);
1308 break;
1309 case 'u': /* '\' 'u' username */
1310 strlcpy(strbuf, username, sizeof strbuf);
1311 break;
1312 case 'v': /* '\' 'v' version (short) */
1313 p = strchr(ksh_version, ' ');
1314 if (p)
1315 p = strchr(p + 1, ' ');
1316 if (p) {
1317 p++;
1318 strlcpy(strbuf, p, sizeof strbuf);
1319 p = strchr(strbuf, ' ');
1320 if (p)
1321 *p = '\0';
1323 break;
1324 case 'V': /* '\' 'V' version (long) */
1325 strlcpy(strbuf, ksh_version, sizeof strbuf);
1326 break;
1327 case 'w': /* '\' 'w' cwd */
1328 p = str_val(global("PWD"));
1329 n = strlen(str_val(global("HOME")));
1330 if (strcmp(p, "/") == 0) {
1331 strlcpy(strbuf, p, sizeof strbuf);
1332 } else if (strcmp(p, str_val(global("HOME"))) == 0) {
1333 strbuf[0] = '~';
1334 strbuf[1] = '\0';
1335 } else if (strncmp(p, str_val(global("HOME")), n)
1336 == 0 && p[n] == '/') {
1337 snprintf(strbuf, sizeof strbuf, "~/%s",
1338 str_val(global("PWD")) + n + 1);
1339 } else
1340 strlcpy(strbuf, p, sizeof strbuf);
1341 break;
1342 case 'W': /* '\' 'W' basename(cwd) */
1343 p = str_val(global("PWD"));
1344 if (strcmp(p, str_val(global("HOME"))) == 0) {
1345 strbuf[0] = '~';
1346 strbuf[1] = '\0';
1347 } else
1348 strlcpy(strbuf, basename(p), sizeof strbuf);
1349 break;
1350 case '!': /* '\' '!' history line number */
1351 snprintf(strbuf, sizeof strbuf, "%d",
1352 source->line + 1);
1353 break;
1354 case '#': /* '\' '#' command line number */
1355 snprintf(strbuf, sizeof strbuf, "%d",
1356 source->line - source->cmd_offset + 1);
1357 break;
1358 case '0': /* '\' '#' '#' ' #' octal numeric handling */
1359 case '1':
1360 case '2':
1361 case '3':
1362 case '4':
1363 case '5':
1364 case '6':
1365 case '7':
1366 if ((cp[1] > '7' || cp[1] < '0') ||
1367 (cp[2] > '7' || cp[2] < '0')) {
1368 snprintf(strbuf, sizeof strbuf,
1369 "\\%c", *cp);
1370 break;
1372 n = cp[0] * 8 * 8 + cp[1] * 8 + cp[2];
1373 snprintf(strbuf, sizeof strbuf, "%c", n);
1374 cp += 2;
1375 break;
1376 case '\\': /* '\' '\' */
1377 strbuf[0] = '\\';
1378 strbuf[1] = '\0';
1379 break;
1380 case '[': /* '\' '[' .... stop counting */
1381 strbuf[0] = '\0';
1382 counting = 0;
1383 break;
1384 case ']': /* '\' ']' restart counting */
1385 strbuf[0] = '\0';
1386 counting = 1;
1387 break;
1389 default:
1390 snprintf(strbuf, sizeof strbuf, "\\%c", *cp);
1391 break;
1393 cp++;
1395 str = strbuf;
1396 len = strlen(str);
1397 if (ntruncate) {
1398 if (ntruncate >= len) {
1399 ntruncate -= len;
1400 continue;
1402 str += ntruncate;
1403 len -= ntruncate;
1404 ntruncate = 0;
1406 if (doprint)
1407 shf_write(str, len, shl_out);
1408 if (counting && !indelimit && !delimitthis)
1409 totlen += len;
1410 continue;
1411 } else if (*cp != '!')
1412 c = *cp++;
1413 else if (*++cp == '!')
1414 c = *cp++;
1415 else {
1416 char *p;
1418 shf_snprintf(p = nbuf, sizeof(nbuf), "%d",
1419 source->line + 1);
1420 len = strlen(nbuf);
1421 if (ntruncate) {
1422 if (ntruncate >= len) {
1423 ntruncate -= len;
1424 continue;
1426 p += ntruncate;
1427 len -= ntruncate;
1428 ntruncate = 0;
1430 if (doprint)
1431 shf_write(p, len, shl_out);
1432 if (counting && !indelimit && !delimitthis)
1433 totlen += len;
1434 continue;
1436 if (counting && ntruncate)
1437 --ntruncate;
1438 else if (doprint) {
1439 shf_putc(c, shl_out);
1441 if (counting && !indelimit && !delimitthis)
1442 totlen++;
1444 if (doprint)
1445 shf_flush(shl_out);
1446 if (spp)
1447 *spp = sp;
1448 return (totlen);
1451 void
1452 pprompt(const char *cp, int ntruncate)
1454 dopprompt(cp, ntruncate, NULL, 1);
/*
 * Return the length dopprompt() computes for the prompt string cp.
 *
 * Calls dopprompt() with no truncation (0) and with the last argument 0
 * (presumably "don't print" -- confirm against dopprompt()).  spp is
 * handed through as-is; dopprompt() stores a pointer through it when it
 * is non-NULL.
 */
int
promptlen(const char *cp, const char **spp)
{
	int width;

	width = dopprompt(cp, 0, spp, 0);
	return width;
}
1463 /* Read the variable part of a ${...} expression (ie, up to but not including
1464 * the :[-+?=#%] or close-brace.
1466 static char *
1467 get_brace_var(XString *wsp, char *wp)
1469 enum parse_state {
1470 PS_INITIAL, PS_SAW_HASH, PS_IDENT,
1471 PS_NUMBER, PS_VAR1, PS_END
1473 state;
1474 char c;
1476 state = PS_INITIAL;
1477 while (1) {
1478 c = getsc();
1479 /* State machine to figure out where the variable part ends. */
1480 switch (state) {
1481 case PS_INITIAL:
1482 if (c == '#') {
1483 state = PS_SAW_HASH;
1484 break;
1486 /* FALLTHROUGH */
1487 case PS_SAW_HASH:
1488 if (letter(c))
1489 state = PS_IDENT;
1490 else if (digit(c))
1491 state = PS_NUMBER;
1492 else if (ctype(c, C_VAR1))
1493 state = PS_VAR1;
1494 else
1495 state = PS_END;
1496 break;
1497 case PS_IDENT:
1498 if (!letnum(c)) {
1499 state = PS_END;
1500 if (c == '[') {
1501 char *tmp, *p;
1503 if (!arraysub(&tmp))
1504 yyerror("missing ]\n");
1505 *wp++ = c;
1506 for (p = tmp; *p; ) {
1507 Xcheck(*wsp, wp);
1508 *wp++ = *p++;
1510 afree(tmp, ATEMP);
1511 c = getsc(); /* the ] */
1514 break;
1515 case PS_NUMBER:
1516 if (!digit(c))
1517 state = PS_END;
1518 break;
1519 case PS_VAR1:
1520 state = PS_END;
1521 break;
1522 case PS_END: /* keep gcc happy */
1523 break;
1525 if (state == PS_END) {
1526 *wp++ = '\0'; /* end of variable part */
1527 ungetsc(c);
1528 break;
1530 Xcheck(*wsp, wp);
1531 *wp++ = c;
1533 return wp;
1537 * Save an array subscript - returns true if matching bracket found, false
1538 * if eof or newline was found.
1539 * (Returned string double null terminated)
1541 static int
1542 arraysub(char **strp)
1544 XString ws;
1545 char *wp;
1546 char c;
1547 int depth = 1; /* we are just past the initial [ */
1549 Xinit(ws, wp, 32, ATEMP);
1551 do {
1552 c = getsc();
1553 Xcheck(ws, wp);
1554 *wp++ = c;
1555 if (c == '[')
1556 depth++;
1557 else if (c == ']')
1558 depth--;
1559 } while (depth > 0 && c && c != '\n');
1561 *wp++ = '\0';
1562 *strp = Xclose(ws, wp);
1564 return depth == 0 ? 1 : 0;
1567 /* Unget a char: handles case when we are already at the start of the buffer */
1568 static const char *
1569 ungetsc(int c)
1571 if (backslash_skip)
1572 backslash_skip--;
1573 /* Don't unget eof... */
1574 if (source->str == null && c == '\0')
1575 return source->str;
1576 if (source->str > source->start)
1577 source->str--;
1578 else {
1579 Source *s;
1581 s = pushs(SREREAD, source->areap);
1582 s->ugbuf[0] = c; s->ugbuf[1] = '\0';
1583 s->start = s->str = s->ugbuf;
1584 s->next = source;
1585 source = s;
1587 return source->str;
1591 /* Called to get a char that isn't a \newline sequence. */
1592 static int
1593 getsc_bn(void)
1595 int c, c2;
1597 if (ignore_backslash_newline)
1598 return getsc_();
1600 if (backslash_skip == 1) {
1601 backslash_skip = 2;
1602 return getsc_();
1605 backslash_skip = 0;
1607 while (1) {
1608 c = getsc_();
1609 if (c == '\\') {
1610 if ((c2 = getsc_()) == '\n')
1611 /* ignore the \newline; get the next char... */
1612 continue;
1613 ungetsc(c2);
1614 backslash_skip = 1;
1616 return c;
1620 static Lex_state *
1621 push_state_(State_info *si, Lex_state *old_end)
1623 Lex_state *new = alloc(sizeof(Lex_state) * STATE_BSIZE, ATEMP);
1625 new[0].ls_info.base = old_end;
1626 si->base = &new[0];
1627 si->end = &new[STATE_BSIZE];
1628 return &new[1];
1631 static Lex_state *
1632 pop_state_(State_info *si, Lex_state *old_end)
1634 Lex_state *old_base = si->base;
1636 si->base = old_end->ls_info.base - STATE_BSIZE;
1637 si->end = old_end->ls_info.base;
1639 afree(old_base, ATEMP);
1641 return si->base + STATE_BSIZE - 1;