/*
 * Patrick Welche <prlw1@cam.ac.uk>
 * [netbsd-mini2440.git] / bin / ksh / lex.c
 * blob 5c28e01989c7499893bd650f292eed9aaeed41b1
 */
1 /* $NetBSD: lex.c,v 1.12 2005/09/11 22:16:00 christos Exp $ */
3 /*
4 * lexical analysis and source input
5 */
6 #include <sys/cdefs.h>
8 #ifndef lint
9 __RCSID("$NetBSD: lex.c,v 1.12 2005/09/11 22:16:00 christos Exp $");
10 #endif
13 #include "sh.h"
14 #include <ctype.h>
/* Structure to keep track of the lexing state and the various pieces of info
 * needed for each particular state.
 *
 * One Lex_state is one slot of the lexer's state stack.  Which member of
 * ls_info is meaningful depends on ls_state; the ls_* macros below are
 * shorthands for reaching the per-state members through a Lex_state.
 * Slot 0 of each stack chunk is not a real state: yylex() and push_state_()
 * only fill in its ls_info.base link.
 */
typedef struct lex_state Lex_state;
struct lex_state {
	int ls_state;		/* lexer state this slot describes */
	union {
	    /* $(...) */
	    struct scsparen_info {
		    int nparen;		/* count open parenthesis */
		    int csstate; /* XXX remove */
#define ls_scsparen	ls_info.u_scsparen
	    } u_scsparen;

	    /* $((...)) */
	    struct sasparen_info {
		    int nparen;		/* count open parenthesis */
		    int start;		/* marks start of $(( in output str */
#define ls_sasparen	ls_info.u_sasparen
	    } u_sasparen;

	    /* ((...)) */
	    struct sletparen_info {
		    int nparen;		/* count open parenthesis */
#define ls_sletparen	ls_info.u_sletparen
	    } u_sletparen;

	    /* `...` */
	    struct sbquote_info {
		    int indquotes;	/* true if in double quotes: "`...`" */
#define ls_sbquote	ls_info.u_sbquote
	    } u_sbquote;

	    Lex_state *base;	/* used to point to next state block */
	} ls_info;
};
54 typedef struct State_info State_info;
55 struct State_info {
56 Lex_state *base;
57 Lex_state *end;
61 static void readhere ARGS((struct ioword *iop));
62 static int getsc__ ARGS((void));
63 static void getsc_line ARGS((Source *s));
64 static int getsc_bn ARGS((void));
65 static char *get_brace_var ARGS((XString *wsp, char *wp));
66 static int arraysub ARGS((char **strp));
67 static const char *ungetsc ARGS((int c));
68 static void gethere ARGS((void));
69 static Lex_state *push_state_ ARGS((State_info *si, Lex_state *old_end));
70 static Lex_state *pop_state_ ARGS((State_info *si, Lex_state *old_end));
72 static int backslash_skip;
73 static int ignore_backslash_newline;
/* optimized getsc_bn(): take the next input character straight from the
 * current source when it is neither NUL nor a backslash and no
 * backslash_skip is pending; every other case goes through getsc_bn().
 */
#define getsc()		(*source->str != '\0' && *source->str != '\\' \
			 && !backslash_skip ? *source->str++ : getsc_bn())
/* optimized getsc__(): take the next character straight from the current
 * source unless it is NUL, in which case getsc__() is called.
 */
#define	getsc_()	((*source->str != '\0') ? *source->str++ : getsc__())
/* Number of Lex_state slots in one chunk of the state stack. */
#define STATE_BSIZE	32

/* Enter lexer state s.  Both macros expect the locals `statep', `state'
 * and `state_info' of yylex() to be in scope.  When the current chunk is
 * full, push_state_() supplies a new one.
 */
#define PUSH_STATE(s)	do { \
			    if (++statep == state_info.end) \
				statep = push_state_(&state_info, statep); \
			    state = statep->ls_state = (s); \
			} while (0)

/* Leave the current lexer state and resume the one below it.  When the
 * stack pointer reaches slot 0 of the chunk, pop_state_() switches back to
 * the previous chunk.
 */
#define POP_STATE()	do { \
			    if (--statep == state_info.base) \
				statep = pop_state_(&state_info, statep); \
			    state = statep->ls_state; \
			} while (0)
98 * Lexical analyzer
100 * tokens are not regular expressions, they are LL(1).
101 * for example, "${var:-${PWD}}", and "$(size $(whence ksh))".
102 * hence the state stack.
106 yylex(cf)
107 int cf;
109 Lex_state states[STATE_BSIZE], *statep;
110 State_info state_info;
111 register int c, state;
112 XString ws; /* expandable output word */
113 register char *wp; /* output word pointer */
114 char *sp, *dp;
115 int c2;
118 Again:
119 states[0].ls_state = -1;
120 states[0].ls_info.base = (Lex_state *) 0;
121 statep = &states[1];
122 state_info.base = states;
123 state_info.end = &states[STATE_BSIZE];
125 Xinit(ws, wp, 64, ATEMP);
127 backslash_skip = 0;
128 ignore_backslash_newline = 0;
130 if (cf&ONEWORD)
131 state = SWORD;
132 #ifdef KSH
133 else if (cf&LETEXPR) {
134 *wp++ = OQUOTE; /* enclose arguments in (double) quotes */
135 state = SLETPAREN;
136 statep->ls_sletparen.nparen = 0;
138 #endif /* KSH */
139 else { /* normal lexing */
140 state = (cf & HEREDELIM) ? SHEREDELIM : SBASE;
141 while ((c = getsc()) == ' ' || c == '\t')
143 if (c == '#') {
144 ignore_backslash_newline++;
145 while ((c = getsc()) != '\0' && c != '\n')
147 ignore_backslash_newline--;
149 ungetsc(c);
151 if (source->flags & SF_ALIAS) { /* trailing ' ' in alias definition */
152 source->flags &= ~SF_ALIAS;
153 /* In POSIX mode, a trailing space only counts if we are
154 * parsing a simple command
156 if (!Flag(FPOSIX) || (cf & CMDWORD))
157 cf |= ALIAS;
160 /* Initial state: one of SBASE SHEREDELIM SWORD SASPAREN */
161 statep->ls_state = state;
163 /* collect non-special or quoted characters to form word */
164 while (!((c = getsc()) == 0
165 || ((state == SBASE || state == SHEREDELIM)
166 && ctype(c, C_LEX1))))
168 Xcheck(ws, wp);
169 switch (state) {
170 case SBASE:
171 if (c == '[' && (cf & (VARASN|ARRAYVAR))) {
172 *wp = EOS; /* temporary */
173 if (is_wdvarname(Xstring(ws, wp), FALSE))
175 char *p, *tmp;
177 if (arraysub(&tmp)) {
178 *wp++ = CHAR;
179 *wp++ = c;
180 for (p = tmp; *p; ) {
181 Xcheck(ws, wp);
182 *wp++ = CHAR;
183 *wp++ = *p++;
185 afree(tmp, ATEMP);
186 break;
187 } else {
188 Source *s;
190 s = pushs(SREREAD,
191 source->areap);
192 s->start = s->str
193 = s->u.freeme = tmp;
194 s->next = source;
195 source = s;
198 *wp++ = CHAR;
199 *wp++ = c;
200 break;
202 /* fall through.. */
203 Sbase1: /* includes *(...|...) pattern (*+?@!) */
204 #ifdef KSH
205 if (c == '*' || c == '@' || c == '+' || c == '?'
206 || c == '!')
208 c2 = getsc();
209 if (c2 == '(' /*)*/ ) {
210 *wp++ = OPAT;
211 *wp++ = c;
212 PUSH_STATE(SPATTERN);
213 break;
215 ungetsc(c2);
217 #endif /* KSH */
218 /* fall through.. */
219 Sbase2: /* doesn't include *(...|...) pattern (*+?@!) */
220 switch (c) {
221 case '\\':
222 c = getsc();
223 #ifdef OS2
224 if (isalnum((unsigned char)c)) {
225 *wp++ = CHAR, *wp++ = '\\';
226 *wp++ = CHAR, *wp++ = c;
227 } else
228 #endif
229 if (c) /* trailing \ is lost */
230 *wp++ = QCHAR, *wp++ = c;
231 break;
232 case '\'':
233 *wp++ = OQUOTE;
234 ignore_backslash_newline++;
235 PUSH_STATE(SSQUOTE);
236 break;
237 case '"':
238 *wp++ = OQUOTE;
239 PUSH_STATE(SDQUOTE);
240 break;
241 default:
242 goto Subst;
244 break;
246 Subst:
247 switch (c) {
248 case '\\':
249 c = getsc();
250 switch (c) {
251 case '\\':
252 case '$': case '`':
253 *wp++ = QCHAR, *wp++ = c;
254 break;
255 case '"':
256 if ((cf & HEREDOC) == 0) {
257 *wp++ = QCHAR, *wp++ = c;
258 break;
260 /* FALLTROUGH */
261 default:
262 Xcheck(ws, wp);
263 if (c) { /* trailing \ is lost */
264 *wp++ = CHAR, *wp++ = '\\';
265 *wp++ = CHAR, *wp++ = c;
267 break;
269 break;
270 case '$':
271 c = getsc();
272 if (c == '(') /*)*/ {
273 c = getsc();
274 if (c == '(') /*)*/ {
275 PUSH_STATE(SASPAREN);
276 statep->ls_sasparen.nparen = 2;
277 statep->ls_sasparen.start =
278 Xsavepos(ws, wp);
279 *wp++ = EXPRSUB;
280 } else {
281 ungetsc(c);
282 PUSH_STATE(SCSPAREN);
283 statep->ls_scsparen.nparen = 1;
284 statep->ls_scsparen.csstate = 0;
285 *wp++ = COMSUB;
287 } else if (c == '{') /*}*/ {
288 *wp++ = OSUBST;
289 *wp++ = '{'; /*}*/
290 wp = get_brace_var(&ws, wp);
291 c = getsc();
292 /* allow :# and :% (ksh88 compat) */
293 if (c == ':') {
294 *wp++ = CHAR, *wp++ = c;
295 c = getsc();
297 /* If this is a trim operation,
298 * treat (,|,) specially in STBRACE.
300 if (c == '#' || c == '%') {
301 ungetsc(c);
302 PUSH_STATE(STBRACE);
303 } else {
304 ungetsc(c);
305 PUSH_STATE(SBRACE);
307 } else if (ctype(c, C_ALPHA)) {
308 *wp++ = OSUBST;
309 *wp++ = 'X';
310 do {
311 Xcheck(ws, wp);
312 *wp++ = c;
313 c = getsc();
314 } while (ctype(c, C_ALPHA|C_DIGIT));
315 *wp++ = '\0';
316 *wp++ = CSUBST;
317 *wp++ = 'X';
318 ungetsc(c);
319 } else if (ctype(c, C_DIGIT|C_VAR1)) {
320 Xcheck(ws, wp);
321 *wp++ = OSUBST;
322 *wp++ = 'X';
323 *wp++ = c;
324 *wp++ = '\0';
325 *wp++ = CSUBST;
326 *wp++ = 'X';
327 } else {
328 *wp++ = CHAR, *wp++ = '$';
329 ungetsc(c);
331 break;
332 case '`':
333 PUSH_STATE(SBQUOTE);
334 *wp++ = COMSUB;
335 /* Need to know if we are inside double quotes
336 * since sh/at&t-ksh translate the \" to " in
337 * "`..\"..`". POSIX also requires this.
338 * An earlier version of ksh misinterpreted
339 * the POSIX specification and performed
340 * removal of backslash escapes only if
341 * posix mode was not in effect.
343 statep->ls_sbquote.indquotes = 0;
344 Lex_state *s = statep;
345 Lex_state *base = state_info.base;
346 while (1) {
347 for (; s != base; s--) {
348 if (s->ls_state == SDQUOTE) {
349 statep->ls_sbquote.indquotes = 1;
350 break;
353 if (s != base)
354 break;
355 if (!(s = s->ls_info.base))
356 break;
357 base = s-- - STATE_BSIZE;
359 break;
360 default:
361 *wp++ = CHAR, *wp++ = c;
363 break;
365 case SSQUOTE:
366 if (c == '\'') {
367 POP_STATE();
368 *wp++ = CQUOTE;
369 ignore_backslash_newline--;
370 } else
371 *wp++ = QCHAR, *wp++ = c;
372 break;
374 case SDQUOTE:
375 if (c == '"') {
376 POP_STATE();
377 *wp++ = CQUOTE;
378 } else
379 goto Subst;
380 break;
382 case SCSPAREN: /* $( .. ) */
383 /* todo: deal with $(...) quoting properly
384 * kludge to partly fake quoting inside $(..): doesn't
385 * really work because nested $(..) or ${..} inside
386 * double quotes aren't dealt with.
388 switch (statep->ls_scsparen.csstate) {
389 case 0: /* normal */
390 switch (c) {
391 case '(':
392 statep->ls_scsparen.nparen++;
393 break;
394 case ')':
395 statep->ls_scsparen.nparen--;
396 break;
397 case '\\':
398 statep->ls_scsparen.csstate = 1;
399 break;
400 case '"':
401 statep->ls_scsparen.csstate = 2;
402 break;
403 case '\'':
404 statep->ls_scsparen.csstate = 4;
405 ignore_backslash_newline++;
406 break;
408 break;
410 case 1: /* backslash in normal mode */
411 case 3: /* backslash in double quotes */
412 --statep->ls_scsparen.csstate;
413 break;
415 case 2: /* double quotes */
416 if (c == '"')
417 statep->ls_scsparen.csstate = 0;
418 else if (c == '\\')
419 statep->ls_scsparen.csstate = 3;
420 break;
422 case 4: /* single quotes */
423 if (c == '\'') {
424 statep->ls_scsparen.csstate = 0;
425 ignore_backslash_newline--;
427 break;
429 if (statep->ls_scsparen.nparen == 0) {
430 POP_STATE();
431 *wp++ = 0; /* end of COMSUB */
432 } else
433 *wp++ = c;
434 break;
436 case SASPAREN: /* $(( .. )) */
437 /* todo: deal with $((...); (...)) properly */
438 /* XXX should nest using existing state machine
439 * (embed "..", $(...), etc.) */
440 if (c == '(')
441 statep->ls_sasparen.nparen++;
442 else if (c == ')') {
443 statep->ls_sasparen.nparen--;
444 if (statep->ls_sasparen.nparen == 1) {
445 /*(*/
446 if ((c2 = getsc()) == ')') {
447 POP_STATE();
448 *wp++ = 0; /* end of EXPRSUB */
449 break;
450 } else {
451 char *s;
453 ungetsc(c2);
454 /* mismatched parenthesis -
455 * assume we were really
456 * parsing a $(..) expression
458 s = Xrestpos(ws, wp,
459 statep->ls_sasparen.start);
460 memmove(s + 1, s, wp - s);
461 *s++ = COMSUB;
462 *s = '('; /*)*/
463 wp++;
464 statep->ls_scsparen.nparen = 1;
465 statep->ls_scsparen.csstate = 0;
466 state = statep->ls_state
467 = SCSPAREN;
472 *wp++ = c;
473 break;
475 case SBRACE:
476 /*{*/
477 if (c == '}') {
478 POP_STATE();
479 *wp++ = CSUBST;
480 *wp++ = /*{*/ '}';
481 } else
482 goto Sbase1;
483 break;
485 case STBRACE:
486 /* Same as SBRACE, except (,|,) treated specially */
487 /*{*/
488 if (c == '}') {
489 POP_STATE();
490 *wp++ = CSUBST;
491 *wp++ = /*{*/ '}';
492 } else if (c == '|') {
493 *wp++ = SPAT;
494 } else if (c == '(') {
495 *wp++ = OPAT;
496 *wp++ = ' '; /* simile for @ */
497 PUSH_STATE(SPATTERN);
498 } else
499 goto Sbase1;
500 break;
502 case SBQUOTE:
503 if (c == '`') {
504 *wp++ = 0;
505 POP_STATE();
506 } else if (c == '\\') {
507 switch (c = getsc()) {
508 case '\\':
509 case '$': case '`':
510 *wp++ = c;
511 break;
512 case '"':
513 if (statep->ls_sbquote.indquotes) {
514 *wp++ = c;
515 break;
517 /* fall through.. */
518 default:
519 if (c) { /* trailing \ is lost */
520 *wp++ = '\\';
521 *wp++ = c;
523 break;
525 } else
526 *wp++ = c;
527 break;
529 case SWORD: /* ONEWORD */
530 goto Subst;
532 #ifdef KSH
533 case SLETPAREN: /* LETEXPR: (( ... )) */
534 /*(*/
535 if (c == ')') {
536 if (statep->ls_sletparen.nparen > 0)
537 --statep->ls_sletparen.nparen;
538 /*(*/
539 else if ((c2 = getsc()) == ')') {
540 c = 0;
541 *wp++ = CQUOTE;
542 goto Done;
543 } else
544 ungetsc(c2);
545 } else if (c == '(')
546 /* parenthesis inside quotes and backslashes
547 * are lost, but at&t ksh doesn't count them
548 * either
550 ++statep->ls_sletparen.nparen;
551 goto Sbase2;
552 #endif /* KSH */
554 case SHEREDELIM: /* <<,<<- delimiter */
555 /* XXX chuck this state (and the next) - use
556 * the existing states ($ and \`..` should be
557 * stripped of their specialness after the
558 * fact).
560 /* here delimiters need a special case since
561 * $ and `..` are not to be treated specially
563 if (c == '\\') {
564 c = getsc();
565 if (c) { /* trailing \ is lost */
566 *wp++ = QCHAR;
567 *wp++ = c;
569 } else if (c == '\'') {
570 PUSH_STATE(SSQUOTE);
571 *wp++ = OQUOTE;
572 ignore_backslash_newline++;
573 } else if (c == '"') {
574 state = statep->ls_state = SHEREDQUOTE;
575 *wp++ = OQUOTE;
576 } else {
577 *wp++ = CHAR;
578 *wp++ = c;
580 break;
582 case SHEREDQUOTE: /* " in <<,<<- delimiter */
583 if (c == '"') {
584 *wp++ = CQUOTE;
585 state = statep->ls_state = SHEREDELIM;
586 } else {
587 if (c == '\\') {
588 switch (c = getsc()) {
589 case '\\': case '"':
590 case '$': case '`':
591 break;
592 default:
593 if (c) { /* trailing \ lost */
594 *wp++ = CHAR;
595 *wp++ = '\\';
597 break;
600 *wp++ = CHAR;
601 *wp++ = c;
603 break;
605 case SPATTERN: /* in *(...|...) pattern (*+?@!) */
606 if ( /*(*/ c == ')') {
607 *wp++ = CPAT;
608 POP_STATE();
609 } else if (c == '|') {
610 *wp++ = SPAT;
611 } else if (c == '(') {
612 *wp++ = OPAT;
613 *wp++ = ' '; /* simile for @ */
614 PUSH_STATE(SPATTERN);
615 } else
616 goto Sbase1;
617 break;
620 Done:
621 Xcheck(ws, wp);
622 if (statep != &states[1])
623 /* XXX figure out what is missing */
624 yyerror("no closing quote\n");
626 /* This done to avoid tests for SHEREDELIM wherever SBASE tested */
627 if (state == SHEREDELIM)
628 state = SBASE;
630 dp = Xstring(ws, wp);
631 if ((c == '<' || c == '>') && state == SBASE
632 && ((c2 = Xlength(ws, wp)) == 0
633 || (c2 == 2 && dp[0] == CHAR && digit(dp[1]))))
635 struct ioword *iop =
636 (struct ioword *) alloc(sizeof(*iop), ATEMP);
638 if (c2 == 2)
639 iop->unit = dp[1] - '0';
640 else
641 iop->unit = c == '>'; /* 0 for <, 1 for > */
643 c2 = getsc();
644 /* <<, >>, <> are ok, >< is not */
645 if (c == c2 || (c == '<' && c2 == '>')) {
646 iop->flag = c == c2 ?
647 (c == '>' ? IOCAT : IOHERE) : IORDWR;
648 if (iop->flag == IOHERE) {
649 if ((c2 = getsc()) == '-') {
650 iop->flag |= IOSKIP;
651 } else {
652 ungetsc(c2);
655 } else if (c2 == '&')
656 iop->flag = IODUP | (c == '<' ? IORDUP : 0);
657 else {
658 iop->flag = c == '>' ? IOWRITE : IOREAD;
659 if (c == '>' && c2 == '|')
660 iop->flag |= IOCLOB;
661 else
662 ungetsc(c2);
665 iop->name = (char *) 0;
666 iop->delim = (char *) 0;
667 iop->heredoc = (char *) 0;
668 Xfree(ws, wp); /* free word */
669 yylval.iop = iop;
670 return REDIR;
673 if (wp == dp && state == SBASE) {
674 Xfree(ws, wp); /* free word */
675 /* no word, process LEX1 character */
676 switch (c) {
677 default:
678 return c;
680 case '|':
681 case '&':
682 case ';':
683 if ((c2 = getsc()) == c)
684 c = (c == ';') ? BREAK :
685 (c == '|') ? LOGOR :
686 (c == '&') ? LOGAND :
687 YYERRCODE;
688 #ifdef KSH
689 else if (c == '|' && c2 == '&')
690 c = COPROC;
691 #endif /* KSH */
692 else
693 ungetsc(c2);
694 return c;
696 case '\n':
697 gethere();
698 if (cf & CONTIN)
699 goto Again;
700 return c;
702 case '(': /*)*/
703 #ifdef KSH
704 if ((c2 = getsc()) == '(') /*)*/
705 /* XXX need to handle ((...); (...)) */
706 c = MDPAREN;
707 else
708 ungetsc(c2);
709 #endif /* KSH */
710 return c;
711 /*(*/
712 case ')':
713 return c;
717 *wp++ = EOS; /* terminate word */
718 yylval.cp = Xclose(ws, wp);
719 if (state == SWORD
720 #ifdef KSH
721 || state == SLETPAREN
722 #endif /* KSH */
723 ) /* ONEWORD? */
724 return LWORD;
725 ungetsc(c); /* unget terminator */
727 /* copy word to unprefixed string ident */
728 for (sp = yylval.cp, dp = ident; dp < ident+IDENT && (c = *sp++) == CHAR; )
729 *dp++ = *sp++;
730 /* Make sure the ident array stays '\0' padded */
731 memset(dp, 0, (ident+IDENT) - dp + 1);
732 if (c != EOS)
733 *ident = '\0'; /* word is not unquoted */
735 if (*ident != '\0' && (cf&(KEYWORD|ALIAS))) {
736 struct tbl *p;
737 int h = hash(ident);
739 /* { */
740 if ((cf & KEYWORD) && (p = tsearch(&keywords, ident, h))
741 && (!(cf & ESACONLY) || p->val.i == ESAC || p->val.i == '}'))
743 afree(yylval.cp, ATEMP);
744 return p->val.i;
746 if ((cf & ALIAS) && (p = tsearch(&aliases, ident, h))
747 && (p->flag & ISSET))
749 register Source *s;
751 for (s = source; s->type == SALIAS; s = s->next)
752 if (s->u.tblp == p)
753 return LWORD;
754 /* push alias expansion */
755 s = pushs(SALIAS, source->areap);
756 s->start = s->str = p->val.s;
757 s->u.tblp = p;
758 s->next = source;
759 source = s;
760 afree(yylval.cp, ATEMP);
761 goto Again;
765 return LWORD;
768 static void
769 gethere()
771 register struct ioword **p;
773 for (p = heres; p < herep; p++)
774 readhere(*p);
775 herep = heres;
779 * read "<<word" text into temp file
782 static void
783 readhere(iop)
784 struct ioword *iop;
786 register int c;
787 char *volatile eof;
788 char *eofp;
789 int skiptabs;
790 XString xs;
791 char *xp;
792 int xpos;
794 eof = evalstr(iop->delim, 0);
796 if (!(iop->flag & IOEVAL))
797 ignore_backslash_newline++;
799 Xinit(xs, xp, 256, ATEMP);
801 for (;;) {
802 eofp = eof;
803 skiptabs = iop->flag & IOSKIP;
804 xpos = Xsavepos(xs, xp);
805 while ((c = getsc()) != 0) {
806 if (skiptabs) {
807 if (c == '\t')
808 continue;
809 skiptabs = 0;
811 if (c != *eofp)
812 break;
813 Xcheck(xs, xp);
814 Xput(xs, xp, c);
815 eofp++;
817 /* Allow EOF here so commands with out trailing newlines
818 * will work (eg, ksh -c '...', $(...), etc).
820 if (*eofp == '\0' && (c == 0 || c == '\n')) {
821 xp = Xrestpos(xs, xp, xpos);
822 break;
824 ungetsc(c);
825 while ((c = getsc()) != '\n') {
826 if (c == 0)
827 yyerror("here document `%s' unclosed\n", eof);
828 Xcheck(xs, xp);
829 Xput(xs, xp, c);
831 Xcheck(xs, xp);
832 Xput(xs, xp, c);
834 Xput(xs, xp, '\0');
835 iop->heredoc = Xclose(xs, xp);
837 if (!(iop->flag & IOEVAL))
838 ignore_backslash_newline--;
841 void
842 #ifdef HAVE_PROTOTYPES
843 yyerror(const char *fmt, ...)
844 #else
845 yyerror(fmt, va_alist)
846 const char *fmt;
847 va_dcl
848 #endif
850 va_list va;
852 /* pop aliases and re-reads */
853 while (source->type == SALIAS || source->type == SREREAD)
854 source = source->next;
855 source->str = null; /* zap pending input */
857 error_prefix(TRUE);
858 SH_VA_START(va, fmt);
859 shf_vfprintf(shl_out, fmt, va);
860 va_end(va);
861 errorf(null);
865 * input for yylex with alias expansion
868 Source *
869 pushs(type, areap)
870 int type;
871 Area *areap;
873 register Source *s;
875 s = (Source *) alloc(sizeof(Source), areap);
876 s->type = type;
877 s->str = null;
878 s->start = NULL;
879 s->line = 0;
880 s->errline = 0;
881 s->file = NULL;
882 s->flags = 0;
883 s->next = NULL;
884 s->areap = areap;
885 if (type == SFILE || type == SSTDIN) {
886 char *dummy;
887 Xinit(s->xs, dummy, 256, s->areap);
888 } else
889 memset(&s->xs, 0, sizeof(s->xs));
890 return s;
893 static int
894 getsc__()
896 register Source *s = source;
897 register int c;
899 while ((c = *s->str++) == 0) {
900 s->str = NULL; /* return 0 for EOF by default */
901 switch (s->type) {
902 case SEOF:
903 s->str = null;
904 return 0;
906 case SSTDIN:
907 case SFILE:
908 getsc_line(s);
909 break;
911 case SWSTR:
912 break;
914 case SSTRING:
915 break;
917 case SWORDS:
918 s->start = s->str = *s->u.strv++;
919 s->type = SWORDSEP;
920 break;
922 case SWORDSEP:
923 if (*s->u.strv == NULL) {
924 s->start = s->str = newline;
925 s->type = SEOF;
926 } else {
927 s->start = s->str = space;
928 s->type = SWORDS;
930 break;
932 case SALIAS:
933 if (s->flags & SF_ALIASEND) {
934 /* pass on an unused SF_ALIAS flag */
935 source = s->next;
936 source->flags |= s->flags & SF_ALIAS;
937 s = source;
938 } else if (*s->u.tblp->val.s
939 && isspace((unsigned char)strchr(s->u.tblp->val.s, 0)[-1]))
941 source = s = s->next; /* pop source stack */
942 /* Note that this alias ended with a space,
943 * enabling alias expansion on the following
944 * word.
946 s->flags |= SF_ALIAS;
947 } else {
948 /* At this point, we need to keep the current
949 * alias in the source list so recursive
950 * aliases can be detected and we also need
951 * to return the next character. Do this
952 * by temporarily popping the alias to get
953 * the next character and then put it back
954 * in the source list with the SF_ALIASEND
955 * flag set.
957 source = s->next; /* pop source stack */
958 source->flags |= s->flags & SF_ALIAS;
959 c = getsc__();
960 if (c) {
961 s->flags |= SF_ALIASEND;
962 s->ugbuf[0] = c; s->ugbuf[1] = '\0';
963 s->start = s->str = s->ugbuf;
964 s->next = source;
965 source = s;
966 } else {
967 s = source;
968 /* avoid reading eof twice */
969 s->str = NULL;
970 break;
973 continue;
975 case SREREAD:
976 if (s->start != s->ugbuf) /* yuck */
977 afree(s->u.freeme, ATEMP);
978 source = s = s->next;
979 continue;
981 if (s->str == NULL) {
982 s->type = SEOF;
983 s->start = s->str = null;
984 return '\0';
986 if (s->flags & SF_ECHO) {
987 shf_puts(s->str, shl_out);
988 shf_flush(shl_out);
991 return c;
994 static void
995 getsc_line(s)
996 Source *s;
998 char *xp = Xstring(s->xs, xp);
999 int interactive = Flag(FTALKING) && s->type == SSTDIN;
1000 int have_tty = interactive && (s->flags & SF_TTY);
1002 /* Done here to ensure nothing odd happens when a timeout occurs */
1003 XcheckN(s->xs, xp, LINE);
1004 *xp = '\0';
1005 s->start = s->str = xp;
1007 #ifdef KSH
1008 if (have_tty && ksh_tmout) {
1009 ksh_tmout_state = TMOUT_READING;
1010 alarm(ksh_tmout);
1012 #endif /* KSH */
1013 #ifdef EDIT
1014 if (have_tty && (0
1015 # ifdef VI
1016 || Flag(FVI)
1017 # endif /* VI */
1018 # ifdef EMACS
1019 || Flag(FEMACS) || Flag(FGMACS)
1020 # endif /* EMACS */
1023 int nread;
1025 nread = x_read(xp, LINE);
1026 if (nread < 0) /* read error */
1027 nread = 0;
1028 xp[nread] = '\0';
1029 xp += nread;
1031 else
1032 #endif /* EDIT */
1034 if (interactive) {
1035 pprompt(prompt, 0);
1036 } else
1037 s->line++;
1039 while (1) {
1040 char *p = shf_getse(xp, Xnleft(s->xs, xp), s->u.shf);
1042 if (!p && shf_error(s->u.shf)
1043 && shf_errno(s->u.shf) == EINTR)
1045 shf_clearerr(s->u.shf);
1046 if (trap)
1047 runtraps(0);
1048 continue;
1050 if (!p || (xp = p, xp[-1] == '\n'))
1051 break;
1052 /* double buffer size */
1053 xp++; /* move past null so doubling works... */
1054 XcheckN(s->xs, xp, Xlength(s->xs, xp));
1055 xp--; /* ...and move back again */
1057 /* flush any unwanted input so other programs/builtins
1058 * can read it. Not very optimal, but less error prone
1059 * than flushing else where, dealing with redirections,
1060 * etc..
1061 * todo: reduce size of shf buffer (~128?) if SSTDIN
1063 if (s->type == SSTDIN)
1064 shf_flush(s->u.shf);
1066 /* XXX: temporary kludge to restore source after a
1067 * trap may have been executed.
1069 source = s;
1070 #ifdef KSH
1071 if (have_tty && ksh_tmout)
1073 ksh_tmout_state = TMOUT_EXECUTING;
1074 alarm(0);
1076 #endif /* KSH */
1077 s->start = s->str = Xstring(s->xs, xp);
1078 strip_nuls(Xstring(s->xs, xp), Xlength(s->xs, xp));
1079 /* Note: if input is all nulls, this is not eof */
1080 if (Xlength(s->xs, xp) == 0) { /* EOF */
1081 if (s->type == SFILE)
1082 shf_fdclose(s->u.shf);
1083 s->str = NULL;
1084 } else if (interactive) {
1085 #ifdef HISTORY
1086 char *p = Xstring(s->xs, xp);
1087 if (cur_prompt == PS1)
1088 while (*p && ctype(*p, C_IFS) && ctype(*p, C_IFSWS))
1089 p++;
1090 if (*p) {
1091 # ifdef EASY_HISTORY
1092 if (cur_prompt == PS2)
1093 histappend(Xstring(s->xs, xp), 1);
1094 else
1095 # endif /* EASY_HISTORY */
1097 s->line++;
1098 histsave(s->line, s->str, 1);
1101 #endif /* HISTORY */
1103 if (interactive)
1104 set_prompt(PS2, (Source *) 0);
1107 void
1108 set_prompt(to, s)
1109 int to;
1110 Source *s;
1112 cur_prompt = to;
1114 switch (to) {
1115 case PS1: /* command */
1116 #ifdef KSH
1117 /* Substitute ! and !! here, before substitutions are done
1118 * so ! in expanded variables are not expanded.
1119 * NOTE: this is not what at&t ksh does (it does it after
1120 * substitutions, POSIX doesn't say which is to be done.
1123 struct shf *shf;
1124 char * volatile ps1;
1125 Area *saved_atemp;
1127 ps1 = str_val(global("PS1"));
1128 shf = shf_sopen((char *) 0, strlen(ps1) * 2,
1129 SHF_WR | SHF_DYNAMIC, (struct shf *) 0);
1130 while (*ps1) {
1131 if (*ps1 != '!' || *++ps1 == '!')
1132 shf_putchar(*ps1++, shf);
1133 else
1134 shf_fprintf(shf, "%d",
1135 s ? s->line + 1 : 0);
1137 ps1 = shf_sclose(shf);
1138 saved_atemp = ATEMP;
1139 newenv(E_ERRH);
1140 if (ksh_sigsetjmp(e->jbuf, 0)) {
1141 prompt = safe_prompt;
1142 /* Don't print an error - assume it has already
1143 * been printed. Reason is we may have forked
1144 * to run a command and the child may be
1145 * unwinding its stack through this code as it
1146 * exits.
1148 } else
1149 prompt = str_save(substitute(ps1, 0),
1150 saved_atemp);
1151 quitenv();
1153 #else /* KSH */
1154 prompt = str_val(global("PS1"));
1155 #endif /* KSH */
1156 break;
1158 case PS2: /* command continuation */
1159 prompt = str_val(global("PS2"));
1160 break;
1164 /* See also related routine, promptlen() in edit.c */
1165 void
1166 pprompt(cp, ntruncate)
1167 const char *cp;
1168 int ntruncate;
1170 #if 0
1171 char nbuf[32];
1172 int c;
1174 while (*cp != 0) {
1175 if (*cp != '!')
1176 c = *cp++;
1177 else if (*++cp == '!')
1178 c = *cp++;
1179 else {
1180 int len;
1181 char *p;
1183 shf_snprintf(p = nbuf, sizeof(nbuf), "%d",
1184 source->line + 1);
1185 len = strlen(nbuf);
1186 if (ntruncate) {
1187 if (ntruncate >= len) {
1188 ntruncate -= len;
1189 continue;
1191 p += ntruncate;
1192 len -= ntruncate;
1193 ntruncate = 0;
1195 shf_write(p, len, shl_out);
1196 continue;
1198 if (ntruncate)
1199 --ntruncate;
1200 else
1201 shf_putc(c, shl_out);
1203 #endif /* 0 */
1204 shf_puts(cp + ntruncate, shl_out);
1205 shf_flush(shl_out);
1208 /* Read the variable part of a ${...} expression (ie, up to but not including
1209 * the :[-+?=#%] or close-brace.
1211 static char *
1212 get_brace_var(wsp, wp)
1213 XString *wsp;
1214 char *wp;
1216 enum parse_state {
1217 PS_INITIAL, PS_SAW_HASH, PS_IDENT,
1218 PS_NUMBER, PS_VAR1, PS_END
1220 state;
1221 char c;
1223 state = PS_INITIAL;
1224 while (1) {
1225 c = getsc();
1226 /* State machine to figure out where the variable part ends. */
1227 switch (state) {
1228 case PS_INITIAL:
1229 if (c == '#') {
1230 state = PS_SAW_HASH;
1231 break;
1233 /* fall through.. */
1234 case PS_SAW_HASH:
1235 if (letter(c))
1236 state = PS_IDENT;
1237 else if (digit(c))
1238 state = PS_NUMBER;
1239 else if (ctype(c, C_VAR1))
1240 state = PS_VAR1;
1241 else
1242 state = PS_END;
1243 break;
1244 case PS_IDENT:
1245 if (!letnum(c)) {
1246 state = PS_END;
1247 if (c == '[') {
1248 char *tmp, *p;
1250 if (!arraysub(&tmp))
1251 yyerror("missing ]\n");
1252 *wp++ = c;
1253 for (p = tmp; *p; ) {
1254 Xcheck(*wsp, wp);
1255 *wp++ = *p++;
1257 afree(tmp, ATEMP);
1258 c = getsc(); /* the ] */
1261 break;
1262 case PS_NUMBER:
1263 if (!digit(c))
1264 state = PS_END;
1265 break;
1266 case PS_VAR1:
1267 state = PS_END;
1268 break;
1269 case PS_END: /* keep gcc happy */
1270 break;
1272 if (state == PS_END) {
1273 *wp++ = '\0'; /* end of variable part */
1274 ungetsc(c);
1275 break;
1277 Xcheck(*wsp, wp);
1278 *wp++ = c;
1280 return wp;
1284 * Save an array subscript - returns true if matching bracket found, false
1285 * if eof or newline was found.
1286 * (Returned string double null terminated)
1288 static int
1289 arraysub(strp)
1290 char **strp;
1292 XString ws;
1293 char *wp;
1294 char c;
1295 int depth = 1; /* we are just past the initial [ */
1297 Xinit(ws, wp, 32, ATEMP);
1299 do {
1300 c = getsc();
1301 Xcheck(ws, wp);
1302 *wp++ = c;
1303 if (c == '[')
1304 depth++;
1305 else if (c == ']')
1306 depth--;
1307 } while (depth > 0 && c && c != '\n');
1309 *wp++ = '\0';
1310 *strp = Xclose(ws, wp);
1312 return depth == 0 ? 1 : 0;
1315 /* Unget a char: handles case when we are already at the start of the buffer */
1316 static const char *
1317 ungetsc(c)
1318 int c;
1320 if (backslash_skip)
1321 backslash_skip--;
1322 /* Don't unget eof... */
1323 if (source->str == null && c == '\0')
1324 return source->str;
1325 if (source->str > source->start)
1326 source->str--;
1327 else {
1328 Source *s;
1330 s = pushs(SREREAD, source->areap);
1331 s->ugbuf[0] = c; s->ugbuf[1] = '\0';
1332 s->start = s->str = s->ugbuf;
1333 s->next = source;
1334 source = s;
1336 return source->str;
1340 /* Called to get a char that isn't a \newline sequence. */
1341 static int
1342 getsc_bn ARGS((void))
1344 int c, c2;
1346 if (ignore_backslash_newline)
1347 return getsc_();
1349 if (backslash_skip == 1) {
1350 backslash_skip = 2;
1351 return getsc_();
1354 backslash_skip = 0;
1356 while (1) {
1357 c = getsc_();
1358 if (c == '\\') {
1359 if ((c2 = getsc_()) == '\n')
1360 /* ignore the \newline; get the next char... */
1361 continue;
1362 ungetsc(c2);
1363 backslash_skip = 1;
1365 return c;
1369 static Lex_state *
1370 push_state_(si, old_end)
1371 State_info *si;
1372 Lex_state *old_end;
1374 Lex_state *new = alloc(sizeof(Lex_state) * STATE_BSIZE, ATEMP);
1376 new[0].ls_info.base = old_end;
1377 si->base = &new[0];
1378 si->end = &new[STATE_BSIZE];
1379 return &new[1];
1382 static Lex_state *
1383 pop_state_(si, old_end)
1384 State_info *si;
1385 Lex_state *old_end;
1387 Lex_state *old_base = si->base;
1389 si->base = old_end->ls_info.base - STATE_BSIZE;
1390 si->end = old_end->ls_info.base;
1392 afree(old_base, ATEMP);
1394 return si->base + STATE_BSIZE - 1;