etc/services - sync with NetBSD-8
[minix.git] / bin / ksh / lex.c
blob80ded4ad7cc51560656233c9ffc8147206917f6f
1 /* $NetBSD: lex.c,v 1.15 2011/10/16 17:12:11 joerg Exp $ */
3 /*
4 * lexical analysis and source input
5 */
6 #include <sys/cdefs.h>
8 #ifndef lint
9 __RCSID("$NetBSD: lex.c,v 1.15 2011/10/16 17:12:11 joerg Exp $");
10 #endif
13 #include "sh.h"
14 #include <ctype.h>
/*
 * One entry of the lexer's state stack: the state code itself plus the
 * bookkeeping that particular state needs.  Only one member of ls_info is
 * meaningful at a time.
 */
typedef struct lex_state Lex_state;
struct lex_state {
	int ls_state;			/* state code for this entry */
	union {
		struct scsparen_info {	/* $(...) */
			int nparen;	/* count open parenthesis */
			int csstate;	/* XXX remove */
		} u_scsparen;

		struct sasparen_info {	/* $((...)) */
			int nparen;	/* count open parenthesis */
			int start;	/* marks start of $(( in output str */
		} u_sasparen;

		struct sletparen_info {	/* ((...)) */
			int nparen;	/* count open parenthesis */
		} u_sletparen;

		struct sbquote_info {	/* `...` */
			int indquotes;	/* true if in double quotes: "`...`" */
		} u_sbquote;

		Lex_state *base;	/* used to point to next state block */
	} ls_info;
};

/* Shorthand for reaching the per-state members through ls_info. */
#define ls_scsparen	ls_info.u_scsparen
#define ls_sasparen	ls_info.u_sasparen
#define ls_sletparen	ls_info.u_sletparen
#define ls_sbquote	ls_info.u_sbquote
54 typedef struct State_info State_info;
55 struct State_info {
56 Lex_state *base;
57 Lex_state *end;
61 static void readhere ARGS((struct ioword *iop));
62 static int getsc__ ARGS((void));
63 static void getsc_line ARGS((Source *s));
64 static int getsc_bn ARGS((void));
65 static char *get_brace_var ARGS((XString *wsp, char *wp));
66 static int arraysub ARGS((char **strp));
67 static const char *ungetsc ARGS((int c));
68 static void gethere ARGS((void));
69 static Lex_state *push_state_ ARGS((State_info *si, Lex_state *old_end));
70 static Lex_state *pop_state_ ARGS((State_info *si, Lex_state *old_end));
72 static int backslash_skip;
73 static int ignore_backslash_newline;
/*
 * getsc(): fast path for getsc_bn().  A character that is neither NUL nor
 * a backslash is taken straight from the source buffer, provided no
 * backslash sequence is pending; every other case calls getsc_bn().
 */
#define getsc() \
	((*source->str != '\0' && *source->str != '\\' && !backslash_skip) \
	    ? *source->str++ : getsc_bn())

/* getsc_(): fast path for getsc__(); only the end of the buffer calls it. */
#define getsc_() \
	((*source->str != '\0') ? *source->str++ : getsc__())

/* Number of Lex_state entries in one block of the state stack. */
#define STATE_BSIZE	32

/* Enter state s: advance statep, chaining on a new block when the current
 * one is full, and record s both in the entry and in `state'.
 */
#define PUSH_STATE(s) \
	do { \
		if (++statep == state_info.end) \
			statep = push_state_(&state_info, statep); \
		statep->ls_state = (s); \
		state = statep->ls_state; \
	} while (0)

/* Leave the current state: step statep back, releasing the current block
 * when its first entry is reached, and reload `state' from the entry.
 */
#define POP_STATE() \
	do { \
		if (--statep == state_info.base) \
			statep = pop_state_(&state_info, statep); \
		state = statep->ls_state; \
	} while (0)
/*
 * Lexical analyzer
 *
 * tokens are not regular expressions, they are LL(1).
 * for example, "${var:-${PWD}}", and "$(size $(whence ksh))".
 * hence the state stack.
 */
/*
 * yylex - read the next token from the current input source.
 *
 * cf is a bit mask of lexer flags; the ones tested here are ONEWORD,
 * LETEXPR, HEREDELIM, HEREDOC, VARASN, ARRAYVAR, CMDWORD, CONTIN, KEYWORD,
 * ALIAS and ESACONLY.
 *
 * Return value:
 *   REDIR  - a redirection operator; yylval.iop points at a new ioword
 *            with unit and flag filled in and name/delim/heredoc cleared.
 *   LWORD  - a word; yylval.cp holds the encoded word.
 *   p->val.i of the matching entry when KEYWORD is set in cf and the
 *            unquoted word is found in the keywords table.
 *   BREAK, LOGOR, LOGAND, COPROC, MDPAREN or the plain character for the
 *            remaining C_LEX1 characters when no word was collected.
 *
 * A word is built in the expandable string ws as a sequence of tagged
 * items (CHAR, QCHAR, OQUOTE/CQUOTE, OSUBST/CSUBST, COMSUB, EXPRSUB,
 * OPAT/SPAT/CPAT) ended by EOS.  Nested constructs are tracked on a stack
 * of Lex_state entries: the first block is the local array states[],
 * further blocks are chained on by PUSH_STATE via push_state_().
 *
 * When ALIAS is set in cf and the word names a set alias that is not
 * already being expanded, an SALIAS source is pushed and lexing restarts
 * at Again.  A newline token first collects pending here documents with
 * gethere().
 */
106 yylex(cf)
107 int cf;
109 Lex_state states[STATE_BSIZE], *statep;
110 State_info state_info;
111 register int c, state;
112 XString ws; /* expandable output word */
113 register char *wp; /* output word pointer */
114 char *sp, *dp;
115 int c2;
/* Restart point: reset the state stack and the output word. */
118 Again:
119 states[0].ls_state = -1;
120 states[0].ls_info.base = (Lex_state *) 0;
121 statep = &states[1];
122 state_info.base = states;
123 state_info.end = &states[STATE_BSIZE];
125 Xinit(ws, wp, 64, ATEMP);
127 backslash_skip = 0;
128 ignore_backslash_newline = 0;
130 if (cf&ONEWORD)
131 state = SWORD;
132 #ifdef KSH
133 else if (cf&LETEXPR) {
134 *wp++ = OQUOTE; /* enclose arguments in (double) quotes */
135 state = SLETPAREN;
136 statep->ls_sletparen.nparen = 0;
138 #endif /* KSH */
139 else { /* normal lexing */
140 state = (cf & HEREDELIM) ? SHEREDELIM : SBASE;
141 while ((c = getsc()) == ' ' || c == '\t')
143 if (c == '#') {
144 ignore_backslash_newline++;
145 while ((c = getsc()) != '\0' && c != '\n')
147 ignore_backslash_newline--;
149 ungetsc(c);
151 if (source->flags & SF_ALIAS) { /* trailing ' ' in alias definition */
152 source->flags &= ~SF_ALIAS;
153 /* In POSIX mode, a trailing space only counts if we are
154 * parsing a simple command
156 if (!Flag(FPOSIX) || (cf & CMDWORD))
157 cf |= ALIAS;
160 /* Initial state: one of SBASE SHEREDELIM SWORD SASPAREN */
161 statep->ls_state = state;
163 /* collect non-special or quoted characters to form word */
164 while (!((c = getsc()) == 0
165 || ((state == SBASE || state == SHEREDELIM)
166 && ctype(c, C_LEX1))))
168 Xcheck(ws, wp);
169 switch (state) {
170 case SBASE:
171 if (c == '[' && (cf & (VARASN|ARRAYVAR))) {
172 *wp = EOS; /* temporary */
173 if (is_wdvarname(Xstring(ws, wp), FALSE))
175 char *p, *tmp;
177 if (arraysub(&tmp)) {
178 *wp++ = CHAR;
179 *wp++ = c;
180 for (p = tmp; *p; ) {
181 Xcheck(ws, wp);
182 *wp++ = CHAR;
183 *wp++ = *p++;
185 afree(tmp, ATEMP);
186 break;
187 } else {
188 Source *s;
190 s = pushs(SREREAD,
191 source->areap);
192 s->start = s->str
193 = s->u.freeme = tmp;
194 s->next = source;
195 source = s;
198 *wp++ = CHAR;
199 *wp++ = c;
200 break;
202 /* fall through.. */
203 Sbase1: /* includes *(...|...) pattern (*+?@!) */
204 #ifdef KSH
205 if (c == '*' || c == '@' || c == '+' || c == '?'
206 || c == '!')
208 c2 = getsc();
209 if (c2 == '(' /*)*/ ) {
210 *wp++ = OPAT;
211 *wp++ = c;
212 PUSH_STATE(SPATTERN);
213 break;
215 ungetsc(c2);
217 #endif /* KSH */
218 /* fall through.. */
219 Sbase2: /* doesn't include *(...|...) pattern (*+?@!) */
220 switch (c) {
221 case '\\':
222 c = getsc();
223 #ifdef OS2
224 if (isalnum((unsigned char)c)) {
225 *wp++ = CHAR, *wp++ = '\\';
226 *wp++ = CHAR, *wp++ = c;
227 } else
228 #endif
229 if (c) /* trailing \ is lost */
230 *wp++ = QCHAR, *wp++ = c;
231 break;
232 case '\'':
233 *wp++ = OQUOTE;
234 ignore_backslash_newline++;
235 PUSH_STATE(SSQUOTE);
236 break;
237 case '"':
238 *wp++ = OQUOTE;
239 PUSH_STATE(SDQUOTE);
240 break;
241 default:
242 goto Subst;
244 break;
246 Subst:
247 switch (c) {
248 Lex_state *s;
249 Lex_state *base;
251 case '\\':
252 c = getsc();
253 switch (c) {
254 case '\\':
255 case '$': case '`':
256 *wp++ = QCHAR, *wp++ = c;
257 break;
258 case '"':
259 if ((cf & HEREDOC) == 0) {
260 *wp++ = QCHAR, *wp++ = c;
261 break;
263 /* FALLTROUGH */
264 default:
265 Xcheck(ws, wp);
266 if (c) { /* trailing \ is lost */
267 *wp++ = CHAR, *wp++ = '\\';
268 *wp++ = CHAR, *wp++ = c;
270 break;
272 break;
273 case '$':
274 c = getsc();
275 if (c == '(') /*)*/ {
276 c = getsc();
277 if (c == '(') /*)*/ {
278 PUSH_STATE(SASPAREN);
279 statep->ls_sasparen.nparen = 2;
280 statep->ls_sasparen.start =
281 Xsavepos(ws, wp);
282 *wp++ = EXPRSUB;
283 } else {
284 ungetsc(c);
285 PUSH_STATE(SCSPAREN);
286 statep->ls_scsparen.nparen = 1;
287 statep->ls_scsparen.csstate = 0;
288 *wp++ = COMSUB;
290 } else if (c == '{') /*}*/ {
291 *wp++ = OSUBST;
292 *wp++ = '{'; /*}*/
293 wp = get_brace_var(&ws, wp);
294 c = getsc();
295 /* allow :# and :% (ksh88 compat) */
296 if (c == ':') {
297 *wp++ = CHAR, *wp++ = c;
298 c = getsc();
300 /* If this is a trim operation,
301 * treat (,|,) specially in STBRACE.
303 if (c == '#' || c == '%') {
304 ungetsc(c);
305 PUSH_STATE(STBRACE);
306 } else {
307 ungetsc(c);
308 PUSH_STATE(SBRACE);
310 } else if (ctype(c, C_ALPHA)) {
311 *wp++ = OSUBST;
312 *wp++ = 'X';
313 do {
314 Xcheck(ws, wp);
315 *wp++ = c;
316 c = getsc();
317 } while (ctype(c, C_ALPHA|C_DIGIT));
318 *wp++ = '\0';
319 *wp++ = CSUBST;
320 *wp++ = 'X';
321 ungetsc(c);
322 } else if (ctype(c, C_DIGIT|C_VAR1)) {
323 Xcheck(ws, wp);
324 *wp++ = OSUBST;
325 *wp++ = 'X';
326 *wp++ = c;
327 *wp++ = '\0';
328 *wp++ = CSUBST;
329 *wp++ = 'X';
330 } else {
331 *wp++ = CHAR, *wp++ = '$';
332 ungetsc(c);
334 break;
335 case '`':
336 PUSH_STATE(SBQUOTE);
337 *wp++ = COMSUB;
338 /* Need to know if we are inside double quotes
339 * since sh/at&t-ksh translate the \" to " in
340 * "`..\"..`". POSIX also requires this.
341 * An earlier version of ksh misinterpreted
342 * the POSIX specification and performed
343 * removal of backslash escapes only if
344 * posix mode was not in effect.
346 statep->ls_sbquote.indquotes = 0;
347 s = statep;
348 base = state_info.base;
349 while (1) {
350 for (; s != base; s--) {
351 if (s->ls_state == SDQUOTE) {
352 statep->ls_sbquote.indquotes = 1;
353 break;
356 if (s != base)
357 break;
358 if (!(s = s->ls_info.base))
359 break;
360 base = s-- - STATE_BSIZE;
362 break;
363 default:
364 *wp++ = CHAR, *wp++ = c;
366 break;
368 case SSQUOTE:
369 if (c == '\'') {
370 POP_STATE();
371 *wp++ = CQUOTE;
372 ignore_backslash_newline--;
373 } else
374 *wp++ = QCHAR, *wp++ = c;
375 break;
377 case SDQUOTE:
378 if (c == '"') {
379 POP_STATE();
380 *wp++ = CQUOTE;
381 } else
382 goto Subst;
383 break;
385 case SCSPAREN: /* $( .. ) */
386 /* todo: deal with $(...) quoting properly
387 * kludge to partly fake quoting inside $(..): doesn't
388 * really work because nested $(..) or ${..} inside
389 * double quotes aren't dealt with.
391 switch (statep->ls_scsparen.csstate) {
392 case 0: /* normal */
393 switch (c) {
394 case '(':
395 statep->ls_scsparen.nparen++;
396 break;
397 case ')':
398 statep->ls_scsparen.nparen--;
399 break;
400 case '\\':
401 statep->ls_scsparen.csstate = 1;
402 break;
403 case '"':
404 statep->ls_scsparen.csstate = 2;
405 break;
406 case '\'':
407 statep->ls_scsparen.csstate = 4;
408 ignore_backslash_newline++;
409 break;
411 break;
413 case 1: /* backslash in normal mode */
414 case 3: /* backslash in double quotes */
415 --statep->ls_scsparen.csstate;
416 break;
418 case 2: /* double quotes */
419 if (c == '"')
420 statep->ls_scsparen.csstate = 0;
421 else if (c == '\\')
422 statep->ls_scsparen.csstate = 3;
423 break;
425 case 4: /* single quotes */
426 if (c == '\'') {
427 statep->ls_scsparen.csstate = 0;
428 ignore_backslash_newline--;
430 break;
432 if (statep->ls_scsparen.nparen == 0) {
433 POP_STATE();
434 *wp++ = 0; /* end of COMSUB */
435 } else
436 *wp++ = c;
437 break;
439 case SASPAREN: /* $(( .. )) */
440 /* todo: deal with $((...); (...)) properly */
441 /* XXX should nest using existing state machine
442 * (embed "..", $(...), etc.) */
443 if (c == '(')
444 statep->ls_sasparen.nparen++;
445 else if (c == ')') {
446 statep->ls_sasparen.nparen--;
447 if (statep->ls_sasparen.nparen == 1) {
448 /*(*/
449 if ((c2 = getsc()) == ')') {
450 POP_STATE();
451 *wp++ = 0; /* end of EXPRSUB */
452 break;
453 } else {
454 char *s;
456 ungetsc(c2);
457 /* mismatched parenthesis -
458 * assume we were really
459 * parsing a $(..) expression
461 s = Xrestpos(ws, wp,
462 statep->ls_sasparen.start);
463 memmove(s + 1, s, wp - s);
464 *s++ = COMSUB;
465 *s = '('; /*)*/
466 wp++;
467 statep->ls_scsparen.nparen = 1;
468 statep->ls_scsparen.csstate = 0;
469 state = statep->ls_state
470 = SCSPAREN;
475 *wp++ = c;
476 break;
478 case SBRACE:
479 /*{*/
480 if (c == '}') {
481 POP_STATE();
482 *wp++ = CSUBST;
483 *wp++ = /*{*/ '}';
484 } else
485 goto Sbase1;
486 break;
488 case STBRACE:
489 /* Same as SBRACE, except (,|,) treated specially */
490 /*{*/
491 if (c == '}') {
492 POP_STATE();
493 *wp++ = CSUBST;
494 *wp++ = /*{*/ '}';
495 } else if (c == '|') {
496 *wp++ = SPAT;
497 } else if (c == '(') {
498 *wp++ = OPAT;
499 *wp++ = ' '; /* simile for @ */
500 PUSH_STATE(SPATTERN);
501 } else
502 goto Sbase1;
503 break;
505 case SBQUOTE:
506 if (c == '`') {
507 *wp++ = 0;
508 POP_STATE();
509 } else if (c == '\\') {
510 switch (c = getsc()) {
511 case '\\':
512 case '$': case '`':
513 *wp++ = c;
514 break;
515 case '"':
516 if (statep->ls_sbquote.indquotes) {
517 *wp++ = c;
518 break;
520 /* fall through.. */
521 default:
522 if (c) { /* trailing \ is lost */
523 *wp++ = '\\';
524 *wp++ = c;
526 break;
528 } else
529 *wp++ = c;
530 break;
532 case SWORD: /* ONEWORD */
533 goto Subst;
535 #ifdef KSH
536 case SLETPAREN: /* LETEXPR: (( ... )) */
537 /*(*/
538 if (c == ')') {
539 if (statep->ls_sletparen.nparen > 0)
540 --statep->ls_sletparen.nparen;
541 /*(*/
542 else if ((c2 = getsc()) == ')') {
543 c = 0;
544 *wp++ = CQUOTE;
545 goto Done;
546 } else
547 ungetsc(c2);
548 } else if (c == '(')
549 /* parenthesis inside quotes and backslashes
550 * are lost, but at&t ksh doesn't count them
551 * either
553 ++statep->ls_sletparen.nparen;
554 goto Sbase2;
555 #endif /* KSH */
557 case SHEREDELIM: /* <<,<<- delimiter */
558 /* XXX chuck this state (and the next) - use
559 * the existing states ($ and \`..` should be
560 * stripped of their specialness after the
561 * fact).
563 /* here delimiters need a special case since
564 * $ and `..` are not to be treated specially
566 if (c == '\\') {
567 c = getsc();
568 if (c) { /* trailing \ is lost */
569 *wp++ = QCHAR;
570 *wp++ = c;
572 } else if (c == '\'') {
573 PUSH_STATE(SSQUOTE);
574 *wp++ = OQUOTE;
575 ignore_backslash_newline++;
576 } else if (c == '"') {
577 state = statep->ls_state = SHEREDQUOTE;
578 *wp++ = OQUOTE;
579 } else {
580 *wp++ = CHAR;
581 *wp++ = c;
583 break;
585 case SHEREDQUOTE: /* " in <<,<<- delimiter */
586 if (c == '"') {
587 *wp++ = CQUOTE;
588 state = statep->ls_state = SHEREDELIM;
589 } else {
590 if (c == '\\') {
591 switch (c = getsc()) {
592 case '\\': case '"':
593 case '$': case '`':
594 break;
595 default:
596 if (c) { /* trailing \ lost */
597 *wp++ = CHAR;
598 *wp++ = '\\';
600 break;
603 *wp++ = CHAR;
604 *wp++ = c;
606 break;
608 case SPATTERN: /* in *(...|...) pattern (*+?@!) */
609 if ( /*(*/ c == ')') {
610 *wp++ = CPAT;
611 POP_STATE();
612 } else if (c == '|') {
613 *wp++ = SPAT;
614 } else if (c == '(') {
615 *wp++ = OPAT;
616 *wp++ = ' '; /* simile for @ */
617 PUSH_STATE(SPATTERN);
618 } else
619 goto Sbase1;
620 break;
/* The word is complete: c is the terminating character (0 at end of input). */
623 Done:
624 Xcheck(ws, wp);
625 if (statep != &states[1])
626 /* XXX figure out what is missing */
627 yyerror("no closing quote\n");
629 /* This done to avoid tests for SHEREDELIM wherever SBASE tested */
630 if (state == SHEREDELIM)
631 state = SBASE;
633 dp = Xstring(ws, wp);
634 if ((c == '<' || c == '>') && state == SBASE
635 && ((c2 = Xlength(ws, wp)) == 0
636 || (c2 == 2 && dp[0] == CHAR && digit(dp[1]))))
638 struct ioword *iop =
639 (struct ioword *) alloc(sizeof(*iop), ATEMP);
641 if (c2 == 2)
642 iop->unit = dp[1] - '0';
643 else
644 iop->unit = c == '>'; /* 0 for <, 1 for > */
646 c2 = getsc();
647 /* <<, >>, <> are ok, >< is not */
648 if (c == c2 || (c == '<' && c2 == '>')) {
649 iop->flag = c == c2 ?
650 (c == '>' ? IOCAT : IOHERE) : IORDWR;
651 if (iop->flag == IOHERE) {
652 if ((c2 = getsc()) == '-') {
653 iop->flag |= IOSKIP;
654 } else {
655 ungetsc(c2);
658 } else if (c2 == '&')
659 iop->flag = IODUP | (c == '<' ? IORDUP : 0);
660 else {
661 iop->flag = c == '>' ? IOWRITE : IOREAD;
662 if (c == '>' && c2 == '|')
663 iop->flag |= IOCLOB;
664 else
665 ungetsc(c2);
668 iop->name = (char *) 0;
669 iop->delim = (char *) 0;
670 iop->heredoc = (char *) 0;
671 Xfree(ws, wp); /* free word */
672 yylval.iop = iop;
673 return REDIR;
676 if (wp == dp && state == SBASE) {
677 Xfree(ws, wp); /* free word */
678 /* no word, process LEX1 character */
679 switch (c) {
680 default:
681 return c;
683 case '|':
684 case '&':
685 case ';':
686 if ((c2 = getsc()) == c)
687 c = (c == ';') ? BREAK :
688 (c == '|') ? LOGOR :
689 (c == '&') ? LOGAND :
690 YYERRCODE;
691 #ifdef KSH
692 else if (c == '|' && c2 == '&')
693 c = COPROC;
694 #endif /* KSH */
695 else
696 ungetsc(c2);
697 return c;
699 case '\n':
700 gethere();
701 if (cf & CONTIN)
702 goto Again;
703 return c;
705 case '(': /*)*/
706 #ifdef KSH
707 if ((c2 = getsc()) == '(') /*)*/
708 /* XXX need to handle ((...); (...)) */
709 c = MDPAREN;
710 else
711 ungetsc(c2);
712 #endif /* KSH */
713 return c;
714 /*(*/
715 case ')':
716 return c;
720 *wp++ = EOS; /* terminate word */
721 yylval.cp = Xclose(ws, wp);
722 if (state == SWORD
723 #ifdef KSH
724 || state == SLETPAREN
725 #endif /* KSH */
726 ) /* ONEWORD? */
727 return LWORD;
728 ungetsc(c); /* unget terminator */
730 /* copy word to unprefixed string ident */
731 for (sp = yylval.cp, dp = ident; dp < ident+IDENT && (c = *sp++) == CHAR; )
732 *dp++ = *sp++;
733 /* Make sure the ident array stays '\0' padded */
734 memset(dp, 0, (ident+IDENT) - dp + 1);
735 if (c != EOS)
736 *ident = '\0'; /* word is not unquoted */
738 if (*ident != '\0' && (cf&(KEYWORD|ALIAS))) {
739 struct tbl *p;
740 int h = hash(ident);
742 /* { */
743 if ((cf & KEYWORD) && (p = tsearch(&keywords, ident, h))
744 && (!(cf & ESACONLY) || p->val.i == ESAC || p->val.i == '}'))
746 afree(yylval.cp, ATEMP);
747 return p->val.i;
749 if ((cf & ALIAS) && (p = tsearch(&aliases, ident, h))
750 && (p->flag & ISSET))
752 register Source *s;
754 for (s = source; s->type == SALIAS; s = s->next)
755 if (s->u.tblp == p)
756 return LWORD;
757 /* push alias expansion */
758 s = pushs(SALIAS, source->areap);
759 s->start = s->str = p->val.s;
760 s->u.tblp = p;
761 s->next = source;
762 source = s;
763 afree(yylval.cp, ATEMP);
764 goto Again;
768 return LWORD;
771 static void
772 gethere()
774 register struct ioword **p;
776 for (p = heres; p < herep; p++)
777 readhere(*p);
778 herep = heres;
782 * read "<<word" text into temp file
785 static void
786 readhere(iop)
787 struct ioword *iop;
789 register int c;
790 char *volatile eof;
791 char *eofp;
792 int skiptabs;
793 XString xs;
794 char *xp;
795 int xpos;
797 eof = evalstr(iop->delim, 0);
799 if (!(iop->flag & IOEVAL))
800 ignore_backslash_newline++;
802 Xinit(xs, xp, 256, ATEMP);
804 for (;;) {
805 eofp = eof;
806 skiptabs = iop->flag & IOSKIP;
807 xpos = Xsavepos(xs, xp);
808 while ((c = getsc()) != 0) {
809 if (skiptabs) {
810 if (c == '\t')
811 continue;
812 skiptabs = 0;
814 if (c != *eofp)
815 break;
816 Xcheck(xs, xp);
817 Xput(xs, xp, c);
818 eofp++;
820 /* Allow EOF here so commands with out trailing newlines
821 * will work (eg, ksh -c '...', $(...), etc).
823 if (*eofp == '\0' && (c == 0 || c == '\n')) {
824 xp = Xrestpos(xs, xp, xpos);
825 break;
827 ungetsc(c);
828 while ((c = getsc()) != '\n') {
829 if (c == 0)
830 yyerror("here document `%s' unclosed\n", eof);
831 Xcheck(xs, xp);
832 Xput(xs, xp, c);
834 Xcheck(xs, xp);
835 Xput(xs, xp, c);
837 Xput(xs, xp, '\0');
838 iop->heredoc = Xclose(xs, xp);
840 if (!(iop->flag & IOEVAL))
841 ignore_backslash_newline--;
844 void
845 #ifdef HAVE_PROTOTYPES
846 yyerror(const char *fmt, ...)
847 #else
848 yyerror(fmt, va_alist)
849 const char *fmt;
850 va_dcl
851 #endif
853 va_list va;
855 /* pop aliases and re-reads */
856 while (source->type == SALIAS || source->type == SREREAD)
857 source = source->next;
858 source->str = null; /* zap pending input */
860 error_prefix(TRUE);
861 SH_VA_START(va, fmt);
862 shf_vfprintf(shl_out, fmt, va);
863 va_end(va);
864 errorf("%s", null);
868 * input for yylex with alias expansion
871 Source *
872 pushs(type, areap)
873 int type;
874 Area *areap;
876 register Source *s;
878 s = (Source *) alloc(sizeof(Source), areap);
879 s->type = type;
880 s->str = null;
881 s->start = NULL;
882 s->line = 0;
883 s->errline = 0;
884 s->file = NULL;
885 s->flags = 0;
886 s->next = NULL;
887 s->areap = areap;
888 if (type == SFILE || type == SSTDIN) {
889 char *dummy;
890 Xinit(s->xs, dummy, 256, s->areap);
891 } else
892 memset(&s->xs, 0, sizeof(s->xs));
893 return s;
/*
 * getsc__ - return the next raw character from the source stack, or 0 at
 * end of input.
 *
 * When the current source's string is used up, the action depends on the
 * source type:
 *   SEOF            - stays at end of input and returns 0.
 *   SSTDIN, SFILE   - getsc_line() is asked for another line.
 *   SWSTR, SSTRING  - nothing more to read; the source becomes SEOF.
 *   SWORDS/SWORDSEP - steps through u.strv, supplying a space between
 *                     words and a newline after the last one.
 *   SALIAS          - the alias is popped.  If its text ended in white
 *                     space, SF_ALIAS is set on the source underneath.
 *                     Otherwise the following character is fetched and
 *                     the alias is put back holding just that character
 *                     (SF_ALIASEND), so recursive aliases can still be
 *                     detected.
 *   SREREAD         - pushed-back text is freed, unless it lives in
 *                     ugbuf, and the source is popped.
 * If SF_ECHO is set on the source, newly loaded text is written to
 * shl_out.
 */
896 static int
897 getsc__()
899 register Source *s = source;
900 register int c;
902 while ((c = *s->str++) == 0) {
903 s->str = NULL; /* return 0 for EOF by default */
904 switch (s->type) {
905 case SEOF:
906 s->str = null;
907 return 0;
909 case SSTDIN:
910 case SFILE:
911 getsc_line(s);
912 break;
914 case SWSTR:
915 break;
917 case SSTRING:
918 break;
920 case SWORDS:
921 s->start = s->str = *s->u.strv++;
922 s->type = SWORDSEP;
923 break;
925 case SWORDSEP:
926 if (*s->u.strv == NULL) {
927 s->start = s->str = newline;
928 s->type = SEOF;
929 } else {
930 s->start = s->str = space;
931 s->type = SWORDS;
933 break;
935 case SALIAS:
936 if (s->flags & SF_ALIASEND) {
937 /* pass on an unused SF_ALIAS flag */
938 source = s->next;
939 source->flags |= s->flags & SF_ALIAS;
940 s = source;
941 } else if (*s->u.tblp->val.s
942 && isspace((unsigned char)strchr(s->u.tblp->val.s, 0)[-1]))
944 source = s = s->next; /* pop source stack */
945 /* Note that this alias ended with a space,
946 * enabling alias expansion on the following
947 * word.
949 s->flags |= SF_ALIAS;
950 } else {
951 /* At this point, we need to keep the current
952 * alias in the source list so recursive
953 * aliases can be detected and we also need
954 * to return the next character. Do this
955 * by temporarily popping the alias to get
956 * the next character and then put it back
957 * in the source list with the SF_ALIASEND
958 * flag set.
960 source = s->next; /* pop source stack */
961 source->flags |= s->flags & SF_ALIAS;
962 c = getsc__();
963 if (c) {
964 s->flags |= SF_ALIASEND;
965 s->ugbuf[0] = c; s->ugbuf[1] = '\0';
966 s->start = s->str = s->ugbuf;
967 s->next = source;
968 source = s;
969 } else {
970 s = source;
971 /* avoid reading eof twice */
972 s->str = NULL;
973 break;
976 continue;
978 case SREREAD:
979 if (s->start != s->ugbuf) /* yuck */
980 afree(s->u.freeme, ATEMP);
981 source = s = s->next;
982 continue;
984 if (s->str == NULL) {
985 s->type = SEOF;
986 s->start = s->str = null;
987 return '\0';
989 if (s->flags & SF_ECHO) {
990 shf_puts(s->str, shl_out);
991 shf_flush(shl_out);
994 return c;
/*
 * getsc_line - read the next line of input for source s into its line
 * buffer s->xs and point s->start and s->str at it.
 *
 * "interactive" means FTALKING is set and s is an SSTDIN source; have_tty
 * additionally requires SF_TTY on the source.
 *   - With KSH and a non-zero ksh_tmout, an alarm is armed while reading
 *     from the tty and cancelled afterwards.
 *   - With EDIT, x_read() supplies the line when have_tty is set and one
 *     of the editing modes (FVI, FEMACS, FGMACS) is on.
 *   - Otherwise the prompt is printed (interactive) or s->line is bumped,
 *     and shf_getse() is called repeatedly.  The buffer is doubled until a
 *     newline or end of file is seen, and a read interrupted with EINTR is
 *     retried after running pending traps.
 * NUL bytes are stripped from the result.  An empty result is end of
 * file: s->str is set to NULL and, for SFILE, the shf is closed.  With
 * HISTORY, a non-blank interactive line is added to the history.
 * Interactive reads finish by switching the prompt to PS2.
 */
997 static void
998 getsc_line(s)
999 Source *s;
1001 char *xp = Xstring(s->xs, xp);
1002 int interactive = Flag(FTALKING) && s->type == SSTDIN;
1003 int have_tty = interactive && (s->flags & SF_TTY);
1005 /* Done here to ensure nothing odd happens when a timeout occurs */
1006 XcheckN(s->xs, xp, LINE);
1007 *xp = '\0';
1008 s->start = s->str = xp;
1010 #ifdef KSH
1011 if (have_tty && ksh_tmout) {
1012 ksh_tmout_state = TMOUT_READING;
1013 alarm(ksh_tmout);
1015 #endif /* KSH */
1016 #ifdef EDIT
1017 if (have_tty && (0
1018 # ifdef VI
1019 || Flag(FVI)
1020 # endif /* VI */
1021 # ifdef EMACS
1022 || Flag(FEMACS) || Flag(FGMACS)
1023 # endif /* EMACS */
1026 int nread;
1028 nread = x_read(xp, LINE);
1029 if (nread < 0) /* read error */
1030 nread = 0;
1031 xp[nread] = '\0';
1032 xp += nread;
1034 else
1035 #endif /* EDIT */
1037 if (interactive) {
1038 pprompt(prompt, 0);
1039 } else
1040 s->line++;
1042 while (1) {
1043 char *p = shf_getse(xp, Xnleft(s->xs, xp), s->u.shf);
1045 if (!p && shf_error(s->u.shf)
1046 && shf_errno(s->u.shf) == EINTR)
1048 shf_clearerr(s->u.shf);
1049 if (trap)
1050 runtraps(0);
1051 continue;
1053 if (!p || (xp = p, xp[-1] == '\n'))
1054 break;
1055 /* double buffer size */
1056 xp++; /* move past null so doubling works... */
1057 XcheckN(s->xs, xp, Xlength(s->xs, xp));
1058 xp--; /* ...and move back again */
1060 /* flush any unwanted input so other programs/builtins
1061 * can read it. Not very optimal, but less error prone
1062 * than flushing else where, dealing with redirections,
1063 * etc..
1064 * todo: reduce size of shf buffer (~128?) if SSTDIN
1066 if (s->type == SSTDIN)
1067 shf_flush(s->u.shf);
1069 /* XXX: temporary kludge to restore source after a
1070 * trap may have been executed.
1072 source = s;
1073 #ifdef KSH
1074 if (have_tty && ksh_tmout)
1076 ksh_tmout_state = TMOUT_EXECUTING;
1077 alarm(0);
1079 #endif /* KSH */
1080 s->start = s->str = Xstring(s->xs, xp);
1081 strip_nuls(Xstring(s->xs, xp), Xlength(s->xs, xp));
1082 /* Note: if input is all nulls, this is not eof */
1083 if (Xlength(s->xs, xp) == 0) { /* EOF */
1084 if (s->type == SFILE)
1085 shf_fdclose(s->u.shf);
1086 s->str = NULL;
1087 } else if (interactive) {
1088 #ifdef HISTORY
1089 char *p = Xstring(s->xs, xp);
1090 if (cur_prompt == PS1)
1091 while (*p && ctype(*p, C_IFS) && ctype(*p, C_IFSWS))
1092 p++;
1093 if (*p) {
1094 # ifdef EASY_HISTORY
1095 if (cur_prompt == PS2)
1096 histappend(Xstring(s->xs, xp), 1);
1097 else
1098 # endif /* EASY_HISTORY */
1100 s->line++;
1101 histsave(s->line, s->str, 1);
1104 #endif /* HISTORY */
1106 if (interactive)
1107 set_prompt(PS2, (Source *) 0);
1110 void
1111 set_prompt(to, s)
1112 int to;
1113 Source *s;
1115 cur_prompt = to;
1117 switch (to) {
1118 case PS1: /* command */
1119 #ifdef KSH
1120 /* Substitute ! and !! here, before substitutions are done
1121 * so ! in expanded variables are not expanded.
1122 * NOTE: this is not what at&t ksh does (it does it after
1123 * substitutions, POSIX doesn't say which is to be done.
1126 struct shf *shf;
1127 char * volatile ps1;
1128 Area *saved_atemp;
1130 ps1 = str_val(global("PS1"));
1131 shf = shf_sopen((char *) 0, strlen(ps1) * 2,
1132 SHF_WR | SHF_DYNAMIC, (struct shf *) 0);
1133 while (*ps1) {
1134 if (*ps1 != '!' || *++ps1 == '!')
1135 shf_putchar(*ps1++, shf);
1136 else
1137 shf_fprintf(shf, "%d",
1138 s ? s->line + 1 : 0);
1140 ps1 = shf_sclose(shf);
1141 saved_atemp = ATEMP;
1142 newenv(E_ERRH);
1143 if (ksh_sigsetjmp(e->jbuf, 0)) {
1144 prompt = safe_prompt;
1145 /* Don't print an error - assume it has already
1146 * been printed. Reason is we may have forked
1147 * to run a command and the child may be
1148 * unwinding its stack through this code as it
1149 * exits.
1151 } else
1152 prompt = str_save(substitute(ps1, 0),
1153 saved_atemp);
1154 quitenv();
1156 #else /* KSH */
1157 prompt = str_val(global("PS1"));
1158 #endif /* KSH */
1159 break;
1161 case PS2: /* command continuation */
1162 prompt = str_val(global("PS2"));
1163 break;
1167 /* See also related routine, promptlen() in edit.c */
1168 void
1169 pprompt(cp, ntruncate)
1170 const char *cp;
1171 int ntruncate;
1173 #if 0
1174 char nbuf[32];
1175 int c;
1177 while (*cp != 0) {
1178 if (*cp != '!')
1179 c = *cp++;
1180 else if (*++cp == '!')
1181 c = *cp++;
1182 else {
1183 int len;
1184 char *p;
1186 shf_snprintf(p = nbuf, sizeof(nbuf), "%d",
1187 source->line + 1);
1188 len = strlen(nbuf);
1189 if (ntruncate) {
1190 if (ntruncate >= len) {
1191 ntruncate -= len;
1192 continue;
1194 p += ntruncate;
1195 len -= ntruncate;
1196 ntruncate = 0;
1198 shf_write(p, len, shl_out);
1199 continue;
1201 if (ntruncate)
1202 --ntruncate;
1203 else
1204 shf_putc(c, shl_out);
1206 #endif /* 0 */
1207 shf_puts(cp + ntruncate, shl_out);
1208 shf_flush(shl_out);
1211 /* Read the variable part of a ${...} expression (ie, up to but not including
1212 * the :[-+?=#%] or close-brace.
1214 static char *
1215 get_brace_var(wsp, wp)
1216 XString *wsp;
1217 char *wp;
1219 enum parse_state {
1220 PS_INITIAL, PS_SAW_HASH, PS_IDENT,
1221 PS_NUMBER, PS_VAR1, PS_END
1223 state;
1224 char c;
1226 state = PS_INITIAL;
1227 while (1) {
1228 c = getsc();
1229 /* State machine to figure out where the variable part ends. */
1230 switch (state) {
1231 case PS_INITIAL:
1232 if (c == '#') {
1233 state = PS_SAW_HASH;
1234 break;
1236 /* fall through.. */
1237 case PS_SAW_HASH:
1238 if (letter(c))
1239 state = PS_IDENT;
1240 else if (digit(c))
1241 state = PS_NUMBER;
1242 else if (ctype(c, C_VAR1))
1243 state = PS_VAR1;
1244 else
1245 state = PS_END;
1246 break;
1247 case PS_IDENT:
1248 if (!letnum(c)) {
1249 state = PS_END;
1250 if (c == '[') {
1251 char *tmp, *p;
1253 if (!arraysub(&tmp))
1254 yyerror("missing ]\n");
1255 *wp++ = c;
1256 for (p = tmp; *p; ) {
1257 Xcheck(*wsp, wp);
1258 *wp++ = *p++;
1260 afree(tmp, ATEMP);
1261 c = getsc(); /* the ] */
1264 break;
1265 case PS_NUMBER:
1266 if (!digit(c))
1267 state = PS_END;
1268 break;
1269 case PS_VAR1:
1270 state = PS_END;
1271 break;
1272 case PS_END: /* keep gcc happy */
1273 break;
1275 if (state == PS_END) {
1276 *wp++ = '\0'; /* end of variable part */
1277 ungetsc(c);
1278 break;
1280 Xcheck(*wsp, wp);
1281 *wp++ = c;
1283 return wp;
1287 * Save an array subscript - returns true if matching bracket found, false
1288 * if eof or newline was found.
1289 * (Returned string double null terminated)
1291 static int
1292 arraysub(strp)
1293 char **strp;
1295 XString ws;
1296 char *wp;
1297 char c;
1298 int depth = 1; /* we are just past the initial [ */
1300 Xinit(ws, wp, 32, ATEMP);
1302 do {
1303 c = getsc();
1304 Xcheck(ws, wp);
1305 *wp++ = c;
1306 if (c == '[')
1307 depth++;
1308 else if (c == ']')
1309 depth--;
1310 } while (depth > 0 && c && c != '\n');
1312 *wp++ = '\0';
1313 *strp = Xclose(ws, wp);
1315 return depth == 0 ? 1 : 0;
1318 /* Unget a char: handles case when we are already at the start of the buffer */
1319 static const char *
1320 ungetsc(c)
1321 int c;
1323 if (backslash_skip)
1324 backslash_skip--;
1325 /* Don't unget eof... */
1326 if (source->str == null && c == '\0')
1327 return source->str;
1328 if (source->str > source->start)
1329 source->str--;
1330 else {
1331 Source *s;
1333 s = pushs(SREREAD, source->areap);
1334 s->ugbuf[0] = c; s->ugbuf[1] = '\0';
1335 s->start = s->str = s->ugbuf;
1336 s->next = source;
1337 source = s;
1339 return source->str;
1343 /* Called to get a char that isn't a \newline sequence. */
1344 static int
1345 getsc_bn ARGS((void))
1347 int c, c2;
1349 if (ignore_backslash_newline)
1350 return getsc_();
1352 if (backslash_skip == 1) {
1353 backslash_skip = 2;
1354 return getsc_();
1357 backslash_skip = 0;
1359 while (1) {
1360 c = getsc_();
1361 if (c == '\\') {
1362 if ((c2 = getsc_()) == '\n')
1363 /* ignore the \newline; get the next char... */
1364 continue;
1365 ungetsc(c2);
1366 backslash_skip = 1;
1368 return c;
1372 static Lex_state *
1373 push_state_(si, old_end)
1374 State_info *si;
1375 Lex_state *old_end;
1377 Lex_state *new = alloc(sizeof(Lex_state) * STATE_BSIZE, ATEMP);
1379 new[0].ls_info.base = old_end;
1380 si->base = &new[0];
1381 si->end = &new[STATE_BSIZE];
1382 return &new[1];
1385 static Lex_state *
1386 pop_state_(si, old_end)
1387 State_info *si;
1388 Lex_state *old_end;
1390 Lex_state *old_base = si->base;
1392 si->base = old_end->ls_info.base - STATE_BSIZE;
1393 si->end = old_end->ls_info.base;
1395 afree(old_base, ATEMP);
1397 return si->base + STATE_BSIZE - 1;