1 /* $NetBSD: lex.c,v 1.25 2007/07/16 14:07:01 christos Exp $ */
4 * Copyright (c) 1980, 1991, 1993
5 * The Regents of the University of California. All rights reserved.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the University nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 #include <sys/cdefs.h>
35 static char sccsid
[] = "@(#)lex.c 8.1 (Berkeley) 5/31/93";
37 __RCSID("$NetBSD: lex.c,v 1.25 2007/07/16 14:07:01 christos Exp $");
41 #include <sys/ioctl.h>
42 #include <sys/types.h>
55 * These lexical routines read input and form lists of words.
56 * There is some involved processing here, because of the complications
57 * of input buffering, and especially because of history substitution.
60 static Char
*word(void);
61 static int getC1(int);
62 static void getdol(void);
63 static void getexcl(int);
64 static struct Hist
*findev(Char
*, int);
65 static void setexclp(Char
*);
66 static int bgetc(void);
67 static void bfree(void);
68 static struct wordent
*gethent(int);
69 static int matchs(Char
*, Char
*);
70 static int getsel(int *, int *, int);
71 static struct wordent
*getsub(struct wordent
*);
72 static Char
*subword(Char
*, int, int *);
73 static struct wordent
*dosub(int, struct wordent
*, int);
76 * Peekc is a peek character for getC, peekread for readc.
77 * There is a subtlety here in many places... history routines
78 * will read ahead and then insert stuff into the input stream.
79 * If they push back a character then they must push it behind
80 * the text substituted by the history substitution. On the other
81 * hand in several places we need 2 peek characters. To make this
82 * all work, the history routines read with getC, and make use both
83 * of ungetC and unreadc. The key observation is that the state
84 * of getC at the call of a history reference is such that calls
85 * to getC from the history routines will always yield calls of
86 * readc, unless this peeking is involved. That is to say that during
87 * getexcl the variables lap, exclp, and exclnxt are all zero.
89 * Getdol invokes history substitution, hence the extra peek, peekd,
90 * which it can ungetD to be before history substitutions.
92 static Char peekc
= 0, peekd
= 0;
93 static Char peekread
= 0;
95 /* (Tail of) current word from ! subst */
96 static Char
*exclp
= NULL
;
98 /* The rest of the ! subst words */
99 static struct wordent
*exclnxt
= NULL
;
101 /* Count of remaining words in ! subst */
102 static int exclc
= 0;
104 /* "Globp" for alias resubstitution */
105 Char
**alvec
, *alvecp
;
109 * Labuf implements a general buffer for lookahead during lexical operations.
110 * Text which is to be placed in the input stream can be stuck here.
111 * We stick parsed ahead $ constructs during initial input,
112 * process id's from `$$', and modified variable values (from qualifiers
113 * during expansion in sh.dol.c) here.
115 static Char labuf
[BUFSIZE
];
118 * Lex returns to its caller not only a wordlist (as a "var" parameter)
119 * but also whether a history substitution occurred. This is used in
120 * the main (process) routine to determine whether to echo, and also
121 * when called by the alias routine to determine whether to keep the
124 static int hadhist
= 0;
127 * Avoid alias expansion recursion via \!#
133 #define getC(f) ((getCtmp = peekc) ? (peekc = 0, getCtmp) : getC1(f))
134 #define ungetC(c) peekc = c
135 #define ungetD(c) peekd = c
138 lex(struct wordent
*hp
)
144 hp
->next
= hp
->prev
= hp
;
149 while (c
== ' ' || c
== '\t');
150 if (c
== HISTSUB
&& intty
)
151 /* ^lef^rit from tty is short !:s^lef^rit */
157 * The following loop is written so that the links needed by freelex will
158 * be ready and rarin to go even if it is interrupted.
163 new = (struct wordent
*)xmalloc((size_t)sizeof(*wdp
));
170 } while (wdp
->word
[0] != '\n');
176 prlex(FILE *fp
, struct wordent
*sp0
)
182 (void)fprintf(fp
, "%s", vis_str(sp
->word
));
186 if (sp
->word
[0] != '\n')
187 (void) fputc(' ', fp
);
192 copylex(struct wordent
*hp
, struct wordent
*fp
)
201 new = (struct wordent
*)xmalloc((size_t)sizeof(*wdp
));
206 wdp
->word
= Strsave(fp
->word
);
208 } while (wdp
->word
[0] != '\n');
213 freelex(struct wordent
*vp
)
217 while (vp
->next
!= vp
) {
220 xfree((ptr_t
) fp
->word
);
229 Char wbuf
[BUFSIZE
], *wp
;
237 while ((c
= getC(DOALL
)) == ' ' || c
== '\t')
239 if (cmap(c
, _META
| _ESC
))
292 else if (c
== '\\') {
299 * if (c1 == '`') c = ' '; else
306 else if (c
== '\n') {
307 seterror(ERR_UNMATCHED
, c1
);
312 else if (cmap(c
, _META
| _QF
| _QB
| _ESC
)) {
324 else if (cmap(c
, _QF
| _QB
)) { /* '"` */
326 dolflg
= c
== '"' ? DOALL
: DOEXCL
;
328 else if (c
!= '#' || !intty
) {
338 seterror(ERR_WTOOLONG
);
345 return (Strsave(wbuf
));
354 if ((c
= peekc
) != '\0') {
359 if ((c
= *lap
++) == 0)
362 if (cmap(c
, _META
| _QF
| _QB
))
367 if ((c
= peekd
) != '\0') {
372 if ((c
= *exclp
++) != '\0')
374 if (exclnxt
&& --exclc
>= 0) {
375 exclnxt
= exclnxt
->next
;
376 setexclp(exclnxt
->word
);
383 exclnxt
= exclnxt
->next
;
387 setexclp(exclnxt
->word
);
391 if (c
== '$' && (flag
& DODOL
)) {
395 if (c
== HIST
&& (flag
& DOEXCL
)) {
407 Char name
[4*MAXVARLEN
+1], *ep
, *np
;
409 int special
, toolong
;
412 np
= name
, *np
++ = '$';
413 c
= sc
= getC(DOEXCL
);
414 if (any("\t \n", c
)) {
420 *np
++ = c
, c
= getC(DOEXCL
);
421 if (c
== '#' || c
== '?')
422 special
++, *np
++ = c
, c
= getC(DOEXCL
);
429 seterror(ERR_SPDOLLT
);
436 seterror(ERR_NEWLINE
);
442 seterror(ERR_SPSTAR
);
450 /* let $?0 pass for now */
458 /* we know that np < &name[4] */
460 while ((c
= getC(DOEXCL
)) != '\0'){
469 else if (letter(c
)) {
470 /* we know that np < &name[4] */
473 while ((c
= getC(DOEXCL
)) != '\0') {
474 /* Bugfix for ${v123x} from Chris Torek, DAS DEC-90. */
475 if (!letter(c
) && !Isdigit(c
))
485 seterror(ERR_VARILL
);
490 seterror(ERR_VARTOOLONG
);
500 * Name up to here is a max of MAXVARLEN + 8.
502 ep
= &np
[2 * MAXVARLEN
+ 8];
505 * Michael Greim: Allow $ expansion to take place in selector
506 * expressions. (limits the number of characters returned)
508 c
= getC(DOEXCL
| DODOL
);
512 seterror(ERR_NLINDEX
);
522 seterror(ERR_SELOVFL
);
529 * Name up to here is a max of 2 * MAXVARLEN + 8.
533 * if the :g modifier is followed by a newline, then error right away!
536 int amodflag
, gmodflag
;
541 *np
++ = c
, c
= getC(DOEXCL
);
542 if (c
== 'g' || c
== 'a') {
547 *np
++ = c
; c
= getC(DOEXCL
);
549 if ((c
== 'g' && !gmodflag
) || (c
== 'a' && !amodflag
)) {
554 *np
++ = c
; c
= getC(DOEXCL
);
557 /* scan s// [eichin:19910926.0512EST] */
563 if (!delim
|| letter(delim
)
564 || Isdigit(delim
) || any(" \t\n", delim
)) {
565 seterror(ERR_BADSUBST
);
568 while ((c
= getC(0)) != (-1)) {
570 if(c
== delim
) delimcnt
--;
574 seterror(ERR_BADSUBST
);
579 if (!any("htrqxes", c
)) {
580 if ((amodflag
|| gmodflag
) && c
== '\n')
581 stderror(ERR_VARSYN
); /* strike */
582 seterror(ERR_VARMOD
, c
);
588 while ((c
= getC(DOEXCL
)) == ':');
597 seterror(ERR_MISSING
, '}');
614 if (Strlen(cp
) + (lap
? Strlen(lap
) : 0) >=
615 (sizeof(labuf
) - 4) / sizeof(Char
)) {
616 seterror(ERR_EXPOVFL
);
620 (void)Strcpy(buf
, lap
);
621 (void)Strcpy(labuf
, cp
);
623 (void)Strcat(labuf
, buf
);
627 static Char lhsb
[32];
628 static Char slhs
[32];
629 static Char rhsb
[64];
635 struct wordent
*hp
, *ip
;
636 int c
, dol
, left
, right
;
653 for (ip
= hp
->next
->next
; ip
!= alhistt
; ip
= ip
->next
)
656 for (ip
= hp
->next
->next
; ip
!= hp
->prev
; ip
= ip
->next
)
658 left
= 0, right
= dol
;
660 ungetC('s'), unreadc(HISTSUB
), c
= ':';
664 if (!any(":^$*-%", c
))
670 if (letter(c
) || c
== '&') {
672 left
= 0, right
= dol
;
678 if (!getsel(&left
, &right
, dol
))
684 if (!getsel(&left
, &right
, dol
))
689 exclc
= right
- left
+ 1;
692 if (sc
== HISTSUB
|| c
== ':') {
702 seterror(ERR_BADBANG
);
707 static struct wordent
*
708 getsub(struct wordent
*en
)
710 Char orhsb
[sizeof(rhsb
) / sizeof(Char
)];
719 if (c
== 'g' || c
== 'a') {
720 global
|= (c
== 'g') ? 1 : 2;
723 if (((c
=='g') && !(global
& 1)) || ((c
== 'a') && !(global
& 2))) {
724 global
|= (c
== 'g') ? 1 : 2;
743 seterror(ERR_NOSUBST
);
746 (void) Strcpy(lhsb
, slhs
);
756 if (letter(delim
) || Isdigit(delim
) || any(" \t\n", delim
)) {
759 seterror(ERR_BADSUBST
);
771 if (cp
> &lhsb
[sizeof(lhsb
) / sizeof(Char
) - 2]) {
773 seterror(ERR_BADSUBST
);
778 if (c
!= delim
&& c
!= '\\')
785 else if (lhsb
[0] == 0) {
790 (void)Strcpy(orhsb
, cp
);
801 if (&cp
[Strlen(orhsb
)] > &rhsb
[sizeof(rhsb
) /
804 (void)Strcpy(cp
, orhsb
);
809 if (cp
> &rhsb
[sizeof(rhsb
) / sizeof(Char
) - 2]) {
810 seterror(ERR_RHSLONG
);
815 if (c
!= delim
/* && c != '~' */ )
825 seterror(ERR_BADBANGMOD
, c
);
828 (void)Strcpy(slhs
, lhsb
);
830 en
= dosub(sc
, en
, global
);
832 while ((c
= getC(0)) == ':');
837 static struct wordent
*
838 dosub(int sc
, struct wordent
*en
, int global
)
840 struct wordent lexi
, *hp
, *wdp
;
851 struct wordent
*new = (struct wordent
*)xcalloc(1, sizeof *wdp
);
860 Char
*tword
, *otword
;
862 if ((global
& 1) || didsub
== 0) {
863 tword
= subword(en
->word
, sc
, &didone
);
867 while (didone
&& tword
!= STRNULL
) {
869 tword
= subword(otword
, sc
, &didone
);
870 if (Strcmp(tword
, otword
) == 0) {
871 xfree((ptr_t
) otword
);
875 xfree((ptr_t
)otword
);
880 tword
= Strsave(en
->word
);
885 seterror(ERR_MODFAIL
);
887 return (&enthist(-1000, &lexi
, 0)->Hlex
);
891 subword(Char
*cp
, int type
, int *adid
)
905 wp
= domod(cp
, type
);
907 return (Strsave(cp
));
913 for (mp
= cp
; *mp
; mp
++)
914 if (matchs(mp
, lhsb
)) {
915 for (np
= cp
; np
< mp
;)
917 for (np
= rhsb
; *np
; np
++)
925 seterror(ERR_SUBOVFL
);
933 seterror(ERR_SUBOVFL
);
937 (void) Strcat(wp
, lhsb
);
944 seterror(ERR_SUBOVFL
);
948 (void) Strcat(wp
, mp
);
950 return (Strsave(wbuf
));
952 return (Strsave(cp
));
957 domod(Char
*cp
, int type
)
966 for (xp
= wp
; (c
= *xp
) != '\0'; xp
++)
967 if ((c
!= ' ' && c
!= '\t') || type
== 'q')
972 if (!any(short2str(cp
), '/'))
973 return (type
== 't' ? Strsave(cp
) : 0);
978 xp
= Strsave(cp
), xp
[wp
- cp
] = 0;
980 xp
= Strsave(wp
+ 1);
985 for (wp
--; wp
>= cp
&& *wp
!= '/'; wp
--)
988 xp
= Strsave(wp
+ 1);
990 xp
= Strsave(cp
), xp
[wp
- cp
] = 0;
993 return (Strsave(type
== 'e' ? STRNULL
: cp
));
1001 matchs(Char
*str
, Char
*pat
)
1003 while (*str
&& *pat
&& *str
== *pat
)
1009 getsel(int *al
, int *ar
, int dol
)
1019 if (quesarg
== -1) {
1020 seterror(ERR_BADBANGARG
);
1057 while (Isdigit(c
)) {
1058 i
= i
* 10 + c
- '0';
1080 if (*al
> *ar
|| *ar
> dol
) {
1081 seterror(ERR_BADBANGARG
);
1088 static struct wordent
*
1098 c
= sc
== HISTSUB
? HIST
: getC(0);
1112 if (lastev
== eventno
&& alhistp
)
1116 case '#': /* !# is command being typed in (mrh) */
1118 seterror(ERR_HISTLOOP
);
1129 if (any("(=~", c
)) {
1136 while (!cmap(c
, _ESC
| _META
| _QF
| _QB
) && !any("${}:", c
)) {
1137 if (event
!= -1 && Isdigit(c
))
1138 event
= event
* 10 + c
- '0';
1141 if (np
< &lhsb
[sizeof(lhsb
) / sizeof(Char
) - 2])
1153 * History had only digits
1156 event
= eventno
+ (alhistp
== 0) - (event
? event
: 0);
1159 hp
= findev(lhsb
, 0);
1173 if (np
< &lhsb
[sizeof(lhsb
) / sizeof(Char
) - 2])
1178 seterror(ERR_NOSEARCH
);
1184 hp
= findev(lhsb
, 1);
1190 for (hp
= Histlist
.Hnext
; hp
; hp
= hp
->Hnext
)
1191 if (hp
->Hnum
== event
) {
1199 seterror(ERR_NOEVENT
, str
);
1203 static struct Hist
*
1204 findev(Char
*cp
, int anyarg
)
1208 for (hp
= Histlist
.Hnext
; hp
; hp
= hp
->Hnext
) {
1217 * The entries added by alias substitution don't have a newline but do
1218 * have a negative event number. Savehist() trims off these entries,
1219 * but it happens before alias expansion, too early to delete those
1220 * from the previous command.
1224 if (lp
->word
[0] == '\n')
1232 while (*p
++ == *q
++);
1236 for (dp
= lp
->word
; *dp
; dp
++) {
1244 while (*p
++ == *q
++);
1248 } while (lp
->word
[0] != '\n');
1250 seterror(ERR_NOEVENT
, vis_str(cp
));
1258 if (cp
&& cp
[0] == '\n')
1272 static int sincereal
;
1276 if ((c
= peekread
) != '\0') {
1284 if ((c
= *alvecp
++) != '\0')
1286 if (alvec
&& *alvec
) {
1297 if ((alvecp
= *alvec
) != '\0') {
1301 /* Infinite source! */
1306 if ((c
= *evalp
++) != '\0')
1308 if (evalvec
&& *evalvec
) {
1316 if (evalvec
== (Char
**) 1) {
1320 if ((evalp
= *evalvec
) != '\0') {
1324 evalvec
= (Char
**) 1;
1328 if (arginp
== (Char
*) 1 || onelflg
== 1) {
1334 if ((c
= *arginp
++) == 0) {
1335 arginp
= (Char
*) 1;
1346 /* was isatty but raw with ignoreeof yields problems */
1347 if (tcgetattr(SHIN
, &tty
) == 0 && (tty
.c_lflag
& ICANON
))
1349 /* was 'short' for FILEC */
1352 if (++sincereal
> 25)
1355 (ctpgrp
= tcgetpgrp(FSHTTY
)) != -1 &&
1357 (void)tcsetpgrp(FSHTTY
, tpgrp
);
1358 (void)kill(-ctpgrp
, SIGHUP
);
1359 (void)fprintf(csherr
, "Reset tty pgrp from %ld to %ld\n",
1360 (long)ctpgrp
, (long)tpgrp
);
1363 if (adrof(STRignoreeof
)) {
1365 (void)fprintf(csherr
,"\nUse \"logout\" to logout.\n");
1367 (void)fprintf(csherr
,"\nUse \"exit\" to leave csh.\n");
1378 if (c
== '\n' && onelflg
)
1388 char tbuf
[BUFSIZE
+ 1];
1389 Char ttyline
[BUFSIZE
];
1390 int c
, buf
, numleft
, off
, roomleft
;
1394 char tbuf
[BUFSIZE
+ 1];
1399 if (fseekp
< fbobp
|| fseekp
> feobp
) {
1400 fbobp
= feobp
= fseekp
;
1401 (void)lseek(SHIN
, fseekp
, SEEK_SET
);
1403 if (fseekp
== feobp
) {
1408 c
= read(SHIN
, tbuf
, BUFSIZE
);
1409 while (c
< 0 && errno
== EINTR
);
1412 for (i
= 0; i
< c
; i
++)
1413 fbuf
[0][i
] = (unsigned char) tbuf
[i
];
1416 c
= fbuf
[0][fseekp
- fbobp
];
1422 buf
= (int) fseekp
/ BUFSIZE
;
1423 if (buf
>= fblocks
) {
1426 nfbuf
= (Char
**)xcalloc((size_t) (fblocks
+ 2), sizeof(char **));
1428 (void)blkcpy(nfbuf
, fbuf
);
1429 xfree((ptr_t
) fbuf
);
1432 fbuf
[fblocks
] = (Char
*)xcalloc(BUFSIZE
, sizeof(Char
));
1437 if (fseekp
>= feobp
) {
1438 buf
= (int) feobp
/ BUFSIZE
;
1439 off
= (int) feobp
% BUFSIZE
;
1440 roomleft
= BUFSIZE
- off
;
1443 roomleft
= BUFSIZE
- off
;
1445 if (filec
&& intty
) {
1446 c
= numleft
? numleft
: tenex(ttyline
, BUFSIZE
);
1448 /* start with fresh buffer */
1449 feobp
= fseekp
= fblocks
* BUFSIZE
;
1454 (void)memcpy(fbuf
[buf
] + off
, ttyline
, c
* sizeof(Char
));
1459 c
= read(SHIN
, tbuf
, roomleft
);
1462 Char
*ptr
= fbuf
[buf
] + off
;
1464 for (i
= 0; i
< c
; i
++)
1465 ptr
[i
] = (unsigned char) tbuf
[i
];
1472 if (errno
== EWOULDBLOCK
) {
1475 (void)ioctl(SHIN
, FIONBIO
, (ioctl_t
) & iooff
);
1477 else if (errno
!= EINTR
)
1486 if (filec
&& !intty
)
1490 c
= fbuf
[buf
][(int)fseekp
% BUFSIZE
];
1504 sb
= (int)(fseekp
- 1) / BUFSIZE
;
1506 for (i
= 0; i
< sb
; i
++)
1507 xfree((ptr_t
) fbuf
[i
]);
1508 (void)blkcpy(fbuf
, &fbuf
[sb
]);
1509 fseekp
-= BUFSIZE
* sb
;
1510 feobp
-= BUFSIZE
* sb
;
1516 bseek(struct Ain
*l
)
1518 switch (aret
= l
->type
) {
1524 evalvec
= l
->a_seek
;
1531 (void)fprintf(csherr
, "Bad seek type %d\n", aret
);
1537 btell(struct Ain
*l
)
1539 switch (l
->type
= aret
) {
1545 l
->a_seek
= evalvec
;
1553 (void)fprintf(csherr
, "Bad seek type %d\n", aret
);
1561 (void)lseek(SHIN
, (off_t
) 0, SEEK_END
);
1576 if (arginp
|| onelflg
|| intty
)
1578 if (lseek(SHIN
, (off_t
) 0, SEEK_CUR
) < 0 || errno
== ESPIPE
)
1580 fbuf
= (Char
**)xcalloc(2, sizeof(Char
**));
1582 fbuf
[0] = (Char
*)xcalloc(BUFSIZE
, sizeof(Char
));
1583 fseekp
= fbobp
= feobp
= lseek(SHIN
, (off_t
) 0, SEEK_CUR
);