Sync usage with man page.
[netbsd-mini2440.git] / dist / nawk / lex.c
blobd4c1b34e6b11032011d044b633fdc7b2d8d798ee
1 /****************************************************************
2 Copyright (C) Lucent Technologies 1997
3 All Rights Reserved
5 Permission to use, copy, modify, and distribute this software and
6 its documentation for any purpose and without fee is hereby
7 granted, provided that the above copyright notice appear in all
8 copies and that both that the copyright notice and this
9 permission notice and warranty disclaimer appear in supporting
10 documentation, and that the name Lucent Technologies or any of
11 its entities not be used in advertising or publicity pertaining
12 to distribution of the software without specific, written prior
13 permission.
15 LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
16 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
17 IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
18 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
20 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
21 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
22 THIS SOFTWARE.
23 ****************************************************************/
25 #if HAVE_NBTOOL_CONFIG_H
26 #include "nbtool_config.h"
27 #endif
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <ctype.h>
33 #include "awk.h"
34 #include "awkgram.h"
36 extern YYSTYPE yylval;
37 extern int infunc;
39 int lineno = 1;
40 int bracecnt = 0;
41 int brackcnt = 0;
42 int parencnt = 0;
/* One row of the reserved-word table searched by binsearch(). */
typedef struct Keyword {
	const char	*word;	/* spelling of the keyword or built-in */
	int		sub;	/* value word() stores in yylval.i on a match */
	int		type;	/* token type word() returns on a match */
} Keyword;

50 int peek(void);
51 int gettok(char **, int *);
52 int binsearch(const char *, const Keyword *, int);
54 const Keyword keywords[] ={ /* keep sorted: binary searched */
55 { "BEGIN", XBEGIN, XBEGIN },
56 { "END", XEND, XEND },
57 { "NF", VARNF, VARNF },
58 { "atan2", FATAN, BLTIN },
59 { "break", BREAK, BREAK },
60 { "close", CLOSE, CLOSE },
61 { "continue", CONTINUE, CONTINUE },
62 { "cos", FCOS, BLTIN },
63 { "delete", DELETE, DELETE },
64 { "do", DO, DO },
65 { "else", ELSE, ELSE },
66 { "exit", EXIT, EXIT },
67 { "exp", FEXP, BLTIN },
68 { "fflush", FFLUSH, BLTIN },
69 { "for", FOR, FOR },
70 { "func", FUNC, FUNC },
71 { "function", FUNC, FUNC },
72 { "gensub", GENSUB, GENSUB },
73 { "getline", GETLINE, GETLINE },
74 { "gsub", GSUB, GSUB },
75 { "if", IF, IF },
76 { "in", IN, IN },
77 { "index", INDEX, INDEX },
78 { "int", FINT, BLTIN },
79 { "length", FLENGTH, BLTIN },
80 { "log", FLOG, BLTIN },
81 { "match", MATCHFCN, MATCHFCN },
82 { "next", NEXT, NEXT },
83 { "nextfile", NEXTFILE, NEXTFILE },
84 { "print", PRINT, PRINT },
85 { "printf", PRINTF, PRINTF },
86 { "rand", FRAND, BLTIN },
87 { "return", RETURN, RETURN },
88 { "sin", FSIN, BLTIN },
89 { "split", SPLIT, SPLIT },
90 { "sprintf", SPRINTF, SPRINTF },
91 { "sqrt", FSQRT, BLTIN },
92 { "srand", FSRAND, BLTIN },
93 { "strftime", FSTRFTIME, BLTIN },
94 { "sub", SUB, SUB },
95 { "substr", SUBSTR, SUBSTR },
96 { "system", FSYSTEM, BLTIN },
97 { "systime", FSYSTIME, BLTIN },
98 { "tolower", FTOLOWER, BLTIN },
99 { "toupper", FTOUPPER, BLTIN },
100 { "while", WHILE, WHILE },
/*
 * RET(x): return token x from the enclosing function; when the global
 * dbg flag is set, first print the token's name (via tokname()).
 *
 * Wrapped in do { } while (0) so that "RET(x);" is a single statement
 * and stays correct in an unbraced if/else arm.  The argument is
 * evaluated exactly once.
 */
#define	RET(x)	do { \
		int ret_tok_ = (x); \
		if (dbg) \
			printf("lex %s\n", tokname(ret_tok_)); \
		return ret_tok_; \
	} while (0)

/* peek: report the next input character without consuming it. */
int peek(void)
{
	int next = input();

	unput(next);	/* hand it straight back to the input stream */
	return next;
}

112 int gettok(char **pbuf, int *psz) /* get next input token */
114 int c, retc;
115 uschar *buf = (uschar *) *pbuf;
116 int sz = *psz;
117 uschar *bp = buf;
119 c = input();
120 if (c == 0)
121 return 0;
122 buf[0] = c;
123 buf[1] = 0;
124 if (!isalnum(c) && c != '.' && c != '_')
125 return c;
127 *bp++ = c;
128 if (isalpha(c) || c == '_') { /* it's a varname */
129 for ( ; (c = input()) != 0; ) {
130 if (bp-buf >= sz)
131 if (!adjbuf(&buf, &sz, bp-buf+2, 100, &bp, "gettok"))
132 FATAL( "out of space for name %.10s...", buf );
133 if (isalnum(c) || c == '_')
134 *bp++ = c;
135 else {
136 *bp = 0;
137 unput(c);
138 break;
141 *bp = 0;
142 retc = 'a'; /* alphanumeric */
143 } else { /* maybe it's a number, but could be . */
144 char *rem;
145 /* read input until can't be a number */
146 for ( ; (c = input()) != 0; ) {
147 if (bp-buf >= sz)
148 if (!adjbuf(&buf, &sz, bp-buf+2, 100, &bp, "gettok"))
149 FATAL( "out of space for number %.10s...", buf );
150 if (isdigit(c) || c == 'e' || c == 'E'
151 || c == '.' || c == '+' || c == '-')
152 *bp++ = c;
153 else {
154 unput(c);
155 break;
158 *bp = 0;
159 strtod(buf, &rem); /* parse the number */
160 if (rem == (char *)buf) { /* it wasn't a valid number at all */
161 buf[1] = 0; /* return one character as token */
162 retc = buf[0]; /* character is its own type */
163 unputstr(rem+1); /* put rest back for later */
164 } else { /* some prefix was a number */
165 unputstr(rem); /* put rest back for later */
166 rem[0] = 0; /* truncate buf after number part */
167 retc = '0'; /* type is number */
170 *pbuf = buf;
171 *psz = sz;
172 return retc;
/* Forward declarations for the token builders used by yylex(). */
int	word(char *w);
int	string(void);
int	regexpr(void);

/* One-shot flags consulted at the top of yylex(). */
int	sc  = 0;	/* 1 => return a } right now */
int	reg = 0;	/* 1 => return a REGEXPR now */

181 int yylex(void)
183 int c;
184 static char *buf = 0;
185 static int bufsize = 5; /* BUG: setting this small causes core dump! */
187 if (buf == 0 && (buf = (char *) malloc(bufsize)) == NULL)
188 FATAL( "out of space in yylex" );
189 if (sc) {
190 sc = 0;
191 RET('}');
193 if (reg) {
194 reg = 0;
195 return regexpr();
197 for (;;) {
198 c = gettok(&buf, &bufsize);
199 if (c == 0)
200 return 0;
201 if (isalpha(c) || c == '_')
202 return word(buf);
203 if (isdigit(c)) {
204 yylval.cp = setsymtab(buf, tostring(buf), atof(buf), CON|NUM, symtab);
205 /* should this also have STR set? */
206 RET(NUMBER);
209 yylval.i = c;
210 switch (c) {
211 case '\n': /* {EOL} */
212 RET(NL);
213 case '\r': /* assume \n is coming */
214 case ' ': /* {WS}+ */
215 case '\t':
216 break;
217 case '#': /* #.* strip comments */
218 while ((c = input()) != '\n' && c != 0)
220 unput(c);
221 break;
222 case ';':
223 RET(';');
224 case '\\':
225 if (peek() == '\n') {
226 input();
227 } else if (peek() == '\r') {
228 input(); input(); /* \n */
229 lineno++;
230 } else {
231 RET(c);
233 break;
234 case '&':
235 if (peek() == '&') {
236 input(); RET(AND);
237 } else
238 RET('&');
239 case '|':
240 if (peek() == '|') {
241 input(); RET(BOR);
242 } else
243 RET('|');
244 case '!':
245 if (peek() == '=') {
246 input(); yylval.i = NE; RET(NE);
247 } else if (peek() == '~') {
248 input(); yylval.i = NOTMATCH; RET(MATCHOP);
249 } else
250 RET(NOT);
251 case '~':
252 yylval.i = MATCH;
253 RET(MATCHOP);
254 case '<':
255 if (peek() == '=') {
256 input(); yylval.i = LE; RET(LE);
257 } else {
258 yylval.i = LT; RET(LT);
260 case '=':
261 if (peek() == '=') {
262 input(); yylval.i = EQ; RET(EQ);
263 } else {
264 yylval.i = ASSIGN; RET(ASGNOP);
266 case '>':
267 if (peek() == '=') {
268 input(); yylval.i = GE; RET(GE);
269 } else if (peek() == '>') {
270 input(); yylval.i = APPEND; RET(APPEND);
271 } else {
272 yylval.i = GT; RET(GT);
274 case '+':
275 if (peek() == '+') {
276 input(); yylval.i = INCR; RET(INCR);
277 } else if (peek() == '=') {
278 input(); yylval.i = ADDEQ; RET(ASGNOP);
279 } else
280 RET('+');
281 case '-':
282 if (peek() == '-') {
283 input(); yylval.i = DECR; RET(DECR);
284 } else if (peek() == '=') {
285 input(); yylval.i = SUBEQ; RET(ASGNOP);
286 } else
287 RET('-');
288 case '*':
289 if (peek() == '=') { /* *= */
290 input(); yylval.i = MULTEQ; RET(ASGNOP);
291 } else if (peek() == '*') { /* ** or **= */
292 input(); /* eat 2nd * */
293 if (peek() == '=') {
294 input(); yylval.i = POWEQ; RET(ASGNOP);
295 } else {
296 RET(POWER);
298 } else
299 RET('*');
300 case '/':
301 RET('/');
302 case '%':
303 if (peek() == '=') {
304 input(); yylval.i = MODEQ; RET(ASGNOP);
305 } else
306 RET('%');
307 case '^':
308 if (peek() == '=') {
309 input(); yylval.i = POWEQ; RET(ASGNOP);
310 } else
311 RET(POWER);
313 case '$':
314 /* BUG: awkward, if not wrong */
315 c = gettok(&buf, &bufsize);
316 if (isalpha(c)) {
317 if (strcmp(buf, "NF") == 0) { /* very special */
318 unputstr("(NF)");
319 RET(INDIRECT);
321 c = peek();
322 if (c == '(' || c == '[' || (infunc && isarg(buf) >= 0)) {
323 unputstr(buf);
324 RET(INDIRECT);
326 yylval.cp = setsymtab(buf, "", 0.0, STR|NUM, symtab);
327 RET(IVAR);
328 } else if (c == 0) { /* */
329 SYNTAX( "unexpected end of input after $" );
330 RET(';');
331 } else {
332 unputstr(buf);
333 RET(INDIRECT);
336 case '}':
337 if (--bracecnt < 0)
338 SYNTAX( "extra }" );
339 sc = 1;
340 RET(';');
341 case ']':
342 if (--brackcnt < 0)
343 SYNTAX( "extra ]" );
344 RET(']');
345 case ')':
346 if (--parencnt < 0)
347 SYNTAX( "extra )" );
348 RET(')');
349 case '{':
350 bracecnt++;
351 RET('{');
352 case '[':
353 brackcnt++;
354 RET('[');
355 case '(':
356 parencnt++;
357 RET('(');
359 case '"':
360 return string(); /* BUG: should be like tran.c ? */
362 default:
363 RET(c);
368 int string(void)
370 int c, n;
371 uschar *s, *bp;
372 static uschar *buf = 0;
373 static int bufsz = 500;
375 if (buf == 0 && (buf = (char *) malloc(bufsz)) == NULL)
376 FATAL("out of space for strings");
377 for (bp = buf; (c = input()) != '"'; ) {
378 if (!adjbuf(&buf, &bufsz, bp-buf+2, 500, &bp, "string"))
379 FATAL("out of space for string %.10s...", buf);
380 switch (c) {
381 case '\n':
382 case '\r':
383 case 0:
384 SYNTAX( "non-terminated string %.10s...", buf );
385 lineno++;
386 if (c == 0) /* hopeless */
387 FATAL( "giving up" );
388 break;
389 case '\\':
390 c = input();
391 switch (c) {
392 case '\n': break;
393 case '"': *bp++ = '"'; break;
394 case 'n': *bp++ = '\n'; break;
395 case 't': *bp++ = '\t'; break;
396 case 'f': *bp++ = '\f'; break;
397 case 'r': *bp++ = '\r'; break;
398 case 'b': *bp++ = '\b'; break;
399 case 'v': *bp++ = '\v'; break;
400 case 'a': *bp++ = '\007'; break;
401 case '\\': *bp++ = '\\'; break;
403 case '0': case '1': case '2': /* octal: \d \dd \ddd */
404 case '3': case '4': case '5': case '6': case '7':
405 n = c - '0';
406 if ((c = peek()) >= '0' && c < '8') {
407 n = 8 * n + input() - '0';
408 if ((c = peek()) >= '0' && c < '8')
409 n = 8 * n + input() - '0';
411 *bp++ = n;
412 break;
414 case 'x': /* hex \x0-9a-fA-F + */
415 { char xbuf[100], *px;
416 for (px = xbuf; (c = input()) != 0 && px-xbuf < 100-2; ) {
417 if (isdigit(c)
418 || (c >= 'a' && c <= 'f')
419 || (c >= 'A' && c <= 'F'))
420 *px++ = c;
421 else
422 break;
424 *px = 0;
425 unput(c);
426 sscanf(xbuf, "%x", &n);
427 *bp++ = n;
428 break;
431 default:
432 WARNING("warning: escape sequence `\\%c' "
433 "treated as plain `%c'", c, c);
434 *bp++ = c;
435 break;
437 break;
438 default:
439 *bp++ = c;
440 break;
443 *bp = 0;
444 s = tostring(buf);
445 *bp++ = ' '; *bp++ = 0;
446 yylval.cp = setsymtab(buf, s, 0.0, CON|STR|DONTFREE, symtab);
447 RET(STRING);
451 int binsearch(const char *w, const Keyword *kp, int n)
453 int cond, low, mid, high;
455 low = 0;
456 high = n - 1;
457 while (low <= high) {
458 mid = (low + high) / 2;
459 if ((cond = strcmp(w, kp[mid].word)) < 0)
460 high = mid - 1;
461 else if (cond > 0)
462 low = mid + 1;
463 else
464 return mid;
466 return -1;
469 int word(char *w)
471 const Keyword *kp;
472 int c, n;
474 n = binsearch(w, keywords, sizeof(keywords)/sizeof(keywords[0]));
475 /* BUG: this ought to be inside the if; in theory could fault (daniel barrett) */
476 kp = keywords + n;
477 if (n != -1) { /* found in table */
478 yylval.i = kp->sub;
479 switch (kp->type) { /* special handling */
480 case BLTIN:
481 if (kp->sub == FSYSTEM && safe)
482 SYNTAX( "system is unsafe" );
483 RET(kp->type);
484 case FUNC:
485 if (infunc)
486 SYNTAX( "illegal nested function" );
487 RET(kp->type);
488 case RETURN:
489 if (!infunc)
490 SYNTAX( "return not in function" );
491 RET(kp->type);
492 case VARNF:
493 yylval.cp = setsymtab("NF", "", 0.0, NUM, symtab);
494 RET(VARNF);
495 default:
496 RET(kp->type);
499 c = peek(); /* look for '(' */
500 if (c != '(' && infunc && (n=isarg(w)) >= 0) {
501 yylval.i = n;
502 RET(ARG);
503 } else {
504 yylval.cp = setsymtab(w, "", 0.0, STR|NUM|DONTFREE, symtab);
505 if (c == '(') {
506 RET(CALL);
507 } else {
508 RET(VAR);
513 void startreg(void) /* next call to yylex will return a regular expression */
515 reg = 1;
518 int regexpr(void)
520 int c;
521 static uschar *buf = 0;
522 static int bufsz = 500;
523 uschar *bp;
525 if (buf == 0 && (buf = (char *) malloc(bufsz)) == NULL)
526 FATAL("out of space for rex expr");
527 bp = buf;
528 for ( ; (c = input()) != '/' && c != 0; ) {
529 if (!adjbuf(&buf, &bufsz, bp-buf+3, 500, &bp, "regexpr"))
530 FATAL("out of space for reg expr %.10s...", buf);
531 if (c == '\n') {
532 SYNTAX( "newline in regular expression %.10s...", buf );
533 unput('\n');
534 break;
535 } else if (c == '\\') {
536 *bp++ = '\\';
537 *bp++ = input();
538 } else {
539 *bp++ = c;
542 *bp = 0;
543 if (c == 0)
544 SYNTAX("non-terminated regular expression %.10s...", buf);
545 yylval.s = tostring(buf);
546 unput('/');
547 RET(REGEXPR);
/* low-level lexical stuff, sort of inherited from lex */

char	ebuf[300];		/* circular record of characters returned by input() */
char	*ep = ebuf;		/* next slot in ebuf; unput() steps it back */
char	yysbuf[100];		/* pushback buffer */
char	*yysptr = yysbuf;	/* one past the last pushed-back character */
FILE	*yyin = 0;

558 int input(void) /* get next lexical input character */
560 int c;
561 extern char *lexprog;
563 if (yysptr > yysbuf)
564 c = (uschar)*--yysptr;
565 else if (lexprog != NULL) { /* awk '...' */
566 if ((c = (uschar)*lexprog) != 0)
567 lexprog++;
568 } else /* awk -f ... */
569 c = pgetc();
570 if (c == '\n')
571 lineno++;
572 else if (c == EOF)
573 c = 0;
574 if (ep >= ebuf + sizeof ebuf)
575 ep = ebuf;
576 return *ep++ = c;
579 void unput(int c) /* put lexical character back on input */
581 if (c == '\n')
582 lineno--;
583 if (yysptr >= yysbuf + sizeof(yysbuf))
584 FATAL("pushed back too much: %.20s...", yysbuf);
585 *yysptr++ = c;
586 if (--ep < ebuf)
587 ep = ebuf + sizeof(ebuf) - 1;
/*
 * unputstr: put the string s back on the input.  Characters are pushed
 * last-to-first so that input() returns them in their original order.
 */
void unputstr(const char *s)
{
	const char *p = s + strlen(s);

	while (p > s)
		unput(*--p);
}