1 /****************************************************************
2 Copyright (C) Lucent Technologies 1997
5 Permission to use, copy, modify, and distribute this software and
6 its documentation for any purpose and without fee is hereby
7 granted, provided that the above copyright notice appear in all
8 copies and that both that the copyright notice and this
9 permission notice and warranty disclaimer appear in supporting
10 documentation, and that the name Lucent Technologies or any of
11 its entities not be used in advertising or publicity pertaining
12 to distribution of the software without specific, written prior
15 LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
16 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
17 IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
18 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
20 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
21 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
23 ****************************************************************/
32 extern YYSTYPE yylval
;
40 typedef struct Keyword
{
46 Keyword keywords
[] ={ /* keep sorted: binary searched */
47 { "BEGIN", XBEGIN
, XBEGIN
},
48 { "END", XEND
, XEND
},
49 { "NF", VARNF
, VARNF
},
50 { "and", BIO_FAND
, BLTIN
},
51 { "atan2", FATAN
, BLTIN
},
52 { "break", BREAK
, BREAK
},
53 { "close", CLOSE
, CLOSE
},
54 { "continue", CONTINUE
, CONTINUE
},
55 { "cos", FCOS
, BLTIN
},
56 { "delete", DELETE
, DELETE
},
58 { "else", ELSE
, ELSE
},
59 { "exit", EXIT
, EXIT
},
60 { "exp", FEXP
, BLTIN
},
61 { "fflush", FFLUSH
, BLTIN
},
63 { "func", FUNC
, FUNC
},
64 { "function", FUNC
, FUNC
},
65 { "gc", BIO_FGC
, BLTIN
},
66 { "getline", GETLINE
, GETLINE
},
67 { "gsub", GSUB
, GSUB
},
70 { "index", INDEX
, INDEX
},
71 { "int", FINT
, BLTIN
},
72 { "length", FLENGTH
, BLTIN
},
73 { "log", FLOG
, BLTIN
},
74 { "match", MATCHFCN
, MATCHFCN
},
75 { "meanqual", BIO_FMEANQUAL
, BLTIN
},
76 { "next", NEXT
, NEXT
},
77 { "nextfile", NEXTFILE
, NEXTFILE
},
78 { "or", BIO_FOR
, BLTIN
},
79 { "print", PRINT
, PRINT
},
80 { "printf", PRINTF
, PRINTF
},
81 { "qualcount", BIO_FQUALCOUNT
, BLTIN
},
82 { "rand", FRAND
, BLTIN
},
83 { "return", RETURN
, RETURN
},
84 { "revcomp",BIO_FREVCOMP
, BLTIN
},
85 { "reverse",BIO_FREVERSE
, BLTIN
},
86 { "sin", FSIN
, BLTIN
},
87 { "split", SPLIT
, SPLIT
},
88 { "sprintf", SPRINTF
, SPRINTF
},
89 { "sqrt", FSQRT
, BLTIN
},
90 { "srand", FSRAND
, BLTIN
},
92 { "substr", SUBSTR
, SUBSTR
},
93 { "system", FSYSTEM
, BLTIN
},
94 { "tolower", FTOLOWER
, BLTIN
},
95 { "toupper", FTOUPPER
, BLTIN
},
96 { "trimq", BIO_FTRIMQ
, BLTIN
},
97 { "while", WHILE
, WHILE
},
98 { "xor", BIO_FXOR
, BLTIN
}
/*
 * RET(x): return token x from the enclosing function.  When dbg is
 * non-zero, first print "lex <name>" using tokname(x) as a trace.
 * The argument is expanded twice (once for tokname, once for return),
 * so it should be free of side effects.  The expansion is a bare brace
 * block, so "RET(x);" leaves a trailing empty statement.
 * NOTE(review): a do { ... } while (0) wrapper would be safer in an
 * unbraced if/else, but confirm every call site before changing it.
 */
101 #define RET(x) { if(dbg)printf("lex %s\n", tokname(x)); return(x); }
110 int gettok(char **pbuf
, int *psz
) /* get next input token */
122 if (!isalnum(c
) && c
!= '.' && c
!= '_')
126 if (isalpha(c
) || c
== '_') { /* it's a varname */
127 for ( ; (c
= input()) != 0; ) {
129 if (!adjbuf(&buf
, &sz
, bp
-buf
+2, 100, &bp
, "gettok"))
130 FATAL( "out of space for name %.10s...", buf
);
131 if (isalnum(c
) || c
== '_')
140 retc
= 'a'; /* alphanumeric */
141 } else { /* maybe it's a number, but could be . */
143 /* read input until can't be a number */
144 for ( ; (c
= input()) != 0; ) {
146 if (!adjbuf(&buf
, &sz
, bp
-buf
+2, 100, &bp
, "gettok"))
147 FATAL( "out of space for number %.10s...", buf
);
148 if (isdigit(c
) || c
== 'e' || c
== 'E'
149 || c
== '.' || c
== '+' || c
== '-')
157 strtod(buf
, &rem
); /* parse the number */
158 if (rem
== buf
) { /* it wasn't a valid number at all */
159 buf
[1] = 0; /* return one character as token */
160 retc
= buf
[0]; /* character is its own type */
161 unputstr(rem
+1); /* put rest back for later */
162 } else { /* some prefix was a number */
163 unputstr(rem
); /* put rest back for later */
164 rem
[0] = 0; /* truncate buf after number part */
165 retc
= '0'; /* type is number */
176 int sc
= 0; /* 1 => return a } right now */
177 int reg
= 0; /* 1 => return a REGEXPR now */
182 static char *buf
= 0;
183 static int bufsize
= 5; /* BUG: setting this small causes core dump! */
185 if (buf
== 0 && (buf
= (char *) malloc(bufsize
)) == NULL
)
186 FATAL( "out of space in yylex" );
196 c
= gettok(&buf
, &bufsize
);
199 if (isalpha(c
) || c
== '_')
202 yylval
.cp
= setsymtab(buf
, tostring(buf
), atof(buf
), CON
|NUM
, symtab
);
203 /* should this also have STR set? */
209 case '\n': /* {EOL} */
211 case '\r': /* assume \n is coming */
212 case ' ': /* {WS}+ */
215 case '#': /* #.* strip comments */
216 while ((c
= input()) != '\n' && c
!= 0)
223 if (peek() == '\n') {
225 } else if (peek() == '\r') {
226 input(); input(); /* \n */
244 input(); yylval
.i
= NE
; RET(NE
);
245 } else if (peek() == '~') {
246 input(); yylval
.i
= NOTMATCH
; RET(MATCHOP
);
254 input(); yylval
.i
= LE
; RET(LE
);
256 yylval
.i
= LT
; RET(LT
);
260 input(); yylval
.i
= EQ
; RET(EQ
);
262 yylval
.i
= ASSIGN
; RET(ASGNOP
);
266 input(); yylval
.i
= GE
; RET(GE
);
267 } else if (peek() == '>') {
268 input(); yylval
.i
= APPEND
; RET(APPEND
);
270 yylval
.i
= GT
; RET(GT
);
274 input(); yylval
.i
= INCR
; RET(INCR
);
275 } else if (peek() == '=') {
276 input(); yylval
.i
= ADDEQ
; RET(ASGNOP
);
281 input(); yylval
.i
= DECR
; RET(DECR
);
282 } else if (peek() == '=') {
283 input(); yylval
.i
= SUBEQ
; RET(ASGNOP
);
287 if (peek() == '=') { /* *= */
288 input(); yylval
.i
= MULTEQ
; RET(ASGNOP
);
289 } else if (peek() == '*') { /* ** or **= */
290 input(); /* eat 2nd * */
292 input(); yylval
.i
= POWEQ
; RET(ASGNOP
);
302 input(); yylval
.i
= MODEQ
; RET(ASGNOP
);
307 input(); yylval
.i
= POWEQ
; RET(ASGNOP
);
312 /* BUG: awkward, if not wrong */
313 c
= gettok(&buf
, &bufsize
);
315 if (strcmp(buf
, "NF") == 0) { /* very special */
320 if (c
== '(' || c
== '[' || (infunc
&& isarg(buf
) >= 0)) {
324 yylval
.cp
= setsymtab(buf
, "", 0.0, STR
|NUM
, symtab
);
326 } else if (c
== 0) { /* */
327 SYNTAX( "unexpected end of input after $" );
358 return string(); /* BUG: should be like tran.c ? */
370 static char *buf
= 0;
371 static int bufsz
= 500;
373 if (buf
== 0 && (buf
= (char *) malloc(bufsz
)) == NULL
)
374 FATAL("out of space for strings");
375 for (bp
= buf
; (c
= input()) != '"'; ) {
376 if (!adjbuf(&buf
, &bufsz
, bp
-buf
+2, 500, &bp
, "string"))
377 FATAL("out of space for string %.10s...", buf
);
382 SYNTAX( "non-terminated string %.10s...", buf
);
384 if (c
== 0) /* hopeless */
385 FATAL( "giving up" );
390 case '"': *bp
++ = '"'; break;
391 case 'n': *bp
++ = '\n'; break;
392 case 't': *bp
++ = '\t'; break;
393 case 'f': *bp
++ = '\f'; break;
394 case 'r': *bp
++ = '\r'; break;
395 case 'b': *bp
++ = '\b'; break;
396 case 'v': *bp
++ = '\v'; break;
397 case 'a': *bp
++ = '\007'; break;
398 case '\\': *bp
++ = '\\'; break;
400 case '0': case '1': case '2': /* octal: \d \dd \ddd */
401 case '3': case '4': case '5': case '6': case '7':
403 if ((c
= peek()) >= '0' && c
< '8') {
404 n
= 8 * n
+ input() - '0';
405 if ((c
= peek()) >= '0' && c
< '8')
406 n
= 8 * n
+ input() - '0';
411 case 'x': /* hex \x0-9a-fA-F + */
412 { char xbuf
[100], *px
;
413 for (px
= xbuf
; (c
= input()) != 0 && px
-xbuf
< 100-2; ) {
415 || (c
>= 'a' && c
<= 'f')
416 || (c
>= 'A' && c
<= 'F'))
423 sscanf(xbuf
, "%x", (unsigned int *) &n
);
440 *bp
++ = ' '; *bp
++ = 0;
441 yylval
.cp
= setsymtab(buf
, s
, 0.0, CON
|STR
|DONTFREE
, symtab
);
446 int binsearch(char *w
, Keyword
*kp
, int n
)
448 int cond
, low
, mid
, high
;
452 while (low
<= high
) {
453 mid
= (low
+ high
) / 2;
454 if ((cond
= strcmp(w
, kp
[mid
].word
)) < 0)
469 n
= binsearch(w
, keywords
, sizeof(keywords
)/sizeof(keywords
[0]));
470 /* BUG: this ought to be inside the if; in theory could fault (daniel barrett) */
472 if (n
!= -1) { /* found in table */
474 switch (kp
->type
) { /* special handling */
476 if (kp
->sub
== FSYSTEM
&& safe
)
477 SYNTAX( "system is unsafe" );
481 SYNTAX( "illegal nested function" );
485 SYNTAX( "return not in function" );
488 yylval
.cp
= setsymtab("NF", "", 0.0, NUM
, symtab
);
494 c
= peek(); /* look for '(' */
495 if (c
!= '(' && infunc
&& (n
=isarg(w
)) >= 0) {
499 yylval
.cp
= setsymtab(w
, "", 0.0, STR
|NUM
|DONTFREE
, symtab
);
508 void startreg(void) /* next call to yylex will return a regular expression */
516 static char *buf
= 0;
517 static int bufsz
= 500;
520 if (buf
== 0 && (buf
= (char *) malloc(bufsz
)) == NULL
)
521 FATAL("out of space for rex expr");
523 for ( ; (c
= input()) != '/' && c
!= 0; ) {
524 if (!adjbuf(&buf
, &bufsz
, bp
-buf
+3, 500, &bp
, "regexpr"))
525 FATAL("out of space for reg expr %.10s...", buf
);
527 SYNTAX( "newline in regular expression %.10s...", buf
);
530 } else if (c
== '\\') {
539 SYNTAX("non-terminated regular expression %.10s...", buf
);
540 yylval
.s
= tostring(buf
);
545 /* low-level lexical stuff, sort of inherited from lex */
549 char yysbuf
[100]; /* pushback buffer */
550 char *yysptr
= yysbuf
;
553 int input(void) /* get next lexical input character */
556 extern char *lexprog
;
559 c
= (uschar
)*--yysptr
;
560 else if (lexprog
!= NULL
) { /* awk '...' */
561 if ((c
= (uschar
)*lexprog
) != 0)
563 } else /* awk -f ... */
569 if (ep
>= ebuf
+ sizeof ebuf
)
574 void unput(int c
) /* put lexical character back on input */
578 if (yysptr
>= yysbuf
+ sizeof(yysbuf
))
579 FATAL("pushed back too much: %.20s...", yysbuf
);
582 ep
= ebuf
+ sizeof(ebuf
) - 1;
585 void unputstr(const char *s
) /* put a string back on input */
589 for (i
= strlen(s
)-1; i
>= 0; i
--)