2 * token - read input file characters into tokens
4 * Copyright (C) 1999-2007 David I. Bell and Ernest Bowen
6 * Primary author: David I. Bell
8 * Calc is open software; you can redistribute it and/or modify it under
9 * the terms of the version 2.1 of the GNU Lesser General Public License
10 * as published by the Free Software Foundation.
12 * Calc is distributed in the hope that it will be useful, but WITHOUT
13 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General
15 * Public License for more details.
17 * A copy of version 2.1 of the GNU Lesser General Public License is
18 * distributed with calc under the filename COPYING-LGPL. You should have
19 * received a copy with calc; if not, write to Free Software Foundation, Inc.
20 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
22 * @(#) $Revision: 30.2 $
23 * @(#) $Id: token.c,v 30.2 2008/11/05 17:32:19 chongo Exp $
24 * @(#) $Source: /usr/local/src/bin/calc/RCS/token.c,v $
26 * Under source code control: 1990/02/15 01:48:25
27 * File existed as early as: before 1990
29 * Share and enjoy! :-) http://www.isthe.com/chongo/tech/comp/calc/
43 #define isletter(ch) ((((ch) >= 'a') && ((ch) <= 'z')) || \
44 (((ch) >= 'A') && ((ch) <= 'Z')))
45 #define isdigit(ch) (((ch) >= '0') && ((ch) <= '9'))
46 #define issymbol(ch) (isletter(ch) || isdigit(ch) || ((ch) == '_'))
47 #define isoctal(ch) (((ch) >= '0') && ((ch) <= '7'))
49 #define STRBUFSIZE 1024
56 short t_type
; /* type of token */
57 char *t_sym
; /* symbol name */
58 long t_strindex
; /* index of string value */
59 long t_numindex
; /* index of numeric value */
63 STATIC BOOL rescan
; /* TRUE to reread current token */
64 STATIC BOOL newlines
; /* TRUE to return newlines as tokens */
65 STATIC BOOL allsyms
; /* TRUE if always want a symbol token */
66 STATIC STRINGHEAD strings
; /* list of constant strings */
67 STATIC
char *numbuf
; /* buffer for numeric tokens */
68 STATIC
long numbufsize
; /* current size of numeric buffer */
70 long errorcount
= 0; /* number of compilation errors */
77 char *k_name
; /* keyword name */
78 int k_token
; /* token number */
81 STATIC
struct keyword keywords
[] = {
87 {"continue", T_CONTINUE
},
96 {"default", T_DEFAULT
},
108 {"undefine", T_UNDEFINE
},
114 S_FUNC
void eatcomment(void);
115 S_FUNC
void eatstring(int quotechar
);
116 S_FUNC
void eatline(void);
117 S_FUNC
int eatsymbol(void);
118 S_FUNC
int eatnumber(void);
122 * Initialize all token information.
131 setprompt(conf
->prompt1
);
136 * Set the new token mode according to the specified flag, and return the
137 * previous value of the flag.
144 oldflag
= TM_DEFAULT
;
146 oldflag
|= TM_NEWLINES
;
148 oldflag
|= TM_ALLSYMS
;
151 if (flag
& TM_NEWLINES
)
153 if (flag
& TM_ALLSYMS
)
155 setprompt(newlines
? conf
->prompt1
: conf
->prompt2
);
161 * Routine to read in the next token from the input stream.
162 * The type of token is returned as a value. If the token is a string or
163 * symbol name, information is saved so that the value can be retrieved.
168 int ch
; /* current input character */
169 int type
; /* token type */
171 if (rescan
) { /* rescanning */
173 return curtoken
.t_type
;
175 curtoken
.t_sym
= NULL
;
176 curtoken
.t_strindex
= 0;
177 curtoken
.t_numindex
= 0;
179 while (type
== T_NULL
) {
181 if (allsyms
&& ch
!=' ' && ch
!=';' && ch
!='"' &&
182 ch
!='\'' && ch
!='\n' && ch
!=EOF
) {
199 case EOF
: type
= T_EOF
; break;
200 case '{': type
= T_LEFTBRACE
; break;
201 case '}': type
= T_RIGHTBRACE
; break;
202 case '(': type
= T_LEFTPAREN
; break;
203 case ')': type
= T_RIGHTPAREN
; break;
204 case '[': type
= T_LEFTBRACKET
; break;
205 case ']': type
= T_RIGHTBRACKET
; break;
206 case ';': type
= T_SEMICOLON
; break;
207 case ':': type
= T_COLON
; break;
208 case ',': type
= T_COMMA
; break;
209 case '?': type
= T_QUESTIONMARK
; break;
210 case '@': type
= T_AT
; break;
211 case '`': type
= T_BACKQUOTE
; break;
212 case '$': type
= T_DOLLAR
; break;
219 switch (nextchar()) {
220 case '=': type
= T_POWEREQUALS
; break;
221 default: type
= T_POWER
; reread();
225 switch (nextchar()) {
226 case '=': type
= T_EQ
; break;
227 default: type
= T_ASSIGN
; reread();
231 switch (nextchar()) {
232 case '+': type
= T_PLUSPLUS
; break;
233 case '=': type
= T_PLUSEQUALS
; break;
234 default: type
= T_PLUS
; reread();
238 switch (nextchar()) {
239 case '-': type
= T_MINUSMINUS
; break;
240 case '=': type
= T_MINUSEQUALS
; break;
241 case '>': type
= T_ARROW
; break;
242 default: type
= T_MINUS
; reread();
246 switch (nextchar()) {
247 case '=': type
= T_MULTEQUALS
; break;
249 switch (nextchar()) {
251 type
= T_POWEREQUALS
; break;
253 type
= T_POWER
; reread();
256 default: type
= T_MULT
; reread();
260 switch (nextchar()) {
262 switch (nextchar()) {
264 type
= T_SLASHSLASHEQUALS
;
272 case '=': type
= T_DIVEQUALS
; break;
273 case '*': eatcomment(); break;
274 default: type
= T_DIV
; reread();
278 switch (nextchar()) {
279 case '=': type
= T_MODEQUALS
; break;
280 default: type
= T_MOD
; reread();
284 switch (nextchar()) {
285 case '=': type
= T_LE
; break;
287 switch (nextchar()) {
289 type
= T_LSHIFTEQUALS
;
297 default: type
= T_LT
; reread();
301 switch (nextchar()) {
302 case '=': type
= T_GE
; break;
304 switch (nextchar()) {
306 type
= T_RSHIFTEQUALS
;
314 default: type
= T_GT
; reread();
318 switch (nextchar()) {
319 case '&': type
= T_ANDAND
; break;
320 case '=': type
= T_ANDEQUALS
; break;
321 default: type
= T_AND
; reread(); break;
325 switch (nextchar()) {
326 case '|': type
= T_OROR
; break;
327 case '=': type
= T_OREQUALS
; break;
328 default: type
= T_OR
; reread(); break;
332 switch (nextchar()) {
333 case '=': type
= T_NE
; break;
334 default: type
= T_NOT
; reread(); break;
339 case '=': type
= T_HASHEQUALS
; break;
341 case '#': eatline(); break;
342 case '\n': reread(); break;
343 default: type
= T_HASH
; reread();
347 switch (nextchar()) {
348 case '=': type
= T_TILDEEQUALS
; break;
349 default: type
= T_TILDE
; reread();
353 switch (nextchar()) {
354 case '\n': setprompt(conf
->prompt2
); break;
355 case '=': type
= T_BACKSLASHEQUALS
; break;
356 default: type
= T_BACKSLASH
; reread();
360 if (isletter(ch
) || ch
== '_') {
365 if (isdigit(ch
) || (ch
== '.')) {
370 scanerror(T_NULL
, "Unknown token character '%c'", ch
);
373 curtoken
.t_type
= (short)type
;
379 * Continue to eat up a comment string.
380 * The leading slash-asterisk has just been scanned at this point.
386 setprompt(conf
->prompt2
);
395 if (ch
== EOF
|| ch
== '\0') {
396 scanerror(T_NULL
, "Unterminated comment");
401 setprompt(conf
->prompt1
);
406 * Continue to eat up the current line
407 * Typically a #! will require the rest of the line to be eaten as if
413 int ch
; /* chars being eaten */
417 } while (ch
!= '\n' && ch
!= EOF
&& ch
!= '\0');
423 * Read in a string and add it to the literal string pool.
424 * The leading single or double quote has been read in at this point.
427 eatstring(int quotechar
)
429 register char *cp
; /* current character address */
430 int ch
, cch
; /* current character */
432 char buf
[STRBUFSIZE
]; /* buffer for string */
433 long len
; /* length in buffer */
434 long totlen
; /* total length, including '\0' */
445 while (!done
&& len
< STRBUFSIZE
) {
454 "Unterminated string constant");
463 for (i
= 2; i
> 0; i
--) {
467 ch
= 8 * ch
+ cch
- '0';
475 case 'n': ch
= '\n'; break;
476 case 'r': ch
= '\r'; break;
477 case 't': ch
= '\t'; break;
478 case 'b': ch
= '\b'; break;
479 case 'f': ch
= '\f'; break;
480 case 'v': ch
= '\v'; break;
481 case 'a': ch
= '\007'; break;
482 case 'e': ch
= '\033'; break;
484 setprompt(conf
->prompt2
);
491 for (i
= 2; i
> 0; i
--) {
494 ch
= 16 * ch
+ cch
- '0';
495 else if (cch
>= 'a' && cch
<= 'f')
496 ch
= 16 * ch
+ 10 + cch
- 'a';
497 else if (cch
>= 'A' && cch
<= 'F')
498 ch
= 16 * ch
+ 10 + cch
- 'A';
507 if (ch
== quotechar
) {
510 if (ch
!= ' ' && ch
!= '\t' &&
515 if (ch
== '"' || ch
== '\'') {
529 if (!done
|| totlen
) {
531 str
= (char *) realloc(str
, totlen
+ len
);
533 str
= (char *) malloc(len
);
535 math_error("Out of memory for reading tokens");
538 memcpy(str
+ totlen
, buf
, len
);
543 curtoken
.t_strindex
= addstring(str
, totlen
+ len
);
550 * Read in a symbol name which may or may not be a keyword.
551 * If allsyms is set, keywords are not looked up and almost all chars
552 * will be accepted for the symbol. Returns the type of symbol found.
557 register struct keyword
*kp
; /* pointer to current keyword */
558 register char *cp
; /* current character pointer */
559 int ch
; /* current character */
560 int cc
; /* character count */
561 STATIC
char buf
[SYMBOLSIZE
+1]; /* temporary buffer */
568 if (ch
== ' ' || ch
== ';' ||
569 ch
== '\n' || ch
== EOF
)
577 scanerror(T_NULL
, "Symbol too long");
578 curtoken
.t_sym
= buf
;
591 scanerror(T_NULL
, "Symbol too long");
592 for (kp
= keywords
; kp
->k_name
; kp
++)
593 if (strcmp(kp
->k_name
, buf
) == 0)
595 curtoken
.t_sym
= buf
;
601 * Read in and remember a possibly numeric constant value.
602 * The constant is inserted into a constant table so further uses
603 * of the same constant will not take more memory. This can also
604 * return just a period, which is used for element accesses and for
605 * the old numeric value.
610 register char *cp
; /* current character pointer */
611 long len
; /* parsed size of number */
612 long res
; /* result of parsing number */
614 if (numbufsize
== 0) {
615 numbuf
= (char *)malloc(128+1);
617 math_error("Cannot allocate number buffer");
623 if (len
>= numbufsize
) {
624 cp
= (char *)realloc(numbuf
, numbufsize
+ 1001);
626 math_error("Cannot reallocate number buffer");
635 if ((numbuf
[0] == '.') && isletter(numbuf
[1])) {
639 res
= qparse(numbuf
, QPF_IMAG
);
642 scanerror(T_NULL
, "Badly formatted number");
643 curtoken
.t_numindex
= addnumber("0");
651 if ((numbuf
[0] == '.') && (numbuf
[1] == '\0')) {
652 curtoken
.t_numindex
= 0;
657 if ((*cp
== 'i') || (*cp
== 'I')) {
661 curtoken
.t_numindex
= addnumber(numbuf
);
667 * Return the index for string value of the current token.
672 return curtoken
.t_strindex
;
677 * Return the constant index of a numeric token.
682 return curtoken
.t_numindex
;
686 * Return the address of a symbol
691 return curtoken
.t_sym
;
695 * Push back the token just read so that it will be seen again.
705 * Describe an error message.
706 * Then skip to the next specified token (or one more powerful).
709 scanerror(int skip
, char *fmt
, ...)
712 char *name
; /* name of file with error */
713 int len
; /* length of error msg buffer */
715 /* count the error */
718 /* form the error message */
722 snprintf(calc_err_msg
, MAXERROR
, "\"%s\", line %ld: ",
724 calc_err_msg
[MAXERROR
] = '\0'; /* firewall */
725 len
= strlen(calc_err_msg
);
726 if (len
< MAXERROR
) {
727 vsnprintf(calc_err_msg
+len
, MAXERROR
-len
, fmt
, ap
);
730 vsnprintf(calc_err_msg
, MAXERROR
, fmt
, ap
);
733 calc_err_msg
[MAXERROR
] = '\0';
735 /* print error message if allowed */
736 if (calc_print_scanerr_msg
!= 0) {
737 fprintf(stderr
, "%s\n\n", calc_err_msg
);
740 /* bail out if continuation not permitted */
741 if ((!c_flag
&& !stoponerror
) || stoponerror
> 0) {
742 if (calc_use_scanerr_jmpbuf
!= 0) {
743 longjmp(calc_scanerr_jmpbuf
, 60);
747 "calc_scanerr_jmpbuf not setup, exiting code 60\n");
748 libcalc_call_me_last();
753 /* bail out if too many errors */
754 if (conf
->maxscancount
> 0 && errorcount
> conf
->maxscancount
) {
755 fprintf(stderr
, "Too many scan errors, compilation aborted.\n");
756 if (calc_use_scanerr_jmpbuf
!= 0) {
757 longjmp(calc_scanerr_jmpbuf
, 61);
761 "calc_scanerr_jmpbuf not ready: exit 61\n");
762 libcalc_call_me_last();
767 /* post-error report processing */
774 switch (gettoken()) {
786 snprintf(calc_err_msg
, MAXERROR
,
787 "Unknown skip token for scanerror\n");
788 calc_err_msg
[MAXERROR
] = '\0';
789 if (calc_print_scanerr_msg
!= 0) {
790 fprintf(stderr
, "%s\n\n", calc_err_msg
);
792 /* fall into semicolon case */
797 switch (gettoken()) {
811 * Display a warning and return to compiling
814 warning(char *fmt
, ...)
817 char *name
; /* name of file with error */
818 int len
; /* length of error msg buffer */
820 /* count this warning */
823 /* form the error message */
827 snprintf(calc_warn_msg
, MAXERROR
, "\"%s\", line %ld: ",
829 calc_warn_msg
[MAXERROR
] = '\0'; /* firewall */
830 len
= strlen(calc_warn_msg
);
831 if (len
< MAXERROR
) {
832 vsnprintf(calc_warn_msg
+len
, MAXERROR
-len
, fmt
, ap
);
835 vsnprintf(calc_warn_msg
, MAXERROR
, fmt
, ap
);
838 calc_warn_msg
[MAXERROR
] = '\0';
840 /* print the warning if allowed */
841 if (calc_print_scanwarn_msg
!= 0) {
842 fprintf(stderr
, "Warning: %s\n", calc_warn_msg
);