modified: SpatialOmicsCoord.py
[GalaxyCodeBases.git] / c_cpp / etc / calc / token.c
blobd15780ac0d25c8eb61510ad6a567bddbaca8ecb6
/*
 * token - read input file characters into tokens
 *
 * Copyright (C) 1999-2007 David I. Bell and Ernest Bowen
 *
 * Primary author: David I. Bell
 *
 * Calc is open software; you can redistribute it and/or modify it under
 * the terms of the version 2.1 of the GNU Lesser General Public License
 * as published by the Free Software Foundation.
 *
 * Calc is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General
 * Public License for more details.
 *
 * A copy of version 2.1 of the GNU Lesser General Public License is
 * distributed with calc under the filename COPYING-LGPL. You should have
 * received a copy with calc; if not, write to Free Software Foundation, Inc.
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 *
 * @(#) $Revision: 30.2 $
 * @(#) $Id: token.c,v 30.2 2008/11/05 17:32:19 chongo Exp $
 * @(#) $Source: /usr/local/src/bin/calc/RCS/token.c,v $
 *
 * Under source code control: 1990/02/15 01:48:25
 * File existed as early as: before 1990
 *
 * Share and enjoy! :-) http://www.isthe.com/chongo/tech/comp/calc/
 */
33 #include <stdio.h>
34 #include <setjmp.h>
36 #include "calc.h"
37 #include "token.h"
38 #include "str.h"
39 #include "args.h"
40 #include "lib_calc.h"
/*
 * Character classification macros used by the scanner.
 *
 * Each macro takes an int character value (EOF included) and yields a
 * non-zero result when the character belongs to the class.  The range
 * tests assume the letters and digits are contiguous in the execution
 * character set.
 *
 * NOTE: the argument is evaluated more than once, so never pass an
 * expression with side effects (for example a call that reads input).
 */
#define isletter(ch)	((((ch) >= 'a') && ((ch) <= 'z')) || \
			 (((ch) >= 'A') && ((ch) <= 'Z')))
#define isdigit(ch)	(((ch) >= '0') && ((ch) <= '9'))
#define issymbol(ch)	(isletter(ch) || isdigit(ch) || ((ch) == '_'))
#define isoctal(ch)	(((ch) >= '0') && ((ch) <= '7'))

#define STRBUFSIZE	1024	/* size of the string scanning buffer */
53 * Current token.
55 STATIC struct {
56 short t_type; /* type of token */
57 char *t_sym; /* symbol name */
58 long t_strindex; /* index of string value */
59 long t_numindex; /* index of numeric value */
60 } curtoken;
63 STATIC BOOL rescan; /* TRUE to reread current token */
64 STATIC BOOL newlines; /* TRUE to return newlines as tokens */
65 STATIC BOOL allsyms; /* TRUE if always want a symbol token */
66 STATIC STRINGHEAD strings; /* list of constant strings */
67 STATIC char *numbuf; /* buffer for numeric tokens */
68 STATIC long numbufsize; /* current size of numeric buffer */
70 long errorcount = 0; /* number of compilation errors */
/*
 * Table of keywords
 *
 * One entry maps a reserved word to the token number the scanner
 * returns for it.  The name is only ever read, and the table is filled
 * from string literals, so it is declared pointer-to-const.
 */
struct keyword {
	const char *k_name;	/* keyword name */
	int k_token;		/* token number */
};
81 STATIC struct keyword keywords[] = {
82 {"if", T_IF},
83 {"else", T_ELSE},
84 {"for", T_FOR},
85 {"while", T_WHILE},
86 {"do", T_DO},
87 {"continue", T_CONTINUE},
88 {"break", T_BREAK},
89 {"goto", T_GOTO},
90 {"return", T_RETURN},
91 {"local", T_LOCAL},
92 {"global", T_GLOBAL},
93 {"static", T_STATIC},
94 {"switch", T_SWITCH},
95 {"case", T_CASE},
96 {"default", T_DEFAULT},
97 {"quit", T_QUIT},
98 {"exit", T_QUIT},
99 {"define", T_DEFINE},
100 {"read", T_READ},
101 {"show", T_SHOW},
102 {"help", T_HELP},
103 {"write", T_WRITE},
104 {"mat", T_MAT},
105 {"obj", T_OBJ},
106 {"print", T_PRINT},
107 {"cd", T_CD},
108 {"undefine", T_UNDEFINE},
109 {"abort", T_ABORT},
110 {NULL, 0}
114 S_FUNC void eatcomment(void);
115 S_FUNC void eatstring(int quotechar);
116 S_FUNC void eatline(void);
117 S_FUNC int eatsymbol(void);
118 S_FUNC int eatnumber(void);
122 * Initialize all token information.
124 void
125 inittokens(void)
127 initstr(&strings);
128 newlines = FALSE;
129 allsyms = FALSE;
130 rescan = FALSE;
131 setprompt(conf->prompt1);
136 * Set the new token mode according to the specified flag, and return the
137 * previous value of the flag.
140 tokenmode(int flag)
142 int oldflag;
144 oldflag = TM_DEFAULT;
145 if (newlines)
146 oldflag |= TM_NEWLINES;
147 if (allsyms)
148 oldflag |= TM_ALLSYMS;
149 newlines = FALSE;
150 allsyms = FALSE;
151 if (flag & TM_NEWLINES)
152 newlines = TRUE;
153 if (flag & TM_ALLSYMS)
154 allsyms = TRUE;
155 setprompt(newlines ? conf->prompt1 : conf->prompt2);
156 return oldflag;
161 * Routine to read in the next token from the input stream.
162 * The type of token is returned as a value. If the token is a string or
163 * symbol name, information is saved so that the value can be retrieved.
166 gettoken(void)
168 int ch; /* current input character */
169 int type; /* token type */
171 if (rescan) { /* rescanning */
172 rescan = FALSE;
173 return curtoken.t_type;
175 curtoken.t_sym = NULL;
176 curtoken.t_strindex = 0;
177 curtoken.t_numindex = 0;
178 type = T_NULL;
179 while (type == T_NULL) {
180 ch = nextchar();
181 if (allsyms && ch!=' ' && ch!=';' && ch!='"' &&
182 ch!='\'' && ch!='\n' && ch!=EOF) {
183 reread();
184 type = eatsymbol();
185 break;
187 switch (ch) {
188 case ' ':
189 case '\t':
190 case '\r':
191 case '\v':
192 case '\f':
193 case '\0':
194 break;
195 case '\n':
196 if (newlines)
197 type = T_NEWLINE;
198 break;
199 case EOF: type = T_EOF; break;
200 case '{': type = T_LEFTBRACE; break;
201 case '}': type = T_RIGHTBRACE; break;
202 case '(': type = T_LEFTPAREN; break;
203 case ')': type = T_RIGHTPAREN; break;
204 case '[': type = T_LEFTBRACKET; break;
205 case ']': type = T_RIGHTBRACKET; break;
206 case ';': type = T_SEMICOLON; break;
207 case ':': type = T_COLON; break;
208 case ',': type = T_COMMA; break;
209 case '?': type = T_QUESTIONMARK; break;
210 case '@': type = T_AT; break;
211 case '`': type = T_BACKQUOTE; break;
212 case '$': type = T_DOLLAR; break;
213 case '"':
214 case '\'':
215 type = T_STRING;
216 eatstring(ch);
217 break;
218 case '^':
219 switch (nextchar()) {
220 case '=': type = T_POWEREQUALS; break;
221 default: type = T_POWER; reread();
223 break;
224 case '=':
225 switch (nextchar()) {
226 case '=': type = T_EQ; break;
227 default: type = T_ASSIGN; reread();
229 break;
230 case '+':
231 switch (nextchar()) {
232 case '+': type = T_PLUSPLUS; break;
233 case '=': type = T_PLUSEQUALS; break;
234 default: type = T_PLUS; reread();
236 break;
237 case '-':
238 switch (nextchar()) {
239 case '-': type = T_MINUSMINUS; break;
240 case '=': type = T_MINUSEQUALS; break;
241 case '>': type = T_ARROW; break;
242 default: type = T_MINUS; reread();
244 break;
245 case '*':
246 switch (nextchar()) {
247 case '=': type = T_MULTEQUALS; break;
248 case '*':
249 switch (nextchar()) {
250 case '=':
251 type = T_POWEREQUALS; break;
252 default:
253 type = T_POWER; reread();
255 break;
256 default: type = T_MULT; reread();
258 break;
259 case '/':
260 switch (nextchar()) {
261 case '/':
262 switch (nextchar()) {
263 case '=':
264 type = T_SLASHSLASHEQUALS;
265 break;
266 default:
267 reread();
268 type = T_SLASHSLASH;
269 break;
271 break;
272 case '=': type = T_DIVEQUALS; break;
273 case '*': eatcomment(); break;
274 default: type = T_DIV; reread();
276 break;
277 case '%':
278 switch (nextchar()) {
279 case '=': type = T_MODEQUALS; break;
280 default: type = T_MOD; reread();
282 break;
283 case '<':
284 switch (nextchar()) {
285 case '=': type = T_LE; break;
286 case '<':
287 switch (nextchar()) {
288 case '=':
289 type = T_LSHIFTEQUALS;
290 break;
291 default:
292 reread();
293 type = T_LEFTSHIFT;
294 break;
296 break;
297 default: type = T_LT; reread();
299 break;
300 case '>':
301 switch (nextchar()) {
302 case '=': type = T_GE; break;
303 case '>':
304 switch (nextchar()) {
305 case '=':
306 type = T_RSHIFTEQUALS;
307 break;
308 default:
309 reread();
310 type = T_RIGHTSHIFT;
311 break;
313 break;
314 default: type = T_GT; reread();
316 break;
317 case '&':
318 switch (nextchar()) {
319 case '&': type = T_ANDAND; break;
320 case '=': type = T_ANDEQUALS; break;
321 default: type = T_AND; reread(); break;
323 break;
324 case '|':
325 switch (nextchar()) {
326 case '|': type = T_OROR; break;
327 case '=': type = T_OREQUALS; break;
328 default: type = T_OR; reread(); break;
330 break;
331 case '!':
332 switch (nextchar()) {
333 case '=': type = T_NE; break;
334 default: type = T_NOT; reread(); break;
336 break;
337 case '#':
338 switch(nextchar()) {
339 case '=': type = T_HASHEQUALS; break;
340 case '!':
341 case '#': eatline(); break;
342 case '\n': reread(); break;
343 default: type = T_HASH; reread();
345 break;
346 case '~':
347 switch (nextchar()) {
348 case '=': type = T_TILDEEQUALS; break;
349 default: type = T_TILDE; reread();
351 break;
352 case '\\':
353 switch (nextchar()) {
354 case '\n': setprompt(conf->prompt2); break;
355 case '=': type = T_BACKSLASHEQUALS; break;
356 default: type = T_BACKSLASH; reread();
358 break;
359 default:
360 if (isletter(ch) || ch == '_') {
361 reread();
362 type = eatsymbol();
363 break;
365 if (isdigit(ch) || (ch == '.')) {
366 reread();
367 type = eatnumber();
368 break;
370 scanerror(T_NULL, "Unknown token character '%c'", ch);
373 curtoken.t_type = (short)type;
374 return type;
379 * Continue to eat up a comment string.
380 * The leading slash-asterisk has just been scanned at this point.
382 S_FUNC void
383 eatcomment(void)
385 int ch;
386 setprompt(conf->prompt2);
387 for (;;) {
388 ch = nextchar();
389 if (ch == '*') {
390 ch = nextchar();
391 if (ch == '/')
392 break;
393 reread();
395 if (ch == EOF || ch == '\0') {
396 scanerror(T_NULL, "Unterminated comment");
397 reread();
398 break;
401 setprompt(conf->prompt1);
406 * Continue to eat up a the current line
407 * Typically a #! will require the rest of the line to be eaten as if
408 * it were a comment.
410 S_FUNC void
411 eatline(void)
413 int ch; /* chars being eaten */
415 do {
416 ch = nextchar();
417 } while (ch != '\n' && ch != EOF && ch != '\0');
418 reread();
423 * Read in a string and add it to the literal string pool.
424 * The leading single or double quote has been read in at this point.
426 S_FUNC void
427 eatstring(int quotechar)
429 register char *cp; /* current character address */
430 int ch, cch; /* current character */
431 int i; /* index */
432 char buf[STRBUFSIZE]; /* buffer for string */
433 long len; /* length in buffer */
434 long totlen; /* total length, including '\0' */
435 char *str;
436 BOOL done;
438 str = buf;
439 totlen = 0;
440 done = FALSE;
442 while (!done) {
443 cp = buf;
444 len = 0;
445 while (!done && len < STRBUFSIZE) {
446 ch = nextchar();
447 switch (ch) {
448 case '\n':
449 if (!newlines)
450 break;
451 case EOF:
452 reread();
453 scanerror(T_NULL,
454 "Unterminated string constant");
455 done = TRUE;
456 ch = '\0';
457 break;
459 case '\\':
460 ch = nextchar();
461 if (isoctal(ch)) {
462 ch = ch - '0';
463 for (i = 2; i > 0; i--) {
464 cch = nextchar();
465 if (!isoctal(cch))
466 break;
467 ch = 8 * ch + cch - '0';
469 ch &= 0xff;
470 if (i > 0)
471 reread();
472 break;
474 switch (ch) {
475 case 'n': ch = '\n'; break;
476 case 'r': ch = '\r'; break;
477 case 't': ch = '\t'; break;
478 case 'b': ch = '\b'; break;
479 case 'f': ch = '\f'; break;
480 case 'v': ch = '\v'; break;
481 case 'a': ch = '\007'; break;
482 case 'e': ch = '\033'; break;
483 case '\n':
484 setprompt(conf->prompt2);
485 continue;
486 case EOF:
487 reread();
488 continue;
489 case 'x':
490 ch = 0;
491 for (i = 2; i > 0; i--) {
492 cch = nextchar();
493 if (isdigit(cch))
494 ch = 16 * ch + cch - '0';
495 else if (cch >= 'a' && cch <= 'f')
496 ch = 16 * ch + 10 + cch - 'a';
497 else if (cch >= 'A' && cch <= 'F')
498 ch = 16 * ch + 10 + cch - 'A';
499 else break;
501 if (i > 0)
502 reread();
504 break;
505 case '"':
506 case '\'':
507 if (ch == quotechar) {
508 for (;;) {
509 ch = nextchar();
510 if (ch != ' ' && ch != '\t' &&
511 (ch != '\n' ||
512 newlines))
513 break;
515 if (ch == '"' || ch == '\'') {
516 quotechar = ch;
517 continue;
519 reread();
520 done = TRUE;
521 ch = '\0';
523 break;
526 *cp++ = (char) ch;
527 len++;
529 if (!done || totlen) {
530 if (totlen)
531 str = (char *) realloc(str, totlen + len);
532 else
533 str = (char *) malloc(len);
534 if (str == NULL) {
535 math_error("Out of memory for reading tokens");
536 /*NOTREACHED*/
538 memcpy(str + totlen, buf, len);
539 totlen += len;
540 len = 0;
543 curtoken.t_strindex = addstring(str, totlen + len);
544 if (str != buf)
545 free(str);
550 * Read in a symbol name which may or may not be a keyword.
551 * If allsyms is set, keywords are not looked up and almost all chars
552 * will be accepted for the symbol. Returns the type of symbol found.
554 S_FUNC int
555 eatsymbol(void)
557 register struct keyword *kp; /* pointer to current keyword */
558 register char *cp; /* current character pointer */
559 int ch; /* current character */
560 int cc; /* character count */
561 STATIC char buf[SYMBOLSIZE+1]; /* temporary buffer */
563 cp = buf;
564 cc = SYMBOLSIZE;
565 if (allsyms) {
566 for (;;) {
567 ch = nextchar();
568 if (ch == ' ' || ch == ';' ||
569 ch == '\n' || ch == EOF)
570 break;
571 if (cc-- > 0)
572 *cp++ = (char) ch;
574 reread();
575 *cp = '\0';
576 if (cc < 0)
577 scanerror(T_NULL, "Symbol too long");
578 curtoken.t_sym = buf;
579 return T_SYMBOL;
581 for (;;) {
582 ch = nextchar();
583 if (!issymbol(ch))
584 break;
585 if (cc-- > 0)
586 *cp++ = (char)ch;
588 reread();
589 *cp = '\0';
590 if (cc < 0)
591 scanerror(T_NULL, "Symbol too long");
592 for (kp = keywords; kp->k_name; kp++)
593 if (strcmp(kp->k_name, buf) == 0)
594 return kp->k_token;
595 curtoken.t_sym = buf;
596 return T_SYMBOL;
601 * Read in and remember a possibly numeric constant value.
602 * The constant is inserted into a constant table so further uses
603 * of the same constant will not take more memory. This can also
604 * return just a period, which is used for element accesses and for
605 * the old numeric value.
607 S_FUNC int
608 eatnumber(void)
610 register char *cp; /* current character pointer */
611 long len; /* parsed size of number */
612 long res; /* result of parsing number */
614 if (numbufsize == 0) {
615 numbuf = (char *)malloc(128+1);
616 if (numbuf == NULL)
617 math_error("Cannot allocate number buffer");
618 numbufsize = 128;
620 cp = numbuf;
621 len = 0;
622 for (;;) {
623 if (len >= numbufsize) {
624 cp = (char *)realloc(numbuf, numbufsize + 1001);
625 if (cp == NULL) {
626 math_error("Cannot reallocate number buffer");
627 /*NOTREACHED*/
629 numbuf = cp;
630 numbufsize += 1000;
631 cp = &numbuf[len];
633 *cp = nextchar();
634 *(++cp) = '\0';
635 if ((numbuf[0] == '.') && isletter(numbuf[1])) {
636 reread();
637 return T_PERIOD;
639 res = qparse(numbuf, QPF_IMAG);
640 if (res < 0) {
641 reread();
642 scanerror(T_NULL, "Badly formatted number");
643 curtoken.t_numindex = addnumber("0");
644 return T_NUMBER;
646 if (res != ++len)
647 break;
649 cp[-1] = '\0';
650 reread();
651 if ((numbuf[0] == '.') && (numbuf[1] == '\0')) {
652 curtoken.t_numindex = 0;
653 return T_OLDVALUE;
655 cp -= 2;
656 res = T_NUMBER;
657 if ((*cp == 'i') || (*cp == 'I')) {
658 *cp = '\0';
659 res = T_IMAGINARY;
661 curtoken.t_numindex = addnumber(numbuf);
662 return (int)res;
667 * Return the index for string value of the current token.
669 long
670 tokenstring(void)
672 return curtoken.t_strindex;
677 * Return the constant index of a numeric token.
679 long
680 tokennumber(void)
682 return curtoken.t_numindex;
686 * Return the address of a symbol
688 char *
689 tokensymbol(void)
691 return curtoken.t_sym;
695 * Push back the token just read so that it will be seen again.
697 void
698 rescantoken(void)
700 rescan = TRUE;
705 * Describe an error message.
706 * Then skip to the next specified token (or one more powerful).
708 void
709 scanerror(int skip, char *fmt, ...)
711 va_list ap;
712 char *name; /* name of file with error */
713 int len; /* length of error msg bufer */
715 /* count the error */
716 errorcount++;
718 /* form the error message */
719 name = inputname();
720 va_start(ap, fmt);
721 if (name) {
722 snprintf(calc_err_msg, MAXERROR, "\"%s\", line %ld: ",
723 name, linenumber());
724 calc_err_msg[MAXERROR] = '\0'; /* firewall */
725 len = strlen(calc_err_msg);
726 if (len < MAXERROR) {
727 vsnprintf(calc_err_msg+len, MAXERROR-len, fmt, ap);
729 } else {
730 vsnprintf(calc_err_msg, MAXERROR, fmt, ap);
732 va_end(ap);
733 calc_err_msg[MAXERROR] = '\0';
735 /* print error message if allowed */
736 if (calc_print_scanerr_msg != 0) {
737 fprintf(stderr, "%s\n\n", calc_err_msg);
740 /* bail out if continuation not permitted */
741 if ((!c_flag && !stoponerror) || stoponerror > 0) {
742 if (calc_use_scanerr_jmpbuf != 0) {
743 longjmp(calc_scanerr_jmpbuf, 60);
744 /*NOTREACHED*/
745 } else {
746 fprintf(stderr,
747 "calc_scanerr_jmpbuf not setup, exiting code 60\n");
748 libcalc_call_me_last();
749 exit(60);
753 /* bail out if too many errors */
754 if (conf->maxscancount > 0 && errorcount > conf->maxscancount) {
755 fprintf(stderr, "Too many scan errors, compilation aborted.\n");
756 if (calc_use_scanerr_jmpbuf != 0) {
757 longjmp(calc_scanerr_jmpbuf, 61);
758 /*NOTREACHED*/
759 } else {
760 fprintf(stderr,
761 "calc_scanerr_jmpbuf not ready: exit 61\n");
762 libcalc_call_me_last();
763 exit(61);
767 /* post-error report processing */
768 switch (skip) {
769 case T_NULL:
770 return;
771 case T_COMMA:
772 rescan = TRUE;
773 for (;;) {
774 switch (gettoken()) {
775 case T_NEWLINE:
776 case T_SEMICOLON:
777 case T_LEFTBRACE:
778 case T_RIGHTBRACE:
779 case T_EOF:
780 case T_COMMA:
781 rescan = TRUE;
782 return;
785 default:
786 snprintf(calc_err_msg, MAXERROR,
787 "Unknown skip token for scanerror\n");
788 calc_err_msg[MAXERROR] = '\0';
789 if (calc_print_scanerr_msg != 0) {
790 fprintf(stderr, "%s\n\n", calc_err_msg);
792 /* fall into semicolon case */
793 /*FALLTHRU*/
794 case T_SEMICOLON:
795 rescan = TRUE;
796 for (;;) {
797 switch (gettoken()) {
798 case T_NEWLINE:
799 case T_SEMICOLON:
800 case T_LEFTBRACE:
801 case T_RIGHTBRACE:
802 case T_EOF:
803 rescan = TRUE;
804 return;
811 * Display a warning and return to compiling
813 void
814 warning(char *fmt, ...)
816 va_list ap;
817 char *name; /* name of file with error */
818 int len; /* length of error msg bufer */
820 /* count this warning */
821 ++calc_warn_cnt;
823 /* form the error message */
824 name = inputname();
825 va_start(ap, fmt);
826 if (name) {
827 snprintf(calc_warn_msg, MAXERROR, "\"%s\", line %ld: ",
828 name, linenumber());
829 calc_warn_msg[MAXERROR] = '\0'; /* firewall */
830 len = strlen(calc_warn_msg);
831 if (len < MAXERROR) {
832 vsnprintf(calc_warn_msg+len, MAXERROR-len, fmt, ap);
834 } else {
835 vsnprintf(calc_warn_msg, MAXERROR, fmt, ap);
837 va_end(ap);
838 calc_warn_msg[MAXERROR] = '\0';
840 /* print the warning if allowed */
841 if (calc_print_scanwarn_msg != 0) {
842 fprintf(stderr, "Warning: %s\n", calc_warn_msg);
844 return;