* tiny
[mascara-docs.git] / compilers / bcc / linux86-0.16.17 / unproto / tok_io.c
blob3cae52e19675a5c2d46d23e7de01f61c773eeb39
1 /*++
2 /* NAME
3 /* tok_io 3
4 /* SUMMARY
5 /* token I/O
6 /* PACKAGE
7 /* unproto
8 /* SYNOPSIS
9 /* #include "token.h"
11 /* struct token *tok_get()
13 /* void tok_flush(t)
14 /* struct token *t;
16 /* void tok_show(t)
17 /* struct token *t;
19 /* void tok_show_ch(t)
20 /* struct token *t;
22 /* void put_str(s)
23 /* char *s;
25 /* void put_ch(c)
26 /* int c;
28 /* void put_nl()
30 /* char *in_path;
31 /* int in_line;
32 /* DESCRIPTION
33 /* These functions read from stdin and write to stdout. The
34 /* tokenizer keeps track of where the token appeared in the input
35 /* stream; on output, this information is used to preserve correct
36 /* line number information (even after lots of token lookahead or
37 /* after function-header rewriting) so that diagnostics from the
38 /* next compiler stage make sense.
40 /* tok_get() reads the next token from standard input. It returns
41 /* a null pointer when the end of input is reached.
43 /* tok_show() displays the contents of a (possibly composite) token
44 /* on the standard output.
46 /* tok_show_ch() displays the contents of a single-character token
47 /* on the standard output. The character should not be a newline.
49 /* tok_flush() displays the contents of a (possibly composite) token
50 /* on the standard output and makes it available for re-use.
52 /* put_str() writes a null-terminated string to standard output.
53 /* There should be no newline characters in the string argument.
55 /* put_ch() writes one character to standard output. The character
56 /* should not be a newline.
58 /* put_nl() outputs a newline character and adjusts the program's idea of
59 /* the current output line.
61 /* The in_path and in_line variables contain the file name and
62 /* line number of the most recently read token.
63 /* BUGS
64 /* The tokenizer is just good enough for the unproto filter.
65 /* As a benefit, it is quite fast.
66 /* AUTHOR(S)
67 /* Wietse Venema
68 /* Eindhoven University of Technology
69 /* Department of Mathematics and Computer Science
70 /* Den Dolech 2, P.O. Box 513, 5600 MB Eindhoven, The Netherlands
71 /* LAST MODIFICATION
72 /* 92/01/15 21:52:59
73 /* VERSION/RELEASE
74 /* 1.3
75 /*--*/
77 static char io_sccsid[] = "@(#) tok_io.c 1.3 92/01/15 21:52:59";
79 /* C library */
81 #include <stdio.h>
82 #include <ctype.h>
84 extern char *strchr();
85 extern char *malloc();
86 extern char *realloc();
87 extern char *strcpy();
89 /* Application-specific stuff */
91 #include "token.h"
92 #include "vstring.h"
93 #include "error.h"
95 extern char *strsave(); /* XXX need include file */
97 /* Stuff to keep track of original source file name and position */
99 static char def_path[] = ""; /* default path name */
101 char *in_path = def_path; /* current input file name */
102 int in_line = 1; /* current input line number */
104 static char *out_path = def_path; /* last name in output line control */
105 static int out_line = 1; /* current output line number */
106 int last_ch; /* type of last output */
108 /* Forward declarations */
110 static int read_quoted();
111 static void read_comment();
112 static int backslash_newline();
113 static char *read_hex();
114 static char *read_octal();
115 static void fix_line_control();
118 * Character input with one level of pushback. The INPUT() macro recursively
119 * strips backslash-newline pairs from the input stream. The UNPUT() macro
120 * should be used only for characters obtained through the INPUT() macro.
122 * After skipping a backslash-newline pair, the input line counter is not
123 * updated, and we continue with the same logical source line. We just
124 * update a counter with the number of backslash-newline sequences that must
125 * be accounted for (backslash_newline() updates the counter). At the end of
126 * the logical source line, an appropriate number of newline characters is
127 * pushed back (in tok_get()). I do not know how GCC handles this, but it
128 * seems to produce the same output.
130 * Because backslash_newline() recursively calls itself (through the INPUT()
131 * macro), we will run out of stack space, given a sufficiently long
132 * sequence of backslash-newline pairs.
/*
 * One level of character pushback for the tokenizer.
 *
 * in_char is an int, not a char: values obtained from getchar() (including
 * EOF and bytes above 127) must survive a round trip through UNPUT()/INPUT()
 * unchanged. With plain char storage a pushed-back 0xFF byte would read back
 * as EOF where char is signed, and a pushed-back EOF would read back as 255
 * where char is unsigned.
 */
static int in_char = 0;			/* push-back storage */
static int in_flag = 0;			/* pushback available */
static int nl_compensate = 0;		/* line continuation kluge */

/*
 * INPUT(c) stores the next input character in the lvalue c and yields that
 * same value. A pending pushed-back character is returned first. Otherwise a
 * backslash followed by a newline is skipped via backslash_newline(), and a
 * backslash followed by anything else is returned as '\\' with the second
 * character handed back to stdio.
 *
 * UNPUT(c) makes c the next character that INPUT() returns.
 */
#define INPUT(c) (in_flag ? (in_flag = 0, (c) = in_char) : \
		    ((c) = getchar()) != '\\' ? (c) : \
		    ((c) = getchar()) != '\n' ? (ungetc((c), stdin), (c) = '\\') : \
		    ((c) = backslash_newline()))
#define UNPUT(c) (in_flag = 1, in_char = (c))
/*
 * Directives that should be ignored. The list is only compiled in when
 * IGNORE_DIRECTIVES is defined; do_control() walks it up to the terminating
 * null pointer and discards control lines whose first word matches an entry.
 */

#ifdef IGNORE_DIRECTIVES

static char *ignore_directives[] = {
    IGNORE_DIRECTIVES,
    (char *) 0,				/* end-of-list marker */
};

#endif
/*
 * Modified string and ctype stuff.
 *
 * STREQUAL compares the first characters before calling strcmp(). The
 * identifier classes treat '_' as a letter. ISSPACE excludes newline, which
 * the tokenizer handles as a token of its own. ISHEX uses isxdigit() rather
 * than a strchr() lookup: strchr() also matches the terminating null byte of
 * the digit string, which made a NUL character count as a hex digit.
 */

#define STREQUAL(x,y)	(*(x) == *(y) && strcmp((x),(y)) == 0)

#define ISALNUM(c)	(isalnum(c) || (c) == '_')
#define ISALPHA(c)	(isalpha(c) || (c) == '_')
#define ISSPACE(c)	(isspace(c) && (c) != '\n')
#define ISDOT(c)	((c) == '.')
#define ISHEX(c)	(isxdigit(c) != 0)
#define ISOCTAL(c)	(isdigit(c) && (c) != '8' && (c) != '9')
/*
 * COLLECT(v,c,cond) - gather a run of characters into vstring v.
 *
 * The character already held in c is stored first. Further characters are
 * read into c with INPUT() and appended for as long as cond holds; the first
 * character that fails cond is pushed back with UNPUT(). Reading also stops
 * at EOF. The result is null-terminated. Running out of memory is fatal.
 */

#define COLLECT(v,c,cond) do { \
	register struct vstring *collect_vs = (v); \
	register char *collect_wp = collect_vs->str; \
	*collect_wp++ = (c); \
	for (;;) { \
	    if (INPUT(c) == EOF) \
		break; \
	    if (!(cond)) { \
		UNPUT(c); \
		break; \
	    } \
	    if (VS_ADDCH(collect_vs, collect_wp, c) == 0) \
		fatal("out of memory"); \
	} \
	*collect_wp = 0; \
    } while (0)
/*
 * Ensure that output line information is correct: when the recorded output
 * position differs from path p or line l, let fix_line_control() repair it.
 * Both arguments are evaluated more than once.
 */

#define CHECK_LINE_CONTROL(p,l) do { \
	if (!(out_path == (p) && out_line == (l))) \
	    fix_line_control((p), (l)); \
    } while (0)
190 /* do_control - parse control line */
192 static int do_control()
194 struct token *t;
195 int line;
196 char *path;
198 /* Make sure that the directive shows up in the right place. */
200 CHECK_LINE_CONTROL(in_path, in_line);
202 while (t = tok_get()) {
203 switch (t->tokno) {
205 case TOK_WSPACE:
206 /* Ignore blanks after "#" token. */
207 tok_free(t);
208 break;
210 case TOK_NUMBER:
213 * Line control is of the form: number pathname junk. Since we
214 * have no idea what junk the preprocessor may generate, we copy
215 * all line control tokens to stdout.
218 put_str("# ");
219 line = atoi(t->vstr->str); /* extract line number */
220 tok_flush(t);
221 while ((t = tok_get()) && t->tokno == TOK_WSPACE)
222 tok_flush(t); /* copy white space */
223 if (t) { /* extract path name */
224 path = (t->tokno == '"') ? strsave(t->vstr->str) : in_path;
225 do {
226 tok_flush(t); /* copy until newline */
227 } while (t->tokno != '\n' && (t = tok_get()));
229 out_line = in_line = line; /* synchronize */
230 out_path = in_path = path; /* synchronize */
231 return;
233 #ifdef IGNORE_DIRECTIVES
235 case TOK_WORD:
238 * Optionally ignore other #directives. This is only a partial
239 * solution, because the preprocessor will still see them.
242 char **cpp;
243 char *cp = t->vstr->str;
245 for (cpp = ignore_directives; *cpp; cpp++) {
246 if (STREQUAL(cp, *cpp)) {
247 do {
248 tok_free(t);
249 } while (t->tokno != '\n' && (t = tok_get()));
250 return;
254 /* FALLTHROUGH */
255 #endif
256 default:
257 /* Pass through. */
258 put_ch('#');
259 do {
260 tok_flush(t);
261 } while (t->tokno != '\n' && (t = tok_get()));
262 return;
264 case 0:
265 /* Hit EOF, punt. */
266 put_ch('#');
267 return;
272 /* backslash_newline - fix up things after reading a backslash-newline pair */
274 static int backslash_newline()
276 register int c;
278 nl_compensate++;
279 return (INPUT(c));
282 /* tok_get - get next token */
284 static int last_tokno = '\n';
286 struct token *tok_get()
288 register struct token *t;
289 register int c;
290 int d;
293 * Get one from the pool and fill it in. The loop is here in case we hit
294 * a preprocessor control line, which happens in a minority of all cases.
295 * We update the token input path and line info *after* backslash-newline
296 * processing or the newline compensation would go wrong.
299 t = tok_alloc();
301 for (;;) {
302 if ((INPUT(c)) == EOF) {
303 tok_free(t);
304 return (0);
305 } else if ((t->line = in_line, t->path = in_path), !isascii(c)) {
306 t->vstr->str[0] = c;
307 t->vstr->str[1] = 0;
308 t->tokno = TOK_OTHER;
309 break;
310 } else if (ISSPACE(c)) {
311 COLLECT(t->vstr, c, ISSPACE(c));
312 t->tokno = TOK_WSPACE;
313 break;
314 } else if (ISALPHA(c)) {
315 COLLECT(t->vstr, c, ISALNUM(c));
316 t->tokno = TOK_WORD;
317 break;
318 } else if (isdigit(c)) {
319 COLLECT(t->vstr, c, isdigit(c));
320 t->tokno = TOK_NUMBER;
321 break;
322 } else if (c == '"' || c == '\'') {
323 t->tokno = read_quoted(t->vstr, c); /* detect missing end quote */
324 break;
325 } else if (ISDOT(c)) {
326 COLLECT(t->vstr, c, ISDOT(c));
327 t->tokno = TOK_OTHER;
328 break;
329 } else if (c == '#' && last_tokno == '\n') {
330 do_control();
331 continue;
332 } else {
333 t->vstr->str[0] = c;
334 if (c == '\n') {
335 in_line++;
336 if (nl_compensate > 0) { /* compensation for bs-nl */
337 UNPUT('\n');
338 nl_compensate--;
340 } else if (c == '/') {
341 if ((INPUT(d)) == '*') {
342 t->vstr->str[1] = d; /* comment */
343 read_comment(t->vstr);
344 t->tokno = TOK_WSPACE;
345 break;
346 } else {
347 if (d != EOF)
348 UNPUT(d);
350 } else if (c == '\\') {
351 t->vstr->str[1] = (INPUT(c) == EOF ? 0 : c);
352 t->vstr->str[2] = 0;
353 t->tokno = TOK_OTHER;
354 break;
356 t->vstr->str[1] = 0;
357 t->tokno = c;
358 break;
361 last_tokno = t->tokno;
362 t->end_line = in_line;
363 return (t);
366 /* read_quoted - read string or character literal, canonicalize escapes */
368 static int read_quoted(vs, ch)
369 register struct vstring *vs;
370 int ch;
372 register char *cp = vs->str;
373 register int c;
374 int ret = TOK_OTHER;
376 *cp++ = ch;
379 * Clobber the token type in case of a premature newline or EOF. This
380 * prevents us from attempting to concatenate string constants with
381 * broken ones that have no closing quote.
384 while (INPUT(c) != EOF) {
385 if (c == '\n') { /* newline in string */
386 UNPUT(c);
387 break;
389 if (VS_ADDCH(vs, cp, c) == 0) /* store character */
390 fatal("out of memory");
391 if (c == ch) { /* closing quote */
392 ret = c;
393 break;
395 if (c == '\\') { /* parse escape sequence */
396 if ((INPUT(c)) == EOF) { /* EOF, punt */
397 break;
398 } else if (c == 'a') { /* \a -> audible bell */
399 #ifdef BELL
400 if ((cp = vs_strcpy(vs, cp, BELL)) == 0)
401 #else
402 if ((cp = vs_strcpy(vs, cp, "\007")) == 0)
403 #endif
404 fatal("out of memory");
405 } else if (c == 'x') { /* \xhh -> \nnn */
406 cp = read_hex(vs, cp);
407 } else if (ISOCTAL(c) && ch != '\'') {
408 cp = read_octal(vs, cp, c); /* canonicalize \octal */
409 } else {
410 if (VS_ADDCH(vs, cp, c) == 0) /* \other: leave alone */
411 fatal("out of memory");
415 *cp = 0;
416 return (ret);
419 /* read_comment - stuff a whole comment into one huge token */
421 static void read_comment(vs)
422 register struct vstring *vs;
424 register char *cp = vs->str + 2; /* skip slash star */
425 register int c;
426 register int d;
428 while (INPUT(c) != EOF) {
429 if (VS_ADDCH(vs, cp, c) == 0)
430 fatal("out of memory");
431 if (c == '*') {
432 if ((INPUT(d)) == '/') {
433 if (VS_ADDCH(vs, cp, d) == 0)
434 fatal("out of memory");
435 break;
436 } else {
437 if (d != EOF)
438 UNPUT(d);
440 } else if (c == '\n') {
441 in_line++;
442 } else if (c == '\\') {
443 if ((INPUT(d)) != EOF && VS_ADDCH(vs, cp, d) == 0)
444 fatal("out of memory");
447 *cp = 0;
/*
 * read_hex - rewrite hex escape to three-digit octal escape
 *
 * Called after the backslash and the 'x' have been read (the backslash is
 * already stored). Consumes all subsequent hex digits and appends their
 * value at cp as exactly three octal digits, so that strings can be
 * concatenated without problems. Returns the updated write pointer.
 *
 * Without any hex digit an error is reported and a plain 'x' is appended.
 * A value above octal 777 is reported and truncated to its last three octal
 * digits.
 *
 * The value is accumulated digit by digit, so the number of digits is not
 * limited by a buffer size and cannot overflow a conversion routine; only
 * the low nine bits are kept once the value is out of range.
 */

static char *read_hex(vs, cp)
struct vstring *vs;
register char *cp;
{
    register int c;
    unsigned val = 0;
    int     seen_digit = 0;
    int     too_big = 0;
    char    buf[4];				/* three digits and a null */

    /*
     * Eat up all subsequent hex digits. Complain later when there are too
     * many.
     */

    while (INPUT(c) != EOF && isxdigit(c)) {
	val = (val << 4)
	    | (unsigned) (isdigit(c) ? c - '0' : tolower(c) - 'a' + 10);
	if (val > 0777) {
	    too_big = 1;
	    val &= 0777;			/* last three octal digits */
	}
	seen_digit = 1;
    }

    /* Hand back the character that ended the digits; EOF repeats by itself. */

    if (c != EOF)
	UNPUT(c);

    if (!seen_digit) {
	error("\\x escape sequence without hexadecimal digits");
	if (VS_ADDCH(vs, cp, 'x') == 0)
	    fatal("out of memory");
    } else {
	if (too_big)
	    error("\\x escape sequence yields non-character value");
	sprintf(buf, "%03o", val);
	if ((cp = vs_strcpy(vs, cp, buf)) == 0)
	    fatal("out of memory");
    }
    return (cp);
}
/*
 * read_octal - convert octal escape to three-digit format
 *
 * c is the first octal digit after the backslash (the backslash is already
 * stored). At most two more octal digits are read, and the escape is
 * appended at cp padded with leading zeros to three digits. Returns the
 * updated write pointer.
 *
 * obuf holds two '0' pad characters followed by room for three digits and
 * the terminator; buf_input is the place where the digits go.
 */

static char obuf[] = "00123";

static char *read_octal(vs, cp, c)
register struct vstring *vs;
register char *cp;
register int c;
{
    register int i;

#define buf_input (obuf + 2)

    /* Eat up at most three octal digits. */

    buf_input[0] = c;
    for (i = 1; i < 3 && (INPUT(c) != EOF) && ISOCTAL(c); i++)
	buf_input[i] = c;
    buf_input[i] = 0;

    /*
     * With fewer than three digits, c holds the character that ended the
     * escape. Hand it back unless it is EOF, which the next read reports
     * again by itself. A NUL byte is handed back like any other character.
     */

    if (i < 3 && c != EOF)
	UNPUT(c);

    /*
     * Leave three-digit octal escapes alone. Convert one-digit and two-digit
     * octal escapes to three-digit form by prefixing them with a suitable
     * number of '0' characters. This is done so that strings can be
     * concatenated without problems.
     */

    if ((cp = vs_strcpy(vs, cp, buf_input + i - 3)) == 0)
	fatal("out of memory");
    return (cp);
}
531 /* put_nl - emit newline and adjust output line count */
533 void put_nl()
535 put_ch('\n');
536 out_line++;
539 /* fix_line_control - to adjust path and/or line count info in output */
541 static void fix_line_control(path, line)
542 register char *path;
543 register int line;
547 * This function is called sporadically, so it should not be a problem
548 * that we repeat some of the tests that preceded this function call.
550 * Emit a newline if we are not at the start of a line.
552 * If we switch files, or if we jump backwards, emit line control. If we
553 * jump forward, emit the proper number of newlines to compensate.
556 if (last_ch != '\n') /* terminate open line */
557 put_nl();
558 if (path != out_path || line < out_line) { /* file switch or back jump */
559 printf("# %d %s\n", out_line = line, out_path = path);
560 last_ch = '\n';
561 } else { /* forward jump */
562 while (line > out_line)
563 put_nl();
567 /* tok_show_ch - output single-character token (not newline) */
569 void tok_show_ch(t)
570 register struct token *t;
572 CHECK_LINE_CONTROL(t->path, t->line);
574 put_ch(t->tokno); /* show token contents */
577 /* tok_show - output (possibly composite) token */
579 void tok_show(t)
580 register struct token *t;
582 register struct token *p;
584 if (t->tokno == TOK_LIST) {
585 register struct token *s;
588 * This branch is completely in terms of tok_xxx() primitives, so
589 * there is no need to check the line control information.
592 for (s = t->head; s; s = s->next) {
593 tok_show_ch(s); /* '(' or ',' or ')' */
594 for (p = s->head; p; p = p->next)
595 tok_show(p); /* show list element */
597 } else {
598 register char *cp = t->vstr->str;
601 * Measurements show that it pays off to give special treatment to
602 * single-character tokens. Note that both types of token may cause a
603 * change of output line number.
606 CHECK_LINE_CONTROL(t->path, t->line);
607 if (cp[1] == 0) {
608 put_ch(*cp); /* single-character token */
609 } else {
610 put_str(cp); /* multi_character token */
612 out_line = t->end_line; /* may span multiple lines */
613 for (p = t->head; p; p = p->next)
614 tok_show(p); /* trailing blanks */