tools/llvm: Do not build with symbols
[minix3.git] / external / bsd / byacc / dist / reader.c
blobb16af06504e9002fd3adb8408f770345a775b7ae
1 /* $NetBSD: reader.c,v 1.8 2013/04/06 14:52:24 christos Exp $ */
3 /* Id: reader.c,v 1.36 2012/05/26 16:05:41 tom Exp */
5 #include "defs.h"
7 #include <sys/cdefs.h>
8 __RCSID("$NetBSD: reader.c,v 1.8 2013/04/06 14:52:24 christos Exp $");
10 /* The line size must be a positive integer. One hundred was chosen */
11 /* because few lines in Yacc input grammars exceed 100 characters. */
12 /* Note that if a line exceeds LINESIZE characters, the line buffer */
13 /* will be expanded to accommodate it. */
/* Initial line-buffer capacity; get_line() allocates LINESIZE + 1 bytes and grows by LINESIZE. */
15 #define LINESIZE 100
/* Brace characters compared against while scanning %{ ... %}, unions and actions. */
17 #define L_CURL '{'
18 #define R_CURL '}'
/* Forward declaration: start_rule() is called by advance_to_start() before its definition. */
20 static void start_rule(bucket *bp, int s_lineno);
/* Scratch token buffer appended to by cachec(). */
22 static char *cache;
/* cinc: next write index into cache; cache_size: bytes currently allocated for cache. */
23 static int cinc, cache_size;
/* Number of distinct <tag> names recorded by get_tag(). */
25 int ntags;
/* Allocated capacity of tag_table (grown 16 entries at a time by get_tag()). */
26 static int tagmax;
/* Table of tag-name strings owned by this file; released by free_tags(). */
27 static char **tag_table;
/* Set by get_line() once the input stream has reported EOF. */
29 static char saw_eof;
/* Set by copy_union() after the first %union has been copied. */
30 char unionized;
/* cptr: current scan position within line; line: current input line (ends in '\n', no NUL). */
31 char *cptr, *line;
/* Bytes currently allocated for line. */
32 static int linesize;
/* Start symbol, set by declare_start() or by the first rule in advance_to_start(). */
34 static bucket *goal;
/* Running precedence level, incremented by declare_tokens() for non-%token declarations. */
35 static Value_t prec;
/* Counter used by insert_empty_rule() to build "$$N" symbol names. */
36 static int gensym;
/* Non-zero when the most recently read rule item was an action. */
37 static char last_was_action;
/* Capacity of pitem. */
39 static int maxitems;
/* Right-hand-side items of all rules; a null entry terminates each rule. */
40 static bucket **pitem;
/* Capacity of plhs (and of rprec / rassoc, which are resized together). */
42 static int maxrules;
/* Left-hand-side symbol of each rule, indexed by rule number. */
43 static bucket **plhs;
/* Size in bytes of name_pool, computed by pack_names(). */
45 static size_t name_pool_size;
/* Single allocation holding "$accept", "$end" and every symbol name. */
46 static char *name_pool;
/* printf format used to emit #line directives (line number, file name). */
48 char line_format[] = "#line %d \"%s\"\n";
/* Heads of the linked lists built by copy_param() for %lex-param / %parse-param. */
50 param *lex_param;
51 param *parse_param;
53 static void
54 cachec(int c)
56 assert(cinc >= 0);
57 if (cinc >= cache_size)
59 cache_size += 256;
60 cache = TREALLOC(char, cache, cache_size);
61 NO_SPACE(cache);
63 cache[cinc] = (char)c;
64 ++cinc;
67 static void
68 get_line(void)
70 FILE *f = input_file;
71 int c;
72 int i;
74 if (saw_eof || (c = getc(f)) == EOF)
76 if (line)
78 FREE(line);
79 line = 0;
81 cptr = 0;
82 saw_eof = 1;
83 return;
86 if (line == 0 || linesize != (LINESIZE + 1))
88 if (line)
89 FREE(line);
90 linesize = LINESIZE + 1;
91 line = TMALLOC(char, linesize);
92 NO_SPACE(line);
95 i = 0;
96 ++lineno;
97 for (;;)
99 line[i] = (char)c;
100 if (c == '\n')
102 cptr = line;
103 return;
105 if (++i >= linesize)
107 linesize += LINESIZE;
108 line = TREALLOC(char, line, linesize);
109 NO_SPACE(line);
111 c = getc(f);
112 if (c == EOF)
114 line[i] = '\n';
115 saw_eof = 1;
116 cptr = line;
117 return;
122 static char *
123 dup_line(void)
125 char *p, *s, *t;
127 if (line == 0)
128 return (0);
129 s = line;
130 while (*s != '\n')
131 ++s;
132 p = TMALLOC(char, s - line + 1);
133 NO_SPACE(p);
135 s = line;
136 t = p;
137 while ((*t++ = *s++) != '\n')
138 continue;
139 return (p);
142 static void
143 skip_comment(void)
145 char *s;
147 int st_lineno = lineno;
148 char *st_line = dup_line();
149 char *st_cptr = st_line + (cptr - line);
151 s = cptr + 2;
152 for (;;)
154 if (*s == '*' && s[1] == '/')
156 cptr = s + 2;
157 FREE(st_line);
158 return;
160 if (*s == '\n')
162 get_line();
163 if (line == 0)
164 unterminated_comment(st_lineno, st_line, st_cptr);
165 s = cptr;
167 else
168 ++s;
172 static int
173 nextc(void)
175 char *s;
177 if (line == 0)
179 get_line();
180 if (line == 0)
181 return (EOF);
184 s = cptr;
185 for (;;)
187 switch (*s)
189 case '\n':
190 get_line();
191 if (line == 0)
192 return (EOF);
193 s = cptr;
194 break;
196 case ' ':
197 case '\t':
198 case '\f':
199 case '\r':
200 case '\v':
201 case ',':
202 case ';':
203 ++s;
204 break;
206 case '\\':
207 cptr = s;
208 return ('%');
210 case '/':
211 if (s[1] == '*')
213 cptr = s;
214 skip_comment();
215 s = cptr;
216 break;
218 else if (s[1] == '/')
220 get_line();
221 if (line == 0)
222 return (EOF);
223 s = cptr;
224 break;
226 /* FALLTHRU */
228 default:
229 cptr = s;
230 return (*s);
236 * Compare keyword to cached token, treating '_' and '-' the same. Some
237 * grammars rely upon this misfeature.
239 static int
240 matchec(const char *name)
242 const char *p = cache;
243 const char *q = name;
244 int code = 0; /* assume mismatch */
246 while (*p != '\0' && *q != '\0')
248 char a = *p++;
249 char b = *q++;
250 if (a == '_')
251 a = '-';
252 if (b == '_')
253 b = '-';
254 if (a != b)
255 break;
256 if (*p == '\0' && *q == '\0')
258 code = 1;
259 break;
262 return code;
265 static int
266 keyword(void)
268 int c;
269 char *t_cptr = cptr;
271 c = *++cptr;
272 if (isalpha(c))
274 cinc = 0;
275 for (;;)
277 if (isalpha(c))
279 if (isupper(c))
280 c = tolower(c);
281 cachec(c);
283 else if (isdigit(c)
284 || c == '-'
285 || c == '_'
286 || c == '.'
287 || c == '$')
289 cachec(c);
291 else
293 break;
295 c = *++cptr;
297 cachec(NUL);
299 if (matchec("token") || matchec("term"))
300 return (TOKEN);
301 if (matchec("type"))
302 return (TYPE);
303 if (matchec("left"))
304 return (LEFT);
305 if (matchec("right"))
306 return (RIGHT);
307 if (matchec("nonassoc") || matchec("binary"))
308 return (NONASSOC);
309 if (matchec("start"))
310 return (START);
311 if (matchec("union"))
312 return (UNION);
313 if (matchec("ident"))
314 return (IDENT);
315 if (matchec("expect"))
316 return (EXPECT);
317 if (matchec("expect-rr"))
318 return (EXPECT_RR);
319 if (matchec("pure-parser"))
320 return (PURE_PARSER);
321 if (matchec("parse-param"))
322 return (PARSE_PARAM);
323 if (matchec("lex-param"))
324 return (LEX_PARAM);
325 if (matchec("yacc"))
326 return (POSIX_YACC);
328 else
330 ++cptr;
331 if (c == L_CURL)
332 return (TEXT);
333 if (c == '%' || c == '\\')
334 return (MARK);
335 if (c == '<')
336 return (LEFT);
337 if (c == '>')
338 return (RIGHT);
339 if (c == '0')
340 return (TOKEN);
341 if (c == '2')
342 return (NONASSOC);
344 syntax_error(lineno, line, t_cptr);
345 /*NOTREACHED */
349 static void
350 copy_ident(void)
352 int c;
353 FILE *f = output_file;
355 c = nextc();
356 if (c == EOF)
357 unexpected_EOF();
358 if (c != '"')
359 syntax_error(lineno, line, cptr);
360 ++outline;
361 fprintf(f, "#ident \"");
362 for (;;)
364 c = *++cptr;
365 if (c == '\n')
367 fprintf(f, "\"\n");
368 return;
370 putc(c, f);
371 if (c == '"')
373 putc('\n', f);
374 ++cptr;
375 return;
380 static void
381 copy_text(void)
383 int c;
384 int quote;
385 FILE *f = text_file;
386 int need_newline = 0;
387 int t_lineno = lineno;
388 char *t_line = dup_line();
389 char *t_cptr = t_line + (cptr - line - 2);
391 if (*cptr == '\n')
393 get_line();
394 if (line == 0)
395 unterminated_text(t_lineno, t_line, t_cptr);
397 if (!lflag)
398 fprintf(f, line_format, lineno, input_file_name);
400 loop:
401 c = *cptr++;
402 switch (c)
404 case '\n':
405 next_line:
406 putc('\n', f);
407 need_newline = 0;
408 get_line();
409 if (line)
410 goto loop;
411 unterminated_text(t_lineno, t_line, t_cptr);
413 case '\'':
414 case '"':
416 int s_lineno = lineno;
417 char *s_line = dup_line();
418 char *s_cptr = s_line + (cptr - line - 1);
420 quote = c;
421 putc(c, f);
422 for (;;)
424 c = *cptr++;
425 putc(c, f);
426 if (c == quote)
428 need_newline = 1;
429 FREE(s_line);
430 goto loop;
432 if (c == '\n')
433 unterminated_string(s_lineno, s_line, s_cptr);
434 if (c == '\\')
436 c = *cptr++;
437 putc(c, f);
438 if (c == '\n')
440 get_line();
441 if (line == 0)
442 unterminated_string(s_lineno, s_line, s_cptr);
448 case '/':
449 putc(c, f);
450 need_newline = 1;
451 c = *cptr;
452 if (c == '/')
454 putc('*', f);
455 while ((c = *++cptr) != '\n')
457 if (c == '*' && cptr[1] == '/')
458 fprintf(f, "* ");
459 else
460 putc(c, f);
462 fprintf(f, "*/");
463 goto next_line;
465 if (c == '*')
467 int c_lineno = lineno;
468 char *c_line = dup_line();
469 char *c_cptr = c_line + (cptr - line - 1);
471 putc('*', f);
472 ++cptr;
473 for (;;)
475 c = *cptr++;
476 putc(c, f);
477 if (c == '*' && *cptr == '/')
479 putc('/', f);
480 ++cptr;
481 FREE(c_line);
482 goto loop;
484 if (c == '\n')
486 get_line();
487 if (line == 0)
488 unterminated_comment(c_lineno, c_line, c_cptr);
492 need_newline = 1;
493 goto loop;
495 case '%':
496 case '\\':
497 if (*cptr == R_CURL)
499 if (need_newline)
500 putc('\n', f);
501 ++cptr;
502 FREE(t_line);
503 return;
505 /* FALLTHRU */
507 default:
508 putc(c, f);
509 need_newline = 1;
510 goto loop;
514 static void
515 puts_both(const char *s)
517 fputs(s, text_file);
518 if (dflag)
519 fputs(s, union_file);
522 static void
523 putc_both(int c)
525 putc(c, text_file);
526 if (dflag)
527 putc(c, union_file);
530 static void
531 copy_union(void)
533 int c;
534 int quote;
535 int depth;
536 int u_lineno = lineno;
537 char *u_line = dup_line();
538 char *u_cptr = u_line + (cptr - line - 6);
540 if (unionized)
541 over_unionized(cptr - 6);
542 unionized = 1;
544 if (!lflag)
545 fprintf(text_file, line_format, lineno, input_file_name);
547 puts_both("#ifdef YYSTYPE\n");
548 puts_both("#undef YYSTYPE_IS_DECLARED\n");
549 puts_both("#define YYSTYPE_IS_DECLARED 1\n");
550 puts_both("#endif\n");
551 puts_both("#ifndef YYSTYPE_IS_DECLARED\n");
552 puts_both("#define YYSTYPE_IS_DECLARED 1\n");
553 puts_both("typedef union");
555 depth = 0;
556 loop:
557 c = *cptr++;
558 putc_both(c);
559 switch (c)
561 case '\n':
562 next_line:
563 get_line();
564 if (line == 0)
565 unterminated_union(u_lineno, u_line, u_cptr);
566 goto loop;
568 case L_CURL:
569 ++depth;
570 goto loop;
572 case R_CURL:
573 if (--depth == 0)
575 puts_both(" YYSTYPE;\n");
576 puts_both("#endif /* !YYSTYPE_IS_DECLARED */\n");
577 FREE(u_line);
578 return;
580 goto loop;
582 case '\'':
583 case '"':
585 int s_lineno = lineno;
586 char *s_line = dup_line();
587 char *s_cptr = s_line + (cptr - line - 1);
589 quote = c;
590 for (;;)
592 c = *cptr++;
593 putc_both(c);
594 if (c == quote)
596 FREE(s_line);
597 goto loop;
599 if (c == '\n')
600 unterminated_string(s_lineno, s_line, s_cptr);
601 if (c == '\\')
603 c = *cptr++;
604 putc_both(c);
605 if (c == '\n')
607 get_line();
608 if (line == 0)
609 unterminated_string(s_lineno, s_line, s_cptr);
615 case '/':
616 c = *cptr;
617 if (c == '/')
619 putc_both('*');
620 while ((c = *++cptr) != '\n')
622 if (c == '*' && cptr[1] == '/')
624 puts_both("* ");
626 else
628 putc_both(c);
631 puts_both("*/\n");
632 goto next_line;
634 if (c == '*')
636 int c_lineno = lineno;
637 char *c_line = dup_line();
638 char *c_cptr = c_line + (cptr - line - 1);
640 putc_both('*');
641 ++cptr;
642 for (;;)
644 c = *cptr++;
645 putc_both(c);
646 if (c == '*' && *cptr == '/')
648 putc_both('/');
649 ++cptr;
650 FREE(c_line);
651 goto loop;
653 if (c == '\n')
655 get_line();
656 if (line == 0)
657 unterminated_comment(c_lineno, c_line, c_cptr);
661 goto loop;
663 default:
664 goto loop;
669 * Keep a linked list of parameters
671 static void
672 copy_param(int k)
674 char *buf;
675 int c;
676 param *head, *p;
677 int i;
678 int name, type2;
680 c = nextc();
681 if (c == EOF)
682 unexpected_EOF();
683 if (c != '{')
684 goto out;
685 cptr++;
687 c = nextc();
688 if (c == EOF)
689 unexpected_EOF();
690 if (c == '}')
691 goto out;
693 buf = TMALLOC(char, linesize);
694 NO_SPACE(buf);
696 for (i = 0; (c = *cptr++) != '}'; i++)
698 if (c == '\0')
699 missing_brace();
700 if (c == EOF)
701 unexpected_EOF();
702 buf[i] = (char)c;
705 if (i == 0)
706 goto out;
708 buf[i--] = '\0';
709 while (i >= 0 && isspace(UCH(buf[i])))
710 buf[i--] = '\0';
712 if (buf[i] == ']')
714 int level = 1;
715 while (i >= 0 && level > 0 && buf[i] != '[')
717 if (buf[i] == ']')
718 ++level;
719 else if (buf[i] == '[')
720 --level;
721 i--;
723 if (i <= 0)
724 unexpected_EOF();
725 type2 = i--;
727 else
729 type2 = i + 1;
732 while (i >= 0 && (isalnum(UCH(buf[i])) ||
733 UCH(buf[i]) == '_'))
734 i--;
736 if (!isspace(UCH(buf[i])) && buf[i] != '*')
737 goto out;
739 name = i + 1;
741 p = TMALLOC(param, 1);
742 NO_SPACE(p);
744 p->type2 = strdup(buf + type2);
745 NO_SPACE(p->type2);
747 buf[type2] = '\0';
749 p->name = strdup(buf + name);
750 NO_SPACE(p->name);
752 buf[name] = '\0';
753 p->type = buf;
755 if (k == LEX_PARAM)
756 head = lex_param;
757 else
758 head = parse_param;
760 if (head != NULL)
762 while (head->next)
763 head = head->next;
764 head->next = p;
766 else
768 if (k == LEX_PARAM)
769 lex_param = p;
770 else
771 parse_param = p;
773 p->next = NULL;
774 return;
776 out:
777 syntax_error(lineno, line, cptr);
/*
 * Return the numeric value (0..15) of a hexadecimal digit character, or
 * -1 when `c` is not a hexadecimal digit.
 */
static int
hexval(int c)
{
    int value = -1;

    if ('0' <= c && c <= '9')
        value = c - '0';
    else if ('A' <= c && c <= 'F')
        value = c - 'A' + 10;
    else if ('a' <= c && c <= 'f')
        value = c - 'a' + 10;

    return (value);
}
792 static bucket *
793 get_literal(void)
795 int c, quote;
796 int i;
797 int n;
798 char *s;
799 bucket *bp;
800 int s_lineno = lineno;
801 char *s_line = dup_line();
802 char *s_cptr = s_line + (cptr - line);
804 quote = *cptr++;
805 cinc = 0;
806 for (;;)
808 c = *cptr++;
809 if (c == quote)
810 break;
811 if (c == '\n')
812 unterminated_string(s_lineno, s_line, s_cptr);
813 if (c == '\\')
815 char *c_cptr = cptr - 1;
817 c = *cptr++;
818 switch (c)
820 case '\n':
821 get_line();
822 if (line == 0)
823 unterminated_string(s_lineno, s_line, s_cptr);
824 continue;
826 case '0':
827 case '1':
828 case '2':
829 case '3':
830 case '4':
831 case '5':
832 case '6':
833 case '7':
834 n = c - '0';
835 c = *cptr;
836 if (IS_OCTAL(c))
838 n = (n << 3) + (c - '0');
839 c = *++cptr;
840 if (IS_OCTAL(c))
842 n = (n << 3) + (c - '0');
843 ++cptr;
846 if (n > MAXCHAR)
847 illegal_character(c_cptr);
848 c = n;
849 break;
851 case 'x':
852 c = *cptr++;
853 n = hexval(c);
854 if (n < 0 || n >= 16)
855 illegal_character(c_cptr);
856 for (;;)
858 c = *cptr;
859 i = hexval(c);
860 if (i < 0 || i >= 16)
861 break;
862 ++cptr;
863 n = (n << 4) + i;
864 if (n > MAXCHAR)
865 illegal_character(c_cptr);
867 c = n;
868 break;
870 case 'a':
871 c = 7;
872 break;
873 case 'b':
874 c = '\b';
875 break;
876 case 'f':
877 c = '\f';
878 break;
879 case 'n':
880 c = '\n';
881 break;
882 case 'r':
883 c = '\r';
884 break;
885 case 't':
886 c = '\t';
887 break;
888 case 'v':
889 c = '\v';
890 break;
893 cachec(c);
895 FREE(s_line);
897 n = cinc;
898 s = TMALLOC(char, n);
899 NO_SPACE(s);
901 for (i = 0; i < n; ++i)
902 s[i] = cache[i];
904 cinc = 0;
905 if (n == 1)
906 cachec('\'');
907 else
908 cachec('"');
910 for (i = 0; i < n; ++i)
912 c = UCH(s[i]);
913 if (c == '\\' || c == cache[0])
915 cachec('\\');
916 cachec(c);
918 else if (isprint(c))
919 cachec(c);
920 else
922 cachec('\\');
923 switch (c)
925 case 7:
926 cachec('a');
927 break;
928 case '\b':
929 cachec('b');
930 break;
931 case '\f':
932 cachec('f');
933 break;
934 case '\n':
935 cachec('n');
936 break;
937 case '\r':
938 cachec('r');
939 break;
940 case '\t':
941 cachec('t');
942 break;
943 case '\v':
944 cachec('v');
945 break;
946 default:
947 cachec(((c >> 6) & 7) + '0');
948 cachec(((c >> 3) & 7) + '0');
949 cachec((c & 7) + '0');
950 break;
955 if (n == 1)
956 cachec('\'');
957 else
958 cachec('"');
960 cachec(NUL);
961 bp = lookup(cache);
962 bp->class = TERM;
963 if (n == 1 && bp->value == UNDEFINED)
964 bp->value = UCH(*s);
965 FREE(s);
967 return (bp);
970 static int
971 is_reserved(char *name)
973 char *s;
975 if (strcmp(name, ".") == 0 ||
976 strcmp(name, "$accept") == 0 ||
977 strcmp(name, "$end") == 0)
978 return (1);
980 if (name[0] == '$' && name[1] == '$' && isdigit(UCH(name[2])))
982 s = name + 3;
983 while (isdigit(UCH(*s)))
984 ++s;
985 if (*s == NUL)
986 return (1);
989 return (0);
992 static bucket *
993 get_name(void)
995 int c;
997 cinc = 0;
998 for (c = *cptr; IS_IDENT(c); c = *++cptr)
999 cachec(c);
1000 cachec(NUL);
1002 if (is_reserved(cache))
1003 used_reserved(cache);
1005 return (lookup(cache));
1008 static Value_t
1009 get_number(void)
1011 int c;
1012 Value_t n;
1014 n = 0;
1015 for (c = *cptr; isdigit(c); c = *++cptr)
1016 n = (Value_t) (10 * n + (c - '0'));
1018 return (n);
1021 static char *
1022 get_tag(void)
1024 int c;
1025 int i;
1026 char *s;
1027 int t_lineno = lineno;
1028 char *t_line = dup_line();
1029 char *t_cptr = t_line + (cptr - line);
1031 ++cptr;
1032 c = nextc();
1033 if (c == EOF)
1034 unexpected_EOF();
1035 if (!isalpha(c) && c != '_' && c != '$')
1036 illegal_tag(t_lineno, t_line, t_cptr);
1038 cinc = 0;
1041 cachec(c);
1042 c = *++cptr;
1044 while (IS_IDENT(c));
1045 cachec(NUL);
1047 c = nextc();
1048 if (c == EOF)
1049 unexpected_EOF();
1050 if (c != '>')
1051 illegal_tag(t_lineno, t_line, t_cptr);
1052 ++cptr;
1054 for (i = 0; i < ntags; ++i)
1056 if (strcmp(cache, tag_table[i]) == 0)
1058 FREE(t_line);
1059 return (tag_table[i]);
1063 if (ntags >= tagmax)
1065 tagmax += 16;
1066 tag_table =
1067 (tag_table
1068 ? TREALLOC(char *, tag_table, tagmax)
1069 : TMALLOC(char *, tagmax));
1070 NO_SPACE(tag_table);
1073 s = TMALLOC(char, cinc);
1074 NO_SPACE(s);
1076 strcpy(s, cache);
1077 tag_table[ntags] = s;
1078 ++ntags;
1079 FREE(t_line);
1080 return (s);
1083 static void
1084 declare_tokens(int assoc)
1086 int c;
1087 bucket *bp;
1088 Value_t value;
1089 char *tag = 0;
1091 if (assoc != TOKEN)
1092 ++prec;
1094 c = nextc();
1095 if (c == EOF)
1096 unexpected_EOF();
1097 if (c == '<')
1099 tag = get_tag();
1100 c = nextc();
1101 if (c == EOF)
1102 unexpected_EOF();
1105 for (;;)
1107 if (isalpha(c) || c == '_' || c == '.' || c == '$')
1108 bp = get_name();
1109 else if (c == '\'' || c == '"')
1110 bp = get_literal();
1111 else
1112 return;
1114 if (bp == goal)
1115 tokenized_start(bp->name);
1116 bp->class = TERM;
1118 if (tag)
1120 if (bp->tag && tag != bp->tag)
1121 retyped_warning(bp->name);
1122 bp->tag = tag;
1125 if (assoc != TOKEN)
1127 if (bp->prec && prec != bp->prec)
1128 reprec_warning(bp->name);
1129 bp->assoc = (Assoc_t) assoc;
1130 bp->prec = prec;
1133 c = nextc();
1134 if (c == EOF)
1135 unexpected_EOF();
1137 if (isdigit(c))
1139 value = get_number();
1140 if (bp->value != UNDEFINED && value != bp->value)
1141 revalued_warning(bp->name);
1142 bp->value = value;
1143 c = nextc();
1144 if (c == EOF)
1145 unexpected_EOF();
1151 * %expect requires special handling
1152 * as it really isn't part of the yacc
1153 * grammar only a flag for yacc proper.
1155 static void
1156 declare_expect(int assoc)
1158 int c;
1160 if (assoc != EXPECT && assoc != EXPECT_RR)
1161 ++prec;
1164 * Stay away from nextc - doesn't
1165 * detect EOL and will read to EOF.
1167 c = *++cptr;
1168 if (c == EOF)
1169 unexpected_EOF();
1171 for (;;)
1173 if (isdigit(c))
1175 if (assoc == EXPECT)
1176 SRexpect = get_number();
1177 else
1178 RRexpect = get_number();
1179 break;
1182 * Looking for number before EOL.
1183 * Spaces, tabs, and numbers are ok,
1184 * words, punc., etc. are syntax errors.
1186 else if (c == '\n' || isalpha(c) || !isspace(c))
1188 syntax_error(lineno, line, cptr);
1190 else
1192 c = *++cptr;
1193 if (c == EOF)
1194 unexpected_EOF();
1199 static void
1200 declare_types(void)
1202 int c;
1203 bucket *bp;
1204 char *tag;
1206 c = nextc();
1207 if (c == EOF)
1208 unexpected_EOF();
1209 if (c != '<')
1210 syntax_error(lineno, line, cptr);
1211 tag = get_tag();
1213 for (;;)
1215 c = nextc();
1216 if (isalpha(c) || c == '_' || c == '.' || c == '$')
1217 bp = get_name();
1218 else if (c == '\'' || c == '"')
1219 bp = get_literal();
1220 else
1221 return;
1223 if (bp->tag && tag != bp->tag)
1224 retyped_warning(bp->name);
1225 bp->tag = tag;
1229 static void
1230 declare_start(void)
1232 int c;
1233 bucket *bp;
1235 c = nextc();
1236 if (c == EOF)
1237 unexpected_EOF();
1238 if (!isalpha(c) && c != '_' && c != '.' && c != '$')
1239 syntax_error(lineno, line, cptr);
1240 bp = get_name();
1241 if (bp->class == TERM)
1242 terminal_start(bp->name);
1243 if (goal && goal != bp)
1244 restarted_warning();
1245 goal = bp;
1248 static void
1249 read_declarations(void)
1251 int c, k;
1253 cache_size = 256;
1254 cache = TMALLOC(char, cache_size);
1255 NO_SPACE(cache);
1257 for (;;)
1259 c = nextc();
1260 if (c == EOF)
1261 unexpected_EOF();
1262 if (c != '%')
1263 syntax_error(lineno, line, cptr);
1264 switch (k = keyword())
1266 case MARK:
1267 return;
1269 case IDENT:
1270 copy_ident();
1271 break;
1273 case TEXT:
1274 copy_text();
1275 break;
1277 case UNION:
1278 copy_union();
1279 break;
1281 case TOKEN:
1282 case LEFT:
1283 case RIGHT:
1284 case NONASSOC:
1285 declare_tokens(k);
1286 break;
1288 case EXPECT:
1289 case EXPECT_RR:
1290 declare_expect(k);
1291 break;
1293 case TYPE:
1294 declare_types();
1295 break;
1297 case START:
1298 declare_start();
1299 break;
1301 case PURE_PARSER:
1302 pure_parser = 1;
1303 break;
1305 case PARSE_PARAM:
1306 case LEX_PARAM:
1307 copy_param(k);
1308 break;
1310 case POSIX_YACC:
1311 /* noop for bison compatibility. byacc is already designed to be posix
1312 * yacc compatible. */
1313 break;
1318 static void
1319 initialize_grammar(void)
1321 nitems = 4;
1322 maxitems = 300;
1324 pitem = TMALLOC(bucket *, maxitems);
1325 NO_SPACE(pitem);
1327 pitem[0] = 0;
1328 pitem[1] = 0;
1329 pitem[2] = 0;
1330 pitem[3] = 0;
1332 nrules = 3;
1333 maxrules = 100;
1335 plhs = TMALLOC(bucket *, maxrules);
1336 NO_SPACE(plhs);
1338 plhs[0] = 0;
1339 plhs[1] = 0;
1340 plhs[2] = 0;
1342 rprec = TMALLOC(Value_t, maxrules);
1343 NO_SPACE(rprec);
1345 rprec[0] = 0;
1346 rprec[1] = 0;
1347 rprec[2] = 0;
1349 rassoc = TMALLOC(Assoc_t, maxrules);
1350 NO_SPACE(rassoc);
1352 rassoc[0] = TOKEN;
1353 rassoc[1] = TOKEN;
1354 rassoc[2] = TOKEN;
1357 static void
1358 expand_items(void)
1360 maxitems += 300;
1361 pitem = TREALLOC(bucket *, pitem, maxitems);
1362 NO_SPACE(pitem);
1365 static void
1366 expand_rules(void)
1368 maxrules += 100;
1370 plhs = TREALLOC(bucket *, plhs, maxrules);
1371 NO_SPACE(plhs);
1373 rprec = TREALLOC(Value_t, rprec, maxrules);
1374 NO_SPACE(rprec);
1376 rassoc = TREALLOC(Assoc_t, rassoc, maxrules);
1377 NO_SPACE(rassoc);
1380 static void
1381 advance_to_start(void)
1383 int c;
1384 bucket *bp;
1385 char *s_cptr;
1386 int s_lineno;
1388 for (;;)
1390 c = nextc();
1391 if (c != '%')
1392 break;
1393 s_cptr = cptr;
1394 switch (keyword())
1396 case MARK:
1397 no_grammar();
1399 case TEXT:
1400 copy_text();
1401 break;
1403 case START:
1404 declare_start();
1405 break;
1407 default:
1408 syntax_error(lineno, line, s_cptr);
1412 c = nextc();
1413 if (!isalpha(c) && c != '_' && c != '.' && c != '_')
1414 syntax_error(lineno, line, cptr);
1415 bp = get_name();
1416 if (goal == 0)
1418 if (bp->class == TERM)
1419 terminal_start(bp->name);
1420 goal = bp;
1423 s_lineno = lineno;
1424 c = nextc();
1425 if (c == EOF)
1426 unexpected_EOF();
1427 if (c != ':')
1428 syntax_error(lineno, line, cptr);
1429 start_rule(bp, s_lineno);
1430 ++cptr;
1433 static void
1434 start_rule(bucket *bp, int s_lineno)
1436 if (bp->class == TERM)
1437 terminal_lhs(s_lineno);
1438 bp->class = NONTERM;
1439 if (nrules >= maxrules)
1440 expand_rules();
1441 plhs[nrules] = bp;
1442 rprec[nrules] = UNDEFINED;
1443 rassoc[nrules] = TOKEN;
1446 static void
1447 end_rule(void)
1449 int i;
1451 if (!last_was_action && plhs[nrules]->tag)
1453 if (pitem[nitems - 1])
1455 for (i = nitems - 1; (i > 0) && pitem[i]; --i)
1456 continue;
1457 if (pitem[i + 1] == 0 || pitem[i + 1]->tag != plhs[nrules]->tag)
1458 default_action_warning();
1460 else
1462 default_action_warning();
1466 last_was_action = 0;
1467 if (nitems >= maxitems)
1468 expand_items();
1469 pitem[nitems] = 0;
1470 ++nitems;
1471 ++nrules;
1474 static void
1475 insert_empty_rule(void)
1477 bucket *bp, **bpp;
1479 assert(cache);
1480 sprintf(cache, "$$%d", ++gensym);
1481 bp = make_bucket(cache);
1482 last_symbol->next = bp;
1483 last_symbol = bp;
1484 bp->tag = plhs[nrules]->tag;
1485 bp->class = NONTERM;
1487 if ((nitems += 2) > maxitems)
1488 expand_items();
1489 bpp = pitem + nitems - 1;
1490 *bpp-- = bp;
1491 while ((bpp[0] = bpp[-1]) != 0)
1492 --bpp;
1494 if (++nrules >= maxrules)
1495 expand_rules();
1496 plhs[nrules] = plhs[nrules - 1];
1497 plhs[nrules - 1] = bp;
1498 rprec[nrules] = rprec[nrules - 1];
1499 rprec[nrules - 1] = 0;
1500 rassoc[nrules] = rassoc[nrules - 1];
1501 rassoc[nrules - 1] = TOKEN;
1504 static void
1505 add_symbol(void)
1507 int c;
1508 bucket *bp;
1509 int s_lineno = lineno;
1511 c = *cptr;
1512 if (c == '\'' || c == '"')
1513 bp = get_literal();
1514 else
1515 bp = get_name();
1517 c = nextc();
1518 if (c == ':')
1520 end_rule();
1521 start_rule(bp, s_lineno);
1522 ++cptr;
1523 return;
1526 if (last_was_action)
1527 insert_empty_rule();
1528 last_was_action = 0;
1530 if (++nitems > maxitems)
1531 expand_items();
1532 pitem[nitems - 1] = bp;
/*
 * Return a pointer to the first character of `s` that is not white space.
 * Scanning also stops at a NUL.
 */
static char *
after_blanks(char *s)
{
    for (; *s != '\0' && isspace(UCH(*s)); ++s)
        continue;
    return s;
}
1543 static void
1544 copy_action(void)
1546 int c;
1547 int i, n;
1548 int depth;
1549 int quote;
1550 char *tag;
1551 FILE *f = action_file;
1552 int a_lineno = lineno;
1553 char *a_line = dup_line();
1554 char *a_cptr = a_line + (cptr - line);
1556 if (last_was_action)
1557 insert_empty_rule();
1558 last_was_action = 1;
1560 fprintf(f, "case %d:\n", nrules - 2);
1561 if (!lflag)
1562 fprintf(f, line_format, lineno, input_file_name);
1563 if (*cptr == '=')
1564 ++cptr;
1566 /* avoid putting curly-braces in first column, to ease editing */
1567 if (*after_blanks(cptr) == L_CURL)
1569 putc('\t', f);
1570 cptr = after_blanks(cptr);
1573 n = 0;
1574 for (i = nitems - 1; pitem[i]; --i)
1575 ++n;
1577 depth = 0;
1578 loop:
1579 c = *cptr;
1580 if (c == '$')
1582 if (cptr[1] == '<')
1584 int d_lineno = lineno;
1585 char *d_line = dup_line();
1586 char *d_cptr = d_line + (cptr - line);
1588 ++cptr;
1589 tag = get_tag();
1590 c = *cptr;
1591 if (c == '$')
1593 fprintf(f, "yyval.%s", tag);
1594 ++cptr;
1595 FREE(d_line);
1596 goto loop;
1598 else if (isdigit(c))
1600 i = get_number();
1601 if (i > n)
1602 dollar_warning(d_lineno, i);
1603 fprintf(f, "yystack.l_mark[%d].%s", i - n, tag);
1604 FREE(d_line);
1605 goto loop;
1607 else if (c == '-' && isdigit(UCH(cptr[1])))
1609 ++cptr;
1610 i = -get_number() - n;
1611 fprintf(f, "yystack.l_mark[%d].%s", i, tag);
1612 FREE(d_line);
1613 goto loop;
1615 else
1616 dollar_error(d_lineno, d_line, d_cptr);
1618 else if (cptr[1] == '$')
1620 if (ntags)
1622 tag = plhs[nrules]->tag;
1623 if (tag == 0)
1624 untyped_lhs();
1625 fprintf(f, "yyval.%s", tag);
1627 else
1628 fprintf(f, "yyval");
1629 cptr += 2;
1630 goto loop;
1632 else if (isdigit(UCH(cptr[1])))
1634 ++cptr;
1635 i = get_number();
1636 if (ntags)
1638 if (i <= 0 || i > n)
1639 unknown_rhs(i);
1640 tag = pitem[nitems + i - n - 1]->tag;
1641 if (tag == 0)
1642 untyped_rhs(i, pitem[nitems + i - n - 1]->name);
1643 fprintf(f, "yystack.l_mark[%d].%s", i - n, tag);
1645 else
1647 if (i > n)
1648 dollar_warning(lineno, i);
1649 fprintf(f, "yystack.l_mark[%d]", i - n);
1651 goto loop;
1653 else if (cptr[1] == '-')
1655 cptr += 2;
1656 i = get_number();
1657 if (ntags)
1658 unknown_rhs(-i);
1659 fprintf(f, "yystack.l_mark[%d]", -i - n);
1660 goto loop;
1663 if (isalpha(c) || c == '_' || c == '$')
1667 putc(c, f);
1668 c = *++cptr;
1670 while (isalnum(c) || c == '_' || c == '$');
1671 goto loop;
1673 putc(c, f);
1674 ++cptr;
1675 switch (c)
1677 case '\n':
1678 next_line:
1679 get_line();
1680 if (line)
1681 goto loop;
1682 unterminated_action(a_lineno, a_line, a_cptr);
1684 case ';':
1685 if (depth > 0)
1686 goto loop;
1687 fprintf(f, "\nbreak;\n");
1688 free(a_line);
1689 return;
1691 case L_CURL:
1692 ++depth;
1693 goto loop;
1695 case R_CURL:
1696 if (--depth > 0)
1697 goto loop;
1698 fprintf(f, "\nbreak;\n");
1699 free(a_line);
1700 return;
1702 case '\'':
1703 case '"':
1705 int s_lineno = lineno;
1706 char *s_line = dup_line();
1707 char *s_cptr = s_line + (cptr - line - 1);
1709 quote = c;
1710 for (;;)
1712 c = *cptr++;
1713 putc(c, f);
1714 if (c == quote)
1716 FREE(s_line);
1717 goto loop;
1719 if (c == '\n')
1720 unterminated_string(s_lineno, s_line, s_cptr);
1721 if (c == '\\')
1723 c = *cptr++;
1724 putc(c, f);
1725 if (c == '\n')
1727 get_line();
1728 if (line == 0)
1729 unterminated_string(s_lineno, s_line, s_cptr);
1735 case '/':
1736 c = *cptr;
1737 if (c == '/')
1739 putc('*', f);
1740 while ((c = *++cptr) != '\n')
1742 if (c == '*' && cptr[1] == '/')
1743 fprintf(f, "* ");
1744 else
1745 putc(c, f);
1747 fprintf(f, "*/\n");
1748 goto next_line;
1750 if (c == '*')
1752 int c_lineno = lineno;
1753 char *c_line = dup_line();
1754 char *c_cptr = c_line + (cptr - line - 1);
1756 putc('*', f);
1757 ++cptr;
1758 for (;;)
1760 c = *cptr++;
1761 putc(c, f);
1762 if (c == '*' && *cptr == '/')
1764 putc('/', f);
1765 ++cptr;
1766 FREE(c_line);
1767 goto loop;
1769 if (c == '\n')
1771 get_line();
1772 if (line == 0)
1773 unterminated_comment(c_lineno, c_line, c_cptr);
1777 goto loop;
1779 default:
1780 goto loop;
1784 static int
1785 mark_symbol(void)
1787 int c;
1788 bucket *bp = NULL;
1790 c = cptr[1];
1791 if (c == '%' || c == '\\')
1793 cptr += 2;
1794 return (1);
1797 if (c == '=')
1798 cptr += 2;
1799 else if ((c == 'p' || c == 'P') &&
1800 ((c = cptr[2]) == 'r' || c == 'R') &&
1801 ((c = cptr[3]) == 'e' || c == 'E') &&
1802 ((c = cptr[4]) == 'c' || c == 'C') &&
1803 ((c = cptr[5], !IS_IDENT(c))))
1804 cptr += 5;
1805 else
1806 syntax_error(lineno, line, cptr);
1808 c = nextc();
1809 if (isalpha(c) || c == '_' || c == '.' || c == '$')
1810 bp = get_name();
1811 else if (c == '\'' || c == '"')
1812 bp = get_literal();
1813 else
1815 syntax_error(lineno, line, cptr);
1816 /*NOTREACHED */
1819 if (rprec[nrules] != UNDEFINED && bp->prec != rprec[nrules])
1820 prec_redeclared();
1822 rprec[nrules] = bp->prec;
1823 rassoc[nrules] = bp->assoc;
1824 return (0);
1827 static void
1828 read_grammar(void)
1830 int c;
1832 initialize_grammar();
1833 advance_to_start();
1835 for (;;)
1837 c = nextc();
1838 if (c == EOF)
1839 break;
1840 if (isalpha(c)
1841 || c == '_'
1842 || c == '.'
1843 || c == '$'
1844 || c == '\''
1845 || c == '"')
1846 add_symbol();
1847 else if (c == L_CURL || c == '=')
1848 copy_action();
1849 else if (c == '|')
1851 end_rule();
1852 start_rule(plhs[nrules - 1], 0);
1853 ++cptr;
1855 else if (c == '%')
1857 if (mark_symbol())
1858 break;
1860 else
1861 syntax_error(lineno, line, cptr);
1863 end_rule();
1866 static void
1867 free_tags(void)
1869 int i;
1871 if (tag_table == 0)
1872 return;
1874 for (i = 0; i < ntags; ++i)
1876 assert(tag_table[i]);
1877 FREE(tag_table[i]);
1879 FREE(tag_table);
1882 static void
1883 pack_names(void)
1885 bucket *bp;
1886 char *p, *s, *t;
1888 name_pool_size = 13; /* 13 == sizeof("$end") + sizeof("$accept") */
1889 for (bp = first_symbol; bp; bp = bp->next)
1890 name_pool_size += strlen(bp->name) + 1;
1892 name_pool = TMALLOC(char, name_pool_size);
1893 NO_SPACE(name_pool);
1895 strlcpy(name_pool, "$accept", name_pool_size);
1896 strlcpy(name_pool + 8, "$end", name_pool_size - 8);
1897 t = name_pool + 13;
1898 for (bp = first_symbol; bp; bp = bp->next)
1900 p = t;
1901 s = bp->name;
1902 while ((*t++ = *s++) != 0)
1903 continue;
1904 FREE(bp->name);
1905 bp->name = p;
1909 static void
1910 check_symbols(void)
1912 bucket *bp;
1914 if (goal->class == UNKNOWN)
1915 undefined_goal(goal->name);
1917 for (bp = first_symbol; bp; bp = bp->next)
1919 if (bp->class == UNKNOWN)
1921 undefined_symbol_warning(bp->name);
1922 bp->class = TERM;
1927 static void
1928 protect_string(char *src, char **des)
1930 unsigned len;
1931 char *s;
1932 char *d;
1934 *des = src;
1935 if (src)
1937 len = 1;
1938 s = src;
1939 while (*s)
1941 if ('\\' == *s || '"' == *s)
1942 len++;
1943 s++;
1944 len++;
1947 *des = d = TMALLOC(char, len);
1948 NO_SPACE(d);
1950 s = src;
1951 while (*s)
1953 if ('\\' == *s || '"' == *s)
1954 *d++ = '\\';
1955 *d++ = *s++;
1957 *d = '\0';
1961 static void
1962 pack_symbols(void)
1964 bucket *bp;
1965 bucket **v;
1966 Value_t i, j, k, n;
1968 nsyms = 2;
1969 ntokens = 1;
1970 for (bp = first_symbol; bp; bp = bp->next)
1972 ++nsyms;
1973 if (bp->class == TERM)
1974 ++ntokens;
1976 start_symbol = (Value_t) ntokens;
1977 nvars = nsyms - ntokens;
1979 symbol_name = TMALLOC(char *, nsyms);
1980 NO_SPACE(symbol_name);
1982 symbol_value = TMALLOC(Value_t, nsyms);
1983 NO_SPACE(symbol_value);
1985 symbol_prec = TMALLOC(short, nsyms);
1986 NO_SPACE(symbol_prec);
1988 symbol_assoc = TMALLOC(char, nsyms);
1989 NO_SPACE(symbol_assoc);
1991 v = TMALLOC(bucket *, nsyms);
1992 NO_SPACE(v);
1994 v[0] = 0;
1995 v[start_symbol] = 0;
1997 i = 1;
1998 j = (Value_t) (start_symbol + 1);
1999 for (bp = first_symbol; bp; bp = bp->next)
2001 if (bp->class == TERM)
2002 v[i++] = bp;
2003 else
2004 v[j++] = bp;
2006 assert(i == ntokens && j == nsyms);
2008 for (i = 1; i < ntokens; ++i)
2009 v[i]->index = i;
2011 goal->index = (Index_t) (start_symbol + 1);
2012 k = (Value_t) (start_symbol + 2);
2013 while (++i < nsyms)
2014 if (v[i] != goal)
2016 v[i]->index = k;
2017 ++k;
2020 goal->value = 0;
2021 k = 1;
2022 for (i = (Value_t) (start_symbol + 1); i < nsyms; ++i)
2024 if (v[i] != goal)
2026 v[i]->value = k;
2027 ++k;
2031 k = 0;
2032 for (i = 1; i < ntokens; ++i)
2034 n = v[i]->value;
2035 if (n > 256)
2037 for (j = k++; j > 0 && symbol_value[j - 1] > n; --j)
2038 symbol_value[j] = symbol_value[j - 1];
2039 symbol_value[j] = n;
2043 assert(v[1] != 0);
2045 if (v[1]->value == UNDEFINED)
2046 v[1]->value = 256;
2048 j = 0;
2049 n = 257;
2050 for (i = 2; i < ntokens; ++i)
2052 if (v[i]->value == UNDEFINED)
2054 while (j < k && n == symbol_value[j])
2056 while (++j < k && n == symbol_value[j])
2057 continue;
2058 ++n;
2060 v[i]->value = n;
2061 ++n;
2065 symbol_name[0] = name_pool + 8;
2066 symbol_value[0] = 0;
2067 symbol_prec[0] = 0;
2068 symbol_assoc[0] = TOKEN;
2069 for (i = 1; i < ntokens; ++i)
2071 symbol_name[i] = v[i]->name;
2072 symbol_value[i] = v[i]->value;
2073 symbol_prec[i] = v[i]->prec;
2074 symbol_assoc[i] = v[i]->assoc;
2076 symbol_name[start_symbol] = name_pool;
2077 symbol_value[start_symbol] = -1;
2078 symbol_prec[start_symbol] = 0;
2079 symbol_assoc[start_symbol] = TOKEN;
2080 for (++i; i < nsyms; ++i)
2082 k = v[i]->index;
2083 symbol_name[k] = v[i]->name;
2084 symbol_value[k] = v[i]->value;
2085 symbol_prec[k] = v[i]->prec;
2086 symbol_assoc[k] = v[i]->assoc;
2089 if (gflag)
2091 symbol_pname = TMALLOC(char *, nsyms);
2092 NO_SPACE(symbol_pname);
2094 for (i = 0; i < nsyms; ++i)
2095 protect_string(symbol_name[i], &(symbol_pname[i]));
2098 FREE(v);
2101 static void
2102 pack_grammar(void)
2104 int i;
2105 Value_t j;
2106 Assoc_t assoc;
2107 Value_t prec2;
2109 ritem = TMALLOC(Value_t, nitems);
2110 NO_SPACE(ritem);
2112 rlhs = TMALLOC(Value_t, nrules);
2113 NO_SPACE(rlhs);
2115 rrhs = TMALLOC(Value_t, nrules + 1);
2116 NO_SPACE(rrhs);
2118 rprec = TREALLOC(Value_t, rprec, nrules);
2119 NO_SPACE(rprec);
2121 rassoc = TREALLOC(Assoc_t, rassoc, nrules);
2122 NO_SPACE(rassoc);
2124 ritem[0] = -1;
2125 ritem[1] = goal->index;
2126 ritem[2] = 0;
2127 ritem[3] = -2;
2128 rlhs[0] = 0;
2129 rlhs[1] = 0;
2130 rlhs[2] = start_symbol;
2131 rrhs[0] = 0;
2132 rrhs[1] = 0;
2133 rrhs[2] = 1;
2135 j = 4;
2136 for (i = 3; i < nrules; ++i)
2138 rlhs[i] = plhs[i]->index;
2139 rrhs[i] = j;
2140 assoc = TOKEN;
2141 prec2 = 0;
2142 while (pitem[j])
2144 ritem[j] = pitem[j]->index;
2145 if (pitem[j]->class == TERM)
2147 prec2 = pitem[j]->prec;
2148 assoc = pitem[j]->assoc;
2150 ++j;
2152 ritem[j] = (Value_t) - i;
2153 ++j;
2154 if (rprec[i] == UNDEFINED)
2156 rprec[i] = prec2;
2157 rassoc[i] = assoc;
2160 rrhs[i] = j;
2162 FREE(plhs);
2163 FREE(pitem);
2166 static void
2167 print_grammar(void)
2169 int i, k;
2170 size_t j, spacing = 0;
2171 FILE *f = verbose_file;
2173 if (!vflag)
2174 return;
2176 k = 1;
2177 for (i = 2; i < nrules; ++i)
2179 if (rlhs[i] != rlhs[i - 1])
2181 if (i != 2)
2182 fprintf(f, "\n");
2183 fprintf(f, "%4d %s :", i - 2, symbol_name[rlhs[i]]);
2184 spacing = strlen(symbol_name[rlhs[i]]) + 1;
2186 else
2188 fprintf(f, "%4d ", i - 2);
2189 j = spacing;
2190 while (j-- != 0)
2191 putc(' ', f);
2192 putc('|', f);
2195 while (ritem[k] >= 0)
2197 fprintf(f, " %s", symbol_name[ritem[k]]);
2198 ++k;
2200 ++k;
2201 putc('\n', f);
2205 void
2206 reader(void)
2208 write_section(code_file, banner);
2209 create_symbol_table();
2210 read_declarations();
2211 read_grammar();
2212 free_symbol_table();
2213 free_tags();
2214 pack_names();
2215 check_symbols();
2216 pack_symbols();
2217 pack_grammar();
2218 free_symbols();
2219 print_grammar();
2222 #ifdef NO_LEAKS
2223 static param *
2224 free_declarations(param * list)
2226 while (list != 0)
2228 param *next = list->next;
2229 free(list->type);
2230 free(list->name);
2231 free(list->type2);
2232 free(list);
2233 list = next;
2235 return list;
2238 void
2239 reader_leaks(void)
2241 lex_param = free_declarations(lex_param);
2242 parse_param = free_declarations(parse_param);
2244 DO_FREE(line);
2245 DO_FREE(rrhs);
2246 DO_FREE(rlhs);
2247 DO_FREE(rprec);
2248 DO_FREE(ritem);
2249 DO_FREE(rassoc);
2250 DO_FREE(cache);
2251 DO_FREE(name_pool);
2252 DO_FREE(symbol_name);
2253 DO_FREE(symbol_prec);
2254 DO_FREE(symbol_assoc);
2255 DO_FREE(symbol_value);
2257 #endif