fix various codegen bugs on arm64
[qbe.git] / minic / yacc.c
blob01f054e9f12244a3733123870594160de9539fb1
1 /*% clang -g -Wall -Wextra % -o #
2 * miniyacc - LALR(1) grammars for C
3 * See LICENSE for copyright and license details.
4 */
5 #include <assert.h>
6 #include <ctype.h>
7 #include <stdio.h>
8 #include <stdlib.h>
9 #include <string.h>
11 typedef int Sym;
12 typedef struct Rule Rule;
13 typedef struct TSet TSet;
14 typedef struct Info Info;
15 typedef struct Term Term;
16 typedef struct Item Item;
17 typedef struct Row Row;
19 #define S ((Sym) -1)
20 #define Red(n) (- (n+2)) /* involutive, Red(Red(x)) == x */
21 #define GetBit(s,n) (s[n/32] & (1<<(n%32)))
22 #define SetBit(s,n) (s[n/32] |= 1<<(n%32))
24 enum {
25 IdntSz = 64,
26 MaxRhs = 32,
27 MaxTk = 500,
28 MaxNt = 500,
29 MaxRl = 800,
30 MaxTm = 1000,
32 TSetSz = (MaxTk+31)/32,
33 Sym0 = MaxTk
36 struct Rule {
37 Sym lhs;
38 Sym rhs[MaxRhs];
39 char *act;
40 int actln;
41 int prec;
44 struct TSet {
45 unsigned t[TSetSz];
48 struct Info {
49 int nul;
50 TSet fst;
51 int prec;
52 enum {
53 ANone,
54 ALeft,
55 ARight,
56 ANonassoc
57 } assoc;
58 char name[IdntSz];
59 char type[IdntSz];
62 struct Term {
63 Rule *rule;
64 int dot;
65 TSet lk;
68 struct Item {
69 int id;
70 int nt;
71 Term ts[MaxTm];
72 Item **gtbl;
73 int dirty;
76 struct Row {
77 int def;
78 int ndef;
79 int *t;
82 char srs[] = "shift/reduce conflict state %d token %s\n";
83 char rrs[] = "reduce/reduce conflict state %d token %s\n";
85 Item i0; /* temporary item */
87 int nrl, nsy, nst, ntk;
88 Rule rs[MaxRl]; /* grammar rules (ordered, rcmp) */
89 Info is[MaxTk+MaxNt]; /* symbol information */
90 Item **st; /* LALR(1) states (ordered, icmp) */
91 Row *as; /* action table [state][tok] */
92 Row *gs; /* goto table [sym][state] */
93 Sym sstart;/* start symbol */
94 Item *ini; /* initial state */
95 int doty; /* type-checking enabled */
97 int srconf, rrconf;
98 int actsz;
99 int *act;
100 int *chk;
101 int *adsp;
102 int *gdsp;
104 int lineno = 1;
105 char *srca;
106 FILE *fin;
107 FILE *fout;
108 FILE *fgrm;
109 FILE *fhdr;
111 void
112 die(char *s)
114 fprintf(stderr, "%s (on line %d)\n", s, lineno);
115 exit(1);
118 void *
119 yalloc(size_t n, size_t o)
121 void *p;
123 p = calloc(n, o);
124 if (!p)
125 die("out of memory");
126 return p;
130 rcmp(const void *a, const void *b)
132 return ((Rule *)a)->lhs - ((Rule *)b)->lhs;
135 Rule *
136 rfind(Sym lhs)
138 Rule *r;
139 Rule k;
141 k.lhs = lhs;
142 r = bsearch(&k, rs, nrl, sizeof *r, rcmp);
143 if (r != 0)
144 while (r > rs && r[-1].lhs == lhs)
145 r--;
146 return r;
150 slen(Sym *l)
152 int n;
154 for (n=0; *l!=S; n++, l++);
155 return n;
158 void
159 tszero(TSet *ts)
161 memset(ts, 0, sizeof *ts);
165 tsunion(TSet *tsa, TSet *tsb)
167 int n;
168 unsigned *a, *b, c, t;
170 c = 0;
171 a = tsa->t;
172 b = tsb->t;
173 n = (31+ntk)/32;
174 while (n-- > 0) {
175 t = *a;
176 *a |= *b++;
177 c |= t ^ *a++;
179 return !!c;
182 void
183 first(TSet *ts, Sym *stnc, TSet *last)
185 Sym f;
187 f = stnc[0];
188 if (f == S) {
189 if (last)
190 tsunion(ts, last);
191 return;
193 if (f < ntk) {
194 SetBit(ts->t, f);
195 return;
197 if (is[f].nul)
198 first(ts, stnc+1, last);
199 tsunion(ts, &is[f].fst);
202 void
203 ginit()
205 int chg;
206 Rule *r;
207 Info *i;
208 Sym *s;
209 TSet ts;
211 do {
212 chg = 0;
213 for (r=rs; r-rs<nrl; r++) {
214 i = &is[r->lhs];
215 for (s=r->rhs; *s!=S; s++)
216 if (!is[*s].nul)
217 goto nonul;
218 chg |= i->nul == 0;
219 i->nul = 1;
220 nonul:
221 tszero(&ts);
222 first(&ts, r->rhs, 0);
223 chg |= tsunion(&i->fst, &ts);
225 } while (chg);
229 tcmp(Term *a, Term *b)
231 int c;
233 c = a->rule - b->rule;
234 if (c==0)
235 c = a->dot - b->dot;
236 return c;
240 tcmpv(const void *a, const void *b)
242 return tcmp((Term *)a, (Term *)b);
245 void
246 iclose(Item *i)
248 int smap[MaxNt];
249 Rule *r;
250 Term *t, t1;
251 Sym s, *rem;
252 int chg, n, m;
254 t1.dot = 0;
255 memset(smap, 0, sizeof smap);
256 for (n=0; n<i->nt; n++) {
257 t = &i->ts[n];
258 s = t->rule->lhs-Sym0;
259 if (t->dot==0)
260 if (smap[s]==0)
261 smap[s] = n;
263 do {
264 chg = 0;
265 for (n=0; n<i->nt; n++) {
266 t = &i->ts[n];
267 rem = &t->rule->rhs[t->dot];
268 s = *rem++;
269 if (s < Sym0 || s == S)
270 continue;
271 r = rfind(s);
272 if (!r)
273 die("some non-terminals are not defined");
274 tszero(&t1.lk);
275 first(&t1.lk, rem, &t->lk);
276 m = smap[s-Sym0];
277 if (m)
278 for (; r-rs<nrl && r->lhs==s; r++, m++)
279 chg |= tsunion(&i->ts[m].lk, &t1.lk);
280 else {
281 m = i->nt;
282 smap[s-Sym0] = m;
283 for (; r-rs<nrl && r->lhs==s; r++, m++) {
284 if (m>=MaxTm)
285 die("too many terms in item");
286 t1.rule = r;
287 i->ts[m] = t1;
289 i->nt = m;
290 chg = 1;
293 } while (chg);
296 void
297 igoto(Item *i, Sym s)
299 Term *t, *t1;
300 int n;
302 i0.nt = 0;
303 for (n=0, t=i->ts; n<i->nt; n++, t++) {
304 if (t->rule->rhs[t->dot] != s)
305 continue;
306 t1 = &i0.ts[i0.nt++];
307 *t1 = *t;
308 t1->dot++;
310 qsort(i0.ts, i0.nt, sizeof i0.ts[0], tcmpv);
314 icmp(Item *a, Item *b)
316 Term *ta, *tb, *ma, *mb;
317 int c;
319 ta = a->ts;
320 tb = b->ts;
321 ma = ta+a->nt;
322 mb = tb+b->nt;
323 for (;;) {
324 if (ta==ma || ta->dot==0)
325 return -(tb<mb && tb->dot);
326 if (tb==mb || tb->dot==0)
327 return +(ta<ma && ta->dot);
328 if ((c=tcmp(ta++, tb++)))
329 return c;
334 stadd(Item **pi)
336 Item *i, *i1;
337 int lo, hi, mid, n, chg;
339 /* http://www.iq0.com/duffgram/bsearch.c */
340 i = *pi;
341 lo = 0;
342 hi = nst - 1;
343 if (hi<0 || icmp(i, st[hi])>0)
344 hi++;
345 else if (icmp(i, st[lo])<=0)
346 hi = lo;
347 else
348 while (hi-lo!=1) {
349 mid = (lo+hi)/2;
350 if (icmp(st[mid], i)<0)
351 lo = mid;
352 else
353 hi = mid;
355 if (hi<nst && icmp(st[hi], i)==0) {
356 chg = 0;
357 i1 = st[hi];
358 for (n=0; n<i->nt; n++)
359 chg |= tsunion(&i1->ts[n].lk, &i->ts[n].lk);
360 i1->dirty |= chg;
361 *pi = i1;
362 return chg;
363 } else {
364 st = realloc(st, ++nst * sizeof st[0]);
365 if (!st)
366 die("out of memory");
367 memmove(&st[hi+1], &st[hi], (nst-1 - hi) * sizeof st[0]);
368 i->gtbl = yalloc(nsy, sizeof i->gtbl[0]);
369 i->dirty = 1;
370 i1 = yalloc(1, sizeof *i1);
371 *i1 = *i;
372 *pi = st[hi] = i1;
373 return 1;
377 void
378 stgen()
380 Sym s;
381 Rule *r;
382 Item *i, *i1;
383 Term tini;
384 int n, chg;
386 ini = &i0;
387 r = rfind(Sym0);
388 tini.rule = r;
389 tini.dot = 0;
390 tszero(&tini.lk);
391 SetBit(tini.lk.t, 0);
392 i0.nt = 0;
393 i0.ts[i0.nt++] = tini;
394 stadd(&ini);
395 do {
396 chg = 0;
397 for (n=0; n<nst; n++) {
398 i = st[n];
399 if (!i->dirty)
400 continue;
401 i->dirty = 0;
402 iclose(i);
403 for (s=0; s<nsy; s++) {
404 igoto(i, s);
405 i1 = &i0;
406 if (!i1->nt) {
407 i->gtbl[s] = 0;
408 continue;
410 chg |= stadd(&i1);
411 i->gtbl[s] = i1;
414 } while (chg);
418 resolve(Rule *r, Sym s, int st)
420 if (!r->prec || !is[s].prec) {
421 conflict:
422 if (fgrm)
423 fprintf(fgrm, srs, st, is[s].name);
424 srconf++;
425 return ARight;
427 if (r->prec==is[s].prec) {
428 if (is[s].assoc == ANone)
429 goto conflict;
430 return is[s].assoc;
431 } else
432 if (r->prec<is[s].prec)
433 return ARight;
434 else
435 return ALeft;
438 void
439 tblset(int *tbl, Item *i, Term *t)
441 int act;
442 Sym s;
444 s = t->rule->rhs[t->dot];
445 if (s!=S) {
446 /* shift */
447 if (s>=ntk)
448 return;
449 assert(i->gtbl[s]);
450 act = ARight;
451 if (tbl[s] && tbl[s] != i->gtbl[s]->id) {
452 assert(tbl[s]<=0);
453 act = resolve(&rs[Red(tbl[s])], s, i->id-1);
455 switch (act) {
456 case ARight:
457 tbl[s] = i->gtbl[s]->id;
458 break;
459 case ANonassoc:
460 tbl[s] = -1;
461 break;
463 } else
464 /* reduce */
465 for (s=0; s<ntk; s++) {
466 if (!GetBit(t->lk.t, s))
467 continue;
468 /* default to shift if conflict occurs */
469 if (!tbl[s])
470 act = ALeft;
471 else if (tbl[s]<0) {
472 if (fgrm)
473 fprintf(fgrm, rrs, i->id-1, is[s].name);
474 rrconf++;
475 act = ARight;
476 } else
477 act = resolve(t->rule, s, i->id-1);
478 switch (act) {
479 case ALeft:
480 tbl[s] = Red(t->rule-rs);
481 break;
482 case ANonassoc:
483 tbl[s] = -1;
484 break;
489 void
490 setdef(Row *r, int w, int top)
492 int n, m, x, cnt, def, max;
494 max = 0;
495 def = -1;
496 r->ndef = 0;
497 for (n=0; n<w; n++) {
498 x = r->t[n];
499 if (x==0)
500 r->ndef++;
501 if (x>=top || x==0)
502 continue;
503 cnt = 1;
504 for (m=n+1; m<w; m++)
505 if (r->t[m]==x)
506 cnt++;
507 if (cnt>max) {
508 def = x;
509 max = cnt;
512 r->def = def;
513 if (max!=0)
514 /* zero out the most frequent entry */
515 for (n=0; n<w; n++)
516 if (r->t[n]==def) {
517 r->t[n] = 0;
518 r->ndef++;
522 void
523 tblgen()
525 Row *r;
526 Item *i;
527 int n, m;
529 for (n=0; n<nst; n++)
530 st[n]->id = n+1;
531 as = yalloc(nst, sizeof as[0]);
532 gs = yalloc(nsy-MaxTk, sizeof gs[0]);
533 /* fill action table */
534 for (n=0; n<nst; n++) {
535 r = &as[n];
536 r->t = yalloc(ntk, sizeof r->t[0]);
537 for (i=st[n], m=0; m<i->nt; m++)
538 tblset(r->t, i, &i->ts[m]);
539 setdef(r, ntk, -1);
540 r->def = Red(r->def); /* Red(-1) == -1 */
542 /* fill goto table */
543 for (n=MaxTk; n<nsy; n++) {
544 r = &gs[n-MaxTk];
545 r->t = yalloc(nst, sizeof r->t[0]);
546 for (m=0; m<nst; m++)
547 if (st[m]->gtbl[n])
548 r->t[m] = st[m]->gtbl[n]->id;
549 setdef(r, nst, nst+1);
554 prcmp(const void *a, const void *b)
556 return (*(Row **)a)->ndef - (*(Row **)b)->ndef;
559 void
560 actgen()
562 Row **o, *r;
563 int n, m, t, dsp, nnt;
565 actsz = 0;
566 o = yalloc(nst+nsy, sizeof o[0]);
567 act = yalloc(nst*nsy, sizeof act[0]);
568 chk = yalloc(nst*nsy, sizeof chk[0]);
569 adsp = yalloc(nst, sizeof adsp[0]);
570 for (n=0; n<nst*nsy; n++)
571 chk[n] = -1;
572 /* fill in actions */
573 for (n=0; n<nst; n++)
574 o[n] = &as[n];
575 qsort(o, nst, sizeof o[0], prcmp);
576 for (n=0; n<nst; n++) {
577 r = o[n];
578 dsp = 0;
579 for (m=0; m<ntk && r->t[m]==0; m++)
580 dsp--;
581 retrya:
582 /* The invariant here is even
583 * trickier than it looks.
585 for (t=0; t<ntk; t++)
586 if ((m=dsp+t)>=0 && chk[m]>=0)
587 if ((r->t[t] && (chk[m]!=t || act[m]!=r->t[t]))
588 || (!r->t[t] && chk[m]==t)) {
589 dsp++;
590 goto retrya;
592 adsp[r-as] = dsp;
593 for (t=0; t<ntk; t++)
594 if (r->t[t]) {
595 chk[dsp+t] = t;
596 act[dsp+t] = r->t[t];
597 if (dsp+t>=actsz)
598 actsz = dsp+t+1;
601 /* fill in gotos */
602 nnt = nsy-MaxTk;
603 gdsp = yalloc(nnt, sizeof gdsp[0]);
604 for (n=0; n<nnt; n++)
605 o[n] = &gs[n];
606 qsort(o, nnt, sizeof o[0], prcmp);
607 for (n=0; n<nnt; n++) {
608 r = o[n];
609 dsp = 0;
610 for (m=0; m<nst && r->t[m]==0; m++)
611 dsp--;
612 retryg:
613 for (t=m; t<nst; t++)
614 if (chk[dsp+t]>=0 && r->t[t]) {
615 dsp++;
616 goto retryg;
618 gdsp[r-gs] = dsp;
619 for (t=m; t<nst; t++)
620 if (r->t[t]) {
621 chk[dsp+t] = ntk+(r-gs);
622 act[dsp+t] = r->t[t];
623 if (dsp+t>=actsz)
624 actsz = dsp+t+1;
627 free(o);
630 void
631 aout(char *name, int *t, int n)
633 int i;
635 fprintf(fout, "short %s[] = {", name);
636 for (i=0; i<n; i++) {
637 if (i % 10 == 0)
638 fprintf(fout, "\n");
639 fprintf(fout, "%4d", t[i]);
640 if (i != n-1)
641 fprintf(fout, ",");
643 fprintf(fout, "\n};\n");
646 void
647 tblout()
649 int *o, n, m;
651 fprintf(fout, "short yyini = %d;\n", ini->id-1);
652 fprintf(fout, "short yyntoks = %d;\n", ntk);
653 o = yalloc(nrl+nst+nsy, sizeof o[0]);
654 for (n=0; n<nrl; n++)
655 o[n] = slen(rs[n].rhs);
656 aout("yyr1", o, nrl);
657 for (n=0; n<nrl; n++)
658 o[n] = rs[n].lhs-MaxTk;
659 aout("yyr2", o, nrl);
660 for (n=0; n<nst; n++)
661 o[n] = as[n].def;
662 aout("yyadef", o, nst);
663 for (n=0; n<nsy-MaxTk; n++) {
664 o[n] = gs[n].def;
665 assert(o[n]>0 || o[n]==-1);
666 if (o[n]>0)
667 o[n]--;
669 aout("yygdef", o, nsy-MaxTk);
670 aout("yyadsp", adsp, nst);
671 aout("yygdsp", gdsp, nsy-MaxTk);
672 for (n=0; n<actsz; n++)
673 if (act[n]>=0)
674 act[n]--;
675 aout("yyact", act, actsz);
676 aout("yychk", chk, actsz);
677 for (n=0; n<128; n++) {
678 o[n] = 0;
679 for (m=0; m<ntk; m++)
680 if (is[m].name[0]=='\'')
681 if (is[m].name[1]==n)
682 assert(!o[n]), o[n] = m;
684 m = 128;
685 for (n=1; n<ntk; n++) {
686 if (is[n].name[0]=='\'')
687 continue;
688 fprintf(fout, "#define %s %d\n", is[n].name, m);
689 if (fhdr)
690 fprintf(fhdr, "#define %s %d\n", is[n].name, m);
691 o[m++] = n;
693 aout("yytrns", o, m);
694 if (fhdr) {
695 fputs("int yyparse(void);\n", fhdr);
696 fputs("#ifndef YYSTYPE\n", fhdr);
697 fputs("#define YYSTYPE int\n", fhdr);
698 fputs("#endif\n", fhdr);
699 fputs("extern YYSTYPE yylval;\n", fhdr);
700 fputs("#endif\n", fhdr);
702 free(o);
705 void
706 stdump()
708 Term *t;
709 Sym *s1;
710 int n, m, d, act;
711 Rule *r;
712 Row *ar;
714 if (!fgrm)
715 return;
716 for (r=rs; r-rs<nrl; r++) {
717 fprintf(fgrm, "\n%03d: %s ->", (int)(r-rs), is[r->lhs].name);
718 for (s1=r->rhs; *s1!=S; s1++)
719 fprintf(fgrm, " %s", is[*s1].name);
721 fprintf(fgrm, "\n");
722 for (m=0; m<nst; m++) {
723 fprintf(fgrm, "\nState %d:\n", m);
724 for (t=st[m]->ts; t-st[m]->ts<st[m]->nt; t++) {
725 r = t->rule;
726 d = t->dot;
727 if (d==0 && t!=st[m]->ts)
728 continue;
729 fprintf(fgrm, " %s ->", is[r->lhs].name);
730 for (s1=r->rhs; *s1!=S; s1++, d--)
731 fprintf(fgrm, " %s%s", d ? "" : ". ", is[*s1].name);
732 if (!d)
733 fprintf(fgrm, " .");
734 fprintf(fgrm, "\n");
736 fprintf(fgrm, "\n");
737 ar = &as[m];
738 for (n=0; n<ntk; n++) {
739 act = ar->t[n];
740 if (!act)
741 continue;
742 if (act==-1)
743 fprintf(fgrm, " %s error (nonassoc)\n", is[n].name);
744 else if (act<0)
745 fprintf(fgrm, " %s reduce with rule %d\n", is[n].name, Red(act));
746 else
747 fprintf(fgrm, " %s shift and go to %d\n", is[n].name, act-1);
749 if (ar->def != -1)
750 fprintf(fgrm, " * reduce with rule %d\n", ar->def);
754 enum {
755 TIdnt,
756 TTokchr, /* 'c' */
757 TPP, /* %% */
758 TLL, /* %{ */
759 TLangle, /* < */
760 TRangle, /* > */
761 TSemi, /* ; */
762 TBar, /* | */
763 TColon, /* : */
764 TLBrack, /* { */
765 TUnion,
766 TType,
767 TToken,
768 TRight,
769 TLeft,
770 TNonassoc,
771 TPrec,
772 TStart,
773 TEof
776 struct {
777 char *name;
778 int tok;
779 } words[] = {
780 { "%%", TPP },
781 { "%union", TUnion },
782 { "%type", TType },
783 { "%token", TToken },
784 { "%right", TRight },
785 { "%left", TLeft },
786 { "%nonassoc", TNonassoc },
787 { "%prec", TPrec },
788 { "%start", TStart },
789 { 0, 0 }
792 char idnt[IdntSz];
795 istok(int c)
797 return isalnum(c) || c=='_' || c=='%';
801 nexttk()
803 int n;
804 char c, *p;
806 while (isspace(c=fgetc(fin)))
807 if (c == '\n')
808 lineno++;
809 switch (c) {
810 case '<':
811 return TLangle;
812 case '>':
813 return TRangle;
814 case ';':
815 return TSemi;
816 case '|':
817 return TBar;
818 case ':':
819 return TColon;
820 case '{':
821 return TLBrack;
822 case EOF:
823 return TEof;
824 case '\'':
825 idnt[0] = '\'';
826 idnt[1] = fgetc(fin);
827 idnt[2] = '\'';
828 idnt[3] = 0;
829 if (fgetc(fin)!='\'')
830 die("syntax error, invalid char token");
831 return TTokchr;
833 p = idnt;
834 while (istok(c)) {
835 *p++ = c;
836 if (p-idnt >= IdntSz-1)
837 die("identifier too long");
838 c = fgetc(fin);
840 *p = 0;
841 if (strcmp(idnt, "%")==0)
842 if (c=='{')
843 return TLL;
844 ungetc(c, fin);
845 for (n=0; words[n].name; n++)
846 if (strcmp(idnt, words[n].name) == 0)
847 return words[n].tok;
848 return TIdnt;
851 char *
852 cpycode()
854 int c, nest, in, len, pos;
855 char *s;
857 len = 64;
858 s = yalloc(len+1, 1);
859 s[0] = '{';
860 pos = 1;
861 nest = 1;
862 in = 0;
864 while (nest) {
865 c = fgetc(fin);
866 if (in) {
867 if (c == in)
868 if (s[pos-1] != '\\')
869 in = 0;
870 } else {
871 if (c == '"' || c == '\'')
872 in = c;
873 if (c == '{')
874 nest++;
875 if (c == '}')
876 nest--;
877 if (c == EOF)
878 die("syntax error, unclosed code block");
879 if (c == '\n')
880 lineno++;
882 if (pos>=len)
883 if (!(s=realloc(s, len=2*len+1)))
884 die("out of memory");
885 s[pos++] = c;
887 s[pos] = 0;
888 return s;
892 gettype(char *type)
894 int tk;
896 tk = nexttk();
897 if (tk==TLangle) {
898 if (nexttk()!=TIdnt)
899 die("syntax error, ident expected after <");
900 strcpy(type, idnt);
901 if (nexttk()!=TRangle)
902 die("syntax error, unclosed <");
903 return nexttk();
904 } else {
905 type[0] = 0;
906 return tk;
911 findsy(char *name, int add)
913 int n;
915 for (n=0; n<nsy; n++) {
916 if (n == ntk) {
917 if (name[0]=='\'') {
918 if (ntk>=MaxTk)
919 die("too many tokens");
920 ntk++;
921 strcpy(is[n].name, name);
922 return n;
924 n = MaxTk;
926 if (strcmp(is[n].name, name)==0)
927 return n;
929 if (add) {
930 if (nsy>=MaxTk+MaxNt)
931 die("too many non-terminals");
932 strcpy(is[nsy].name, name);
933 return nsy++;
934 } else
935 return nsy;
938 void
939 getdecls()
941 int tk, prec, p, a, c, c1, n;
942 Info *si;
943 char type[IdntSz], *s;
945 strcpy(is[0].name, "$");
946 ntk = 1;
947 strcpy(is[Sym0].name, "@start");
948 nsy = MaxTk+1;
949 sstart = S;
950 prec = 0;
951 tk = nexttk();
952 for (;;)
953 switch (tk) {
954 case TStart:
955 tk = nexttk();
956 if (tk!=TIdnt)
957 die("syntax error, ident expected after %start");
958 sstart = findsy(idnt, 1);
959 if (sstart<ntk)
960 die("%start cannot specify a token");
961 tk = nexttk();
962 break;
963 case TUnion:
964 tk = nexttk();
965 if (tk!=TLBrack)
966 die("syntax error, { expected after %union");
967 fprintf(fout, "#line %d \"%s\"\n", lineno, srca);
968 s = cpycode();
969 fprintf(fout, "typedef union %s yyunion;\n", s);
970 fprintf(fout, "#define YYSTYPE yyunion\n");
971 if (fhdr) {
972 fprintf(fhdr, "typedef union %s yyunion;\n", s);
973 fprintf(fhdr, "#define YYSTYPE yyunion\n");
975 free(s);
976 doty = 1;
977 tk = nexttk();
978 break;
979 case TLeft:
980 p = ++prec;
981 a = ALeft;
982 goto addtoks;
983 case TRight:
984 p = ++prec;
985 a = ARight;
986 goto addtoks;
987 case TNonassoc:
988 p = ++prec;
989 a = ANonassoc;
990 goto addtoks;
991 case TToken:
992 p = 0;
993 a = ANone;
994 addtoks:
995 tk = gettype(type);
996 while (tk==TIdnt || tk==TTokchr) {
997 si = 0;
998 n = findsy(idnt, 0);
999 if (n>=MaxTk && n<nsy)
1000 die("non-terminal redeclared as token");
1001 if (n==nsy) {
1002 if (ntk>=MaxTk)
1003 die("too many tokens");
1004 n = ntk++;
1006 si = &is[n];
1007 strcpy(si->name, idnt);
1008 strcpy(si->type, type);
1009 si->prec = p;
1010 si->assoc = a;
1011 tk = nexttk();
1013 break;
1014 case TType:
1015 tk = gettype(type);
1016 if (type[0]==0)
1017 die("syntax error, type expected");
1018 while (tk==TIdnt) {
1019 si = 0;
1020 n = findsy(idnt, 1);
1021 if (n<ntk)
1022 die("token redeclared as non-terminal");
1023 if (n==nsy) {
1024 nsy++;
1026 si = &is[n];
1027 strcpy(si->name, idnt);
1028 strcpy(si->type, type);
1029 tk = nexttk();
1031 break;
1032 case TLL:
1033 fprintf(fout, "#line %d \"%s\"\n", lineno, srca);
1034 for (;;) {
1035 c = fgetc(fin);
1036 if (c == EOF)
1037 die("syntax error, unclosed %{");
1038 if (c == '%') {
1039 c1 = fgetc(fin);
1040 if (c1 == '}') {
1041 fputs("\n", fout);
1042 break;
1044 ungetc(c1, fin);
1046 if (c == '\n')
1047 lineno++;
1048 fputc(c, fout);
1050 tk = nexttk();
1051 break;
1052 case TPP:
1053 return;
1054 case TEof:
1055 die("syntax error, unfinished declarations");
1056 default:
1057 die("syntax error, declaration expected");
1061 void
1062 getgram()
1064 extern char *retcode;
1065 int tk;
1066 Sym hd, *p, s;
1067 Rule *r;
1069 for (;;) {
1070 tk = nexttk();
1071 if (tk==TPP || tk==TEof) {
1072 if (sstart==S)
1073 die("syntax error, empty grammar");
1074 r = &rs[nrl++];
1075 r->lhs = Sym0;
1076 r->rhs[0] = sstart;
1077 r->rhs[1] = 0;
1078 r->rhs[2] = S;
1079 r->act = retcode;
1080 qsort(rs, nrl, sizeof rs[0], rcmp);
1081 return;
1083 if (tk!=TIdnt)
1084 die("syntax error, production rule expected");
1085 if (nexttk()!=TColon)
1086 die("syntax error, colon expected after production's head");
1087 hd = findsy(idnt, 1);
1088 if (sstart==S)
1089 sstart = hd;
1090 do {
1091 if (nrl>=MaxRl-1)
1092 die("too many rules");
1093 r = &rs[nrl++];
1094 r->lhs = hd;
1095 r->act = 0;
1096 p = r->rhs;
1097 while ((tk=nexttk())==TIdnt || tk==TTokchr || tk==TPrec) {
1098 if (tk==TPrec) {
1099 tk = nexttk();
1100 if (tk!=TIdnt
1101 || (s=findsy(idnt, 0))>=ntk)
1102 die("token expected after %prec");
1103 r->prec = is[s].prec;
1104 continue;
1106 s = findsy(idnt, 1);
1107 *p++ = s;
1108 if (s<ntk && is[s].prec>0)
1109 r->prec = is[s].prec;
1110 if (p-r->rhs >= MaxRhs-1)
1111 die("production rule too long");
1113 *p = S;
1114 if (tk==TLBrack) {
1115 r->actln = lineno;
1116 r->act = cpycode();
1117 tk = nexttk();
1119 } while (tk==TBar);
1120 if (tk!=TSemi)
1121 die("syntax error, ; or | expected");
1125 void
1126 actout(Rule *r)
1128 long l;
1129 int i, ar;
1130 char c, *p, *ty, tya[IdntSz];
1132 ar = slen(r->rhs);
1133 p = r->act;
1134 i = r->actln;
1135 if (!p)
1136 return;
1137 while ((c=*p++))
1138 switch (c) {
1139 case '\n':
1140 i++;
1141 default:
1142 fputc(c, fout);
1143 break;
1144 case '$':
1145 c = *p++;
1146 if (c == '$') {
1147 fprintf(fout, "yyval");
1148 if (doty) {
1149 ty = is[r->lhs].type;
1150 if (!ty[0]) {
1151 lineno = i;
1152 die("$$ has no type");
1154 fprintf(fout, ".%s", ty);
1157 else if (c == '<') {
1158 ty = tya;
1159 while (istok(*p) && ty-tya<IdntSz-1)
1160 *ty++ = *p++;
1161 *ty = 0;
1162 if (*p++!='>') {
1163 lineno = i;
1164 die("unclosed tag field");
1166 ty = tya;
1167 c = *p++;
1168 if (c == '$') {
1169 fprintf(fout, "yyval.%s", ty);
1170 } else {
1171 if (!isdigit(c)) {
1172 lineno = i;
1173 die("number or $ expected afer tag field");
1175 goto readnum;
1178 else if (isdigit(c)) {
1179 ty = 0;
1180 readnum:
1181 l = strtol(p-1, &p, 10);
1182 if (l > ar) {
1183 lineno = i;
1184 die("invalid $n");
1186 fprintf(fout, "ps[%d].val", (int)l);
1187 if (doty) {
1188 if (!ty && l>0)
1189 ty = is[r->rhs[l-1]].type;
1190 if (!ty || !ty[0]) {
1191 lineno = i;
1192 die("$n has no type");
1194 fprintf(fout, ".%s", ty);
1197 else {
1198 fputc('$', fout);
1199 fputc(c, fout);
1202 fputs("\n", fout);
1205 void
1206 codeout()
1208 extern char *code0[], *code1[];
1209 char **p;
1210 Rule *r;
1211 int n, c;
1213 for (p=code0; *p; p++)
1214 fputs(*p, fout);
1215 for (n=0; n<nrl; n++) {
1216 fprintf(fout, "\tcase %d:\n", n);
1217 r = &rs[n];
1218 fprintf(fout, "#line %d \"%s\"\n", r->actln, srca);
1219 actout(r);
1220 fputs("\t\tbreak;\n", fout);
1222 for (p=code1; *p; p++)
1223 fputs(*p, fout);
1224 fprintf(fout, "#line %d \"%s\"\n", lineno, srca);
1225 while ((c=fgetc(fin))!=EOF)
1226 fputc(c, fout);
1229 void
1230 init(int ac, char *av[])
1232 int c, vf, df;
1233 char *pref, buf[100], *opt;
1235 (void) ac;
1236 pref = "y";
1237 vf = df = 0;
1238 for (av++; av[0] && av[0][0]=='-'; av++)
1239 for (opt = &av[0][1]; (c = *opt); opt++)
1240 switch (c) {
1241 case 'v':
1242 vf = 1;
1243 break;
1244 case 'd':
1245 df = 1;
1246 break;
1247 case 'b':
1248 if ((pref = *++av))
1249 break;
1250 default:
1251 usage:
1252 fputs("usage: myacc [-vd] [-b file_prefix] grammar\n", stderr);
1253 exit(1);
1256 if (!(srca = *av))
1257 goto usage;
1258 fin = fopen(srca, "r");
1259 if (strlen(pref) + 10 > sizeof buf)
1260 die("-b prefix too long");
1261 sprintf(buf, "%s.tab.c", pref);
1262 fout = fopen(buf, "w");
1263 if (vf) {
1264 sprintf(buf, "%s.output", pref);
1265 fgrm = fopen(buf, "w");
1267 if (df) {
1268 sprintf(buf, "%s.tab.h", pref);
1269 fhdr = fopen(buf, "w");
1270 if (fhdr) {
1271 fprintf(fhdr, "#ifndef Y_TAB_H_\n");
1272 fprintf(fhdr, "#define Y_TAB_H_\n");
1275 if (!fin || !fout || (!fgrm && vf) || (!fhdr && df))
1276 die("cannot open work files");
1280 main(int ac, char *av[])
1283 init(ac, av);
1284 getdecls();
1285 getgram();
1286 ginit();
1287 stgen();
1288 tblgen();
1289 stdump();
1290 actgen();
1291 tblout();
1292 codeout();
1294 if (srconf)
1295 fprintf(stderr, "%d shift/reduce conflicts\n", srconf);
1296 if (rrconf)
1297 fprintf(stderr, "%d reduce/reduce conflicts\n", rrconf);
1299 exit(0);
1302 /* Glorious macros.
1303 |sed 's|.*|"&\\n",|'
1306 char *retcode = "\t\tyyval = ps[1].val; return 0;";
1308 char *code0[] = {
1309 "\n",
1310 "#ifndef YYSTYPE\n",
1311 "#define YYSTYPE int\n",
1312 "#endif\n",
1313 "YYSTYPE yylval;\n",
1314 "\n",
1315 "int\n",
1316 "yyparse()\n",
1317 "{\n",
1318 " enum {\n",
1319 " StackSize = 100,\n",
1320 " ActSz = sizeof yyact / sizeof yyact[0],\n",
1321 " };\n",
1322 " struct {\n",
1323 " YYSTYPE val;\n",
1324 " int state;\n",
1325 " } stk[StackSize], *ps;\n",
1326 " int r, h, n, s, tk;\n",
1327 " YYSTYPE yyval;\n",
1328 "\n",
1329 " ps = stk;\n",
1330 " ps->state = s = yyini;\n",
1331 " tk = -1;\n",
1332 "loop:\n",
1333 " n = yyadsp[s];\n",
1334 " if (tk < 0 && n > -yyntoks)\n",
1335 " tk = yytrns[yylex()];\n",
1336 " n += tk;\n",
1337 " if (n < 0 || n >= ActSz || yychk[n] != tk) {\n",
1338 " r = yyadef[s];\n",
1339 " if (r < 0)\n",
1340 " return -1;\n",
1341 " goto reduce;\n",
1342 " }\n",
1343 " n = yyact[n];\n",
1344 " if (n == -1)\n",
1345 " return -1;\n",
1346 " if (n < 0) {\n",
1347 " r = - (n+2);\n",
1348 " goto reduce;\n",
1349 " }\n",
1350 " tk = -1;\n",
1351 " yyval = yylval;\n",
1352 "stack:\n",
1353 " ps++;\n",
1354 " if (ps-stk >= StackSize)\n",
1355 " return -2;\n",
1356 " s = n;\n",
1357 " ps->state = s;\n",
1358 " ps->val = yyval;\n",
1359 " goto loop;\n",
1360 "reduce:\n",
1361 " ps -= yyr1[r];\n",
1362 " h = yyr2[r];\n",
1363 " s = ps->state;\n",
1364 " n = yygdsp[h] + s;\n",
1365 " if (n < 0 || n >= ActSz || yychk[n] != yyntoks+h)\n",
1366 " n = yygdef[h];\n",
1367 " else\n",
1368 " n = yyact[n];\n",
1369 " switch (r) {\n",
1373 char *code1[] = {
1374 " }\n",
1375 " goto stack;\n",
1376 "}\n",