1 /*% clang -g -Wall -Wextra % -o #
2 * miniyacc - LALR(1) grammars for C
3 * See LICENSE for copyright and license details.
12 typedef struct Rule Rule
;
13 typedef struct TSet TSet
;
14 typedef struct Info Info
;
15 typedef struct Term Term
;
16 typedef struct Item Item
;
17 typedef struct Row Row
;
20 #define Red(n) (- (n+2)) /* involutive, Red(Red(x)) == x */
21 #define GetBit(s,n) (s[n/32] & (1<<(n%32)))
22 #define SetBit(s,n) (s[n/32] |= 1<<(n%32))
32 TSetSz
= (MaxTk
+31)/32,
82 char srs
[] = "shift/reduce conflict state %d token %s\n";
83 char rrs
[] = "reduce/reduce conflict state %d token %s\n";
85 Item i0
; /* temporary item */
87 int nrl
, nsy
, nst
, ntk
;
88 Rule rs
[MaxRl
]; /* grammar rules (ordered, rcmp) */
89 Info is
[MaxTk
+MaxNt
]; /* symbol information */
90 Item
**st
; /* LALR(1) states (ordered, icmp) */
91 Row
*as
; /* action table [state][tok] */
92 Row
*gs
; /* goto table [sym][state] */
93 Sym sstart
;/* start symbol */
94 Item
*ini
; /* initial state */
95 int doty
; /* type-checking enabled */
114 fprintf(stderr
, "%s (on line %d)\n", s
, lineno
);
119 yalloc(size_t n
, size_t o
)
125 die("out of memory");
130 rcmp(const void *a
, const void *b
)
132 return ((Rule
*)a
)->lhs
- ((Rule
*)b
)->lhs
;
142 r
= bsearch(&k
, rs
, nrl
, sizeof *r
, rcmp
);
144 while (r
> rs
&& r
[-1].lhs
== lhs
)
154 for (n
=0; *l
!=S
; n
++, l
++);
161 memset(ts
, 0, sizeof *ts
);
165 tsunion(TSet
*tsa
, TSet
*tsb
)
168 unsigned *a
, *b
, c
, t
;
183 first(TSet
*ts
, Sym
*stnc
, TSet
*last
)
198 first(ts
, stnc
+1, last
);
199 tsunion(ts
, &is
[f
].fst
);
213 for (r
=rs
; r
-rs
<nrl
; r
++) {
215 for (s
=r
->rhs
; *s
!=S
; s
++)
222 first(&ts
, r
->rhs
, 0);
223 chg
|= tsunion(&i
->fst
, &ts
);
229 tcmp(Term
*a
, Term
*b
)
233 c
= a
->rule
- b
->rule
;
240 tcmpv(const void *a
, const void *b
)
242 return tcmp((Term
*)a
, (Term
*)b
);
255 memset(smap
, 0, sizeof smap
);
256 for (n
=0; n
<i
->nt
; n
++) {
258 s
= t
->rule
->lhs
-Sym0
;
265 for (n
=0; n
<i
->nt
; n
++) {
267 rem
= &t
->rule
->rhs
[t
->dot
];
269 if (s
< Sym0
|| s
== S
)
273 die("some non-terminals are not defined");
275 first(&t1
.lk
, rem
, &t
->lk
);
278 for (; r
-rs
<nrl
&& r
->lhs
==s
; r
++, m
++)
279 chg
|= tsunion(&i
->ts
[m
].lk
, &t1
.lk
);
283 for (; r
-rs
<nrl
&& r
->lhs
==s
; r
++, m
++) {
285 die("too many terms in item");
297 igoto(Item
*i
, Sym s
)
303 for (n
=0, t
=i
->ts
; n
<i
->nt
; n
++, t
++) {
304 if (t
->rule
->rhs
[t
->dot
] != s
)
306 t1
= &i0
.ts
[i0
.nt
++];
310 qsort(i0
.ts
, i0
.nt
, sizeof i0
.ts
[0], tcmpv
);
314 icmp(Item
*a
, Item
*b
)
316 Term
*ta
, *tb
, *ma
, *mb
;
324 if (ta
==ma
|| ta
->dot
==0)
325 return -(tb
<mb
&& tb
->dot
);
326 if (tb
==mb
|| tb
->dot
==0)
327 return +(ta
<ma
&& ta
->dot
);
328 if ((c
=tcmp(ta
++, tb
++)))
337 int lo
, hi
, mid
, n
, chg
;
339 /* http://www.iq0.com/duffgram/bsearch.c */
343 if (hi
<0 || icmp(i
, st
[hi
])>0)
345 else if (icmp(i
, st
[lo
])<=0)
350 if (icmp(st
[mid
], i
)<0)
355 if (hi
<nst
&& icmp(st
[hi
], i
)==0) {
358 for (n
=0; n
<i
->nt
; n
++)
359 chg
|= tsunion(&i1
->ts
[n
].lk
, &i
->ts
[n
].lk
);
364 st
= realloc(st
, ++nst
* sizeof st
[0]);
366 die("out of memory");
367 memmove(&st
[hi
+1], &st
[hi
], (nst
-1 - hi
) * sizeof st
[0]);
368 i
->gtbl
= yalloc(nsy
, sizeof i
->gtbl
[0]);
370 i1
= yalloc(1, sizeof *i1
);
391 SetBit(tini
.lk
.t
, 0);
393 i0
.ts
[i0
.nt
++] = tini
;
397 for (n
=0; n
<nst
; n
++) {
403 for (s
=0; s
<nsy
; s
++) {
418 resolve(Rule
*r
, Sym s
, int st
)
420 if (!r
->prec
|| !is
[s
].prec
) {
423 fprintf(fgrm
, srs
, st
, is
[s
].name
);
427 if (r
->prec
==is
[s
].prec
) {
428 if (is
[s
].assoc
== ANone
)
432 if (r
->prec
<is
[s
].prec
)
439 tblset(int *tbl
, Item
*i
, Term
*t
)
444 s
= t
->rule
->rhs
[t
->dot
];
451 if (tbl
[s
] && tbl
[s
] != i
->gtbl
[s
]->id
) {
453 act
= resolve(&rs
[Red(tbl
[s
])], s
, i
->id
-1);
457 tbl
[s
] = i
->gtbl
[s
]->id
;
465 for (s
=0; s
<ntk
; s
++) {
466 if (!GetBit(t
->lk
.t
, s
))
468 /* default to shift if conflict occurs */
473 fprintf(fgrm
, rrs
, i
->id
-1, is
[s
].name
);
477 act
= resolve(t
->rule
, s
, i
->id
-1);
480 tbl
[s
] = Red(t
->rule
-rs
);
490 setdef(Row
*r
, int w
, int top
)
492 int n
, m
, x
, cnt
, def
, max
;
497 for (n
=0; n
<w
; n
++) {
504 for (m
=n
+1; m
<w
; m
++)
514 /* zero out the most frequent entry */
529 for (n
=0; n
<nst
; n
++)
531 as
= yalloc(nst
, sizeof as
[0]);
532 gs
= yalloc(nsy
-MaxTk
, sizeof gs
[0]);
533 /* fill action table */
534 for (n
=0; n
<nst
; n
++) {
536 r
->t
= yalloc(ntk
, sizeof r
->t
[0]);
537 for (i
=st
[n
], m
=0; m
<i
->nt
; m
++)
538 tblset(r
->t
, i
, &i
->ts
[m
]);
540 r
->def
= Red(r
->def
); /* Red(-1) == -1 */
542 /* fill goto table */
543 for (n
=MaxTk
; n
<nsy
; n
++) {
545 r
->t
= yalloc(nst
, sizeof r
->t
[0]);
546 for (m
=0; m
<nst
; m
++)
548 r
->t
[m
] = st
[m
]->gtbl
[n
]->id
;
549 setdef(r
, nst
, nst
+1);
554 prcmp(const void *a
, const void *b
)
556 return (*(Row
**)a
)->ndef
- (*(Row
**)b
)->ndef
;
563 int n
, m
, t
, dsp
, nnt
;
566 o
= yalloc(nst
+nsy
, sizeof o
[0]);
567 act
= yalloc(nst
*nsy
, sizeof act
[0]);
568 chk
= yalloc(nst
*nsy
, sizeof chk
[0]);
569 adsp
= yalloc(nst
, sizeof adsp
[0]);
570 for (n
=0; n
<nst
*nsy
; n
++)
572 /* fill in actions */
573 for (n
=0; n
<nst
; n
++)
575 qsort(o
, nst
, sizeof o
[0], prcmp
);
576 for (n
=0; n
<nst
; n
++) {
579 for (m
=0; m
<ntk
&& r
->t
[m
]==0; m
++)
582 /* The invariant here is even
583 * trickier than it looks.
585 for (t
=0; t
<ntk
; t
++)
586 if ((m
=dsp
+t
)>=0 && chk
[m
]>=0)
587 if ((r
->t
[t
] && (chk
[m
]!=t
|| act
[m
]!=r
->t
[t
]))
588 || (!r
->t
[t
] && chk
[m
]==t
)) {
593 for (t
=0; t
<ntk
; t
++)
596 act
[dsp
+t
] = r
->t
[t
];
603 gdsp
= yalloc(nnt
, sizeof gdsp
[0]);
604 for (n
=0; n
<nnt
; n
++)
606 qsort(o
, nnt
, sizeof o
[0], prcmp
);
607 for (n
=0; n
<nnt
; n
++) {
610 for (m
=0; m
<nst
&& r
->t
[m
]==0; m
++)
613 for (t
=m
; t
<nst
; t
++)
614 if (chk
[dsp
+t
]>=0 && r
->t
[t
]) {
619 for (t
=m
; t
<nst
; t
++)
621 chk
[dsp
+t
] = ntk
+(r
-gs
);
622 act
[dsp
+t
] = r
->t
[t
];
631 aout(char *name
, int *t
, int n
)
635 fprintf(fout
, "short %s[] = {", name
);
636 for (i
=0; i
<n
; i
++) {
639 fprintf(fout
, "%4d", t
[i
]);
643 fprintf(fout
, "\n};\n");
651 fprintf(fout
, "short yyini = %d;\n", ini
->id
-1);
652 fprintf(fout
, "short yyntoks = %d;\n", ntk
);
653 o
= yalloc(nrl
+nst
+nsy
, sizeof o
[0]);
654 for (n
=0; n
<nrl
; n
++)
655 o
[n
] = slen(rs
[n
].rhs
);
656 aout("yyr1", o
, nrl
);
657 for (n
=0; n
<nrl
; n
++)
658 o
[n
] = rs
[n
].lhs
-MaxTk
;
659 aout("yyr2", o
, nrl
);
660 for (n
=0; n
<nst
; n
++)
662 aout("yyadef", o
, nst
);
663 for (n
=0; n
<nsy
-MaxTk
; n
++) {
665 assert(o
[n
]>0 || o
[n
]==-1);
669 aout("yygdef", o
, nsy
-MaxTk
);
670 aout("yyadsp", adsp
, nst
);
671 aout("yygdsp", gdsp
, nsy
-MaxTk
);
672 for (n
=0; n
<actsz
; n
++)
675 aout("yyact", act
, actsz
);
676 aout("yychk", chk
, actsz
);
677 for (n
=0; n
<128; n
++) {
679 for (m
=0; m
<ntk
; m
++)
680 if (is
[m
].name
[0]=='\'')
681 if (is
[m
].name
[1]==n
)
682 assert(!o
[n
]), o
[n
] = m
;
685 for (n
=1; n
<ntk
; n
++) {
686 if (is
[n
].name
[0]=='\'')
688 fprintf(fout
, "#define %s %d\n", is
[n
].name
, m
);
690 fprintf(fhdr
, "#define %s %d\n", is
[n
].name
, m
);
693 aout("yytrns", o
, m
);
695 fputs("int yyparse(void);\n", fhdr
);
696 fputs("#ifndef YYSTYPE\n", fhdr
);
697 fputs("#define YYSTYPE int\n", fhdr
);
698 fputs("#endif\n", fhdr
);
699 fputs("extern YYSTYPE yylval;\n", fhdr
);
700 fputs("#endif\n", fhdr
);
716 for (r
=rs
; r
-rs
<nrl
; r
++) {
717 fprintf(fgrm
, "\n%03d: %s ->", (int)(r
-rs
), is
[r
->lhs
].name
);
718 for (s1
=r
->rhs
; *s1
!=S
; s1
++)
719 fprintf(fgrm
, " %s", is
[*s1
].name
);
722 for (m
=0; m
<nst
; m
++) {
723 fprintf(fgrm
, "\nState %d:\n", m
);
724 for (t
=st
[m
]->ts
; t
-st
[m
]->ts
<st
[m
]->nt
; t
++) {
727 if (d
==0 && t
!=st
[m
]->ts
)
729 fprintf(fgrm
, " %s ->", is
[r
->lhs
].name
);
730 for (s1
=r
->rhs
; *s1
!=S
; s1
++, d
--)
731 fprintf(fgrm
, " %s%s", d
? "" : ". ", is
[*s1
].name
);
738 for (n
=0; n
<ntk
; n
++) {
743 fprintf(fgrm
, " %s error (nonassoc)\n", is
[n
].name
);
745 fprintf(fgrm
, " %s reduce with rule %d\n", is
[n
].name
, Red(act
));
747 fprintf(fgrm
, " %s shift and go to %d\n", is
[n
].name
, act
-1);
750 fprintf(fgrm
, " * reduce with rule %d\n", ar
->def
);
781 { "%union", TUnion
},
783 { "%token", TToken
},
784 { "%right", TRight
},
786 { "%nonassoc", TNonassoc
},
788 { "%start", TStart
},
797 return isalnum(c
) || c
=='_' || c
=='%';
806 while (isspace(c
=fgetc(fin
)))
826 idnt
[1] = fgetc(fin
);
829 if (fgetc(fin
)!='\'')
830 die("syntax error, invalid char token");
836 if (p
-idnt
>= IdntSz
-1)
837 die("identifier too long");
841 if (strcmp(idnt
, "%")==0)
845 for (n
=0; words
[n
].name
; n
++)
846 if (strcmp(idnt
, words
[n
].name
) == 0)
854 int c
, nest
, in
, len
, pos
;
858 s
= yalloc(len
+1, 1);
868 if (s
[pos
-1] != '\\')
871 if (c
== '"' || c
== '\'')
878 die("syntax error, unclosed code block");
883 if (!(s
=realloc(s
, len
=2*len
+1)))
884 die("out of memory");
899 die("syntax error, ident expected after <");
901 if (nexttk()!=TRangle
)
902 die("syntax error, unclosed <");
911 findsy(char *name
, int add
)
915 for (n
=0; n
<nsy
; n
++) {
919 die("too many tokens");
921 strcpy(is
[n
].name
, name
);
926 if (strcmp(is
[n
].name
, name
)==0)
930 if (nsy
>=MaxTk
+MaxNt
)
931 die("too many non-terminals");
932 strcpy(is
[nsy
].name
, name
);
941 int tk
, prec
, p
, a
, c
, c1
, n
;
943 char type
[IdntSz
], *s
;
945 strcpy(is
[0].name
, "$");
947 strcpy(is
[Sym0
].name
, "@start");
957 die("syntax error, ident expected after %start");
958 sstart
= findsy(idnt
, 1);
960 die("%start cannot specify a token");
966 die("syntax error, { expected after %union");
967 fprintf(fout
, "#line %d \"%s\"\n", lineno
, srca
);
969 fprintf(fout
, "typedef union %s yyunion;\n", s
);
970 fprintf(fout
, "#define YYSTYPE yyunion\n");
972 fprintf(fhdr
, "typedef union %s yyunion;\n", s
);
973 fprintf(fhdr
, "#define YYSTYPE yyunion\n");
996 while (tk
==TIdnt
|| tk
==TTokchr
) {
999 if (n
>=MaxTk
&& n
<nsy
)
1000 die("non-terminal redeclared as token");
1003 die("too many tokens");
1007 strcpy(si
->name
, idnt
);
1008 strcpy(si
->type
, type
);
1017 die("syntax error, type expected");
1020 n
= findsy(idnt
, 1);
1022 die("token redeclared as non-terminal");
1027 strcpy(si
->name
, idnt
);
1028 strcpy(si
->type
, type
);
1033 fprintf(fout
, "#line %d \"%s\"\n", lineno
, srca
);
1037 die("syntax error, unclosed %{");
1055 die("syntax error, unfinished declarations");
1057 die("syntax error, declaration expected");
1064 extern char *retcode
;
1071 if (tk
==TPP
|| tk
==TEof
) {
1073 die("syntax error, empty grammar");
1080 qsort(rs
, nrl
, sizeof rs
[0], rcmp
);
1084 die("syntax error, production rule expected");
1085 if (nexttk()!=TColon
)
1086 die("syntax error, colon expected after production's head");
1087 hd
= findsy(idnt
, 1);
1092 die("too many rules");
1097 while ((tk
=nexttk())==TIdnt
|| tk
==TTokchr
|| tk
==TPrec
) {
1101 || (s
=findsy(idnt
, 0))>=ntk
)
1102 die("token expected after %prec");
1103 r
->prec
= is
[s
].prec
;
1106 s
= findsy(idnt
, 1);
1108 if (s
<ntk
&& is
[s
].prec
>0)
1109 r
->prec
= is
[s
].prec
;
1110 if (p
-r
->rhs
>= MaxRhs
-1)
1111 die("production rule too long");
1121 die("syntax error, ; or | expected");
1130 char c
, *p
, *ty
, tya
[IdntSz
];
1147 fprintf(fout
, "yyval");
1149 ty
= is
[r
->lhs
].type
;
1152 die("$$ has no type");
1154 fprintf(fout
, ".%s", ty
);
1157 else if (c
== '<') {
1159 while (istok(*p
) && ty
-tya
<IdntSz
-1)
1164 die("unclosed tag field");
1169 fprintf(fout
, "yyval.%s", ty
);
1173 die("number or $ expected afer tag field");
1178 else if (isdigit(c
)) {
1181 l
= strtol(p
-1, &p
, 10);
1186 fprintf(fout
, "ps[%d].val", (int)l
);
1189 ty
= is
[r
->rhs
[l
-1]].type
;
1190 if (!ty
|| !ty
[0]) {
1192 die("$n has no type");
1194 fprintf(fout
, ".%s", ty
);
1208 extern char *code0
[], *code1
[];
1213 for (p
=code0
; *p
; p
++)
1215 for (n
=0; n
<nrl
; n
++) {
1216 fprintf(fout
, "\tcase %d:\n", n
);
1218 fprintf(fout
, "#line %d \"%s\"\n", r
->actln
, srca
);
1220 fputs("\t\tbreak;\n", fout
);
1222 for (p
=code1
; *p
; p
++)
1224 fprintf(fout
, "#line %d \"%s\"\n", lineno
, srca
);
1225 while ((c
=fgetc(fin
))!=EOF
)
1230 init(int ac
, char *av
[])
1233 char *pref
, buf
[100], *opt
;
1238 for (av
++; av
[0] && av
[0][0]=='-'; av
++)
1239 for (opt
= &av
[0][1]; (c
= *opt
); opt
++)
1252 fputs("usage: myacc [-vd] [-b file_prefix] grammar\n", stderr
);
1258 fin
= fopen(srca
, "r");
1259 if (strlen(pref
) + 10 > sizeof buf
)
1260 die("-b prefix too long");
1261 sprintf(buf
, "%s.tab.c", pref
);
1262 fout
= fopen(buf
, "w");
1264 sprintf(buf
, "%s.output", pref
);
1265 fgrm
= fopen(buf
, "w");
1268 sprintf(buf
, "%s.tab.h", pref
);
1269 fhdr
= fopen(buf
, "w");
1271 fprintf(fhdr
, "#ifndef Y_TAB_H_\n");
1272 fprintf(fhdr
, "#define Y_TAB_H_\n");
1275 if (!fin
|| !fout
|| (!fgrm
&& vf
) || (!fhdr
&& df
))
1276 die("cannot open work files");
1280 main(int ac
, char *av
[])
1295 fprintf(stderr
, "%d shift/reduce conflicts\n", srconf
);
1297 fprintf(stderr
, "%d reduce/reduce conflicts\n", rrconf
);
1303 |sed 's|.*|"&\\n",|'
1306 char *retcode
= "\t\tyyval = ps[1].val; return 0;";
1310 "#ifndef YYSTYPE\n",
1311 "#define YYSTYPE int\n",
1313 "YYSTYPE yylval;\n",
1319 " StackSize = 100,\n",
1320 " ActSz = sizeof yyact / sizeof yyact[0],\n",
1325 " } stk[StackSize], *ps;\n",
1326 " int r, h, n, s, tk;\n",
1327 " YYSTYPE yyval;\n",
1330 " ps->state = s = yyini;\n",
1333 " n = yyadsp[s];\n",
1334 " if (tk < 0 && n > -yyntoks)\n",
1335 " tk = yytrns[yylex()];\n",
1337 " if (n < 0 || n >= ActSz || yychk[n] != tk) {\n",
1338 " r = yyadef[s];\n",
1351 " yyval = yylval;\n",
1354 " if (ps-stk >= StackSize)\n",
1357 " ps->state = s;\n",
1358 " ps->val = yyval;\n",
1361 " ps -= yyr1[r];\n",
1363 " s = ps->state;\n",
1364 " n = yygdsp[h] + s;\n",
1365 " if (n < 0 || n >= ActSz || yychk[n] != yyntoks+h)\n",
1366 " n = yygdef[h];\n",