Simplify int mul/udiv/urem of 2^N into shl/shr/and.
[qbe.git] / minic / minic.y
blobeeef900fe43081b9fb461626d3e052f409622ce3
1 %{
3 #include <ctype.h>
4 #include <stdio.h>
5 #include <stdlib.h>
6 #include <string.h>
8 enum {
9 NString = 32,
10 NGlo = 256,
11 NVar = 512,
12 NStr = 256,
15 enum { /* minic types */
16 NIL,
17 INT,
18 LNG,
19 PTR,
20 FUN,
23 #define IDIR(x) (((x) << 3) + PTR)
24 #define FUNC(x) (((x) << 3) + FUN)
25 #define DREF(x) ((x) >> 3)
26 #define KIND(x) ((x) & 7)
27 #define SIZE(x) \
28 (x == NIL ? (die("void has no size"), 0) : \
29 x == INT ? 4 : 8)
31 typedef struct Node Node;
32 typedef struct Symb Symb;
33 typedef struct Stmt Stmt;
35 struct Symb {
36 enum {
37 Con,
38 Tmp,
39 Var,
40 Glo,
41 } t;
42 union {
43 int n;
44 char v[NString];
45 } u;
46 unsigned long ctyp;
49 struct Node {
50 char op;
51 union {
52 int n;
53 char v[NString];
54 Symb s;
55 } u;
56 Node *l, *r;
59 struct Stmt {
60 enum {
61 If,
62 While,
63 Seq,
64 Expr,
65 Break,
66 Ret,
67 } t;
68 void *p1, *p2, *p3;
71 int yylex(void), yyerror(char *);
72 Symb expr(Node *), lval(Node *);
73 void bool(Node *, int, int);
75 FILE *of;
76 int line;
77 int lbl, tmp, nglo;
78 char *ini[NGlo];
79 struct {
80 char v[NString];
81 unsigned ctyp;
82 int glo;
83 } varh[NVar];
85 void
86 die(char *s)
88 fprintf(stderr, "error:%d: %s\n", line, s);
89 exit(1);
92 void *
93 alloc(size_t s)
95 void *p;
97 p = malloc(s);
98 if (!p)
99 die("out of memory");
100 return p;
103 unsigned
104 hash(char *s)
106 unsigned h;
108 h = 42;
109 while (*s)
110 h += 11 * h + *s++;
111 return h % NVar;
114 void
115 varclr()
117 unsigned h;
119 for (h=0; h<NVar; h++)
120 if (!varh[h].glo)
121 varh[h].v[0] = 0;
124 void
125 varadd(char *v, int glo, unsigned ctyp)
127 unsigned h0, h;
129 h0 = hash(v);
130 h = h0;
131 do {
132 if (varh[h].v[0] == 0) {
133 strcpy(varh[h].v, v);
134 varh[h].glo = glo;
135 varh[h].ctyp = ctyp;
136 return;
138 if (strcmp(varh[h].v, v) == 0)
139 die("double definition");
140 h = (h+1) % NVar;
141 } while(h != h0);
142 die("too many variables");
145 Symb *
146 varget(char *v)
148 static Symb s;
149 unsigned h0, h;
151 h0 = hash(v);
152 h = h0;
153 do {
154 if (strcmp(varh[h].v, v) == 0) {
155 if (!varh[h].glo) {
156 s.t = Var;
157 strcpy(s.u.v, v);
158 } else {
159 s.t = Glo;
160 s.u.n = varh[h].glo;
162 s.ctyp = varh[h].ctyp;
163 return &s;
165 h = (h+1) % NVar;
166 } while (h != h0 && varh[h].v[0] != 0);
167 return 0;
170 char
171 irtyp(unsigned ctyp)
173 return SIZE(ctyp) == 8 ? 'l' : 'w';
176 void
177 psymb(Symb s)
179 switch (s.t) {
180 case Tmp:
181 fprintf(of, "%%t%d", s.u.n);
182 break;
183 case Var:
184 fprintf(of, "%%%s", s.u.v);
185 break;
186 case Glo:
187 fprintf(of, "$glo%d", s.u.n);
188 break;
189 case Con:
190 fprintf(of, "%d", s.u.n);
191 break;
195 void
196 sext(Symb *s)
198 fprintf(of, "\t%%t%d =l extsw ", tmp);
199 psymb(*s);
200 fprintf(of, "\n");
201 s->t = Tmp;
202 s->ctyp = LNG;
203 s->u.n = tmp++;
206 unsigned
207 prom(int op, Symb *l, Symb *r)
209 Symb *t;
210 int sz;
212 if (l->ctyp == r->ctyp && KIND(l->ctyp) != PTR)
213 return l->ctyp;
215 if (l->ctyp == LNG && r->ctyp == INT) {
216 sext(r);
217 return LNG;
219 if (l->ctyp == INT && r->ctyp == LNG) {
220 sext(l);
221 return LNG;
224 if (op == '+') {
225 if (KIND(r->ctyp) == PTR) {
226 t = l;
227 l = r;
228 r = t;
230 if (KIND(r->ctyp) == PTR)
231 die("pointers added");
232 goto Scale;
235 if (op == '-') {
236 if (KIND(l->ctyp) != PTR)
237 die("pointer substracted from integer");
238 if (KIND(r->ctyp) != PTR)
239 goto Scale;
240 if (l->ctyp != r->ctyp)
241 die("non-homogeneous pointers in substraction");
242 return LNG;
245 Scale:
246 sz = SIZE(DREF(l->ctyp));
247 if (r->t == Con)
248 r->u.n *= sz;
249 else {
250 if (irtyp(r->ctyp) != 'l')
251 sext(r);
252 fprintf(of, "\t%%t%d =l mul %d, ", tmp, sz);
253 psymb(*r);
254 fprintf(of, "\n");
255 r->u.n = tmp++;
257 return l->ctyp;
260 void
261 load(Symb d, Symb s)
263 char t;
265 fprintf(of, "\t");
266 psymb(d);
267 t = irtyp(d.ctyp);
268 fprintf(of, " =%c load%c ", t, t);
269 psymb(s);
270 fprintf(of, "\n");
273 void
274 call(Node *n, Symb *sr)
276 Node *a;
277 char *f;
278 unsigned ft;
280 f = n->l->u.v;
281 if (varget(f)) {
282 ft = varget(f)->ctyp;
283 if (KIND(ft) != FUN)
284 die("invalid call");
285 } else
286 ft = FUNC(INT);
287 sr->ctyp = DREF(ft);
288 for (a=n->r; a; a=a->r)
289 a->u.s = expr(a->l);
290 fprintf(of, "\t");
291 psymb(*sr);
292 fprintf(of, " =%c call $%s(", irtyp(sr->ctyp), f);
293 for (a=n->r; a; a=a->r) {
294 fprintf(of, "%c ", irtyp(a->u.s.ctyp));
295 psymb(a->u.s);
296 fprintf(of, ", ");
298 fprintf(of, "...)\n");
301 Symb
302 expr(Node *n)
304 static char *otoa[] = {
305 ['+'] = "add",
306 ['-'] = "sub",
307 ['*'] = "mul",
308 ['/'] = "div",
309 ['%'] = "rem",
310 ['&'] = "and",
311 ['<'] = "cslt", /* meeeeh, wrong for pointers! */
312 ['l'] = "csle",
313 ['e'] = "ceq",
314 ['n'] = "cne",
316 Symb sr, s0, s1, sl;
317 int o, l;
318 char ty[2];
320 sr.t = Tmp;
321 sr.u.n = tmp++;
323 switch (n->op) {
325 case 0:
326 abort();
328 case 'o':
329 case 'a':
330 l = lbl;
331 lbl += 3;
332 bool(n, l, l+1);
333 fprintf(of, "@l%d\n", l);
334 fprintf(of, "\tjmp @l%d\n", l+2);
335 fprintf(of, "@l%d\n", l+1);
336 fprintf(of, "\tjmp @l%d\n", l+2);
337 fprintf(of, "@l%d\n", l+2);
338 fprintf(of, "\t");
339 sr.ctyp = INT;
340 psymb(sr);
341 fprintf(of, " =w phi @l%d 1, @l%d 0\n", l, l+1);
342 break;
344 case 'V':
345 s0 = lval(n);
346 sr.ctyp = s0.ctyp;
347 load(sr, s0);
348 break;
350 case 'N':
351 sr.t = Con;
352 sr.u.n = n->u.n;
353 sr.ctyp = INT;
354 break;
356 case 'S':
357 sr.t = Glo;
358 sr.u.n = n->u.n;
359 sr.ctyp = IDIR(INT);
360 break;
362 case 'C':
363 call(n, &sr);
364 break;
366 case '@':
367 s0 = expr(n->l);
368 if (KIND(s0.ctyp) != PTR)
369 die("dereference of a non-pointer");
370 sr.ctyp = DREF(s0.ctyp);
371 load(sr, s0);
372 break;
374 case 'A':
375 sr = lval(n->l);
376 sr.ctyp = IDIR(sr.ctyp);
377 break;
379 case '=':
380 s0 = expr(n->r);
381 s1 = lval(n->l);
382 sr = s0;
383 if (s1.ctyp == LNG && s0.ctyp == INT)
384 sext(&s0);
385 if (s0.ctyp != IDIR(NIL) || KIND(s1.ctyp) != PTR)
386 if (s1.ctyp != IDIR(NIL) || KIND(s0.ctyp) != PTR)
387 if (s1.ctyp != s0.ctyp)
388 die("invalid assignment");
389 fprintf(of, "\tstore%c ", irtyp(s1.ctyp));
390 goto Args;
392 case 'P':
393 case 'M':
394 o = n->op == 'P' ? '+' : '-';
395 sl = lval(n->l);
396 s0.t = Tmp;
397 s0.u.n = tmp++;
398 s0.ctyp = sl.ctyp;
399 load(s0, sl);
400 s1.t = Con;
401 s1.u.n = 1;
402 s1.ctyp = INT;
403 goto Binop;
405 default:
406 s0 = expr(n->l);
407 s1 = expr(n->r);
408 o = n->op;
409 Binop:
410 sr.ctyp = prom(o, &s0, &s1);
411 if (strchr("ne<l", n->op)) {
412 sprintf(ty, "%c", irtyp(sr.ctyp));
413 sr.ctyp = INT;
414 } else
415 strcpy(ty, "");
416 fprintf(of, "\t");
417 psymb(sr);
418 fprintf(of, " =%c", irtyp(sr.ctyp));
419 fprintf(of, " %s%s ", otoa[o], ty);
420 Args:
421 psymb(s0);
422 fprintf(of, ", ");
423 psymb(s1);
424 fprintf(of, "\n");
425 break;
428 if (n->op == '-'
429 && KIND(s0.ctyp) == PTR
430 && KIND(s1.ctyp) == PTR) {
431 fprintf(of, "\t%%t%d =l div ", tmp);
432 psymb(sr);
433 fprintf(of, ", %d\n", SIZE(DREF(s0.ctyp)));
434 sr.u.n = tmp++;
436 if (n->op == 'P' || n->op == 'M') {
437 fprintf(of, "\tstore%c ", irtyp(sl.ctyp));
438 psymb(sr);
439 fprintf(of, ", ");
440 psymb(sl);
441 fprintf(of, "\n");
442 sr = s0;
444 return sr;
447 Symb
448 lval(Node *n)
450 Symb sr;
452 switch (n->op) {
453 default:
454 die("invalid lvalue");
455 case 'V':
456 if (!varget(n->u.v))
457 die("undefined variable");
458 sr = *varget(n->u.v);
459 break;
460 case '@':
461 sr = expr(n->l);
462 if (KIND(sr.ctyp) != PTR)
463 die("dereference of a non-pointer");
464 sr.ctyp = DREF(sr.ctyp);
465 break;
467 return sr;
470 void
471 bool(Node *n, int lt, int lf)
473 Symb s;
474 int l;
476 switch (n->op) {
477 default:
478 s = expr(n); /* TODO: insert comparison to 0 with proper type */
479 fprintf(of, "\tjnz ");
480 psymb(s);
481 fprintf(of, ", @l%d, @l%d\n", lt, lf);
482 break;
483 case 'o':
484 l = lbl;
485 lbl += 1;
486 bool(n->l, lt, l);
487 fprintf(of, "@l%d\n", l);
488 bool(n->r, lt, lf);
489 break;
490 case 'a':
491 l = lbl;
492 lbl += 1;
493 bool(n->l, l, lf);
494 fprintf(of, "@l%d\n", l);
495 bool(n->r, lt, lf);
496 break;
501 stmt(Stmt *s, int b)
503 int l, r;
504 Symb x;
506 if (!s)
507 return 0;
509 switch (s->t) {
510 case Ret:
511 x = expr(s->p1);
512 fprintf(of, "\tret ");
513 psymb(x);
514 fprintf(of, "\n");
515 return 1;
516 case Break:
517 if (b < 0)
518 die("break not in loop");
519 fprintf(of, "\tjmp @l%d\n", b);
520 return 1;
521 case Expr:
522 expr(s->p1);
523 return 0;
524 case Seq:
525 return stmt(s->p1, b) || stmt(s->p2, b);
526 case If:
527 l = lbl;
528 lbl += 3;
529 bool(s->p1, l, l+1);
530 fprintf(of, "@l%d\n", l);
531 if (!(r=stmt(s->p2, b)))
532 if (s->p3)
533 fprintf(of, "\tjmp @l%d\n", l+2);
534 fprintf(of, "@l%d\n", l+1);
535 if (s->p3)
536 if (!(r &= stmt(s->p3, b)))
537 fprintf(of, "@l%d\n", l+2);
538 return s->p3 && r;
539 case While:
540 l = lbl;
541 lbl += 3;
542 fprintf(of, "@l%d\n", l);
543 bool(s->p1, l+1, l+2);
544 fprintf(of, "@l%d\n", l+1);
545 if (!stmt(s->p2, l+2))
546 fprintf(of, "\tjmp @l%d\n", l);
547 fprintf(of, "@l%d\n", l+2);
548 return 0;
552 Node *
553 mknode(char op, Node *l, Node *r)
555 Node *n;
557 n = alloc(sizeof *n);
558 n->op = op;
559 n->l = l;
560 n->r = r;
561 return n;
564 Node *
565 mkidx(Node *a, Node *i)
567 Node *n;
569 n = mknode('+', a, i);
570 n = mknode('@', n, 0);
571 return n;
574 Node *
575 mkneg(Node *n)
577 static Node *z;
579 if (!z) {
580 z = mknode('N', 0, 0);
581 z->u.n = 0;
583 return mknode('-', z, n);
586 Stmt *
587 mkstmt(int t, void *p1, void *p2, void *p3)
589 Stmt *s;
591 s = alloc(sizeof *s);
592 s->t = t;
593 s->p1 = p1;
594 s->p2 = p2;
595 s->p3 = p3;
596 return s;
599 Node *
600 param(char *v, unsigned ctyp, Node *pl)
602 Node *n;
604 if (ctyp == NIL)
605 die("invalid void declaration");
606 n = mknode(0, 0, pl);
607 varadd(v, 0, ctyp);
608 strcpy(n->u.v, v);
609 return n;
612 Stmt *
613 mkfor(Node *ini, Node *tst, Node *inc, Stmt *s)
615 Stmt *s1, *s2;
617 if (ini)
618 s1 = mkstmt(Expr, ini, 0, 0);
619 else
620 s1 = 0;
621 if (inc) {
622 s2 = mkstmt(Expr, inc, 0, 0);
623 s2 = mkstmt(Seq, s, s2, 0);
624 } else
625 s2 = s;
626 if (!tst) {
627 tst = mknode('N', 0, 0);
628 tst->u.n = 1;
630 s2 = mkstmt(While, tst, s2, 0);
631 if (s1)
632 return mkstmt(Seq, s1, s2, 0);
633 else
634 return s2;
639 %union {
640 Node *n;
641 Stmt *s;
642 unsigned u;
645 %token <n> NUM
646 %token <n> STR
647 %token <n> IDENT
648 %token PP MM LE GE SIZEOF
650 %token TVOID TINT TLNG
651 %token IF ELSE WHILE FOR BREAK RETURN
653 %right '='
654 %left OR
655 %left AND
656 %left '&'
657 %left EQ NE
658 %left '<' '>' LE GE
659 %left '+' '-'
660 %left '*' '/' '%'
662 %type <u> type
663 %type <s> stmt stmts
664 %type <n> expr exp0 pref post arg0 arg1 par0 par1
668 prog: func prog | fdcl prog | idcl prog | ;
670 fdcl: type IDENT '(' ')' ';'
672 varadd($2->u.v, 1, FUNC($1));
675 idcl: type IDENT ';'
677 if ($1 == NIL)
678 die("invalid void declaration");
679 if (nglo == NGlo)
680 die("too many string literals");
681 ini[nglo] = alloc(sizeof "{ x 0 }");
682 sprintf(ini[nglo], "{ %c 0 }", irtyp($1));
683 varadd($2->u.v, nglo++, $1);
686 init:
688 varclr();
689 tmp = 0;
692 func: init prot '{' dcls stmts '}'
694 if (!stmt($5, -1))
695 fprintf(of, "\tret 0\n");
696 fprintf(of, "}\n\n");
699 prot: IDENT '(' par0 ')'
701 Symb *s;
702 Node *n;
703 int t, m;
705 varadd($1->u.v, 1, FUNC(INT));
706 fprintf(of, "export function w $%s(", $1->u.v);
707 n = $3;
708 if (n)
709 for (;;) {
710 s = varget(n->u.v);
711 fprintf(of, "%c ", irtyp(s->ctyp));
712 fprintf(of, "%%t%d", tmp++);
713 n = n->r;
714 if (n)
715 fprintf(of, ", ");
716 else
717 break;
719 fprintf(of, ") {\n");
720 fprintf(of, "@l%d\n", lbl++);
721 for (t=0, n=$3; n; t++, n=n->r) {
722 s = varget(n->u.v);
723 m = SIZE(s->ctyp);
724 fprintf(of, "\t%%%s =l alloc%d %d\n", n->u.v, m, m);
725 fprintf(of, "\tstore%c %%t%d", irtyp(s->ctyp), t);
726 fprintf(of, ", %%%s\n", n->u.v);
730 par0: par1
731 | { $$ = 0; }
733 par1: type IDENT ',' par1 { $$ = param($2->u.v, $1, $4); }
734 | type IDENT { $$ = param($2->u.v, $1, 0); }
738 dcls: | dcls type IDENT ';'
740 int s;
741 char *v;
743 if ($2 == NIL)
744 die("invalid void declaration");
745 v = $3->u.v;
746 s = SIZE($2);
747 varadd(v, 0, $2);
748 fprintf(of, "\t%%%s =l alloc%d %d\n", v, s, s);
751 type: type '*' { $$ = IDIR($1); }
752 | TINT { $$ = INT; }
753 | TLNG { $$ = LNG; }
754 | TVOID { $$ = NIL; }
757 stmt: ';' { $$ = 0; }
758 | '{' stmts '}' { $$ = $2; }
759 | BREAK ';' { $$ = mkstmt(Break, 0, 0, 0); }
760 | RETURN expr ';' { $$ = mkstmt(Ret, $2, 0, 0); }
761 | expr ';' { $$ = mkstmt(Expr, $1, 0, 0); }
762 | WHILE '(' expr ')' stmt { $$ = mkstmt(While, $3, $5, 0); }
763 | IF '(' expr ')' stmt ELSE stmt { $$ = mkstmt(If, $3, $5, $7); }
764 | IF '(' expr ')' stmt { $$ = mkstmt(If, $3, $5, 0); }
765 | FOR '(' exp0 ';' exp0 ';' exp0 ')' stmt
766 { $$ = mkfor($3, $5, $7, $9); }
769 stmts: stmts stmt { $$ = mkstmt(Seq, $1, $2, 0); }
770 | { $$ = 0; }
773 expr: pref
774 | expr '=' expr { $$ = mknode('=', $1, $3); }
775 | expr '+' expr { $$ = mknode('+', $1, $3); }
776 | expr '-' expr { $$ = mknode('-', $1, $3); }
777 | expr '*' expr { $$ = mknode('*', $1, $3); }
778 | expr '/' expr { $$ = mknode('/', $1, $3); }
779 | expr '%' expr { $$ = mknode('%', $1, $3); }
780 | expr '<' expr { $$ = mknode('<', $1, $3); }
781 | expr '>' expr { $$ = mknode('<', $3, $1); }
782 | expr LE expr { $$ = mknode('l', $1, $3); }
783 | expr GE expr { $$ = mknode('l', $3, $1); }
784 | expr EQ expr { $$ = mknode('e', $1, $3); }
785 | expr NE expr { $$ = mknode('n', $1, $3); }
786 | expr '&' expr { $$ = mknode('&', $1, $3); }
787 | expr AND expr { $$ = mknode('a', $1, $3); }
788 | expr OR expr { $$ = mknode('o', $1, $3); }
791 exp0: expr
792 | { $$ = 0; }
795 pref: post
796 | '-' pref { $$ = mkneg($2); }
797 | '*' pref { $$ = mknode('@', $2, 0); }
798 | '&' pref { $$ = mknode('A', $2, 0); }
801 post: NUM
802 | STR
803 | IDENT
804 | SIZEOF '(' type ')' { $$ = mknode('N', 0, 0); $$->u.n = SIZE($3); }
805 | '(' expr ')' { $$ = $2; }
806 | IDENT '(' arg0 ')' { $$ = mknode('C', $1, $3); }
807 | post '[' expr ']' { $$ = mkidx($1, $3); }
808 | post PP { $$ = mknode('P', $1, 0); }
809 | post MM { $$ = mknode('M', $1, 0); }
812 arg0: arg1
813 | { $$ = 0; }
815 arg1: expr { $$ = mknode(0, $1, 0); }
816 | expr ',' arg1 { $$ = mknode(0, $1, $3); }
822 yylex()
824 struct {
825 char *s;
826 int t;
827 } kwds[] = {
828 { "void", TVOID },
829 { "int", TINT },
830 { "long", TLNG },
831 { "if", IF },
832 { "else", ELSE },
833 { "for", FOR },
834 { "while", WHILE },
835 { "return", RETURN },
836 { "break", BREAK },
837 { "sizeof", SIZEOF },
838 { 0, 0 }
840 int i, c, c1, n;
841 char v[NString], *p;
843 do {
844 c = getchar();
845 if (c == '#')
846 while ((c = getchar()) != '\n')
848 if (c == '\n')
849 line++;
850 } while (isspace(c));
853 if (c == EOF)
854 return 0;
857 if (isdigit(c)) {
858 n = 0;
859 do {
860 n *= 10;
861 n += c-'0';
862 c = getchar();
863 } while (isdigit(c));
864 ungetc(c, stdin);
865 yylval.n = mknode('N', 0, 0);
866 yylval.n->u.n = n;
867 return NUM;
870 if (isalpha(c)) {
871 p = v;
872 do {
873 if (p == &v[NString-1])
874 die("ident too long");
875 *p++ = c;
876 c = getchar();
877 } while (isalpha(c) || c == '_');
878 *p = 0;
879 ungetc(c, stdin);
880 for (i=0; kwds[i].s; i++)
881 if (strcmp(v, kwds[i].s) == 0)
882 return kwds[i].t;
883 yylval.n = mknode('V', 0, 0);
884 strcpy(yylval.n->u.v, v);
885 return IDENT;
888 if (c == '"') {
889 i = 0;
890 n = 32;
891 p = alloc(n);
892 strcpy(p, "{ b \"");
893 for (i=5;; i++) {
894 c = getchar();
895 if (c == EOF)
896 die("unclosed string literal");
897 if (i+8 >= n) {
898 p = memcpy(alloc(n*2), p, n);
899 n *= 2;
901 p[i] = c;
902 if (c == '"' && p[i-1]!='\\')
903 break;
905 strcpy(&p[i], "\", b 0 }");
906 if (nglo == NGlo)
907 die("too many globals");
908 ini[nglo] = p;
909 yylval.n = mknode('S', 0, 0);
910 yylval.n->u.n = nglo++;
911 return STR;
914 c1 = getchar();
915 #define DI(a, b) a + b*256
916 switch (DI(c,c1)) {
917 case DI('!','='): return NE;
918 case DI('=','='): return EQ;
919 case DI('<','='): return LE;
920 case DI('>','='): return GE;
921 case DI('+','+'): return PP;
922 case DI('-','-'): return MM;
923 case DI('&','&'): return AND;
924 case DI('|','|'): return OR;
926 #undef DI
927 ungetc(c1, stdin);
929 return c;
933 yyerror(char *err)
935 die("parse error");
936 return 0;
940 main()
942 int i;
944 of = stdout;
945 nglo = 1;
946 if (yyparse() != 0)
947 die("parse error");
948 for (i=1; i<nglo; i++)
949 fprintf(of, "data $glo%d = %s\n", i, ini[i]);
950 return 0;