tests for variable argument lists
[qbe.git] / emit.c
blobccbd5162e9cdc080a6d8ceb3586347a8277a956a
1 #include "all.h"
/* label prefixes used throughout the emitter:
 * locprefix starts file-local labels (blocks, fp
 * constants, local addresses), symprefix starts
 * function and data symbol names */
char *locprefix;
char *symprefix;
/* operand sizes, used as the column index of rname[],
 * followed by the wildcard classes accepted in the
 * cls field of omap[] */
enum {
	SLong,   /* 0 */
	SWord,   /* 1 */
	SShort,  /* 2 */
	SByte,   /* 3 */

	Ka = -2, /* matches all classes */
	Ki,      /* -1, matches Kw and Kl */
};
/* Instruction format strings:
 *
 * if the format string starts with -, the instruction
 * is assumed to be 3-address and is put in 2-address
 * mode using an extra mov if necessary
 *
 * if the format string starts with +, the same as the
 * above applies, but commutativity is also assumed
 *
 * %k is used to set the class of the instruction,
 * it'll expand to "l", "q", "ss", "sd", depending
 * on the instruction class
 * %0 designates the first argument
 * %1 designates the second argument
 * %= designates the result
 *
 * if %k is not used, a prefix to 0, 1, or = must be
 * added, it can be:
 *   M - memory reference
 *   L - long (64 bits)
 *   W - word (32 bits)
 *   H - short (16 bits)
 *   B - byte (8 bits)
 *   S - single precision float
 *   D - double precision float
 */
41 static struct {
42 short op;
43 short cls;
44 char *asm;
45 } omap[] = {
46 { Oadd, Ka, "+add%k %1, %=" },
47 { Osub, Ka, "-sub%k %1, %=" },
48 { Oand, Ki, "+and%k %1, %=" },
49 { Oor, Ki, "+or%k %1, %=" },
50 { Oxor, Ki, "+xor%k %1, %=" },
51 { Osar, Ki, "-sar%k %B1, %=" },
52 { Oshr, Ki, "-shr%k %B1, %=" },
53 { Oshl, Ki, "-shl%k %B1, %=" },
54 { Omul, Ki, "+imul%k %1, %=" },
55 { Omul, Ks, "+mulss %1, %=" }, /* fixme */
56 { Omul, Kd, "+mulsd %1, %=" },
57 { Odiv, Ka, "-div%k %1, %=" },
58 { Ostorel, Ka, "movq %L0, %M1" },
59 { Ostorew, Ka, "movl %W0, %M1" },
60 { Ostoreh, Ka, "movw %H0, %M1" },
61 { Ostoreb, Ka, "movb %B0, %M1" },
62 { Ostores, Ka, "movss %S0, %M1" },
63 { Ostored, Ka, "movsd %D0, %M1" },
64 { Oload, Ka, "mov%k %M0, %=" },
65 { Oloadsw, Kl, "movslq %M0, %L=" },
66 { Oloadsw, Kw, "movl %M0, %W=" },
67 { Oloaduw, Ki, "movl %M0, %W=" },
68 { Oloadsh, Ki, "movsw%k %M0, %=" },
69 { Oloaduh, Ki, "movzw%k %M0, %=" },
70 { Oloadsb, Ki, "movsb%k %M0, %=" },
71 { Oloadub, Ki, "movzb%k %M0, %=" },
72 { Oextsw, Kl, "movslq %W0, %L=" },
73 { Oextuw, Kl, "movl %W0, %W=" },
74 { Oextsh, Ki, "movsw%k %H0, %=" },
75 { Oextuh, Ki, "movzw%k %H0, %=" },
76 { Oextsb, Ki, "movsb%k %B0, %=" },
77 { Oextub, Ki, "movzb%k %B0, %=" },
79 { Oexts, Kd, "cvtss2sd %0, %=" }, /* see if factorization is possible */
80 { Otruncd, Ks, "cvttsd2ss %0, %=" },
81 { Ostosi, Ki, "cvttss2si%k %0, %=" },
82 { Odtosi, Ki, "cvttsd2si%k %0, %=" },
83 { Oswtof, Ka, "cvtsi2%k %W0, %=" },
84 { Osltof, Ka, "cvtsi2%k %L0, %=" },
85 { Ocast, Ki, "movq %D0, %L=" },
86 { Ocast, Ka, "movq %L0, %D=" },
88 { Oaddr, Ki, "lea%k %M0, %=" },
89 { Oswap, Ki, "xchg%k %0, %1" },
90 { Osign, Kl, "cqto" },
91 { Osign, Kw, "cltd" },
92 { Oxdiv, Ki, "div%k %0" },
93 { Oxidiv, Ki, "idiv%k %0" },
94 { Oxcmp, Ks, "comiss %S0, %S1" }, /* fixme, Kf */
95 { Oxcmp, Kd, "comisd %D0, %D1" },
96 { Oxcmp, Ki, "cmp%k %0, %1" },
97 { Oxtest, Ki, "test%k %0, %1" },
98 { Oxset+ICule, Ki, "setbe %B=\n\tmovzb%k %B=, %=" },
99 { Oxset+ICult, Ki, "setb %B=\n\tmovzb%k %B=, %=" },
100 { Oxset+ICsle, Ki, "setle %B=\n\tmovzb%k %B=, %=" },
101 { Oxset+ICslt, Ki, "setl %B=\n\tmovzb%k %B=, %=" },
102 { Oxset+ICsgt, Ki, "setg %B=\n\tmovzb%k %B=, %=" },
103 { Oxset+ICsge, Ki, "setge %B=\n\tmovzb%k %B=, %=" },
104 { Oxset+ICugt, Ki, "seta %B=\n\tmovzb%k %B=, %=" },
105 { Oxset+ICuge, Ki, "setae %B=\n\tmovzb%k %B=, %=" },
106 { Oxset+ICeq, Ki, "setz %B=\n\tmovzb%k %B=, %=" },
107 { Oxset+ICne, Ki, "setnz %B=\n\tmovzb%k %B=, %=" },
108 { Oxset+ICxnp, Ki, "setnp %B=\n\tmovsb%k %B=, %=" },
109 { Oxset+ICxp, Ki, "setp %B=\n\tmovsb%k %B=, %=" },
110 { NOp, 0, 0 }
113 static char *rname[][4] = {
114 [RAX] = {"rax", "eax", "ax", "al"},
115 [RBX] = {"rbx", "ebx", "bx", "bl"},
116 [RCX] = {"rcx", "ecx", "cx", "cl"},
117 [RDX] = {"rdx", "edx", "dx", "dl"},
118 [RSI] = {"rsi", "esi", "si", "sil"},
119 [RDI] = {"rdi", "edi", "di", "dil"},
120 [RBP] = {"rbp", "ebp", "bp", "bpl"},
121 [RSP] = {"rsp", "esp", "sp", "spl"},
122 [R8 ] = {"r8" , "r8d", "r8w", "r8b"},
123 [R9 ] = {"r9" , "r9d", "r9w", "r9b"},
124 [R10] = {"r10", "r10d", "r10w", "r10b"},
125 [R11] = {"r11", "r11d", "r11w", "r11b"},
126 [R12] = {"r12", "r12d", "r12w", "r12b"},
127 [R13] = {"r13", "r13d", "r13w", "r13b"},
128 [R14] = {"r14", "r14d", "r14w", "r14b"},
129 [R15] = {"r15", "r15d", "r15w", "r15b"},
133 static int
134 slot(int s, Fn *fn)
136 struct { int i:29; } x;
138 /* sign extend s using a bitfield */
139 x.i = s;
140 assert(x.i <= fn->slot);
141 /* specific to NAlign == 3 */
142 if (x.i < 0)
143 return -4 * x.i;
144 else if (fn->vararg)
145 return -176 + -4 * (fn->slot - x.i);
146 else
147 return -4 * (fn->slot - x.i);
150 static void
151 emitcon(Con *con, FILE *f)
153 switch (con->type) {
154 case CAddr:
155 if (con->local)
156 fprintf(f, "%s%s", locprefix, con->label);
157 else
158 fprintf(f, "%s%s", symprefix, con->label);
159 if (con->bits.i)
160 fprintf(f, "%+"PRId64, con->bits.i);
161 break;
162 case CBits:
163 fprintf(f, "%"PRId64, con->bits.i);
164 break;
165 default:
166 die("unreachable");
170 static char *
171 regtoa(int reg, int sz)
173 static char buf[6];
175 if (reg >= XMM0) {
176 sprintf(buf, "xmm%d", reg-XMM0);
177 return buf;
178 } else
179 return rname[reg][sz];
182 static Ref
183 getarg(char c, Ins *i)
185 switch (c) {
186 case '0':
187 return i->arg[0];
188 case '1':
189 return i->arg[1];
190 case '=':
191 return i->to;
192 default:
193 die("invalid arg letter %c", c);
197 static void emitins(Ins, Fn *, FILE *);
199 static void
200 emitcopy(Ref r1, Ref r2, int k, Fn *fn, FILE *f)
202 Ins icp;
204 icp.op = Ocopy;
205 icp.arg[0] = r2;
206 icp.to = r1;
207 icp.cls = k;
208 emitins(icp, fn, f);
211 static void
212 emitf(char *s, Ins *i, Fn *fn, FILE *f)
214 static char clstoa[][3] = {"l", "q", "ss", "sd"};
215 char c;
216 int sz;
217 Ref ref;
218 Mem *m;
219 Con off;
221 switch (*s) {
222 case '+':
223 if (req(i->arg[1], i->to)) {
224 ref = i->arg[0];
225 i->arg[0] = i->arg[1];
226 i->arg[1] = ref;
228 /* fall through */
229 case '-':
230 assert((!req(i->arg[1], i->to) || req(i->arg[0], i->to)) &&
231 "cannot convert to 2-address");
232 emitcopy(i->to, i->arg[0], i->cls, fn, f);
233 s++;
234 break;
237 fputc('\t', f);
238 Next:
239 while ((c = *s++) != '%')
240 if (!c) {
241 fputc('\n', f);
242 return;
243 } else
244 fputc(c, f);
245 switch ((c = *s++)) {
246 case '%':
247 fputc('%', f);
248 break;
249 case 'k':
250 fputs(clstoa[i->cls], f);
251 break;
252 case '0':
253 case '1':
254 case '=':
255 sz = KWIDE(i->cls) ? SLong : SWord;
256 s--;
257 goto Ref;
258 case 'D':
259 case 'S':
260 sz = SLong; /* does not matter for floats */
261 Ref:
262 c = *s++;
263 ref = getarg(c, i);
264 switch (rtype(ref)) {
265 case RTmp:
266 assert(isreg(ref));
267 fprintf(f, "%%%s", regtoa(ref.val, sz));
268 break;
269 case RSlot:
270 fprintf(f, "%d(%%rbp)", slot(ref.val, fn));
271 break;
272 case RMem:
273 Mem:
274 m = &fn->mem[ref.val];
275 if (rtype(m->base) == RSlot) {
276 off.type = CBits;
277 off.bits.i = slot(m->base.val, fn);
278 addcon(&m->offset, &off);
279 m->base = TMP(RBP);
281 if (m->offset.type != CUndef)
282 emitcon(&m->offset, f);
283 fputc('(', f);
284 if (req(m->base, R))
285 fprintf(f, "%%rip");
286 else
287 fprintf(f, "%%%s", regtoa(m->base.val, SLong));
288 if (!req(m->index, R))
289 fprintf(f, ", %%%s, %d",
290 regtoa(m->index.val, SLong),
291 m->scale
293 fputc(')', f);
294 break;
295 case RCon:
296 fputc('$', f);
297 emitcon(&fn->con[ref.val], f);
298 break;
299 default:
300 die("unreachable");
302 break;
303 case 'L':
304 sz = SLong;
305 goto Ref;
306 case 'W':
307 sz = SWord;
308 goto Ref;
309 case 'H':
310 sz = SShort;
311 goto Ref;
312 case 'B':
313 sz = SByte;
314 goto Ref;
315 case 'M':
316 c = *s++;
317 ref = getarg(c, i);
318 switch (rtype(ref)) {
319 case RMem:
320 goto Mem;
321 case RSlot:
322 fprintf(f, "%d(%%rbp)", slot(ref.val, fn));
323 break;
324 case RCon:
325 emitcon(&fn->con[ref.val], f);
326 fprintf(f, "(%%rip)");
327 break;
328 case RTmp:
329 assert(isreg(ref));
330 fprintf(f, "(%%%s)", regtoa(ref.val, SLong));
331 break;
332 default:
333 die("unreachable");
335 break;
336 default:
337 die("invalid format specifier %%%c", c);
339 goto Next;
342 static void
343 emitins(Ins i, Fn *fn, FILE *f)
345 Ref r;
346 int64_t val;
347 int o;
349 switch (i.op) {
350 default:
351 Table:
352 /* most instructions are just pulled out of
353 * the table omap[], some special cases are
354 * detailed below */
355 for (o=0;; o++) {
356 /* this linear search should really be a binary
357 * search */
358 if (omap[o].op == NOp)
359 die("no match for %s(%d)", opdesc[i.op].name, i.cls);
360 if (omap[o].op == i.op)
361 if (omap[o].cls == i.cls
362 || (omap[o].cls == Ki && KBASE(i.cls) == 0)
363 || (omap[o].cls == Ka))
364 break;
366 emitf(omap[o].asm, &i, fn, f);
367 break;
368 case Onop:
369 /* just do nothing for nops, they are inserted
370 * by some passes */
371 break;
372 case Omul:
373 /* here, we try to use the 3-addresss form
374 * of multiplication when possible */
375 if (rtype(i.arg[1]) == RCon) {
376 r = i.arg[0];
377 i.arg[0] = i.arg[1];
378 i.arg[1] = r;
380 if (KBASE(i.cls) == 0 /* only available for ints */
381 && rtype(i.arg[0]) == RCon
382 && rtype(i.arg[1]) == RTmp) {
383 emitf("imul%k %0, %1, %=", &i, fn, f);
384 break;
386 goto Table;
387 case Osub:
388 /* we have to use the negation trick to handle
389 * some 3-address substractions */
390 if (req(i.to, i.arg[1])) {
391 emitf("neg%k %=", &i, fn, f);
392 emitf("add%k %0, %=", &i, fn, f);
393 break;
395 goto Table;
396 case Ocopy:
397 /* make sure we don't emit useless copies,
398 * also, we can use a trick to load 64-bits
399 * registers, it's detailed in my note below
400 * http://c9x.me/art/notes.html?09/19/2015 */
401 if (req(i.to, R) || req(i.arg[0], R))
402 break;
403 if (isreg(i.to)
404 && rtype(i.arg[0]) == RCon
405 && i.cls == Kl
406 && fn->con[i.arg[0].val].type == CBits
407 && (val = fn->con[i.arg[0].val].bits.i) >= 0
408 && val <= UINT32_MAX) {
409 emitf("movl %W0, %W=", &i, fn, f);
410 } else if (isreg(i.to)
411 && rtype(i.arg[0]) == RCon
412 && fn->con[i.arg[0].val].type == CAddr) {
413 emitf("lea%k %M0, %=", &i, fn, f);
414 } else if (!req(i.arg[0], i.to))
415 emitf("mov%k %0, %=", &i, fn, f);
416 break;
417 case Ocall:
418 /* calls simply have a weird syntax in AT&T
419 * assembly... */
420 switch (rtype(i.arg[0])) {
421 case RCon:
422 fprintf(f, "\tcallq ");
423 emitcon(&fn->con[i.arg[0].val], f);
424 fprintf(f, "\n");
425 break;
426 case RTmp:
427 emitf("callq *%L0", &i, fn, f);
428 break;
429 default:
430 die("invalid call argument");
432 break;
433 case Osalloc:
434 /* there is no good reason why this is here
435 * maybe we should split Osalloc in 2 different
436 * instructions depending on the result
438 emitf("subq %L0, %%rsp", &i, fn, f);
439 if (!req(i.to, R))
440 emitcopy(i.to, TMP(RSP), Kl, fn, f);
441 break;
442 case Oswap:
443 if (KBASE(i.cls) == 0)
444 goto Table;
445 /* for floats, there is no swap instruction
446 * so we use xmm15 as a temporary
448 emitcopy(TMP(XMM0+15), i.arg[0], i.cls, fn, f);
449 emitcopy(i.arg[0], i.arg[1], i.cls, fn, f);
450 emitcopy(i.arg[1], TMP(XMM0+15), i.cls, fn, f);
451 break;
455 static int
456 cneg(int cmp)
458 switch (cmp) {
459 default: die("invalid int comparison %d", cmp);
460 case ICule: return ICugt;
461 case ICult: return ICuge;
462 case ICsle: return ICsgt;
463 case ICslt: return ICsge;
464 case ICsgt: return ICsle;
465 case ICsge: return ICslt;
466 case ICugt: return ICule;
467 case ICuge: return ICult;
468 case ICeq: return ICne;
469 case ICne: return ICeq;
470 case ICxnp: return ICxp;
471 case ICxp: return ICxnp;
475 static int
476 framesz(Fn *fn)
478 int i, o, f;
480 /* specific to NAlign == 3 */
481 for (i=0, o=0; i<NRClob; i++)
482 o ^= 1 & (fn->reg >> rclob[i]);
483 f = fn->slot;
484 f = (f + 3) & -4;
485 return 4*f + 8*o + 176*fn->vararg;
488 void
489 emitfn(Fn *fn, FILE *f)
491 static char *ctoa[] = {
492 [ICeq] = "z",
493 [ICule] = "be",
494 [ICult] = "b",
495 [ICsle] = "le",
496 [ICslt] = "l",
497 [ICsgt] = "g",
498 [ICsge] = "ge",
499 [ICugt] = "a",
500 [ICuge] = "ae",
501 [ICne] = "nz",
502 [ICxnp] = "np",
503 [ICxp] = "p"
505 static int id0;
506 Blk *b, *s;
507 Ins *i, itmp;
508 int *r, c, fs, o, n;
510 fprintf(f, ".text\n");
511 if (fn->export)
512 fprintf(f, ".globl %s%s\n", symprefix, fn->name);
513 fprintf(f,
514 "%s%s:\n"
515 "\tpushq %%rbp\n"
516 "\tmovq %%rsp, %%rbp\n",
517 symprefix, fn->name
519 fs = framesz(fn);
520 if (fs)
521 fprintf(f, "\tsub $%d, %%rsp\n", fs);
522 if (fn->vararg) {
523 o = -176;
524 for (r=rsave; r-rsave<6; ++r, o+=8)
525 fprintf(f, "\tmovq %%%s, %d(%%rbp)\n", rname[*r][0], o);
526 for (n=0; n<8; ++n, o+=16)
527 fprintf(f, "\tmovaps %%xmm%d, %d(%%rbp)\n", n, o);
529 for (r=rclob; r-rclob < NRClob; r++)
530 if (fn->reg & BIT(*r)) {
531 itmp.arg[0] = TMP(*r);
532 emitf("pushq %L0", &itmp, fn, f);
535 for (b=fn->start; b; b=b->link) {
536 fprintf(f, "%sbb%d: /* %s */\n", locprefix, id0+b->id, b->name);
537 for (i=b->ins; i!=&b->ins[b->nins]; i++)
538 emitins(*i, fn, f);
539 switch (b->jmp.type) {
540 case Jret0:
541 for (r=&rclob[NRClob]; r>rclob;)
542 if (fn->reg & BIT(*--r)) {
543 itmp.arg[0] = TMP(*r);
544 emitf("popq %L0", &itmp, fn, f);
546 fprintf(f,
547 "\tleave\n"
548 "\tret\n"
550 break;
551 case Jjmp:
552 Jmp:
553 if (b->s1 != b->link)
554 fprintf(f, "\tjmp %sbb%d /* %s */\n",
555 locprefix, id0+b->s1->id, b->s1->name);
556 break;
557 default:
558 c = b->jmp.type - Jxjc;
559 if (0 <= c && c <= NXICmp) {
560 if (b->link == b->s2) {
561 s = b->s1;
562 b->s1 = b->s2;
563 b->s2 = s;
564 } else
565 c = cneg(c);
566 fprintf(f, "\tj%s %sbb%d /* %s */\n", ctoa[c],
567 locprefix, id0+b->s2->id, b->s2->name);
568 goto Jmp;
570 die("unhandled jump %d", b->jmp.type);
573 id0 += fn->nblk;
576 void
577 emitdat(Dat *d, FILE *f)
579 static int align;
580 static char *dtoa[] = {
581 [DAlign] = ".align",
582 [DB] = "\t.byte",
583 [DH] = "\t.value",
584 [DW] = "\t.long",
585 [DL] = "\t.quad"
588 switch (d->type) {
589 case DStart:
590 align = 0;
591 fprintf(f, ".data\n");
592 break;
593 case DEnd:
594 break;
595 case DName:
596 if (!align)
597 fprintf(f, ".align 8\n");
598 if (d->export)
599 fprintf(f, ".globl %s%s\n", symprefix, d->u.str);
600 fprintf(f, "%s%s:\n", symprefix, d->u.str);
601 break;
602 case DZ:
603 fprintf(f, "\t.fill %"PRId64",1,0\n", d->u.num);
604 break;
605 default:
606 if (d->type == DAlign)
607 align = 1;
609 if (d->isstr) {
610 if (d->type != DB)
611 err("strings only supported for 'b' currently");
612 fprintf(f, "\t.ascii \"%s\"\n", d->u.str);
614 else if (d->isref) {
615 fprintf(f, "%s %s%+"PRId64"\n",
616 dtoa[d->type], d->u.ref.nam,
617 d->u.ref.off);
619 else {
620 fprintf(f, "%s %"PRId64"\n",
621 dtoa[d->type], d->u.num);
623 break;
/* one stashed floating point constant; the list order
 * gives each constant the index used in its label */
typedef struct FBits {
	union {
		int64_t n; /* raw bits, used for comparison and output */
		float f;
		double d;
	} bits;
	int wide;            /* non-zero for a 64-bit constant */
	struct FBits *link;  /* next constant, 0 at the end */
} FBits;

/* head of the list of stashed constants */
static FBits *stash;
642 stashfp(int64_t n, int w)
644 FBits **pb, *b;
645 int i;
647 /* does a dumb de-dup of fp constants
648 * this should be the linker's job */
649 for (pb=&stash, i=0; (b=*pb); pb=&b->link, i++)
650 if (n == b->bits.n && w == b->wide)
651 return i;
652 b = emalloc(sizeof *b);
653 b->bits.n = n;
654 b->wide = w;
655 b->link = 0;
656 *pb = b;
657 return i;
660 void
661 emitfin(FILE *f)
663 FBits *b;
664 int i;
666 if (!stash)
667 return;
668 fprintf(f, "/* floating point constants */\n");
669 fprintf(f, ".data\n.align 8\n");
670 for (b=stash, i=0; b; b=b->link, i++)
671 if (b->wide)
672 fprintf(f,
673 "%sfp%d:\n"
674 "\t.quad %"PRId64
675 " /* %f */\n",
676 locprefix, i, b->bits.n,
677 b->bits.d
679 for (b=stash, i=0; b; b=b->link, i++)
680 if (!b->wide)
681 fprintf(f,
682 "%sfp%d:\n"
683 "\t.long %"PRId64
684 " /* %lf */\n",
685 locprefix, i, b->bits.n & 0xffffffff,
686 b->bits.f
688 while ((b=stash)) {
689 stash = b->link;
690 free(b);