properly detect ssa form
[qbe.git] / amd64 / emit.c
blobac7a1f3a775b4784870dd4f9ba19cf4d6b065022
1 #include "all.h"
4 #define CMP(X) \
5 X(Ciule, "be") \
6 X(Ciult, "b") \
7 X(Cisle, "le") \
8 X(Cislt, "l") \
9 X(Cisgt, "g") \
10 X(Cisge, "ge") \
11 X(Ciugt, "a") \
12 X(Ciuge, "ae") \
13 X(Cieq, "z") \
14 X(Cine, "nz") \
15 X(NCmpI+Cfle, "be") \
16 X(NCmpI+Cflt, "b") \
17 X(NCmpI+Cfgt, "a") \
18 X(NCmpI+Cfge, "ae") \
19 X(NCmpI+Cfeq, "z") \
20 X(NCmpI+Cfne, "nz") \
21 X(NCmpI+Cfo, "np") \
22 X(NCmpI+Cfuo, "p")
24 enum {
25 SLong = 0,
26 SWord = 1,
27 SShort = 2,
28 SByte = 3,
30 Ki = -1, /* matches Kw and Kl */
31 Ka = -2, /* matches all classes */
34 /* Instruction format strings:
36 * if the format string starts with -, the instruction
37 * is assumed to be 3-address and is put in 2-address
38 * mode using an extra mov if necessary
40 * if the format string starts with +, the same as the
41 * above applies, but commutativity is also assumed
43 * %k is used to set the class of the instruction,
44 * it'll expand to "l", "q", "ss", "sd", depending
45 * on the instruction class
46 * %0 designates the first argument
47 * %1 designates the second argument
48 * %= designates the result
50 * if %k is not used, a prefix to 0, 1, or = must be
51 * added, it can be:
52 * M - memory reference
53 * L - long (64 bits)
54 * W - word (32 bits)
55 * H - short (16 bits)
56 * B - byte (8 bits)
57 * S - single precision float
58 * D - double precision float
60 static struct {
61 short op;
62 short cls;
63 char *asm;
64 } omap[] = {
65 { Oadd, Ka, "+add%k %1, %=" },
66 { Osub, Ka, "-sub%k %1, %=" },
67 { Oand, Ki, "+and%k %1, %=" },
68 { Oor, Ki, "+or%k %1, %=" },
69 { Oxor, Ki, "+xor%k %1, %=" },
70 { Osar, Ki, "-sar%k %B1, %=" },
71 { Oshr, Ki, "-shr%k %B1, %=" },
72 { Oshl, Ki, "-shl%k %B1, %=" },
73 { Omul, Ki, "+imul%k %1, %=" },
74 { Omul, Ks, "+mulss %1, %=" },
75 { Omul, Kd, "+mulsd %1, %=" },
76 { Odiv, Ka, "-div%k %1, %=" },
77 { Ostorel, Ka, "movq %L0, %M1" },
78 { Ostorew, Ka, "movl %W0, %M1" },
79 { Ostoreh, Ka, "movw %H0, %M1" },
80 { Ostoreb, Ka, "movb %B0, %M1" },
81 { Ostores, Ka, "movss %S0, %M1" },
82 { Ostored, Ka, "movsd %D0, %M1" },
83 { Oload, Ka, "mov%k %M0, %=" },
84 { Oloadsw, Kl, "movslq %M0, %L=" },
85 { Oloadsw, Kw, "movl %M0, %W=" },
86 { Oloaduw, Ki, "movl %M0, %W=" },
87 { Oloadsh, Ki, "movsw%k %M0, %=" },
88 { Oloaduh, Ki, "movzw%k %M0, %=" },
89 { Oloadsb, Ki, "movsb%k %M0, %=" },
90 { Oloadub, Ki, "movzb%k %M0, %=" },
91 { Oextsw, Kl, "movslq %W0, %L=" },
92 { Oextuw, Kl, "movl %W0, %W=" },
93 { Oextsh, Ki, "movsw%k %H0, %=" },
94 { Oextuh, Ki, "movzw%k %H0, %=" },
95 { Oextsb, Ki, "movsb%k %B0, %=" },
96 { Oextub, Ki, "movzb%k %B0, %=" },
98 { Oexts, Kd, "cvtss2sd %0, %=" },
99 { Otruncd, Ks, "cvtsd2ss %0, %=" },
100 { Ostosi, Ki, "cvttss2si%k %0, %=" },
101 { Odtosi, Ki, "cvttsd2si%k %0, %=" },
102 { Oswtof, Ka, "cvtsi2%k %W0, %=" },
103 { Osltof, Ka, "cvtsi2%k %L0, %=" },
104 { Ocast, Ki, "movq %D0, %L=" },
105 { Ocast, Ka, "movq %L0, %D=" },
107 { Oaddr, Ki, "lea%k %M0, %=" },
108 { Oswap, Ki, "xchg%k %0, %1" },
109 { Osign, Kl, "cqto" },
110 { Osign, Kw, "cltd" },
111 { Oxdiv, Ki, "div%k %0" },
112 { Oxidiv, Ki, "idiv%k %0" },
113 { Oxcmp, Ks, "comiss %S0, %S1" },
114 { Oxcmp, Kd, "comisd %D0, %D1" },
115 { Oxcmp, Ki, "cmp%k %0, %1" },
116 { Oxtest, Ki, "test%k %0, %1" },
117 #define X(c, s) \
118 { Oflag+c, Ki, "set" s " %B=\n\tmovzb%k %B=, %=" },
119 CMP(X)
120 #undef X
121 { NOp, 0, 0 }
124 static char *rname[][4] = {
125 [RAX] = {"rax", "eax", "ax", "al"},
126 [RBX] = {"rbx", "ebx", "bx", "bl"},
127 [RCX] = {"rcx", "ecx", "cx", "cl"},
128 [RDX] = {"rdx", "edx", "dx", "dl"},
129 [RSI] = {"rsi", "esi", "si", "sil"},
130 [RDI] = {"rdi", "edi", "di", "dil"},
131 [RBP] = {"rbp", "ebp", "bp", "bpl"},
132 [RSP] = {"rsp", "esp", "sp", "spl"},
133 [R8 ] = {"r8" , "r8d", "r8w", "r8b"},
134 [R9 ] = {"r9" , "r9d", "r9w", "r9b"},
135 [R10] = {"r10", "r10d", "r10w", "r10b"},
136 [R11] = {"r11", "r11d", "r11w", "r11b"},
137 [R12] = {"r12", "r12d", "r12w", "r12b"},
138 [R13] = {"r13", "r13d", "r13w", "r13b"},
139 [R14] = {"r14", "r14d", "r14w", "r14b"},
140 [R15] = {"r15", "r15d", "r15w", "r15b"},
144 static int
145 slot(int s, Fn *fn)
147 struct { int i:29; } x;
149 /* sign extend s using a bitfield */
150 x.i = s;
151 assert(x.i <= fn->slot);
152 /* specific to NAlign == 3 */
153 if (x.i < 0)
154 return -4 * x.i;
155 else if (fn->vararg)
156 return -176 + -4 * (fn->slot - x.i);
157 else
158 return -4 * (fn->slot - x.i);
161 static void
162 emitcon(Con *con, FILE *f)
164 char *p;
166 switch (con->type) {
167 case CAddr:
168 p = con->local ? gasloc : gassym;
169 fprintf(f, "%s%s", p, str(con->label));
170 if (con->bits.i)
171 fprintf(f, "%+"PRId64, con->bits.i);
172 break;
173 case CBits:
174 fprintf(f, "%"PRId64, con->bits.i);
175 break;
176 default:
177 die("unreachable");
181 static char *
182 regtoa(int reg, int sz)
184 static char buf[6];
186 if (reg >= XMM0) {
187 sprintf(buf, "xmm%d", reg-XMM0);
188 return buf;
189 } else
190 return rname[reg][sz];
193 static Ref
194 getarg(char c, Ins *i)
196 switch (c) {
197 case '0':
198 return i->arg[0];
199 case '1':
200 return i->arg[1];
201 case '=':
202 return i->to;
203 default:
204 die("invalid arg letter %c", c);
208 static void emitins(Ins, Fn *, FILE *);
210 static void
211 emitcopy(Ref r1, Ref r2, int k, Fn *fn, FILE *f)
213 Ins icp;
215 icp.op = Ocopy;
216 icp.arg[0] = r2;
217 icp.to = r1;
218 icp.cls = k;
219 emitins(icp, fn, f);
222 static void
223 emitf(char *s, Ins *i, Fn *fn, FILE *f)
225 static char clstoa[][3] = {"l", "q", "ss", "sd"};
226 char c;
227 int sz;
228 Ref ref;
229 Mem *m;
230 Con off;
232 switch (*s) {
233 case '+':
234 if (req(i->arg[1], i->to)) {
235 ref = i->arg[0];
236 i->arg[0] = i->arg[1];
237 i->arg[1] = ref;
239 /* fall through */
240 case '-':
241 assert((!req(i->arg[1], i->to) || req(i->arg[0], i->to)) &&
242 "cannot convert to 2-address");
243 emitcopy(i->to, i->arg[0], i->cls, fn, f);
244 s++;
245 break;
248 fputc('\t', f);
249 Next:
250 while ((c = *s++) != '%')
251 if (!c) {
252 fputc('\n', f);
253 return;
254 } else
255 fputc(c, f);
256 switch ((c = *s++)) {
257 case '%':
258 fputc('%', f);
259 break;
260 case 'k':
261 fputs(clstoa[i->cls], f);
262 break;
263 case '0':
264 case '1':
265 case '=':
266 sz = KWIDE(i->cls) ? SLong : SWord;
267 s--;
268 goto Ref;
269 case 'D':
270 case 'S':
271 sz = SLong; /* does not matter for floats */
272 Ref:
273 c = *s++;
274 ref = getarg(c, i);
275 switch (rtype(ref)) {
276 case RTmp:
277 assert(isreg(ref));
278 fprintf(f, "%%%s", regtoa(ref.val, sz));
279 break;
280 case RSlot:
281 fprintf(f, "%d(%%rbp)", slot(ref.val, fn));
282 break;
283 case RMem:
284 Mem:
285 m = &fn->mem[ref.val];
286 if (rtype(m->base) == RSlot) {
287 off.type = CBits;
288 off.bits.i = slot(m->base.val, fn);
289 addcon(&m->offset, &off);
290 m->base = TMP(RBP);
292 if (m->offset.type != CUndef)
293 emitcon(&m->offset, f);
294 fputc('(', f);
295 if (req(m->base, R))
296 fprintf(f, "%%rip");
297 else
298 fprintf(f, "%%%s", regtoa(m->base.val, SLong));
299 if (!req(m->index, R))
300 fprintf(f, ", %%%s, %d",
301 regtoa(m->index.val, SLong),
302 m->scale
304 fputc(')', f);
305 break;
306 case RCon:
307 fputc('$', f);
308 emitcon(&fn->con[ref.val], f);
309 break;
310 default:
311 die("unreachable");
313 break;
314 case 'L':
315 sz = SLong;
316 goto Ref;
317 case 'W':
318 sz = SWord;
319 goto Ref;
320 case 'H':
321 sz = SShort;
322 goto Ref;
323 case 'B':
324 sz = SByte;
325 goto Ref;
326 case 'M':
327 c = *s++;
328 ref = getarg(c, i);
329 switch (rtype(ref)) {
330 case RMem:
331 goto Mem;
332 case RSlot:
333 fprintf(f, "%d(%%rbp)", slot(ref.val, fn));
334 break;
335 case RCon:
336 emitcon(&fn->con[ref.val], f);
337 fprintf(f, "(%%rip)");
338 break;
339 case RTmp:
340 assert(isreg(ref));
341 fprintf(f, "(%%%s)", regtoa(ref.val, SLong));
342 break;
343 default:
344 die("unreachable");
346 break;
347 default:
348 die("invalid format specifier %%%c", c);
350 goto Next;
353 static void *negmask[4] = {
354 [Ks] = (uint32_t[4]){ 0x80000000 },
355 [Kd] = (uint64_t[2]){ 0x8000000000000000 },
358 static void
359 emitins(Ins i, Fn *fn, FILE *f)
361 Ref r;
362 int64_t val;
363 int o, t0;
365 switch (i.op) {
366 default:
367 Table:
368 /* most instructions are just pulled out of
369 * the table omap[], some special cases are
370 * detailed below */
371 for (o=0;; o++) {
372 /* this linear search should really be a binary
373 * search */
374 if (omap[o].op == NOp)
375 die("no match for %s(%d)",
376 optab[i.op].name, "wlsd"[i.cls]);
377 if (omap[o].op == i.op)
378 if (omap[o].cls == i.cls
379 || (omap[o].cls == Ki && KBASE(i.cls) == 0)
380 || (omap[o].cls == Ka))
381 break;
383 emitf(omap[o].asm, &i, fn, f);
384 break;
385 case Onop:
386 /* just do nothing for nops, they are inserted
387 * by some passes */
388 break;
389 case Omul:
390 /* here, we try to use the 3-addresss form
391 * of multiplication when possible */
392 if (rtype(i.arg[1]) == RCon) {
393 r = i.arg[0];
394 i.arg[0] = i.arg[1];
395 i.arg[1] = r;
397 if (KBASE(i.cls) == 0 /* only available for ints */
398 && rtype(i.arg[0]) == RCon
399 && rtype(i.arg[1]) == RTmp) {
400 emitf("imul%k %0, %1, %=", &i, fn, f);
401 break;
403 goto Table;
404 case Osub:
405 /* we have to use the negation trick to handle
406 * some 3-address subtractions */
407 if (req(i.to, i.arg[1])) {
408 if (KBASE(i.cls) == 0)
409 emitf("neg%k %=", &i, fn, f);
410 else
411 fprintf(f,
412 "\txorp%c %sfp%d(%%rip), %%%s\n",
413 "xxsd"[i.cls],
414 gasloc,
415 gasstash(negmask[i.cls], 16),
416 regtoa(i.to.val, SLong)
418 emitf("add%k %0, %=", &i, fn, f);
419 break;
421 goto Table;
422 case Odiv:
423 /* use xmm15 to adjust the instruction when the
424 * conversion to 2-address in emitf() would fail */
425 if (req(i.to, i.arg[1])) {
426 i.arg[1] = TMP(XMM0+15);
427 emitf("mov%k %=, %1", &i, fn, f);
428 emitf("mov%k %0, %=", &i, fn, f);
429 i.arg[0] = i.to;
431 goto Table;
432 case Ocopy:
433 /* make sure we don't emit useless copies,
434 * also, we can use a trick to load 64-bits
435 * registers, it's detailed in my note below
436 * http://c9x.me/art/notes.html?09/19/2015 */
437 t0 = rtype(i.arg[0]);
438 if (req(i.to, R) || req(i.arg[0], R))
439 break;
440 if (isreg(i.to)
441 && t0 == RCon
442 && i.cls == Kl
443 && fn->con[i.arg[0].val].type == CBits
444 && (val = fn->con[i.arg[0].val].bits.i) >= 0
445 && val <= UINT32_MAX) {
446 emitf("movl %W0, %W=", &i, fn, f);
447 } else if (isreg(i.to)
448 && t0 == RCon
449 && fn->con[i.arg[0].val].type == CAddr) {
450 emitf("lea%k %M0, %=", &i, fn, f);
451 } else if (rtype(i.to) == RSlot
452 && (t0 == RSlot || t0 == RMem)) {
453 i.cls = KWIDE(i.cls) ? Kd : Ks;
454 i.arg[1] = TMP(XMM0+15);
455 emitf("mov%k %0, %1", &i, fn, f);
456 emitf("mov%k %1, %=", &i, fn, f);
458 } else if (!req(i.arg[0], i.to))
459 emitf("mov%k %0, %=", &i, fn, f);
460 break;
461 case Ocall:
462 /* calls simply have a weird syntax in AT&T
463 * assembly... */
464 switch (rtype(i.arg[0])) {
465 case RCon:
466 fprintf(f, "\tcallq ");
467 emitcon(&fn->con[i.arg[0].val], f);
468 fprintf(f, "\n");
469 break;
470 case RTmp:
471 emitf("callq *%L0", &i, fn, f);
472 break;
473 default:
474 die("invalid call argument");
476 break;
477 case Osalloc:
478 /* there is no good reason why this is here
479 * maybe we should split Osalloc in 2 different
480 * instructions depending on the result
482 emitf("subq %L0, %%rsp", &i, fn, f);
483 if (!req(i.to, R))
484 emitcopy(i.to, TMP(RSP), Kl, fn, f);
485 break;
486 case Oswap:
487 if (KBASE(i.cls) == 0)
488 goto Table;
489 /* for floats, there is no swap instruction
490 * so we use xmm15 as a temporary
492 emitcopy(TMP(XMM0+15), i.arg[0], i.cls, fn, f);
493 emitcopy(i.arg[0], i.arg[1], i.cls, fn, f);
494 emitcopy(i.arg[1], TMP(XMM0+15), i.cls, fn, f);
495 break;
499 static uint64_t
500 framesz(Fn *fn)
502 uint64_t i, o, f;
504 /* specific to NAlign == 3 */
505 for (i=0, o=0; i<NCLR; i++)
506 o ^= 1 & (fn->reg >> amd64_sysv_rclob[i]);
507 f = fn->slot;
508 f = (f + 3) & -4;
509 return 4*f + 8*o + 176*fn->vararg;
512 void
513 amd64_emitfn(Fn *fn, FILE *f)
515 static char *ctoa[] = {
516 #define X(c, s) [c] = s,
517 CMP(X)
518 #undef X
520 static int id0;
521 Blk *b, *s;
522 Ins *i, itmp;
523 int *r, c, o, n, lbl;
524 uint64_t fs;
526 fprintf(f, ".text\n");
527 if (fn->export)
528 fprintf(f, ".globl %s%s\n", gassym, fn->name);
529 fprintf(f,
530 "%s%s:\n"
531 "\tpushq %%rbp\n"
532 "\tmovq %%rsp, %%rbp\n",
533 gassym, fn->name
535 fs = framesz(fn);
536 if (fs)
537 fprintf(f, "\tsub $%"PRIu64", %%rsp\n", fs);
538 if (fn->vararg) {
539 o = -176;
540 for (r=amd64_sysv_rsave; r<&amd64_sysv_rsave[6]; r++, o+=8)
541 fprintf(f, "\tmovq %%%s, %d(%%rbp)\n", rname[*r][0], o);
542 for (n=0; n<8; ++n, o+=16)
543 fprintf(f, "\tmovaps %%xmm%d, %d(%%rbp)\n", n, o);
545 for (r=amd64_sysv_rclob; r<&amd64_sysv_rclob[NCLR]; r++)
546 if (fn->reg & BIT(*r)) {
547 itmp.arg[0] = TMP(*r);
548 emitf("pushq %L0", &itmp, fn, f);
549 fs += 8;
552 for (lbl=0, b=fn->start; b; b=b->link) {
553 if (lbl || b->npred > 1)
554 fprintf(f, "%sbb%d:\n", gasloc, id0+b->id);
555 for (i=b->ins; i!=&b->ins[b->nins]; i++)
556 emitins(*i, fn, f);
557 lbl = 1;
558 switch (b->jmp.type) {
559 case Jret0:
560 if (fn->dynalloc)
561 fprintf(f,
562 "\tmovq %%rbp, %%rsp\n"
563 "\tsubq $%"PRIu64", %%rsp\n",
566 for (r=&amd64_sysv_rclob[NCLR]; r>amd64_sysv_rclob;)
567 if (fn->reg & BIT(*--r)) {
568 itmp.arg[0] = TMP(*r);
569 emitf("popq %L0", &itmp, fn, f);
571 fprintf(f,
572 "\tleave\n"
573 "\tret\n"
575 break;
576 case Jjmp:
577 Jmp:
578 if (b->s1 != b->link)
579 fprintf(f, "\tjmp %sbb%d\n",
580 gasloc, id0+b->s1->id);
581 else
582 lbl = 0;
583 break;
584 default:
585 c = b->jmp.type - Jjf;
586 if (0 <= c && c <= NCmp) {
587 if (b->link == b->s2) {
588 s = b->s1;
589 b->s1 = b->s2;
590 b->s2 = s;
591 } else
592 c = cmpneg(c);
593 fprintf(f, "\tj%s %sbb%d\n", ctoa[c],
594 gasloc, id0+b->s2->id);
595 goto Jmp;
597 die("unhandled jump %d", b->jmp.type);
600 id0 += fn->nblk;