Optab-driven copy detection
[qbe.git] / amd64 / emit.c
blob2a9b9d229bdd9e612c13f720162e727d54b19568
1 #include "all.h"
4 #define CMP(X) \
5 X(Ciule, "be") \
6 X(Ciult, "b") \
7 X(Cisle, "le") \
8 X(Cislt, "l") \
9 X(Cisgt, "g") \
10 X(Cisge, "ge") \
11 X(Ciugt, "a") \
12 X(Ciuge, "ae") \
13 X(Cieq, "z") \
14 X(Cine, "nz") \
15 X(NCmpI+Cfle, "be") \
16 X(NCmpI+Cflt, "b") \
17 X(NCmpI+Cfgt, "a") \
18 X(NCmpI+Cfge, "ae") \
19 X(NCmpI+Cfeq, "z") \
20 X(NCmpI+Cfne, "nz") \
21 X(NCmpI+Cfo, "np") \
22 X(NCmpI+Cfuo, "p")
24 enum {
25 SLong = 0,
26 SWord = 1,
27 SShort = 2,
28 SByte = 3,
30 Ki = -1, /* matches Kw and Kl */
31 Ka = -2, /* matches all classes */
34 /* Instruction format strings:
36 * if the format string starts with -, the instruction
37 * is assumed to be 3-address and is put in 2-address
38 * mode using an extra mov if necessary
40 * if the format string starts with +, the same as the
41 * above applies, but commutativity is also assumed
43 * %k is used to set the class of the instruction,
44 * it'll expand to "l", "q", "ss", "sd", depending
45 * on the instruction class
46 * %0 designates the first argument
47 * %1 designates the second argument
48 * %= designates the result
50 * if %k is not used, a prefix to 0, 1, or = must be
51 * added, it can be:
52 * M - memory reference
53 * L - long (64 bits)
54 * W - word (32 bits)
55 * H - short (16 bits)
56 * B - byte (8 bits)
57 * S - single precision float
58 * D - double precision float
60 static struct {
61 short op;
62 short cls;
63 char *fmt;
64 } omap[] = {
65 { Oadd, Ka, "+add%k %1, %=" },
66 { Osub, Ka, "-sub%k %1, %=" },
67 { Oand, Ki, "+and%k %1, %=" },
68 { Oor, Ki, "+or%k %1, %=" },
69 { Oxor, Ki, "+xor%k %1, %=" },
70 { Osar, Ki, "-sar%k %B1, %=" },
71 { Oshr, Ki, "-shr%k %B1, %=" },
72 { Oshl, Ki, "-shl%k %B1, %=" },
73 { Omul, Ki, "+imul%k %1, %=" },
74 { Omul, Ks, "+mulss %1, %=" },
75 { Omul, Kd, "+mulsd %1, %=" },
76 { Odiv, Ka, "-div%k %1, %=" },
77 { Ostorel, Ka, "movq %L0, %M1" },
78 { Ostorew, Ka, "movl %W0, %M1" },
79 { Ostoreh, Ka, "movw %H0, %M1" },
80 { Ostoreb, Ka, "movb %B0, %M1" },
81 { Ostores, Ka, "movss %S0, %M1" },
82 { Ostored, Ka, "movsd %D0, %M1" },
83 { Oload, Ka, "mov%k %M0, %=" },
84 { Oloadsw, Kl, "movslq %M0, %L=" },
85 { Oloadsw, Kw, "movl %M0, %W=" },
86 { Oloaduw, Ki, "movl %M0, %W=" },
87 { Oloadsh, Ki, "movsw%k %M0, %=" },
88 { Oloaduh, Ki, "movzw%k %M0, %=" },
89 { Oloadsb, Ki, "movsb%k %M0, %=" },
90 { Oloadub, Ki, "movzb%k %M0, %=" },
91 { Oextsw, Kl, "movslq %W0, %L=" },
92 { Oextuw, Kl, "movl %W0, %W=" },
93 { Oextsh, Ki, "movsw%k %H0, %=" },
94 { Oextuh, Ki, "movzw%k %H0, %=" },
95 { Oextsb, Ki, "movsb%k %B0, %=" },
96 { Oextub, Ki, "movzb%k %B0, %=" },
98 { Oexts, Kd, "cvtss2sd %0, %=" },
99 { Otruncd, Ks, "cvtsd2ss %0, %=" },
100 { Ostosi, Ki, "cvttss2si%k %0, %=" },
101 { Odtosi, Ki, "cvttsd2si%k %0, %=" },
102 { Oswtof, Ka, "cvtsi2%k %W0, %=" },
103 { Osltof, Ka, "cvtsi2%k %L0, %=" },
104 { Ocast, Ki, "movq %D0, %L=" },
105 { Ocast, Ka, "movq %L0, %D=" },
107 { Oaddr, Ki, "lea%k %M0, %=" },
108 { Oswap, Ki, "xchg%k %0, %1" },
109 { Osign, Kl, "cqto" },
110 { Osign, Kw, "cltd" },
111 { Oxdiv, Ki, "div%k %0" },
112 { Oxidiv, Ki, "idiv%k %0" },
113 { Oxcmp, Ks, "ucomiss %S0, %S1" },
114 { Oxcmp, Kd, "ucomisd %D0, %D1" },
115 { Oxcmp, Ki, "cmp%k %0, %1" },
116 { Oxtest, Ki, "test%k %0, %1" },
117 #define X(c, s) \
118 { Oflag+c, Ki, "set" s " %B=\n\tmovzb%k %B=, %=" },
119 CMP(X)
120 #undef X
121 { NOp, 0, 0 }
124 static char *rname[][4] = {
125 [RAX] = {"rax", "eax", "ax", "al"},
126 [RBX] = {"rbx", "ebx", "bx", "bl"},
127 [RCX] = {"rcx", "ecx", "cx", "cl"},
128 [RDX] = {"rdx", "edx", "dx", "dl"},
129 [RSI] = {"rsi", "esi", "si", "sil"},
130 [RDI] = {"rdi", "edi", "di", "dil"},
131 [RBP] = {"rbp", "ebp", "bp", "bpl"},
132 [RSP] = {"rsp", "esp", "sp", "spl"},
133 [R8 ] = {"r8" , "r8d", "r8w", "r8b"},
134 [R9 ] = {"r9" , "r9d", "r9w", "r9b"},
135 [R10] = {"r10", "r10d", "r10w", "r10b"},
136 [R11] = {"r11", "r11d", "r11w", "r11b"},
137 [R12] = {"r12", "r12d", "r12w", "r12b"},
138 [R13] = {"r13", "r13d", "r13w", "r13b"},
139 [R14] = {"r14", "r14d", "r14w", "r14b"},
140 [R15] = {"r15", "r15d", "r15w", "r15b"},
144 static int
145 slot(Ref r, Fn *fn)
147 int s;
149 s = rsval(r);
150 assert(s <= fn->slot);
151 /* specific to NAlign == 3 */
152 if (s < 0)
153 return -4 * s;
154 else if (fn->vararg)
155 return -176 + -4 * (fn->slot - s);
156 else
157 return -4 * (fn->slot - s);
160 static void
161 emitcon(Con *con, FILE *f)
163 char *p, *l;
165 switch (con->type) {
166 case CAddr:
167 l = str(con->sym.id);
168 p = l[0] == '"' ? "" : T.assym;
169 if (con->sym.type == SThr) {
170 if (T.apple)
171 fprintf(f, "%s%s@TLVP", p, l);
172 else
173 fprintf(f, "%%fs:%s%s@tpoff", p, l);
174 } else
175 fprintf(f, "%s%s", p, l);
176 if (con->bits.i)
177 fprintf(f, "%+"PRId64, con->bits.i);
178 break;
179 case CBits:
180 fprintf(f, "%"PRId64, con->bits.i);
181 break;
182 default:
183 die("unreachable");
187 static char *
188 regtoa(int reg, int sz)
190 static char buf[6];
192 assert(reg <= XMM15);
193 if (reg >= XMM0) {
194 sprintf(buf, "xmm%d", reg-XMM0);
195 return buf;
196 } else
197 return rname[reg][sz];
200 static Ref
201 getarg(char c, Ins *i)
203 switch (c) {
204 case '0':
205 return i->arg[0];
206 case '1':
207 return i->arg[1];
208 case '=':
209 return i->to;
210 default:
211 die("invalid arg letter %c", c);
215 static void emitins(Ins, Fn *, FILE *);
217 static void
218 emitcopy(Ref r1, Ref r2, int k, Fn *fn, FILE *f)
220 Ins icp;
222 icp.op = Ocopy;
223 icp.arg[0] = r2;
224 icp.to = r1;
225 icp.cls = k;
226 emitins(icp, fn, f);
229 static void
230 emitf(char *s, Ins *i, Fn *fn, FILE *f)
232 static char clstoa[][3] = {"l", "q", "ss", "sd"};
233 char c;
234 int sz;
235 Ref ref;
236 Mem *m;
237 Con off;
239 switch (*s) {
240 case '+':
241 if (req(i->arg[1], i->to)) {
242 ref = i->arg[0];
243 i->arg[0] = i->arg[1];
244 i->arg[1] = ref;
246 /* fall through */
247 case '-':
248 assert((!req(i->arg[1], i->to) || req(i->arg[0], i->to)) &&
249 "cannot convert to 2-address");
250 emitcopy(i->to, i->arg[0], i->cls, fn, f);
251 s++;
252 break;
255 fputc('\t', f);
256 Next:
257 while ((c = *s++) != '%')
258 if (!c) {
259 fputc('\n', f);
260 return;
261 } else
262 fputc(c, f);
263 switch ((c = *s++)) {
264 case '%':
265 fputc('%', f);
266 break;
267 case 'k':
268 fputs(clstoa[i->cls], f);
269 break;
270 case '0':
271 case '1':
272 case '=':
273 sz = KWIDE(i->cls) ? SLong : SWord;
274 s--;
275 goto Ref;
276 case 'D':
277 case 'S':
278 sz = SLong; /* does not matter for floats */
279 Ref:
280 c = *s++;
281 ref = getarg(c, i);
282 switch (rtype(ref)) {
283 case RTmp:
284 assert(isreg(ref));
285 fprintf(f, "%%%s", regtoa(ref.val, sz));
286 break;
287 case RSlot:
288 fprintf(f, "%d(%%rbp)", slot(ref, fn));
289 break;
290 case RMem:
291 Mem:
292 m = &fn->mem[ref.val];
293 if (rtype(m->base) == RSlot) {
294 off.type = CBits;
295 off.bits.i = slot(m->base, fn);
296 addcon(&m->offset, &off, 1);
297 m->base = TMP(RBP);
299 if (m->offset.type != CUndef)
300 emitcon(&m->offset, f);
301 fputc('(', f);
302 if (!req(m->base, R))
303 fprintf(f, "%%%s", regtoa(m->base.val, SLong));
304 else if (m->offset.type == CAddr)
305 fprintf(f, "%%rip");
306 if (!req(m->index, R))
307 fprintf(f, ", %%%s, %d",
308 regtoa(m->index.val, SLong),
309 m->scale
311 fputc(')', f);
312 break;
313 case RCon:
314 fputc('$', f);
315 emitcon(&fn->con[ref.val], f);
316 break;
317 default:
318 die("unreachable");
320 break;
321 case 'L':
322 sz = SLong;
323 goto Ref;
324 case 'W':
325 sz = SWord;
326 goto Ref;
327 case 'H':
328 sz = SShort;
329 goto Ref;
330 case 'B':
331 sz = SByte;
332 goto Ref;
333 case 'M':
334 c = *s++;
335 ref = getarg(c, i);
336 switch (rtype(ref)) {
337 case RMem:
338 goto Mem;
339 case RSlot:
340 fprintf(f, "%d(%%rbp)", slot(ref, fn));
341 break;
342 case RCon:
343 off = fn->con[ref.val];
344 emitcon(&off, f);
345 if (off.type == CAddr)
346 if (off.sym.type != SThr || T.apple)
347 fprintf(f, "(%%rip)");
348 break;
349 case RTmp:
350 assert(isreg(ref));
351 fprintf(f, "(%%%s)", regtoa(ref.val, SLong));
352 break;
353 default:
354 die("unreachable");
356 break;
357 default:
358 die("invalid format specifier %%%c", c);
360 goto Next;
363 static void *negmask[4] = {
364 [Ks] = (uint32_t[4]){ 0x80000000 },
365 [Kd] = (uint64_t[2]){ 0x8000000000000000 },
368 static void
369 emitins(Ins i, Fn *fn, FILE *f)
371 Ref r;
372 int64_t val;
373 int o, t0;
374 Ins ineg;
375 Con *con;
376 char *sym;
378 switch (i.op) {
379 default:
380 Table:
381 /* most instructions are just pulled out of
382 * the table omap[], some special cases are
383 * detailed below */
384 for (o=0;; o++) {
385 /* this linear search should really be a binary
386 * search */
387 if (omap[o].op == NOp)
388 die("no match for %s(%c)",
389 optab[i.op].name, "wlsd"[i.cls]);
390 if (omap[o].op == i.op)
391 if (omap[o].cls == i.cls
392 || (omap[o].cls == Ki && KBASE(i.cls) == 0)
393 || (omap[o].cls == Ka))
394 break;
396 emitf(omap[o].fmt, &i, fn, f);
397 break;
398 case Onop:
399 /* just do nothing for nops, they are inserted
400 * by some passes */
401 break;
402 case Omul:
403 /* here, we try to use the 3-addresss form
404 * of multiplication when possible */
405 if (rtype(i.arg[1]) == RCon) {
406 r = i.arg[0];
407 i.arg[0] = i.arg[1];
408 i.arg[1] = r;
410 if (KBASE(i.cls) == 0 /* only available for ints */
411 && rtype(i.arg[0]) == RCon
412 && rtype(i.arg[1]) == RTmp) {
413 emitf("imul%k %0, %1, %=", &i, fn, f);
414 break;
416 goto Table;
417 case Osub:
418 /* we have to use the negation trick to handle
419 * some 3-address subtractions */
420 if (req(i.to, i.arg[1]) && !req(i.arg[0], i.to)) {
421 ineg = (Ins){Oneg, i.cls, i.to, {i.to}};
422 emitins(ineg, fn, f);
423 emitf("add%k %0, %=", &i, fn, f);
424 break;
426 goto Table;
427 case Oneg:
428 if (!req(i.to, i.arg[0]))
429 emitf("mov%k %0, %=", &i, fn, f);
430 if (KBASE(i.cls) == 0)
431 emitf("neg%k %=", &i, fn, f);
432 else
433 fprintf(f,
434 "\txorp%c %sfp%d(%%rip), %%%s\n",
435 "xxsd"[i.cls],
436 T.asloc,
437 stashbits(negmask[i.cls], 16),
438 regtoa(i.to.val, SLong)
440 break;
441 case Odiv:
442 /* use xmm15 to adjust the instruction when the
443 * conversion to 2-address in emitf() would fail */
444 if (req(i.to, i.arg[1])) {
445 i.arg[1] = TMP(XMM0+15);
446 emitf("mov%k %=, %1", &i, fn, f);
447 emitf("mov%k %0, %=", &i, fn, f);
448 i.arg[0] = i.to;
450 goto Table;
451 case Ocopy:
452 /* copies are used for many things; see my note
453 * to understand how to load big constants:
454 * https://c9x.me/notes/2015-09-19.html */
455 assert(rtype(i.to) != RMem);
456 if (req(i.to, R) || req(i.arg[0], R))
457 break;
458 if (req(i.to, i.arg[0]))
459 break;
460 t0 = rtype(i.arg[0]);
461 if (i.cls == Kl
462 && t0 == RCon
463 && fn->con[i.arg[0].val].type == CBits) {
464 val = fn->con[i.arg[0].val].bits.i;
465 if (isreg(i.to))
466 if (val >= 0 && val <= UINT32_MAX) {
467 emitf("movl %W0, %W=", &i, fn, f);
468 break;
470 if (rtype(i.to) == RSlot)
471 if (val < INT32_MIN || val > INT32_MAX) {
472 emitf("movl %0, %=", &i, fn, f);
473 emitf("movl %0>>32, 4+%=", &i, fn, f);
474 break;
477 if (isreg(i.to)
478 && t0 == RCon
479 && fn->con[i.arg[0].val].type == CAddr) {
480 emitf("lea%k %M0, %=", &i, fn, f);
481 break;
483 if (rtype(i.to) == RSlot
484 && (t0 == RSlot || t0 == RMem)) {
485 i.cls = KWIDE(i.cls) ? Kd : Ks;
486 i.arg[1] = TMP(XMM0+15);
487 emitf("mov%k %0, %1", &i, fn, f);
488 emitf("mov%k %1, %=", &i, fn, f);
489 break;
491 /* conveniently, the assembler knows if it
492 * should use movabsq when reading movq */
493 emitf("mov%k %0, %=", &i, fn, f);
494 break;
495 case Oaddr:
496 if (!T.apple
497 && rtype(i.arg[0]) == RCon
498 && fn->con[i.arg[0].val].sym.type == SThr) {
499 /* derive the symbol address from the TCB
500 * address at offset 0 of %fs */
501 assert(isreg(i.to));
502 con = &fn->con[i.arg[0].val];
503 sym = str(con->sym.id);
504 emitf("movq %%fs:0, %L=", &i, fn, f);
505 fprintf(f, "\tleaq %s%s@tpoff",
506 sym[0] == '"' ? "" : T.assym, sym);
507 if (con->bits.i)
508 fprintf(f, "%+"PRId64, con->bits.i);
509 fprintf(f, "(%%%s), %%%s\n",
510 regtoa(i.to.val, SLong),
511 regtoa(i.to.val, SLong));
512 break;
514 goto Table;
515 case Ocall:
516 /* calls simply have a weird syntax in AT&T
517 * assembly... */
518 switch (rtype(i.arg[0])) {
519 case RCon:
520 fprintf(f, "\tcallq ");
521 emitcon(&fn->con[i.arg[0].val], f);
522 fprintf(f, "\n");
523 break;
524 case RTmp:
525 emitf("callq *%L0", &i, fn, f);
526 break;
527 default:
528 die("invalid call argument");
530 break;
531 case Osalloc:
532 /* there is no good reason why this is here
533 * maybe we should split Osalloc in 2 different
534 * instructions depending on the result
536 emitf("subq %L0, %%rsp", &i, fn, f);
537 if (!req(i.to, R))
538 emitcopy(i.to, TMP(RSP), Kl, fn, f);
539 break;
540 case Oswap:
541 if (KBASE(i.cls) == 0)
542 goto Table;
543 /* for floats, there is no swap instruction
544 * so we use xmm15 as a temporary
546 emitcopy(TMP(XMM0+15), i.arg[0], i.cls, fn, f);
547 emitcopy(i.arg[0], i.arg[1], i.cls, fn, f);
548 emitcopy(i.arg[1], TMP(XMM0+15), i.cls, fn, f);
549 break;
550 case Odbgloc:
551 emitdbgloc(i.arg[0].val, i.arg[1].val, f);
552 break;
556 static uint64_t
557 framesz(Fn *fn)
559 uint64_t i, o, f;
561 /* specific to NAlign == 3 */
562 for (i=0, o=0; i<NCLR; i++)
563 o ^= 1 & (fn->reg >> amd64_sysv_rclob[i]);
564 f = fn->slot;
565 f = (f + 3) & -4;
566 return 4*f + 8*o + 176*fn->vararg;
569 void
570 amd64_emitfn(Fn *fn, FILE *f)
572 static char *ctoa[] = {
573 #define X(c, s) [c] = s,
574 CMP(X)
575 #undef X
577 static int id0;
578 Blk *b, *s;
579 Ins *i, itmp;
580 int *r, c, o, n, lbl;
581 uint64_t fs;
583 emitfnlnk(fn->name, &fn->lnk, f);
584 fputs("\tpushq %rbp\n\tmovq %rsp, %rbp\n", f);
585 fs = framesz(fn);
586 if (fs)
587 fprintf(f, "\tsubq $%"PRIu64", %%rsp\n", fs);
588 if (fn->vararg) {
589 o = -176;
590 for (r=amd64_sysv_rsave; r<&amd64_sysv_rsave[6]; r++, o+=8)
591 fprintf(f, "\tmovq %%%s, %d(%%rbp)\n", rname[*r][0], o);
592 for (n=0; n<8; ++n, o+=16)
593 fprintf(f, "\tmovaps %%xmm%d, %d(%%rbp)\n", n, o);
595 for (r=amd64_sysv_rclob; r<&amd64_sysv_rclob[NCLR]; r++)
596 if (fn->reg & BIT(*r)) {
597 itmp.arg[0] = TMP(*r);
598 emitf("pushq %L0", &itmp, fn, f);
599 fs += 8;
602 for (lbl=0, b=fn->start; b; b=b->link) {
603 if (lbl || b->npred > 1)
604 fprintf(f, "%sbb%d:\n", T.asloc, id0+b->id);
605 for (i=b->ins; i!=&b->ins[b->nins]; i++)
606 emitins(*i, fn, f);
607 lbl = 1;
608 switch (b->jmp.type) {
609 case Jhlt:
610 fprintf(f, "\tud2\n");
611 break;
612 case Jret0:
613 if (fn->dynalloc)
614 fprintf(f,
615 "\tmovq %%rbp, %%rsp\n"
616 "\tsubq $%"PRIu64", %%rsp\n",
619 for (r=&amd64_sysv_rclob[NCLR]; r>amd64_sysv_rclob;)
620 if (fn->reg & BIT(*--r)) {
621 itmp.arg[0] = TMP(*r);
622 emitf("popq %L0", &itmp, fn, f);
624 fprintf(f,
625 "\tleave\n"
626 "\tret\n"
628 break;
629 case Jjmp:
630 Jmp:
631 if (b->s1 != b->link)
632 fprintf(f, "\tjmp %sbb%d\n",
633 T.asloc, id0+b->s1->id);
634 else
635 lbl = 0;
636 break;
637 default:
638 c = b->jmp.type - Jjf;
639 if (0 <= c && c <= NCmp) {
640 if (b->link == b->s2) {
641 s = b->s1;
642 b->s1 = b->s2;
643 b->s2 = s;
644 } else
645 c = cmpneg(c);
646 fprintf(f, "\tj%s %sbb%d\n", ctoa[c],
647 T.asloc, id0+b->s2->id);
648 goto Jmp;
650 die("unhandled jump %d", b->jmp.type);
653 id0 += fn->nblk;
654 if (!T.apple)
655 elf_emitfnfin(fn->name, f);