disable pie for arm64 tests
[qbe.git] / amd64 / emit.c
blobd8b29bd1887cf1c617fa53659910553acab61373
1 #include "all.h"
/* X-macro table of comparisons: X(code, suffix) pairs a comparison
 * code with the x86 condition suffix that is appended to "set" (in
 * omap[]) and to "j" (in amd64_emitfn()).  Floating-point codes are
 * offset by NCmpI and reuse the suffixes of the unsigned integer
 * comparisons, plus "np"/"p" for ordered/unordered.
 */
#define CMP(X) \
	X(Ciule,      "be") \
	X(Ciult,      "b")  \
	X(Cisle,      "le") \
	X(Cislt,      "l")  \
	X(Cisgt,      "g")  \
	X(Cisge,      "ge") \
	X(Ciugt,      "a")  \
	X(Ciuge,      "ae") \
	X(Cieq,       "z")  \
	X(Cine,       "nz") \
	X(NCmpI+Cfle, "be") \
	X(NCmpI+Cflt, "b")  \
	X(NCmpI+Cfgt, "a")  \
	X(NCmpI+Cfge, "ae") \
	X(NCmpI+Cfeq, "z")  \
	X(NCmpI+Cfne, "nz") \
	X(NCmpI+Cfo,  "np") \
	X(NCmpI+Cfuo, "p")
/* Operand-size selectors (used as the second index of rname[]) and
 * the class wildcards accepted in the cls field of omap[].
 */
enum {
	SLong = 0,
	SWord = 1,
	SShort = 2,
	SByte = 3,

	Ki = -1, /* matches Kw and Kl */
	Ka = -2, /* matches all classes */
};
34 /* Instruction format strings:
36 * if the format string starts with -, the instruction
37 * is assumed to be 3-address and is put in 2-address
38 * mode using an extra mov if necessary
40 * if the format string starts with +, the same as the
41 * above applies, but commutativity is also assumed
43 * %k is used to set the class of the instruction,
44 * it'll expand to "l", "q", "ss", "sd", depending
45 * on the instruction class
46 * %0 designates the first argument
47 * %1 designates the second argument
48 * %= designates the result
50 * if %k is not used, a prefix to 0, 1, or = must be
51 * added, it can be:
52 * M - memory reference
53 * L - long (64 bits)
54 * W - word (32 bits)
55 * H - short (16 bits)
56 * B - byte (8 bits)
57 * S - single precision float
58 * D - double precision float
60 static struct {
61 short op;
62 short cls;
63 char *asm;
64 } omap[] = {
65 { Oadd, Ka, "+add%k %1, %=" },
66 { Osub, Ka, "-sub%k %1, %=" },
67 { Oand, Ki, "+and%k %1, %=" },
68 { Oor, Ki, "+or%k %1, %=" },
69 { Oxor, Ki, "+xor%k %1, %=" },
70 { Osar, Ki, "-sar%k %B1, %=" },
71 { Oshr, Ki, "-shr%k %B1, %=" },
72 { Oshl, Ki, "-shl%k %B1, %=" },
73 { Omul, Ki, "+imul%k %1, %=" },
74 { Omul, Ks, "+mulss %1, %=" },
75 { Omul, Kd, "+mulsd %1, %=" },
76 { Odiv, Ka, "-div%k %1, %=" },
77 { Ostorel, Ka, "movq %L0, %M1" },
78 { Ostorew, Ka, "movl %W0, %M1" },
79 { Ostoreh, Ka, "movw %H0, %M1" },
80 { Ostoreb, Ka, "movb %B0, %M1" },
81 { Ostores, Ka, "movss %S0, %M1" },
82 { Ostored, Ka, "movsd %D0, %M1" },
83 { Oload, Ka, "mov%k %M0, %=" },
84 { Oloadsw, Kl, "movslq %M0, %L=" },
85 { Oloadsw, Kw, "movl %M0, %W=" },
86 { Oloaduw, Ki, "movl %M0, %W=" },
87 { Oloadsh, Ki, "movsw%k %M0, %=" },
88 { Oloaduh, Ki, "movzw%k %M0, %=" },
89 { Oloadsb, Ki, "movsb%k %M0, %=" },
90 { Oloadub, Ki, "movzb%k %M0, %=" },
91 { Oextsw, Kl, "movslq %W0, %L=" },
92 { Oextuw, Kl, "movl %W0, %W=" },
93 { Oextsh, Ki, "movsw%k %H0, %=" },
94 { Oextuh, Ki, "movzw%k %H0, %=" },
95 { Oextsb, Ki, "movsb%k %B0, %=" },
96 { Oextub, Ki, "movzb%k %B0, %=" },
98 { Oexts, Kd, "cvtss2sd %0, %=" },
99 { Otruncd, Ks, "cvtsd2ss %0, %=" },
100 { Ostosi, Ki, "cvttss2si%k %0, %=" },
101 { Odtosi, Ki, "cvttsd2si%k %0, %=" },
102 { Oswtof, Ka, "cvtsi2%k %W0, %=" },
103 { Osltof, Ka, "cvtsi2%k %L0, %=" },
104 { Ocast, Ki, "movq %D0, %L=" },
105 { Ocast, Ka, "movq %L0, %D=" },
107 { Oaddr, Ki, "lea%k %M0, %=" },
108 { Oswap, Ki, "xchg%k %0, %1" },
109 { Osign, Kl, "cqto" },
110 { Osign, Kw, "cltd" },
111 { Oxdiv, Ki, "div%k %0" },
112 { Oxidiv, Ki, "idiv%k %0" },
113 { Oxcmp, Ks, "ucomiss %S0, %S1" },
114 { Oxcmp, Kd, "ucomisd %D0, %D1" },
115 { Oxcmp, Ki, "cmp%k %0, %1" },
116 { Oxtest, Ki, "test%k %0, %1" },
117 #define X(c, s) \
118 { Oflag+c, Ki, "set" s " %B=\n\tmovzb%k %B=, %=" },
119 CMP(X)
120 #undef X
121 { NOp, 0, 0 }
124 static char *rname[][4] = {
125 [RAX] = {"rax", "eax", "ax", "al"},
126 [RBX] = {"rbx", "ebx", "bx", "bl"},
127 [RCX] = {"rcx", "ecx", "cx", "cl"},
128 [RDX] = {"rdx", "edx", "dx", "dl"},
129 [RSI] = {"rsi", "esi", "si", "sil"},
130 [RDI] = {"rdi", "edi", "di", "dil"},
131 [RBP] = {"rbp", "ebp", "bp", "bpl"},
132 [RSP] = {"rsp", "esp", "sp", "spl"},
133 [R8 ] = {"r8" , "r8d", "r8w", "r8b"},
134 [R9 ] = {"r9" , "r9d", "r9w", "r9b"},
135 [R10] = {"r10", "r10d", "r10w", "r10b"},
136 [R11] = {"r11", "r11d", "r11w", "r11b"},
137 [R12] = {"r12", "r12d", "r12w", "r12b"},
138 [R13] = {"r13", "r13d", "r13w", "r13b"},
139 [R14] = {"r14", "r14d", "r14w", "r14b"},
140 [R15] = {"r15", "r15d", "r15w", "r15b"},
144 static int
145 slot(int s, Fn *fn)
147 struct { int i:29; } x;
149 /* sign extend s using a bitfield */
150 x.i = s;
151 assert(x.i <= fn->slot);
152 /* specific to NAlign == 3 */
153 if (x.i < 0)
154 return -4 * x.i;
155 else if (fn->vararg)
156 return -176 + -4 * (fn->slot - x.i);
157 else
158 return -4 * (fn->slot - x.i);
161 static void
162 emitcon(Con *con, FILE *f)
164 char *p, *l;
166 switch (con->type) {
167 case CAddr:
168 l = str(con->label);
169 p = con->local ? gasloc : l[0] == '"' ? "" : gassym;
170 fprintf(f, "%s%s", p, l);
171 if (con->bits.i)
172 fprintf(f, "%+"PRId64, con->bits.i);
173 break;
174 case CBits:
175 fprintf(f, "%"PRId64, con->bits.i);
176 break;
177 default:
178 die("unreachable");
182 static char *
183 regtoa(int reg, int sz)
185 static char buf[6];
187 if (reg >= XMM0) {
188 sprintf(buf, "xmm%d", reg-XMM0);
189 return buf;
190 } else
191 return rname[reg][sz];
194 static Ref
195 getarg(char c, Ins *i)
197 switch (c) {
198 case '0':
199 return i->arg[0];
200 case '1':
201 return i->arg[1];
202 case '=':
203 return i->to;
204 default:
205 die("invalid arg letter %c", c);
209 static void emitins(Ins, Fn *, FILE *);
211 static void
212 emitcopy(Ref r1, Ref r2, int k, Fn *fn, FILE *f)
214 Ins icp;
216 icp.op = Ocopy;
217 icp.arg[0] = r2;
218 icp.to = r1;
219 icp.cls = k;
220 emitins(icp, fn, f);
223 static void
224 emitf(char *s, Ins *i, Fn *fn, FILE *f)
226 static char clstoa[][3] = {"l", "q", "ss", "sd"};
227 char c;
228 int sz;
229 Ref ref;
230 Mem *m;
231 Con off;
233 switch (*s) {
234 case '+':
235 if (req(i->arg[1], i->to)) {
236 ref = i->arg[0];
237 i->arg[0] = i->arg[1];
238 i->arg[1] = ref;
240 /* fall through */
241 case '-':
242 assert((!req(i->arg[1], i->to) || req(i->arg[0], i->to)) &&
243 "cannot convert to 2-address");
244 emitcopy(i->to, i->arg[0], i->cls, fn, f);
245 s++;
246 break;
249 fputc('\t', f);
250 Next:
251 while ((c = *s++) != '%')
252 if (!c) {
253 fputc('\n', f);
254 return;
255 } else
256 fputc(c, f);
257 switch ((c = *s++)) {
258 case '%':
259 fputc('%', f);
260 break;
261 case 'k':
262 fputs(clstoa[i->cls], f);
263 break;
264 case '0':
265 case '1':
266 case '=':
267 sz = KWIDE(i->cls) ? SLong : SWord;
268 s--;
269 goto Ref;
270 case 'D':
271 case 'S':
272 sz = SLong; /* does not matter for floats */
273 Ref:
274 c = *s++;
275 ref = getarg(c, i);
276 switch (rtype(ref)) {
277 case RTmp:
278 assert(isreg(ref));
279 fprintf(f, "%%%s", regtoa(ref.val, sz));
280 break;
281 case RSlot:
282 fprintf(f, "%d(%%rbp)", slot(ref.val, fn));
283 break;
284 case RMem:
285 Mem:
286 m = &fn->mem[ref.val];
287 if (rtype(m->base) == RSlot) {
288 off.type = CBits;
289 off.bits.i = slot(m->base.val, fn);
290 addcon(&m->offset, &off);
291 m->base = TMP(RBP);
293 if (m->offset.type != CUndef)
294 emitcon(&m->offset, f);
295 fputc('(', f);
296 if (!req(m->base, R))
297 fprintf(f, "%%%s", regtoa(m->base.val, SLong));
298 else if (m->offset.type == CAddr)
299 fprintf(f, "%%rip");
300 if (!req(m->index, R))
301 fprintf(f, ", %%%s, %d",
302 regtoa(m->index.val, SLong),
303 m->scale
305 fputc(')', f);
306 break;
307 case RCon:
308 fputc('$', f);
309 emitcon(&fn->con[ref.val], f);
310 break;
311 default:
312 die("unreachable");
314 break;
315 case 'L':
316 sz = SLong;
317 goto Ref;
318 case 'W':
319 sz = SWord;
320 goto Ref;
321 case 'H':
322 sz = SShort;
323 goto Ref;
324 case 'B':
325 sz = SByte;
326 goto Ref;
327 case 'M':
328 c = *s++;
329 ref = getarg(c, i);
330 switch (rtype(ref)) {
331 case RMem:
332 goto Mem;
333 case RSlot:
334 fprintf(f, "%d(%%rbp)", slot(ref.val, fn));
335 break;
336 case RCon:
337 off = fn->con[ref.val];
338 emitcon(&off, f);
339 if (off.type == CAddr)
340 fprintf(f, "(%%rip)");
341 break;
342 case RTmp:
343 assert(isreg(ref));
344 fprintf(f, "(%%%s)", regtoa(ref.val, SLong));
345 break;
346 default:
347 die("unreachable");
349 break;
350 default:
351 die("invalid format specifier %%%c", c);
353 goto Next;
356 static void *negmask[4] = {
357 [Ks] = (uint32_t[4]){ 0x80000000 },
358 [Kd] = (uint64_t[2]){ 0x8000000000000000 },
361 static void
362 emitins(Ins i, Fn *fn, FILE *f)
364 Ref r;
365 int64_t val;
366 int o, t0;
368 switch (i.op) {
369 default:
370 Table:
371 /* most instructions are just pulled out of
372 * the table omap[], some special cases are
373 * detailed below */
374 for (o=0;; o++) {
375 /* this linear search should really be a binary
376 * search */
377 if (omap[o].op == NOp)
378 die("no match for %s(%d)",
379 optab[i.op].name, "wlsd"[i.cls]);
380 if (omap[o].op == i.op)
381 if (omap[o].cls == i.cls
382 || (omap[o].cls == Ki && KBASE(i.cls) == 0)
383 || (omap[o].cls == Ka))
384 break;
386 emitf(omap[o].asm, &i, fn, f);
387 break;
388 case Onop:
389 /* just do nothing for nops, they are inserted
390 * by some passes */
391 break;
392 case Omul:
393 /* here, we try to use the 3-addresss form
394 * of multiplication when possible */
395 if (rtype(i.arg[1]) == RCon) {
396 r = i.arg[0];
397 i.arg[0] = i.arg[1];
398 i.arg[1] = r;
400 if (KBASE(i.cls) == 0 /* only available for ints */
401 && rtype(i.arg[0]) == RCon
402 && rtype(i.arg[1]) == RTmp) {
403 emitf("imul%k %0, %1, %=", &i, fn, f);
404 break;
406 goto Table;
407 case Osub:
408 /* we have to use the negation trick to handle
409 * some 3-address subtractions */
410 if (req(i.to, i.arg[1])) {
411 if (KBASE(i.cls) == 0)
412 emitf("neg%k %=", &i, fn, f);
413 else
414 fprintf(f,
415 "\txorp%c %sfp%d(%%rip), %%%s\n",
416 "xxsd"[i.cls],
417 gasloc,
418 gasstash(negmask[i.cls], 16),
419 regtoa(i.to.val, SLong)
421 emitf("add%k %0, %=", &i, fn, f);
422 break;
424 goto Table;
425 case Odiv:
426 /* use xmm15 to adjust the instruction when the
427 * conversion to 2-address in emitf() would fail */
428 if (req(i.to, i.arg[1])) {
429 i.arg[1] = TMP(XMM0+15);
430 emitf("mov%k %=, %1", &i, fn, f);
431 emitf("mov%k %0, %=", &i, fn, f);
432 i.arg[0] = i.to;
434 goto Table;
435 case Ocopy:
436 /* copies are used for many things; see my note
437 * to understand how to load big constants:
438 * https://c9x.me/notes/2015-09-19.html */
439 assert(rtype(i.to) != RMem);
440 if (req(i.to, R) || req(i.arg[0], R))
441 break;
442 if (req(i.to, i.arg[0]))
443 break;
444 t0 = rtype(i.arg[0]);
445 if (i.cls == Kl
446 && t0 == RCon
447 && fn->con[i.arg[0].val].type == CBits) {
448 val = fn->con[i.arg[0].val].bits.i;
449 if (isreg(i.to))
450 if (val >= 0 && val <= UINT32_MAX) {
451 emitf("movl %W0, %W=", &i, fn, f);
452 break;
454 if (rtype(i.to) == RSlot)
455 if (val < INT32_MIN || val > INT32_MAX) {
456 emitf("movl %0, %=", &i, fn, f);
457 emitf("movl %0>>32, 4+%=", &i, fn, f);
458 break;
461 if (isreg(i.to)
462 && t0 == RCon
463 && fn->con[i.arg[0].val].type == CAddr) {
464 emitf("lea%k %M0, %=", &i, fn, f);
465 break;
467 if (rtype(i.to) == RSlot
468 && (t0 == RSlot || t0 == RMem)) {
469 i.cls = KWIDE(i.cls) ? Kd : Ks;
470 i.arg[1] = TMP(XMM0+15);
471 emitf("mov%k %0, %1", &i, fn, f);
472 emitf("mov%k %1, %=", &i, fn, f);
473 break;
475 /* conveniently, the assembler knows if it
476 * should use movabsq when reading movq */
477 emitf("mov%k %0, %=", &i, fn, f);
478 break;
479 case Ocall:
480 /* calls simply have a weird syntax in AT&T
481 * assembly... */
482 switch (rtype(i.arg[0])) {
483 case RCon:
484 fprintf(f, "\tcallq ");
485 emitcon(&fn->con[i.arg[0].val], f);
486 fprintf(f, "\n");
487 break;
488 case RTmp:
489 emitf("callq *%L0", &i, fn, f);
490 break;
491 default:
492 die("invalid call argument");
494 break;
495 case Osalloc:
496 /* there is no good reason why this is here
497 * maybe we should split Osalloc in 2 different
498 * instructions depending on the result
500 emitf("subq %L0, %%rsp", &i, fn, f);
501 if (!req(i.to, R))
502 emitcopy(i.to, TMP(RSP), Kl, fn, f);
503 break;
504 case Oswap:
505 if (KBASE(i.cls) == 0)
506 goto Table;
507 /* for floats, there is no swap instruction
508 * so we use xmm15 as a temporary
510 emitcopy(TMP(XMM0+15), i.arg[0], i.cls, fn, f);
511 emitcopy(i.arg[0], i.arg[1], i.cls, fn, f);
512 emitcopy(i.arg[1], TMP(XMM0+15), i.cls, fn, f);
513 break;
517 static uint64_t
518 framesz(Fn *fn)
520 uint64_t i, o, f;
522 /* specific to NAlign == 3 */
523 for (i=0, o=0; i<NCLR; i++)
524 o ^= 1 & (fn->reg >> amd64_sysv_rclob[i]);
525 f = fn->slot;
526 f = (f + 3) & -4;
527 return 4*f + 8*o + 176*fn->vararg;
530 void
531 amd64_emitfn(Fn *fn, FILE *f)
533 static char *ctoa[] = {
534 #define X(c, s) [c] = s,
535 CMP(X)
536 #undef X
538 static int id0;
539 Blk *b, *s;
540 Ins *i, itmp;
541 int *r, c, o, n, lbl;
542 uint64_t fs;
543 char *p;
545 p = fn->name[0] == '"' ? "" : gassym;
546 fprintf(f, ".text\n");
547 if (fn->export)
548 fprintf(f, ".globl %s%s\n", p, fn->name);
549 fprintf(f,
550 "%s%s:\n"
551 "\tpushq %%rbp\n"
552 "\tmovq %%rsp, %%rbp\n",
553 p, fn->name
555 fs = framesz(fn);
556 if (fs)
557 fprintf(f, "\tsub $%"PRIu64", %%rsp\n", fs);
558 if (fn->vararg) {
559 o = -176;
560 for (r=amd64_sysv_rsave; r<&amd64_sysv_rsave[6]; r++, o+=8)
561 fprintf(f, "\tmovq %%%s, %d(%%rbp)\n", rname[*r][0], o);
562 for (n=0; n<8; ++n, o+=16)
563 fprintf(f, "\tmovaps %%xmm%d, %d(%%rbp)\n", n, o);
565 for (r=amd64_sysv_rclob; r<&amd64_sysv_rclob[NCLR]; r++)
566 if (fn->reg & BIT(*r)) {
567 itmp.arg[0] = TMP(*r);
568 emitf("pushq %L0", &itmp, fn, f);
569 fs += 8;
572 for (lbl=0, b=fn->start; b; b=b->link) {
573 if (lbl || b->npred > 1)
574 fprintf(f, "%sbb%d:\n", gasloc, id0+b->id);
575 for (i=b->ins; i!=&b->ins[b->nins]; i++)
576 emitins(*i, fn, f);
577 lbl = 1;
578 switch (b->jmp.type) {
579 case Jret0:
580 if (fn->dynalloc)
581 fprintf(f,
582 "\tmovq %%rbp, %%rsp\n"
583 "\tsubq $%"PRIu64", %%rsp\n",
586 for (r=&amd64_sysv_rclob[NCLR]; r>amd64_sysv_rclob;)
587 if (fn->reg & BIT(*--r)) {
588 itmp.arg[0] = TMP(*r);
589 emitf("popq %L0", &itmp, fn, f);
591 fprintf(f,
592 "\tleave\n"
593 "\tret\n"
595 break;
596 case Jjmp:
597 Jmp:
598 if (b->s1 != b->link)
599 fprintf(f, "\tjmp %sbb%d\n",
600 gasloc, id0+b->s1->id);
601 else
602 lbl = 0;
603 break;
604 default:
605 c = b->jmp.type - Jjf;
606 if (0 <= c && c <= NCmp) {
607 if (b->link == b->s2) {
608 s = b->s1;
609 b->s1 = b->s2;
610 b->s2 = s;
611 } else
612 c = cmpneg(c);
613 fprintf(f, "\tj%s %sbb%d\n", ctoa[c],
614 gasloc, id0+b->s2->id);
615 goto Jmp;
617 die("unhandled jump %d", b->jmp.type);
620 id0 += fn->nblk;