/*
 * [qbe.git] / amd64 / emit.c
 * blob b8e9e8e5338bd058e75630fa297eeb306bccb82a
 * shared linkage logic for func/data
 */
1 #include "all.h"
/* X-macro table pairing each comparison code with the x86
 * condition suffix appended to "set" (see omap[]) and to "j"
 * (see amd64_emitfn()); the Cf* codes are offset by NCmpI so
 * that they follow the Ci* codes in the same index space */
#define CMP(X) \
	X(Ciule,      "be") \
	X(Ciult,      "b")  \
	X(Cisle,      "le") \
	X(Cislt,      "l")  \
	X(Cisgt,      "g")  \
	X(Cisge,      "ge") \
	X(Ciugt,      "a")  \
	X(Ciuge,      "ae") \
	X(Cieq,       "z")  \
	X(Cine,       "nz") \
	X(NCmpI+Cfle, "be") \
	X(NCmpI+Cflt, "b")  \
	X(NCmpI+Cfgt, "a")  \
	X(NCmpI+Cfge, "ae") \
	X(NCmpI+Cfeq, "z")  \
	X(NCmpI+Cfne, "nz") \
	X(NCmpI+Cfo,  "np") \
	X(NCmpI+Cfuo, "p")
/* SLong..SByte select an operand width; they are used as the
 * second index of rname[] (see regtoa()).  Ki and Ka are
 * wildcard classes that only appear in the cls field of
 * omap[] entries. */
enum {
	SLong = 0,
	SWord = 1,
	SShort = 2,
	SByte = 3,

	Ki = -1, /* matches Kw and Kl */
	Ka = -2, /* matches all classes */
};
34 /* Instruction format strings:
36 * if the format string starts with -, the instruction
37 * is assumed to be 3-address and is put in 2-address
38 * mode using an extra mov if necessary
40 * if the format string starts with +, the same as the
41 * above applies, but commutativity is also assumed
43 * %k is used to set the class of the instruction,
44 * it'll expand to "l", "q", "ss", "sd", depending
45 * on the instruction class
46 * %0 designates the first argument
47 * %1 designates the second argument
48 * %= designates the result
50 * if %k is not used, a prefix to 0, 1, or = must be
51 * added, it can be:
52 * M - memory reference
53 * L - long (64 bits)
54 * W - word (32 bits)
55 * H - short (16 bits)
56 * B - byte (8 bits)
57 * S - single precision float
58 * D - double precision float
60 static struct {
61 short op;
62 short cls;
63 char *asm;
64 } omap[] = {
65 { Oadd, Ka, "+add%k %1, %=" },
66 { Osub, Ka, "-sub%k %1, %=" },
67 { Oand, Ki, "+and%k %1, %=" },
68 { Oor, Ki, "+or%k %1, %=" },
69 { Oxor, Ki, "+xor%k %1, %=" },
70 { Osar, Ki, "-sar%k %B1, %=" },
71 { Oshr, Ki, "-shr%k %B1, %=" },
72 { Oshl, Ki, "-shl%k %B1, %=" },
73 { Omul, Ki, "+imul%k %1, %=" },
74 { Omul, Ks, "+mulss %1, %=" },
75 { Omul, Kd, "+mulsd %1, %=" },
76 { Odiv, Ka, "-div%k %1, %=" },
77 { Ostorel, Ka, "movq %L0, %M1" },
78 { Ostorew, Ka, "movl %W0, %M1" },
79 { Ostoreh, Ka, "movw %H0, %M1" },
80 { Ostoreb, Ka, "movb %B0, %M1" },
81 { Ostores, Ka, "movss %S0, %M1" },
82 { Ostored, Ka, "movsd %D0, %M1" },
83 { Oload, Ka, "mov%k %M0, %=" },
84 { Oloadsw, Kl, "movslq %M0, %L=" },
85 { Oloadsw, Kw, "movl %M0, %W=" },
86 { Oloaduw, Ki, "movl %M0, %W=" },
87 { Oloadsh, Ki, "movsw%k %M0, %=" },
88 { Oloaduh, Ki, "movzw%k %M0, %=" },
89 { Oloadsb, Ki, "movsb%k %M0, %=" },
90 { Oloadub, Ki, "movzb%k %M0, %=" },
91 { Oextsw, Kl, "movslq %W0, %L=" },
92 { Oextuw, Kl, "movl %W0, %W=" },
93 { Oextsh, Ki, "movsw%k %H0, %=" },
94 { Oextuh, Ki, "movzw%k %H0, %=" },
95 { Oextsb, Ki, "movsb%k %B0, %=" },
96 { Oextub, Ki, "movzb%k %B0, %=" },
98 { Oexts, Kd, "cvtss2sd %0, %=" },
99 { Otruncd, Ks, "cvtsd2ss %0, %=" },
100 { Ostosi, Ki, "cvttss2si%k %0, %=" },
101 { Odtosi, Ki, "cvttsd2si%k %0, %=" },
102 { Oswtof, Ka, "cvtsi2%k %W0, %=" },
103 { Osltof, Ka, "cvtsi2%k %L0, %=" },
104 { Ocast, Ki, "movq %D0, %L=" },
105 { Ocast, Ka, "movq %L0, %D=" },
107 { Oaddr, Ki, "lea%k %M0, %=" },
108 { Oswap, Ki, "xchg%k %0, %1" },
109 { Osign, Kl, "cqto" },
110 { Osign, Kw, "cltd" },
111 { Oxdiv, Ki, "div%k %0" },
112 { Oxidiv, Ki, "idiv%k %0" },
113 { Oxcmp, Ks, "ucomiss %S0, %S1" },
114 { Oxcmp, Kd, "ucomisd %D0, %D1" },
115 { Oxcmp, Ki, "cmp%k %0, %1" },
116 { Oxtest, Ki, "test%k %0, %1" },
117 #define X(c, s) \
118 { Oflag+c, Ki, "set" s " %B=\n\tmovzb%k %B=, %=" },
119 CMP(X)
120 #undef X
121 { NOp, 0, 0 }
124 static char *rname[][4] = {
125 [RAX] = {"rax", "eax", "ax", "al"},
126 [RBX] = {"rbx", "ebx", "bx", "bl"},
127 [RCX] = {"rcx", "ecx", "cx", "cl"},
128 [RDX] = {"rdx", "edx", "dx", "dl"},
129 [RSI] = {"rsi", "esi", "si", "sil"},
130 [RDI] = {"rdi", "edi", "di", "dil"},
131 [RBP] = {"rbp", "ebp", "bp", "bpl"},
132 [RSP] = {"rsp", "esp", "sp", "spl"},
133 [R8 ] = {"r8" , "r8d", "r8w", "r8b"},
134 [R9 ] = {"r9" , "r9d", "r9w", "r9b"},
135 [R10] = {"r10", "r10d", "r10w", "r10b"},
136 [R11] = {"r11", "r11d", "r11w", "r11b"},
137 [R12] = {"r12", "r12d", "r12w", "r12b"},
138 [R13] = {"r13", "r13d", "r13w", "r13b"},
139 [R14] = {"r14", "r14d", "r14w", "r14b"},
140 [R15] = {"r15", "r15d", "r15w", "r15b"},
144 static int
145 slot(int s, Fn *fn)
147 struct { int i:29; } x;
149 /* sign extend s using a bitfield */
150 x.i = s;
151 assert(x.i <= fn->slot);
152 /* specific to NAlign == 3 */
153 if (x.i < 0)
154 return -4 * x.i;
155 else if (fn->vararg)
156 return -176 + -4 * (fn->slot - x.i);
157 else
158 return -4 * (fn->slot - x.i);
161 static void
162 emitcon(Con *con, FILE *f)
164 char *p, *l;
166 switch (con->type) {
167 case CAddr:
168 l = str(con->label);
169 p = con->local ? gasloc : l[0] == '"' ? "" : gassym;
170 fprintf(f, "%s%s", p, l);
171 if (con->bits.i)
172 fprintf(f, "%+"PRId64, con->bits.i);
173 break;
174 case CBits:
175 fprintf(f, "%"PRId64, con->bits.i);
176 break;
177 default:
178 die("unreachable");
182 static char *
183 regtoa(int reg, int sz)
185 static char buf[6];
187 assert(reg <= XMM15);
188 if (reg >= XMM0) {
189 sprintf(buf, "xmm%d", reg-XMM0);
190 return buf;
191 } else
192 return rname[reg][sz];
195 static Ref
196 getarg(char c, Ins *i)
198 switch (c) {
199 case '0':
200 return i->arg[0];
201 case '1':
202 return i->arg[1];
203 case '=':
204 return i->to;
205 default:
206 die("invalid arg letter %c", c);
210 static void emitins(Ins, Fn *, FILE *);
212 static void
213 emitcopy(Ref r1, Ref r2, int k, Fn *fn, FILE *f)
215 Ins icp;
217 icp.op = Ocopy;
218 icp.arg[0] = r2;
219 icp.to = r1;
220 icp.cls = k;
221 emitins(icp, fn, f);
224 static void
225 emitf(char *s, Ins *i, Fn *fn, FILE *f)
227 static char clstoa[][3] = {"l", "q", "ss", "sd"};
228 char c;
229 int sz;
230 Ref ref;
231 Mem *m;
232 Con off;
234 switch (*s) {
235 case '+':
236 if (req(i->arg[1], i->to)) {
237 ref = i->arg[0];
238 i->arg[0] = i->arg[1];
239 i->arg[1] = ref;
241 /* fall through */
242 case '-':
243 assert((!req(i->arg[1], i->to) || req(i->arg[0], i->to)) &&
244 "cannot convert to 2-address");
245 emitcopy(i->to, i->arg[0], i->cls, fn, f);
246 s++;
247 break;
250 fputc('\t', f);
251 Next:
252 while ((c = *s++) != '%')
253 if (!c) {
254 fputc('\n', f);
255 return;
256 } else
257 fputc(c, f);
258 switch ((c = *s++)) {
259 case '%':
260 fputc('%', f);
261 break;
262 case 'k':
263 fputs(clstoa[i->cls], f);
264 break;
265 case '0':
266 case '1':
267 case '=':
268 sz = KWIDE(i->cls) ? SLong : SWord;
269 s--;
270 goto Ref;
271 case 'D':
272 case 'S':
273 sz = SLong; /* does not matter for floats */
274 Ref:
275 c = *s++;
276 ref = getarg(c, i);
277 switch (rtype(ref)) {
278 case RTmp:
279 assert(isreg(ref));
280 fprintf(f, "%%%s", regtoa(ref.val, sz));
281 break;
282 case RSlot:
283 fprintf(f, "%d(%%rbp)", slot(ref.val, fn));
284 break;
285 case RMem:
286 Mem:
287 m = &fn->mem[ref.val];
288 if (rtype(m->base) == RSlot) {
289 off.type = CBits;
290 off.bits.i = slot(m->base.val, fn);
291 addcon(&m->offset, &off);
292 m->base = TMP(RBP);
294 if (m->offset.type != CUndef)
295 emitcon(&m->offset, f);
296 fputc('(', f);
297 if (!req(m->base, R))
298 fprintf(f, "%%%s", regtoa(m->base.val, SLong));
299 else if (m->offset.type == CAddr)
300 fprintf(f, "%%rip");
301 if (!req(m->index, R))
302 fprintf(f, ", %%%s, %d",
303 regtoa(m->index.val, SLong),
304 m->scale
306 fputc(')', f);
307 break;
308 case RCon:
309 fputc('$', f);
310 emitcon(&fn->con[ref.val], f);
311 break;
312 default:
313 die("unreachable");
315 break;
316 case 'L':
317 sz = SLong;
318 goto Ref;
319 case 'W':
320 sz = SWord;
321 goto Ref;
322 case 'H':
323 sz = SShort;
324 goto Ref;
325 case 'B':
326 sz = SByte;
327 goto Ref;
328 case 'M':
329 c = *s++;
330 ref = getarg(c, i);
331 switch (rtype(ref)) {
332 case RMem:
333 goto Mem;
334 case RSlot:
335 fprintf(f, "%d(%%rbp)", slot(ref.val, fn));
336 break;
337 case RCon:
338 off = fn->con[ref.val];
339 emitcon(&off, f);
340 if (off.type == CAddr)
341 fprintf(f, "(%%rip)");
342 break;
343 case RTmp:
344 assert(isreg(ref));
345 fprintf(f, "(%%%s)", regtoa(ref.val, SLong));
346 break;
347 default:
348 die("unreachable");
350 break;
351 default:
352 die("invalid format specifier %%%c", c);
354 goto Next;
357 static void *negmask[4] = {
358 [Ks] = (uint32_t[4]){ 0x80000000 },
359 [Kd] = (uint64_t[2]){ 0x8000000000000000 },
362 static void
363 emitins(Ins i, Fn *fn, FILE *f)
365 Ref r;
366 int64_t val;
367 int o, t0;
368 Ins ineg;
370 switch (i.op) {
371 default:
372 Table:
373 /* most instructions are just pulled out of
374 * the table omap[], some special cases are
375 * detailed below */
376 for (o=0;; o++) {
377 /* this linear search should really be a binary
378 * search */
379 if (omap[o].op == NOp)
380 die("no match for %s(%c)",
381 optab[i.op].name, "wlsd"[i.cls]);
382 if (omap[o].op == i.op)
383 if (omap[o].cls == i.cls
384 || (omap[o].cls == Ki && KBASE(i.cls) == 0)
385 || (omap[o].cls == Ka))
386 break;
388 emitf(omap[o].asm, &i, fn, f);
389 break;
390 case Onop:
391 /* just do nothing for nops, they are inserted
392 * by some passes */
393 break;
394 case Omul:
395 /* here, we try to use the 3-addresss form
396 * of multiplication when possible */
397 if (rtype(i.arg[1]) == RCon) {
398 r = i.arg[0];
399 i.arg[0] = i.arg[1];
400 i.arg[1] = r;
402 if (KBASE(i.cls) == 0 /* only available for ints */
403 && rtype(i.arg[0]) == RCon
404 && rtype(i.arg[1]) == RTmp) {
405 emitf("imul%k %0, %1, %=", &i, fn, f);
406 break;
408 goto Table;
409 case Osub:
410 /* we have to use the negation trick to handle
411 * some 3-address subtractions */
412 if (req(i.to, i.arg[1]) && !req(i.arg[0], i.to)) {
413 ineg = (Ins){Oneg, i.cls, i.to, {i.to}};
414 emitins(ineg, fn, f);
415 emitf("add%k %0, %=", &i, fn, f);
416 break;
418 goto Table;
419 case Oneg:
420 if (!req(i.to, i.arg[0]))
421 emitf("mov%k %0, %=", &i, fn, f);
422 if (KBASE(i.cls) == 0)
423 emitf("neg%k %=", &i, fn, f);
424 else
425 fprintf(f,
426 "\txorp%c %sfp%d(%%rip), %%%s\n",
427 "xxsd"[i.cls],
428 gasloc,
429 gasstash(negmask[i.cls], 16),
430 regtoa(i.to.val, SLong)
432 break;
433 case Odiv:
434 /* use xmm15 to adjust the instruction when the
435 * conversion to 2-address in emitf() would fail */
436 if (req(i.to, i.arg[1])) {
437 i.arg[1] = TMP(XMM0+15);
438 emitf("mov%k %=, %1", &i, fn, f);
439 emitf("mov%k %0, %=", &i, fn, f);
440 i.arg[0] = i.to;
442 goto Table;
443 case Ocopy:
444 /* copies are used for many things; see my note
445 * to understand how to load big constants:
446 * https://c9x.me/notes/2015-09-19.html */
447 assert(rtype(i.to) != RMem);
448 if (req(i.to, R) || req(i.arg[0], R))
449 break;
450 if (req(i.to, i.arg[0]))
451 break;
452 t0 = rtype(i.arg[0]);
453 if (i.cls == Kl
454 && t0 == RCon
455 && fn->con[i.arg[0].val].type == CBits) {
456 val = fn->con[i.arg[0].val].bits.i;
457 if (isreg(i.to))
458 if (val >= 0 && val <= UINT32_MAX) {
459 emitf("movl %W0, %W=", &i, fn, f);
460 break;
462 if (rtype(i.to) == RSlot)
463 if (val < INT32_MIN || val > INT32_MAX) {
464 emitf("movl %0, %=", &i, fn, f);
465 emitf("movl %0>>32, 4+%=", &i, fn, f);
466 break;
469 if (isreg(i.to)
470 && t0 == RCon
471 && fn->con[i.arg[0].val].type == CAddr) {
472 emitf("lea%k %M0, %=", &i, fn, f);
473 break;
475 if (rtype(i.to) == RSlot
476 && (t0 == RSlot || t0 == RMem)) {
477 i.cls = KWIDE(i.cls) ? Kd : Ks;
478 i.arg[1] = TMP(XMM0+15);
479 emitf("mov%k %0, %1", &i, fn, f);
480 emitf("mov%k %1, %=", &i, fn, f);
481 break;
483 /* conveniently, the assembler knows if it
484 * should use movabsq when reading movq */
485 emitf("mov%k %0, %=", &i, fn, f);
486 break;
487 case Ocall:
488 /* calls simply have a weird syntax in AT&T
489 * assembly... */
490 switch (rtype(i.arg[0])) {
491 case RCon:
492 fprintf(f, "\tcallq ");
493 emitcon(&fn->con[i.arg[0].val], f);
494 fprintf(f, "\n");
495 break;
496 case RTmp:
497 emitf("callq *%L0", &i, fn, f);
498 break;
499 default:
500 die("invalid call argument");
502 break;
503 case Osalloc:
504 /* there is no good reason why this is here
505 * maybe we should split Osalloc in 2 different
506 * instructions depending on the result
508 emitf("subq %L0, %%rsp", &i, fn, f);
509 if (!req(i.to, R))
510 emitcopy(i.to, TMP(RSP), Kl, fn, f);
511 break;
512 case Oswap:
513 if (KBASE(i.cls) == 0)
514 goto Table;
515 /* for floats, there is no swap instruction
516 * so we use xmm15 as a temporary
518 emitcopy(TMP(XMM0+15), i.arg[0], i.cls, fn, f);
519 emitcopy(i.arg[0], i.arg[1], i.cls, fn, f);
520 emitcopy(i.arg[1], TMP(XMM0+15), i.cls, fn, f);
521 break;
525 static uint64_t
526 framesz(Fn *fn)
528 uint64_t i, o, f;
530 /* specific to NAlign == 3 */
531 for (i=0, o=0; i<NCLR; i++)
532 o ^= 1 & (fn->reg >> amd64_sysv_rclob[i]);
533 f = fn->slot;
534 f = (f + 3) & -4;
535 return 4*f + 8*o + 176*fn->vararg;
538 void
539 amd64_emitfn(Fn *fn, FILE *f)
541 static char *ctoa[] = {
542 #define X(c, s) [c] = s,
543 CMP(X)
544 #undef X
546 static int id0;
547 Blk *b, *s;
548 Ins *i, itmp;
549 int *r, c, o, n, lbl;
550 uint64_t fs;
552 gasemitlnk(fn->name, &fn->lnk, ".text", f);
553 fputs("\tpushq %rbp\n\tmovq %rsp, %rbp\n", f);
554 fs = framesz(fn);
555 if (fs)
556 fprintf(f, "\tsubq $%"PRIu64", %%rsp\n", fs);
557 if (fn->vararg) {
558 o = -176;
559 for (r=amd64_sysv_rsave; r<&amd64_sysv_rsave[6]; r++, o+=8)
560 fprintf(f, "\tmovq %%%s, %d(%%rbp)\n", rname[*r][0], o);
561 for (n=0; n<8; ++n, o+=16)
562 fprintf(f, "\tmovaps %%xmm%d, %d(%%rbp)\n", n, o);
564 for (r=amd64_sysv_rclob; r<&amd64_sysv_rclob[NCLR]; r++)
565 if (fn->reg & BIT(*r)) {
566 itmp.arg[0] = TMP(*r);
567 emitf("pushq %L0", &itmp, fn, f);
568 fs += 8;
571 for (lbl=0, b=fn->start; b; b=b->link) {
572 if (lbl || b->npred > 1)
573 fprintf(f, "%sbb%d:\n", gasloc, id0+b->id);
574 for (i=b->ins; i!=&b->ins[b->nins]; i++)
575 emitins(*i, fn, f);
576 lbl = 1;
577 switch (b->jmp.type) {
578 case Jret0:
579 if (fn->dynalloc)
580 fprintf(f,
581 "\tmovq %%rbp, %%rsp\n"
582 "\tsubq $%"PRIu64", %%rsp\n",
585 for (r=&amd64_sysv_rclob[NCLR]; r>amd64_sysv_rclob;)
586 if (fn->reg & BIT(*--r)) {
587 itmp.arg[0] = TMP(*r);
588 emitf("popq %L0", &itmp, fn, f);
590 fprintf(f,
591 "\tleave\n"
592 "\tret\n"
594 break;
595 case Jjmp:
596 Jmp:
597 if (b->s1 != b->link)
598 fprintf(f, "\tjmp %sbb%d\n",
599 gasloc, id0+b->s1->id);
600 else
601 lbl = 0;
602 break;
603 default:
604 c = b->jmp.type - Jjf;
605 if (0 <= c && c <= NCmp) {
606 if (b->link == b->s2) {
607 s = b->s1;
608 b->s1 = b->s2;
609 b->s2 = s;
610 } else
611 c = cmpneg(c);
612 fprintf(f, "\tj%s %sbb%d\n", ctoa[c],
613 gasloc, id0+b->s2->id);
614 goto Jmp;
616 die("unhandled jump %d", b->jmp.type);
619 id0 += fn->nblk;