gas: use .balign instead of .align
[qbe.git] / amd64 / emit.c
blobd4bd54c2a4db0b6445eaf5f7752d66b03c11796a
1 #include "all.h"
4 #define CMP(X) \
5 X(Ciule, "be") \
6 X(Ciult, "b") \
7 X(Cisle, "le") \
8 X(Cislt, "l") \
9 X(Cisgt, "g") \
10 X(Cisge, "ge") \
11 X(Ciugt, "a") \
12 X(Ciuge, "ae") \
13 X(Cieq, "z") \
14 X(Cine, "nz") \
15 X(NCmpI+Cfle, "be") \
16 X(NCmpI+Cflt, "b") \
17 X(NCmpI+Cfgt, "a") \
18 X(NCmpI+Cfge, "ae") \
19 X(NCmpI+Cfeq, "z") \
20 X(NCmpI+Cfne, "nz") \
21 X(NCmpI+Cfo, "np") \
22 X(NCmpI+Cfuo, "p")
24 enum {
25 SLong = 0,
26 SWord = 1,
27 SShort = 2,
28 SByte = 3,
30 Ki = -1, /* matches Kw and Kl */
31 Ka = -2, /* matches all classes */
34 /* Instruction format strings:
36 * if the format string starts with -, the instruction
37 * is assumed to be 3-address and is put in 2-address
38 * mode using an extra mov if necessary
40 * if the format string starts with +, the same as the
41 * above applies, but commutativity is also assumed
43 * %k is used to set the class of the instruction,
44 * it'll expand to "l", "q", "ss", "sd", depending
45 * on the instruction class
46 * %0 designates the first argument
47 * %1 designates the second argument
48 * %= designates the result
50 * if %k is not used, a prefix to 0, 1, or = must be
51 * added, it can be:
52 * M - memory reference
53 * L - long (64 bits)
54 * W - word (32 bits)
55 * H - short (16 bits)
56 * B - byte (8 bits)
57 * S - single precision float
58 * D - double precision float
60 static struct {
61 short op;
62 short cls;
63 char *asm;
64 } omap[] = {
65 { Oadd, Ka, "+add%k %1, %=" },
66 { Osub, Ka, "-sub%k %1, %=" },
67 { Oand, Ki, "+and%k %1, %=" },
68 { Oor, Ki, "+or%k %1, %=" },
69 { Oxor, Ki, "+xor%k %1, %=" },
70 { Osar, Ki, "-sar%k %B1, %=" },
71 { Oshr, Ki, "-shr%k %B1, %=" },
72 { Oshl, Ki, "-shl%k %B1, %=" },
73 { Omul, Ki, "+imul%k %1, %=" },
74 { Omul, Ks, "+mulss %1, %=" },
75 { Omul, Kd, "+mulsd %1, %=" },
76 { Odiv, Ka, "-div%k %1, %=" },
77 { Ostorel, Ka, "movq %L0, %M1" },
78 { Ostorew, Ka, "movl %W0, %M1" },
79 { Ostoreh, Ka, "movw %H0, %M1" },
80 { Ostoreb, Ka, "movb %B0, %M1" },
81 { Ostores, Ka, "movss %S0, %M1" },
82 { Ostored, Ka, "movsd %D0, %M1" },
83 { Oload, Ka, "mov%k %M0, %=" },
84 { Oloadsw, Kl, "movslq %M0, %L=" },
85 { Oloadsw, Kw, "movl %M0, %W=" },
86 { Oloaduw, Ki, "movl %M0, %W=" },
87 { Oloadsh, Ki, "movsw%k %M0, %=" },
88 { Oloaduh, Ki, "movzw%k %M0, %=" },
89 { Oloadsb, Ki, "movsb%k %M0, %=" },
90 { Oloadub, Ki, "movzb%k %M0, %=" },
91 { Oextsw, Kl, "movslq %W0, %L=" },
92 { Oextuw, Kl, "movl %W0, %W=" },
93 { Oextsh, Ki, "movsw%k %H0, %=" },
94 { Oextuh, Ki, "movzw%k %H0, %=" },
95 { Oextsb, Ki, "movsb%k %B0, %=" },
96 { Oextub, Ki, "movzb%k %B0, %=" },
98 { Oexts, Kd, "cvtss2sd %0, %=" },
99 { Otruncd, Ks, "cvtsd2ss %0, %=" },
100 { Ostosi, Ki, "cvttss2si%k %0, %=" },
101 { Odtosi, Ki, "cvttsd2si%k %0, %=" },
102 { Oswtof, Ka, "cvtsi2%k %W0, %=" },
103 { Osltof, Ka, "cvtsi2%k %L0, %=" },
104 { Ocast, Ki, "movq %D0, %L=" },
105 { Ocast, Ka, "movq %L0, %D=" },
107 { Oaddr, Ki, "lea%k %M0, %=" },
108 { Oswap, Ki, "xchg%k %0, %1" },
109 { Osign, Kl, "cqto" },
110 { Osign, Kw, "cltd" },
111 { Oxdiv, Ki, "div%k %0" },
112 { Oxidiv, Ki, "idiv%k %0" },
113 { Oxcmp, Ks, "ucomiss %S0, %S1" },
114 { Oxcmp, Kd, "ucomisd %D0, %D1" },
115 { Oxcmp, Ki, "cmp%k %0, %1" },
116 { Oxtest, Ki, "test%k %0, %1" },
117 #define X(c, s) \
118 { Oflag+c, Ki, "set" s " %B=\n\tmovzb%k %B=, %=" },
119 CMP(X)
120 #undef X
121 { NOp, 0, 0 }
124 static char *rname[][4] = {
125 [RAX] = {"rax", "eax", "ax", "al"},
126 [RBX] = {"rbx", "ebx", "bx", "bl"},
127 [RCX] = {"rcx", "ecx", "cx", "cl"},
128 [RDX] = {"rdx", "edx", "dx", "dl"},
129 [RSI] = {"rsi", "esi", "si", "sil"},
130 [RDI] = {"rdi", "edi", "di", "dil"},
131 [RBP] = {"rbp", "ebp", "bp", "bpl"},
132 [RSP] = {"rsp", "esp", "sp", "spl"},
133 [R8 ] = {"r8" , "r8d", "r8w", "r8b"},
134 [R9 ] = {"r9" , "r9d", "r9w", "r9b"},
135 [R10] = {"r10", "r10d", "r10w", "r10b"},
136 [R11] = {"r11", "r11d", "r11w", "r11b"},
137 [R12] = {"r12", "r12d", "r12w", "r12b"},
138 [R13] = {"r13", "r13d", "r13w", "r13b"},
139 [R14] = {"r14", "r14d", "r14w", "r14b"},
140 [R15] = {"r15", "r15d", "r15w", "r15b"},
144 static int
145 slot(int s, Fn *fn)
147 struct { int i:29; } x;
149 /* sign extend s using a bitfield */
150 x.i = s;
151 assert(x.i <= fn->slot);
152 /* specific to NAlign == 3 */
153 if (x.i < 0)
154 return -4 * x.i;
155 else if (fn->vararg)
156 return -176 + -4 * (fn->slot - x.i);
157 else
158 return -4 * (fn->slot - x.i);
161 static void
162 emitcon(Con *con, FILE *f)
164 char *p;
166 switch (con->type) {
167 case CAddr:
168 p = con->local ? gasloc : gassym;
169 fprintf(f, "%s%s", p, str(con->label));
170 if (con->bits.i)
171 fprintf(f, "%+"PRId64, con->bits.i);
172 break;
173 case CBits:
174 fprintf(f, "%"PRId64, con->bits.i);
175 break;
176 default:
177 die("unreachable");
181 static char *
182 regtoa(int reg, int sz)
184 static char buf[6];
186 if (reg >= XMM0) {
187 sprintf(buf, "xmm%d", reg-XMM0);
188 return buf;
189 } else
190 return rname[reg][sz];
193 static Ref
194 getarg(char c, Ins *i)
196 switch (c) {
197 case '0':
198 return i->arg[0];
199 case '1':
200 return i->arg[1];
201 case '=':
202 return i->to;
203 default:
204 die("invalid arg letter %c", c);
208 static void emitins(Ins, Fn *, FILE *);
210 static void
211 emitcopy(Ref r1, Ref r2, int k, Fn *fn, FILE *f)
213 Ins icp;
215 icp.op = Ocopy;
216 icp.arg[0] = r2;
217 icp.to = r1;
218 icp.cls = k;
219 emitins(icp, fn, f);
222 static void
223 emitf(char *s, Ins *i, Fn *fn, FILE *f)
225 static char clstoa[][3] = {"l", "q", "ss", "sd"};
226 char c;
227 int sz;
228 Ref ref;
229 Mem *m;
230 Con off;
232 switch (*s) {
233 case '+':
234 if (req(i->arg[1], i->to)) {
235 ref = i->arg[0];
236 i->arg[0] = i->arg[1];
237 i->arg[1] = ref;
239 /* fall through */
240 case '-':
241 assert((!req(i->arg[1], i->to) || req(i->arg[0], i->to)) &&
242 "cannot convert to 2-address");
243 emitcopy(i->to, i->arg[0], i->cls, fn, f);
244 s++;
245 break;
248 fputc('\t', f);
249 Next:
250 while ((c = *s++) != '%')
251 if (!c) {
252 fputc('\n', f);
253 return;
254 } else
255 fputc(c, f);
256 switch ((c = *s++)) {
257 case '%':
258 fputc('%', f);
259 break;
260 case 'k':
261 fputs(clstoa[i->cls], f);
262 break;
263 case '0':
264 case '1':
265 case '=':
266 sz = KWIDE(i->cls) ? SLong : SWord;
267 s--;
268 goto Ref;
269 case 'D':
270 case 'S':
271 sz = SLong; /* does not matter for floats */
272 Ref:
273 c = *s++;
274 ref = getarg(c, i);
275 switch (rtype(ref)) {
276 case RTmp:
277 assert(isreg(ref));
278 fprintf(f, "%%%s", regtoa(ref.val, sz));
279 break;
280 case RSlot:
281 fprintf(f, "%d(%%rbp)", slot(ref.val, fn));
282 break;
283 case RMem:
284 Mem:
285 m = &fn->mem[ref.val];
286 if (rtype(m->base) == RSlot) {
287 off.type = CBits;
288 off.bits.i = slot(m->base.val, fn);
289 addcon(&m->offset, &off);
290 m->base = TMP(RBP);
292 if (m->offset.type != CUndef)
293 emitcon(&m->offset, f);
294 fputc('(', f);
295 if (!req(m->base, R))
296 fprintf(f, "%%%s", regtoa(m->base.val, SLong));
297 else if (m->offset.type == CAddr)
298 fprintf(f, "%%rip");
299 if (!req(m->index, R))
300 fprintf(f, ", %%%s, %d",
301 regtoa(m->index.val, SLong),
302 m->scale
304 fputc(')', f);
305 break;
306 case RCon:
307 fputc('$', f);
308 emitcon(&fn->con[ref.val], f);
309 break;
310 default:
311 die("unreachable");
313 break;
314 case 'L':
315 sz = SLong;
316 goto Ref;
317 case 'W':
318 sz = SWord;
319 goto Ref;
320 case 'H':
321 sz = SShort;
322 goto Ref;
323 case 'B':
324 sz = SByte;
325 goto Ref;
326 case 'M':
327 c = *s++;
328 ref = getarg(c, i);
329 switch (rtype(ref)) {
330 case RMem:
331 goto Mem;
332 case RSlot:
333 fprintf(f, "%d(%%rbp)", slot(ref.val, fn));
334 break;
335 case RCon:
336 off = fn->con[ref.val];
337 emitcon(&off, f);
338 if (off.type == CAddr)
339 fprintf(f, "(%%rip)");
340 break;
341 case RTmp:
342 assert(isreg(ref));
343 fprintf(f, "(%%%s)", regtoa(ref.val, SLong));
344 break;
345 default:
346 die("unreachable");
348 break;
349 default:
350 die("invalid format specifier %%%c", c);
352 goto Next;
355 static void *negmask[4] = {
356 [Ks] = (uint32_t[4]){ 0x80000000 },
357 [Kd] = (uint64_t[2]){ 0x8000000000000000 },
360 static void
361 emitins(Ins i, Fn *fn, FILE *f)
363 Ref r;
364 int64_t val;
365 int o, t0;
367 switch (i.op) {
368 default:
369 Table:
370 /* most instructions are just pulled out of
371 * the table omap[], some special cases are
372 * detailed below */
373 for (o=0;; o++) {
374 /* this linear search should really be a binary
375 * search */
376 if (omap[o].op == NOp)
377 die("no match for %s(%d)",
378 optab[i.op].name, "wlsd"[i.cls]);
379 if (omap[o].op == i.op)
380 if (omap[o].cls == i.cls
381 || (omap[o].cls == Ki && KBASE(i.cls) == 0)
382 || (omap[o].cls == Ka))
383 break;
385 emitf(omap[o].asm, &i, fn, f);
386 break;
387 case Onop:
388 /* just do nothing for nops, they are inserted
389 * by some passes */
390 break;
391 case Omul:
392 /* here, we try to use the 3-addresss form
393 * of multiplication when possible */
394 if (rtype(i.arg[1]) == RCon) {
395 r = i.arg[0];
396 i.arg[0] = i.arg[1];
397 i.arg[1] = r;
399 if (KBASE(i.cls) == 0 /* only available for ints */
400 && rtype(i.arg[0]) == RCon
401 && rtype(i.arg[1]) == RTmp) {
402 emitf("imul%k %0, %1, %=", &i, fn, f);
403 break;
405 goto Table;
406 case Osub:
407 /* we have to use the negation trick to handle
408 * some 3-address subtractions */
409 if (req(i.to, i.arg[1])) {
410 if (KBASE(i.cls) == 0)
411 emitf("neg%k %=", &i, fn, f);
412 else
413 fprintf(f,
414 "\txorp%c %sfp%d(%%rip), %%%s\n",
415 "xxsd"[i.cls],
416 gasloc,
417 gasstash(negmask[i.cls], 16),
418 regtoa(i.to.val, SLong)
420 emitf("add%k %0, %=", &i, fn, f);
421 break;
423 goto Table;
424 case Odiv:
425 /* use xmm15 to adjust the instruction when the
426 * conversion to 2-address in emitf() would fail */
427 if (req(i.to, i.arg[1])) {
428 i.arg[1] = TMP(XMM0+15);
429 emitf("mov%k %=, %1", &i, fn, f);
430 emitf("mov%k %0, %=", &i, fn, f);
431 i.arg[0] = i.to;
433 goto Table;
434 case Ocopy:
435 /* copies are used for many things; see my note
436 * to understand how to load big constants:
437 * https://c9x.me/notes/2015-09-19.html */
438 assert(rtype(i.to) != RMem);
439 if (req(i.to, R) || req(i.arg[0], R))
440 break;
441 if (req(i.to, i.arg[0]))
442 break;
443 t0 = rtype(i.arg[0]);
444 if (i.cls == Kl
445 && t0 == RCon
446 && fn->con[i.arg[0].val].type == CBits) {
447 val = fn->con[i.arg[0].val].bits.i;
448 if (isreg(i.to))
449 if (val >= 0 && val <= UINT32_MAX) {
450 emitf("movl %W0, %W=", &i, fn, f);
451 break;
453 if (rtype(i.to) == RSlot)
454 if (val < INT32_MIN || val > INT32_MAX) {
455 emitf("movl %0, %=", &i, fn, f);
456 emitf("movl %0>>32, 4+%=", &i, fn, f);
457 break;
460 if (isreg(i.to)
461 && t0 == RCon
462 && fn->con[i.arg[0].val].type == CAddr) {
463 emitf("lea%k %M0, %=", &i, fn, f);
464 break;
466 if (rtype(i.to) == RSlot
467 && (t0 == RSlot || t0 == RMem)) {
468 i.cls = KWIDE(i.cls) ? Kd : Ks;
469 i.arg[1] = TMP(XMM0+15);
470 emitf("mov%k %0, %1", &i, fn, f);
471 emitf("mov%k %1, %=", &i, fn, f);
472 break;
474 /* conveniently, the assembler knows if it
475 * should use movabsq when reading movq */
476 emitf("mov%k %0, %=", &i, fn, f);
477 break;
478 case Ocall:
479 /* calls simply have a weird syntax in AT&T
480 * assembly... */
481 switch (rtype(i.arg[0])) {
482 case RCon:
483 fprintf(f, "\tcallq ");
484 emitcon(&fn->con[i.arg[0].val], f);
485 fprintf(f, "\n");
486 break;
487 case RTmp:
488 emitf("callq *%L0", &i, fn, f);
489 break;
490 default:
491 die("invalid call argument");
493 break;
494 case Osalloc:
495 /* there is no good reason why this is here
496 * maybe we should split Osalloc in 2 different
497 * instructions depending on the result
499 emitf("subq %L0, %%rsp", &i, fn, f);
500 if (!req(i.to, R))
501 emitcopy(i.to, TMP(RSP), Kl, fn, f);
502 break;
503 case Oswap:
504 if (KBASE(i.cls) == 0)
505 goto Table;
506 /* for floats, there is no swap instruction
507 * so we use xmm15 as a temporary
509 emitcopy(TMP(XMM0+15), i.arg[0], i.cls, fn, f);
510 emitcopy(i.arg[0], i.arg[1], i.cls, fn, f);
511 emitcopy(i.arg[1], TMP(XMM0+15), i.cls, fn, f);
512 break;
516 static uint64_t
517 framesz(Fn *fn)
519 uint64_t i, o, f;
521 /* specific to NAlign == 3 */
522 for (i=0, o=0; i<NCLR; i++)
523 o ^= 1 & (fn->reg >> amd64_sysv_rclob[i]);
524 f = fn->slot;
525 f = (f + 3) & -4;
526 return 4*f + 8*o + 176*fn->vararg;
529 void
530 amd64_emitfn(Fn *fn, FILE *f)
532 static char *ctoa[] = {
533 #define X(c, s) [c] = s,
534 CMP(X)
535 #undef X
537 static int id0;
538 Blk *b, *s;
539 Ins *i, itmp;
540 int *r, c, o, n, lbl;
541 uint64_t fs;
543 fprintf(f, ".text\n");
544 if (fn->export)
545 fprintf(f, ".globl %s%s\n", gassym, fn->name);
546 fprintf(f,
547 "%s%s:\n"
548 "\tpushq %%rbp\n"
549 "\tmovq %%rsp, %%rbp\n",
550 gassym, fn->name
552 fs = framesz(fn);
553 if (fs)
554 fprintf(f, "\tsub $%"PRIu64", %%rsp\n", fs);
555 if (fn->vararg) {
556 o = -176;
557 for (r=amd64_sysv_rsave; r<&amd64_sysv_rsave[6]; r++, o+=8)
558 fprintf(f, "\tmovq %%%s, %d(%%rbp)\n", rname[*r][0], o);
559 for (n=0; n<8; ++n, o+=16)
560 fprintf(f, "\tmovaps %%xmm%d, %d(%%rbp)\n", n, o);
562 for (r=amd64_sysv_rclob; r<&amd64_sysv_rclob[NCLR]; r++)
563 if (fn->reg & BIT(*r)) {
564 itmp.arg[0] = TMP(*r);
565 emitf("pushq %L0", &itmp, fn, f);
566 fs += 8;
569 for (lbl=0, b=fn->start; b; b=b->link) {
570 if (lbl || b->npred > 1)
571 fprintf(f, "%sbb%d:\n", gasloc, id0+b->id);
572 for (i=b->ins; i!=&b->ins[b->nins]; i++)
573 emitins(*i, fn, f);
574 lbl = 1;
575 switch (b->jmp.type) {
576 case Jret0:
577 if (fn->dynalloc)
578 fprintf(f,
579 "\tmovq %%rbp, %%rsp\n"
580 "\tsubq $%"PRIu64", %%rsp\n",
583 for (r=&amd64_sysv_rclob[NCLR]; r>amd64_sysv_rclob;)
584 if (fn->reg & BIT(*--r)) {
585 itmp.arg[0] = TMP(*r);
586 emitf("popq %L0", &itmp, fn, f);
588 fprintf(f,
589 "\tleave\n"
590 "\tret\n"
592 break;
593 case Jjmp:
594 Jmp:
595 if (b->s1 != b->link)
596 fprintf(f, "\tjmp %sbb%d\n",
597 gasloc, id0+b->s1->id);
598 else
599 lbl = 0;
600 break;
601 default:
602 c = b->jmp.type - Jjf;
603 if (0 <= c && c <= NCmp) {
604 if (b->link == b->s2) {
605 s = b->s1;
606 b->s1 = b->s2;
607 b->s2 = s;
608 } else
609 c = cmpneg(c);
610 fprintf(f, "\tj%s %sbb%d\n", ctoa[c],
611 gasloc, id0+b->s2->id);
612 goto Jmp;
614 die("unhandled jump %d", b->jmp.type);
617 id0 += fn->nblk;