fix various codegen bugs on arm64
[qbe.git] / arm64 / emit.c
blob28cd6a51f744565064d3fd1d9fec4eed024e4f0c
1 #include "all.h"
3 typedef struct E E;
5 struct E {
6 FILE *f;
7 Fn *fn;
8 uint64_t frame;
9 uint padding;
/*
 * X-macro listing every comparison code together with the
 * condition suffix emitted for it; it is expanded into the
 * "cset" entries of omap[] and into the branch-suffix table
 * of arm64_emitfn().
 */
#define CMP(X) \
	X(Cieq,       "eq") \
	X(Cine,       "ne") \
	X(Cisge,      "ge") \
	X(Cisgt,      "gt") \
	X(Cisle,      "le") \
	X(Cislt,      "lt") \
	X(Ciuge,      "cs") \
	X(Ciugt,      "hi") \
	X(Ciule,      "ls") \
	X(Ciult,      "cc") \
	X(NCmpI+Cfeq, "eq") \
	X(NCmpI+Cfge, "ge") \
	X(NCmpI+Cfgt, "gt") \
	X(NCmpI+Cfle, "ls") \
	X(NCmpI+Cflt, "mi") \
	X(NCmpI+Cfne, "ne") \
	X(NCmpI+Cfo,  "vc") \
	X(NCmpI+Cfuo, "vs")
/* Wildcard classes usable in the cls field of omap[] entries. */
enum {
	Ki = -1, /* matches Kw and Kl */
	Ka = -2, /* matches all classes */
};
37 static struct {
38 short op;
39 short cls;
40 char *fmt;
41 } omap[] = {
42 { Oadd, Ki, "add %=, %0, %1" },
43 { Oadd, Ka, "fadd %=, %0, %1" },
44 { Osub, Ki, "sub %=, %0, %1" },
45 { Osub, Ka, "fsub %=, %0, %1" },
46 { Oneg, Ki, "neg %=, %0" },
47 { Oneg, Ka, "fneg %=, %0" },
48 { Oand, Ki, "and %=, %0, %1" },
49 { Oor, Ki, "orr %=, %0, %1" },
50 { Oxor, Ki, "eor %=, %0, %1" },
51 { Osar, Ki, "asr %=, %0, %1" },
52 { Oshr, Ki, "lsr %=, %0, %1" },
53 { Oshl, Ki, "lsl %=, %0, %1" },
54 { Omul, Ki, "mul %=, %0, %1" },
55 { Omul, Ka, "fmul %=, %0, %1" },
56 { Odiv, Ki, "sdiv %=, %0, %1" },
57 { Odiv, Ka, "fdiv %=, %0, %1" },
58 { Oudiv, Ki, "udiv %=, %0, %1" },
59 { Orem, Ki, "sdiv %?, %0, %1\n\tmsub\t%=, %?, %1, %0" },
60 { Ourem, Ki, "udiv %?, %0, %1\n\tmsub\t%=, %?, %1, %0" },
61 { Ocopy, Ki, "mov %=, %0" },
62 { Ocopy, Ka, "fmov %=, %0" },
63 { Oswap, Ki, "mov %?, %0\n\tmov\t%0, %1\n\tmov\t%1, %?" },
64 { Oswap, Ka, "fmov %?, %0\n\tfmov\t%0, %1\n\tfmov\t%1, %?" },
65 { Ostoreb, Kw, "strb %W0, %M1" },
66 { Ostoreh, Kw, "strh %W0, %M1" },
67 { Ostorew, Kw, "str %W0, %M1" },
68 { Ostorel, Kw, "str %L0, %M1" },
69 { Ostores, Kw, "str %S0, %M1" },
70 { Ostored, Kw, "str %D0, %M1" },
71 { Oloadsb, Ki, "ldrsb %=, %M0" },
72 { Oloadub, Ki, "ldrb %W=, %M0" },
73 { Oloadsh, Ki, "ldrsh %=, %M0" },
74 { Oloaduh, Ki, "ldrh %W=, %M0" },
75 { Oloadsw, Kw, "ldr %=, %M0" },
76 { Oloadsw, Kl, "ldrsw %=, %M0" },
77 { Oloaduw, Ki, "ldr %W=, %M0" },
78 { Oload, Ka, "ldr %=, %M0" },
79 { Oextsb, Ki, "sxtb %=, %W0" },
80 { Oextub, Ki, "uxtb %W=, %W0" },
81 { Oextsh, Ki, "sxth %=, %W0" },
82 { Oextuh, Ki, "uxth %W=, %W0" },
83 { Oextsw, Ki, "sxtw %L=, %W0" },
84 { Oextuw, Ki, "mov %W=, %W0" },
85 { Oexts, Kd, "fcvt %=, %S0" },
86 { Otruncd, Ks, "fcvt %=, %D0" },
87 { Ocast, Kw, "fmov %=, %S0" },
88 { Ocast, Kl, "fmov %=, %D0" },
89 { Ocast, Ks, "fmov %=, %W0" },
90 { Ocast, Kd, "fmov %=, %L0" },
91 { Ostosi, Ka, "fcvtzs %=, %S0" },
92 { Ostoui, Ka, "fcvtzu %=, %S0" },
93 { Odtosi, Ka, "fcvtzs %=, %D0" },
94 { Odtoui, Ka, "fcvtzu %=, %D0" },
95 { Oswtof, Ka, "scvtf %=, %W0" },
96 { Ouwtof, Ka, "ucvtf %=, %W0" },
97 { Osltof, Ka, "scvtf %=, %L0" },
98 { Oultof, Ka, "ucvtf %=, %L0" },
99 { Ocall, Kw, "blr %L0" },
101 { Oacmp, Ki, "cmp %0, %1" },
102 { Oacmn, Ki, "cmn %0, %1" },
103 { Oafcmp, Ka, "fcmpe %0, %1" },
105 #define X(c, str) \
106 { Oflag+c, Ki, "cset %=, " str },
107 CMP(X)
108 #undef X
109 { NOp, 0, 0 }
112 static char *
113 rname(int r, int k)
115 static char buf[4];
117 if (r == SP) {
118 assert(k == Kl);
119 sprintf(buf, "sp");
121 else if (R0 <= r && r <= LR)
122 switch (k) {
123 default: die("invalid class");
124 case Kw: sprintf(buf, "w%d", r-R0); break;
125 case Kx:
126 case Kl: sprintf(buf, "x%d", r-R0); break;
128 else if (V0 <= r && r <= V30)
129 switch (k) {
130 default: die("invalid class");
131 case Ks: sprintf(buf, "s%d", r-V0); break;
132 case Kx:
133 case Kd: sprintf(buf, "d%d", r-V0); break;
135 else
136 die("invalid register");
137 return buf;
140 static uint64_t
141 slot(Ref r, E *e)
143 int s;
145 s = rsval(r);
146 if (s == -1)
147 return 16 + e->frame;
148 if (s < 0) {
149 if (e->fn->vararg && !T.apple)
150 return 16 + e->frame + 192 - (s+2);
151 else
152 return 16 + e->frame - (s+2);
153 } else
154 return 16 + e->padding + 4 * s;
157 static void
158 emitf(char *s, Ins *i, E *e)
160 Ref r;
161 int k, c;
162 Con *pc;
163 uint64_t n;
164 uint sp;
166 fputc('\t', e->f);
168 sp = 0;
169 for (;;) {
170 k = i->cls;
171 while ((c = *s++) != '%')
172 if (c == ' ' && !sp) {
173 fputc('\t', e->f);
174 sp = 1;
175 } else if ( !c) {
176 fputc('\n', e->f);
177 return;
178 } else
179 fputc(c, e->f);
180 Switch:
181 switch ((c = *s++)) {
182 default:
183 die("invalid escape");
184 case 'W':
185 k = Kw;
186 goto Switch;
187 case 'L':
188 k = Kl;
189 goto Switch;
190 case 'S':
191 k = Ks;
192 goto Switch;
193 case 'D':
194 k = Kd;
195 goto Switch;
196 case '?':
197 if (KBASE(k) == 0)
198 fputs(rname(R18, k), e->f);
199 else
200 fputs(k==Ks ? "s31" : "d31", e->f);
201 break;
202 case '=':
203 case '0':
204 r = c == '=' ? i->to : i->arg[0];
205 assert(isreg(r));
206 fputs(rname(r.val, k), e->f);
207 break;
208 case '1':
209 r = i->arg[1];
210 switch (rtype(r)) {
211 default:
212 die("invalid second argument");
213 case RTmp:
214 assert(isreg(r));
215 fputs(rname(r.val, k), e->f);
216 break;
217 case RCon:
218 pc = &e->fn->con[r.val];
219 n = pc->bits.i;
220 assert(pc->type == CBits);
221 if (n >> 24) {
222 assert(arm64_logimm(n, k));
223 fprintf(e->f, "#%"PRIu64, n);
224 } else if (n & 0xfff000) {
225 assert(!(n & ~0xfff000ull));
226 fprintf(e->f, "#%"PRIu64", lsl #12",
227 n>>12);
228 } else {
229 assert(!(n & ~0xfffull));
230 fprintf(e->f, "#%"PRIu64, n);
232 break;
234 break;
235 case 'M':
236 c = *s++;
237 assert(c == '0' || c == '1' || c == '=');
238 r = c == '=' ? i->to : i->arg[c - '0'];
239 switch (rtype(r)) {
240 default:
241 die("todo (arm emit): unhandled ref");
242 case RTmp:
243 assert(isreg(r));
244 fprintf(e->f, "[%s]", rname(r.val, Kl));
245 break;
246 case RSlot:
247 fprintf(e->f, "[x29, %"PRIu64"]", slot(r, e));
248 break;
250 break;
255 static void
256 loadaddr(Con *c, char *rn, E *e)
258 char *p, *l, *s;
260 switch (c->sym.type) {
261 default:
262 die("unreachable");
263 case SGlo:
264 if (T.apple)
265 s = "\tadrp\tR, S@pageO\n"
266 "\tadd\tR, R, S@pageoffO\n";
267 else
268 s = "\tadrp\tR, SO\n"
269 "\tadd\tR, R, #:lo12:SO\n";
270 break;
271 case SThr:
272 if (T.apple)
273 s = "\tadrp\tR, S@tlvppage\n"
274 "\tldr\tR, [R, S@tlvppageoff]\n";
275 else
276 s = "\tmrs\tR, tpidr_el0\n"
277 "\tadd\tR, R, #:tprel_hi12:SO, lsl #12\n"
278 "\tadd\tR, R, #:tprel_lo12_nc:SO\n";
279 break;
282 l = str(c->sym.id);
283 p = l[0] == '"' ? "" : T.assym;
284 for (; *s; s++)
285 switch (*s) {
286 default:
287 fputc(*s, e->f);
288 break;
289 case 'R':
290 fputs(rn, e->f);
291 break;
292 case 'S':
293 fputs(p, e->f);
294 fputs(l, e->f);
295 break;
296 case 'O':
297 if (c->bits.i)
298 /* todo, handle large offsets */
299 fprintf(e->f, "+%"PRIi64, c->bits.i);
300 break;
304 static void
305 loadcon(Con *c, int r, int k, E *e)
307 char *rn;
308 int64_t n;
309 int w, sh;
311 w = KWIDE(k);
312 rn = rname(r, k);
313 n = c->bits.i;
314 if (c->type == CAddr) {
315 rn = rname(r, Kl);
316 loadaddr(c, rn, e);
317 return;
319 assert(c->type == CBits);
320 if (!w)
321 n = (int32_t)n;
322 if ((n | 0xffff) == -1 || arm64_logimm(n, k)) {
323 fprintf(e->f, "\tmov\t%s, #%"PRIi64"\n", rn, n);
324 } else {
325 fprintf(e->f, "\tmov\t%s, #%d\n",
326 rn, (int)(n & 0xffff));
327 for (sh=16; n>>=16; sh+=16) {
328 if ((!w && sh == 32) || sh == 64)
329 break;
330 fprintf(e->f, "\tmovk\t%s, #0x%x, lsl #%d\n",
331 rn, (uint)(n & 0xffff), sh);
336 static void emitins(Ins *, E *);
338 static void
339 fixarg(Ref *pr, int sz, E *e)
341 Ins *i;
342 Ref r;
343 uint64_t s;
345 r = *pr;
346 if (rtype(r) == RSlot) {
347 s = slot(r, e);
348 if (s > sz * 4095u) {
349 i = &(Ins){Oaddr, Kl, TMP(IP0), {r}};
350 emitins(i, e);
351 *pr = TMP(IP0);
356 static void
357 emitins(Ins *i, E *e)
359 char *l, *p, *rn;
360 uint64_t s;
361 int o;
362 Ref r;
363 Con *c;
365 switch (i->op) {
366 default:
367 if (isload(i->op))
368 fixarg(&i->arg[0], loadsz(i), e);
369 if (isstore(i->op))
370 fixarg(&i->arg[1], storesz(i), e);
371 Table:
372 /* most instructions are just pulled out of
373 * the table omap[], some special cases are
374 * detailed below */
375 for (o=0;; o++) {
376 /* this linear search should really be a binary
377 * search */
378 if (omap[o].op == NOp)
379 die("no match for %s(%c)",
380 optab[i->op].name, "wlsd"[i->cls]);
381 if (omap[o].op == i->op)
382 if (omap[o].cls == i->cls || omap[o].cls == Ka
383 || (omap[o].cls == Ki && KBASE(i->cls) == 0))
384 break;
386 emitf(omap[o].fmt, i, e);
387 break;
388 case Onop:
389 break;
390 case Ocopy:
391 if (req(i->to, i->arg[0]))
392 break;
393 if (rtype(i->to) == RSlot) {
394 r = i->to;
395 if (!isreg(i->arg[0])) {
396 i->to = TMP(R18);
397 emitins(i, e);
398 i->arg[0] = i->to;
400 i->op = Ostorew + i->cls;
401 i->cls = Kw;
402 i->arg[1] = r;
403 emitins(i, e);
404 break;
406 assert(isreg(i->to));
407 switch (rtype(i->arg[0])) {
408 case RCon:
409 c = &e->fn->con[i->arg[0].val];
410 loadcon(c, i->to.val, i->cls, e);
411 break;
412 case RSlot:
413 i->op = Oload;
414 emitins(i, e);
415 break;
416 default:
417 assert(i->to.val != R18);
418 goto Table;
420 break;
421 case Oaddr:
422 assert(rtype(i->arg[0]) == RSlot);
423 rn = rname(i->to.val, Kl);
424 s = slot(i->arg[0], e);
425 if (s <= 4095)
426 fprintf(e->f, "\tadd\t%s, x29, #%"PRIu64"\n", rn, s);
427 else if (s <= 65535)
428 fprintf(e->f,
429 "\tmov\t%s, #%"PRIu64"\n"
430 "\tadd\t%s, x29, %s\n",
431 rn, s, rn, rn
433 else
434 fprintf(e->f,
435 "\tmov\t%s, #%"PRIu64"\n"
436 "\tmovk\t%s, #%"PRIu64", lsl #16\n"
437 "\tadd\t%s, x29, %s\n",
438 rn, s & 0xFFFF, rn, s >> 16, rn, rn
440 break;
441 case Ocall:
442 if (rtype(i->arg[0]) != RCon)
443 goto Table;
444 c = &e->fn->con[i->arg[0].val];
445 if (c->type != CAddr
446 || c->sym.type != SGlo
447 || c->bits.i)
448 die("invalid call argument");
449 l = str(c->sym.id);
450 p = l[0] == '"' ? "" : T.assym;
451 fprintf(e->f, "\tbl\t%s%s\n", p, l);
452 break;
453 case Osalloc:
454 emitf("sub sp, sp, %0", i, e);
455 if (!req(i->to, R))
456 emitf("mov %=, sp", i, e);
457 break;
458 case Odbgloc:
459 emitdbgloc(i->arg[0].val, i->arg[1].val, e->f);
460 break;
464 static void
465 framelayout(E *e)
467 int *r;
468 uint o;
469 uint64_t f;
471 for (o=0, r=arm64_rclob; *r>=0; r++)
472 o += 1 & (e->fn->reg >> *r);
473 f = e->fn->slot;
474 f = (f + 3) & -4;
475 o += o & 1;
476 e->padding = 4*(f-e->fn->slot);
477 e->frame = 4*f + 8*o;
/*

  Stack-frame layout:

  +=============+
  | varargs     |
  |  save area  |
  +-------------+
  | callee-save |  ^
  |  registers  |  |
  +-------------+  |
  |    ...      |  |
  | spill slots |  |
  |    ...      |  | e->frame
  +-------------+  |
  |    ...      |  |
  |   locals    |  |
  |    ...      |  |
  +-------------+  |
  | e->padding  |  v
  +-------------+
  |  saved x29  |
  |  saved x30  |
  +=============+ <- x29

*/
507 void
508 arm64_emitfn(Fn *fn, FILE *out)
510 static char *ctoa[] = {
511 #define X(c, s) [c] = s,
512 CMP(X)
513 #undef X
515 static int id0;
516 int s, n, c, lbl, *r;
517 uint64_t o;
518 Blk *b, *t;
519 Ins *i;
520 E *e;
522 e = &(E){.f = out, .fn = fn};
523 if (T.apple)
524 e->fn->lnk.align = 4;
525 emitfnlnk(e->fn->name, &e->fn->lnk, e->f);
526 fputs("\thint\t#34\n", e->f);
527 framelayout(e);
529 if (e->fn->vararg && !T.apple) {
530 for (n=7; n>=0; n--)
531 fprintf(e->f, "\tstr\tq%d, [sp, -16]!\n", n);
532 for (n=7; n>=0; n-=2)
533 fprintf(e->f, "\tstp\tx%d, x%d, [sp, -16]!\n", n-1, n);
536 if (e->frame + 16 <= 512)
537 fprintf(e->f,
538 "\tstp\tx29, x30, [sp, -%"PRIu64"]!\n",
539 e->frame + 16
541 else if (e->frame <= 4095)
542 fprintf(e->f,
543 "\tsub\tsp, sp, #%"PRIu64"\n"
544 "\tstp\tx29, x30, [sp, -16]!\n",
545 e->frame
547 else if (e->frame <= 65535)
548 fprintf(e->f,
549 "\tmov\tx16, #%"PRIu64"\n"
550 "\tsub\tsp, sp, x16\n"
551 "\tstp\tx29, x30, [sp, -16]!\n",
552 e->frame
554 else
555 fprintf(e->f,
556 "\tmov\tx16, #%"PRIu64"\n"
557 "\tmovk\tx16, #%"PRIu64", lsl #16\n"
558 "\tsub\tsp, sp, x16\n"
559 "\tstp\tx29, x30, [sp, -16]!\n",
560 e->frame & 0xFFFF, e->frame >> 16
562 fputs("\tmov\tx29, sp\n", e->f);
563 s = (e->frame - e->padding) / 4;
564 for (r=arm64_rclob; *r>=0; r++)
565 if (e->fn->reg & BIT(*r)) {
566 s -= 2;
567 i = &(Ins){.arg = {TMP(*r), SLOT(s)}};
568 i->op = *r >= V0 ? Ostored : Ostorel;
569 emitins(i, e);
572 for (lbl=0, b=e->fn->start; b; b=b->link) {
573 if (lbl || b->npred > 1)
574 fprintf(e->f, "%s%d:\n", T.asloc, id0+b->id);
575 for (i=b->ins; i!=&b->ins[b->nins]; i++)
576 emitins(i, e);
577 lbl = 1;
578 switch (b->jmp.type) {
579 case Jhlt:
580 fprintf(e->f, "\tbrk\t#1000\n");
581 break;
582 case Jret0:
583 s = (e->frame - e->padding) / 4;
584 for (r=arm64_rclob; *r>=0; r++)
585 if (e->fn->reg & BIT(*r)) {
586 s -= 2;
587 i = &(Ins){Oload, 0, TMP(*r), {SLOT(s)}};
588 i->cls = *r >= V0 ? Kd : Kl;
589 emitins(i, e);
591 if (e->fn->dynalloc)
592 fputs("\tmov sp, x29\n", e->f);
593 o = e->frame + 16;
594 if (e->fn->vararg && !T.apple)
595 o += 192;
596 if (o <= 504)
597 fprintf(e->f,
598 "\tldp\tx29, x30, [sp], %"PRIu64"\n",
601 else if (o - 16 <= 4095)
602 fprintf(e->f,
603 "\tldp\tx29, x30, [sp], 16\n"
604 "\tadd\tsp, sp, #%"PRIu64"\n",
605 o - 16
607 else if (o - 16 <= 65535)
608 fprintf(e->f,
609 "\tldp\tx29, x30, [sp], 16\n"
610 "\tmov\tx16, #%"PRIu64"\n"
611 "\tadd\tsp, sp, x16\n",
612 o - 16
614 else
615 fprintf(e->f,
616 "\tldp\tx29, x30, [sp], 16\n"
617 "\tmov\tx16, #%"PRIu64"\n"
618 "\tmovk\tx16, #%"PRIu64", lsl #16\n"
619 "\tadd\tsp, sp, x16\n",
620 (o - 16) & 0xFFFF, (o - 16) >> 16
622 fprintf(e->f, "\tret\n");
623 break;
624 case Jjmp:
625 Jmp:
626 if (b->s1 != b->link)
627 fprintf(e->f,
628 "\tb\t%s%d\n",
629 T.asloc, id0+b->s1->id
631 else
632 lbl = 0;
633 break;
634 default:
635 c = b->jmp.type - Jjf;
636 if (c < 0 || c > NCmp)
637 die("unhandled jump %d", b->jmp.type);
638 if (b->link == b->s2) {
639 t = b->s1;
640 b->s1 = b->s2;
641 b->s2 = t;
642 } else
643 c = cmpneg(c);
644 fprintf(e->f,
645 "\tb%s\t%s%d\n",
646 ctoa[c], T.asloc, id0+b->s2->id
648 goto Jmp;
651 id0 += e->fn->nblk;
652 if (!T.apple)
653 elf_emitfnfin(fn->name, out);