Add a negation instruction
[qbe.git] / arm64 / emit.c
blobde1859bd78ac2e1b66cbbec046550178f9567742
1 #include "all.h"
3 typedef struct E E;
5 struct E {
6 FILE *f;
7 Fn *fn;
8 uint64_t frame;
9 uint padding;
/* X-macro pairing every comparison code with the arm64
 * condition suffix used for it; it is expanded once to
 * build the "cset" rows of omap[] and once to build the
 * branch-suffix table in arm64_emitfn() */
#define CMP(X)                  \
	X(Cieq,       "eq")     \
	X(Cine,       "ne")     \
	X(Cisge,      "ge")     \
	X(Cisgt,      "gt")     \
	X(Cisle,      "le")     \
	X(Cislt,      "lt")     \
	X(Ciuge,      "cs")     \
	X(Ciugt,      "hi")     \
	X(Ciule,      "ls")     \
	X(Ciult,      "cc")     \
	X(NCmpI+Cfeq, "eq")     \
	X(NCmpI+Cfge, "ge")     \
	X(NCmpI+Cfgt, "gt")     \
	X(NCmpI+Cfle, "ls")     \
	X(NCmpI+Cflt, "mi")     \
	X(NCmpI+Cfne, "ne")     \
	X(NCmpI+Cfo,  "vc")     \
	X(NCmpI+Cfuo, "vs")
/* wildcard classes usable in the cls column of omap[] */
enum {
	Ki = -1, /* matches Kw and Kl */
	Ka = -2  /* matches all classes */
};
37 static struct {
38 short op;
39 short cls;
40 char *asm;
41 } omap[] = {
42 { Oadd, Ki, "add %=, %0, %1" },
43 { Oadd, Ka, "fadd %=, %0, %1" },
44 { Osub, Ki, "sub %=, %0, %1" },
45 { Osub, Ka, "fsub %=, %0, %1" },
46 { Oneg, Ki, "neg %=, %0" },
47 { Oneg, Ka, "fneg %=, %0" },
48 { Oand, Ki, "and %=, %0, %1" },
49 { Oor, Ki, "orr %=, %0, %1" },
50 { Oxor, Ki, "eor %=, %0, %1" },
51 { Osar, Ki, "asr %=, %0, %1" },
52 { Oshr, Ki, "lsr %=, %0, %1" },
53 { Oshl, Ki, "lsl %=, %0, %1" },
54 { Omul, Ki, "mul %=, %0, %1" },
55 { Omul, Ka, "fmul %=, %0, %1" },
56 { Odiv, Ki, "sdiv %=, %0, %1" },
57 { Odiv, Ka, "fdiv %=, %0, %1" },
58 { Oudiv, Ki, "udiv %=, %0, %1" },
59 { Orem, Ki, "sdiv %?, %0, %1\n\tmsub\t%=, %?, %1, %0" },
60 { Ourem, Ki, "udiv %?, %0, %1\n\tmsub\t%=, %?, %1, %0" },
61 { Ocopy, Ki, "mov %=, %0" },
62 { Ocopy, Ka, "fmov %=, %0" },
63 { Oswap, Ki, "mov %?, %0\n\tmov\t%0, %1\n\tmov\t%1, %?" },
64 { Oswap, Ka, "fmov %?, %0\n\tfmov\t%0, %1\n\tfmov\t%1, %?" },
65 { Ostoreb, Kw, "strb %W0, %M1" },
66 { Ostoreh, Kw, "strh %W0, %M1" },
67 { Ostorew, Kw, "str %W0, %M1" },
68 { Ostorel, Kw, "str %L0, %M1" },
69 { Ostores, Kw, "str %S0, %M1" },
70 { Ostored, Kw, "str %D0, %M1" },
71 { Oloadsb, Ki, "ldrsb %=, %M0" },
72 { Oloadub, Ki, "ldrb %W=, %M0" },
73 { Oloadsh, Ki, "ldrsh %=, %M0" },
74 { Oloaduh, Ki, "ldrh %W=, %M0" },
75 { Oloadsw, Kw, "ldr %=, %M0" },
76 { Oloadsw, Kl, "ldrsw %=, %M0" },
77 { Oloaduw, Ki, "ldr %W=, %M0" },
78 { Oload, Ka, "ldr %=, %M0" },
79 { Oextsb, Ki, "sxtb %=, %W0" },
80 { Oextub, Ki, "uxtb %W=, %W0" },
81 { Oextsh, Ki, "sxth %=, %W0" },
82 { Oextuh, Ki, "uxth %W=, %W0" },
83 { Oextsw, Ki, "sxtw %L=, %W0" },
84 { Oextuw, Ki, "mov %W=, %W0" },
85 { Oexts, Kd, "fcvt %=, %S0" },
86 { Otruncd, Ks, "fcvt %=, %D0" },
87 { Ocast, Kw, "fmov %=, %S0" },
88 { Ocast, Kl, "fmov %=, %D0" },
89 { Ocast, Ks, "fmov %=, %W0" },
90 { Ocast, Kd, "fmov %=, %L0" },
91 { Ostosi, Ka, "fcvtzs %=, %S0" },
92 { Odtosi, Ka, "fcvtzs %=, %D0" },
93 { Oswtof, Ka, "scvtf %=, %W0" },
94 { Osltof, Ka, "scvtf %=, %L0" },
95 { Ocall, Kw, "blr %L0" },
97 { Oacmp, Ki, "cmp %0, %1" },
98 { Oacmn, Ki, "cmn %0, %1" },
99 { Oafcmp, Ka, "fcmpe %0, %1" },
101 #define X(c, str) \
102 { Oflag+c, Ki, "cset %=, " str },
103 CMP(X)
104 #undef X
105 { NOp, 0, 0 }
108 static char *
109 rname(int r, int k)
111 static char buf[4];
113 if (r == SP) {
114 assert(k == Kl);
115 sprintf(buf, "sp");
117 else if (R0 <= r && r <= LR)
118 switch (k) {
119 default: die("invalid class");
120 case Kw: sprintf(buf, "w%d", r-R0); break;
121 case Kx:
122 case Kl: sprintf(buf, "x%d", r-R0); break;
124 else if (V0 <= r && r <= V30)
125 switch (k) {
126 default: die("invalid class");
127 case Ks: sprintf(buf, "s%d", r-V0); break;
128 case Kx:
129 case Kd: sprintf(buf, "d%d", r-V0); break;
131 else
132 die("invalid register");
133 return buf;
136 static uint64_t
137 slot(int s, E *e)
139 s = ((int32_t)s << 3) >> 3;
140 if (s == -1)
141 return 16 + e->frame;
142 if (s < 0) {
143 if (e->fn->vararg)
144 return 16 + e->frame + 192 - (s+2)*8;
145 else
146 return 16 + e->frame - (s+2)*8;
147 } else
148 return 16 + e->padding + 4 * s;
151 static void
152 emitf(char *s, Ins *i, E *e)
154 Ref r;
155 int k, c;
156 Con *pc;
157 unsigned n, sp;
159 fputc('\t', e->f);
161 sp = 0;
162 for (;;) {
163 k = i->cls;
164 while ((c = *s++) != '%')
165 if (c == ' ' && !sp) {
166 fputc('\t', e->f);
167 sp = 1;
168 } else if ( !c) {
169 fputc('\n', e->f);
170 return;
171 } else
172 fputc(c, e->f);
173 Switch:
174 switch ((c = *s++)) {
175 default:
176 die("invalid escape");
177 case 'W':
178 k = Kw;
179 goto Switch;
180 case 'L':
181 k = Kl;
182 goto Switch;
183 case 'S':
184 k = Ks;
185 goto Switch;
186 case 'D':
187 k = Kd;
188 goto Switch;
189 case '?':
190 if (KBASE(k) == 0)
191 fputs(rname(R18, k), e->f);
192 else
193 fputs(k==Ks ? "s31" : "d31", e->f);
194 break;
195 case '=':
196 case '0':
197 r = c == '=' ? i->to : i->arg[0];
198 assert(isreg(r));
199 fputs(rname(r.val, k), e->f);
200 break;
201 case '1':
202 r = i->arg[1];
203 switch (rtype(r)) {
204 default:
205 die("invalid second argument");
206 case RTmp:
207 assert(isreg(r));
208 fputs(rname(r.val, k), e->f);
209 break;
210 case RCon:
211 pc = &e->fn->con[r.val];
212 n = pc->bits.i;
213 assert(pc->type == CBits);
214 if (n & 0xfff000)
215 fprintf(e->f, "#%u, lsl #12", n>>12);
216 else
217 fprintf(e->f, "#%u", n);
218 break;
220 break;
221 case 'M':
222 c = *s++;
223 assert(c == '0' || c == '1' || c == '=');
224 r = c == '=' ? i->to : i->arg[c - '0'];
225 switch (rtype(r)) {
226 default:
227 die("todo (arm emit): unhandled ref");
228 case RTmp:
229 assert(isreg(r));
230 fprintf(e->f, "[%s]", rname(r.val, Kl));
231 break;
232 case RSlot:
233 fprintf(e->f, "[x29, %"PRIu64"]", slot(r.val, e));
234 break;
236 break;
241 static void
242 loadcon(Con *c, int r, int k, FILE *f)
244 char *rn, *p, off[32];
245 int64_t n;
246 int w, sh;
248 w = KWIDE(k);
249 rn = rname(r, k);
250 n = c->bits.i;
251 if (c->type == CAddr) {
252 rn = rname(r, Kl);
253 if (n)
254 sprintf(off, "+%"PRIi64, n);
255 else
256 off[0] = 0;
257 p = c->local ? ".L" : "";
258 fprintf(f, "\tadrp\t%s, %s%s%s\n",
259 rn, p, str(c->label), off);
260 fprintf(f, "\tadd\t%s, %s, #:lo12:%s%s%s\n",
261 rn, rn, p, str(c->label), off);
262 return;
264 assert(c->type == CBits);
265 if (!w)
266 n = (int32_t)n;
267 if ((n | 0xffff) == -1 || arm64_logimm(n, k)) {
268 fprintf(f, "\tmov\t%s, #%"PRIi64"\n", rn, n);
269 } else {
270 fprintf(f, "\tmov\t%s, #%d\n",
271 rn, (int)(n & 0xffff));
272 for (sh=16; n>>=16; sh+=16) {
273 if ((!w && sh == 32) || sh == 64)
274 break;
275 fprintf(f, "\tmovk\t%s, #0x%x, lsl #%d\n",
276 rn, (unsigned)(n & 0xffff), sh);
281 static void emitins(Ins *, E *);
283 static void
284 fixarg(Ref *pr, E *e)
286 Ins *i;
287 Ref r;
288 uint64_t s;
290 r = *pr;
291 if (rtype(r) == RSlot) {
292 s = slot(r.val, e);
293 if (s > 32760) {
294 i = &(Ins){Oaddr, Kl, TMP(IP0), {r}};
295 emitins(i, e);
296 *pr = TMP(IP0);
301 static void
302 emitins(Ins *i, E *e)
304 int o;
305 char *rn;
306 uint64_t s;
308 switch (i->op) {
309 default:
310 if (isload(i->op))
311 fixarg(&i->arg[0], e);
312 if (isstore(i->op))
313 fixarg(&i->arg[1], e);
314 Table:
315 /* most instructions are just pulled out of
316 * the table omap[], some special cases are
317 * detailed below */
318 for (o=0;; o++) {
319 /* this linear search should really be a binary
320 * search */
321 if (omap[o].op == NOp)
322 die("no match for %s(%c)",
323 optab[i->op].name, "wlsd"[i->cls]);
324 if (omap[o].op == i->op)
325 if (omap[o].cls == i->cls || omap[o].cls == Ka
326 || (omap[o].cls == Ki && KBASE(i->cls) == 0))
327 break;
329 emitf(omap[o].asm, i, e);
330 break;
331 case Onop:
332 break;
333 case Ocopy:
334 if (req(i->to, i->arg[0]))
335 break;
336 if (rtype(i->to) == RSlot) {
337 switch (rtype(i->arg[0])) {
338 case RSlot:
339 emitf("ldr %?, %M0\n\tstr %?, %M=", i, e);
340 break;
341 case RCon:
342 loadcon(&e->fn->con[i->arg[0].val], R18, i->cls, e->f);
343 emitf("str %?, %M=", i, e);
344 break;
345 default:
346 assert(isreg(i->arg[0]));
347 emitf("str %0, %M=", i, e);
349 break;
351 assert(isreg(i->to));
352 switch (rtype(i->arg[0])) {
353 case RCon:
354 loadcon(&e->fn->con[i->arg[0].val], i->to.val, i->cls, e->f);
355 break;
356 case RSlot:
357 emitf("ldr %=, %M0", i, e);
358 break;
359 default:
360 goto Table;
362 break;
363 case Oaddr:
364 assert(rtype(i->arg[0]) == RSlot);
365 rn = rname(i->to.val, Kl);
366 s = slot(i->arg[0].val, e);
367 if (s <= 4095)
368 fprintf(e->f, "\tadd\t%s, x29, #%"PRIu64"\n", rn, s);
369 else if (s <= 65535)
370 fprintf(e->f,
371 "\tmov\t%s, #%"PRIu64"\n"
372 "\tadd\t%s, x29, %s\n",
373 rn, s, rn, rn
375 else
376 fprintf(e->f,
377 "\tmov\t%s, #%"PRIu64"\n"
378 "\tmovk\t%s, #%"PRIu64", lsl #16\n"
379 "\tadd\t%s, x29, %s\n",
380 rn, s & 0xFFFF, rn, s >> 16, rn, rn
382 break;
386 static void
387 framelayout(E *e)
389 int *r;
390 uint o;
391 uint64_t f;
393 for (o=0, r=arm64_rclob; *r>=0; r++)
394 o += 1 & (e->fn->reg >> *r);
395 f = e->fn->slot;
396 f = (f + 3) & -4;
397 o += o & 1;
398 e->padding = 4*(f-e->fn->slot);
399 e->frame = 4*f + 8*o;
/*

  Stack-frame layout:

  +=============+
  | varargs     |
  |  save area  |
  +-------------+
  | callee-save |  ^
  |  registers  |  |
  +-------------+  |
  |    ...      |  |
  | spill slots |  |
  |    ...      |  | e->frame
  +-------------+  |
  |    ...      |  |
  |   locals    |  |
  |    ...      |  |
  +-------------+  |
  | e->padding  |  v
  +-------------+
  |  saved x29  |
  |  saved x30  |
  +=============+ <- x29

*/
429 void
430 arm64_emitfn(Fn *fn, FILE *out)
432 static char *ctoa[] = {
433 #define X(c, s) [c] = s,
434 CMP(X)
435 #undef X
437 static int id0;
438 int s, n, c, lbl, *r;
439 uint64_t o;
440 Blk *b, *t;
441 Ins *i;
442 E *e;
444 e = &(E){.f = out, .fn = fn};
445 framelayout(e);
447 fprintf(e->f, ".text\n");
448 if (e->fn->export)
449 fprintf(e->f, ".globl %s\n", e->fn->name);
450 fprintf(e->f, "%s:\n", e->fn->name);
452 if (e->fn->vararg) {
453 for (n=7; n>=0; n--)
454 fprintf(e->f, "\tstr\tq%d, [sp, -16]!\n", n);
455 for (n=7; n>=0; n-=2)
456 fprintf(e->f, "\tstp\tx%d, x%d, [sp, -16]!\n", n-1, n);
459 if (e->frame + 16 <= 512)
460 fprintf(e->f,
461 "\tstp\tx29, x30, [sp, -%"PRIu64"]!\n",
462 e->frame + 16
464 else if (e->frame <= 4095)
465 fprintf(e->f,
466 "\tsub\tsp, sp, #%"PRIu64"\n"
467 "\tstp\tx29, x30, [sp, -16]!\n",
468 e->frame
470 else if (e->frame <= 65535)
471 fprintf(e->f,
472 "\tmov\tx16, #%"PRIu64"\n"
473 "\tsub\tsp, sp, x16\n"
474 "\tstp\tx29, x30, [sp, -16]!\n",
475 e->frame
477 else
478 fprintf(e->f,
479 "\tmov\tx16, #%"PRIu64"\n"
480 "\tmovk\tx16, #%"PRIu64", lsl #16\n"
481 "\tsub\tsp, sp, x16\n"
482 "\tstp\tx29, x30, [sp, -16]!\n",
483 e->frame & 0xFFFF, e->frame >> 16
485 fputs("\tadd\tx29, sp, 0\n", e->f);
486 s = (e->frame - e->padding) / 4;
487 for (r=arm64_rclob; *r>=0; r++)
488 if (e->fn->reg & BIT(*r)) {
489 s -= 2;
490 i = &(Ins){.arg = {TMP(*r), SLOT(s)}};
491 i->op = *r >= V0 ? Ostored : Ostorel;
492 emitins(i, e);
495 for (lbl=0, b=e->fn->start; b; b=b->link) {
496 if (lbl || b->npred > 1)
497 fprintf(e->f, ".L%d:\n", id0+b->id);
498 for (i=b->ins; i!=&b->ins[b->nins]; i++)
499 emitins(i, e);
500 lbl = 1;
501 switch (b->jmp.type) {
502 case Jret0:
503 s = (e->frame - e->padding) / 4;
504 for (r=arm64_rclob; *r>=0; r++)
505 if (e->fn->reg & BIT(*r)) {
506 s -= 2;
507 i = &(Ins){Oload, 0, TMP(*r), {SLOT(s)}};
508 i->cls = *r >= V0 ? Kd : Kl;
509 emitins(i, e);
511 o = e->frame + 16;
512 if (e->fn->vararg)
513 o += 192;
514 if (o <= 504)
515 fprintf(e->f,
516 "\tldp\tx29, x30, [sp], %"PRIu64"\n",
519 else if (o - 16 <= 4095)
520 fprintf(e->f,
521 "\tldp\tx29, x30, [sp], 16\n"
522 "\tadd\tsp, sp, #%"PRIu64"\n",
523 o - 16
525 else if (o - 16 <= 65535)
526 fprintf(e->f,
527 "\tldp\tx29, x30, [sp], 16\n"
528 "\tmov\tx16, #%"PRIu64"\n"
529 "\tadd\tsp, sp, x16\n",
530 o - 16
532 else
533 fprintf(e->f,
534 "\tldp\tx29, x30, [sp], 16\n"
535 "\tmov\tx16, #%"PRIu64"\n"
536 "\tmovk\tx16, #%"PRIu64", lsl #16\n"
537 "\tadd\tsp, sp, x16\n",
538 (o - 16) & 0xFFFF, (o - 16) >> 16
540 fprintf(e->f, "\tret\n");
541 break;
542 case Jjmp:
543 Jmp:
544 if (b->s1 != b->link)
545 fprintf(e->f, "\tb\t.L%d\n", id0+b->s1->id);
546 else
547 lbl = 0;
548 break;
549 default:
550 c = b->jmp.type - Jjf;
551 if (c < 0 || c > NCmp)
552 die("unhandled jump %d", b->jmp.type);
553 if (b->link == b->s2) {
554 t = b->s1;
555 b->s1 = b->s2;
556 b->s2 = t;
557 } else
558 c = cmpneg(c);
559 fprintf(e->f, "\tb%s\t.L%d\n", ctoa[c], id0+b->s2->id);
560 goto Jmp;
563 id0 += e->fn->nblk;