/* flag types defined as unions
 * [qbe.git] / arm64 / abi.c
 * blob 6ed393dc2f948f2d4fa57936a4081a0fc00a6980
 */
1 #include "all.h"
/* short names for the file-local helper structures */
typedef struct Class Class;
typedef struct Insl Insl;
typedef struct Params Params;
/* flag bits or-ed into Class.class */
enum {
	Cstk = 1, /* pass on the stack */
	Cptr = 2, /* replaced by a pointer */
};
12 struct Class {
13 char class;
14 char ishfa;
15 struct {
16 char base;
17 uchar size;
18 } hfa;
19 uint size;
20 Typ *t;
21 uchar nreg;
22 uchar ngp;
23 uchar nfp;
24 int reg[4];
25 int cls[4];
28 struct Insl {
29 Ins i;
30 Insl *link;
33 struct Params {
34 uint ngp;
35 uint nfp;
36 uint nstk;
39 static int gpreg[12] = {R0, R1, R2, R3, R4, R5, R6, R7};
40 static int fpreg[12] = {V0, V1, V2, V3, V4, V5, V6, V7};
/* layout of call's second argument (RCall)
 *
 *  29   13    9    5   2  0
 *  |0.00|x|xxxx|xxxx|xxx|xx|                  range
 *        |    |    |   |  ` gp regs returned (0..2)
 *        |    |    |   ` fp regs returned    (0..4)
 *        |    |    ` gp regs passed          (0..8)
 *        |     ` fp regs passed              (0..8)
 *        ` is x8 used                        (0..1)
 */
53 static int
54 isfloatv(Typ *t, char *cls)
56 Field *f;
57 uint n;
59 for (n=0; n<t->nunion; n++)
60 for (f=t->fields[n]; f->type != FEnd; f++)
61 switch (f->type) {
62 case Fs:
63 if (*cls == Kd)
64 return 0;
65 *cls = Ks;
66 break;
67 case Fd:
68 if (*cls == Ks)
69 return 0;
70 *cls = Kd;
71 break;
72 case FTyp:
73 if (isfloatv(&typ[f->len], cls))
74 break;
75 /* fall through */
76 default:
77 return 0;
79 return 1;
82 static void
83 typclass(Class *c, Typ *t, int *gp, int *fp)
85 uint64_t sz;
86 uint n;
88 sz = (t->size + 7) & -8;
89 c->t = t;
90 c->class = 0;
91 c->ngp = 0;
92 c->nfp = 0;
94 if (t->align > 4)
95 err("alignments larger than 16 are not supported");
97 if (t->isdark || sz > 16 || sz == 0) {
98 /* large structs are replaced by a
99 * pointer to some caller-allocated
100 * memory */
101 c->class |= Cptr;
102 c->size = 8;
103 return;
106 c->size = sz;
107 c->hfa.base = Kx;
108 c->ishfa = isfloatv(t, &c->hfa.base);
109 c->hfa.size = t->size/(KWIDE(c->hfa.base) ? 8 : 4);
111 if (c->ishfa)
112 for (n=0; n<c->hfa.size; n++, c->nfp++) {
113 c->reg[n] = *fp++;
114 c->cls[n] = c->hfa.base;
116 else
117 for (n=0; n<sz/8; n++, c->ngp++) {
118 c->reg[n] = *gp++;
119 c->cls[n] = Kl;
122 c->nreg = n;
125 static void
126 sttmps(Ref tmp[], int cls[], uint nreg, Ref mem, Fn *fn)
128 static int st[] = {
129 [Kw] = Ostorew, [Kl] = Ostorel,
130 [Ks] = Ostores, [Kd] = Ostored
132 uint n;
133 uint64_t off;
134 Ref r;
136 assert(nreg <= 4);
137 off = 0;
138 for (n=0; n<nreg; n++) {
139 tmp[n] = newtmp("abi", cls[n], fn);
140 r = newtmp("abi", Kl, fn);
141 emit(st[cls[n]], 0, R, tmp[n], r);
142 emit(Oadd, Kl, r, mem, getcon(off, fn));
143 off += KWIDE(cls[n]) ? 8 : 4;
147 /* todo, may read out of bounds */
148 static void
149 ldregs(int reg[], int cls[], int n, Ref mem, Fn *fn)
151 int i;
152 uint64_t off;
153 Ref r;
155 off = 0;
156 for (i=0; i<n; i++) {
157 r = newtmp("abi", Kl, fn);
158 emit(Oload, cls[i], TMP(reg[i]), r, R);
159 emit(Oadd, Kl, r, mem, getcon(off, fn));
160 off += KWIDE(cls[i]) ? 8 : 4;
164 static void
165 selret(Blk *b, Fn *fn)
167 int j, k, cty;
168 Ref r;
169 Class cr;
171 j = b->jmp.type;
173 if (!isret(j) || j == Jret0)
174 return;
176 r = b->jmp.arg;
177 b->jmp.type = Jret0;
179 if (j == Jretc) {
180 typclass(&cr, &typ[fn->retty], gpreg, fpreg);
181 cty = (cr.nfp << 2) | cr.ngp;
182 if (cr.class & Cptr) {
183 assert(rtype(fn->retr) == RTmp);
184 blit0(fn->retr, r, cr.t->size, fn);
185 } else
186 ldregs(cr.reg, cr.cls, cr.nreg, r, fn);
187 } else {
188 k = j - Jretw;
189 if (KBASE(k) == 0) {
190 emit(Ocopy, k, TMP(R0), r, R);
191 cty = 1;
192 } else {
193 emit(Ocopy, k, TMP(V0), r, R);
194 cty = 1 << 2;
198 b->jmp.arg = CALL(cty);
201 static int
202 argsclass(Ins *i0, Ins *i1, Class *carg, Ref *env)
204 int ngp, nfp, *gp, *fp;
205 Class *c;
206 Ins *i;
208 gp = gpreg;
209 fp = fpreg;
210 ngp = 8;
211 nfp = 8;
212 for (i=i0, c=carg; i<i1; i++, c++)
213 switch (i->op) {
214 case Opar:
215 case Oarg:
216 *c->cls = i->cls;
217 c->size = 8;
218 if (KBASE(i->cls) == 0 && ngp > 0) {
219 ngp--;
220 *c->reg = *gp++;
221 break;
223 if (KBASE(i->cls) == 1 && nfp > 0) {
224 nfp--;
225 *c->reg = *fp++;
226 break;
228 c->class |= Cstk;
229 break;
230 case Oparc:
231 case Oargc:
232 typclass(c, &typ[i->arg[0].val], gp, fp);
233 if (c->class & Cptr) {
234 if (ngp > 0) {
235 ngp--;
236 *c->reg = *gp++;
237 *c->cls = Kl;
238 break;
240 } else if (c->ngp <= ngp) {
241 if (c->nfp <= nfp) {
242 ngp -= c->ngp;
243 nfp -= c->nfp;
244 gp += c->ngp;
245 fp += c->nfp;
246 break;
247 } else
248 nfp = 0;
249 } else
250 ngp = 0;
251 c->class |= Cstk;
252 break;
253 case Opare:
254 *env = i->to;
255 break;
256 case Oarge:
257 *env = i->arg[0];
258 break;
259 case Oargv:
260 break;
261 default:
262 die("unreachable");
265 return ((gp-gpreg) << 5) | ((fp-fpreg) << 9);
268 bits
269 arm64_retregs(Ref r, int p[2])
271 bits b;
272 int ngp, nfp;
274 assert(rtype(r) == RCall);
275 ngp = r.val & 3;
276 nfp = (r.val >> 2) & 7;
277 if (p) {
278 p[0] = ngp;
279 p[1] = nfp;
281 b = 0;
282 while (ngp--)
283 b |= BIT(R0+ngp);
284 while (nfp--)
285 b |= BIT(V0+nfp);
286 return b;
289 bits
290 arm64_argregs(Ref r, int p[2])
292 bits b;
293 int ngp, nfp, x8;
295 assert(rtype(r) == RCall);
296 ngp = (r.val >> 5) & 15;
297 nfp = (r.val >> 9) & 15;
298 x8 = (r.val >> 13) & 1;
299 if (p) {
300 p[0] = ngp + x8;
301 p[1] = nfp;
303 b = 0;
304 while (ngp--)
305 b |= BIT(R0+ngp);
306 while (nfp--)
307 b |= BIT(V0+nfp);
308 return b | ((bits)x8 << R8);
311 static void
312 stkblob(Ref r, Class *c, Fn *fn, Insl **ilp)
314 Insl *il;
315 int al;
316 uint64_t sz;
318 il = alloc(sizeof *il);
319 al = c->t->align - 2; /* NAlign == 3 */
320 if (al < 0)
321 al = 0;
322 sz = c->class & Cptr ? c->t->size : c->size;
323 il->i = (Ins){Oalloc+al, Kl, r, {getcon(sz, fn)}};
324 il->link = *ilp;
325 *ilp = il;
328 static void
329 selcall(Fn *fn, Ins *i0, Ins *i1, Insl **ilp)
331 Ins *i;
332 Class *ca, *c, cr;
333 int cty, envc;
334 uint n;
335 uint64_t stk, off;
336 Ref r, rstk, env, tmp[4];
338 env = R;
339 ca = alloc((i1-i0) * sizeof ca[0]);
340 cty = argsclass(i0, i1, ca, &env);
342 stk = 0;
343 for (i=i0, c=ca; i<i1; i++, c++) {
344 if (c->class & Cptr) {
345 i->arg[0] = newtmp("abi", Kl, fn);
346 stkblob(i->arg[0], c, fn, ilp);
347 i->op = Oarg;
349 if (c->class & Cstk)
350 stk += c->size;
352 stk += stk & 15;
353 rstk = getcon(stk, fn);
354 if (stk)
355 emit(Oadd, Kl, TMP(SP), TMP(SP), rstk);
357 if (!req(i1->arg[1], R)) {
358 typclass(&cr, &typ[i1->arg[1].val], gpreg, fpreg);
359 stkblob(i1->to, &cr, fn, ilp);
360 cty |= (cr.nfp << 2) | cr.ngp;
361 if (cr.class & Cptr) {
362 /* spill & rega expect calls to be
363 * followed by copies from regs,
364 * so we emit a dummy
366 cty |= 1 << 13 | 1;
367 emit(Ocopy, Kw, R, TMP(R0), R);
368 } else {
369 sttmps(tmp, cr.cls, cr.nreg, i1->to, fn);
370 for (n=0; n<cr.nreg; n++) {
371 r = TMP(cr.reg[n]);
372 emit(Ocopy, cr.cls[n], tmp[n], r, R);
375 } else {
376 if (KBASE(i1->cls) == 0) {
377 emit(Ocopy, i1->cls, i1->to, TMP(R0), R);
378 cty |= 1;
379 } else {
380 emit(Ocopy, i1->cls, i1->to, TMP(V0), R);
381 cty |= 1 << 2;
385 emit(Ocall, 0, R, i1->arg[0], CALL(cty));
387 envc = !req(R, env);
388 if (envc)
389 die("todo: env calls");
391 if (cty & (1 << 13))
392 /* struct return argument */
393 emit(Ocopy, Kl, TMP(R8), i1->to, R);
395 for (i=i0, c=ca; i<i1; i++, c++) {
396 if ((c->class & Cstk) != 0)
397 continue;
398 if (i->op == Oarg)
399 emit(Ocopy, *c->cls, TMP(*c->reg), i->arg[0], R);
400 if (i->op == Oargc)
401 ldregs(c->reg, c->cls, c->nreg, i->arg[1], fn);
404 off = 0;
405 for (i=i0, c=ca; i<i1; i++, c++) {
406 if ((c->class & Cstk) == 0)
407 continue;
408 if (i->op == Oarg) {
409 r = newtmp("abi", Kl, fn);
410 emit(Ostorel, 0, R, i->arg[0], r);
411 emit(Oadd, Kl, r, TMP(SP), getcon(off, fn));
413 if (i->op == Oargc)
414 blit(TMP(SP), off, i->arg[1], 0, c->size, fn);
415 off += c->size;
417 if (stk)
418 emit(Osub, Kl, TMP(SP), TMP(SP), rstk);
420 for (i=i0, c=ca; i<i1; i++, c++)
421 if (c->class & Cptr)
422 blit0(i->arg[0], i->arg[1], c->t->size, fn);
425 static Params
426 selpar(Fn *fn, Ins *i0, Ins *i1)
428 Class *ca, *c, cr;
429 Insl *il;
430 Ins *i;
431 int n, s, cty;
432 Ref r, env, tmp[16], *t;
434 env = R;
435 ca = alloc((i1-i0) * sizeof ca[0]);
436 curi = &insb[NIns];
438 cty = argsclass(i0, i1, ca, &env);
439 fn->reg = arm64_argregs(CALL(cty), 0);
441 il = 0;
442 t = tmp;
443 for (i=i0, c=ca; i<i1; i++, c++) {
444 if (i->op != Oparc || (c->class & (Cptr|Cstk)))
445 continue;
446 sttmps(t, c->cls, c->nreg, i->to, fn);
447 stkblob(i->to, c, fn, &il);
448 t += c->nreg;
450 for (; il; il=il->link)
451 emiti(il->i);
453 if (fn->retty >= 0) {
454 typclass(&cr, &typ[fn->retty], gpreg, fpreg);
455 if (cr.class & Cptr) {
456 fn->retr = newtmp("abi", Kl, fn);
457 emit(Ocopy, Kl, fn->retr, TMP(R8), R);
458 fn->reg |= BIT(R8);
462 t = tmp;
463 for (i=i0, c=ca, s=2; i<i1; i++, c++) {
464 if (i->op == Oparc
465 && (c->class & Cptr) == 0) {
466 if (c->class & Cstk) {
467 fn->tmp[i->to.val].slot = -s;
468 s += c->size / 8;
469 } else
470 for (n=0; n<c->nreg; n++) {
471 r = TMP(c->reg[n]);
472 emit(Ocopy, c->cls[n], *t++, r, R);
474 } else if (c->class & Cstk) {
475 r = newtmp("abi", Kl, fn);
476 emit(Oload, *c->cls, i->to, r, R);
477 emit(Oaddr, Kl, r, SLOT(-s), R);
478 s++;
479 } else {
480 r = TMP(*c->reg);
481 emit(Ocopy, *c->cls, i->to, r, R);
485 if (!req(R, env))
486 die("todo: env calls");
488 return (Params){
489 .nstk = s - 2,
490 .ngp = (cty >> 5) & 15,
491 .nfp = (cty >> 9) & 15
495 static Blk *
496 split(Fn *fn, Blk *b)
498 Blk *bn;
500 ++fn->nblk;
501 bn = blknew();
502 bn->nins = &insb[NIns] - curi;
503 idup(&bn->ins, curi, bn->nins);
504 curi = &insb[NIns];
505 bn->visit = ++b->visit;
506 (void)!snprintf(bn->name, NString, "%s.%d", b->name, b->visit);
507 bn->loop = b->loop;
508 bn->link = b->link;
509 b->link = bn;
510 return bn;
513 static void
514 chpred(Blk *b, Blk *bp, Blk *bp1)
516 Phi *p;
517 uint a;
519 for (p=b->phi; p; p=p->link) {
520 for (a=0; p->blk[a]!=bp; a++)
521 assert(a+1<p->narg);
522 p->blk[a] = bp1;
526 static void
527 selvaarg(Fn *fn, Blk *b, Ins *i)
529 Ref loc, lreg, lstk, nr, r0, r1, c8, c16, c24, c28, ap;
530 Blk *b0, *bstk, *breg;
531 int isgp;
533 c8 = getcon(8, fn);
534 c16 = getcon(16, fn);
535 c24 = getcon(24, fn);
536 c28 = getcon(28, fn);
537 ap = i->arg[0];
538 isgp = KBASE(i->cls) == 0;
540 /* @b [...]
541 r0 =l add ap, (24 or 28)
542 nr =l loadsw r0
543 r1 =w csltw nr, 0
544 jnz r1, @breg, @bstk
545 @breg
546 r0 =l add ap, (8 or 16)
547 r1 =l loadl r0
548 lreg =l add r1, nr
549 r0 =w add nr, (8 or 16)
550 r1 =l add ap, (24 or 28)
551 storew r0, r1
552 @bstk
553 lstk =l loadl ap
554 r0 =l add lstk, 8
555 storel r0, ap
557 %loc =l phi @breg %lreg, @bstk %lstk
558 i->to =(i->cls) load %loc
561 loc = newtmp("abi", Kl, fn);
562 emit(Oload, i->cls, i->to, loc, R);
563 b0 = split(fn, b);
564 b0->jmp = b->jmp;
565 b0->s1 = b->s1;
566 b0->s2 = b->s2;
567 if (b->s1)
568 chpred(b->s1, b, b0);
569 if (b->s2 && b->s2 != b->s1)
570 chpred(b->s2, b, b0);
572 lreg = newtmp("abi", Kl, fn);
573 nr = newtmp("abi", Kl, fn);
574 r0 = newtmp("abi", Kw, fn);
575 r1 = newtmp("abi", Kl, fn);
576 emit(Ostorew, Kw, R, r0, r1);
577 emit(Oadd, Kl, r1, ap, isgp ? c24 : c28);
578 emit(Oadd, Kw, r0, nr, isgp ? c8 : c16);
579 r0 = newtmp("abi", Kl, fn);
580 r1 = newtmp("abi", Kl, fn);
581 emit(Oadd, Kl, lreg, r1, nr);
582 emit(Oload, Kl, r1, r0, R);
583 emit(Oadd, Kl, r0, ap, isgp ? c8 : c16);
584 breg = split(fn, b);
585 breg->jmp.type = Jjmp;
586 breg->s1 = b0;
588 lstk = newtmp("abi", Kl, fn);
589 r0 = newtmp("abi", Kl, fn);
590 emit(Ostorel, Kw, R, r0, ap);
591 emit(Oadd, Kl, r0, lstk, c8);
592 emit(Oload, Kl, lstk, ap, R);
593 bstk = split(fn, b);
594 bstk->jmp.type = Jjmp;
595 bstk->s1 = b0;
597 b0->phi = alloc(sizeof *b0->phi);
598 *b0->phi = (Phi){
599 .cls = Kl, .to = loc,
600 .narg = 2,
601 .blk = vnew(2, sizeof b0->phi->blk[0], Pfn),
602 .arg = vnew(2, sizeof b0->phi->arg[0], Pfn),
604 b0->phi->blk[0] = bstk;
605 b0->phi->blk[1] = breg;
606 b0->phi->arg[0] = lstk;
607 b0->phi->arg[1] = lreg;
608 r0 = newtmp("abi", Kl, fn);
609 r1 = newtmp("abi", Kw, fn);
610 b->jmp.type = Jjnz;
611 b->jmp.arg = r1;
612 b->s1 = breg;
613 b->s2 = bstk;
614 emit(Ocmpw+Cislt, Kw, r1, nr, CON_Z);
615 emit(Oloadsw, Kl, nr, r0, R);
616 emit(Oadd, Kl, r0, ap, isgp ? c24 : c28);
619 static void
620 selvastart(Fn *fn, Params p, Ref ap)
622 Ref r0, r1, rsave;
624 rsave = newtmp("abi", Kl, fn);
626 r0 = newtmp("abi", Kl, fn);
627 emit(Ostorel, Kw, R, r0, ap);
628 emit(Oadd, Kl, r0, rsave, getcon(p.nstk*8 + 192, fn));
630 r0 = newtmp("abi", Kl, fn);
631 r1 = newtmp("abi", Kl, fn);
632 emit(Ostorel, Kw, R, r1, r0);
633 emit(Oadd, Kl, r1, rsave, getcon(64, fn));
634 emit(Oadd, Kl, r0, ap, getcon(8, fn));
636 r0 = newtmp("abi", Kl, fn);
637 r1 = newtmp("abi", Kl, fn);
638 emit(Ostorel, Kw, R, r1, r0);
639 emit(Oadd, Kl, r1, rsave, getcon(192, fn));
640 emit(Oaddr, Kl, rsave, SLOT(-1), R);
641 emit(Oadd, Kl, r0, ap, getcon(16, fn));
643 r0 = newtmp("abi", Kl, fn);
644 emit(Ostorew, Kw, R, getcon((p.ngp-8)*8, fn), r0);
645 emit(Oadd, Kl, r0, ap, getcon(24, fn));
647 r0 = newtmp("abi", Kl, fn);
648 emit(Ostorew, Kw, R, getcon((p.nfp-8)*16, fn), r0);
649 emit(Oadd, Kl, r0, ap, getcon(28, fn));
652 void
653 arm64_abi(Fn *fn)
655 Blk *b;
656 Ins *i, *i0, *ip;
657 Insl *il;
658 int n;
659 Params p;
661 for (b=fn->start; b; b=b->link)
662 b->visit = 0;
664 /* lower parameters */
665 for (b=fn->start, i=b->ins; i<&b->ins[b->nins]; i++)
666 if (!ispar(i->op))
667 break;
668 p = selpar(fn, b->ins, i);
669 n = b->nins - (i - b->ins) + (&insb[NIns] - curi);
670 i0 = alloc(n * sizeof(Ins));
671 ip = icpy(ip = i0, curi, &insb[NIns] - curi);
672 ip = icpy(ip, i, &b->ins[b->nins] - i);
673 b->nins = n;
674 b->ins = i0;
676 /* lower calls, returns, and vararg instructions */
677 il = 0;
678 b = fn->start;
679 do {
680 if (!(b = b->link))
681 b = fn->start; /* do it last */
682 if (b->visit)
683 continue;
684 curi = &insb[NIns];
685 selret(b, fn);
686 for (i=&b->ins[b->nins]; i!=b->ins;)
687 switch ((--i)->op) {
688 default:
689 emiti(*i);
690 break;
691 case Ocall:
692 for (i0=i; i0>b->ins; i0--)
693 if (!isarg((i0-1)->op))
694 break;
695 selcall(fn, i0, i, &il);
696 i = i0;
697 break;
698 case Ovastart:
699 selvastart(fn, p, i->arg[0]);
700 break;
701 case Ovaarg:
702 selvaarg(fn, b, i);
703 break;
704 case Oarg:
705 case Oargc:
706 die("unreachable");
708 if (b == fn->start)
709 for (; il; il=il->link)
710 emiti(il->i);
711 b->nins = &insb[NIns] - curi;
712 idup(&b->ins, curi, b->nins);
713 } while (b != fn->start);
715 if (debug['A']) {
716 fprintf(stderr, "\n> After ABI lowering:\n");
717 printfn(fn, stderr);