/* Repository view header (kept for provenance):
 *   qbe.git / arm64 / abi.c
 *   blob b2b597351b26a0c52abba2501fda1f8c3b28faf7
 *   "fix some variadic calls in test/abi8.ssa"
 */
1 #include "all.h"
3 typedef struct Class Class;
4 typedef struct Insl Insl;
5 typedef struct Params Params;
/* flag bits stored in Class.class; they may be combined */
enum {
	Cstk = 1, /* pass on the stack */
	Cptr = 2, /* replaced by a pointer */
};
12 struct Class {
13 char class;
14 char ishfa;
15 struct {
16 char base;
17 uchar size;
18 } hfa;
19 uint size;
20 Typ *t;
21 uchar nreg;
22 uchar ngp;
23 uchar nfp;
24 int reg[4];
25 int cls[4];
28 struct Insl {
29 Ins i;
30 Insl *link;
33 struct Params {
34 uint ngp;
35 uint nfp;
36 uint nstk;
39 static int gpreg[12] = {R0, R1, R2, R3, R4, R5, R6, R7};
40 static int fpreg[12] = {V0, V1, V2, V3, V4, V5, V6, V7};
/* layout of call's second argument (RCall)
 *
 *         13
 *  29   14 |    9    5   2  0
 *  |0.00|x|x|xxxx|xxxx|xxx|xx|                  range
 *        | |    |    |   |  ` gp regs returned (0..2)
 *        | |    |    |   ` fp regs returned    (0..4)
 *        | |    |    ` gp regs passed          (0..8)
 *        | |     ` fp regs passed              (0..8)
 *        | ` indirect result register x8 used  (0..1)
 *        ` env pointer passed in x9            (0..1)
 */
55 static int
56 isfloatv(Typ *t, char *cls)
58 Field *f;
59 uint n;
61 for (n=0; n<t->nunion; n++)
62 for (f=t->fields[n]; f->type != FEnd; f++)
63 switch (f->type) {
64 case Fs:
65 if (*cls == Kd)
66 return 0;
67 *cls = Ks;
68 break;
69 case Fd:
70 if (*cls == Ks)
71 return 0;
72 *cls = Kd;
73 break;
74 case FTyp:
75 if (isfloatv(&typ[f->len], cls))
76 break;
77 /* fall through */
78 default:
79 return 0;
81 return 1;
84 static void
85 typclass(Class *c, Typ *t, int *gp, int *fp)
87 uint64_t sz;
88 uint n;
90 sz = (t->size + 7) & -8;
91 c->t = t;
92 c->class = 0;
93 c->ngp = 0;
94 c->nfp = 0;
96 if (t->align > 4)
97 err("alignments larger than 16 are not supported");
99 if (t->isdark || sz > 16 || sz == 0) {
100 /* large structs are replaced by a
101 * pointer to some caller-allocated
102 * memory */
103 c->class |= Cptr;
104 c->size = 8;
105 c->ngp = 1;
106 *c->reg = *gp;
107 *c->cls = Kl;
108 return;
111 c->size = sz;
112 c->hfa.base = Kx;
113 c->ishfa = isfloatv(t, &c->hfa.base);
114 c->hfa.size = t->size/(KWIDE(c->hfa.base) ? 8 : 4);
116 if (c->ishfa)
117 for (n=0; n<c->hfa.size; n++, c->nfp++) {
118 c->reg[n] = *fp++;
119 c->cls[n] = c->hfa.base;
121 else
122 for (n=0; n<sz/8; n++, c->ngp++) {
123 c->reg[n] = *gp++;
124 c->cls[n] = Kl;
127 c->nreg = n;
130 static void
131 sttmps(Ref tmp[], int cls[], uint nreg, Ref mem, Fn *fn)
133 static int st[] = {
134 [Kw] = Ostorew, [Kl] = Ostorel,
135 [Ks] = Ostores, [Kd] = Ostored
137 uint n;
138 uint64_t off;
139 Ref r;
141 assert(nreg <= 4);
142 off = 0;
143 for (n=0; n<nreg; n++) {
144 tmp[n] = newtmp("abi", cls[n], fn);
145 r = newtmp("abi", Kl, fn);
146 emit(st[cls[n]], 0, R, tmp[n], r);
147 emit(Oadd, Kl, r, mem, getcon(off, fn));
148 off += KWIDE(cls[n]) ? 8 : 4;
152 /* todo, may read out of bounds */
153 static void
154 ldregs(int reg[], int cls[], int n, Ref mem, Fn *fn)
156 int i;
157 uint64_t off;
158 Ref r;
160 off = 0;
161 for (i=0; i<n; i++) {
162 r = newtmp("abi", Kl, fn);
163 emit(Oload, cls[i], TMP(reg[i]), r, R);
164 emit(Oadd, Kl, r, mem, getcon(off, fn));
165 off += KWIDE(cls[i]) ? 8 : 4;
169 static void
170 selret(Blk *b, Fn *fn)
172 int j, k, cty;
173 Ref r;
174 Class cr;
176 j = b->jmp.type;
178 if (!isret(j) || j == Jret0)
179 return;
181 r = b->jmp.arg;
182 b->jmp.type = Jret0;
184 if (j == Jretc) {
185 typclass(&cr, &typ[fn->retty], gpreg, fpreg);
186 if (cr.class & Cptr) {
187 assert(rtype(fn->retr) == RTmp);
188 blit0(fn->retr, r, cr.t->size, fn);
189 cty = 0;
190 } else {
191 ldregs(cr.reg, cr.cls, cr.nreg, r, fn);
192 cty = (cr.nfp << 2) | cr.ngp;
194 } else {
195 k = j - Jretw;
196 if (KBASE(k) == 0) {
197 emit(Ocopy, k, TMP(R0), r, R);
198 cty = 1;
199 } else {
200 emit(Ocopy, k, TMP(V0), r, R);
201 cty = 1 << 2;
205 b->jmp.arg = CALL(cty);
208 static int
209 argsclass(Ins *i0, Ins *i1, Class *carg)
211 int envc, ngp, nfp, *gp, *fp;
212 Class *c;
213 Ins *i;
215 envc = 0;
216 gp = gpreg;
217 fp = fpreg;
218 ngp = 8;
219 nfp = 8;
220 for (i=i0, c=carg; i<i1; i++, c++)
221 switch (i->op) {
222 case Opar:
223 case Oarg:
224 *c->cls = i->cls;
225 c->size = 8;
226 if (KBASE(i->cls) == 0 && ngp > 0) {
227 ngp--;
228 *c->reg = *gp++;
229 break;
231 if (KBASE(i->cls) == 1 && nfp > 0) {
232 nfp--;
233 *c->reg = *fp++;
234 break;
236 c->class |= Cstk;
237 break;
238 case Oparc:
239 case Oargc:
240 typclass(c, &typ[i->arg[0].val], gp, fp);
241 if (c->ngp <= ngp) {
242 if (c->nfp <= nfp) {
243 ngp -= c->ngp;
244 nfp -= c->nfp;
245 gp += c->ngp;
246 fp += c->nfp;
247 break;
248 } else
249 nfp = 0;
250 } else
251 ngp = 0;
252 c->class |= Cstk;
253 break;
254 case Opare:
255 case Oarge:
256 *c->reg = R9;
257 *c->cls = Kl;
258 envc = 1;
259 break;
260 case Oargv:
261 break;
262 default:
263 die("unreachable");
266 return envc << 14 | (gp-gpreg) << 5 | (fp-fpreg) << 9;
269 bits
270 arm64_retregs(Ref r, int p[2])
272 bits b;
273 int ngp, nfp;
275 assert(rtype(r) == RCall);
276 ngp = r.val & 3;
277 nfp = (r.val >> 2) & 7;
278 if (p) {
279 p[0] = ngp;
280 p[1] = nfp;
282 b = 0;
283 while (ngp--)
284 b |= BIT(R0+ngp);
285 while (nfp--)
286 b |= BIT(V0+nfp);
287 return b;
290 bits
291 arm64_argregs(Ref r, int p[2])
293 bits b;
294 int ngp, nfp, x8, x9;
296 assert(rtype(r) == RCall);
297 ngp = (r.val >> 5) & 15;
298 nfp = (r.val >> 9) & 15;
299 x8 = (r.val >> 13) & 1;
300 x9 = (r.val >> 14) & 1;
301 if (p) {
302 p[0] = ngp + x8 + x9;
303 p[1] = nfp;
305 b = 0;
306 while (ngp--)
307 b |= BIT(R0+ngp);
308 while (nfp--)
309 b |= BIT(V0+nfp);
310 return b | ((bits)x8 << R8) | ((bits)x9 << R9);
313 static void
314 stkblob(Ref r, Class *c, Fn *fn, Insl **ilp)
316 Insl *il;
317 int al;
318 uint64_t sz;
320 il = alloc(sizeof *il);
321 al = c->t->align - 2; /* NAlign == 3 */
322 if (al < 0)
323 al = 0;
324 sz = c->class & Cptr ? c->t->size : c->size;
325 il->i = (Ins){Oalloc+al, Kl, r, {getcon(sz, fn)}};
326 il->link = *ilp;
327 *ilp = il;
330 static void
331 selcall(Fn *fn, Ins *i0, Ins *i1, Insl **ilp)
333 Ins *i;
334 Class *ca, *c, cr;
335 int cty;
336 uint n;
337 uint64_t stk, off;
338 Ref r, rstk, tmp[4];
340 ca = alloc((i1-i0) * sizeof ca[0]);
341 cty = argsclass(i0, i1, ca);
343 stk = 0;
344 for (i=i0, c=ca; i<i1; i++, c++) {
345 if (c->class & Cptr) {
346 i->arg[0] = newtmp("abi", Kl, fn);
347 stkblob(i->arg[0], c, fn, ilp);
348 i->op = Oarg;
350 if (c->class & Cstk)
351 stk += c->size;
353 stk += stk & 15;
354 rstk = getcon(stk, fn);
355 if (stk)
356 emit(Oadd, Kl, TMP(SP), TMP(SP), rstk);
358 if (!req(i1->arg[1], R)) {
359 typclass(&cr, &typ[i1->arg[1].val], gpreg, fpreg);
360 stkblob(i1->to, &cr, fn, ilp);
361 cty |= (cr.nfp << 2) | cr.ngp;
362 if (cr.class & Cptr) {
363 /* spill & rega expect calls to be
364 * followed by copies from regs,
365 * so we emit a dummy
367 cty |= 1 << 13 | 1;
368 emit(Ocopy, Kw, R, TMP(R0), R);
369 } else {
370 sttmps(tmp, cr.cls, cr.nreg, i1->to, fn);
371 for (n=0; n<cr.nreg; n++) {
372 r = TMP(cr.reg[n]);
373 emit(Ocopy, cr.cls[n], tmp[n], r, R);
376 } else {
377 if (KBASE(i1->cls) == 0) {
378 emit(Ocopy, i1->cls, i1->to, TMP(R0), R);
379 cty |= 1;
380 } else {
381 emit(Ocopy, i1->cls, i1->to, TMP(V0), R);
382 cty |= 1 << 2;
386 emit(Ocall, 0, R, i1->arg[0], CALL(cty));
388 if (cty & (1 << 13))
389 /* struct return argument */
390 emit(Ocopy, Kl, TMP(R8), i1->to, R);
392 for (i=i0, c=ca; i<i1; i++, c++) {
393 if ((c->class & Cstk) != 0)
394 continue;
395 if (i->op == Oarg || i->op == Oarge)
396 emit(Ocopy, *c->cls, TMP(*c->reg), i->arg[0], R);
397 if (i->op == Oargc)
398 ldregs(c->reg, c->cls, c->nreg, i->arg[1], fn);
401 /* populate the stack */
402 off = 0;
403 for (i=i0, c=ca; i<i1; i++, c++) {
404 if ((c->class & Cstk) == 0)
405 continue;
406 if (i->op == Oarg) {
407 r = newtmp("abi", Kl, fn);
408 emit(Ostorel, 0, R, i->arg[0], r);
409 emit(Oadd, Kl, r, TMP(SP), getcon(off, fn));
411 if (i->op == Oargc)
412 blit(TMP(SP), off, i->arg[1], 0, c->size, fn);
413 off += c->size;
415 if (stk)
416 emit(Osub, Kl, TMP(SP), TMP(SP), rstk);
418 for (i=i0, c=ca; i<i1; i++, c++)
419 if (c->class & Cptr)
420 blit0(i->arg[0], i->arg[1], c->t->size, fn);
423 static Params
424 selpar(Fn *fn, Ins *i0, Ins *i1)
426 Class *ca, *c, cr;
427 Insl *il;
428 Ins *i;
429 int n, s, cty;
430 Ref r, tmp[16], *t;
432 ca = alloc((i1-i0) * sizeof ca[0]);
433 curi = &insb[NIns];
435 cty = argsclass(i0, i1, ca);
436 fn->reg = arm64_argregs(CALL(cty), 0);
438 il = 0;
439 t = tmp;
440 for (i=i0, c=ca; i<i1; i++, c++) {
441 if (i->op != Oparc || (c->class & (Cptr|Cstk)))
442 continue;
443 sttmps(t, c->cls, c->nreg, i->to, fn);
444 stkblob(i->to, c, fn, &il);
445 t += c->nreg;
447 for (; il; il=il->link)
448 emiti(il->i);
450 if (fn->retty >= 0) {
451 typclass(&cr, &typ[fn->retty], gpreg, fpreg);
452 if (cr.class & Cptr) {
453 fn->retr = newtmp("abi", Kl, fn);
454 emit(Ocopy, Kl, fn->retr, TMP(R8), R);
455 fn->reg |= BIT(R8);
459 t = tmp;
460 s = 2;
461 for (i=i0, c=ca; i<i1; i++, c++)
462 if (i->op == Oparc && !(c->class & Cptr)) {
463 if (c->class & Cstk) {
464 fn->tmp[i->to.val].slot = -s;
465 s += c->size / 8;
466 } else
467 for (n=0; n<c->nreg; n++) {
468 r = TMP(c->reg[n]);
469 emit(Ocopy, c->cls[n], *t++, r, R);
471 } else if (c->class & Cstk) {
472 emit(Oload, *c->cls, i->to, SLOT(-s), R);
473 s++;
474 } else {
475 emit(Ocopy, *c->cls, i->to, TMP(*c->reg), R);
478 return (Params){
479 .nstk = s - 2,
480 .ngp = (cty >> 5) & 15,
481 .nfp = (cty >> 9) & 15
485 static Blk *
486 split(Fn *fn, Blk *b)
488 Blk *bn;
490 ++fn->nblk;
491 bn = blknew();
492 bn->nins = &insb[NIns] - curi;
493 idup(&bn->ins, curi, bn->nins);
494 curi = &insb[NIns];
495 bn->visit = ++b->visit;
496 (void)!snprintf(bn->name, NString, "%s.%d", b->name, b->visit);
497 bn->loop = b->loop;
498 bn->link = b->link;
499 b->link = bn;
500 return bn;
503 static void
504 chpred(Blk *b, Blk *bp, Blk *bp1)
506 Phi *p;
507 uint a;
509 for (p=b->phi; p; p=p->link) {
510 for (a=0; p->blk[a]!=bp; a++)
511 assert(a+1<p->narg);
512 p->blk[a] = bp1;
516 static void
517 selvaarg(Fn *fn, Blk *b, Ins *i)
519 Ref loc, lreg, lstk, nr, r0, r1, c8, c16, c24, c28, ap;
520 Blk *b0, *bstk, *breg;
521 int isgp;
523 c8 = getcon(8, fn);
524 c16 = getcon(16, fn);
525 c24 = getcon(24, fn);
526 c28 = getcon(28, fn);
527 ap = i->arg[0];
528 isgp = KBASE(i->cls) == 0;
530 /* @b [...]
531 r0 =l add ap, (24 or 28)
532 nr =l loadsw r0
533 r1 =w csltw nr, 0
534 jnz r1, @breg, @bstk
535 @breg
536 r0 =l add ap, (8 or 16)
537 r1 =l loadl r0
538 lreg =l add r1, nr
539 r0 =w add nr, (8 or 16)
540 r1 =l add ap, (24 or 28)
541 storew r0, r1
542 @bstk
543 lstk =l loadl ap
544 r0 =l add lstk, 8
545 storel r0, ap
547 %loc =l phi @breg %lreg, @bstk %lstk
548 i->to =(i->cls) load %loc
551 loc = newtmp("abi", Kl, fn);
552 emit(Oload, i->cls, i->to, loc, R);
553 b0 = split(fn, b);
554 b0->jmp = b->jmp;
555 b0->s1 = b->s1;
556 b0->s2 = b->s2;
557 if (b->s1)
558 chpred(b->s1, b, b0);
559 if (b->s2 && b->s2 != b->s1)
560 chpred(b->s2, b, b0);
562 lreg = newtmp("abi", Kl, fn);
563 nr = newtmp("abi", Kl, fn);
564 r0 = newtmp("abi", Kw, fn);
565 r1 = newtmp("abi", Kl, fn);
566 emit(Ostorew, Kw, R, r0, r1);
567 emit(Oadd, Kl, r1, ap, isgp ? c24 : c28);
568 emit(Oadd, Kw, r0, nr, isgp ? c8 : c16);
569 r0 = newtmp("abi", Kl, fn);
570 r1 = newtmp("abi", Kl, fn);
571 emit(Oadd, Kl, lreg, r1, nr);
572 emit(Oload, Kl, r1, r0, R);
573 emit(Oadd, Kl, r0, ap, isgp ? c8 : c16);
574 breg = split(fn, b);
575 breg->jmp.type = Jjmp;
576 breg->s1 = b0;
578 lstk = newtmp("abi", Kl, fn);
579 r0 = newtmp("abi", Kl, fn);
580 emit(Ostorel, Kw, R, r0, ap);
581 emit(Oadd, Kl, r0, lstk, c8);
582 emit(Oload, Kl, lstk, ap, R);
583 bstk = split(fn, b);
584 bstk->jmp.type = Jjmp;
585 bstk->s1 = b0;
587 b0->phi = alloc(sizeof *b0->phi);
588 *b0->phi = (Phi){
589 .cls = Kl, .to = loc,
590 .narg = 2,
591 .blk = vnew(2, sizeof b0->phi->blk[0], Pfn),
592 .arg = vnew(2, sizeof b0->phi->arg[0], Pfn),
594 b0->phi->blk[0] = bstk;
595 b0->phi->blk[1] = breg;
596 b0->phi->arg[0] = lstk;
597 b0->phi->arg[1] = lreg;
598 r0 = newtmp("abi", Kl, fn);
599 r1 = newtmp("abi", Kw, fn);
600 b->jmp.type = Jjnz;
601 b->jmp.arg = r1;
602 b->s1 = breg;
603 b->s2 = bstk;
604 emit(Ocmpw+Cislt, Kw, r1, nr, CON_Z);
605 emit(Oloadsw, Kl, nr, r0, R);
606 emit(Oadd, Kl, r0, ap, isgp ? c24 : c28);
609 static void
610 selvastart(Fn *fn, Params p, Ref ap)
612 Ref r0, r1, rsave;
614 rsave = newtmp("abi", Kl, fn);
616 r0 = newtmp("abi", Kl, fn);
617 emit(Ostorel, Kw, R, r0, ap);
618 emit(Oadd, Kl, r0, rsave, getcon(p.nstk*8 + 192, fn));
620 r0 = newtmp("abi", Kl, fn);
621 r1 = newtmp("abi", Kl, fn);
622 emit(Ostorel, Kw, R, r1, r0);
623 emit(Oadd, Kl, r1, rsave, getcon(64, fn));
624 emit(Oadd, Kl, r0, ap, getcon(8, fn));
626 r0 = newtmp("abi", Kl, fn);
627 r1 = newtmp("abi", Kl, fn);
628 emit(Ostorel, Kw, R, r1, r0);
629 emit(Oadd, Kl, r1, rsave, getcon(192, fn));
630 emit(Oaddr, Kl, rsave, SLOT(-1), R);
631 emit(Oadd, Kl, r0, ap, getcon(16, fn));
633 r0 = newtmp("abi", Kl, fn);
634 emit(Ostorew, Kw, R, getcon((p.ngp-8)*8, fn), r0);
635 emit(Oadd, Kl, r0, ap, getcon(24, fn));
637 r0 = newtmp("abi", Kl, fn);
638 emit(Ostorew, Kw, R, getcon((p.nfp-8)*16, fn), r0);
639 emit(Oadd, Kl, r0, ap, getcon(28, fn));
642 void
643 arm64_abi(Fn *fn)
645 Blk *b;
646 Ins *i, *i0, *ip;
647 Insl *il;
648 int n;
649 Params p;
651 for (b=fn->start; b; b=b->link)
652 b->visit = 0;
654 /* lower parameters */
655 for (b=fn->start, i=b->ins; i<&b->ins[b->nins]; i++)
656 if (!ispar(i->op))
657 break;
658 p = selpar(fn, b->ins, i);
659 n = b->nins - (i - b->ins) + (&insb[NIns] - curi);
660 i0 = alloc(n * sizeof(Ins));
661 ip = icpy(ip = i0, curi, &insb[NIns] - curi);
662 ip = icpy(ip, i, &b->ins[b->nins] - i);
663 b->nins = n;
664 b->ins = i0;
666 /* lower calls, returns, and vararg instructions */
667 il = 0;
668 b = fn->start;
669 do {
670 if (!(b = b->link))
671 b = fn->start; /* do it last */
672 if (b->visit)
673 continue;
674 curi = &insb[NIns];
675 selret(b, fn);
676 for (i=&b->ins[b->nins]; i!=b->ins;)
677 switch ((--i)->op) {
678 default:
679 emiti(*i);
680 break;
681 case Ocall:
682 for (i0=i; i0>b->ins; i0--)
683 if (!isarg((i0-1)->op))
684 break;
685 selcall(fn, i0, i, &il);
686 i = i0;
687 break;
688 case Ovastart:
689 selvastart(fn, p, i->arg[0]);
690 break;
691 case Ovaarg:
692 selvaarg(fn, b, i);
693 break;
694 case Oarg:
695 case Oargc:
696 die("unreachable");
698 if (b == fn->start)
699 for (; il; il=il->link)
700 emiti(il->i);
701 b->nins = &insb[NIns] - curi;
702 idup(&b->ins, curi, b->nins);
703 } while (b != fn->start);
705 if (debug['A']) {
706 fprintf(stderr, "\n> After ABI lowering:\n");
707 printfn(fn, stderr);