gas: use .balign instead of .align
[qbe.git] / amd64 / sysv.c
blob ea9b2d26295eff7bb0c6ea0334f47a69a3049d5f
#include "all.h"

typedef struct AClass AClass;
typedef struct RAlloc RAlloc;

struct AClass {
	int inmem;
	int align;
	uint size;
	int cls[2];
	Ref ref[2];
};

struct RAlloc {
	Ins i;
	RAlloc *link;
};

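/* mark each eightbyte of an aggregate as integer (Kl) or
 * sse (Kd) class, walking the fields of all union variants */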
static void
classify(AClass *a, Typ *t, uint s)
{
	Field *f;
	int *cls;
	uint n, s1;

	for (n=0, s1=s; n<t->nunion; n++, s=s1)
		for (f=t->fields[n]; f->type!=FEnd; f++) {
			assert(s <= 16);
			cls = &a->cls[s/8];
			switch (f->type) {
			case FEnd:
				die("unreachable");
			case FPad:
				/* don't change anything */
				s += f->len;
				break;
			case Fs:
			case Fd:
				if (*cls == Kx)
					*cls = Kd;
				s += f->len;
				break;
			case Fb:
			case Fh:
			case Fw:
			case Fl:
				*cls = Kl;
				s += f->len;
				break;
			case FTyp:
				classify(a, &typ[f->len], s);
				s += typ[f->len].size;
				break;
			}
		}
}

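/* compute the argument class of a whole type: its rounded-up
 * size, alignment, and whether it must be passed in memory */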
static void
typclass(AClass *a, Typ *t)
{
	uint sz, al;

	sz = t->size;
	al = 1u << t->align;

	/* the ABI requires sizes to be rounded
	 * up to the nearest multiple of 8, moreover
	 * it makes it easy to load and store structures
	 * in registers
	 */
	if (al < 8)
		al = 8;
	sz = (sz + al-1) & -al;

	a->size = sz;
	a->align = t->align;

	if (t->dark || sz > 16 || sz == 0) {
		/* large or unaligned structures are
		 * required to be passed in memory
		 */
		a->inmem = 1;
		return;
	}

	a->cls[0] = Kx;
	a->cls[1] = Kx;
	a->inmem = 0;
	classify(a, t, 0);
}

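/* pick return registers (rax/rdx or xmm0/xmm1) for each eightbyte
 * of a small aggregate and encode the register counts for RCall */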
static int
retr(Ref reg[2], AClass *aret)
{
	static int retreg[2][2] = {{RAX, RDX}, {XMM0, XMM0+1}};
	int n, k, ca, nr[2];

	nr[0] = nr[1] = 0;
	ca = 0;
	for (n=0; (uint)n*8<aret->size; n++) {
		k = KBASE(aret->cls[n]);
		reg[n] = TMP(retreg[k][nr[k]++]);
		ca += 1 << (2 * k);
	}
	return ca;
}

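/* lower a return: copy a scalar into rax/xmm0, load a small
 * aggregate into its return registers, or blit a large one into
 * the caller-provided buffer */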
static void
selret(Blk *b, Fn *fn)
{
	int j, k, ca;
	Ref r, r0, reg[2];
	AClass aret;

	j = b->jmp.type;

	if (!isret(j) || j == Jret0)
		return;

	r0 = b->jmp.arg;
	b->jmp.type = Jret0;

	if (j == Jretc) {
		typclass(&aret, &typ[fn->retty]);
		if (aret.inmem) {
			assert(rtype(fn->retr) == RTmp);
			emit(Ocopy, Kl, TMP(RAX), fn->retr, R);
			blit(fn->retr, 0, r0, aret.size, fn);
			ca = 1;
		} else {
			ca = retr(reg, &aret);
			if (aret.size > 8) {
				r = newtmp("abi", Kl, fn);
				emit(Oload, Kl, reg[1], r, R);
				emit(Oadd, Kl, r, r0, getcon(8, fn));
			}
			emit(Oload, Kl, reg[0], r0, R);
		}
	} else {
		k = j - Jretw;
		if (KBASE(k) == 0) {
			emit(Ocopy, k, TMP(RAX), r0, R);
			ca = 1;
		} else {
			emit(Ocopy, k, TMP(XMM0), r0, R);
			ca = 1 << 2;
		}
	}

	b->jmp.arg = CALL(ca);
}

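/* assign each argument (or parameter, when op is Opar) to registers
 * or to memory following the SysV rules; returns the register counts
 * packed as in the RCall layout described further down */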
static int
argsclass(Ins *i0, Ins *i1, AClass *ac, int op, AClass *aret, Ref *env)
{
	int nint, ni, nsse, ns, n, *pn;
	AClass *a;
	Ins *i;

	if (aret && aret->inmem)
		nint = 5; /* hidden argument */
	else
		nint = 6;
	nsse = 8;
	for (i=i0, a=ac; i<i1; i++, a++)
		switch (i->op - op + Oarg) {
		case Oarg:
			if (KBASE(i->cls) == 0)
				pn = &nint;
			else
				pn = &nsse;
			if (*pn > 0) {
				--*pn;
				a->inmem = 0;
			} else
				a->inmem = 2;
			a->align = 3;
			a->size = 8;
			a->cls[0] = i->cls;
			break;
		case Oargc:
			n = i->arg[0].val;
			typclass(a, &typ[n]);
			if (a->inmem)
				continue;
			ni = ns = 0;
			for (n=0; (uint)n*8<a->size; n++)
				if (KBASE(a->cls[n]) == 0)
					ni++;
				else
					ns++;
			if (nint >= ni && nsse >= ns) {
				nint -= ni;
				nsse -= ns;
			} else
				a->inmem = 1;
			break;
		case Oarge:
			if (op == Opar)
				*env = i->to;
			else
				*env = i->arg[0];
			break;
		}

	return ((6-nint) << 4) | ((8-nsse) << 8);
}

int amd64_sysv_rsave[] = {
	RDI, RSI, RDX, RCX, R8, R9, R10, R11, RAX,
	XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
	XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, -1
};
int amd64_sysv_rclob[] = {RBX, R12, R13, R14, R15, -1};

MAKESURE(sysv_arrays_ok,
	sizeof amd64_sysv_rsave == (NGPS+NFPS+1) * sizeof(int) &&
	sizeof amd64_sysv_rclob == (NCLR+1) * sizeof(int)
);

/* layout of call's second argument (RCall)
 *
 *  29     12    8    4  3  0
 *  |0...00|x|xxxx|xxxx|xx|xx|                  range
 *          |    |    |  |  ` gp regs returned (0..2)
 *          |    |    |  ` sse regs returned   (0..2)
 *          |    |    ` gp regs passed         (0..6)
 *          |    ` sse regs passed             (0..8)
 *          ` 1 if rax is used to pass data    (0..1)
 */

bits
amd64_sysv_retregs(Ref r, int p[2])
{
	bits b;
	int ni, nf;

	assert(rtype(r) == RCall);
	b = 0;
	ni = r.val & 3;
	nf = (r.val >> 2) & 3;
	if (ni >= 1)
		b |= BIT(RAX);
	if (ni >= 2)
		b |= BIT(RDX);
	if (nf >= 1)
		b |= BIT(XMM0);
	if (nf >= 2)
		b |= BIT(XMM1);
	if (p) {
		p[0] = ni;
		p[1] = nf;
	}
	return b;
}

bits
amd64_sysv_argregs(Ref r, int p[2])
{
	bits b;
	int j, ni, nf, ra;

	assert(rtype(r) == RCall);
	b = 0;
	ni = (r.val >> 4) & 15;
	nf = (r.val >> 8) & 15;
	ra = (r.val >> 12) & 1;
	for (j=0; j<ni; j++)
		b |= BIT(amd64_sysv_rsave[j]);
	for (j=0; j<nf; j++)
		b |= BIT(XMM0+j);
	if (p) {
		p[0] = ni + ra;
		p[1] = nf;
	}
	return b | (ra ? BIT(RAX) : 0);
}

static Ref
rarg(int ty, int *ni, int *ns)
{
	if (KBASE(ty) == 0)
		return TMP(amd64_sysv_rsave[(*ni)++]);
	else
		return TMP(XMM0 + (*ns)++);
}

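/* lower one call and its Oarg/Oargc/Oarge instructions: pass
 * register arguments, spill stack arguments below the call, and
 * fetch the return value */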
static void
selcall(Fn *fn, Ins *i0, Ins *i1, RAlloc **rap)
{
	Ins *i;
	AClass *ac, *a, aret;
	int ca, ni, ns, al, varc, envc;
	uint stk, off;
	Ref r, r1, r2, reg[2], env;
	RAlloc *ra;

	env = R;
	ac = alloc((i1-i0) * sizeof ac[0]);

	if (!req(i1->arg[1], R)) {
		assert(rtype(i1->arg[1]) == RType);
		typclass(&aret, &typ[i1->arg[1].val]);
		ca = argsclass(i0, i1, ac, Oarg, &aret, &env);
	} else
		ca = argsclass(i0, i1, ac, Oarg, 0, &env);

	for (stk=0, a=&ac[i1-i0]; a>ac;)
		if ((--a)->inmem) {
			if (a->align > 4)
				err("sysv abi requires alignments of 16 or less");
			stk += a->size;
			if (a->align == 4)
				stk += stk & 15;
		}
	stk += stk & 15;
	if (stk) {
		r = getcon(-(int64_t)stk, fn);
		emit(Osalloc, Kl, R, r, R);
	}

	if (!req(i1->arg[1], R)) {
		if (aret.inmem) {
			/* get the return location from eax
			 * it saves one callee-save reg */
			r1 = newtmp("abi", Kl, fn);
			emit(Ocopy, Kl, i1->to, TMP(RAX), R);
			ca += 1;
		} else {
			if (aret.size > 8) {
				r = newtmp("abi", Kl, fn);
				aret.ref[1] = newtmp("abi", aret.cls[1], fn);
				emit(Ostorel, 0, R, aret.ref[1], r);
				emit(Oadd, Kl, r, i1->to, getcon(8, fn));
			}
			aret.ref[0] = newtmp("abi", aret.cls[0], fn);
			emit(Ostorel, 0, R, aret.ref[0], i1->to);
			ca += retr(reg, &aret);
			if (aret.size > 8)
				emit(Ocopy, aret.cls[1], aret.ref[1], reg[1], R);
			emit(Ocopy, aret.cls[0], aret.ref[0], reg[0], R);
			r1 = i1->to;
		}
		/* allocate return pad */
		ra = alloc(sizeof *ra);
		/* specific to NAlign == 3 */
		al = aret.align >= 2 ? aret.align - 2 : 0;
		ra->i = (Ins){Oalloc+al, Kl, r1, {getcon(aret.size, fn)}};
		ra->link = (*rap);
		*rap = ra;
	} else {
		ra = 0;
		if (KBASE(i1->cls) == 0) {
			emit(Ocopy, i1->cls, i1->to, TMP(RAX), R);
			ca += 1;
		} else {
			emit(Ocopy, i1->cls, i1->to, TMP(XMM0), R);
			ca += 1 << 2;
		}
	}

	envc = !req(R, env);
	varc = i1->op == Ovacall;
	if (varc && envc)
		err("sysv abi does not support variadic env calls");
	ca |= (varc | envc) << 12;
	emit(Ocall, i1->cls, R, i1->arg[0], CALL(ca));
	if (envc)
		emit(Ocopy, Kl, TMP(RAX), env, R);
	if (varc)
		emit(Ocopy, Kw, TMP(RAX), getcon((ca >> 8) & 15, fn), R);

	ni = ns = 0;
	if (ra && aret.inmem)
		emit(Ocopy, Kl, rarg(Kl, &ni, &ns), ra->i.to, R); /* pass hidden argument */
	for (i=i0, a=ac; i<i1; i++, a++) {
		if (a->inmem)
			continue;
		r1 = rarg(a->cls[0], &ni, &ns);
		if (i->op == Oargc) {
			if (a->size > 8) {
				r2 = rarg(a->cls[1], &ni, &ns);
				r = newtmp("abi", Kl, fn);
				emit(Oload, a->cls[1], r2, r, R);
				emit(Oadd, Kl, r, i->arg[1], getcon(8, fn));
			}
			emit(Oload, a->cls[0], r1, i->arg[1], R);
		} else
			emit(Ocopy, i->cls, r1, i->arg[0], R);
	}

	if (!stk)
		return;

	r = newtmp("abi", Kl, fn);
	for (i=i0, a=ac, off=0; i<i1; i++, a++) {
		if (!a->inmem)
			continue;
		if (i->op == Oargc) {
			if (a->align == 4)
				off += off & 15;
			blit(r, off, i->arg[1], a->size, fn);
		} else {
			r1 = newtmp("abi", Kl, fn);
			emit(Ostorel, 0, R, i->arg[0], r1);
			emit(Oadd, Kl, r1, r, getcon(off, fn));
		}
		off += a->size;
	}
	emit(Osalloc, Kl, r, getcon(stk, fn), R);
}

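/* lower the function's parameter instructions; returns the
 * register counts and stack size later used by vastart */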
static int
selpar(Fn *fn, Ins *i0, Ins *i1)
{
	AClass *ac, *a, aret;
	Ins *i;
	int ni, ns, s, al, fa;
	Ref r, env;

	env = R;
	ac = alloc((i1-i0) * sizeof ac[0]);
	curi = &insb[NIns];
	ni = ns = 0;

	if (fn->retty >= 0) {
		typclass(&aret, &typ[fn->retty]);
		fa = argsclass(i0, i1, ac, Opar, &aret, &env);
	} else
		fa = argsclass(i0, i1, ac, Opar, 0, &env);

	for (i=i0, a=ac; i<i1; i++, a++) {
		if (i->op != Oparc || a->inmem)
			continue;
		if (a->size > 8) {
			r = newtmp("abi", Kl, fn);
			a->ref[1] = newtmp("abi", Kl, fn);
			emit(Ostorel, 0, R, a->ref[1], r);
			emit(Oadd, Kl, r, i->to, getcon(8, fn));
		}
		a->ref[0] = newtmp("abi", Kl, fn);
		emit(Ostorel, 0, R, a->ref[0], i->to);
		/* specific to NAlign == 3 */
		al = a->align >= 2 ? a->align - 2 : 0;
		emit(Oalloc+al, Kl, i->to, getcon(a->size, fn), R);
	}

	if (fn->retty >= 0 && aret.inmem) {
		r = newtmp("abi", Kl, fn);
		emit(Ocopy, Kl, r, rarg(Kl, &ni, &ns), R);
		fn->retr = r;
	}

	for (i=i0, a=ac, s=4; i<i1; i++, a++) {
		switch (a->inmem) {
		case 1:
			if (a->align > 4)
				err("sysv abi requires alignments of 16 or less");
			if (a->align == 4)
				s = (s+3) & -4;
			fn->tmp[i->to.val].slot = -s;
			s += a->size / 4;
			continue;
		case 2:
			emit(Oload, i->cls, i->to, SLOT(-s), R);
			s += 2;
			continue;
		}
		r = rarg(a->cls[0], &ni, &ns);
		if (i->op == Oparc) {
			emit(Ocopy, Kl, a->ref[0], r, R);
			if (a->size > 8) {
				r = rarg(a->cls[1], &ni, &ns);
				emit(Ocopy, Kl, a->ref[1], r, R);
			}
		} else
			emit(Ocopy, i->cls, i->to, r, R);
	}

	if (!req(R, env))
		emit(Ocopy, Kl, env, TMP(RAX), R);

	return fa | (s*4)<<12;
}

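/* split a block in two after the instructions currently in the
 * scratch buffer; used to build the va_arg control flow */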
static Blk *
split(Fn *fn, Blk *b)
{
	Blk *bn;

	++fn->nblk;
	bn = blknew();
	bn->nins = &insb[NIns] - curi;
	idup(&bn->ins, curi, bn->nins);
	curi = &insb[NIns];
	bn->visit = ++b->visit;
	(void)!snprintf(bn->name, NString, "%s.%d", b->name, b->visit);
	bn->loop = b->loop;
	bn->link = b->link;
	b->link = bn;
	return bn;
}

static void
chpred(Blk *b, Blk *bp, Blk *bp1)
{
	Phi *p;
	uint a;

	for (p=b->phi; p; p=p->link) {
		for (a=0; p->blk[a]!=bp; a++)
			assert(a+1<p->narg);
		p->blk[a] = bp1;
	}
}

static void
selvaarg(Fn *fn, Blk *b, Ins *i)
{
	Ref loc, lreg, lstk, nr, r0, r1, c4, c8, c16, c, ap;
	Blk *b0, *bstk, *breg;
	int isint;

	c4 = getcon(4, fn);
	c8 = getcon(8, fn);
	c16 = getcon(16, fn);
	ap = i->arg[0];
	isint = KBASE(i->cls) == 0;

	/* @b [...]
	       r0 =l add ap, (0 or 4)
	       nr =l loadsw r0
	       r1 =w cultw nr, (48 or 176)
	       jnz r1, @breg, @bstk
	   @breg
	       r0 =l add ap, 16
	       r1 =l loadl r0
	       lreg =l add r1, nr
	       r0 =w add nr, (8 or 16)
	       r1 =l add ap, (0 or 4)
	       storew r0, r1
	   @bstk
	       r0 =l add ap, 8
	       lstk =l loadl r0
	       r1 =l add lstk, 8
	       storel r1, r0
	   @b0
	       %loc =l phi @breg %lreg, @bstk %lstk
	       i->to =(i->cls) load %loc
	*/

	loc = newtmp("abi", Kl, fn);
	emit(Oload, i->cls, i->to, loc, R);
	b0 = split(fn, b);
	b0->jmp = b->jmp;
	b0->s1 = b->s1;
	b0->s2 = b->s2;
	if (b->s1)
		chpred(b->s1, b, b0);
	if (b->s2 && b->s2 != b->s1)
		chpred(b->s2, b, b0);

	lreg = newtmp("abi", Kl, fn);
	nr = newtmp("abi", Kl, fn);
	r0 = newtmp("abi", Kw, fn);
	r1 = newtmp("abi", Kl, fn);
	emit(Ostorew, Kw, R, r0, r1);
	emit(Oadd, Kl, r1, ap, isint ? CON_Z : c4);
	emit(Oadd, Kw, r0, nr, isint ? c8 : c16);
	r0 = newtmp("abi", Kl, fn);
	r1 = newtmp("abi", Kl, fn);
	emit(Oadd, Kl, lreg, r1, nr);
	emit(Oload, Kl, r1, r0, R);
	emit(Oadd, Kl, r0, ap, c16);
	breg = split(fn, b);
	breg->jmp.type = Jjmp;
	breg->s1 = b0;

	lstk = newtmp("abi", Kl, fn);
	r0 = newtmp("abi", Kl, fn);
	r1 = newtmp("abi", Kl, fn);
	emit(Ostorel, Kw, R, r1, r0);
	emit(Oadd, Kl, r1, lstk, c8);
	emit(Oload, Kl, lstk, r0, R);
	emit(Oadd, Kl, r0, ap, c8);
	bstk = split(fn, b);
	bstk->jmp.type = Jjmp;
	bstk->s1 = b0;

	b0->phi = alloc(sizeof *b0->phi);
	*b0->phi = (Phi){
		.cls = Kl, .to = loc,
		.narg = 2,
		.blk = {bstk, breg},
		.arg = {lstk, lreg},
	};
	r0 = newtmp("abi", Kl, fn);
	r1 = newtmp("abi", Kw, fn);
	b->jmp.type = Jjnz;
	b->jmp.arg = r1;
	b->s1 = breg;
	b->s2 = bstk;
	c = getcon(isint ? 48 : 176, fn);
	emit(Ocmpw+Ciult, Kw, r1, nr, c);
	emit(Oloadsw, Kl, nr, r0, R);
	emit(Oadd, Kl, r0, ap, isint ? CON_Z : c4);
}

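/* initialize a va_list: gp_offset, fp_offset, overflow_arg_area,
 * and reg_save_area, using the counts computed by selpar */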
static void
selvastart(Fn *fn, int fa, Ref ap)
{
	Ref r0, r1;
	int gp, fp, sp;

	gp = ((fa >> 4) & 15) * 8;
	fp = 48 + ((fa >> 8) & 15) * 16;
	sp = fa >> 12;
	r0 = newtmp("abi", Kl, fn);
	r1 = newtmp("abi", Kl, fn);
	emit(Ostorel, Kw, R, r1, r0);
	emit(Oadd, Kl, r1, TMP(RBP), getcon(-176, fn));
	emit(Oadd, Kl, r0, ap, getcon(16, fn));
	r0 = newtmp("abi", Kl, fn);
	r1 = newtmp("abi", Kl, fn);
	emit(Ostorel, Kw, R, r1, r0);
	emit(Oadd, Kl, r1, TMP(RBP), getcon(sp, fn));
	emit(Oadd, Kl, r0, ap, getcon(8, fn));
	r0 = newtmp("abi", Kl, fn);
	emit(Ostorew, Kw, R, getcon(fp, fn), r0);
	emit(Oadd, Kl, r0, ap, getcon(4, fn));
	emit(Ostorew, Kw, R, getcon(gp, fn), ap);
}

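/* entry point: lower parameters in the start block, then lower
 * calls, returns, and vararg instructions in every block */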
void
amd64_sysv_abi(Fn *fn)
{
	Blk *b;
	Ins *i, *i0, *ip;
	RAlloc *ral;
	int n, fa;

	for (b=fn->start; b; b=b->link)
		b->visit = 0;

	/* lower parameters */
	for (b=fn->start, i=b->ins; i<&b->ins[b->nins]; i++)
		if (!ispar(i->op))
			break;
	fa = selpar(fn, b->ins, i);
	n = b->nins - (i - b->ins) + (&insb[NIns] - curi);
	i0 = alloc(n * sizeof(Ins));
	ip = icpy(ip = i0, curi, &insb[NIns] - curi);
	ip = icpy(ip, i, &b->ins[b->nins] - i);
	b->nins = n;
	b->ins = i0;

	/* lower calls, returns, and vararg instructions */
	ral = 0;
	b = fn->start;
	do {
		if (!(b = b->link))
			b = fn->start; /* do it last */
		if (b->visit)
			continue;
		curi = &insb[NIns];
		selret(b, fn);
		for (i=&b->ins[b->nins]; i!=b->ins;)
			switch ((--i)->op) {
			default:
				emiti(*i);
				break;
			case Ocall:
			case Ovacall:
				for (i0=i; i0>b->ins; i0--)
					if (!isarg((i0-1)->op))
						break;
				selcall(fn, i0, i, &ral);
				i = i0;
				break;
			case Ovastart:
				selvastart(fn, fa, i->arg[0]);
				break;
			case Ovaarg:
				selvaarg(fn, b, i);
				break;
			case Oarg:
			case Oargc:
				die("unreachable");
			}
		if (b == fn->start)
			for (; ral; ral=ral->link)
				emiti(ral->i);
		b->nins = &insb[NIns] - curi;
		idup(&b->ins, curi, b->nins);
	} while (b != fn->start);

	if (debug['A']) {
		fprintf(stderr, "\n> After ABI lowering:\n");
		printfn(fn, stderr);
	}
}