typedef struct AClass AClass;
typedef struct RAlloc RAlloc;

/* ... */

classify(AClass *a, Typ *t, uint s)
	/* ... */
	for (n=0, s1=s; n<t->nunion; n++, s=s1)
		for (f=t->fields[n]; f->type!=FEnd; f++) {
			/* ... */
			/* don't change anything */
			/* ... */
			classify(a, &typ[f->len], s);
			s += typ[f->len].size;

/* ... */

typclass(AClass *a, Typ *t)
	/* ... */
	/* the ABI requires sizes to be rounded
	 * up to the nearest multiple of 8; moreover,
	 * it makes it easy to load and store structures
	 * in registers */
	/* ... */
	sz = (sz + al-1) & -al;
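	/* worked example of the rounding above: with al == 8, a 12-byte
	 * struct gets sz == (12 + 7) & -8 == 16, so aggregates always
	 * span a whole number of eightbytes */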
	/* ... */
	if (t->dark || sz > 16 || sz == 0) {
		/* large or unaligned structures are
		 * required to be passed in memory */
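		/* for instance, a 24-byte struct (sz > 16) or an opaque
		 * ("dark") type ends up classified as memory and is passed
		 * on the stack rather than in registers */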

/* ... */

retr(Ref reg[2], AClass *aret)
	static int retreg[2][2] = {{RAX, RDX}, {XMM0, XMM0+1}};
	/* ... */
	for (n=0; (uint)n*8<aret->size; n++) {
		k = KBASE(aret->cls[n]);
		reg[n] = TMP(retreg[k][nr[k]++]);
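		/* e.g. a 16-byte struct whose eightbytes classify as
		 * {integer, sse} is returned in RAX and XMM0, while
		 * {sse, sse} uses XMM0 and XMM1 */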

/* ... */

selret(Blk *b, Fn *fn)
	/* ... */
	if (!isret(j) || j == Jret0)
	/* ... */
		typclass(&aret, &typ[fn->retty]);
		/* ... */
			assert(rtype(fn->retr) == RTmp);
			emit(Ocopy, Kl, TMP(RAX), fn->retr, R);
			blit(fn->retr, 0, r0, aret.size, fn);
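			/* the SysV ABI returns the hidden pointer to a
			 * memory-returned aggregate in %rax, hence the
			 * copy of fn->retr into RAX */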
		/* ... */
		ca = retr(reg, &aret);
		/* ... */
		r = newtmp("abi", Kl, fn);
		emit(Oload, Kl, reg[1], r, R);
		emit(Oadd, Kl, r, r0, getcon(8, fn));
		/* ... */
		emit(Oload, Kl, reg[0], r0, R);
	/* ... */
		emit(Ocopy, k, TMP(RAX), r0, R);
		/* ... */
		emit(Ocopy, k, TMP(XMM0), r0, R);
	/* ... */
	b->jmp.arg = CALL(ca);

/* ... */

argsclass(Ins *i0, Ins *i1, AClass *ac, int op, AClass *aret, Ref *env)
	int nint, ni, nsse, ns, n, *pn;
	/* ... */
	if (aret && aret->inmem)
		nint = 5; /* hidden argument */
	/* ... */
	for (i=i0, a=ac; i<i1; i++, a++)
		switch (i->op - op + Oarg) {
		/* ... */
			if (KBASE(i->cls) == 0)
			/* ... */
			typclass(a, &typ[n]);
			/* ... */
			for (n=0; (uint)n*8<a->size; n++)
				if (KBASE(a->cls[n]) == 0)
				/* ... */
			if (nint >= ni && nsse >= ns) {
			/* ... */

	return ((6-nint) << 4) | ((8-nsse) << 8);
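	/* (6-nint) and (8-nsse) are the counts of integer and sse argument
	 * registers actually consumed; they are packed into the RCall
	 * layout documented below */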

int amd64_sysv_rsave[] = {
	RDI, RSI, RDX, RCX, R8, R9, R10, R11, RAX,
	XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
	XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, -1
};
int amd64_sysv_rclob[] = {RBX, R12, R13, R14, R15, -1};

MAKESURE(sysv_arrays_ok,
	sizeof amd64_sysv_rsave == (NGPS+NFPS+1) * sizeof(int) &&
	sizeof amd64_sysv_rclob == (NCLR+1) * sizeof(int)
);

/* layout of call's second argument (RCall)
 *
 * |0...00|x|xxxx|xxxx|xx|xx|                  range
 *         |    |    |  |  ` gp regs returned (0..2)
 *         |    |    |  ` sse regs returned (0..2)
 *         |    |    ` gp regs passed (0..6)
 *         |    ` sse regs passed (0..8)
 *         ` 1 if rax is used to pass data (0..1)
 */
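/* worked example (illustrative): a call passing two gp and one sse
 * argument and returning a single gp value is encoded as
 *   ca = (1 << 8) | (2 << 4) | 1 = 0x121,
 * so amd64_sysv_argregs below marks RDI, RSI and XMM0 as used and
 * amd64_sysv_retregs marks RAX */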

amd64_sysv_retregs(Ref r, int p[2])
	/* ... */
	assert(rtype(r) == RCall);
	/* ... */
	nf = (r.val >> 2) & 3;
	/* ... */

amd64_sysv_argregs(Ref r, int p[2])
	/* ... */
	assert(rtype(r) == RCall);
	/* ... */
	ni = (r.val >> 4) & 15;
	nf = (r.val >> 8) & 15;
	ra = (r.val >> 12) & 1;
	/* ... */
		b |= BIT(amd64_sysv_rsave[j]);
	/* ... */
	return b | (ra ? BIT(RAX) : 0);

/* ... */

rarg(int ty, int *ni, int *ns)
	/* ... */
		return TMP(amd64_sysv_rsave[(*ni)++]);
	/* ... */
		return TMP(XMM0 + (*ns)++);
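	/* note: integer arguments are drawn from the start of
	 * amd64_sysv_rsave, so successive gp arguments land in RDI, RSI,
	 * RDX, RCX, R8, R9, and float arguments take XMM0..XMM7 in order */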

/* ... */

selcall(Fn *fn, Ins *i0, Ins *i1, RAlloc **rap)
	AClass *ac, *a, aret;
	int ca, ni, ns, al, varc, envc;
	/* ... */
	Ref r, r1, r2, reg[2], env;
	/* ... */
	ac = alloc((i1-i0) * sizeof ac[0]);

	if (!req(i1->arg[1], R)) {
		assert(rtype(i1->arg[1]) == RType);
		typclass(&aret, &typ[i1->arg[1].val]);
		ca = argsclass(i0, i1, ac, Oarg, &aret, &env);
	/* ... */
		ca = argsclass(i0, i1, ac, Oarg, 0, &env);

	for (stk=0, a=&ac[i1-i0]; a>ac;)
		/* ... */
			err("sysv abi requires alignments of 16 or less");
	/* ... */
		r = getcon(-(int64_t)stk, fn);
		emit(Osalloc, Kl, R, r, R);
	/* ... */
	if (!req(i1->arg[1], R)) {
		/* ... */
			/* get the return location from eax
			 * it saves one callee-save reg */
			r1 = newtmp("abi", Kl, fn);
			emit(Ocopy, Kl, i1->to, TMP(RAX), R);
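			/* selret (above) copies the hidden return pointer
			 * back into %rax on the callee side, so the caller
			 * can read the result location from RAX here instead
			 * of keeping its own copy in a callee-save register */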
		/* ... */
			r = newtmp("abi", Kl, fn);
			aret.ref[1] = newtmp("abi", aret.cls[1], fn);
			emit(Ostorel, 0, R, aret.ref[1], r);
			emit(Oadd, Kl, r, i1->to, getcon(8, fn));
			/* ... */
			aret.ref[0] = newtmp("abi", aret.cls[0], fn);
			emit(Ostorel, 0, R, aret.ref[0], i1->to);
			ca += retr(reg, &aret);
			/* ... */
			emit(Ocopy, aret.cls[1], aret.ref[1], reg[1], R);
			emit(Ocopy, aret.cls[0], aret.ref[0], reg[0], R);
		/* ... */
		/* allocate return pad */
		ra = alloc(sizeof *ra);
		/* specific to NAlign == 3 */
		al = aret.align >= 2 ? aret.align - 2 : 0;
		ra->i = (Ins){Oalloc+al, Kl, r1, {getcon(aret.size, fn)}};
	/* ... */
		if (KBASE(i1->cls) == 0) {
			emit(Ocopy, i1->cls, i1->to, TMP(RAX), R);
		/* ... */
			emit(Ocopy, i1->cls, i1->to, TMP(XMM0), R);
	/* ... */
	varc = i1->op == Ovacall;
	/* ... */
		err("sysv abi does not support variadic env calls");
	ca |= (varc | envc) << 12;
	emit(Ocall, i1->cls, R, i1->arg[0], CALL(ca));
	/* ... */
		emit(Ocopy, Kl, TMP(RAX), env, R);
	/* ... */
		emit(Ocopy, Kw, TMP(RAX), getcon((ca >> 8) & 15, fn), R);
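	/* SysV variadic convention: when calling a variadic function,
	 * %al must hold an upper bound on the number of vector registers
	 * used for arguments, which is (ca >> 8) & 15 here */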
	/* ... */
	if (ra && aret.inmem)
		emit(Ocopy, Kl, rarg(Kl, &ni, &ns), ra->i.to, R); /* pass hidden argument */
	for (i=i0, a=ac; i<i1; i++, a++) {
		/* ... */
		r1 = rarg(a->cls[0], &ni, &ns);
		if (i->op == Oargc) {
			/* ... */
			r2 = rarg(a->cls[1], &ni, &ns);
			r = newtmp("abi", Kl, fn);
			emit(Oload, a->cls[1], r2, r, R);
			emit(Oadd, Kl, r, i->arg[1], getcon(8, fn));
			/* ... */
			emit(Oload, a->cls[0], r1, i->arg[1], R);
		/* ... */
			emit(Ocopy, i->cls, r1, i->arg[0], R);
	/* ... */
	r = newtmp("abi", Kl, fn);
	for (i=i0, a=ac, off=0; i<i1; i++, a++) {
		/* ... */
		if (i->op == Oargc) {
			/* ... */
			blit(r, off, i->arg[1], a->size, fn);
		/* ... */
			r1 = newtmp("abi", Kl, fn);
			emit(Ostorel, 0, R, i->arg[0], r1);
			emit(Oadd, Kl, r1, r, getcon(off, fn));
	/* ... */
	emit(Osalloc, Kl, r, getcon(stk, fn), R);

/* ... */

selpar(Fn *fn, Ins *i0, Ins *i1)
	AClass *ac, *a, aret;
	/* ... */
	int ni, ns, s, al, fa;
	/* ... */
	ac = alloc((i1-i0) * sizeof ac[0]);
	/* ... */
	if (fn->retty >= 0) {
		typclass(&aret, &typ[fn->retty]);
		fa = argsclass(i0, i1, ac, Opar, &aret, &env);
	/* ... */
		fa = argsclass(i0, i1, ac, Opar, 0, &env);
	/* ... */
	for (i=i0, a=ac; i<i1; i++, a++) {
		if (i->op != Oparc || a->inmem)
		/* ... */
			r = newtmp("abi", Kl, fn);
			a->ref[1] = newtmp("abi", Kl, fn);
			emit(Ostorel, 0, R, a->ref[1], r);
			emit(Oadd, Kl, r, i->to, getcon(8, fn));
		/* ... */
		a->ref[0] = newtmp("abi", Kl, fn);
		emit(Ostorel, 0, R, a->ref[0], i->to);
		/* specific to NAlign == 3 */
		al = a->align >= 2 ? a->align - 2 : 0;
		emit(Oalloc+al, Kl, i->to, getcon(a->size, fn), R);
	/* ... */
	if (fn->retty >= 0 && aret.inmem) {
		r = newtmp("abi", Kl, fn);
		emit(Ocopy, Kl, r, rarg(Kl, &ni, &ns), R);
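		/* per the SysV ABI, when the return value is of class MEMORY
		 * the caller passes the address of the return area as a
		 * hidden first integer argument, so it arrives in %rdi */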
	/* ... */
	for (i=i0, a=ac, s=4; i<i1; i++, a++) {
		/* ... */
			err("sysv abi requires alignments of 16 or less");
			/* ... */
			fn->tmp[i->to.val].slot = -s;
			/* ... */
			emit(Oload, i->cls, i->to, SLOT(-s), R);
		/* ... */
		r = rarg(a->cls[0], &ni, &ns);
		if (i->op == Oparc) {
			emit(Ocopy, Kl, a->ref[0], r, R);
			/* ... */
			r = rarg(a->cls[1], &ni, &ns);
			emit(Ocopy, Kl, a->ref[1], r, R);
		/* ... */
			emit(Ocopy, i->cls, i->to, r, R);
	/* ... */
		emit(Ocopy, Kl, env, TMP(RAX), R);

	return fa | (s*4)<<12;
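	/* s counts 4-byte stack slots and starts at 4, i.e. rbp+16, just
	 * past the saved rbp and the return address; s*4 therefore ends up
	 * as the rbp offset just past the named stack-passed parameters,
	 * which selvastart reuses (fa >> 12) as the start of the overflow
	 * argument area */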

/* ... */

split(Fn *fn, Blk *b)
	/* ... */
	bn->nins = &insb[NIns] - curi;
	idup(&bn->ins, curi, bn->nins);
	/* ... */
	bn->visit = ++b->visit;
	(void)!snprintf(bn->name, NString, "%s.%d", b->name, b->visit);

/* ... */

chpred(Blk *b, Blk *bp, Blk *bp1)
	/* ... */
	for (p=b->phi; p; p=p->link) {
		for (a=0; p->blk[a]!=bp; a++)

/* ... */

selvaarg(Fn *fn, Blk *b, Ins *i)
	Ref loc, lreg, lstk, nr, r0, r1, c4, c8, c16, c, ap;
	Blk *b0, *bstk, *breg;
	/* ... */
	c16 = getcon(16, fn);
	/* ... */
	isint = KBASE(i->cls) == 0;

	/* ...
	 *	r0 =l add ap, (0 or 4)
	 *	...
	 *	r1 =w cultw nr, (48 or 176)
	 *	...
	 *	r0 =w add nr, (8 or 16)
	 *	r1 =l add ap, (0 or 4)
	 *	...
	 *	%loc =l phi @breg %lreg, @bstk %lstk
	 *	i->to =(i->cls) load %loc
	 */
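	/* this mirrors the standard SysV va_arg algorithm: the gp_offset
	 * (ap+0) or fp_offset (ap+4) field is compared against 48 or 176
	 * to choose between the register save area (pointer at ap+16)
	 * and the stack overflow area (pointer at ap+8) */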
	loc = newtmp("abi", Kl, fn);
	emit(Oload, i->cls, i->to, loc, R);
	/* ... */
		chpred(b->s1, b, b0);
	if (b->s2 && b->s2 != b->s1)
		chpred(b->s2, b, b0);

	lreg = newtmp("abi", Kl, fn);
	nr = newtmp("abi", Kl, fn);
	r0 = newtmp("abi", Kw, fn);
	r1 = newtmp("abi", Kl, fn);
	emit(Ostorew, Kw, R, r0, r1);
	emit(Oadd, Kl, r1, ap, isint ? CON_Z : c4);
	emit(Oadd, Kw, r0, nr, isint ? c8 : c16);
	r0 = newtmp("abi", Kl, fn);
	r1 = newtmp("abi", Kl, fn);
	emit(Oadd, Kl, lreg, r1, nr);
	emit(Oload, Kl, r1, r0, R);
	emit(Oadd, Kl, r0, ap, c16);
	/* ... */
	breg->jmp.type = Jjmp;
	/* ... */
	lstk = newtmp("abi", Kl, fn);
	r0 = newtmp("abi", Kl, fn);
	r1 = newtmp("abi", Kl, fn);
	emit(Ostorel, Kw, R, r1, r0);
	emit(Oadd, Kl, r1, lstk, c8);
	emit(Oload, Kl, lstk, r0, R);
	emit(Oadd, Kl, r0, ap, c8);
	/* ... */
	bstk->jmp.type = Jjmp;
	/* ... */
	b0->phi = alloc(sizeof *b0->phi);
	/* ... */
		.cls = Kl, .to = loc,
	/* ... */
	r0 = newtmp("abi", Kl, fn);
	r1 = newtmp("abi", Kw, fn);
	/* ... */
	c = getcon(isint ? 48 : 176, fn);
	emit(Ocmpw+Ciult, Kw, r1, nr, c);
	emit(Oloadsw, Kl, nr, r0, R);
	emit(Oadd, Kl, r0, ap, isint ? CON_Z : c4);

/* ... */

selvastart(Fn *fn, int fa, Ref ap)
	/* ... */
	gp = ((fa >> 4) & 15) * 8;
	fp = 48 + ((fa >> 8) & 15) * 16;
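	/* va_list setup per the SysV ABI: gp_offset starts past the gp
	 * registers already consumed by named parameters (8 bytes each),
	 * fp_offset past 48 plus 16 bytes per consumed sse register; the
	 * full register save area is 6*8 + 8*16 = 176 bytes, matching the
	 * rbp-176 offset stored below */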
	/* ... */
	r0 = newtmp("abi", Kl, fn);
	r1 = newtmp("abi", Kl, fn);
	emit(Ostorel, Kw, R, r1, r0);
	emit(Oadd, Kl, r1, TMP(RBP), getcon(-176, fn));
	emit(Oadd, Kl, r0, ap, getcon(16, fn));
	r0 = newtmp("abi", Kl, fn);
	r1 = newtmp("abi", Kl, fn);
	emit(Ostorel, Kw, R, r1, r0);
	emit(Oadd, Kl, r1, TMP(RBP), getcon(sp, fn));
	emit(Oadd, Kl, r0, ap, getcon(8, fn));
	r0 = newtmp("abi", Kl, fn);
	emit(Ostorew, Kw, R, getcon(fp, fn), r0);
	emit(Oadd, Kl, r0, ap, getcon(4, fn));
	emit(Ostorew, Kw, R, getcon(gp, fn), ap);

/* ... */

amd64_sysv_abi(Fn *fn)
	/* ... */
	for (b=fn->start; b; b=b->link)
	/* ... */

	/* lower parameters */
	for (b=fn->start, i=b->ins; i<&b->ins[b->nins]; i++)
	/* ... */
	fa = selpar(fn, b->ins, i);
	n = b->nins - (i - b->ins) + (&insb[NIns] - curi);
	i0 = alloc(n * sizeof(Ins));
	ip = icpy(ip = i0, curi, &insb[NIns] - curi);
	ip = icpy(ip, i, &b->ins[b->nins] - i);
	/* ... */

	/* lower calls, returns, and vararg instructions */
	/* ... */
			b = fn->start; /* do it last */
		/* ... */
		for (i=&b->ins[b->nins]; i!=b->ins;)
			/* ... */
				for (i0=i; i0>b->ins; i0--)
					if (!isarg((i0-1)->op))
					/* ... */
				selcall(fn, i0, i, &ral);
				/* ... */
				selvastart(fn, fa, i->arg[0]);
		/* ... */
			for (; ral; ral=ral->link)
			/* ... */
		b->nins = &insb[NIns] - curi;
		idup(&b->ins, curi, b->nins);
	} while (b != fn->start);
	/* ... */
		fprintf(stderr, "\n> After ABI lowering:\n");