/* Opaque forward declarations: AClass holds the SysV classification of a
 * single argument/return value; RAlloc tracks return-pad allocations.
 * Their definitions appear elsewhere in this file/unit. */
typedef struct AClass AClass;
typedef struct RAlloc RAlloc;
21 classify(AClass
*a
, Typ
*t
, uint s
)
27 for (n
=0, s1
=s
; n
<t
->nunion
; n
++, s
=s1
)
28 for (f
=t
->fields
[n
]; f
->type
!=FEnd
; f
++) {
35 /* don't change anything */
52 classify(a
, &typ
[f
->len
], s
);
53 s
+= typ
[f
->len
].size
;
60 typclass(AClass
*a
, Typ
*t
)
67 /* the ABI requires sizes to be rounded
68 * up to the nearest multiple of 8, moreover
69 * it makes it easy load and store structures
74 sz
= (sz
+ al
-1) & -al
;
80 if (t
->isdark
|| sz
> 16 || sz
== 0) {
81 /* large or unaligned structures are
82 * required to be passed in memory
95 retr(Ref reg
[2], AClass
*aret
)
97 static int retreg
[2][2] = {{RAX
, RDX
}, {XMM0
, XMM0
+1}};
102 for (n
=0; (uint
)n
*8<aret
->size
; n
++) {
103 k
= KBASE(aret
->cls
[n
]);
104 reg
[n
] = TMP(retreg
[k
][nr
[k
]++]);
111 selret(Blk
*b
, Fn
*fn
)
119 if (!isret(j
) || j
== Jret0
)
126 typclass(&aret
, &typ
[fn
->retty
]);
128 assert(rtype(fn
->retr
) == RTmp
);
129 emit(Ocopy
, Kl
, TMP(RAX
), fn
->retr
, R
);
130 blit0(fn
->retr
, r0
, aret
.type
->size
, fn
);
133 ca
= retr(reg
, &aret
);
135 r
= newtmp("abi", Kl
, fn
);
136 emit(Oload
, Kl
, reg
[1], r
, R
);
137 emit(Oadd
, Kl
, r
, r0
, getcon(8, fn
));
139 emit(Oload
, Kl
, reg
[0], r0
, R
);
144 emit(Ocopy
, k
, TMP(RAX
), r0
, R
);
147 emit(Ocopy
, k
, TMP(XMM0
), r0
, R
);
152 b
->jmp
.arg
= CALL(ca
);
156 argsclass(Ins
*i0
, Ins
*i1
, AClass
*ac
, int op
, AClass
*aret
, Ref
*env
)
158 int varc
, envc
, nint
, ni
, nsse
, ns
, n
, *pn
;
162 if (aret
&& aret
->inmem
)
163 nint
= 5; /* hidden argument */
169 for (i
=i0
, a
=ac
; i
<i1
; i
++, a
++)
170 switch (i
->op
- op
+ Oarg
) {
172 if (KBASE(i
->cls
) == 0)
187 typclass(a
, &typ
[n
]);
191 for (n
=0; (uint
)n
*8<a
->size
; n
++)
192 if (KBASE(a
->cls
[n
]) == 0)
196 if (nint
>= ni
&& nsse
>= ns
) {
217 err("sysv abi does not support variadic env calls");
219 return ((varc
|envc
) << 12) | ((6-nint
) << 4) | ((8-nsse
) << 8);
222 int amd64_sysv_rsave
[] = {
223 RDI
, RSI
, RDX
, RCX
, R8
, R9
, R10
, R11
, RAX
,
224 XMM0
, XMM1
, XMM2
, XMM3
, XMM4
, XMM5
, XMM6
, XMM7
,
225 XMM8
, XMM9
, XMM10
, XMM11
, XMM12
, XMM13
, XMM14
, -1
227 int amd64_sysv_rclob
[] = {RBX
, R12
, R13
, R14
, R15
, -1};
229 MAKESURE(sysv_arrays_ok
,
230 sizeof amd64_sysv_rsave
== (NGPS
+NFPS
+1) * sizeof(int) &&
231 sizeof amd64_sysv_rclob
== (NCLR
+1) * sizeof(int)
/* layout of call's second argument (RCall)
 *
 * |0...00|x|xxxx|xxxx|xx|xx|               range
 *         |    |    |  |  ` gp regs returned (0..2)
 *         |    |    |  ` sse regs returned   (0..2)
 *         |    |    ` gp regs passed         (0..6)
 *         |    ` sse regs passed             (0..8)
 *         ` 1 if rax is used to pass data    (0..1)
 */
246 amd64_sysv_retregs(Ref r
, int p
[2])
251 assert(rtype(r
) == RCall
);
254 nf
= (r
.val
>> 2) & 3;
271 amd64_sysv_argregs(Ref r
, int p
[2])
276 assert(rtype(r
) == RCall
);
278 ni
= (r
.val
>> 4) & 15;
279 nf
= (r
.val
>> 8) & 15;
280 ra
= (r
.val
>> 12) & 1;
282 b
|= BIT(amd64_sysv_rsave
[j
]);
289 return b
| (ra
? BIT(RAX
) : 0);
293 rarg(int ty
, int *ni
, int *ns
)
296 return TMP(amd64_sysv_rsave
[(*ni
)++]);
298 return TMP(XMM0
+ (*ns
)++);
302 selcall(Fn
*fn
, Ins
*i0
, Ins
*i1
, RAlloc
**rap
)
305 AClass
*ac
, *a
, aret
;
308 Ref r
, r1
, r2
, reg
[2], env
;
312 ac
= alloc((i1
-i0
) * sizeof ac
[0]);
314 if (!req(i1
->arg
[1], R
)) {
315 assert(rtype(i1
->arg
[1]) == RType
);
316 typclass(&aret
, &typ
[i1
->arg
[1].val
]);
317 ca
= argsclass(i0
, i1
, ac
, Oarg
, &aret
, &env
);
319 ca
= argsclass(i0
, i1
, ac
, Oarg
, 0, &env
);
321 for (stk
=0, a
=&ac
[i1
-i0
]; a
>ac
;)
324 err("sysv abi requires alignments of 16 or less");
331 r
= getcon(-(int64_t)stk
, fn
);
332 emit(Osalloc
, Kl
, R
, r
, R
);
335 if (!req(i1
->arg
[1], R
)) {
337 /* get the return location from eax
338 * it saves one callee-save reg */
339 r1
= newtmp("abi", Kl
, fn
);
340 emit(Ocopy
, Kl
, i1
->to
, TMP(RAX
), R
);
343 /* todo, may read out of bounds.
344 * gcc did this up until 5.2, but
345 * this should still be fixed.
348 r
= newtmp("abi", Kl
, fn
);
349 aret
.ref
[1] = newtmp("abi", aret
.cls
[1], fn
);
350 emit(Ostorel
, 0, R
, aret
.ref
[1], r
);
351 emit(Oadd
, Kl
, r
, i1
->to
, getcon(8, fn
));
353 aret
.ref
[0] = newtmp("abi", aret
.cls
[0], fn
);
354 emit(Ostorel
, 0, R
, aret
.ref
[0], i1
->to
);
355 ca
+= retr(reg
, &aret
);
357 emit(Ocopy
, aret
.cls
[1], aret
.ref
[1], reg
[1], R
);
358 emit(Ocopy
, aret
.cls
[0], aret
.ref
[0], reg
[0], R
);
361 /* allocate return pad */
362 ra
= alloc(sizeof *ra
);
363 /* specific to NAlign == 3 */
364 al
= aret
.align
>= 2 ? aret
.align
- 2 : 0;
365 ra
->i
= (Ins
){Oalloc
+al
, Kl
, r1
, {getcon(aret
.size
, fn
)}};
370 if (KBASE(i1
->cls
) == 0) {
371 emit(Ocopy
, i1
->cls
, i1
->to
, TMP(RAX
), R
);
374 emit(Ocopy
, i1
->cls
, i1
->to
, TMP(XMM0
), R
);
379 emit(Ocall
, i1
->cls
, R
, i1
->arg
[0], CALL(ca
));
382 emit(Ocopy
, Kl
, TMP(RAX
), env
, R
);
383 else if ((ca
>> 12) & 1) /* vararg call */
384 emit(Ocopy
, Kw
, TMP(RAX
), getcon((ca
>> 8) & 15, fn
), R
);
387 if (ra
&& aret
.inmem
)
388 emit(Ocopy
, Kl
, rarg(Kl
, &ni
, &ns
), ra
->i
.to
, R
); /* pass hidden argument */
390 for (i
=i0
, a
=ac
; i
<i1
; i
++, a
++) {
391 if (i
->op
>= Oarge
|| a
->inmem
)
393 r1
= rarg(a
->cls
[0], &ni
, &ns
);
394 if (i
->op
== Oargc
) {
396 r2
= rarg(a
->cls
[1], &ni
, &ns
);
397 r
= newtmp("abi", Kl
, fn
);
398 emit(Oload
, a
->cls
[1], r2
, r
, R
);
399 emit(Oadd
, Kl
, r
, i
->arg
[1], getcon(8, fn
));
401 emit(Oload
, a
->cls
[0], r1
, i
->arg
[1], R
);
403 emit(Ocopy
, i
->cls
, r1
, i
->arg
[0], R
);
409 r
= newtmp("abi", Kl
, fn
);
410 for (i
=i0
, a
=ac
, off
=0; i
<i1
; i
++, a
++) {
411 if (i
->op
>= Oarge
|| !a
->inmem
)
413 if (i
->op
== Oargc
) {
416 blit(r
, off
, i
->arg
[1], 0, a
->type
->size
, fn
);
418 r1
= newtmp("abi", Kl
, fn
);
419 emit(Ostorel
, 0, R
, i
->arg
[0], r1
);
420 emit(Oadd
, Kl
, r1
, r
, getcon(off
, fn
));
424 emit(Osalloc
, Kl
, r
, getcon(stk
, fn
), R
);
428 selpar(Fn
*fn
, Ins
*i0
, Ins
*i1
)
430 AClass
*ac
, *a
, aret
;
432 int ni
, ns
, s
, al
, fa
;
436 ac
= alloc((i1
-i0
) * sizeof ac
[0]);
440 if (fn
->retty
>= 0) {
441 typclass(&aret
, &typ
[fn
->retty
]);
442 fa
= argsclass(i0
, i1
, ac
, Opar
, &aret
, &env
);
444 fa
= argsclass(i0
, i1
, ac
, Opar
, 0, &env
);
445 fn
->reg
= amd64_sysv_argregs(CALL(fa
), 0);
447 for (i
=i0
, a
=ac
; i
<i1
; i
++, a
++) {
448 if (i
->op
!= Oparc
|| a
->inmem
)
451 r
= newtmp("abi", Kl
, fn
);
452 a
->ref
[1] = newtmp("abi", Kl
, fn
);
453 emit(Ostorel
, 0, R
, a
->ref
[1], r
);
454 emit(Oadd
, Kl
, r
, i
->to
, getcon(8, fn
));
456 a
->ref
[0] = newtmp("abi", Kl
, fn
);
457 emit(Ostorel
, 0, R
, a
->ref
[0], i
->to
);
458 /* specific to NAlign == 3 */
459 al
= a
->align
>= 2 ? a
->align
- 2 : 0;
460 emit(Oalloc
+al
, Kl
, i
->to
, getcon(a
->size
, fn
), R
);
463 if (fn
->retty
>= 0 && aret
.inmem
) {
464 r
= newtmp("abi", Kl
, fn
);
465 emit(Ocopy
, Kl
, r
, rarg(Kl
, &ni
, &ns
), R
);
469 for (i
=i0
, a
=ac
, s
=4; i
<i1
; i
++, a
++) {
473 err("sysv abi requires alignments of 16 or less");
476 fn
->tmp
[i
->to
.val
].slot
= -s
;
480 emit(Oload
, i
->cls
, i
->to
, SLOT(-s
), R
);
486 r
= rarg(a
->cls
[0], &ni
, &ns
);
487 if (i
->op
== Oparc
) {
488 emit(Ocopy
, a
->cls
[0], a
->ref
[0], r
, R
);
490 r
= rarg(a
->cls
[1], &ni
, &ns
);
491 emit(Ocopy
, a
->cls
[1], a
->ref
[1], r
, R
);
494 emit(Ocopy
, i
->cls
, i
->to
, r
, R
);
498 emit(Ocopy
, Kl
, env
, TMP(RAX
), R
);
500 return fa
| (s
*4)<<12;
504 split(Fn
*fn
, Blk
*b
)
510 bn
->nins
= &insb
[NIns
] - curi
;
511 idup(&bn
->ins
, curi
, bn
->nins
);
513 bn
->visit
= ++b
->visit
;
514 (void)!snprintf(bn
->name
, NString
, "%s.%d", b
->name
, b
->visit
);
522 chpred(Blk
*b
, Blk
*bp
, Blk
*bp1
)
527 for (p
=b
->phi
; p
; p
=p
->link
) {
528 for (a
=0; p
->blk
[a
]!=bp
; a
++)
535 selvaarg(Fn
*fn
, Blk
*b
, Ins
*i
)
537 Ref loc
, lreg
, lstk
, nr
, r0
, r1
, c4
, c8
, c16
, c
, ap
;
538 Blk
*b0
, *bstk
, *breg
;
543 c16
= getcon(16, fn
);
545 isint
= KBASE(i
->cls
) == 0;
548 r0 =l add ap, (0 or 4)
550 r1 =w cultw nr, (48 or 176)
556 r0 =w add nr, (8 or 16)
557 r1 =l add ap, (0 or 4)
565 %loc =l phi @breg %lreg, @bstk %lstk
566 i->to =(i->cls) load %loc
569 loc
= newtmp("abi", Kl
, fn
);
570 emit(Oload
, i
->cls
, i
->to
, loc
, R
);
576 chpred(b
->s1
, b
, b0
);
577 if (b
->s2
&& b
->s2
!= b
->s1
)
578 chpred(b
->s2
, b
, b0
);
580 lreg
= newtmp("abi", Kl
, fn
);
581 nr
= newtmp("abi", Kl
, fn
);
582 r0
= newtmp("abi", Kw
, fn
);
583 r1
= newtmp("abi", Kl
, fn
);
584 emit(Ostorew
, Kw
, R
, r0
, r1
);
585 emit(Oadd
, Kl
, r1
, ap
, isint
? CON_Z
: c4
);
586 emit(Oadd
, Kw
, r0
, nr
, isint
? c8
: c16
);
587 r0
= newtmp("abi", Kl
, fn
);
588 r1
= newtmp("abi", Kl
, fn
);
589 emit(Oadd
, Kl
, lreg
, r1
, nr
);
590 emit(Oload
, Kl
, r1
, r0
, R
);
591 emit(Oadd
, Kl
, r0
, ap
, c16
);
593 breg
->jmp
.type
= Jjmp
;
596 lstk
= newtmp("abi", Kl
, fn
);
597 r0
= newtmp("abi", Kl
, fn
);
598 r1
= newtmp("abi", Kl
, fn
);
599 emit(Ostorel
, Kw
, R
, r1
, r0
);
600 emit(Oadd
, Kl
, r1
, lstk
, c8
);
601 emit(Oload
, Kl
, lstk
, r0
, R
);
602 emit(Oadd
, Kl
, r0
, ap
, c8
);
604 bstk
->jmp
.type
= Jjmp
;
607 b0
->phi
= alloc(sizeof *b0
->phi
);
609 .cls
= Kl
, .to
= loc
,
611 .blk
= vnew(2, sizeof b0
->phi
->blk
[0], PFn
),
612 .arg
= vnew(2, sizeof b0
->phi
->arg
[0], PFn
),
614 b0
->phi
->blk
[0] = bstk
;
615 b0
->phi
->blk
[1] = breg
;
616 b0
->phi
->arg
[0] = lstk
;
617 b0
->phi
->arg
[1] = lreg
;
618 r0
= newtmp("abi", Kl
, fn
);
619 r1
= newtmp("abi", Kw
, fn
);
624 c
= getcon(isint
? 48 : 176, fn
);
625 emit(Ocmpw
+Ciult
, Kw
, r1
, nr
, c
);
626 emit(Oloadsw
, Kl
, nr
, r0
, R
);
627 emit(Oadd
, Kl
, r0
, ap
, isint
? CON_Z
: c4
);
631 selvastart(Fn
*fn
, int fa
, Ref ap
)
636 gp
= ((fa
>> 4) & 15) * 8;
637 fp
= 48 + ((fa
>> 8) & 15) * 16;
639 r0
= newtmp("abi", Kl
, fn
);
640 r1
= newtmp("abi", Kl
, fn
);
641 emit(Ostorel
, Kw
, R
, r1
, r0
);
642 emit(Oadd
, Kl
, r1
, TMP(RBP
), getcon(-176, fn
));
643 emit(Oadd
, Kl
, r0
, ap
, getcon(16, fn
));
644 r0
= newtmp("abi", Kl
, fn
);
645 r1
= newtmp("abi", Kl
, fn
);
646 emit(Ostorel
, Kw
, R
, r1
, r0
);
647 emit(Oadd
, Kl
, r1
, TMP(RBP
), getcon(sp
, fn
));
648 emit(Oadd
, Kl
, r0
, ap
, getcon(8, fn
));
649 r0
= newtmp("abi", Kl
, fn
);
650 emit(Ostorew
, Kw
, R
, getcon(fp
, fn
), r0
);
651 emit(Oadd
, Kl
, r0
, ap
, getcon(4, fn
));
652 emit(Ostorew
, Kw
, R
, getcon(gp
, fn
), ap
);
656 amd64_sysv_abi(Fn
*fn
)
663 for (b
=fn
->start
; b
; b
=b
->link
)
666 /* lower parameters */
667 for (b
=fn
->start
, i
=b
->ins
; i
<&b
->ins
[b
->nins
]; i
++)
670 fa
= selpar(fn
, b
->ins
, i
);
671 n
= b
->nins
- (i
- b
->ins
) + (&insb
[NIns
] - curi
);
672 i0
= alloc(n
* sizeof(Ins
));
673 ip
= icpy(ip
= i0
, curi
, &insb
[NIns
] - curi
);
674 ip
= icpy(ip
, i
, &b
->ins
[b
->nins
] - i
);
678 /* lower calls, returns, and vararg instructions */
683 b
= fn
->start
; /* do it last */
688 for (i
=&b
->ins
[b
->nins
]; i
!=b
->ins
;)
694 for (i0
=i
; i0
>b
->ins
; i0
--)
695 if (!isarg((i0
-1)->op
))
697 selcall(fn
, i0
, i
, &ral
);
701 selvastart(fn
, fa
, i
->arg
[0]);
711 for (; ral
; ral
=ral
->link
)
713 b
->nins
= &insb
[NIns
] - curi
;
714 idup(&b
->ins
, curi
, b
->nins
);
715 } while (b
!= fn
->start
);
718 fprintf(stderr
, "\n> After ABI lowering:\n");