/* Forward typedefs for the ABI lowering pass:
 * AClass — argument/return classification record (see classify/typclass below);
 * RAlloc — return-pad allocation record threaded through selcall via **rap.
 * NOTE(review): struct definitions are not visible in this chunk — confirm
 * their layout in the surrounding file. */
3 typedef struct AClass AClass
;
4 typedef struct RAlloc RAlloc
;
21 classify(AClass
*a
, Typ
*t
, uint s
)
27 for (n
=0, s1
=s
; n
<t
->nunion
; n
++, s
=s1
)
28 for (f
=t
->fields
[n
]; f
->type
!=FEnd
; f
++) {
35 /* don't change anything */
52 classify(a
, &typ
[f
->len
], s
);
53 s
+= typ
[f
->len
].size
;
60 typclass(AClass
*a
, Typ
*t
)
67 /* the ABI requires sizes to be rounded
68 * up to the nearest multiple of 8, moreover
69 * it makes it easy to load and store structures
74 sz
= (sz
+ al
-1) & -al
;
80 if (t
->isdark
|| sz
> 16 || sz
== 0) {
81 /* large or unaligned structures are
82 * required to be passed in memory
95 retr(Ref reg
[2], AClass
*aret
)
97 static int retreg
[2][2] = {{RAX
, RDX
}, {XMM0
, XMM0
+1}};
102 for (n
=0; (uint
)n
*8<aret
->size
; n
++) {
103 k
= KBASE(aret
->cls
[n
]);
104 reg
[n
] = TMP(retreg
[k
][nr
[k
]++]);
111 selret(Blk
*b
, Fn
*fn
)
119 if (!isret(j
) || j
== Jret0
)
126 typclass(&aret
, &typ
[fn
->retty
]);
128 assert(rtype(fn
->retr
) == RTmp
);
129 emit(Ocopy
, Kl
, TMP(RAX
), fn
->retr
, R
);
130 emit(Oblit1
, 0, R
, INT(aret
.type
->size
), R
);
131 emit(Oblit0
, 0, R
, r0
, fn
->retr
);
134 ca
= retr(reg
, &aret
);
136 r
= newtmp("abi", Kl
, fn
);
137 emit(Oload
, Kl
, reg
[1], r
, R
);
138 emit(Oadd
, Kl
, r
, r0
, getcon(8, fn
));
140 emit(Oload
, Kl
, reg
[0], r0
, R
);
145 emit(Ocopy
, k
, TMP(RAX
), r0
, R
);
148 emit(Ocopy
, k
, TMP(XMM0
), r0
, R
);
153 b
->jmp
.arg
= CALL(ca
);
157 argsclass(Ins
*i0
, Ins
*i1
, AClass
*ac
, int op
, AClass
*aret
, Ref
*env
)
159 int varc
, envc
, nint
, ni
, nsse
, ns
, n
, *pn
;
163 if (aret
&& aret
->inmem
)
164 nint
= 5; /* hidden argument */
170 for (i
=i0
, a
=ac
; i
<i1
; i
++, a
++)
171 switch (i
->op
- op
+ Oarg
) {
173 if (KBASE(i
->cls
) == 0)
188 typclass(a
, &typ
[n
]);
192 for (n
=0; (uint
)n
*8<a
->size
; n
++)
193 if (KBASE(a
->cls
[n
]) == 0)
197 if (nint
>= ni
&& nsse
>= ns
) {
218 err("sysv abi does not support variadic env calls");
220 return ((varc
|envc
) << 12) | ((6-nint
) << 4) | ((8-nsse
) << 8);
223 int amd64_sysv_rsave
[] = {
224 RDI
, RSI
, RDX
, RCX
, R8
, R9
, R10
, R11
, RAX
,
225 XMM0
, XMM1
, XMM2
, XMM3
, XMM4
, XMM5
, XMM6
, XMM7
,
226 XMM8
, XMM9
, XMM10
, XMM11
, XMM12
, XMM13
, XMM14
, -1
/* -1-terminated list of registers this ABI treats as clobber-protected
 * (RBX, R12-R15 are the SysV callee-saved GPRs); the MAKESURE check below
 * ties its length to NCLR+1. */
228 int amd64_sysv_rclob
[] = {RBX
, R12
, R13
, R14
, R15
, -1};
230 MAKESURE(sysv_arrays_ok
,
231 sizeof amd64_sysv_rsave
== (NGPS
+NFPS
+1) * sizeof(int) &&
232 sizeof amd64_sysv_rclob
== (NCLR
+1) * sizeof(int)
235 /* layout of call's second argument (RCall)
238 * |0...00|x|xxxx|xxxx|xx|xx| range
239 * | | | | ` gp regs returned (0..2)
240 * | | | ` sse regs returned (0..2)
241 * | | ` gp regs passed (0..6)
242 * | ` sse regs passed (0..8)
243 * ` 1 if rax is used to pass data (0..1)
247 amd64_sysv_retregs(Ref r
, int p
[2])
252 assert(rtype(r
) == RCall
);
255 nf
= (r
.val
>> 2) & 3;
272 amd64_sysv_argregs(Ref r
, int p
[2])
277 assert(rtype(r
) == RCall
);
279 ni
= (r
.val
>> 4) & 15;
280 nf
= (r
.val
>> 8) & 15;
281 ra
= (r
.val
>> 12) & 1;
283 b
|= BIT(amd64_sysv_rsave
[j
]);
290 return b
| (ra
? BIT(RAX
) : 0);
294 rarg(int ty
, int *ni
, int *ns
)
297 return TMP(amd64_sysv_rsave
[(*ni
)++]);
299 return TMP(XMM0
+ (*ns
)++);
303 selcall(Fn
*fn
, Ins
*i0
, Ins
*i1
, RAlloc
**rap
)
306 AClass
*ac
, *a
, aret
;
309 Ref r
, r1
, r2
, reg
[2], env
;
313 ac
= alloc((i1
-i0
) * sizeof ac
[0]);
315 if (!req(i1
->arg
[1], R
)) {
316 assert(rtype(i1
->arg
[1]) == RType
);
317 typclass(&aret
, &typ
[i1
->arg
[1].val
]);
318 ca
= argsclass(i0
, i1
, ac
, Oarg
, &aret
, &env
);
320 ca
= argsclass(i0
, i1
, ac
, Oarg
, 0, &env
);
322 for (stk
=0, a
=&ac
[i1
-i0
]; a
>ac
;)
325 err("sysv abi requires alignments of 16 or less");
332 r
= getcon(-(int64_t)stk
, fn
);
333 emit(Osalloc
, Kl
, R
, r
, R
);
336 if (!req(i1
->arg
[1], R
)) {
338 /* get the return location from rax
339 * it saves one callee-save reg */
340 r1
= newtmp("abi", Kl
, fn
);
341 emit(Ocopy
, Kl
, i1
->to
, TMP(RAX
), R
);
344 /* todo, may read out of bounds.
345 * gcc did this up until 5.2, but
346 * this should still be fixed.
349 r
= newtmp("abi", Kl
, fn
);
350 aret
.ref
[1] = newtmp("abi", aret
.cls
[1], fn
);
351 emit(Ostorel
, 0, R
, aret
.ref
[1], r
);
352 emit(Oadd
, Kl
, r
, i1
->to
, getcon(8, fn
));
354 aret
.ref
[0] = newtmp("abi", aret
.cls
[0], fn
);
355 emit(Ostorel
, 0, R
, aret
.ref
[0], i1
->to
);
356 ca
+= retr(reg
, &aret
);
358 emit(Ocopy
, aret
.cls
[1], aret
.ref
[1], reg
[1], R
);
359 emit(Ocopy
, aret
.cls
[0], aret
.ref
[0], reg
[0], R
);
362 /* allocate return pad */
363 ra
= alloc(sizeof *ra
);
364 /* specific to NAlign == 3 */
365 al
= aret
.align
>= 2 ? aret
.align
- 2 : 0;
366 ra
->i
= (Ins
){Oalloc
+al
, Kl
, r1
, {getcon(aret
.size
, fn
)}};
371 if (KBASE(i1
->cls
) == 0) {
372 emit(Ocopy
, i1
->cls
, i1
->to
, TMP(RAX
), R
);
375 emit(Ocopy
, i1
->cls
, i1
->to
, TMP(XMM0
), R
);
380 emit(Ocall
, i1
->cls
, R
, i1
->arg
[0], CALL(ca
));
383 emit(Ocopy
, Kl
, TMP(RAX
), env
, R
);
384 else if ((ca
>> 12) & 1) /* vararg call */
385 emit(Ocopy
, Kw
, TMP(RAX
), getcon((ca
>> 8) & 15, fn
), R
);
388 if (ra
&& aret
.inmem
)
389 emit(Ocopy
, Kl
, rarg(Kl
, &ni
, &ns
), ra
->i
.to
, R
); /* pass hidden argument */
391 for (i
=i0
, a
=ac
; i
<i1
; i
++, a
++) {
392 if (i
->op
>= Oarge
|| a
->inmem
)
394 r1
= rarg(a
->cls
[0], &ni
, &ns
);
395 if (i
->op
== Oargc
) {
397 r2
= rarg(a
->cls
[1], &ni
, &ns
);
398 r
= newtmp("abi", Kl
, fn
);
399 emit(Oload
, a
->cls
[1], r2
, r
, R
);
400 emit(Oadd
, Kl
, r
, i
->arg
[1], getcon(8, fn
));
402 emit(Oload
, a
->cls
[0], r1
, i
->arg
[1], R
);
404 emit(Ocopy
, i
->cls
, r1
, i
->arg
[0], R
);
410 r
= newtmp("abi", Kl
, fn
);
411 for (i
=i0
, a
=ac
, off
=0; i
<i1
; i
++, a
++) {
412 if (i
->op
>= Oarge
|| !a
->inmem
)
414 r1
= newtmp("abi", Kl
, fn
);
415 if (i
->op
== Oargc
) {
418 emit(Oblit1
, 0, R
, INT(a
->type
->size
), R
);
419 emit(Oblit0
, 0, R
, i
->arg
[1], r1
);
421 emit(Ostorel
, 0, R
, i
->arg
[0], r1
);
422 emit(Oadd
, Kl
, r1
, r
, getcon(off
, fn
));
425 emit(Osalloc
, Kl
, r
, getcon(stk
, fn
), R
);
429 selpar(Fn
*fn
, Ins
*i0
, Ins
*i1
)
431 AClass
*ac
, *a
, aret
;
433 int ni
, ns
, s
, al
, fa
;
437 ac
= alloc((i1
-i0
) * sizeof ac
[0]);
441 if (fn
->retty
>= 0) {
442 typclass(&aret
, &typ
[fn
->retty
]);
443 fa
= argsclass(i0
, i1
, ac
, Opar
, &aret
, &env
);
445 fa
= argsclass(i0
, i1
, ac
, Opar
, 0, &env
);
446 fn
->reg
= amd64_sysv_argregs(CALL(fa
), 0);
448 for (i
=i0
, a
=ac
; i
<i1
; i
++, a
++) {
449 if (i
->op
!= Oparc
|| a
->inmem
)
452 r
= newtmp("abi", Kl
, fn
);
453 a
->ref
[1] = newtmp("abi", Kl
, fn
);
454 emit(Ostorel
, 0, R
, a
->ref
[1], r
);
455 emit(Oadd
, Kl
, r
, i
->to
, getcon(8, fn
));
457 a
->ref
[0] = newtmp("abi", Kl
, fn
);
458 emit(Ostorel
, 0, R
, a
->ref
[0], i
->to
);
459 /* specific to NAlign == 3 */
460 al
= a
->align
>= 2 ? a
->align
- 2 : 0;
461 emit(Oalloc
+al
, Kl
, i
->to
, getcon(a
->size
, fn
), R
);
464 if (fn
->retty
>= 0 && aret
.inmem
) {
465 r
= newtmp("abi", Kl
, fn
);
466 emit(Ocopy
, Kl
, r
, rarg(Kl
, &ni
, &ns
), R
);
470 for (i
=i0
, a
=ac
, s
=4; i
<i1
; i
++, a
++) {
474 err("sysv abi requires alignments of 16 or less");
477 fn
->tmp
[i
->to
.val
].slot
= -s
;
481 emit(Oload
, i
->cls
, i
->to
, SLOT(-s
), R
);
487 r
= rarg(a
->cls
[0], &ni
, &ns
);
488 if (i
->op
== Oparc
) {
489 emit(Ocopy
, a
->cls
[0], a
->ref
[0], r
, R
);
491 r
= rarg(a
->cls
[1], &ni
, &ns
);
492 emit(Ocopy
, a
->cls
[1], a
->ref
[1], r
, R
);
495 emit(Ocopy
, i
->cls
, i
->to
, r
, R
);
499 emit(Ocopy
, Kl
, env
, TMP(RAX
), R
);
501 return fa
| (s
*4)<<12;
505 split(Fn
*fn
, Blk
*b
)
511 bn
->nins
= &insb
[NIns
] - curi
;
512 idup(&bn
->ins
, curi
, bn
->nins
);
514 bn
->visit
= ++b
->visit
;
515 strf(bn
->name
, "%s.%d", b
->name
, b
->visit
);
523 chpred(Blk
*b
, Blk
*bp
, Blk
*bp1
)
528 for (p
=b
->phi
; p
; p
=p
->link
) {
529 for (a
=0; p
->blk
[a
]!=bp
; a
++)
536 selvaarg(Fn
*fn
, Blk
*b
, Ins
*i
)
538 Ref loc
, lreg
, lstk
, nr
, r0
, r1
, c4
, c8
, c16
, c
, ap
;
539 Blk
*b0
, *bstk
, *breg
;
544 c16
= getcon(16, fn
);
546 isint
= KBASE(i
->cls
) == 0;
549 r0 =l add ap, (0 or 4)
551 r1 =w cultw nr, (48 or 176)
557 r0 =w add nr, (8 or 16)
558 r1 =l add ap, (0 or 4)
566 %loc =l phi @breg %lreg, @bstk %lstk
567 i->to =(i->cls) load %loc
570 loc
= newtmp("abi", Kl
, fn
);
571 emit(Oload
, i
->cls
, i
->to
, loc
, R
);
577 chpred(b
->s1
, b
, b0
);
578 if (b
->s2
&& b
->s2
!= b
->s1
)
579 chpred(b
->s2
, b
, b0
);
581 lreg
= newtmp("abi", Kl
, fn
);
582 nr
= newtmp("abi", Kl
, fn
);
583 r0
= newtmp("abi", Kw
, fn
);
584 r1
= newtmp("abi", Kl
, fn
);
585 emit(Ostorew
, Kw
, R
, r0
, r1
);
586 emit(Oadd
, Kl
, r1
, ap
, isint
? CON_Z
: c4
);
587 emit(Oadd
, Kw
, r0
, nr
, isint
? c8
: c16
);
588 r0
= newtmp("abi", Kl
, fn
);
589 r1
= newtmp("abi", Kl
, fn
);
590 emit(Oadd
, Kl
, lreg
, r1
, nr
);
591 emit(Oload
, Kl
, r1
, r0
, R
);
592 emit(Oadd
, Kl
, r0
, ap
, c16
);
594 breg
->jmp
.type
= Jjmp
;
597 lstk
= newtmp("abi", Kl
, fn
);
598 r0
= newtmp("abi", Kl
, fn
);
599 r1
= newtmp("abi", Kl
, fn
);
600 emit(Ostorel
, Kw
, R
, r1
, r0
);
601 emit(Oadd
, Kl
, r1
, lstk
, c8
);
602 emit(Oload
, Kl
, lstk
, r0
, R
);
603 emit(Oadd
, Kl
, r0
, ap
, c8
);
605 bstk
->jmp
.type
= Jjmp
;
608 b0
->phi
= alloc(sizeof *b0
->phi
);
610 .cls
= Kl
, .to
= loc
,
612 .blk
= vnew(2, sizeof b0
->phi
->blk
[0], PFn
),
613 .arg
= vnew(2, sizeof b0
->phi
->arg
[0], PFn
),
615 b0
->phi
->blk
[0] = bstk
;
616 b0
->phi
->blk
[1] = breg
;
617 b0
->phi
->arg
[0] = lstk
;
618 b0
->phi
->arg
[1] = lreg
;
619 r0
= newtmp("abi", Kl
, fn
);
620 r1
= newtmp("abi", Kw
, fn
);
625 c
= getcon(isint
? 48 : 176, fn
);
626 emit(Ocmpw
+Ciult
, Kw
, r1
, nr
, c
);
627 emit(Oloadsw
, Kl
, nr
, r0
, R
);
628 emit(Oadd
, Kl
, r0
, ap
, isint
? CON_Z
: c4
);
632 selvastart(Fn
*fn
, int fa
, Ref ap
)
637 gp
= ((fa
>> 4) & 15) * 8;
638 fp
= 48 + ((fa
>> 8) & 15) * 16;
640 r0
= newtmp("abi", Kl
, fn
);
641 r1
= newtmp("abi", Kl
, fn
);
642 emit(Ostorel
, Kw
, R
, r1
, r0
);
643 emit(Oadd
, Kl
, r1
, TMP(RBP
), getcon(-176, fn
));
644 emit(Oadd
, Kl
, r0
, ap
, getcon(16, fn
));
645 r0
= newtmp("abi", Kl
, fn
);
646 r1
= newtmp("abi", Kl
, fn
);
647 emit(Ostorel
, Kw
, R
, r1
, r0
);
648 emit(Oadd
, Kl
, r1
, TMP(RBP
), getcon(sp
, fn
));
649 emit(Oadd
, Kl
, r0
, ap
, getcon(8, fn
));
650 r0
= newtmp("abi", Kl
, fn
);
651 emit(Ostorew
, Kw
, R
, getcon(fp
, fn
), r0
);
652 emit(Oadd
, Kl
, r0
, ap
, getcon(4, fn
));
653 emit(Ostorew
, Kw
, R
, getcon(gp
, fn
), ap
);
657 amd64_sysv_abi(Fn
*fn
)
664 for (b
=fn
->start
; b
; b
=b
->link
)
667 /* lower parameters */
668 for (b
=fn
->start
, i
=b
->ins
; i
<&b
->ins
[b
->nins
]; i
++)
671 fa
= selpar(fn
, b
->ins
, i
);
672 n
= b
->nins
- (i
- b
->ins
) + (&insb
[NIns
] - curi
);
673 i0
= alloc(n
* sizeof(Ins
));
674 ip
= icpy(ip
= i0
, curi
, &insb
[NIns
] - curi
);
675 ip
= icpy(ip
, i
, &b
->ins
[b
->nins
] - i
);
679 /* lower calls, returns, and vararg instructions */
684 b
= fn
->start
; /* do it last */
689 for (i
=&b
->ins
[b
->nins
]; i
!=b
->ins
;)
695 for (i0
=i
; i0
>b
->ins
; i0
--)
696 if (!isarg((i0
-1)->op
))
698 selcall(fn
, i0
, i
, &ral
);
702 selvastart(fn
, fa
, i
->arg
[0]);
712 for (; ral
; ral
=ral
->link
)
714 b
->nins
= &insb
[NIns
] - curi
;
715 idup(&b
->ins
, curi
, b
->nins
);
716 } while (b
!= fn
->start
);
719 fprintf(stderr
, "\n> After ABI lowering:\n");