4 /* For x86_64, do the following:
6 * - check that constants are used only in
8 * - ensure immediates always fit in 32b
9 * - expose machine register constraints
10 * on instructions like division.
11 * - implement fast locals (the streak of
12 * constant allocX in the first basic block)
13 * - recognize complex addressing modes
15 * Invariant: the use counts that are used
16 * in sel() must be sound. This
17 * is not so trivial, maybe the
18 * dce should be moved out...
21 typedef struct ANum ANum
;
28 static int amatch(Addr
*, Ref
, int, ANum
*, Fn
*);
37 switch (fn
->con
[r
.val
].type
) {
39 /* we only support the 'small'
40 * code model of the ABI, this
41 * means that we can always
42 * address data with 32bits
46 val
= fn
->con
[r
.val
].bits
.i
;
47 return (val
< INT32_MIN
|| val
> INT32_MAX
);
49 die("invalid constant");
58 return fn
->tmp
[r
.val
].slot
;
62 fixarg(Ref
*r
, int k
, Ins
*i
, Fn
*fn
)
71 op
= i
? i
->op
: Ocopy
;
72 if (KBASE(k
) == 1 && rtype(r0
) == RCon
) {
73 /* load floating points from memory
74 * slots, they can't be used as
78 vgrow(&fn
->mem
, ++fn
->nmem
);
79 memset(&a
, 0, sizeof a
);
80 a
.offset
.type
= CAddr
;
82 n
= gasstash(&fn
->con
[r0
.val
].bits
, KWIDE(k
) ? 8 : 4);
83 sprintf(buf
, "fp%d", n
);
84 a
.offset
.label
= intern(buf
);
85 fn
->mem
[fn
->nmem
-1] = a
;
87 else if (op
!= Ocopy
&& k
== Kl
&& noimm(r0
, fn
)) {
88 /* load constants that do not fit in
89 * a 32bit signed integer into a
92 r1
= newtmp("isel", Kl
, fn
);
93 emit(Ocopy
, Kl
, r1
, r0
, R
);
96 /* load fast locals' addresses into
97 * temporaries right before the
100 r1
= newtmp("isel", Kl
, fn
);
101 emit(Oaddr
, Kl
, r1
, SLOT(s
), R
);
103 else if (!(isstore(op
) && r
== &i
->arg
[1])
104 && !isload(op
) && op
!= Ocall
&& rtype(r0
) == RCon
105 && fn
->con
[r0
.val
].type
== CAddr
) {
106 /* apple as does not support 32-bit
107 * absolute addressing, use a rip-
108 * relative leaq instead
110 r1
= newtmp("isel", Kl
, fn
);
111 emit(Oaddr
, Kl
, r1
, r0
, R
);
113 else if (rtype(r0
) == RMem
) {
114 /* eliminate memory operands of
115 * the form $foo(%rip, ...)
117 m
= &fn
->mem
[r0
.val
];
119 if (m
->offset
.type
== CAddr
) {
121 vgrow(&fn
->con
, ++fn
->ncon
);
122 fn
->con
[n
] = m
->offset
;
123 m
->offset
.type
= CUndef
;
124 r0
= newtmp("isel", Kl
, fn
);
125 emit(Oaddr
, Kl
, r0
, CON(n
), R
);
133 seladdr(Ref
*r
, ANum
*an
, Fn
*fn
)
139 if (rtype(r0
) == RTmp
) {
140 memset(&a
, 0, sizeof a
);
141 if (!amatch(&a
, r0
, an
[r0
.val
].n
, an
, fn
))
144 if (a
.offset
.type
== CAddr
) {
145 /* apple as does not support
146 * $foo(%r0, %r1, M); try to
147 * rewrite it or bail out if
150 if (!req(a
.index
, R
))
159 vgrow(&fn
->mem
, ++fn
->nmem
);
160 fn
->mem
[fn
->nmem
-1] = a
;
161 chuse(a
.base
, +1, fn
);
162 chuse(a
.index
, +1, fn
);
163 *r
= MEM(fn
->nmem
-1);
168 selcmp(Ref arg
[2], int k
, Fn
*fn
)
174 swap
= rtype(arg
[0]) == RCon
;
180 emit(Oxcmp
, k
, R
, arg
[1], arg
[0]);
182 if (rtype(arg
[0]) == RCon
) {
184 icmp
->arg
[1] = newtmp("isel", k
, fn
);
185 emit(Ocopy
, k
, icmp
->arg
[1], arg
[0], R
);
187 fixarg(&icmp
->arg
[0], k
, icmp
, fn
);
188 fixarg(&icmp
->arg
[1], k
, icmp
, fn
);
193 sel(Ins i
, ANum
*an
, Fn
*fn
)
200 if (rtype(i
.to
) == RTmp
)
201 if (!isreg(i
.to
) && !isreg(i
.arg
[0]) && !isreg(i
.arg
[1]))
202 if (fn
->tmp
[i
.to
.val
].nuse
== 0) {
203 chuse(i
.arg
[0], -1, fn
);
204 chuse(i
.arg
[1], -1, fn
);
216 if (i
.op
== Odiv
|| i
.op
== Oudiv
)
217 r0
= TMP(RAX
), r1
= TMP(RDX
);
219 r0
= TMP(RDX
), r1
= TMP(RAX
);
220 emit(Ocopy
, k
, i
.to
, r0
, R
);
221 emit(Ocopy
, k
, R
, r1
, R
);
222 if (rtype(i
.arg
[1]) == RCon
) {
223 /* immediates not allowed for
226 r0
= newtmp("isel", k
, fn
);
229 if (fn
->tmp
[r0
.val
].slot
!= -1)
230 err("unlikely argument %%%s in %s",
231 fn
->tmp
[r0
.val
].name
, optab
[i
.op
].name
);
232 if (i
.op
== Odiv
|| i
.op
== Orem
) {
233 emit(Oxidiv
, k
, R
, r0
, R
);
234 emit(Osign
, k
, TMP(RDX
), TMP(RAX
), R
);
236 emit(Oxdiv
, k
, R
, r0
, R
);
237 emit(Ocopy
, k
, TMP(RDX
), CON_Z
, R
);
239 emit(Ocopy
, k
, TMP(RAX
), i
.arg
[0], R
);
240 fixarg(&curi
->arg
[0], k
, curi
, fn
);
241 if (rtype(i
.arg
[1]) == RCon
)
242 emit(Ocopy
, k
, r0
, i
.arg
[1], R
);
247 if (rtype(i
.arg
[1]) == RCon
)
251 emit(Ocopy
, Kw
, R
, TMP(RCX
), R
);
253 emit(Ocopy
, Kw
, TMP(RCX
), r0
, R
);
263 if (rtype(i
.arg
[0]) == RCon
) {
269 seladdr(&i
.arg
[1], an
, fn
);
272 seladdr(&i
.arg
[0], an
, fn
);
294 i1
= curi
; /* fixarg() can change curi */
295 fixarg(&i1
->arg
[0], argcls(&i
, 0), i1
, fn
);
296 fixarg(&i1
->arg
[1], argcls(&i
, 1), i1
, fn
);
300 case Oalloc
+2: /* == Oalloc1 */
301 /* we need to make sure
302 * the stack remains aligned
306 if (rtype(i
.arg
[0]) == RCon
) {
307 sz
= fn
->con
[i
.arg
[0].val
].bits
.i
;
308 if (sz
< 0 || sz
>= INT_MAX
-15)
309 err("invalid alloc size %"PRId64
, sz
);
310 sz
= (sz
+ 15) & -16;
311 emit(Osalloc
, Kl
, i
.to
, getcon(sz
, fn
), R
);
313 /* r0 = (i.arg[0] + 15) & -16 */
314 r0
= newtmp("isel", Kl
, fn
);
315 r1
= newtmp("isel", Kl
, fn
);
316 emit(Osalloc
, Kl
, i
.to
, r0
, R
);
317 emit(Oand
, Kl
, r0
, r1
, getcon(-16, fn
));
318 emit(Oadd
, Kl
, r1
, i
.arg
[0], getcon(15, fn
));
319 if (fn
->tmp
[i
.arg
[0].val
].slot
!= -1)
320 err("unlikely argument %%%s in %s",
321 fn
->tmp
[i
.arg
[0].val
].name
, optab
[i
.op
].name
);
329 if (iscmp(i
.op
, &kc
, &x
)) {
330 emit(Oflag
+x
, k
, i
.to
, R
, R
);
332 if (selcmp(i
.arg
, kc
, fn
))
333 i1
->op
= Oflag
+ cmpop(x
);
336 die("unknown instruction %s", optab
[i
.op
].name
);
339 while (i0
> curi
&& --i0
) {
340 assert(rslot(i0
->arg
[0], fn
) == -1);
341 assert(rslot(i0
->arg
[1], fn
) == -1);
346 flagi(Ins
*i0
, Ins
*i
)
350 if (amd64_op
[i
->op
].zflag
)
352 if (amd64_op
[i
->op
].lflag
)
360 seljmp(Blk
*b
, Fn
*fn
)
367 if (b
->jmp
.type
== Jret0
|| b
->jmp
.type
== Jjmp
)
369 assert(b
->jmp
.type
== Jjnz
);
373 assert(!req(r
, R
) && rtype(r
) != RCon
);
374 if (b
->s1
== b
->s2
) {
380 fi
= flagi(b
->ins
, &b
->ins
[b
->nins
]);
381 if (!fi
|| !req(fi
->to
, r
)) {
382 selcmp((Ref
[2]){r
, CON_Z
}, Kw
, fn
); /* todo, long jnz */
383 b
->jmp
.type
= Jjf
+ Cine
;
385 else if (iscmp(fi
->op
, &k
, &c
)) {
387 if (selcmp(fi
->arg
, k
, fn
))
389 *fi
= (Ins
){.op
= Onop
};
391 b
->jmp
.type
= Jjf
+ c
;
393 else if (fi
->op
== Oand
&& t
->nuse
== 1
394 && (rtype(fi
->arg
[0]) == RTmp
||
395 rtype(fi
->arg
[1]) == RTmp
)) {
398 b
->jmp
.type
= Jjf
+ Cine
;
399 if (rtype(fi
->arg
[1]) == RCon
) {
401 fi
->arg
[1] = fi
->arg
[0];
406 /* since flags are not tracked in liveness,
407 * the result of the flag-setting instruction
408 * has to be marked as live
411 emit(Ocopy
, Kw
, R
, r
, R
);
412 b
->jmp
.type
= Jjf
+ Cine
;
417 aref(Ref r
, ANum
*ai
)
425 die("constant or temporary expected");
/* NOTE(review): extraction-garbled fragment of QBE's amd64 isel.c.
 * The fused original line numbers (430, 434, 436, 438, 439) jump,
 * so the return type, braces, local declarations, and the early
 * "return 0;" statements (orig. lines 429, 431-433, 435, 437, 440)
 * are missing from this view — do not reconstruct them by guessing.
 *
 * Visible purpose: decide whether r names a constant whose integer
 * value is a legal x86-64 addressing-mode scale factor.
 * Returns nonzero only when r is an RCon of kind CBits holding
 * 1, 2, 4 or 8 (the scales SIB addressing supports); the dropped
 * lines presumably return 0 otherwise — TODO confirm upstream. */
430 ascale(Ref r
, Con
*con
)
434 if (rtype(r
) != RCon
)
436 if (con
[r
.val
].type
!= CBits
)
438 n
= con
[r
.val
].bits
.i
;
439 return n
== 1 || n
== 2 || n
== 4 || n
== 8;
443 anumber(ANum
*ai
, Blk
*b
, Con
*con
)
445 /* This should be made obsolete by a proper
451 * ( RTmp(_) -> 1 slot )
453 * 0 * 2 -> 3 s * i (when constant is 1,2,4,8)
455 static char add
[10][10] = {
456 [2] [2] = 2, /* folding */
457 [2] [4] = 4, [4] [2] = 4,
458 [2] [6] = 6, [6] [2] = 6,
459 [2] [7] = 7, [7] [2] = 7,
460 [0] [2] = 4, [2] [0] = 4, /* 4: o + b */
461 [0] [0] = 5, /* 5: b + s * i */
462 [0] [3] = 5, [3] [0] = 5,
463 [2] [3] = 6, [3] [2] = 6, /* 6: o + s * i */
464 [2] [5] = 7, [5] [2] = 7, /* 7: o + b + s * i */
465 [0] [6] = 7, [6] [0] = 7,
466 [4] [3] = 7, [3] [4] = 7,
468 int a
, a1
, a2
, n1
, n2
, t1
, t2
;
471 for (i
=b
->ins
; i
<&b
->ins
[b
->nins
]; i
++) {
472 if (rtype(i
->to
) == RTmp
)
474 if (i
->op
!= Oadd
&& i
->op
!= Omul
)
476 a1
= aref(i
->arg
[0], ai
);
477 a2
= aref(i
->arg
[1], ai
);
478 t1
= a1
!= 1 && a1
!= 2;
479 t2
= a2
!= 1 && a2
!= 2;
481 a
= add
[n1
= a1
][n2
= a2
];
482 if (t1
&& a
< add
[0][a2
])
483 a
= add
[n1
= 0][n2
= a2
];
484 if (t2
&& a
< add
[a1
][0])
485 a
= add
[n1
= a1
][n2
= 0];
486 if (t1
&& t2
&& a
< add
[0][0])
487 a
= add
[n1
= 0][n2
= 0];
490 if (ascale(i
->arg
[0], con
) && t2
)
491 a
= 3, n1
= 2, n2
= 0;
492 if (t1
&& ascale(i
->arg
[1], con
))
493 a
= 3, n1
= 0, n2
= 2;
496 ai
[i
->to
.val
].l
= n1
;
497 ai
[i
->to
.val
].r
= n2
;
502 amatch(Addr
*a
, Ref r
, int n
, ANum
*ai
, Fn
*fn
)
508 if (rtype(r
) == RCon
) {
509 addcon(&a
->offset
, &fn
->con
[r
.val
]);
512 assert(rtype(r
) == RTmp
);
520 t
= nl
, nl
= nr
, nr
= t
;
529 a
->scale
= fn
->con
[ar
.val
].bits
.i
;
531 case 5: /* b + s * i */
534 if (fn
->tmp
[ar
.val
].slot
!= -1) {
542 amatch(a
, ar
, nr
, ai
, fn
);
548 s
= fn
->tmp
[r
.val
].slot
;
553 case 2: /* constants */
555 case 6: /* o + s * i */
556 case 7: /* o + b + s * i */
557 amatch(a
, ar
, nr
, ai
, fn
);
558 amatch(a
, al
, nl
, ai
, fn
);
565 /* instruction selection
566 * requires use counts (as given by parsing)
579 /* assign slots to fast allocs */
581 /* specific to NAlign == 3 */ /* or change n=4 and sz /= 4 below */
582 for (al
=Oalloc
, n
=4; al
<=Oalloc1
; al
++, n
*=2)
583 for (i
=b
->ins
; i
<&b
->ins
[b
->nins
]; i
++)
585 if (rtype(i
->arg
[0]) != RCon
)
587 sz
= fn
->con
[i
->arg
[0].val
].bits
.i
;
588 if (sz
< 0 || sz
>= INT_MAX
-15)
589 err("invalid alloc size %"PRId64
, sz
);
590 sz
= (sz
+ n
-1) & -n
;
592 if (sz
> INT_MAX
- fn
->slot
)
593 die("alloc too large");
594 fn
->tmp
[i
->to
.val
].slot
= fn
->slot
;
596 *i
= (Ins
){.op
= Onop
};
599 /* process basic blocks */
601 ainfo
= emalloc(n
* sizeof ainfo
[0]);
602 for (b
=fn
->start
; b
; b
=b
->link
) {
604 for (sb
=(Blk
*[3]){b
->s1
, b
->s2
, 0}; *sb
; sb
++)
605 for (p
=(*sb
)->phi
; p
; p
=p
->link
) {
606 for (a
=0; p
->blk
[a
] != b
; a
++)
607 assert(a
+1 < p
->narg
);
608 fixarg(&p
->arg
[a
], p
->cls
, 0, fn
);
610 memset(ainfo
, 0, n
* sizeof ainfo
[0]);
611 anumber(ainfo
, b
, fn
->con
);
613 for (i
=&b
->ins
[b
->nins
]; i
!=b
->ins
;)
614 sel(*--i
, ainfo
, fn
);
615 b
->nins
= &insb
[NIns
] - curi
;
616 idup(&b
->ins
, curi
, b
->nins
);
621 fprintf(stderr
, "\n> After instruction selection:\n");