/* For x86_64, do the following:
 *
 * - check that constants are used only in
 *   instructions allowing them
 * - ensure immediates always fit in 32b
 * - expose machine register constraints
 *   on instructions like division.
 * - implement fast locals (the streak of
 *   constant allocX in the first basic block)
 * - recognize complex addressing modes
 *
 * Invariant: the use counts that are used
 *            in sel() must be sound. This
 *            is not so trivial, maybe the
 *            dce should be moved out...
 */
typedef struct ANum ANum;
static int amatch(Addr *, Ref, int, ANum *, Fn *);
	switch (fn->con[r.val].type) {
	case CAddr:
		/* we only support the 'small'
		 * code model of the ABI, this
		 * means that we can always
		 * address data with 32bits
		 */
		return 0;
	case CBits:
		val = fn->con[r.val].bits.i;
		return (val < INT32_MIN || val > INT32_MAX);
	default:
		die("invalid constant");
	}
	return fn->tmp[r.val].slot;
fixarg(Ref *r, int k, Ins *i, Fn *fn)
	op = i ? i->op : Ocopy;
	if (KBASE(k) == 1 && rtype(r0) == RCon) {
		/* load floating points from memory
		 * slots, they can't be used as
		 * immediates
		 */
		vgrow(&fn->mem, ++fn->nmem);
		memset(&a, 0, sizeof a);
		a.offset.type = CAddr;
		n = stashbits(&fn->con[r0.val].bits, KWIDE(k) ? 8 : 4);
		sprintf(buf, "\"%sfp%d\"", T.asloc, n);
		a.offset.label = intern(buf);
		fn->mem[fn->nmem-1] = a;
	}
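/* Illustrative note (label name is hypothetical): a floating-point
 * constant cannot be an SSE immediate, so its bits are stashed in a
 * local data symbol and the use is rewritten into a RIP-relative
 * memory operand, roughly
 *
 *     movsd "fp0"(%rip), %xmm0
 *
 * instead of an impossible immediate form.
 */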
	else if (op != Ocopy && k == Kl && noimm(r0, fn)) {
		/* load constants that do not fit in
		 * a 32bit signed integer into a
		 * long temporary
		 */
		r1 = newtmp("isel", Kl, fn);
		emit(Ocopy, Kl, r1, r0, R);
	}
		/* load fast locals' addresses into
		 * temporaries right before the
		 * instruction
		 */
		r1 = newtmp("isel", Kl, fn);
		emit(Oaddr, Kl, r1, SLOT(s), R);
	}
	else if (!(isstore(op) && r == &i->arg[1])
	&& !isload(op) && op != Ocall && rtype(r0) == RCon
	&& fn->con[r0.val].type == CAddr) {
		/* apple as does not support 32-bit
		 * absolute addressing, use a rip-
		 * relative leaq instead
		 */
		r1 = newtmp("isel", Kl, fn);
		emit(Oaddr, Kl, r1, r0, R);
	}
	else if (rtype(r0) == RMem) {
		/* eliminate memory operands of
		 * the form $foo(%rip, ...)
		 */
		m = &fn->mem[r0.val];
		if (m->offset.type == CAddr) {
			r0 = newtmp("isel", Kl, fn);
			emit(Oaddr, Kl, r0, newcon(&m->offset, fn), R);
			m->offset.type = CUndef;
		}
	}
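/* Informal summary: fixarg() leaves *r alone when the instruction can
 * encode it directly, and otherwise rewrites it into a fresh temporary
 * or memory operand so that every operand left in the program is
 * representable on x86-64.
 */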
seladdr(Ref *r, ANum *an, Fn *fn)
	if (rtype(r0) == RTmp) {
		memset(&a, 0, sizeof a);
		if (!amatch(&a, r0, an[r0.val].n, an, fn))
			return;
		if (a.offset.type == CAddr) {
			/* apple as does not support
			 * $foo(%r0, %r1, M); try to
			 * rewrite it or bail out if
			 * impossible
			 */
			if (!req(a.index, R) || rtype(a.base) != RTmp)
				return;
		}
		vgrow(&fn->mem, ++fn->nmem);
		fn->mem[fn->nmem-1] = a;
		chuse(a.base, +1, fn);
		chuse(a.index, +1, fn);
		*r = MEM(fn->nmem-1);
	}
cmpswap(Ref arg[2], int op)
	return rtype(arg[0]) == RCon;
selcmp(Ref arg[2], int k, int swap, Fn *fn)
	emit(Oxcmp, k, R, arg[1], arg[0]);
	if (rtype(arg[0]) == RCon) {
		icmp->arg[1] = newtmp("isel", k, fn);
		emit(Ocopy, k, icmp->arg[1], arg[0], R);
		fixarg(&curi->arg[0], k, curi, fn);
	}
	fixarg(&icmp->arg[0], k, icmp, fn);
	fixarg(&icmp->arg[1], k, icmp, fn);
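/* Informal note: x86 cmp accepts an immediate in only one operand
 * position, so a constant arg[0] is first copied into a temporary
 * above; the remaining operands go through the usual fixarg() path.
 */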
sel(Ins i, ANum *an, Fn *fn)
	int x, j, k, kc, sh, swap;
	if (rtype(i.to) == RTmp)
	if (!isreg(i.to) && !isreg(i.arg[0]) && !isreg(i.arg[1]))
	if (fn->tmp[i.to.val].nuse == 0) {
		chuse(i.arg[0], -1, fn);
		chuse(i.arg[1], -1, fn);
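/* Informal note: this is the dce mentioned in the header comment; an
 * instruction whose result is unused is skipped and the use counts of
 * its arguments are decremented so later checks in sel() stay sound.
 */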
		if (i.op == Odiv || i.op == Oudiv)
			r0 = TMP(RAX), r1 = TMP(RDX);
		else
			r0 = TMP(RDX), r1 = TMP(RAX);
		emit(Ocopy, k, i.to, r0, R);
		emit(Ocopy, k, R, r1, R);
		if (rtype(i.arg[1]) == RCon) {
			/* immediates not allowed for
			 * divisions in x86
			 */
			r0 = newtmp("isel", k, fn);
		}
		if (fn->tmp[r0.val].slot != -1)
			err("unlikely argument %%%s in %s",
				fn->tmp[r0.val].name, optab[i.op].name);
		if (i.op == Odiv || i.op == Orem) {
			emit(Oxidiv, k, R, r0, R);
			emit(Osign, k, TMP(RDX), TMP(RAX), R);
		} else {
			emit(Oxdiv, k, R, r0, R);
			emit(Ocopy, k, TMP(RDX), CON_Z, R);
		}
		emit(Ocopy, k, TMP(RAX), i.arg[0], R);
		fixarg(&curi->arg[0], k, curi, fn);
		if (rtype(i.arg[1]) == RCon)
			emit(Ocopy, k, r0, i.arg[1], R);
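/* Informal reminder of the constraint exposed above: x86-64 (i)div
 * takes its dividend in RDX:RAX and returns the quotient in RAX and
 * the remainder in RDX.  emit() builds the block bottom-up, so in
 * execution order this reads roughly:
 *
 *     rax <- arg[0];  rdx <- sign/zero extension;  (i)div r0;
 *     result <- rax (div) or rdx (rem)
 */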
		if (rtype(r0) == RCon)
		if (fn->tmp[r0.val].slot != -1)
			err("unlikely argument %%%s in %s",
				fn->tmp[r0.val].name, optab[i.op].name);
		emit(Ocopy, Kw, R, TMP(RCX), R);
		emit(Ocopy, Kw, TMP(RCX), r0, R);
		fixarg(&i1->arg[0], argcls(&i, 0), i1, fn);
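/* Informal note: variable shift counts are constrained to CL on
 * x86-64, so a non-constant count is copied into RCX before the
 * shift; the extra copy out of RCX emitted above appears to be there
 * to record that the register is still read after the shift.
 */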
		r0 = newtmp("utof", Kl, fn);
		emit(Osltof, k, i.to, r0, R);
		emit(Oextuw, Kl, r0, i.arg[0], R);
		fixarg(&curi->arg[0], k, curi, fn);
		/* %mask =l and %arg.0, 1
		   %isbig =l shr %arg.0, 63
		   %divided =l shr %arg.0, %isbig
		   %or =l or %mask, %divided
		   %float =d sltof %or
		   %cast =l cast %float
		   %addend =l shl %isbig, 52
		   %sum =l add %cast, %addend
		   %result =d cast %sum
		 */
		r0 = newtmp("utof", k, fn);
		for (j=0; j<4; j++)
			tmp[j] = newtmp("utof", Kl, fn);
		for (; j<7; j++)
			tmp[j] = newtmp("utof", kc, fn);
		emit(Ocast, k, i.to, tmp[6], R);
		emit(Oadd, kc, tmp[6], tmp[4], tmp[5]);
		emit(Oshl, kc, tmp[5], tmp[1], getcon(sh, fn));
		emit(Ocast, kc, tmp[4], r0, R);
		emit(Osltof, k, r0, tmp[3], R);
		emit(Oor, Kl, tmp[3], tmp[0], tmp[2]);
		emit(Oshr, Kl, tmp[2], i.arg[0], tmp[1]);
		fixarg(&curi->arg[0], Kl, curi, fn);
		emit(Oshr, Kl, tmp[1], i.arg[0], getcon(63, fn));
		fixarg(&curi->arg[0], Kl, curi, fn);
		emit(Oand, Kl, tmp[0], i.arg[0], getcon(1, fn));
		fixarg(&curi->arg[0], Kl, curi, fn);
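/* Informal explanation of the expansion above: values below 2^63
 * convert directly with the signed sltof (%isbig is 0, so the shifts
 * and the add are no-ops).  When the top bit is set, the value is
 * halved with a right shift, its dropped low bit is kept in %mask so
 * rounding is unchanged, and the converted result is doubled again by
 * adding 1 to the exponent field of its bit pattern (the shl by 52,
 * or 23 in the single-precision case, lands on the exponent).
 */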
		tmp[4] = getcon(0xdf000000, fn);
		tmp[4] = getcon(0xc3e0000000000000, fn);
		r0 = newtmp("ftou", kc, fn);
		for (j=0; j<4; j++)
			tmp[j] = newtmp("ftou", Kl, fn);
		emit(Oor, Kl, i.to, tmp[0], tmp[3]);
		emit(Oand, Kl, tmp[3], tmp[2], tmp[1]);
		emit(i.op, Kl, tmp[2], r0, R);
		emit(Oadd, kc, r0, tmp[4], i.arg[0]);
		i1 = curi;	/* fixarg() can change curi */
		fixarg(&i1->arg[0], kc, i1, fn);
		fixarg(&i1->arg[1], kc, i1, fn);
		emit(Osar, Kl, tmp[1], tmp[0], getcon(63, fn));
		emit(i.op, Kl, tmp[0], i.arg[0], R);
		fixarg(&curi->arg[0], Kl, curi, fn);
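/* Informal explanation of the expansion above: tmp[4] is -2^63 as a
 * float/double constant, so r0 = arg - 2^63.  For inputs below 2^63
 * the plain signed conversion in tmp[0] is already correct and its
 * sign bit is clear, so the mask tmp[1] is zero.  For larger inputs
 * the signed conversion overflows to 0x8000000000000000, the mask
 * becomes all ones, and or-ing in the conversion of arg - 2^63
 * restores the missing 2^63.
 */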
		if (rtype(i.arg[0]) == RCon) {
		seladdr(&i.arg[1], an, fn);
		seladdr(&i.arg[0], an, fn);
		i1 = curi;	/* fixarg() can change curi */
		fixarg(&i1->arg[0], argcls(&i, 0), i1, fn);
		fixarg(&i1->arg[1], argcls(&i, 1), i1, fn);
		salloc(i.to, i.arg[0], fn);
	if (iscmp(i.op, &kc, &x)) {
			/* zf is set when operands are
			 * unordered, so we may have to
			 * check pf
			 */
			r0 = newtmp("isel", Kw, fn);
			r1 = newtmp("isel", Kw, fn);
			emit(Oand, Kw, i.to, r0, r1);
			emit(Oflagfo, k, r1, R, R);
			r0 = newtmp("isel", Kw, fn);
			r1 = newtmp("isel", Kw, fn);
			emit(Oor, Kw, i.to, r0, r1);
			emit(Oflagfuo, k, r1, R, R);
		swap = cmpswap(i.arg, x);
		emit(Oflag+x, k, i.to, R, R);
		selcmp(i.arg, kc, swap, fn);
	}
	die("unknown instruction %s", optab[i.op].name);
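/* Informal note on the Cfeq/Cfne handling above: ucomis[sd] signals
 * unordered operands through the parity flag while also setting ZF,
 * so a bare sete would claim NaN == NaN.  The feq result is therefore
 * and-ed with an "ordered" flag and the fne result or-ed with an
 * "unordered" one.
 */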
	while (i0 > curi && --i0) {
		assert(rslot(i0->arg[0], fn) == -1);
		assert(rslot(i0->arg[1], fn) == -1);
	}
flagi(Ins *i0, Ins *i)
	if (amd64_op[i->op].zflag)
	if (amd64_op[i->op].lflag)
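/* Informal note: flagi() scans backwards for the instruction whose
 * result the jnz tests; zflag presumably marks instructions that set
 * ZF from their result, and lflag those that leave the flags alone
 * and can be skipped over.
 */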
seljmp(Blk *b, Fn *fn)
	if (b->jmp.type == Jret0 || b->jmp.type == Jjmp)
		return;
	assert(b->jmp.type == Jjnz);
	assert(rtype(r) == RTmp);
	if (b->s1 == b->s2) {
	fi = flagi(b->ins, &b->ins[b->nins]);
	if (!fi || !req(fi->to, r)) {
		selcmp((Ref[2]){r, CON_Z}, Kw, 0, fn); /* todo, long jnz */
		b->jmp.type = Jjf + Cine;
	}
	else if (iscmp(fi->op, &k, &c)
	&& c != NCmpI+Cfeq /* see sel() */
	&& c != NCmpI+Cfne) {
		swap = cmpswap(fi->arg, c);
		selcmp(fi->arg, k, swap, fn);
		*fi = (Ins){.op = Onop};
		b->jmp.type = Jjf + c;
	}
	else if (fi->op == Oand && t->nuse == 1
	&& (rtype(fi->arg[0]) == RTmp ||
	    rtype(fi->arg[1]) == RTmp)) {
		b->jmp.type = Jjf + Cine;
		if (rtype(fi->arg[1]) == RCon) {
			fi->arg[1] = fi->arg[0];
	/* since flags are not tracked in liveness,
	 * the result of the flag-setting instruction
	 * has to be marked as live
	 */
	emit(Ocopy, Kw, R, r, R);
	b->jmp.type = Jjf + Cine;
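/* Informal summary of seljmp(): when the jnz condition comes from a
 * comparison computed right at the end of the block, its flags are
 * reused and the jump becomes the matching jf<cc> (the compare itself
 * can be dropped when its result has no other use); otherwise the
 * temporary is compared against zero and a jf+Cine is used.
 */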
aref(Ref r, ANum *ai)
	die("constant or temporary expected");
ascale(Ref r, Con *con)
	if (rtype(r) != RCon)
		return 0;
	if (con[r.val].type != CBits)
		return 0;
	n = con[r.val].bits.i;
	return n == 1 || n == 2 || n == 4 || n == 8;
anumber(ANum *ai, Blk *b, Con *con)
	/* This should be made obsolete by a proper
	 * reassoc pass.
	 *
	 * Rules:
	 *
	 *   RTmp(_) -> 0    tmp
	 * ( RTmp(_) -> 1    slot )
	 *   RCon(_) -> 2    con
	 *   0 * 2   -> 3    s * i  (when constant is 1,2,4,8)
	 */
	static char add[10][10] = {
		[2] [4] = 4, [4] [2] = 4,
		[2] [6] = 6, [6] [2] = 6,
		[2] [7] = 7, [7] [2] = 7,
		[0] [2] = 4, [2] [0] = 4, /* 4: o + b */
		[0] [0] = 5,              /* 5: b + s * i */
		[0] [3] = 5, [3] [0] = 5,
		[2] [3] = 6, [3] [2] = 6, /* 6: o + s * i */
		[2] [5] = 7, [5] [2] = 7, /* 7: o + b + s * i */
		[0] [6] = 7, [6] [0] = 7,
		[4] [3] = 7, [3] [4] = 7,
	};
	int a, a1, a2, n1, n2, t1, t2;
	for (i=b->ins; i<&b->ins[b->nins]; i++) {
		if (rtype(i->to) == RTmp)
			ai[i->to.val].i = i;
		if (i->op != Oadd && i->op != Omul)
			continue;
		a1 = aref(i->arg[0], ai);
		a2 = aref(i->arg[1], ai);
		t1 = a1 != 1 && a1 != 2;
		t2 = a2 != 1 && a2 != 2;
		a = add[n1 = a1][n2 = a2];
		if (t1 && a < add[0][a2])
			a = add[n1 = 0][n2 = a2];
		if (t2 && a < add[a1][0])
			a = add[n1 = a1][n2 = 0];
		if (t1 && t2 && a < add[0][0])
			a = add[n1 = 0][n2 = 0];
		if (ascale(i->arg[0], con) && t2)
			a = 3, n1 = 2, n2 = 0;
		if (t1 && ascale(i->arg[1], con))
			a = 3, n1 = 0, n2 = 2;
		ai[i->to.val].l = n1;
		ai[i->to.val].r = n2;
	}
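/* A small worked example of the numbering (temporaries are made up):
 *
 *     %off =l mul %idx, 8        -> 3   s * i
 *     %ptr =l add %base, %off    -> 5   b + s * i
 *     %ea  =l add %ptr, 16       -> 7   o + b + s * i
 *
 * amatch() below walks these numbers to rebuild the full operand
 * 16(%base, %idx, 8).
 */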
amatch(Addr *a, Ref r, int n, ANum *ai, Fn *fn)
	if (rtype(r) == RCon) {
		if (!addcon(&a->offset, &fn->con[r.val]))
			err("unlikely sum of $%s and $%s",
				str(a->offset.label), str(fn->con[r.val].label));
		return 1;
	}
	assert(rtype(r) == RTmp);
			t = nl, nl = nr, nr = t;
		a->scale = fn->con[ar.val].bits.i;
	case 5: /* b + s * i */
		if (fn->tmp[ar.val].slot != -1) {
		amatch(a, ar, nr, ai, fn);
		s = fn->tmp[r.val].slot;
	case 2: /* constants */
	case 6: /* o + s * i */
	case 7: /* o + b + s * i */
		amatch(a, ar, nr, ai, fn);
		amatch(a, al, nl, ai, fn);
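/* Informal note: amatch() recurses over the numbers computed by
 * anumber(), accumulating the pieces of an Addr (constant offset,
 * base temporary, index temporary, scale) so that a chain of add/mul
 * instructions collapses into a single o(b, i, s) operand.
 */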
/* instruction selection
 * requires use counts (as given by parsing)
 */

	/* assign slots to fast allocs */
	/* specific to NAlign == 3 */ /* or change n=4 and sz /= 4 below */
	for (al=Oalloc, n=4; al<=Oalloc1; al++, n*=2)
		for (i=b->ins; i<&b->ins[b->nins]; i++)
			if (i->op == al) {
				if (rtype(i->arg[0]) != RCon)
					break;
				sz = fn->con[i->arg[0].val].bits.i;
				if (sz < 0 || sz >= INT_MAX-15)
					err("invalid alloc size %"PRId64, sz);
				sz = (sz + n-1) & -n;
				sz /= 4;
				if (sz > INT_MAX - fn->slot)
					die("alloc too large");
				fn->tmp[i->to.val].slot = fn->slot;
				fn->slot += sz;
				*i = (Ins){.op = Onop};
			}
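/* Informal note: only allocs with a constant size in the start block
 * become fixed slots here; the size is rounded up to the alignment
 * implied by the alloc opcode, counted in 4-byte units (NAlign == 3),
 * and the instruction is replaced by a nop since later uses go
 * through the slot directly.
 */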
	/* process basic blocks */
	ainfo = emalloc(n * sizeof ainfo[0]);
	for (b=fn->start; b; b=b->link) {
		for (sb=(Blk*[3]){b->s1, b->s2, 0}; *sb; sb++)
			for (p=(*sb)->phi; p; p=p->link) {
				for (a=0; p->blk[a] != b; a++)
					assert(a+1 < p->narg);
				fixarg(&p->arg[a], p->cls, 0, fn);
			}
		memset(ainfo, 0, n * sizeof ainfo[0]);
		anumber(ainfo, b, fn->con);
		for (i=&b->ins[b->nins]; i!=b->ins;)
			sel(*--i, ainfo, fn);
		b->nins = &insb[NIns] - curi;
		idup(&b->ins, curi, b->nins);
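/* Informal note on the driver loop above: each block is selected
 * bottom-up, with emit() filling insb from the end (hence
 * b->nins = &insb[NIns] - curi), and idup() then copies the freshly
 * selected instructions back into the block.
 */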
		fprintf(stderr, "\n> After instruction selection:\n");