4 /* For x86_64, do the following:
6 * - check that constants are used only in
8 * - ensure immediates always fit in 32b
9 * - expose machine register contraints
10 * on instructions like division.
11 * - implement fast locals (the streak of
12 * constant allocX in the first basic block)
13 * - recognize complex addressing modes
15 * Invariant: the use counts that are used
16 * in sel() must be sound. This
17 * is not so trivial, maybe the
18 * dce should be moved out...
21 typedef struct ANum ANum
;
28 static int amatch(Addr
*, Ref
, int, ANum
*, Fn
*);
37 switch (fn
->con
[r
.val
].type
) {
39 /* we only support the 'small'
40 * code model of the ABI, this
41 * means that we can always
42 * address data with 32bits
46 val
= fn
->con
[r
.val
].bits
.i
;
47 return (val
< INT32_MIN
|| val
> INT32_MAX
);
49 die("invalid constant");
58 return fn
->tmp
[r
.val
].slot
;
62 fixarg(Ref
*r
, int k
, Ins
*i
, Fn
*fn
)
72 op
= i
? i
->op
: Ocopy
;
73 if (KBASE(k
) == 1 && rtype(r0
) == RCon
) {
74 /* load floating points from memory
75 * slots, they can't be used as
79 vgrow(&fn
->mem
, ++fn
->nmem
);
80 memset(&a
, 0, sizeof a
);
81 a
.offset
.type
= CAddr
;
82 n
= stashbits(&fn
->con
[r0
.val
].bits
, KWIDE(k
) ? 8 : 4);
83 sprintf(buf
, "\"%sfp%d\"", T
.asloc
, n
);
84 a
.offset
.label
= intern(buf
);
85 fn
->mem
[fn
->nmem
-1] = a
;
87 else if (op
!= Ocopy
&& k
== Kl
&& noimm(r0
, fn
)) {
88 /* load constants that do not fit in
89 * a 32bit signed integer into a
92 r1
= newtmp("isel", Kl
, fn
);
93 emit(Ocopy
, Kl
, r1
, r0
, R
);
96 /* load fast locals' addresses into
97 * temporaries right before the
100 r1
= newtmp("isel", Kl
, fn
);
101 emit(Oaddr
, Kl
, r1
, SLOT(s
), R
);
103 else if (!(isstore(op
) && r
== &i
->arg
[1])
104 && !isload(op
) && op
!= Ocall
&& rtype(r0
) == RCon
105 && fn
->con
[r0
.val
].type
== CAddr
) {
106 /* apple as does not support 32-bit
107 * absolute addressing, use a rip-
108 * relative leaq instead
110 r1
= newtmp("isel", Kl
, fn
);
111 emit(Oaddr
, Kl
, r1
, r0
, R
);
113 else if (rtype(r0
) == RMem
) {
114 /* eliminate memory operands of
115 * the form $foo(%rip, ...)
117 m
= &fn
->mem
[r0
.val
];
119 if (m
->offset
.type
== CAddr
) {
120 r0
= newtmp("isel", Kl
, fn
);
121 emit(Oaddr
, Kl
, r0
, newcon(&m
->offset
, fn
), R
);
122 m
->offset
.type
= CUndef
;
125 } else if (T
.apple
&& rtype(r0
) == RCon
126 && (c
= &fn
->con
[r0
.val
])->type
== CAddr
127 && c
->reloc
== RelThr
) {
128 r1
= newtmp("isel", Kl
, fn
);
130 r2
= newtmp("isel", Kl
, fn
);
131 cc
= (Con
){.type
= CBits
};
132 cc
.bits
.i
= c
->bits
.i
;
133 r3
= newcon(&cc
, fn
);
134 emit(Oadd
, Kl
, r1
, r2
, r3
);
137 emit(Ocopy
, Kl
, r2
, TMP(RAX
), R
);
138 r2
= newtmp("isel", Kl
, fn
);
139 r3
= newtmp("isel", Kl
, fn
);
140 emit(Ocall
, 0, R
, r3
, CALL(17));
141 emit(Ocopy
, Kl
, TMP(RDI
), r2
, R
);
142 emit(Oload
, Kl
, r3
, r2
, R
);
145 r3
= newcon(&cc
, fn
);
146 emit(Oload
, Kl
, r2
, r3
, R
);
152 seladdr(Ref
*r
, ANum
*an
, Fn
*fn
)
158 if (rtype(r0
) == RTmp
) {
159 memset(&a
, 0, sizeof a
);
160 if (!amatch(&a
, r0
, an
[r0
.val
].n
, an
, fn
))
163 if (a
.offset
.type
== CAddr
) {
164 /* apple as does not support
165 * $foo(%r0, %r1, M); try to
166 * rewrite it or bail out if
169 if (!req(a
.index
, R
) || rtype(a
.base
) != RTmp
)
178 vgrow(&fn
->mem
, ++fn
->nmem
);
179 fn
->mem
[fn
->nmem
-1] = a
;
180 chuse(a
.base
, +1, fn
);
181 chuse(a
.index
, +1, fn
);
182 *r
= MEM(fn
->nmem
-1);
187 cmpswap(Ref arg
[2], int op
)
197 return rtype(arg
[0]) == RCon
;
201 selcmp(Ref arg
[2], int k
, int swap
, Fn
*fn
)
211 emit(Oxcmp
, k
, R
, arg
[1], arg
[0]);
213 if (rtype(arg
[0]) == RCon
) {
215 icmp
->arg
[1] = newtmp("isel", k
, fn
);
216 emit(Ocopy
, k
, icmp
->arg
[1], arg
[0], R
);
217 fixarg(&curi
->arg
[0], k
, curi
, fn
);
219 fixarg(&icmp
->arg
[0], k
, icmp
, fn
);
220 fixarg(&icmp
->arg
[1], k
, icmp
, fn
);
224 sel(Ins i
, ANum
*an
, Fn
*fn
)
227 int x
, j
, k
, kc
, sh
, swap
;
230 if (rtype(i
.to
) == RTmp
)
231 if (!isreg(i
.to
) && !isreg(i
.arg
[0]) && !isreg(i
.arg
[1]))
232 if (fn
->tmp
[i
.to
.val
].nuse
== 0) {
233 chuse(i
.arg
[0], -1, fn
);
234 chuse(i
.arg
[1], -1, fn
);
246 if (i
.op
== Odiv
|| i
.op
== Oudiv
)
247 r0
= TMP(RAX
), r1
= TMP(RDX
);
249 r0
= TMP(RDX
), r1
= TMP(RAX
);
250 emit(Ocopy
, k
, i
.to
, r0
, R
);
251 emit(Ocopy
, k
, R
, r1
, R
);
252 if (rtype(i
.arg
[1]) == RCon
) {
253 /* immediates not allowed for
256 r0
= newtmp("isel", k
, fn
);
259 if (fn
->tmp
[r0
.val
].slot
!= -1)
260 err("unlikely argument %%%s in %s",
261 fn
->tmp
[r0
.val
].name
, optab
[i
.op
].name
);
262 if (i
.op
== Odiv
|| i
.op
== Orem
) {
263 emit(Oxidiv
, k
, R
, r0
, R
);
264 emit(Osign
, k
, TMP(RDX
), TMP(RAX
), R
);
266 emit(Oxdiv
, k
, R
, r0
, R
);
267 emit(Ocopy
, k
, TMP(RDX
), CON_Z
, R
);
269 emit(Ocopy
, k
, TMP(RAX
), i
.arg
[0], R
);
270 fixarg(&curi
->arg
[0], k
, curi
, fn
);
271 if (rtype(i
.arg
[1]) == RCon
)
272 emit(Ocopy
, k
, r0
, i
.arg
[1], R
);
278 if (rtype(r0
) == RCon
)
280 if (fn
->tmp
[r0
.val
].slot
!= -1)
281 err("unlikely argument %%%s in %s",
282 fn
->tmp
[r0
.val
].name
, optab
[i
.op
].name
);
284 emit(Ocopy
, Kw
, R
, TMP(RCX
), R
);
287 emit(Ocopy
, Kw
, TMP(RCX
), r0
, R
);
288 fixarg(&i1
->arg
[0], argcls(&i
, 0), i1
, fn
);
291 r0
= newtmp("utof", Kl
, fn
);
292 emit(Osltof
, k
, i
.to
, r0
, R
);
293 emit(Oextuw
, Kl
, r0
, i
.arg
[0], R
);
294 fixarg(&curi
->arg
[0], k
, curi
, fn
);
297 /* %mask =l and %arg.0, 1
298 %isbig =l shr %arg.0, 63
299 %divided =l shr %arg.0, %isbig
300 %or =l or %mask, %divided
303 %addend =l shl %isbig, 52
304 %sum =l add %cast, %addend
307 r0
= newtmp("utof", k
, fn
);
313 tmp
[j
] = newtmp("utof", Kl
, fn
);
315 tmp
[j
] = newtmp("utof", kc
, fn
);
316 emit(Ocast
, k
, i
.to
, tmp
[6], R
);
317 emit(Oadd
, kc
, tmp
[6], tmp
[4], tmp
[5]);
318 emit(Oshl
, kc
, tmp
[5], tmp
[1], getcon(sh
, fn
));
319 emit(Ocast
, kc
, tmp
[4], r0
, R
);
320 emit(Osltof
, k
, r0
, tmp
[3], R
);
321 emit(Oor
, Kl
, tmp
[3], tmp
[0], tmp
[2]);
322 emit(Oshr
, Kl
, tmp
[2], i
.arg
[0], tmp
[1]);
324 emit(Oshr
, Kl
, tmp
[1], i
.arg
[0], getcon(63, fn
));
325 fixarg(&curi
->arg
[0], Kl
, curi
, fn
);
326 emit(Oand
, Kl
, tmp
[0], i
.arg
[0], getcon(1, fn
));
327 fixarg(&curi
->arg
[0], Kl
, curi
, fn
);
332 tmp
[4] = getcon(0xdf000000, fn
);
337 tmp
[4] = getcon(0xc3e0000000000000, fn
);
341 r0
= newtmp("ftou", kc
, fn
);
343 tmp
[j
] = newtmp("ftou", Kl
, fn
);
344 emit(Oor
, Kl
, i
.to
, tmp
[0], tmp
[3]);
345 emit(Oand
, Kl
, tmp
[3], tmp
[2], tmp
[1]);
346 emit(i
.op
, Kl
, tmp
[2], r0
, R
);
347 emit(Oadd
, kc
, r0
, tmp
[4], i
.arg
[0]);
348 i1
= curi
; /* fixarg() can change curi */
349 fixarg(&i1
->arg
[0], kc
, i1
, fn
);
350 fixarg(&i1
->arg
[1], kc
, i1
, fn
);
351 emit(Osar
, Kl
, tmp
[1], tmp
[0], getcon(63, fn
));
352 emit(i
.op
, Kl
, tmp
[0], i
.arg
[0], R
);
353 fixarg(&curi
->arg
[0], Kl
, curi
, fn
);
363 if (rtype(i
.arg
[0]) == RCon
) {
369 seladdr(&i
.arg
[1], an
, fn
);
372 seladdr(&i
.arg
[0], an
, fn
);
395 i1
= curi
; /* fixarg() can change curi */
396 fixarg(&i1
->arg
[0], argcls(&i
, 0), i1
, fn
);
397 fixarg(&i1
->arg
[1], argcls(&i
, 1), i1
, fn
);
402 salloc(i
.to
, i
.arg
[0], fn
);
409 if (iscmp(i
.op
, &kc
, &x
)) {
412 /* zf is set when operands are
413 * unordered, so we may have to
416 r0
= newtmp("isel", Kw
, fn
);
417 r1
= newtmp("isel", Kw
, fn
);
418 emit(Oand
, Kw
, i
.to
, r0
, r1
);
419 emit(Oflagfo
, k
, r1
, R
, R
);
423 r0
= newtmp("isel", Kw
, fn
);
424 r1
= newtmp("isel", Kw
, fn
);
425 emit(Oor
, Kw
, i
.to
, r0
, r1
);
426 emit(Oflagfuo
, k
, r1
, R
, R
);
430 swap
= cmpswap(i
.arg
, x
);
433 emit(Oflag
+x
, k
, i
.to
, R
, R
);
434 selcmp(i
.arg
, kc
, swap
, fn
);
437 die("unknown instruction %s", optab
[i
.op
].name
);
440 while (i0
>curi
&& --i0
) {
441 assert(rslot(i0
->arg
[0], fn
) == -1);
442 assert(rslot(i0
->arg
[1], fn
) == -1);
447 flagi(Ins
*i0
, Ins
*i
)
451 if (amd64_op
[i
->op
].zflag
)
453 if (amd64_op
[i
->op
].lflag
)
461 seljmp(Blk
*b
, Fn
*fn
)
468 if (b
->jmp
.type
== Jret0
|| b
->jmp
.type
== Jjmp
)
470 assert(b
->jmp
.type
== Jjnz
);
474 assert(rtype(r
) == RTmp
);
475 if (b
->s1
== b
->s2
) {
481 fi
= flagi(b
->ins
, &b
->ins
[b
->nins
]);
482 if (!fi
|| !req(fi
->to
, r
)) {
483 selcmp((Ref
[2]){r
, CON_Z
}, Kw
, 0, fn
); /* todo, long jnz */
484 b
->jmp
.type
= Jjf
+ Cine
;
486 else if (iscmp(fi
->op
, &k
, &c
)
487 && c
!= NCmpI
+Cfeq
/* see sel() */
488 && c
!= NCmpI
+Cfne
) {
489 swap
= cmpswap(fi
->arg
, c
);
493 selcmp(fi
->arg
, k
, swap
, fn
);
494 *fi
= (Ins
){.op
= Onop
};
496 b
->jmp
.type
= Jjf
+ c
;
498 else if (fi
->op
== Oand
&& t
->nuse
== 1
499 && (rtype(fi
->arg
[0]) == RTmp
||
500 rtype(fi
->arg
[1]) == RTmp
)) {
503 b
->jmp
.type
= Jjf
+ Cine
;
504 if (rtype(fi
->arg
[1]) == RCon
) {
506 fi
->arg
[1] = fi
->arg
[0];
511 /* since flags are not tracked in liveness,
512 * the result of the flag-setting instruction
513 * has to be marked as live
516 emit(Ocopy
, Kw
, R
, r
, R
);
517 b
->jmp
.type
= Jjf
+ Cine
;
522 aref(Ref r
, ANum
*ai
)
530 die("constant or temporary expected");
535 ascale(Ref r
, Con
*con
)
539 if (rtype(r
) != RCon
)
541 if (con
[r
.val
].type
!= CBits
)
543 n
= con
[r
.val
].bits
.i
;
544 return n
== 1 || n
== 2 || n
== 4 || n
== 8;
548 anumber(ANum
*ai
, Blk
*b
, Con
*con
)
550 /* This should be made obsolete by a proper
556 * ( RTmp(_) -> 1 slot )
558 * 0 * 2 -> 3 s * i (when constant is 1,2,4,8)
560 static char add
[10][10] = {
561 [2] [4] = 4, [4] [2] = 4,
562 [2] [6] = 6, [6] [2] = 6,
563 [2] [7] = 7, [7] [2] = 7,
564 [0] [2] = 4, [2] [0] = 4, /* 4: o + b */
565 [0] [0] = 5, /* 5: b + s * i */
566 [0] [3] = 5, [3] [0] = 5,
567 [2] [3] = 6, [3] [2] = 6, /* 6: o + s * i */
568 [2] [5] = 7, [5] [2] = 7, /* 7: o + b + s * i */
569 [0] [6] = 7, [6] [0] = 7,
570 [4] [3] = 7, [3] [4] = 7,
572 int a
, a1
, a2
, n1
, n2
, t1
, t2
;
575 for (i
=b
->ins
; i
<&b
->ins
[b
->nins
]; i
++) {
576 if (rtype(i
->to
) == RTmp
)
578 if (i
->op
!= Oadd
&& i
->op
!= Omul
)
580 a1
= aref(i
->arg
[0], ai
);
581 a2
= aref(i
->arg
[1], ai
);
582 t1
= a1
!= 1 && a1
!= 2;
583 t2
= a2
!= 1 && a2
!= 2;
585 a
= add
[n1
= a1
][n2
= a2
];
586 if (t1
&& a
< add
[0][a2
])
587 a
= add
[n1
= 0][n2
= a2
];
588 if (t2
&& a
< add
[a1
][0])
589 a
= add
[n1
= a1
][n2
= 0];
590 if (t1
&& t2
&& a
< add
[0][0])
591 a
= add
[n1
= 0][n2
= 0];
594 if (ascale(i
->arg
[0], con
) && t2
)
595 a
= 3, n1
= 2, n2
= 0;
596 if (t1
&& ascale(i
->arg
[1], con
))
597 a
= 3, n1
= 0, n2
= 2;
600 ai
[i
->to
.val
].l
= n1
;
601 ai
[i
->to
.val
].r
= n2
;
606 amatch(Addr
*a
, Ref r
, int n
, ANum
*ai
, Fn
*fn
)
612 if (rtype(r
) == RCon
) {
613 if (!addcon(&a
->offset
, &fn
->con
[r
.val
]))
614 err("unlikely sum of $%s and $%s",
615 str(a
->offset
.label
), str(fn
->con
[r
.val
].label
));
618 assert(rtype(r
) == RTmp
);
626 t
= nl
, nl
= nr
, nr
= t
;
635 a
->scale
= fn
->con
[ar
.val
].bits
.i
;
637 case 5: /* b + s * i */
640 if (fn
->tmp
[ar
.val
].slot
!= -1) {
648 amatch(a
, ar
, nr
, ai
, fn
);
654 s
= fn
->tmp
[r
.val
].slot
;
659 case 2: /* constants */
661 case 6: /* o + s * i */
662 case 7: /* o + b + s * i */
663 amatch(a
, ar
, nr
, ai
, fn
);
664 amatch(a
, al
, nl
, ai
, fn
);
671 /* instruction selection
672 * requires use counts (as given by parsing)
685 /* assign slots to fast allocs */
687 /* specific to NAlign == 3 */ /* or change n=4 and sz /= 4 below */
688 for (al
=Oalloc
, n
=4; al
<=Oalloc1
; al
++, n
*=2)
689 for (i
=b
->ins
; i
<&b
->ins
[b
->nins
]; i
++)
691 if (rtype(i
->arg
[0]) != RCon
)
693 sz
= fn
->con
[i
->arg
[0].val
].bits
.i
;
694 if (sz
< 0 || sz
>= INT_MAX
-15)
695 err("invalid alloc size %"PRId64
, sz
);
696 sz
= (sz
+ n
-1) & -n
;
698 if (sz
> INT_MAX
- fn
->slot
)
699 die("alloc too large");
700 fn
->tmp
[i
->to
.val
].slot
= fn
->slot
;
702 *i
= (Ins
){.op
= Onop
};
705 /* process basic blocks */
707 ainfo
= emalloc(n
* sizeof ainfo
[0]);
708 for (b
=fn
->start
; b
; b
=b
->link
) {
710 for (sb
=(Blk
*[3]){b
->s1
, b
->s2
, 0}; *sb
; sb
++)
711 for (p
=(*sb
)->phi
; p
; p
=p
->link
) {
712 for (a
=0; p
->blk
[a
] != b
; a
++)
713 assert(a
+1 < p
->narg
);
714 fixarg(&p
->arg
[a
], p
->cls
, 0, fn
);
716 memset(ainfo
, 0, n
* sizeof ainfo
[0]);
717 anumber(ainfo
, b
, fn
->con
);
719 for (i
=&b
->ins
[b
->nins
]; i
!=b
->ins
;)
720 sel(*--i
, ainfo
, fn
);
721 b
->nins
= &insb
[NIns
] - curi
;
722 idup(&b
->ins
, curi
, b
->nins
);
727 fprintf(stderr
, "\n> After instruction selection:\n");