4 /* For x86_64, do the following:
7 * - check that constants are used only in
9 * - ensure immediates always fit in 32b
10 * - explicit machine register contraints
11 * on instructions like division.
12 * - implement fast locals (the streak of
13 * constant allocX in the first basic block)
14 * - recognize complex addressing modes
16 * Invariant: the use counts that are used
17 * in sel() must be sound. This
18 * is not so trivial, maybe the
19 * dce should be moved out...
22 typedef struct ANum ANum
;
30 static void amatch(Addr
*, Ref
, ANum
*, Fn
*, int);
36 default: die("invalid fp comparison %d", fc
);
37 case FCle
: return ICule
;
38 case FClt
: return ICult
;
39 case FCgt
: return ICugt
;
40 case FCge
: return ICuge
;
41 case FCne
: return ICne
;
42 case FCeq
: return ICeq
;
43 case FCo
: return ICxnp
;
44 case FCuo
: return ICxp
;
49 iscmp(int op
, int *pk
, int *pc
)
53 if (Ocmpw
<= op
&& op
<= Ocmpw1
) {
57 else if (Ocmpl
<= op
&& op
<= Ocmpl1
) {
61 else if (Ocmps
<= op
&& op
<= Ocmps1
) {
62 c
= fcmptoi(op
- Ocmps
);
65 else if (Ocmpd
<= op
&& op
<= Ocmpd1
) {
66 c
= fcmptoi(op
- Ocmpd
);
85 switch (fn
->con
[r
.val
].type
) {
87 /* we only support the 'small'
88 * code model of the ABI, this
89 * means that we can always
90 * address data with 32bits
94 val
= fn
->con
[r
.val
].bits
.i
;
95 return (val
< INT32_MIN
|| val
> INT32_MAX
);
97 die("invalid constant");
104 if (rtype(r
) != RTmp
)
106 return fn
->tmp
[r
.val
].slot
;
110 argcls(Ins
*i
, int n
)
112 return opdesc
[i
->op
].argcls
[n
][i
->cls
];
116 fixarg(Ref
*r
, int k
, int phi
, Fn
*fn
)
124 if (KBASE(k
) == 1 && rtype(r0
) == RCon
) {
125 /* load floating points from memory
126 * slots, they can't be used as
130 vgrow(&fn
->mem
, ++fn
->nmem
);
131 memset(&a
, 0, sizeof a
);
132 a
.offset
.type
= CAddr
;
134 n
= stashfp(fn
->con
[r0
.val
].bits
.i
, KWIDE(k
));
135 sprintf(a
.offset
.label
, "fp%d", n
);
136 fn
->mem
[fn
->nmem
-1] = a
;
138 else if (!phi
&& k
== Kl
&& noimm(r0
, fn
)) {
139 /* load constants that do not fit in
140 * a 32bit signed integer into a
143 r1
= newtmp("isel", Kl
, fn
);
144 emit(Ocopy
, Kl
, r1
, r0
, R
);
147 /* load fast locals' addresses into
148 * temporaries right before the
151 r1
= newtmp("isel", Kl
, fn
);
152 emit(Oaddr
, Kl
, r1
, SLOT(s
), R
);
158 seladdr(Ref
*r
, ANum
*an
, Fn
*fn
)
164 if (rtype(r0
) == RTmp
) {
168 amatch(&a
, r0
, an
, fn
, 1);
169 vgrow(&fn
->mem
, ++fn
->nmem
);
170 fn
->mem
[fn
->nmem
-1] = a
;
171 r1
= MEM(fn
->nmem
-1);
172 chuse(a
.base
, +1, fn
);
173 chuse(a
.index
, +1, fn
);
174 if (rtype(a
.base
) != RTmp
)
175 if (rtype(a
.index
) != RTmp
)
183 selcmp(Ref arg
[2], int k
, Fn
*fn
)
187 if (rtype(arg
[0]) == RCon
) {
192 assert(rtype(arg
[0]) != RCon
);
193 emit(Oxcmp
, k
, R
, arg
[1], arg
[0]);
195 fixarg(&iarg
[0], k
, 0, fn
);
196 fixarg(&iarg
[1], k
, 0, fn
);
200 sel(Ins i
, ANum
*an
, Fn
*fn
)
207 if (rtype(i
.to
) == RTmp
)
208 if (!isreg(i
.to
) && !isreg(i
.arg
[0]) && !isreg(i
.arg
[1]))
209 if (fn
->tmp
[i
.to
.val
].nuse
== 0) {
210 chuse(i
.arg
[0], -1, fn
);
211 chuse(i
.arg
[1], -1, fn
);
221 if (i
.op
== Odiv
|| i
.op
== Oudiv
)
222 r0
= TMP(RAX
), r1
= TMP(RDX
);
224 r0
= TMP(RDX
), r1
= TMP(RAX
);
225 emit(Ocopy
, k
, i
.to
, r0
, R
);
226 emit(Ocopy
, k
, R
, r1
, R
);
227 if (rtype(i
.arg
[1]) == RCon
) {
228 /* immediates not allowed for
231 r0
= newtmp("isel", k
, fn
);
234 if (fn
->tmp
[r0
.val
].slot
!= -1)
235 err("unlikely argument %%%s in %s",
236 fn
->tmp
[r0
.val
].name
, opdesc
[i
.op
].name
);
237 if (i
.op
== Odiv
|| i
.op
== Orem
) {
238 emit(Oxidiv
, k
, R
, r0
, R
);
239 emit(Osign
, k
, TMP(RDX
), TMP(RAX
), R
);
241 emit(Oxdiv
, k
, R
, r0
, R
);
242 emit(Ocopy
, k
, TMP(RDX
), CON_Z
, R
);
244 emit(Ocopy
, k
, TMP(RAX
), i
.arg
[0], R
);
245 fixarg(&curi
->arg
[0], k
, 0, fn
);
246 if (rtype(i
.arg
[1]) == RCon
)
247 emit(Ocopy
, k
, r0
, i
.arg
[1], R
);
252 if (rtype(i
.arg
[1]) == RCon
)
256 emit(Ocopy
, Kw
, R
, TMP(RCX
), R
);
258 emit(Ocopy
, Kw
, TMP(RCX
), r0
, R
);
268 if (rtype(i
.arg
[0]) == RCon
) {
274 seladdr(&i
.arg
[1], an
, fn
);
277 seladdr(&i
.arg
[0], an
, fn
);
300 fixarg(&iarg
[0], argcls(&i
, 0), 0, fn
);
301 fixarg(&iarg
[1], argcls(&i
, 1), 0, fn
);
305 case Oalloc
+2: /* == Oalloc1 */
306 /* we need to make sure
307 * the stack remains aligned
310 if (rtype(i
.arg
[0]) == RCon
) {
311 sz
= fn
->con
[i
.arg
[0].val
].bits
.i
;
312 if (sz
< 0 || sz
>= INT_MAX
-15)
313 err("invalid alloc size %"PRId64
, sz
);
314 sz
= (sz
+ 15) & -16;
315 emit(Osalloc
, Kl
, i
.to
, getcon(sz
, fn
), R
);
317 /* r0 = (i.arg[0] + 15) & -16 */
318 r0
= newtmp("isel", Kl
, fn
);
319 r1
= newtmp("isel", Kl
, fn
);
320 emit(Osalloc
, Kl
, i
.to
, r0
, R
);
321 emit(Oand
, Kl
, r0
, r1
, getcon(-16, fn
));
322 emit(Oadd
, Kl
, r1
, i
.arg
[0], getcon(15, fn
));
323 if (fn
->tmp
[i
.arg
[0].val
].slot
!= -1)
324 err("unlikely argument %%%s in %s",
325 fn
->tmp
[i
.arg
[0].val
].name
, opdesc
[i
.op
].name
);
333 if (iscmp(i
.op
, &kc
, &x
)) {
334 if (rtype(i
.arg
[0]) == RCon
)
336 emit(Oxset
+x
, k
, i
.to
, R
, R
);
337 selcmp(i
.arg
, kc
, fn
);
340 die("unknown instruction %s", opdesc
[i
.op
].name
);
343 while (i0
> curi
&& --i0
) {
344 assert(rslot(i0
->arg
[0], fn
) == -1);
345 assert(rslot(i0
->arg
[1], fn
) == -1);
350 flagi(Ins
*i0
, Ins
*i
)
354 if (opdesc
[i
->op
].sflag
)
356 if (opdesc
[i
->op
].lflag
)
364 seljmp(Blk
*b
, Fn
*fn
)
371 if (b
->jmp
.type
== Jret0
|| b
->jmp
.type
== Jjmp
)
373 assert(b
->jmp
.type
== Jjnz
);
377 assert(!req(r
, R
) && rtype(r
) != RCon
);
378 if (b
->s1
== b
->s2
) {
384 fi
= flagi(b
->ins
, &b
->ins
[b
->nins
]);
385 if (fi
&& req(fi
->to
, r
)) {
386 if (iscmp(fi
->op
, &k
, &c
)) {
387 if (rtype(fi
->arg
[0]) == RCon
)
389 b
->jmp
.type
= Jxjc
+ c
;
391 selcmp(fi
->arg
, k
, fn
);
392 *fi
= (Ins
){.op
= Onop
};
396 if (fi
->op
== Oand
&& t
->nuse
== 1
397 && (rtype(fi
->arg
[0]) == RTmp
||
398 rtype(fi
->arg
[1]) == RTmp
)) {
401 b
->jmp
.type
= Jxjc
+ ICne
;
402 if (rtype(fi
->arg
[1]) == RCon
) {
404 fi
->arg
[1] = fi
->arg
[0];
409 /* since flags are not tracked in liveness,
410 * the result of the flag-setting instruction
411 * has to be marked as live
414 emit(Ocopy
, Kw
, R
, r
, R
);
415 b
->jmp
.type
= Jxjc
+ ICne
;
418 selcmp((Ref
[2]){r
, CON_Z
}, Kw
, fn
); /* todo, add long branch if non-zero */
419 b
->jmp
.type
= Jxjc
+ ICne
;
423 aref(Ref r
, ANum
*ai
)
431 die("constant or temporary expected");
436 ascale(Ref r
, Con
*con
)
440 if (rtype(r
) != RCon
)
442 if (con
[r
.val
].type
!= CBits
)
444 n
= con
[r
.val
].bits
.i
;
445 return n
== 1 || n
== 2 || n
== 4 || n
== 8;
449 anumber(ANum
*ai
, Blk
*b
, Con
*con
)
451 /* This should be made obsolete by a proper
457 * ( RTmp(_) -> 1 slot )
459 * 0 * 2 -> 3 s * i (when constant is 1,2,4,8)
461 static char add
[10][10] = {
462 [2] [2] = 2, /* folding */
463 [2] [5] = 5, [5] [2] = 5,
464 [2] [6] = 6, [6] [2] = 6,
465 [2] [7] = 7, [7] [2] = 7,
466 [0] [0] = 4, /* 4: b + s * i */
467 [0] [3] = 4, [3] [0] = 4,
468 [2] [3] = 5, [3] [2] = 5, /* 5: o + s * i */
469 [0] [2] = 6, [2] [0] = 6, /* 6: o + b */
470 [2] [4] = 7, [4] [2] = 7, /* 7: o + b + s * i */
471 [0] [5] = 7, [5] [0] = 7,
472 [6] [3] = 7, [3] [6] = 7,
475 int a
, a1
, a2
, n1
, n2
, t1
, t2
;
478 for (i
=b
->ins
; i
-b
->ins
< b
->nins
; i
++) {
479 if (rtype(i
->to
) == RTmp
)
481 if (i
->op
!= Oadd
&& i
->op
!= Omul
)
483 a1
= aref(i
->arg
[0], ai
);
484 a2
= aref(i
->arg
[1], ai
);
485 t1
= a1
!= 1 && a1
!= 2;
486 t2
= a2
!= 1 && a2
!= 2;
488 a
= add
[n1
= a1
][n2
= a2
];
489 if (t1
&& a
< add
[0][a2
])
490 a
= add
[n1
= 0][n2
= a2
];
491 if (t2
&& a
< add
[a1
][0])
492 a
= add
[n1
= a1
][n2
= 0];
493 if (t1
&& t2
&& a
< add
[0][0])
494 a
= add
[n1
= 0][n2
= 0];
497 if (ascale(i
->arg
[0], con
) && t2
)
498 a
= 3, n1
= 2, n2
= 0;
499 if (t1
&& ascale(i
->arg
[1], con
))
500 a
= 3, n1
= 0, n2
= 2;
503 ai
[i
->to
.val
].l
= n1
;
504 ai
[i
->to
.val
].r
= n2
;
509 amatch(Addr
*a
, Ref r
, ANum
*ai
, Fn
*fn
, int top
)
516 memset(a
, 0, sizeof *a
);
517 if (rtype(r
) == RCon
) {
518 addcon(&a
->offset
, &fn
->con
[r
.val
]);
521 assert(rtype(r
) == RTmp
);
529 t
= nl
, nl
= nr
, nr
= t
;
535 switch (ai
[r
.val
].n
) {
539 a
->scale
= fn
->con
[ar
.val
].bits
.i
;
543 case 4: /* b + s * i */
546 if (fn
->tmp
[ar
.val
].slot
!= -1) {
554 amatch(a
, ar
, ai
, fn
, 0);
559 s
= fn
->tmp
[r
.val
].slot
;
564 case 2: /* constants */
565 case 5: /* o + s * i */
567 case 7: /* o + b + s * i */
568 amatch(a
, ar
, ai
, fn
, 0);
569 amatch(a
, al
, ai
, fn
, 0);
576 /* instruction selection
577 * requires use counts (as given by parsing)
590 /* assign slots to fast allocs */
592 /* specific to NAlign == 3 */ /* or change n=4 and sz /= 4 below */
593 for (al
=Oalloc
, n
=4; al
<=Oalloc1
; al
++, n
*=2)
594 for (i
=b
->ins
; i
-b
->ins
< b
->nins
; i
++)
596 if (rtype(i
->arg
[0]) != RCon
)
598 sz
= fn
->con
[i
->arg
[0].val
].bits
.i
;
599 if (sz
< 0 || sz
>= INT_MAX
-15)
600 err("invalid alloc size %"PRId64
, sz
);
601 sz
= (sz
+ n
-1) & -n
;
603 fn
->tmp
[i
->to
.val
].slot
= fn
->slot
;
605 *i
= (Ins
){.op
= Onop
};
608 /* process basic blocks */
610 ainfo
= emalloc(n
* sizeof ainfo
[0]);
611 for (b
=fn
->start
; b
; b
=b
->link
) {
613 for (sb
=(Blk
*[3]){b
->s1
, b
->s2
, 0}; *sb
; sb
++)
614 for (p
=(*sb
)->phi
; p
; p
=p
->link
) {
615 for (a
=0; p
->blk
[a
] != b
; a
++)
616 assert(a
+1 < p
->narg
);
617 fixarg(&p
->arg
[a
], p
->cls
, 1, fn
);
619 memset(ainfo
, 0, n
* sizeof ainfo
[0]);
620 anumber(ainfo
, b
, fn
->con
);
622 for (i
=&b
->ins
[b
->nins
]; i
!=b
->ins
;)
623 sel(*--i
, ainfo
, fn
);
624 b
->nins
= &insb
[NIns
] - curi
;
625 idup(&b
->ins
, curi
, b
->nins
);
630 fprintf(stderr
, "\n> After instruction selection:\n");