4 /* For x86_64, do the following:
6 * - check that constants are used only in
8 * - ensure immediates always fit in 32b
9 * - expose machine register constraints
10 * on instructions like division.
11 * - implement fast locals (the streak of
12 * constant allocX in the first basic block)
13 * - recognize complex addressing modes
15 * Invariant: the use counts that are used
16 * in sel() must be sound. This
17 * is not so trivial, maybe the
18 * dce should be moved out...
/* ANum: per-temporary addressing-mode numbering record used by
 * anumber()/amatch() below; full struct definition is not visible
 * in this extraction. */
typedef struct ANum ANum
;
/* Forward declaration: amatch() tries to fold a temporary's defining
 * expression tree into a single x86-64 Addr (offset + base + scale*index);
 * it is mutually used with the numbering computed by anumber(). */
static int amatch(Addr
*, Ref
, int, ANum
*, Fn
*);
/* NOTE(review): fragmentary extraction -- several original source lines
 * are missing between the numbered statements below, so the enclosing
 * function headers and some cases are not visible.  The first fragment
 * dispatches on a constant's type and checks whether its bit pattern
 * fits a signed 32-bit immediate; the second returns a temporary's
 * stack slot. */
37 switch (fn
->con
[r
.val
].type
) {
39 /* we only support the 'small'
40 * code model of the ABI, this
41 * means that we can always
42 * address data with 32bits
46 val
= fn
->con
[r
.val
].bits
.i
;
/* true when the value cannot be encoded as an imm32 operand */
47 return (val
< INT32_MIN
|| val
> INT32_MAX
);
49 die("invalid constant");
/* separate helper (rslot? -- original header missing): look up the
 * slot assigned to temporary r; comparisons against -1 elsewhere in
 * this file suggest -1 means "no slot" -- TODO confirm */
58 return fn
->tmp
[r
.val
].slot
;
/* fixarg: rewrite argument *r (of class k, used by operation op) so it
 * satisfies x86-64 operand constraints, emitting fixup instructions
 * and/or fn->mem descriptors as needed.
 * NOTE(review): fragmentary extraction -- local declarations and some
 * statements (e.g. the final assignment of the fixed ref back to *r)
 * are missing from view. */
62 fixarg(Ref
*r
, int k
, int op
, Fn
*fn
)
/* cpy: op is a plain copy (or the -1 pseudo-op used for phi args,
 * see the call in the driver loop below) */
71 cpy
= op
== Ocopy
|| op
== -1;
/* mem: ops that can take a memory operand directly */
72 mem
= isstore(op
) || isload(op
) || op
== Ocall
;
/* case 1: floating-point constant (KBASE(k)==1) -- x86 has no fp
 * immediates, so stash the bits in a data slot and address it by
 * an interned "fp%d" label */
73 if (KBASE(k
) == 1 && rtype(r0
) == RCon
) {
74 /* load floating points from memory
75 * slots, they can't be used as
79 vgrow(&fn
->mem
, ++fn
->nmem
);
80 memset(&a
, 0, sizeof a
);
81 a
.offset
.type
= CAddr
;
83 n
= gasstash(&fn
->con
[r0
.val
].bits
, KWIDE(k
) ? 8 : 4);
84 sprintf(buf
, "fp%d", n
);
85 a
.offset
.label
= intern(buf
);
86 fn
->mem
[fn
->nmem
-1] = a
;
/* case 2: 64-bit constant too wide for an imm32 -- materialize it
 * into a fresh temporary with a copy */
88 else if (!cpy
&& k
== Kl
&& noimm(r0
, fn
)) {
89 /* load constants that do not fit in
90 * a 32bit signed integer into a
93 r1
= newtmp("isel", Kl
, fn
);
94 emit(Ocopy
, Kl
, r1
, r0
, R
);
/* case 3 (header missing in extraction): fast-local slot -- compute
 * its address into a temporary via Oaddr */
97 /* load fast locals' addresses into
98 * temporaries right before the
101 r1
= newtmp("isel", Kl
, fn
);
102 emit(Oaddr
, Kl
, r1
, SLOT(s
), R
);
/* case 4: symbolic-address constant used as a non-memory operand */
104 else if (!mem
&& rtype(r0
) == RCon
105 && fn
->con
[r0
.val
].type
== CAddr
) {
106 /* apple as does not support 32-bit
107 * absolute addressing, use a rip-
108 * relative leaq instead
110 r1
= newtmp("isel", Kl
, fn
);
111 emit(Oaddr
, Kl
, r1
, r0
, R
);
/* seladdr: if *r is a temporary whose defining tree matches an x86-64
 * addressing mode (per the numbering in an[]), replace it with a MEM
 * reference into fn->mem and bump the use counts of the folded base
 * and index temporaries.
 * NOTE(review): fragmentary extraction -- the early-return paths and
 * the bail-out branch bodies are missing from view. */
117 seladdr(Ref
*r
, ANum
*an
, Fn
*fn
)
123 if (rtype(r0
) == RTmp
) {
124 memset(&a
, 0, sizeof a
);
125 if (!amatch(&a
, r0
, an
[r0
.val
].n
, an
, fn
))
128 if (a
.offset
.type
== CAddr
) {
129 /* apple as does not support
130 * $foo(%r0, %r1, M); try to
131 * rewrite it or bail out if
134 if (!req(a
.index
, R
))
/* matched: record the Addr and keep the folded temps alive in the
 * use counts (sel() relies on sound counts -- see file header) */
143 vgrow(&fn
->mem
, ++fn
->nmem
);
144 fn
->mem
[fn
->nmem
-1] = a
;
145 chuse(a
.base
, +1, fn
);
146 chuse(a
.index
, +1, fn
);
147 *r
= MEM(fn
->nmem
-1);
/* selcmp: emit an Oxcmp for arg[0] vs arg[1] of class k.  If arg[0]
 * is a constant the operands are swapped first (cmp cannot take an
 * immediate on the left); the return value presumably tells the
 * caller whether the comparison sense must be flipped -- the actual
 * return statements are missing from this extraction, TODO confirm. */
152 selcmp(Ref arg
[2], int k
, Fn
*fn
)
157 swap
= rtype(arg
[0]) == RCon
;
/* note reversed operand order: QBE-style emit lists args after the
 * (here unused, R) destination */
163 emit(Oxcmp
, k
, R
, arg
[1], arg
[0]);
/* if a constant still ends up as the (swapped) left operand, load it
 * into a fresh temporary first */
165 if (rtype(arg
[0]) == RCon
) {
167 iarg
[1] = newtmp("isel", k
, fn
);
168 emit(Ocopy
, k
, iarg
[1], arg
[0], R
);
/* legalize both cmp operands */
170 fixarg(&iarg
[0], k
, Oxcmp
, fn
);
171 fixarg(&iarg
[1], k
, Oxcmp
, fn
);
/* sel: select machine instructions for a single IL instruction i,
 * emitting (in reverse) into the global instruction buffer via emit().
 * Uses the addressing-mode numbering an[] to fold memory operands.
 * NOTE(review): fragmentary extraction -- the switch head, several
 * case labels, and many statements are missing from view. */
176 sel(Ins i
, ANum
*an
, Fn
*fn
)
/* dead-code elimination: drop instructions whose (non-register)
 * result is unused, decrementing the args' use counts to keep the
 * counts sound (see file-header invariant) */
183 if (rtype(i
.to
) == RTmp
)
184 if (!isreg(i
.to
) && !isreg(i
.arg
[0]) && !isreg(i
.arg
[1]))
185 if (fn
->tmp
[i
.to
.val
].nuse
== 0) {
186 chuse(i
.arg
[0], -1, fn
);
187 chuse(i
.arg
[1], -1, fn
);
/* division/remainder: x86 idiv/div fixes operands in RAX (quotient)
 * and RDX (remainder); r0 gets the wanted result register, r1 the
 * clobbered one */
199 if (i
.op
== Odiv
|| i
.op
== Oudiv
)
200 r0
= TMP(RAX
), r1
= TMP(RDX
);
202 r0
= TMP(RDX
), r1
= TMP(RAX
);
203 emit(Ocopy
, k
, i
.to
, r0
, R
);
/* dummy copy keeps the clobbered register live/defined */
204 emit(Ocopy
, k
, R
, r1
, R
);
205 if (rtype(i
.arg
[1]) == RCon
) {
206 /* immediates not allowed for
209 r0
= newtmp("isel", k
, fn
);
/* the divisor must not live in a stack slot */
212 if (fn
->tmp
[r0
.val
].slot
!= -1)
213 err("unlikely argument %%%s in %s",
214 fn
->tmp
[r0
.val
].name
, optab
[i
.op
].name
);
/* signed: sign-extend RAX into RDX (Osign = cqo/cdq) before idiv;
 * unsigned: zero RDX before div */
215 if (i
.op
== Odiv
|| i
.op
== Orem
) {
216 emit(Oxidiv
, k
, R
, r0
, R
);
217 emit(Osign
, k
, TMP(RDX
), TMP(RAX
), R
);
219 emit(Oxdiv
, k
, R
, r0
, R
);
220 emit(Ocopy
, k
, TMP(RDX
), CON_Z
, R
);
222 emit(Ocopy
, k
, TMP(RAX
), i
.arg
[0], R
);
223 fixarg(&curi
->arg
[0], k
, Ocopy
, fn
);
224 if (rtype(i
.arg
[1]) == RCon
)
225 emit(Ocopy
, k
, r0
, i
.arg
[1], R
);
/* shifts: a non-constant shift amount must be in CL (RCX) */
230 if (rtype(i
.arg
[1]) == RCon
)
234 emit(Ocopy
, Kw
, R
, TMP(RCX
), R
);
236 emit(Ocopy
, Kw
, TMP(RCX
), r0
, R
);
/* loads/stores: try to fold address computations into the memory
 * operand (arg[1] of a store, arg[0] of a load -- the surrounding
 * case labels are missing here) */
246 if (rtype(i
.arg
[0]) == RCon
) {
252 seladdr(&i
.arg
[1], an
, fn
);
255 seladdr(&i
.arg
[0], an
, fn
);
277 iarg
= curi
->arg
; /* fixarg() can change curi */
278 fixarg(&iarg
[0], argcls(&i
, 0), i
.op
, fn
);
279 fixarg(&iarg
[1], argcls(&i
, 1), i
.op
, fn
);
283 case Oalloc
+2: /* == Oalloc1 */
284 /* we need to make sure
285 * the stack remains aligned
/* dynamic alloc: round the size up to 16 bytes, either at compile
 * time (constant size) or with emitted add/and instructions */
289 if (rtype(i
.arg
[0]) == RCon
) {
290 sz
= fn
->con
[i
.arg
[0].val
].bits
.i
;
291 if (sz
< 0 || sz
>= INT_MAX
-15)
292 err("invalid alloc size %"PRId64
, sz
);
293 sz
= (sz
+ 15) & -16;
294 emit(Osalloc
, Kl
, i
.to
, getcon(sz
, fn
), R
);
296 /* r0 = (i.arg[0] + 15) & -16 */
297 r0
= newtmp("isel", Kl
, fn
);
298 r1
= newtmp("isel", Kl
, fn
);
299 emit(Osalloc
, Kl
, i
.to
, r0
, R
);
300 emit(Oand
, Kl
, r0
, r1
, getcon(-16, fn
));
301 emit(Oadd
, Kl
, r1
, i
.arg
[0], getcon(15, fn
));
302 if (fn
->tmp
[i
.arg
[0].val
].slot
!= -1)
303 err("unlikely argument %%%s in %s",
304 fn
->tmp
[i
.arg
[0].val
].name
, optab
[i
.op
].name
);
/* comparisons: materialize the flag with a setcc (Oflag+x); if
 * selcmp() swapped the operands, flip the condition on the emitted
 * instruction (i1 points at it -- assignment not visible here) */
312 if (iscmp(i
.op
, &kc
, &x
)) {
313 emit(Oflag
+x
, k
, i
.to
, R
, R
);
315 if (selcmp(i
.arg
, kc
, fn
))
316 i1
->op
= Oflag
+ cmpop(x
);
319 die("unknown instruction %s", optab
[i
.op
].name
);
/* debug check: no emitted fixup instruction may still reference a
 * stack-slot temporary */
322 while (i0
> curi
&& --i0
) {
323 assert(rslot(i0
->arg
[0], fn
) == -1);
324 assert(rslot(i0
->arg
[1], fn
) == -1);
/* flagi: scan instructions in [i0, i) backwards (presumably -- the
 * loop itself is missing from this extraction) for the last
 * instruction whose flag behavior matters, using the zflag/lflag
 * properties from the amd64_op table.  Returns the flag-setting
 * instruction or a null indicator -- TODO confirm against full source. */
329 flagi(Ins
*i0
, Ins
*i
)
/* zflag: instruction sets the flags */
333 if (amd64_op
[i
->op
].zflag
)
/* lflag: instruction leaves the flags untouched, keep scanning */
335 if (amd64_op
[i
->op
].lflag
)
/* seljmp: lower block b's terminator.  Jret0/Jjmp need nothing; a
 * Jjnz on temporary r is turned into a conditional flag jump (Jjf+cc),
 * reusing the flags of a preceding cmp/and when possible.
 * NOTE(review): fragmentary extraction -- several branch bodies and
 * the trivial-jump rewrites are missing from view. */
343 seljmp(Blk
*b
, Fn
*fn
)
350 if (b
->jmp
.type
== Jret0
|| b
->jmp
.type
== Jjmp
)
352 assert(b
->jmp
.type
== Jjnz
);
356 assert(!req(r
, R
) && rtype(r
) != RCon
);
/* both successors identical: the branch is unconditional in effect */
357 if (b
->s1
== b
->s2
) {
/* find the last flag-setting instruction in the block */
363 fi
= flagi(b
->ins
, &b
->ins
[b
->nins
]);
/* no usable flags for r: emit an explicit compare against zero */
364 if (!fi
|| !req(fi
->to
, r
)) {
365 selcmp((Ref
[2]){r
, CON_Z
}, Kw
, fn
); /* todo, long jnz */
366 b
->jmp
.type
= Jjf
+ Cine
;
/* r is defined by a comparison: jump directly on its condition and
 * delete the now-redundant flag-materializing instruction */
368 else if (iscmp(fi
->op
, &k
, &c
)) {
370 if (selcmp(fi
->arg
, k
, fn
))
372 *fi
= (Ins
){.op
= Onop
};
374 b
->jmp
.type
= Jjf
+ c
;
/* r is a single-use 'and': reuse it as a test instruction */
376 else if (fi
->op
== Oand
&& t
->nuse
== 1
377 && (rtype(fi
->arg
[0]) == RTmp
||
378 rtype(fi
->arg
[1]) == RTmp
)) {
381 b
->jmp
.type
= Jjf
+ Cine
;
382 if (rtype(fi
->arg
[1]) == RCon
) {
384 fi
->arg
[1] = fi
->arg
[0];
389 /* since flags are not tracked in liveness,
390 * the result of the flag-setting instruction
391 * has to be marked as live
394 emit(Ocopy
, Kw
, R
, r
, R
);
395 b
->jmp
.type
= Jjf
+ Cine
;
/* aref: classify reference r for the addressing-mode numbering in
 * anumber() (the returned codes index the add[][] table there).
 * NOTE(review): the classification cases themselves are missing from
 * this extraction; only the signature and the failure path are visible. */
400 aref(Ref r
, ANum
*ai
)
408 die("constant or temporary expected");
/* ascale: return nonzero iff r is an integer constant that is a valid
 * x86-64 scale factor (1, 2, 4, or 8); used by anumber() to recognize
 * s*i multiplications. */
413 ascale(Ref r
, Con
*con
)
417 if (rtype(r
) != RCon
)
/* only plain bit constants can scale; symbolic addresses cannot */
419 if (con
[r
.val
].type
!= CBits
)
421 n
= con
[r
.val
].bits
.i
;
422 return n
== 1 || n
== 2 || n
== 4 || n
== 8;
/* anumber: forward pass over block b assigning each temporary an
 * addressing-mode "number" describing the largest x86-64 address
 * expression its definition can fold into.  The add[][] table maps a
 * pair of operand numbers to the number of their sum; codes (per the
 * partially visible legend): 1 slot, 2 constant, 3 s*i, 4 o+b,
 * 5 b+s*i, 6 o+s*i, 7 o+b+s*i.  Results (chosen operand roles) are
 * stored in ai[].l / ai[].r for amatch() to replay.
 * NOTE(review): fragmentary extraction -- some statements (e.g. the
 * final number store and Omul handling) are missing from view. */
426 anumber(ANum
*ai
, Blk
*b
, Con
*con
)
428 /* This should be made obsolete by a proper
434 * ( RTmp(_) -> 1 slot )
436 * 0 * 2 -> 3 s * i (when constant is 1,2,4,8)
438 static char add
[10][10] = {
439 [2] [2] = 2, /* folding */
440 [2] [4] = 4, [4] [2] = 4,
441 [2] [6] = 6, [6] [2] = 6,
442 [2] [7] = 7, [7] [2] = 7,
443 [0] [2] = 4, [2] [0] = 4, /* 4: o + b */
444 [0] [0] = 5, /* 5: b + s * i */
445 [0] [3] = 5, [3] [0] = 5,
446 [2] [3] = 6, [3] [2] = 6, /* 6: o + s * i */
447 [2] [5] = 7, [5] [2] = 7, /* 7: o + b + s * i */
448 [0] [6] = 7, [6] [0] = 7,
449 [4] [3] = 7, [3] [4] = 7,
451 int a
, a1
, a2
, n1
, n2
, t1
, t2
;
454 for (i
=b
->ins
; i
<&b
->ins
[b
->nins
]; i
++) {
455 if (rtype(i
->to
) == RTmp
)
/* only add/mul can contribute to an address expression */
457 if (i
->op
!= Oadd
&& i
->op
!= Omul
)
459 a1
= aref(i
->arg
[0], ai
);
460 a2
= aref(i
->arg
[1], ai
);
/* t1/t2: the operand is a temporary usable as a bare base (not a
 * slot or a constant) */
461 t1
= a1
!= 1 && a1
!= 2;
462 t2
= a2
!= 1 && a2
!= 2;
/* for Oadd: try the operands' own numbers first, then demote either
 * side to a plain base (0) if that yields a richer combined mode */
464 a
= add
[n1
= a1
][n2
= a2
];
465 if (t1
&& a
< add
[0][a2
])
466 a
= add
[n1
= 0][n2
= a2
];
467 if (t2
&& a
< add
[a1
][0])
468 a
= add
[n1
= a1
][n2
= 0];
469 if (t1
&& t2
&& a
< add
[0][0])
470 a
= add
[n1
= 0][n2
= 0];
/* for Omul: temp * {1,2,4,8} becomes an s*i term (code 3) */
473 if (ascale(i
->arg
[0], con
) && t2
)
474 a
= 3, n1
= 2, n2
= 0;
475 if (t1
&& ascale(i
->arg
[1], con
))
476 a
= 3, n1
= 0, n2
= 2;
/* remember which role each operand plays for amatch() */
479 ai
[i
->to
.val
].l
= n1
;
480 ai
[i
->to
.val
].r
= n2
;
/* amatch: recursively rebuild the address expression rooted at r
 * (whose addressing number is n, with operand roles recorded in ai[])
 * into the Addr *a: constants fold into the offset, temporaries
 * become base/index, and a matched s*i sets the scale.
 * NOTE(review): fragmentary extraction -- the switch head, al/ar
 * operand extraction, and several case bodies are missing from view. */
485 amatch(Addr
*a
, Ref r
, int n
, ANum
*ai
, Fn
*fn
)
/* constant leaf: accumulate into the displacement */
491 if (rtype(r
) == RCon
) {
492 addcon(&a
->offset
, &fn
->con
[r
.val
]);
495 assert(rtype(r
) == RTmp
);
/* normalize operand order by swapping the left/right numbers */
503 t
= nl
, nl
= nr
, nr
= t
;
/* s*i: the constant operand becomes the scale factor */
512 a
->scale
= fn
->con
[ar
.val
].bits
.i
;
514 case 5: /* b + s * i */
/* a stack-slot temporary cannot serve as the index/base here */
517 if (fn
->tmp
[ar
.val
].slot
!= -1) {
525 amatch(a
, ar
, nr
, ai
, fn
);
531 s
= fn
->tmp
[r
.val
].slot
;
536 case 2: /* constants */
538 case 6: /* o + s * i */
539 case 7: /* o + b + s * i */
540 amatch(a
, ar
, nr
, ai
, fn
);
541 amatch(a
, al
, nl
, ai
, fn
);
548 /* instruction selection
549 * requires use counts (as given by parsing)
562 /* assign slots to fast allocs */
564 /* specific to NAlign == 3 */ /* or change n=4 and sz /= 4 below */
565 for (al
=Oalloc
, n
=4; al
<=Oalloc1
; al
++, n
*=2)
566 for (i
=b
->ins
; i
<&b
->ins
[b
->nins
]; i
++)
568 if (rtype(i
->arg
[0]) != RCon
)
570 sz
= fn
->con
[i
->arg
[0].val
].bits
.i
;
571 if (sz
< 0 || sz
>= INT_MAX
-15)
572 err("invalid alloc size %"PRId64
, sz
);
573 sz
= (sz
+ n
-1) & -n
;
575 fn
->tmp
[i
->to
.val
].slot
= fn
->slot
;
577 *i
= (Ins
){.op
= Onop
};
580 /* process basic blocks */
582 ainfo
= emalloc(n
* sizeof ainfo
[0]);
583 for (b
=fn
->start
; b
; b
=b
->link
) {
585 for (sb
=(Blk
*[3]){b
->s1
, b
->s2
, 0}; *sb
; sb
++)
586 for (p
=(*sb
)->phi
; p
; p
=p
->link
) {
587 for (a
=0; p
->blk
[a
] != b
; a
++)
588 assert(a
+1 < p
->narg
);
589 fixarg(&p
->arg
[a
], p
->cls
, -1, fn
);
591 memset(ainfo
, 0, n
* sizeof ainfo
[0]);
592 anumber(ainfo
, b
, fn
->con
);
594 for (i
=&b
->ins
[b
->nins
]; i
!=b
->ins
;)
595 sel(*--i
, ainfo
, fn
);
596 b
->nins
= &insb
[NIns
] - curi
;
597 idup(&b
->ins
, curi
, b
->nins
);
602 fprintf(stderr
, "\n> After instruction selection:\n");