disable pie for arm64 tests
[qbe.git] / amd64 / isel.c
blob56e4cf3df6157dc83e35c17f62bc9d5a932a50ef
1 #include "all.h"
2 #include <limits.h>
4 /* For x86_64, do the following:
6 * - check that constants are used only in
7 * places allowed
8 * - ensure immediates always fit in 32b
9 * - expose machine register contraints
10 * on instructions like division.
11 * - implement fast locals (the streak of
12 * constant allocX in the first basic block)
13 * - recognize complex addressing modes
15 * Invariant: the use counts that are used
16 * in sel() must be sound. This
17 * is not so trivial, maybe the
18 * dce should be moved out...
/* Per-temporary state for the addressing-mode matcher
 * (filled by anumber(), consumed by amatch()/seladdr()). */
21 typedef struct ANum ANum;

23 struct ANum {
/* n: matched pattern class of this tmp (see the table in anumber());
 * l, r: pattern classes chosen for the defining instruction's args */
24 char n, l, r;
/* defining instruction of this tmp in the current block, if any */
25 Ins *i;

28 static int amatch(Addr *, Ref, int, ANum *, Fn *);
30 static int
31 noimm(Ref r, Fn *fn)
33 int64_t val;
35 if (rtype(r) != RCon)
36 return 0;
37 switch (fn->con[r.val].type) {
38 case CAddr:
39 /* we only support the 'small'
40 * code model of the ABI, this
41 * means that we can always
42 * address data with 32bits
44 return 0;
45 case CBits:
46 val = fn->con[r.val].bits.i;
47 return (val < INT32_MIN || val > INT32_MAX);
48 default:
49 die("invalid constant");
53 static int
54 rslot(Ref r, Fn *fn)
56 if (rtype(r) != RTmp)
57 return -1;
58 return fn->tmp[r.val].slot;
/* Rewrite *r, an argument of class k used by instruction i
 * (i may be null for phi arguments, treated as a plain copy),
 * so the final operand satisfies amd64 operand constraints;
 * fixups are emitted before the instruction via emit(). */
61 static void
62 fixarg(Ref *r, int k, Ins *i, Fn *fn)
64 char buf[32];
65 Addr a, *m;
66 Ref r0, r1;
67 int s, n, op;
69 r1 = r0 = *r;
70 s = rslot(r0, fn);
71 op = i ? i->op : Ocopy;
72 if (KBASE(k) == 1 && rtype(r0) == RCon) {
73 /* load floating points from memory
74 * slots, they can't be used as
75 * immediates
/* stash the constant's bits in the data section and
 * address it through a new rip-relative memory operand */
77 r1 = MEM(fn->nmem);
78 vgrow(&fn->mem, ++fn->nmem);
79 memset(&a, 0, sizeof a);
80 a.offset.type = CAddr;
81 a.offset.local = 1;
82 n = gasstash(&fn->con[r0.val].bits, KWIDE(k) ? 8 : 4);
83 sprintf(buf, "fp%d", n);
84 a.offset.label = intern(buf);
85 fn->mem[fn->nmem-1] = a;
87 else if (op != Ocopy && k == Kl && noimm(r0, fn)) {
88 /* load constants that do not fit in
89 * a 32bit signed integer into a
90 * long temporary
92 r1 = newtmp("isel", Kl, fn);
93 emit(Ocopy, Kl, r1, r0, R);
95 else if (s != -1) {
96 /* load fast locals' addresses into
97 * temporaries right before the
98 * instruction
100 r1 = newtmp("isel", Kl, fn);
101 emit(Oaddr, Kl, r1, SLOT(s), R);
103 else if (!(isstore(op) && r == &i->arg[1])
104 && !isload(op) && op != Ocall && rtype(r0) == RCon
105 && fn->con[r0.val].type == CAddr) {
106 /* apple as does not support 32-bit
107 * absolute addressing, use a rip-
108 * relative leaq instead
110 r1 = newtmp("isel", Kl, fn);
111 emit(Oaddr, Kl, r1, r0, R);
113 else if (rtype(r0) == RMem) {
114 /* eliminate memory operands of
115 * the form $foo(%rip, ...)
/* move the symbolic offset into a fresh constant and
 * materialize its address in a temporary used as base */
117 m = &fn->mem[r0.val];
118 if (req(m->base, R))
119 if (m->offset.type == CAddr) {
120 n = fn->ncon;
121 vgrow(&fn->con, ++fn->ncon);
122 fn->con[n] = m->offset;
123 m->offset.type = CUndef;
124 r0 = newtmp("isel", Kl, fn);
125 emit(Oaddr, Kl, r0, CON(n), R);
126 m->base = r0;
129 *r = r1;
/* Try to fold the address computation feeding *r (a load/store
 * address) into a single RMem operand using the numbering in an;
 * leaves *r untouched when no addressing mode matches. */
132 static void
133 seladdr(Ref *r, ANum *an, Fn *fn)
135 Addr a;
136 Ref r0;
138 r0 = *r;
139 if (rtype(r0) == RTmp) {
140 memset(&a, 0, sizeof a);
141 if (!amatch(&a, r0, an[r0.val].n, an, fn))
142 return;
143 if (!req(a.base, R))
144 if (a.offset.type == CAddr) {
145 /* apple as does not support
146 * $foo(%r0, %r1, M); try to
147 * rewrite it or bail out if
148 * impossible
/* only rewritable when there is no index yet: the
 * base register becomes the index with scale 1 */
150 if (!req(a.index, R))
151 return;
152 else {
153 a.index = a.base;
154 a.scale = 1;
155 a.base = R;
/* the folded tmp loses one use; base/index gain one each */
158 chuse(r0, -1, fn);
159 vgrow(&fn->mem, ++fn->nmem);
160 fn->mem[fn->nmem-1] = a;
161 chuse(a.base, +1, fn);
162 chuse(a.index, +1, fn);
163 *r = MEM(fn->nmem-1);
167 static int
168 selcmp(Ref arg[2], int k, Fn *fn)
170 int swap;
171 Ref r;
172 Ins *icmp;
174 swap = rtype(arg[0]) == RCon;
175 if (swap) {
176 r = arg[1];
177 arg[1] = arg[0];
178 arg[0] = r;
180 emit(Oxcmp, k, R, arg[1], arg[0]);
181 icmp = curi;
182 if (rtype(arg[0]) == RCon) {
183 assert(k == Kl);
184 icmp->arg[1] = newtmp("isel", k, fn);
185 emit(Ocopy, k, icmp->arg[1], arg[0], R);
187 fixarg(&icmp->arg[0], k, icmp, fn);
188 fixarg(&icmp->arg[1], k, icmp, fn);
189 return swap;
/* Select amd64 instructions for a single IR instruction i,
 * emitting the replacement sequence (in reverse) via emit();
 * an holds the addressing-mode numbering for the block. */
192 static void
193 sel(Ins i, ANum *an, Fn *fn)
195 Ref r0, r1;
196 int x, k, kc;
197 int64_t sz;
198 Ins *i0, *i1;
/* dead-code elimination: drop instructions whose result tmp is
 * unused, keeping the use counts of their arguments sound */
200 if (rtype(i.to) == RTmp)
201 if (!isreg(i.to) && !isreg(i.arg[0]) && !isreg(i.arg[1]))
202 if (fn->tmp[i.to.val].nuse == 0) {
203 chuse(i.arg[0], -1, fn);
204 chuse(i.arg[1], -1, fn);
205 return;
207 i0 = curi;
208 k = i.cls;
209 switch (i.op) {
210 case Odiv:
211 case Orem:
212 case Oudiv:
213 case Ourem:
/* floating-point division has no register constraints */
214 if (KBASE(k) == 1)
215 goto Emit;
/* integer div/rem use the fixed rax/rdx register pair:
 * quotient lands in rax, remainder in rdx */
216 if (i.op == Odiv || i.op == Oudiv)
217 r0 = TMP(RAX), r1 = TMP(RDX);
218 else
219 r0 = TMP(RDX), r1 = TMP(RAX);
220 emit(Ocopy, k, i.to, r0, R);
221 emit(Ocopy, k, R, r1, R);
222 if (rtype(i.arg[1]) == RCon) {
223 /* immediates not allowed for
224 * divisions in x86
226 r0 = newtmp("isel", k, fn);
227 } else
228 r0 = i.arg[1];
229 if (fn->tmp[r0.val].slot != -1)
230 err("unlikely argument %%%s in %s",
231 fn->tmp[r0.val].name, optab[i.op].name);
/* signed division sign-extends rax into rdx (Osign);
 * unsigned division zeroes rdx instead */
232 if (i.op == Odiv || i.op == Orem) {
233 emit(Oxidiv, k, R, r0, R);
234 emit(Osign, k, TMP(RDX), TMP(RAX), R);
235 } else {
236 emit(Oxdiv, k, R, r0, R);
237 emit(Ocopy, k, TMP(RDX), CON_Z, R);
239 emit(Ocopy, k, TMP(RAX), i.arg[0], R);
240 fixarg(&curi->arg[0], k, curi, fn);
241 if (rtype(i.arg[1]) == RCon)
242 emit(Ocopy, k, r0, i.arg[1], R);
243 break;
244 case Osar:
245 case Oshr:
246 case Oshl:
/* variable shift counts must live in cl (rcx) */
247 if (rtype(i.arg[1]) == RCon)
248 goto Emit;
249 r0 = i.arg[1];
250 i.arg[1] = TMP(RCX);
251 emit(Ocopy, Kw, R, TMP(RCX), R);
252 emiti(i);
253 emit(Ocopy, Kw, TMP(RCX), r0, R);
254 break;
255 case Onop:
256 break;
257 case Ostored:
258 case Ostores:
259 case Ostorel:
260 case Ostorew:
261 case Ostoreh:
262 case Ostoreb:
/* constant floating stores are rewritten as integer
 * stores of the same width (the bits are identical) */
263 if (rtype(i.arg[0]) == RCon) {
264 if (i.op == Ostored)
265 i.op = Ostorel;
266 if (i.op == Ostores)
267 i.op = Ostorew;
269 seladdr(&i.arg[1], an, fn);
270 goto Emit;
271 case_Oload:
272 seladdr(&i.arg[0], an, fn);
273 goto Emit;
274 case Ocall:
275 case Osalloc:
276 case Ocopy:
277 case Oadd:
278 case Osub:
279 case Omul:
280 case Oand:
281 case Oor:
282 case Oxor:
283 case Oxtest:
284 case Ostosi:
285 case Odtosi:
286 case Oswtof:
287 case Osltof:
288 case Oexts:
289 case Otruncd:
290 case Ocast:
291 case_OExt:
292 Emit:
293 emiti(i);
294 i1 = curi; /* fixarg() can change curi */
295 fixarg(&i1->arg[0], argcls(&i, 0), i1, fn);
296 fixarg(&i1->arg[1], argcls(&i, 1), i1, fn);
297 break;
298 case Oalloc:
299 case Oalloc+1:
300 case Oalloc+2: /* == Oalloc1 */
301 /* we need to make sure
302 * the stack remains aligned
303 * (rsp = 0) mod 16
305 fn->dynalloc = 1;
306 if (rtype(i.arg[0]) == RCon) {
307 sz = fn->con[i.arg[0].val].bits.i;
308 if (sz < 0 || sz >= INT_MAX-15)
309 err("invalid alloc size %"PRId64, sz);
310 sz = (sz + 15) & -16;
311 emit(Osalloc, Kl, i.to, getcon(sz, fn), R);
312 } else {
313 /* r0 = (i.arg[0] + 15) & -16 */
314 r0 = newtmp("isel", Kl, fn);
315 r1 = newtmp("isel", Kl, fn);
316 emit(Osalloc, Kl, i.to, r0, R);
317 emit(Oand, Kl, r0, r1, getcon(-16, fn));
318 emit(Oadd, Kl, r1, i.arg[0], getcon(15, fn));
319 if (fn->tmp[i.arg[0].val].slot != -1)
320 err("unlikely argument %%%s in %s",
321 fn->tmp[i.arg[0].val].name, optab[i.op].name);
323 break;
324 default:
325 if (isext(i.op))
326 goto case_OExt;
327 if (isload(i.op))
328 goto case_Oload;
/* comparisons: emit a flag-reading instruction, then the
 * cmp; a swapped cmp needs the mirrored condition code */
329 if (iscmp(i.op, &kc, &x)) {
330 emit(Oflag+x, k, i.to, R, R);
331 i1 = curi;
332 if (selcmp(i.arg, kc, fn))
333 i1->op = Oflag + cmpop(x);
334 break;
336 die("unknown instruction %s", optab[i.op].name);
/* sanity check: none of the just-emitted instructions may still
 * reference a stack-slot tmp directly (fixarg must have run) */
339 while (i0 > curi && --i0) {
340 assert(rslot(i0->arg[0], fn) == -1);
341 assert(rslot(i0->arg[1], fn) == -1);
345 static Ins *
346 flagi(Ins *i0, Ins *i)
348 while (i>i0) {
349 i--;
350 if (amd64_op[i->op].zflag)
351 return i;
352 if (amd64_op[i->op].lflag)
353 continue;
354 return 0;
356 return 0;
/* Lower the terminator of block b: turn Jjnz into a conditional
 * flag jump (Jjf+cc), reusing a preceding flag-setting instruction
 * when possible; Jret0/Jjmp pass through unchanged. */
359 static void
360 seljmp(Blk *b, Fn *fn)
362 Ref r;
363 int c, k;
364 Ins *fi;
365 Tmp *t;
367 if (b->jmp.type == Jret0 || b->jmp.type == Jjmp)
368 return;
369 assert(b->jmp.type == Jjnz);
370 r = b->jmp.arg;
371 t = &fn->tmp[r.val];
372 b->jmp.arg = R;
373 assert(!req(r, R) && rtype(r) != RCon);
/* both targets equal: the condition is irrelevant */
374 if (b->s1 == b->s2) {
375 chuse(r, -1, fn);
376 b->jmp.type = Jjmp;
377 b->s2 = 0;
378 return;
380 fi = flagi(b->ins, &b->ins[b->nins]);
/* no usable flag producer for r: compare against zero */
381 if (!fi || !req(fi->to, r)) {
382 selcmp((Ref[2]){r, CON_Z}, Kw, fn); /* todo, long jnz */
383 b->jmp.type = Jjf + Cine;
385 else if (iscmp(fi->op, &k, &c)) {
/* the compared tmp is only used by this jump: fold the
 * comparison directly into the jump condition */
386 if (t->nuse == 1) {
387 if (selcmp(fi->arg, k, fn))
388 c = cmpop(c);
389 *fi = (Ins){.op = Onop};
391 b->jmp.type = Jjf + c;
393 else if (fi->op == Oand && t->nuse == 1
394 && (rtype(fi->arg[0]) == RTmp ||
395 rtype(fi->arg[1]) == RTmp)) {
/* and+jnz becomes test+jnz; test wants a register in
 * the first operand, so a constant is swapped right */
396 fi->op = Oxtest;
397 fi->to = R;
398 b->jmp.type = Jjf + Cine;
399 if (rtype(fi->arg[1]) == RCon) {
400 r = fi->arg[1];
401 fi->arg[1] = fi->arg[0];
402 fi->arg[0] = r;
405 else {
406 /* since flags are not tracked in liveness,
407 * the result of the flag-setting instruction
408 * has to be marked as live
410 if (t->nuse == 1)
411 emit(Ocopy, Kw, R, r, R);
412 b->jmp.type = Jjf + Cine;
416 static int
417 aref(Ref r, ANum *ai)
419 switch (rtype(r)) {
420 case RCon:
421 return 2;
422 case RTmp:
423 return ai[r.val].n;
424 default:
425 die("constant or temporary expected");
429 static int
430 ascale(Ref r, Con *con)
432 int64_t n;
434 if (rtype(r) != RCon)
435 return 0;
436 if (con[r.val].type != CBits)
437 return 0;
438 n = con[r.val].bits.i;
439 return n == 1 || n == 2 || n == 4 || n == 8;
/* Assign an addressing-mode pattern class to every tmp defined
 * in block b, bottom-up over adds and muls, recording for each
 * the class chosen for its left and right operands. */
442 static void
443 anumber(ANum *ai, Blk *b, Con *con)
445 /* This should be made obsolete by a proper
446 * reassoc pass.
448 * Rules:
450 * RTmp(_) -> 0 tmp
451 * ( RTmp(_) -> 1 slot )
452 * RCon(_) -> 2 con
453 * 0 * 2 -> 3 s * i (when constant is 1,2,4,8)
/* add[l][r] gives the class of an Oadd whose operands have
 * classes l and r; 0 entries mean "no addressing pattern" */
455 static char add[10][10] = {
456 [2] [2] = 2, /* folding */
457 [2] [4] = 4, [4] [2] = 4,
458 [2] [6] = 6, [6] [2] = 6,
459 [2] [7] = 7, [7] [2] = 7,
460 [0] [2] = 4, [2] [0] = 4, /* 4: o + b */
461 [0] [0] = 5, /* 5: b + s * i */
462 [0] [3] = 5, [3] [0] = 5,
463 [2] [3] = 6, [3] [2] = 6, /* 6: o + s * i */
464 [2] [5] = 7, [5] [2] = 7, /* 7: o + b + s * i */
465 [0] [6] = 7, [6] [0] = 7,
466 [4] [3] = 7, [3] [4] = 7,
468 int a, a1, a2, n1, n2, t1, t2;
469 Ins *i;
471 for (i=b->ins; i<&b->ins[b->nins]; i++) {
472 if (rtype(i->to) == RTmp)
473 ai[i->to.val].i = i;
474 if (i->op != Oadd && i->op != Omul)
475 continue;
476 a1 = aref(i->arg[0], ai);
477 a2 = aref(i->arg[1], ai);
/* t1/t2: operand may also be treated as a plain tmp (class 0) */
478 t1 = a1 != 1 && a1 != 2;
479 t2 = a2 != 1 && a2 != 2;
480 if (i->op == Oadd) {
/* try the operands' own classes first, then demote either
 * or both to class 0 when that yields a better pattern */
481 a = add[n1 = a1][n2 = a2];
482 if (t1 && a < add[0][a2])
483 a = add[n1 = 0][n2 = a2];
484 if (t2 && a < add[a1][0])
485 a = add[n1 = a1][n2 = 0];
486 if (t1 && t2 && a < add[0][0])
487 a = add[n1 = 0][n2 = 0];
488 } else {
/* Omul: only tmp * {1,2,4,8} forms the s*i pattern */
489 n1 = n2 = a = 0;
490 if (ascale(i->arg[0], con) && t2)
491 a = 3, n1 = 2, n2 = 0;
492 if (t1 && ascale(i->arg[1], con))
493 a = 3, n1 = 0, n2 = 2;
495 ai[i->to.val].n = a;
496 ai[i->to.val].l = n1;
497 ai[i->to.val].r = n2;
/* Recursively fill the Addr a from the expression tree rooted at
 * r, which anumber() classified as pattern n; returns nonzero
 * when the result is a genuine addressing mode (not a bare tmp). */
501 static int
502 amatch(Addr *a, Ref r, int n, ANum *ai, Fn *fn)
504 Ins *i;
505 int nl, nr, t, s;
506 Ref al, ar;
508 if (rtype(r) == RCon) {
509 addcon(&a->offset, &fn->con[r.val]);
510 return 1;
512 assert(rtype(r) == RTmp);
513 i = ai[r.val].i;
514 nl = ai[r.val].l;
515 nr = ai[r.val].r;
516 if (i) {
/* normalize so the smaller class is on the left */
517 if (nl > nr) {
518 al = i->arg[1];
519 ar = i->arg[0];
520 t = nl, nl = nr, nr = t;
521 } else {
522 al = i->arg[0];
523 ar = i->arg[1];
526 switch (n) {
527 case 3: /* s * i */
528 a->index = al;
529 a->scale = fn->con[ar.val].bits.i;
530 return 0;
531 case 5: /* b + s * i */
532 switch (nr) {
533 case 0:
/* a slot tmp cannot be the index; swap operands so it
 * ends up in the base position instead */
534 if (fn->tmp[ar.val].slot != -1) {
535 al = i->arg[1];
536 ar = i->arg[0];
538 a->index = ar;
539 a->scale = 1;
540 break;
541 case 3:
542 amatch(a, ar, nr, ai, fn);
543 break;
545 r = al;
546 /* fall through */
547 case 0:
/* base position: fast locals become SLOT references */
548 s = fn->tmp[r.val].slot;
549 if (s != -1)
550 r = SLOT(s);
551 a->base = r;
552 return n || s != -1;
553 case 2: /* constants */
554 case 4: /* o + b */
555 case 6: /* o + s * i */
556 case 7: /* o + b + s * i */
557 amatch(a, ar, nr, ai, fn);
558 amatch(a, al, nl, ai, fn);
559 return 1;
560 default:
561 die("unreachable");
565 /* instruction selection
566 * requires use counts (as given by parsing)
568 void
569 amd64_isel(Fn *fn)
571 Blk *b, **sb;
572 Ins *i;
573 Phi *p;
574 uint a;
575 int n, al;
576 int64_t sz;
577 ANum *ainfo;
579 /* assign slots to fast allocs */
580 b = fn->start;
581 /* specific to NAlign == 3 */ /* or change n=4 and sz /= 4 below */
/* each Oalloc variant aligns to n = 4, 8, 16; only constant-size
 * allocs in the entry block become fast (stack-slot) locals */
582 for (al=Oalloc, n=4; al<=Oalloc1; al++, n*=2)
583 for (i=b->ins; i<&b->ins[b->nins]; i++)
584 if (i->op == al) {
585 if (rtype(i->arg[0]) != RCon)
586 break;
587 sz = fn->con[i->arg[0].val].bits.i;
588 if (sz < 0 || sz >= INT_MAX-15)
589 err("invalid alloc size %"PRId64, sz);
/* round up to the alignment; slots are counted in 4-byte units */
590 sz = (sz + n-1) & -n;
591 sz /= 4;
592 if (sz > INT_MAX - fn->slot)
593 die("alloc too large");
594 fn->tmp[i->to.val].slot = fn->slot;
595 fn->slot += sz;
596 *i = (Ins){.op = Onop};
599 /* process basic blocks */
600 n = fn->ntmp;
601 ainfo = emalloc(n * sizeof ainfo[0]);
602 for (b=fn->start; b; b=b->link) {
603 curi = &insb[NIns];
/* fix phi arguments flowing out of b into its successors */
604 for (sb=(Blk*[3]){b->s1, b->s2, 0}; *sb; sb++)
605 for (p=(*sb)->phi; p; p=p->link) {
606 for (a=0; p->blk[a] != b; a++)
607 assert(a+1 < p->narg);
608 fixarg(&p->arg[a], p->cls, 0, fn);
610 memset(ainfo, 0, n * sizeof ainfo[0]);
611 anumber(ainfo, b, fn->con);
612 seljmp(b, fn);
/* selection walks the block backwards, emitting in reverse */
613 for (i=&b->ins[b->nins]; i!=b->ins;)
614 sel(*--i, ainfo, fn);
615 b->nins = &insb[NIns] - curi;
616 idup(&b->ins, curi, b->nins);
618 free(ainfo);
620 if (debug['I']) {
621 fprintf(stderr, "\n> After instruction selection:\n");
622 printfn(fn, stderr);