cheaper mul by small constants on amd64
[qbe.git] / amd64 / isel.c
blob2b928787059b89cba8a2872b568c53bba8589456
1 #include "all.h"
2 #include <limits.h>
/* For x86_64, do the following:
 *
 * - check that constants are used only in
 *   places allowed
 * - ensure immediates always fit in 32b
 * - expose machine register constraints
 *   on instructions like division.
 * - implement fast locals (the streak of
 *   constant allocX in the first basic block)
 * - recognize complex addressing modes
 *
 * Invariant: the use counts that are used
 *            in sel() must be sound.  This
 *            is not so trivial, maybe the
 *            dce should be moved out...
 */
21 static int amatch(Addr *, Num *, Ref, Fn *);
23 static int
24 noimm(Ref r, Fn *fn)
26 int64_t val;
28 if (rtype(r) != RCon)
29 return 0;
30 switch (fn->con[r.val].type) {
31 case CAddr:
32 /* we only support the 'small'
33 * code model of the ABI, this
34 * means that we can always
35 * address data with 32bits
37 return 0;
38 case CBits:
39 val = fn->con[r.val].bits.i;
40 return (val < INT32_MIN || val > INT32_MAX);
41 default:
42 die("invalid constant");
46 static int
47 rslot(Ref r, Fn *fn)
49 if (rtype(r) != RTmp)
50 return -1;
51 return fn->tmp[r.val].slot;
54 static int
55 hascon(Ref r, Con **pc, Fn *fn)
57 switch (rtype(r)) {
58 case RCon:
59 *pc = &fn->con[r.val];
60 return 1;
61 case RMem:
62 *pc = &fn->mem[r.val].offset;
63 return 1;
64 default:
65 return 0;
69 static void
70 fixarg(Ref *r, int k, Ins *i, Fn *fn)
72 char buf[32];
73 Addr a, *m;
74 Con cc, *c;
75 Ref r0, r1, r2, r3;
76 int s, n, op;
78 r1 = r0 = *r;
79 s = rslot(r0, fn);
80 op = i ? i->op : Ocopy;
81 if (KBASE(k) == 1 && rtype(r0) == RCon) {
82 /* load floating points from memory
83 * slots, they can't be used as
84 * immediates
86 r1 = MEM(fn->nmem);
87 vgrow(&fn->mem, ++fn->nmem);
88 memset(&a, 0, sizeof a);
89 a.offset.type = CAddr;
90 n = stashbits(&fn->con[r0.val].bits, KWIDE(k) ? 8 : 4);
91 /* quote the name so that we do not
92 * add symbol prefixes on the apple
93 * target variant
95 sprintf(buf, "\"%sfp%d\"", T.asloc, n);
96 a.offset.sym.id = intern(buf);
97 fn->mem[fn->nmem-1] = a;
99 else if (op != Ocopy && k == Kl && noimm(r0, fn)) {
100 /* load constants that do not fit in
101 * a 32bit signed integer into a
102 * long temporary
104 r1 = newtmp("isel", Kl, fn);
105 emit(Ocopy, Kl, r1, r0, R);
107 else if (s != -1) {
108 /* load fast locals' addresses into
109 * temporaries right before the
110 * instruction
112 r1 = newtmp("isel", Kl, fn);
113 emit(Oaddr, Kl, r1, SLOT(s), R);
115 else if (T.apple && hascon(r0, &c, fn)
116 && c->type == CAddr && c->sym.type == SThr) {
117 r1 = newtmp("isel", Kl, fn);
118 if (c->bits.i) {
119 r2 = newtmp("isel", Kl, fn);
120 cc = (Con){.type = CBits};
121 cc.bits.i = c->bits.i;
122 r3 = newcon(&cc, fn);
123 emit(Oadd, Kl, r1, r2, r3);
124 } else
125 r2 = r1;
126 emit(Ocopy, Kl, r2, TMP(RAX), R);
127 r2 = newtmp("isel", Kl, fn);
128 r3 = newtmp("isel", Kl, fn);
129 emit(Ocall, 0, R, r3, CALL(17));
130 emit(Ocopy, Kl, TMP(RDI), r2, R);
131 emit(Oload, Kl, r3, r2, R);
132 cc = *c;
133 cc.bits.i = 0;
134 r3 = newcon(&cc, fn);
135 emit(Oload, Kl, r2, r3, R);
136 if (rtype(r0) == RMem) {
137 m = &fn->mem[r0.val];
138 m->offset.type = CUndef;
139 m->base = r1;
140 r1 = r0;
143 else if (!(isstore(op) && r == &i->arg[1])
144 && !isload(op) && op != Ocall && rtype(r0) == RCon
145 && fn->con[r0.val].type == CAddr) {
146 /* apple as does not support 32-bit
147 * absolute addressing, use a rip-
148 * relative leaq instead
150 r1 = newtmp("isel", Kl, fn);
151 emit(Oaddr, Kl, r1, r0, R);
153 else if (rtype(r0) == RMem) {
154 /* eliminate memory operands of
155 * the form $foo(%rip, ...)
157 m = &fn->mem[r0.val];
158 if (req(m->base, R))
159 if (m->offset.type == CAddr) {
160 r0 = newtmp("isel", Kl, fn);
161 emit(Oaddr, Kl, r0, newcon(&m->offset, fn), R);
162 m->offset.type = CUndef;
163 m->base = r0;
166 *r = r1;
169 static void
170 seladdr(Ref *r, Num *tn, Fn *fn)
172 Addr a;
173 Ref r0;
175 r0 = *r;
176 if (rtype(r0) == RTmp) {
177 memset(&a, 0, sizeof a);
178 if (!amatch(&a, tn, r0, fn))
179 return;
180 if (!req(a.base, R))
181 if (a.offset.type == CAddr) {
182 /* apple as does not support
183 * $foo(%r0, %r1, M); try to
184 * rewrite it or bail out if
185 * impossible
187 if (!req(a.index, R) || rtype(a.base) != RTmp)
188 return;
189 else {
190 a.index = a.base;
191 a.scale = 1;
192 a.base = R;
195 chuse(r0, -1, fn);
196 vgrow(&fn->mem, ++fn->nmem);
197 fn->mem[fn->nmem-1] = a;
198 chuse(a.base, +1, fn);
199 chuse(a.index, +1, fn);
200 *r = MEM(fn->nmem-1);
204 static int
205 cmpswap(Ref arg[2], int op)
207 switch (op) {
208 case NCmpI+Cflt:
209 case NCmpI+Cfle:
210 return 1;
211 case NCmpI+Cfgt:
212 case NCmpI+Cfge:
213 return 0;
215 return rtype(arg[0]) == RCon;
218 static void
219 selcmp(Ref arg[2], int k, int swap, Fn *fn)
221 Ref r;
222 Ins *icmp;
224 if (swap) {
225 r = arg[1];
226 arg[1] = arg[0];
227 arg[0] = r;
229 emit(Oxcmp, k, R, arg[1], arg[0]);
230 icmp = curi;
231 if (rtype(arg[0]) == RCon) {
232 assert(k != Kw);
233 icmp->arg[1] = newtmp("isel", k, fn);
234 emit(Ocopy, k, icmp->arg[1], arg[0], R);
235 fixarg(&curi->arg[0], k, curi, fn);
237 fixarg(&icmp->arg[0], k, icmp, fn);
238 fixarg(&icmp->arg[1], k, icmp, fn);
241 static void
242 sel(Ins i, Num *tn, Fn *fn)
244 Ref r0, r1, tmp[7];
245 int x, j, k, kc, sh, swap;
246 Ins *i0, *i1;
248 if (rtype(i.to) == RTmp)
249 if (!isreg(i.to) && !isreg(i.arg[0]) && !isreg(i.arg[1]))
250 if (fn->tmp[i.to.val].nuse == 0) {
251 chuse(i.arg[0], -1, fn);
252 chuse(i.arg[1], -1, fn);
253 return;
255 i0 = curi;
256 k = i.cls;
257 switch (i.op) {
258 case Odiv:
259 case Orem:
260 case Oudiv:
261 case Ourem:
262 if (KBASE(k) == 1)
263 goto Emit;
264 if (i.op == Odiv || i.op == Oudiv)
265 r0 = TMP(RAX), r1 = TMP(RDX);
266 else
267 r0 = TMP(RDX), r1 = TMP(RAX);
268 emit(Ocopy, k, i.to, r0, R);
269 emit(Ocopy, k, R, r1, R);
270 if (rtype(i.arg[1]) == RCon) {
271 /* immediates not allowed for
272 * divisions in x86
274 r0 = newtmp("isel", k, fn);
275 } else
276 r0 = i.arg[1];
277 if (fn->tmp[r0.val].slot != -1)
278 err("unlikely argument %%%s in %s",
279 fn->tmp[r0.val].name, optab[i.op].name);
280 if (i.op == Odiv || i.op == Orem) {
281 emit(Oxidiv, k, R, r0, R);
282 emit(Osign, k, TMP(RDX), TMP(RAX), R);
283 } else {
284 emit(Oxdiv, k, R, r0, R);
285 emit(Ocopy, k, TMP(RDX), CON_Z, R);
287 emit(Ocopy, k, TMP(RAX), i.arg[0], R);
288 fixarg(&curi->arg[0], k, curi, fn);
289 if (rtype(i.arg[1]) == RCon)
290 emit(Ocopy, k, r0, i.arg[1], R);
291 break;
292 case Osar:
293 case Oshr:
294 case Oshl:
295 r0 = i.arg[1];
296 if (rtype(r0) == RCon)
297 goto Emit;
298 if (fn->tmp[r0.val].slot != -1)
299 err("unlikely argument %%%s in %s",
300 fn->tmp[r0.val].name, optab[i.op].name);
301 i.arg[1] = TMP(RCX);
302 emit(Ocopy, Kw, R, TMP(RCX), R);
303 emiti(i);
304 i1 = curi;
305 emit(Ocopy, Kw, TMP(RCX), r0, R);
306 fixarg(&i1->arg[0], argcls(&i, 0), i1, fn);
307 break;
308 case Ouwtof:
309 r0 = newtmp("utof", Kl, fn);
310 emit(Osltof, k, i.to, r0, R);
311 emit(Oextuw, Kl, r0, i.arg[0], R);
312 fixarg(&curi->arg[0], k, curi, fn);
313 break;
314 case Oultof:
315 /* %mask =l and %arg.0, 1
316 * %isbig =l shr %arg.0, 63
317 * %divided =l shr %arg.0, %isbig
318 * %or =l or %mask, %divided
319 * %float =d sltof %or
320 * %cast =l cast %float
321 * %addend =l shl %isbig, 52
322 * %sum =l add %cast, %addend
323 * %result =d cast %sum
325 r0 = newtmp("utof", k, fn);
326 if (k == Ks)
327 kc = Kw, sh = 23;
328 else
329 kc = Kl, sh = 52;
330 for (j=0; j<4; j++)
331 tmp[j] = newtmp("utof", Kl, fn);
332 for (; j<7; j++)
333 tmp[j] = newtmp("utof", kc, fn);
334 emit(Ocast, k, i.to, tmp[6], R);
335 emit(Oadd, kc, tmp[6], tmp[4], tmp[5]);
336 emit(Oshl, kc, tmp[5], tmp[1], getcon(sh, fn));
337 emit(Ocast, kc, tmp[4], r0, R);
338 emit(Osltof, k, r0, tmp[3], R);
339 emit(Oor, Kl, tmp[3], tmp[0], tmp[2]);
340 emit(Oshr, Kl, tmp[2], i.arg[0], tmp[1]);
341 sel(*curi++, 0, fn);
342 emit(Oshr, Kl, tmp[1], i.arg[0], getcon(63, fn));
343 fixarg(&curi->arg[0], Kl, curi, fn);
344 emit(Oand, Kl, tmp[0], i.arg[0], getcon(1, fn));
345 fixarg(&curi->arg[0], Kl, curi, fn);
346 break;
347 case Ostoui:
348 i.op = Ostosi;
349 kc = Ks;
350 tmp[4] = getcon(0xdf000000, fn);
351 goto Oftoui;
352 case Odtoui:
353 i.op = Odtosi;
354 kc = Kd;
355 tmp[4] = getcon(0xc3e0000000000000, fn);
356 Oftoui:
357 if (k == Kw) {
358 r0 = newtmp("ftou", Kl, fn);
359 emit(Ocopy, Kw, i.to, r0, R);
360 i.cls = Kl;
361 i.to = r0;
362 goto Emit;
364 /* %try0 =l {s,d}tosi %fp
365 * %mask =l sar %try0, 63
367 * mask is all ones if the first
368 * try was oob, all zeroes o.w.
370 * %fps ={s,d} sub %fp, (1<<63)
371 * %try1 =l {s,d}tosi %fps
373 * %tmp =l and %mask, %try1
374 * %res =l or %tmp, %try0
376 r0 = newtmp("ftou", kc, fn);
377 for (j=0; j<4; j++)
378 tmp[j] = newtmp("ftou", Kl, fn);
379 emit(Oor, Kl, i.to, tmp[0], tmp[3]);
380 emit(Oand, Kl, tmp[3], tmp[2], tmp[1]);
381 emit(i.op, Kl, tmp[2], r0, R);
382 emit(Oadd, kc, r0, tmp[4], i.arg[0]);
383 i1 = curi; /* fixarg() can change curi */
384 fixarg(&i1->arg[0], kc, i1, fn);
385 fixarg(&i1->arg[1], kc, i1, fn);
386 emit(Osar, Kl, tmp[1], tmp[0], getcon(63, fn));
387 emit(i.op, Kl, tmp[0], i.arg[0], R);
388 fixarg(&curi->arg[0], Kl, curi, fn);
389 break;
390 case Onop:
391 break;
392 case Ostored:
393 case Ostores:
394 case Ostorel:
395 case Ostorew:
396 case Ostoreh:
397 case Ostoreb:
398 if (rtype(i.arg[0]) == RCon) {
399 if (i.op == Ostored)
400 i.op = Ostorel;
401 if (i.op == Ostores)
402 i.op = Ostorew;
404 seladdr(&i.arg[1], tn, fn);
405 goto Emit;
406 case_Oload:
407 seladdr(&i.arg[0], tn, fn);
408 goto Emit;
409 case Odbgloc:
410 case Ocall:
411 case Osalloc:
412 case Ocopy:
413 case Oadd:
414 case Osub:
415 case Oneg:
416 case Omul:
417 case Oand:
418 case Oor:
419 case Oxor:
420 case Oxtest:
421 case Ostosi:
422 case Odtosi:
423 case Oswtof:
424 case Osltof:
425 case Oexts:
426 case Otruncd:
427 case Ocast:
428 case_OExt:
429 Emit:
430 emiti(i);
431 i1 = curi; /* fixarg() can change curi */
432 fixarg(&i1->arg[0], argcls(&i, 0), i1, fn);
433 fixarg(&i1->arg[1], argcls(&i, 1), i1, fn);
434 break;
435 case Oalloc4:
436 case Oalloc8:
437 case Oalloc16:
438 salloc(i.to, i.arg[0], fn);
439 break;
440 default:
441 if (isext(i.op))
442 goto case_OExt;
443 if (isload(i.op))
444 goto case_Oload;
445 if (iscmp(i.op, &kc, &x)) {
446 switch (x) {
447 case NCmpI+Cfeq:
448 /* zf is set when operands are
449 * unordered, so we may have to
450 * check pf
452 r0 = newtmp("isel", Kw, fn);
453 r1 = newtmp("isel", Kw, fn);
454 emit(Oand, Kw, i.to, r0, r1);
455 emit(Oflagfo, k, r1, R, R);
456 i.to = r0;
457 break;
458 case NCmpI+Cfne:
459 r0 = newtmp("isel", Kw, fn);
460 r1 = newtmp("isel", Kw, fn);
461 emit(Oor, Kw, i.to, r0, r1);
462 emit(Oflagfuo, k, r1, R, R);
463 i.to = r0;
464 break;
466 swap = cmpswap(i.arg, x);
467 if (swap)
468 x = cmpop(x);
469 emit(Oflag+x, k, i.to, R, R);
470 selcmp(i.arg, kc, swap, fn);
471 break;
473 die("unknown instruction %s", optab[i.op].name);
476 while (i0>curi && --i0) {
477 assert(rslot(i0->arg[0], fn) == -1);
478 assert(rslot(i0->arg[1], fn) == -1);
482 static Ins *
483 flagi(Ins *i0, Ins *i)
485 while (i>i0) {
486 i--;
487 if (amd64_op[i->op].zflag)
488 return i;
489 if (amd64_op[i->op].lflag)
490 continue;
491 return 0;
493 return 0;
496 static void
497 seljmp(Blk *b, Fn *fn)
499 Ref r;
500 int c, k, swap;
501 Ins *fi;
502 Tmp *t;
504 if (b->jmp.type == Jret0
505 || b->jmp.type == Jjmp
506 || b->jmp.type == Jhlt)
507 return;
508 assert(b->jmp.type == Jjnz);
509 r = b->jmp.arg;
510 t = &fn->tmp[r.val];
511 b->jmp.arg = R;
512 assert(rtype(r) == RTmp);
513 if (b->s1 == b->s2) {
514 chuse(r, -1, fn);
515 b->jmp.type = Jjmp;
516 b->s2 = 0;
517 return;
519 fi = flagi(b->ins, &b->ins[b->nins]);
520 if (!fi || !req(fi->to, r)) {
521 selcmp((Ref[2]){r, CON_Z}, Kw, 0, fn);
522 b->jmp.type = Jjf + Cine;
524 else if (iscmp(fi->op, &k, &c)
525 && c != NCmpI+Cfeq /* see sel() */
526 && c != NCmpI+Cfne) {
527 swap = cmpswap(fi->arg, c);
528 if (swap)
529 c = cmpop(c);
530 if (t->nuse == 1) {
531 selcmp(fi->arg, k, swap, fn);
532 *fi = (Ins){.op = Onop};
534 b->jmp.type = Jjf + c;
536 else if (fi->op == Oand && t->nuse == 1
537 && (rtype(fi->arg[0]) == RTmp ||
538 rtype(fi->arg[1]) == RTmp)) {
539 fi->op = Oxtest;
540 fi->to = R;
541 b->jmp.type = Jjf + Cine;
542 if (rtype(fi->arg[1]) == RCon) {
543 r = fi->arg[1];
544 fi->arg[1] = fi->arg[0];
545 fi->arg[0] = r;
548 else {
549 /* since flags are not tracked in liveness,
550 * the result of the flag-setting instruction
551 * has to be marked as live
553 if (t->nuse == 1)
554 emit(Ocopy, Kw, R, r, R);
555 b->jmp.type = Jjf + Cine;
/* identifiers of the addressing patterns;
 * they index matcher[] and are used as bit
 * positions in match[]
 */
enum {
	Pob   = 0,
	Pbis  = 1,
	Pois  = 2,
	Pobis = 3,
	Pbi1  = 4,
	Pobi1 = 5,
};
568 /* mgen generated code
570 * (with-vars (o b i s)
571 * (patterns
572 * (ob (add (con o) (tmp b)))
573 * (bis (add (tmp b) (mul (tmp i) (con s 1 2 4 8))))
574 * (ois (add (con o) (mul (tmp i) (con s 1 2 4 8))))
575 * (obis (add (con o) (tmp b) (mul (tmp i) (con s 1 2 4 8))))
576 * (bi1 (add (tmp b) (tmp i)))
577 * (obi1 (add (con o) (tmp b) (tmp i)))
578 * ))
581 static int
582 opn(int op, int l, int r)
584 static uchar Oaddtbl[91] = {
586 2,2,
587 4,4,5,
588 6,6,8,8,
589 4,4,9,10,9,
590 7,7,5,8,9,5,
591 4,4,12,10,12,12,12,
592 4,4,9,10,9,9,12,9,
593 11,11,5,8,9,5,12,9,5,
594 7,7,5,8,9,5,12,9,5,5,
595 11,11,5,8,9,5,12,9,5,5,5,
596 4,4,9,10,9,9,12,9,9,9,9,9,
597 7,7,5,8,9,5,12,9,5,5,5,9,5,
599 int t;
601 if (l < r)
602 t = l, l = r, r = t;
603 switch (op) {
604 case Omul:
605 if (2 <= l)
606 if (r == 0) {
607 return 3;
609 return 2;
610 case Oadd:
611 return Oaddtbl[(l + l*l)/2 + r];
612 default:
613 return 2;
617 static int
618 refn(Ref r, Num *tn, Con *con)
620 int64_t n;
622 switch (rtype(r)) {
623 case RTmp:
624 if (!tn[r.val].n)
625 tn[r.val].n = 2;
626 return tn[r.val].n;
627 case RCon:
628 if (con[r.val].type != CBits)
629 return 1;
630 n = con[r.val].bits.i;
631 if (n == 8 || n == 4 || n == 2 || n == 1)
632 return 0;
633 return 1;
634 default:
635 return INT_MIN;
639 static bits match[13] = {
640 [4] = BIT(Pob),
641 [5] = BIT(Pbi1),
642 [6] = BIT(Pob) | BIT(Pois),
643 [7] = BIT(Pob) | BIT(Pobi1),
644 [8] = BIT(Pbi1) | BIT(Pbis),
645 [9] = BIT(Pbi1) | BIT(Pobi1),
646 [10] = BIT(Pbi1) | BIT(Pbis) | BIT(Pobi1) | BIT(Pobis),
647 [11] = BIT(Pob) | BIT(Pobi1) | BIT(Pobis),
648 [12] = BIT(Pbi1) | BIT(Pobi1) | BIT(Pobis),
651 static uchar *matcher[] = {
652 [Pbi1] = (uchar[]){
653 1,3,1,3,2,0
655 [Pbis] = (uchar[]){
656 5,1,8,5,27,1,5,1,2,5,13,3,1,1,3,3,3,2,0,1,
657 3,3,3,2,3,1,0,1,29
659 [Pob] = (uchar[]){
660 1,3,0,3,1,0
662 [Pobi1] = (uchar[]){
663 5,3,9,9,10,33,12,35,45,1,5,3,11,9,7,9,4,9,
664 17,1,3,0,3,1,3,2,0,3,1,1,3,0,34,1,37,1,5,2,
665 5,7,2,7,8,37,29,1,3,0,1,32
667 [Pobis] = (uchar[]){
668 5,2,10,7,11,19,49,1,1,3,3,3,2,1,3,0,3,1,0,
669 1,3,0,5,1,8,5,25,1,5,1,2,5,13,3,1,1,3,3,3,
670 2,0,1,3,3,3,2,26,1,51,1,5,1,6,5,9,1,3,0,51,
671 3,1,1,3,0,45
673 [Pois] = (uchar[]){
674 1,3,0,1,3,3,3,2,0
678 /* end of generated code */
680 static void
681 anumber(Num *tn, Blk *b, Con *con)
683 Ins *i;
684 Num *n;
686 for (i=b->ins; i<&b->ins[b->nins]; i++) {
687 if (rtype(i->to) != RTmp)
688 continue;
689 n = &tn[i->to.val];
690 n->l = i->arg[0];
691 n->r = i->arg[1];
692 n->nl = refn(n->l, tn, con);
693 n->nr = refn(n->r, tn, con);
694 n->n = opn(i->op, n->nl, n->nr);
698 static Ref
699 adisp(Con *c, Num *tn, Ref r, Fn *fn, int s)
701 Ref v[2];
702 int n;
704 while (!req(r, R)) {
705 assert(rtype(r) == RTmp);
706 n = refn(r, tn, fn->con);
707 if (!(match[n] & BIT(Pob)))
708 break;
709 runmatch(matcher[Pob], tn, r, v);
710 assert(rtype(v[0]) == RCon);
711 addcon(c, &fn->con[v[0].val], s);
712 r = v[1];
714 return r;
717 static int
718 amatch(Addr *a, Num *tn, Ref r, Fn *fn)
720 static int pat[] = {Pobis, Pobi1, Pbis, Pois, Pbi1, -1};
721 Ref ro, rb, ri, rs, v[4];
722 Con *c, co;
723 int s, n, *p;
725 if (rtype(r) != RTmp)
726 return 0;
728 n = refn(r, tn, fn->con);
729 memset(v, 0, sizeof v);
730 for (p=pat; *p>=0; p++)
731 if (match[n] & BIT(*p)) {
732 runmatch(matcher[*p], tn, r, v);
733 break;
735 if (*p < 0)
736 v[1] = r;
738 memset(&co, 0, sizeof co);
739 ro = v[0];
740 rb = adisp(&co, tn, v[1], fn, 1);
741 ri = v[2];
742 rs = v[3];
743 s = 1;
745 if (*p < 0 && co.type != CUndef)
746 if (amatch(a, tn, rb, fn))
747 return addcon(&a->offset, &co, 1);
748 if (!req(ro, R)) {
749 assert(rtype(ro) == RCon);
750 c = &fn->con[ro.val];
751 if (!addcon(&co, c, 1))
752 return 0;
754 if (!req(rs, R)) {
755 assert(rtype(rs) == RCon);
756 c = &fn->con[rs.val];
757 assert(c->type == CBits);
758 s = c->bits.i;
760 ri = adisp(&co, tn, ri, fn, s);
761 *a = (Addr){co, rb, ri, s};
763 if (rtype(ri) == RTmp)
764 if (fn->tmp[ri.val].slot != -1) {
765 if (a->scale != 1
766 || fn->tmp[rb.val].slot != -1)
767 return 0;
768 a->base = ri;
769 a->index = rb;
771 if (!req(a->base, R)) {
772 assert(rtype(a->base) == RTmp);
773 s = fn->tmp[a->base.val].slot;
774 if (s != -1)
775 a->base = SLOT(s);
777 return 1;
780 /* instruction selection
781 * requires use counts (as given by parsing)
783 void
784 amd64_isel(Fn *fn)
786 Blk *b, **sb;
787 Ins *i;
788 Phi *p;
789 uint a;
790 int n, al;
791 int64_t sz;
792 Num *num;
794 /* assign slots to fast allocs */
795 b = fn->start;
796 /* specific to NAlign == 3 */ /* or change n=4 and sz /= 4 below */
797 for (al=Oalloc, n=4; al<=Oalloc1; al++, n*=2)
798 for (i=b->ins; i<&b->ins[b->nins]; i++)
799 if (i->op == al) {
800 if (rtype(i->arg[0]) != RCon)
801 break;
802 sz = fn->con[i->arg[0].val].bits.i;
803 if (sz < 0 || sz >= INT_MAX-15)
804 err("invalid alloc size %"PRId64, sz);
805 sz = (sz + n-1) & -n;
806 sz /= 4;
807 if (sz > INT_MAX - fn->slot)
808 die("alloc too large");
809 fn->tmp[i->to.val].slot = fn->slot;
810 fn->slot += sz;
811 *i = (Ins){.op = Onop};
814 /* process basic blocks */
815 n = fn->ntmp;
816 num = emalloc(n * sizeof num[0]);
817 for (b=fn->start; b; b=b->link) {
818 curi = &insb[NIns];
819 for (sb=(Blk*[3]){b->s1, b->s2, 0}; *sb; sb++)
820 for (p=(*sb)->phi; p; p=p->link) {
821 for (a=0; p->blk[a] != b; a++)
822 assert(a+1 < p->narg);
823 fixarg(&p->arg[a], p->cls, 0, fn);
825 memset(num, 0, n * sizeof num[0]);
826 anumber(num, b, fn->con);
827 seljmp(b, fn);
828 for (i=&b->ins[b->nins]; i!=b->ins;)
829 sel(*--i, num, fn);
830 b->nins = &insb[NIns] - curi;
831 idup(&b->ins, curi, b->nins);
833 free(num);
835 if (debug['I']) {
836 fprintf(stderr, "\n> After instruction selection:\n");
837 printfn(fn, stderr);