no mul->shl as it confuses address matching
[qbe.git] / arm64 / isel.c
blob062beb31ee74139fe4fa21b56eb528aed1ad7237
1 #include "all.h"
/* classification of an integer constant
 * as computed by imm(); the order matters:
 * imm() starts from Iplo12 or Inlo12 and
 * adds 1 or 2 to reach the other members
 * of the same group
 */
enum Imm {
	Iother, /* anything else */
	Iplo12, /* non-negative, only bits 0..11 set */
	Iphi12, /* non-negative, only bits 12..23 set */
	Iplo24, /* non-negative, fits in bits 0..23 */
	Inlo12, /* negative, magnitude has only bits 0..11 set */
	Inhi12, /* negative, magnitude has only bits 12..23 set */
	Inlo24  /* negative, magnitude fits in bits 0..23 */
};
13 static enum Imm
14 imm(Con *c, int k, int64_t *pn)
16 int64_t n;
17 int i;
19 if (c->type != CBits)
20 return Iother;
21 n = c->bits.i;
22 if (k == Kw)
23 n = (int32_t)n;
24 i = Iplo12;
25 if (n < 0) {
26 i = Inlo12;
27 n = -n;
29 *pn = n;
30 if ((n & 0x000fff) == n)
31 return i;
32 if ((n & 0xfff000) == n)
33 return i + 1;
34 if ((n & 0xffffff) == n)
35 return i + 2;
36 return Iother;
39 int
40 arm64_logimm(uint64_t x, int k)
42 uint64_t n;
44 if (k == Kw)
45 x = (x & 0xffffffff) | x << 32;
46 if (x & 1)
47 x = ~x;
48 if (x == 0)
49 return 0;
50 if (x == 0xaaaaaaaaaaaaaaaa)
51 return 1;
52 n = x & 0xf;
53 if (0x1111111111111111 * n == x)
54 goto Check;
55 n = x & 0xff;
56 if (0x0101010101010101 * n == x)
57 goto Check;
58 n = x & 0xffff;
59 if (0x0001000100010001 * n == x)
60 goto Check;
61 n = x & 0xffffffff;
62 if (0x0000000100000001 * n == x)
63 goto Check;
64 n = x;
65 Check:
66 return (n & (n + (n & -n))) == 0;
69 static void
70 fixarg(Ref *pr, int k, int phi, Fn *fn)
72 char buf[32];
73 Con *c, cc;
74 Ref r0, r1, r2, r3;
75 int s, n;
77 r0 = *pr;
78 switch (rtype(r0)) {
79 case RCon:
80 c = &fn->con[r0.val];
81 if (T.apple
82 && c->type == CAddr
83 && c->sym.type == SThr) {
84 r1 = newtmp("isel", Kl, fn);
85 *pr = r1;
86 if (c->bits.i) {
87 r2 = newtmp("isel", Kl, fn);
88 cc = (Con){.type = CBits};
89 cc.bits.i = c->bits.i;
90 r3 = newcon(&cc, fn);
91 emit(Oadd, Kl, r1, r2, r3);
92 r1 = r2;
94 emit(Ocopy, Kl, r1, TMP(R0), R);
95 r1 = newtmp("isel", Kl, fn);
96 r2 = newtmp("isel", Kl, fn);
97 emit(Ocall, 0, R, r1, CALL(33));
98 emit(Ocopy, Kl, TMP(R0), r2, R);
99 emit(Oload, Kl, r1, r2, R);
100 cc = *c;
101 cc.bits.i = 0;
102 r3 = newcon(&cc, fn);
103 emit(Ocopy, Kl, r2, r3, R);
104 break;
106 if (KBASE(k) == 0 && phi)
107 return;
108 r1 = newtmp("isel", k, fn);
109 if (KBASE(k) == 0) {
110 emit(Ocopy, k, r1, r0, R);
111 } else {
112 n = stashbits(&c->bits, KWIDE(k) ? 8 : 4);
113 vgrow(&fn->con, ++fn->ncon);
114 c = &fn->con[fn->ncon-1];
115 sprintf(buf, "\"%sfp%d\"", T.asloc, n);
116 *c = (Con){.type = CAddr};
117 c->sym.id = intern(buf);
118 r2 = newtmp("isel", Kl, fn);
119 emit(Oload, k, r1, r2, R);
120 emit(Ocopy, Kl, r2, CON(c-fn->con), R);
122 *pr = r1;
123 break;
124 case RTmp:
125 s = fn->tmp[r0.val].slot;
126 if (s == -1)
127 break;
128 r1 = newtmp("isel", Kl, fn);
129 emit(Oaddr, Kl, r1, SLOT(s), R);
130 *pr = r1;
131 break;
135 static int
136 selcmp(Ref arg[2], int k, Fn *fn)
138 Ref r, *iarg;
139 Con *c;
140 int swap, cmp, fix;
141 int64_t n;
143 if (KBASE(k) == 1) {
144 emit(Oafcmp, k, R, arg[0], arg[1]);
145 iarg = curi->arg;
146 fixarg(&iarg[0], k, 0, fn);
147 fixarg(&iarg[1], k, 0, fn);
148 return 0;
150 swap = rtype(arg[0]) == RCon;
151 if (swap) {
152 r = arg[1];
153 arg[1] = arg[0];
154 arg[0] = r;
156 fix = 1;
157 cmp = Oacmp;
158 r = arg[1];
159 if (rtype(r) == RCon) {
160 c = &fn->con[r.val];
161 switch (imm(c, k, &n)) {
162 default:
163 break;
164 case Iplo12:
165 case Iphi12:
166 fix = 0;
167 break;
168 case Inlo12:
169 case Inhi12:
170 cmp = Oacmn;
171 r = getcon(n, fn);
172 fix = 0;
173 break;
176 emit(cmp, k, R, arg[0], r);
177 iarg = curi->arg;
178 fixarg(&iarg[0], k, 0, fn);
179 if (fix)
180 fixarg(&iarg[1], k, 0, fn);
181 return swap;
184 static int
185 callable(Ref r, Fn *fn)
187 Con *c;
189 if (rtype(r) == RTmp)
190 return 1;
191 if (rtype(r) == RCon) {
192 c = &fn->con[r.val];
193 if (c->type == CAddr)
194 if (c->bits.i == 0)
195 return 1;
197 return 0;
200 static void
201 sel(Ins i, Fn *fn)
203 Ref *iarg;
204 Ins *i0;
205 int ck, cc;
207 if (INRANGE(i.op, Oalloc, Oalloc1)) {
208 i0 = curi - 1;
209 salloc(i.to, i.arg[0], fn);
210 fixarg(&i0->arg[0], Kl, 0, fn);
211 return;
213 if (iscmp(i.op, &ck, &cc)) {
214 emit(Oflag, i.cls, i.to, R, R);
215 i0 = curi;
216 if (selcmp(i.arg, ck, fn))
217 i0->op += cmpop(cc);
218 else
219 i0->op += cc;
220 return;
222 if (i.op == Ocall)
223 if (callable(i.arg[0], fn)) {
224 emiti(i);
225 return;
227 if (i.op != Onop) {
228 emiti(i);
229 iarg = curi->arg; /* fixarg() can change curi */
230 fixarg(&iarg[0], argcls(&i, 0), 0, fn);
231 fixarg(&iarg[1], argcls(&i, 1), 0, fn);
235 static void
236 seljmp(Blk *b, Fn *fn)
238 Ref r;
239 Ins *i, *ir;
240 int ck, cc, use;
242 if (b->jmp.type == Jret0
243 || b->jmp.type == Jjmp
244 || b->jmp.type == Jhlt)
245 return;
246 assert(b->jmp.type == Jjnz);
247 r = b->jmp.arg;
248 use = -1;
249 b->jmp.arg = R;
250 ir = 0;
251 i = &b->ins[b->nins];
252 while (i > b->ins)
253 if (req((--i)->to, r)) {
254 use = fn->tmp[r.val].nuse;
255 ir = i;
256 break;
258 if (ir && use == 1
259 && iscmp(ir->op, &ck, &cc)) {
260 if (selcmp(ir->arg, ck, fn))
261 cc = cmpop(cc);
262 b->jmp.type = Jjf + cc;
263 *ir = (Ins){.op = Onop};
265 else {
266 selcmp((Ref[]){r, CON_Z}, Kw, fn);
267 b->jmp.type = Jjfine;
271 void
272 arm64_isel(Fn *fn)
274 Blk *b, **sb;
275 Ins *i;
276 Phi *p;
277 uint n, al;
278 int64_t sz;
280 /* assign slots to fast allocs */
281 b = fn->start;
282 /* specific to NAlign == 3 */ /* or change n=4 and sz /= 4 below */
283 for (al=Oalloc, n=4; al<=Oalloc1; al++, n*=2)
284 for (i=b->ins; i<&b->ins[b->nins]; i++)
285 if (i->op == al) {
286 if (rtype(i->arg[0]) != RCon)
287 break;
288 sz = fn->con[i->arg[0].val].bits.i;
289 if (sz < 0 || sz >= INT_MAX-15)
290 err("invalid alloc size %"PRId64, sz);
291 sz = (sz + n-1) & -n;
292 sz /= 4;
293 fn->tmp[i->to.val].slot = fn->slot;
294 fn->slot += sz;
295 *i = (Ins){.op = Onop};
298 for (b=fn->start; b; b=b->link) {
299 curi = &insb[NIns];
300 for (sb=(Blk*[3]){b->s1, b->s2, 0}; *sb; sb++)
301 for (p=(*sb)->phi; p; p=p->link) {
302 for (n=0; p->blk[n] != b; n++)
303 assert(n+1 < p->narg);
304 fixarg(&p->arg[n], p->cls, 1, fn);
306 seljmp(b, fn);
307 for (i=&b->ins[b->nins]; i!=b->ins;)
308 sel(*--i, fn);
309 b->nins = &insb[NIns] - curi;
310 idup(&b->ins, curi, b->nins);
313 if (debug['I']) {
314 fprintf(stderr, "\n> After instruction selection:\n");
315 printfn(fn, stderr);