bump NString and NPred
[qbe.git] / load.c
blob8e2dd6470c2a85af9ae6ac511ae6c615cfd9bba3
1 #include "all.h"
3 #define MASK(w) (BIT(8*(w)-1)*2-1) /* must work when w==8 */
5 typedef struct Loc Loc;
6 typedef struct Slice Slice;
7 typedef struct Insert Insert;
10 struct Loc {
11 enum {
12 LRoot, /* right above the original load */
13 LLoad, /* inserting a load is allowed */
14 LNoLoad, /* only scalar operations allowed */
15 } type;
16 uint off;
17 Blk *blk;
20 struct Slice {
21 Ref ref;
22 short sz;
23 short cls; /* load class */
26 struct Insert {
27 uint isphi:1;
28 uint num:31;
29 uint bid;
30 uint off;
31 union {
32 Ins ins;
33 struct {
34 Slice m;
35 Phi *p;
36 } phi;
37 } new;
40 static Fn *curf;
41 static uint inum; /* current insertion number */
42 static Insert *ilog; /* global insertion log */
43 static uint nlog; /* number of entries in the log */
45 int
46 loadsz(Ins *l)
48 switch (l->op) {
49 case Oloadsb: case Oloadub: return 1;
50 case Oloadsh: case Oloaduh: return 2;
51 case Oloadsw: case Oloaduw: return 4;
52 case Oload: return KWIDE(l->cls) ? 8 : 4;
54 die("unreachable");
57 int
58 storesz(Ins *s)
60 switch (s->op) {
61 case Ostoreb: return 1;
62 case Ostoreh: return 2;
63 case Ostorew: case Ostores: return 4;
64 case Ostorel: case Ostored: return 8;
66 die("unreachable");
69 static Ref
70 iins(int cls, int op, Ref a0, Ref a1, Loc *l)
72 Insert *ist;
74 vgrow(&ilog, ++nlog);
75 ist = &ilog[nlog-1];
76 ist->isphi = 0;
77 ist->num = inum++;
78 ist->bid = l->blk->id;
79 ist->off = l->off;
80 ist->new.ins = (Ins){op, cls, R, {a0, a1}};
81 return ist->new.ins.to = newtmp("ld", cls, curf);
84 static void
85 cast(Ref *r, int cls, Loc *l)
87 int cls0;
89 if (rtype(*r) == RCon)
90 return;
91 assert(rtype(*r) == RTmp);
92 cls0 = curf->tmp[r->val].cls;
93 if (cls0 == cls || (cls == Kw && cls0 == Kl))
94 return;
95 assert(!KWIDE(cls0) || KWIDE(cls));
96 if (KWIDE(cls) == KWIDE(cls0))
97 *r = iins(cls, Ocast, *r, R, l);
98 else {
99 assert(cls == Kl);
100 if (cls0 == Ks)
101 *r = iins(Kw, Ocast, *r, R, l);
102 *r = iins(Kl, Oextuw, *r, R, l);
106 static inline void
107 mask(int cls, Ref *r, bits msk, Loc *l)
109 cast(r, cls, l);
110 *r = iins(cls, Oand, *r, getcon(msk, curf), l);
113 static Ref
114 load(Slice sl, bits msk, Loc *l)
116 Alias *a;
117 Ref r, r1;
118 int ld, cls, all, c;
120 ld = (int[]){
121 [1] = Oloadub,
122 [2] = Oloaduh,
123 [4] = Oloaduw,
124 [8] = Oload
125 }[sl.sz];
126 all = msk == MASK(sl.sz);
127 if (all)
128 cls = sl.cls;
129 else
130 cls = sl.sz > 4 ? Kl : Kw;
131 r = sl.ref;
132 /* sl.ref might not be live here,
133 * but its alias base ref will be
134 * (see killsl() below) */
135 if (rtype(r) == RTmp) {
136 a = &curf->tmp[r.val].alias;
137 switch (a->type) {
138 default:
139 die("unreachable");
140 case ALoc:
141 case AEsc:
142 case AUnk:
143 r = a->base;
144 if (!a->offset)
145 break;
146 r1 = getcon(a->offset, curf);
147 r = iins(Kl, Oadd, r, r1, l);
148 break;
149 case ACon:
150 case ASym:
151 c = curf->ncon++;
152 vgrow(&curf->con, curf->ncon);
153 curf->con[c].type = CAddr;
154 curf->con[c].label = a->label;
155 curf->con[c].bits.i = a->offset;
156 curf->con[c].local = 0;
157 r = CON(c);
158 break;
161 r = iins(cls, ld, r, R, l);
162 if (!all)
163 mask(cls, &r, msk, l);
164 return r;
167 static int
168 killsl(Ref r, Slice sl)
170 Alias *a;
172 if (rtype(sl.ref) != RTmp)
173 return 0;
174 a = &curf->tmp[sl.ref.val].alias;
175 switch (a->type) {
176 default: die("unreachable");
177 case ALoc:
178 case AEsc:
179 case AUnk: return req(a->base, r);
180 case ACon:
181 case ASym: return 0;
185 /* returns a ref containing the contents of the slice
186 * passed as argument, all the bits set to 0 in the
187 * mask argument are zeroed in the result;
188 * the returned ref has an integer class when the
189 * mask does not cover all the bits of the slice,
190 * otherwise, it has class sl.cls
191 * the procedure returns R when it fails */
192 static Ref
193 def(Slice sl, bits msk, Blk *b, Ins *i, Loc *il)
195 Blk *bp;
196 bits msk1, msks;
197 int off, cls, cls1, op, sz, ld;
198 uint np, oldl, oldt;
199 Ref r, r1;
200 Phi *p;
201 Insert *ist;
202 Loc l;
204 /* invariants:
205 * -1- b dominates il->blk; so we can use
206 * temporaries of b in il->blk
207 * -2- if il->type != LNoLoad, then il->blk
208 * postdominates the original load; so it
209 * is safe to load in il->blk
210 * -3- if il->type != LNoLoad, then b
211 * postdominates il->blk (and by 2, the
212 * original load)
214 assert(dom(b, il->blk));
215 oldl = nlog;
216 oldt = curf->ntmp;
217 if (0) {
218 Load:
219 curf->ntmp = oldt;
220 nlog = oldl;
221 if (il->type != LLoad)
222 return R;
223 return load(sl, msk, il);
226 if (!i)
227 i = &b->ins[b->nins];
228 cls = sl.sz > 4 ? Kl : Kw;
229 msks = MASK(sl.sz);
231 while (i > b->ins) {
232 --i;
233 if (killsl(i->to, sl)
234 || (iscall(i->op) && escapes(sl.ref, curf)))
235 goto Load;
236 ld = isload(i->op);
237 if (ld) {
238 sz = loadsz(i);
239 r1 = i->arg[0];
240 r = i->to;
241 } else if (isstore(i->op)) {
242 sz = storesz(i);
243 r1 = i->arg[1];
244 r = i->arg[0];
245 } else
246 continue;
247 switch (alias(sl.ref, sl.sz, r1, sz, &off, curf)) {
248 case MustAlias:
249 if (off < 0) {
250 off = -off;
251 msk1 = (MASK(sz) << 8*off) & msks;
252 op = Oshl;
253 } else {
254 msk1 = (MASK(sz) >> 8*off) & msks;
255 op = Oshr;
257 if ((msk1 & msk) == 0)
258 break;
259 if (off) {
260 cls1 = cls;
261 if (op == Oshr && off + sl.sz > 4)
262 cls1 = Kl;
263 cast(&r, cls1, il);
264 r1 = getcon(8*off, curf);
265 r = iins(cls1, op, r, r1, il);
267 if ((msk1 & msk) != msk1 || off + sz < sl.sz)
268 mask(cls, &r, msk1 & msk, il);
269 if ((msk & ~msk1) != 0) {
270 r1 = def(sl, msk & ~msk1, b, i, il);
271 if (req(r1, R))
272 goto Load;
273 r = iins(cls, Oor, r, r1, il);
275 if (msk == msks)
276 cast(&r, sl.cls, il);
277 return r;
278 case MayAlias:
279 if (ld)
280 break;
281 else
282 goto Load;
283 case NoAlias:
284 break;
285 default:
286 die("unreachable");
290 for (ist=ilog; ist<&ilog[nlog]; ++ist)
291 if (ist->isphi && ist->bid == b->id)
292 if (req(ist->new.phi.m.ref, sl.ref))
293 if (ist->new.phi.m.sz == sl.sz) {
294 r = ist->new.phi.p->to;
295 if (msk != msks)
296 mask(cls, &r, msk, il);
297 else
298 cast(&r, sl.cls, il);
299 return r;
302 for (p=b->phi; p; p=p->link)
303 if (killsl(p->to, sl))
304 /* scanning predecessors in that
305 * case would be unsafe */
306 goto Load;
308 if (b->npred == 0)
309 goto Load;
310 if (b->npred == 1) {
311 bp = b->pred[0];
312 assert(bp->loop >= il->blk->loop);
313 l = *il;
314 if (bp->s2)
315 l.type = LNoLoad;
316 r1 = def(sl, msk, bp, 0, &l);
317 if (req(r1, R))
318 goto Load;
319 return r1;
322 r = newtmp("ld", sl.cls, curf);
323 p = alloc(sizeof *p);
324 vgrow(&ilog, ++nlog);
325 ist = &ilog[nlog-1];
326 ist->isphi = 1;
327 ist->bid = b->id;
328 ist->new.phi.m = sl;
329 ist->new.phi.p = p;
330 p->to = r;
331 p->cls = sl.cls;
332 p->narg = b->npred;
333 for (np=0; np<b->npred; ++np) {
334 bp = b->pred[np];
335 if (!bp->s2
336 && il->type != LNoLoad
337 && bp->loop < il->blk->loop)
338 l.type = LLoad;
339 else
340 l.type = LNoLoad;
341 l.blk = bp;
342 l.off = bp->nins;
343 r1 = def(sl, msks, bp, 0, &l);
344 if (req(r1, R))
345 goto Load;
346 p->arg[np] = r1;
347 p->blk[np] = bp;
349 if (msk != msks)
350 mask(cls, &r, msk, il);
351 return r;
354 static int
355 icmp(const void *pa, const void *pb)
357 Insert *a, *b;
358 int c;
360 a = (Insert *)pa;
361 b = (Insert *)pb;
362 if ((c = a->bid - b->bid))
363 return c;
364 if (a->isphi && b->isphi)
365 return 0;
366 if (a->isphi)
367 return -1;
368 if (b->isphi)
369 return +1;
370 if ((c = a->off - b->off))
371 return c;
372 return a->num - b->num;
375 /* require rpo ssa alias */
376 void
377 loadopt(Fn *fn)
379 Ins *i, *ib;
380 Blk *b;
381 int sz;
382 uint n, ni, ext, nt;
383 Insert *ist;
384 Slice sl;
385 Loc l;
387 curf = fn;
388 ilog = vnew(0, sizeof ilog[0], Pheap);
389 nlog = 0;
390 inum = 0;
391 for (b=fn->start; b; b=b->link)
392 for (i=b->ins; i<&b->ins[b->nins]; ++i) {
393 if (!isload(i->op))
394 continue;
395 sz = loadsz(i);
396 sl = (Slice){i->arg[0], sz, i->cls};
397 l = (Loc){LRoot, i-b->ins, b};
398 i->arg[1] = def(sl, MASK(sz), b, i, &l);
400 qsort(ilog, nlog, sizeof ilog[0], icmp);
401 vgrow(&ilog, nlog+1);
402 ilog[nlog].bid = fn->nblk; /* add a sentinel */
403 ib = vnew(0, sizeof(Ins), Pheap);
404 for (ist=ilog, n=0; n<fn->nblk; ++n) {
405 b = fn->rpo[n];
406 for (; ist->bid == n && ist->isphi; ++ist) {
407 ist->new.phi.p->link = b->phi;
408 b->phi = ist->new.phi.p;
410 ni = 0;
411 nt = 0;
412 for (;;) {
413 if (ist->bid == n && ist->off == ni)
414 i = &ist++->new.ins;
415 else {
416 if (ni == b->nins)
417 break;
418 i = &b->ins[ni++];
419 if (isload(i->op)
420 && !req(i->arg[1], R)) {
421 ext = Oextsb + i->op - Oloadsb;
422 switch (i->op) {
423 default:
424 die("unreachable");
425 case Oloadsb:
426 case Oloadub:
427 case Oloadsh:
428 case Oloaduh:
429 i->op = ext;
430 break;
431 case Oloadsw:
432 case Oloaduw:
433 if (i->cls == Kl) {
434 i->op = ext;
435 break;
437 /* fall through */
438 case Oload:
439 i->op = Ocopy;
440 break;
442 i->arg[0] = i->arg[1];
443 i->arg[1] = R;
446 vgrow(&ib, ++nt);
447 ib[nt-1] = *i;
449 b->nins = nt;
450 idup(&b->ins, ib, nt);
452 vfree(ib);
453 vfree(ilog);
454 if (debug['M']) {
455 fprintf(stderr, "\n> After load elimination:\n");
456 printfn(fn, stderr);