rename Tmp.ins to be more descriptive
[qbe.git] / mem.c
blobe372f0f06fdeb5d1b465d1e5e962016dd2416b46
1 #include "all.h"
3 typedef struct Range Range;
4 typedef struct Slot Slot;
6 /* require use, maintains use counts */
7 void
8 promote(Fn *fn)
10 Blk *b;
11 Ins *i, *l;
12 Tmp *t;
13 Use *u, *ue;
14 int s, k;
16 /* promote uniform stack slots to temporaries */
17 b = fn->start;
18 for (i=b->ins; i<&b->ins[b->nins]; i++) {
19 if (Oalloc > i->op || i->op > Oalloc1)
20 continue;
21 /* specific to NAlign == 3 */
22 assert(rtype(i->to) == RTmp);
23 t = &fn->tmp[i->to.val];
24 if (t->ndef != 1)
25 goto Skip;
26 k = -1;
27 s = -1;
28 for (u=t->use; u<&t->use[t->nuse]; u++) {
29 if (u->type != UIns)
30 goto Skip;
31 l = u->u.ins;
32 if (isload(l->op))
33 if (s == -1 || s == loadsz(l)) {
34 s = loadsz(l);
35 continue;
37 if (isstore(l->op))
38 if (req(i->to, l->arg[1]) && !req(i->to, l->arg[0]))
39 if (s == -1 || s == storesz(l))
40 if (k == -1 || k == optab[l->op].argcls[0][0]) {
41 s = storesz(l);
42 k = optab[l->op].argcls[0][0];
43 continue;
45 goto Skip;
47 /* get rid of the alloc and replace uses */
48 *i = (Ins){.op = Onop};
49 t->ndef--;
50 ue = &t->use[t->nuse];
51 for (u=t->use; u!=ue; u++) {
52 l = u->u.ins;
53 if (isstore(l->op)) {
54 l->cls = k;
55 l->op = Ocopy;
56 l->to = l->arg[1];
57 l->arg[1] = R;
58 t->nuse--;
59 t->ndef++;
60 } else {
61 if (k == -1)
62 err("slot %%%s is read but never stored to",
63 fn->tmp[l->arg[0].val].name);
64 /* try to turn loads into copies so we
65 * can eliminate them later */
66 switch(l->op) {
67 case Oloadsw:
68 case Oloaduw:
69 if (k == Kl)
70 goto Extend;
71 /* fall through */
72 case Oload:
73 if (KBASE(k) != KBASE(l->cls))
74 l->op = Ocast;
75 else
76 l->op = Ocopy;
77 break;
78 default:
79 Extend:
80 l->op = Oextsb + (l->op - Oloadsb);
81 break;
85 Skip:;
87 if (debug['M']) {
88 fprintf(stderr, "\n> After slot promotion:\n");
89 printfn(fn, stderr);
93 /* [a, b) with 0 <= a */
94 struct Range {
95 int a, b;
98 struct Slot {
99 int t;
100 int sz;
101 bits m;
102 bits l;
103 Range r;
104 Slot *s;
107 static inline int
108 rin(Range r, int n)
110 return r.a <= n && n < r.b;
113 static inline int
114 rovlap(Range r0, Range r1)
116 return r0.b && r1.b && r0.a < r1.b && r1.a < r0.b;
119 static void
120 radd(Range *r, int n)
122 if (!r->b)
123 *r = (Range){n, n+1};
124 else if (n < r->a)
125 r->a = n;
126 else if (n >= r->b)
127 r->b = n+1;
130 static int
131 slot(Slot **ps, int64_t *off, Ref r, Fn *fn, Slot *sl)
133 Alias a;
134 Tmp *t;
136 getalias(&a, r, fn);
137 if (a.type != ALoc)
138 return 0;
139 t = &fn->tmp[a.base];
140 if (t->visit < 0)
141 return 0;
142 *off = a.offset;
143 *ps = &sl[t->visit];
144 return 1;
147 static void
148 memr(Ref r, bits x, int ip, Fn *fn, Slot *sl)
150 int64_t off;
151 Slot *s;
153 if (slot(&s, &off, r, fn, sl)) {
154 s->l |= x << off;
155 s->l &= s->m;
156 if (s->l)
157 radd(&s->r, ip);
161 static void
162 memw(Ref r, bits x, int ip, Fn *fn, Slot *sl)
164 int64_t off;
165 Slot *s;
167 if (slot(&s, &off, r, fn, sl)) {
168 if (s->l)
169 radd(&s->r, ip);
170 s->l &= ~(x << off);
174 static int
175 scmp(const void *pa, const void *pb)
177 Slot *a, *b;
179 a = (Slot *)pa, b = (Slot *)pb;
180 if (a->sz != b->sz)
181 return b->sz - a->sz;
182 return a->r.a - b->r.a;
185 static void
186 maxrpo(Blk *hd, Blk *b)
188 if (hd->loop < (int)b->id)
189 hd->loop = b->id;
192 void
193 coalesce(Fn *fn)
195 Range r, *br;
196 Slot *s, *s0, *sl;
197 Blk *b, **ps, *succ[3];
198 Ins *i;
199 Use *u;
200 Tmp *t;
201 Ref *arg;
202 bits x;
203 int n, m, nsl, ip, *kill;
204 uint total, freed, fused;
206 /* minimize the stack usage
207 * by coalescing slots
209 nsl = 0;
210 sl = vnew(0, sizeof sl[0], PHeap);
211 for (n=Tmp0; n<fn->ntmp; n++) {
212 t = &fn->tmp[n];
213 t->visit = -1;
214 if (t->alias.type == ALoc)
215 if (t->alias.slot == &t->alias)
216 if (t->alias.u.loc.sz != -1) {
217 t->visit = nsl;
218 vgrow(&sl, ++nsl);
219 s = &sl[nsl-1];
220 s->t = n;
221 s->sz = t->alias.u.loc.sz;
222 s->m = t->alias.u.loc.m;
223 s->s = 0;
227 /* one-pass liveness analysis */
228 for (b=fn->start; b; b=b->link)
229 b->loop = -1;
230 loopiter(fn, maxrpo);
231 br = vnew(fn->nblk, sizeof br[0], PHeap);
232 ip = INT_MAX - 1;
233 for (n=fn->nblk-1; n>=0; n--) {
234 b = fn->rpo[n];
235 succ[0] = b->s1;
236 succ[1] = b->s2;
237 succ[2] = 0;
238 br[n].b = ip--;
239 for (s=sl; s<&sl[nsl]; s++) {
240 s->l = 0;
241 for (ps=succ; *ps; ps++) {
242 m = (*ps)->id;
243 if (m > n && rin(s->r, br[m].a)) {
244 s->l = s->m;
245 radd(&s->r, ip);
249 for (i=&b->ins[b->nins]; i!=b->ins;) {
250 arg = (--i)->arg;
251 if (i->op == Oargc) {
252 memr(arg[1], -1, --ip, fn, sl);
254 if (isload(i->op)) {
255 x = BIT(loadsz(i)) - 1;
256 memr(arg[0], x, --ip, fn, sl);
258 if (isstore(i->op)) {
259 x = BIT(storesz(i)) - 1;
260 memw(arg[1], x, ip--, fn, sl);
263 for (s=sl; s<&sl[nsl]; s++)
264 if (s->l) {
265 radd(&s->r, ip);
266 if (b->loop != -1) {
267 assert(b->loop > n);
268 radd(&s->r, br[b->loop].b - 1);
271 br[n].a = ip;
273 vfree(br);
275 /* kill slots with an empty live range */
276 total = 0;
277 freed = 0;
278 kill = vnew(0, sizeof kill[0], PHeap);
279 n = 0;
280 for (s=s0=sl; s<&sl[nsl]; s++) {
281 total += s->sz;
282 if (!s->r.b) {
283 vgrow(&kill, ++n);
284 kill[n-1] = s->t;
285 freed += s->sz;
286 } else
287 *s0++ = *s;
289 nsl = s0-sl;
290 if (debug['M']) {
291 fputs("\n> Slot coalescing:\n", stderr);
292 if (n) {
293 fputs("\tDEAD", stderr);
294 for (m=0; m<n; m++)
295 fprintf(stderr, " %%%s",
296 fn->tmp[kill[m]].name);
297 fputc('\n', stderr);
300 while (n--) {
301 t = &fn->tmp[kill[n]];
302 assert(t->ndef == 1 && t->def);
303 *t->def = (Ins){.op = Onop};
304 for (u=t->use; u<&t->use[t->nuse]; u++) {
305 assert(u->type == UIns);
306 i = u->u.ins;
307 if (!req(i->to, R)) {
308 assert(rtype(i->to) == RTmp);
309 vgrow(&kill, ++n);
310 kill[n-1] = i->to.val;
311 } else
312 *i = (Ins){.op = Onop};
315 vfree(kill);
317 /* fuse slots by decreasing size */
318 qsort(sl, nsl, sizeof *sl, scmp);
319 fused = 0;
320 for (n=0; n<nsl; n++) {
321 s0 = &sl[n];
322 if (s0->s)
323 continue;
324 s0->s = s0;
325 r = s0->r;
326 for (s=s0+1; s<&sl[nsl]; s++) {
327 if (s->s || !s->r.b)
328 goto Skip;
329 if (rovlap(r, s->r))
330 /* O(n) can be approximated
331 * by 'goto Skip;' if need be
333 for (m=n; &sl[m]<s; m++)
334 if (sl[m].s == s0)
335 if (rovlap(sl[m].r, s->r))
336 goto Skip;
337 radd(&r, s->r.a);
338 radd(&r, s->r.b - 1);
339 s->s = s0;
340 fused += s->sz;
341 Skip:;
345 /* substitute fused slots */
346 for (s=sl; s<&sl[nsl]; s++) {
347 if (s->s == s)
348 continue;
349 t = &fn->tmp[s->t];
350 assert(t->ndef == 1 && t->def);
351 *t->def = (Ins){.op = Onop};
352 for (u=t->use; u<&t->use[t->nuse]; u++) {
353 assert(u->type == UIns);
354 arg = u->u.ins->arg;
355 for (n=0; n<2; n++)
356 if (req(arg[n], TMP(s->t)))
357 arg[n] = TMP(s->s->t);
361 if (debug['M']) {
362 for (s0=sl; s0<&sl[nsl]; s0++) {
363 if (s0->s != s0)
364 continue;
365 fprintf(stderr, "\tLOCL (% 3db) %s:",
366 s0->sz, fn->tmp[s0->t].name);
367 for (s=s0; s<&sl[nsl]; s++) {
368 if (s->s != s0)
369 continue;
370 fprintf(stderr, " %%%s", fn->tmp[s->t].name);
371 if (s->r.b)
372 fprintf(stderr, "[%d,%d)",
373 s->r.a-ip, s->r.b-ip);
374 else
375 fputs("{}", stderr);
377 fputc('\n', stderr);
379 fprintf(stderr, "\tSUMS %u/%u/%u (freed/fused/total)\n\n",
380 freed, fused, total);
381 printfn(fn, stderr);
384 vfree(sl);