free the typ vector at the end of parse()
[qbe.git] / spill.c
blobd3a11f7c572907f30ccb50a5186dea30a2440ff2
1 #include "all.h"
3 static void
4 aggreg(Blk *hd, Blk *b)
6 int k;
8 /* aggregate looping information at
9 * loop headers */
10 bsunion(hd->gen, b->gen);
11 for (k=0; k<2; k++)
12 if (b->nlive[k] > hd->nlive[k])
13 hd->nlive[k] = b->nlive[k];
16 static void
17 tmpuse(Ref r, int use, int loop, Fn *fn)
19 Mem *m;
20 Tmp *t;
22 if (rtype(r) == RMem) {
23 m = &fn->mem[r.val];
24 tmpuse(m->base, 1, loop, fn);
25 tmpuse(m->index, 1, loop, fn);
27 else if (rtype(r) == RTmp && r.val >= Tmp0) {
28 t = &fn->tmp[r.val];
29 t->nuse += use;
30 t->ndef += !use;
31 t->cost += loop;
35 /* evaluate spill costs of temporaries,
36 * this also fills usage information
37 * requires rpo, preds
39 void
40 fillcost(Fn *fn)
42 int n;
43 uint a;
44 Blk *b;
45 Ins *i;
46 Tmp *t;
47 Phi *p;
49 loopiter(fn, aggreg);
50 if (debug['S']) {
51 fprintf(stderr, "\n> Loop information:\n");
52 for (b=fn->start; b; b=b->link) {
53 for (a=0; a<b->npred; ++a)
54 if (b->id <= b->pred[a]->id)
55 break;
56 if (a != b->npred) {
57 fprintf(stderr, "\t%-10s", b->name);
58 fprintf(stderr, " (% 3d ", b->nlive[0]);
59 fprintf(stderr, "% 3d) ", b->nlive[1]);
60 dumpts(b->gen, fn->tmp, stderr);
64 for (t=fn->tmp; t-fn->tmp < fn->ntmp; t++) {
65 t->cost = t-fn->tmp < Tmp0 ? UINT_MAX : 0;
66 t->nuse = 0;
67 t->ndef = 0;
69 for (b=fn->start; b; b=b->link) {
70 for (p=b->phi; p; p=p->link) {
71 t = &fn->tmp[p->to.val];
72 tmpuse(p->to, 0, 0, fn);
73 for (a=0; a<p->narg; a++) {
74 n = p->blk[a]->loop;
75 t->cost += n;
76 tmpuse(p->arg[a], 1, n, fn);
79 n = b->loop;
80 for (i=b->ins; i-b->ins < b->nins; i++) {
81 tmpuse(i->to, 0, n, fn);
82 tmpuse(i->arg[0], 1, n, fn);
83 tmpuse(i->arg[1], 1, n, fn);
85 tmpuse(b->jmp.arg, 1, n, fn);
87 if (debug['S']) {
88 fprintf(stderr, "\n> Spill costs:\n");
89 for (n=Tmp0; n<fn->ntmp; n++)
90 fprintf(stderr, "\t%-10s %d\n",
91 fn->tmp[n].name,
92 fn->tmp[n].cost);
93 fprintf(stderr, "\n");
97 static BSet *fst; /* temps to prioritize in registers (for tcmp1) */
98 static Tmp *tmp; /* current temporaries (for tcmpX) */
99 static int ntmp; /* current # of temps (for limit) */
100 static int locs; /* stack size used by locals */
101 static int slot4; /* next slot of 4 bytes */
102 static int slot8; /* ditto, 8 bytes */
103 static BSet mask[2][1]; /* class masks */
105 static int
106 tcmp0(const void *pa, const void *pb)
108 uint ca, cb;
110 ca = tmp[*(int *)pa].cost;
111 cb = tmp[*(int *)pb].cost;
112 return (cb < ca) ? -1 : (cb > ca);
115 static int
116 tcmp1(const void *pa, const void *pb)
118 int c;
120 c = bshas(fst, *(int *)pb) - bshas(fst, *(int *)pa);
121 return c ? c : tcmp0(pa, pb);
124 static Ref
125 slot(int t)
127 int s;
129 assert(t >= Tmp0 && "cannot spill register");
130 s = tmp[t].slot;
131 if (s == -1) {
132 /* specific to NAlign == 3 */
133 /* nice logic to pack stack slots
134 * on demand, there can be only
135 * one hole and slot4 points to it
137 * invariant: slot4 <= slot8
139 if (KWIDE(tmp[t].cls)) {
140 s = slot8;
141 if (slot4 == slot8)
142 slot4 += 2;
143 slot8 += 2;
144 } else {
145 s = slot4;
146 if (slot4 == slot8) {
147 slot8 += 2;
148 slot4 += 1;
149 } else
150 slot4 = slot8;
152 s += locs;
153 tmp[t].slot = s;
155 return SLOT(s);
158 static void
159 limit(BSet *b, int k, BSet *f)
161 static int *tarr, maxt;
162 int i, t, nt;
164 nt = bscount(b);
165 if (nt <= k)
166 return;
167 if (nt > maxt) {
168 free(tarr);
169 tarr = emalloc(nt * sizeof tarr[0]);
170 maxt = nt;
172 for (i=0, t=0; bsiter(b, &t); t++) {
173 bsclr(b, t);
174 tarr[i++] = t;
176 if (!f)
177 qsort(tarr, nt, sizeof tarr[0], tcmp0);
178 else {
179 fst = f;
180 qsort(tarr, nt, sizeof tarr[0], tcmp1);
182 for (i=0; i<k && i<nt; i++)
183 bsset(b, tarr[i]);
184 for (; i<nt; i++)
185 slot(tarr[i]);
188 static void
189 limit2(BSet *b1, int k1, int k2, BSet *fst)
191 BSet b2[1];
193 bsinit(b2, ntmp); /* todo, free those */
194 bscopy(b2, b1);
195 bsinter(b1, mask[0]);
196 bsinter(b2, mask[1]);
197 limit(b1, T.ngpr - k1, fst);
198 limit(b2, T.nfpr - k2, fst);
199 bsunion(b1, b2);
202 static void
203 sethint(BSet *u, bits r)
205 int t;
207 for (t=Tmp0; bsiter(u, &t); t++)
208 tmp[phicls(t, tmp)].hint.m |= r;
211 static void
212 reloads(BSet *u, BSet *v)
214 int t;
216 for (t=Tmp0; bsiter(u, &t); t++)
217 if (!bshas(v, t))
218 emit(Oload, tmp[t].cls, TMP(t), slot(t), R);
221 static void
222 store(Ref r, int s)
224 if (s != -1)
225 emit(Ostorew + tmp[r.val].cls, 0, R, r, SLOT(s));
228 static int
229 regcpy(Ins *i)
231 return i->op == Ocopy && isreg(i->arg[0]);
234 static Ins *
235 dopm(Blk *b, Ins *i, BSet *v)
237 int n, t;
238 BSet u[1];
239 Ins *i1;
240 bits r;
242 bsinit(u, ntmp); /* todo, free those */
243 /* consecutive copies from
244 * registers need to be handled
245 * as one large instruction
247 * fixme: there is an assumption
248 * that calls are always followed
249 * by copy instructions here, this
250 * might not be true if previous
251 * passes change
253 i1 = ++i;
254 do {
255 i--;
256 t = i->to.val;
257 if (!req(i->to, R))
258 if (bshas(v, t)) {
259 bsclr(v, t);
260 store(i->to, tmp[t].slot);
262 bsset(v, i->arg[0].val);
263 } while (i != b->ins && regcpy(i-1));
264 bscopy(u, v);
265 if (i != b->ins && (i-1)->op == Ocall) {
266 v->t[0] &= ~T.retregs((i-1)->arg[1], 0);
267 limit2(v, T.nrsave[0], T.nrsave[1], 0);
268 for (n=0, r=0; T.rsave[n]>=0; n++)
269 r |= BIT(T.rsave[n]);
270 v->t[0] |= T.argregs((i-1)->arg[1], 0);
271 } else {
272 limit2(v, 0, 0, 0);
273 r = v->t[0];
275 sethint(v, r);
276 reloads(u, v);
278 emiti(*--i1);
279 while (i1 != i);
280 return i;
283 /* spill code insertion
284 * requires spill costs, rpo, liveness
286 * Note: this will replace liveness
287 * information (in, out) with temporaries
288 * that must be in registers at block
289 * borders
291 * Be careful with:
292 * - Ocopy instructions to ensure register
293 * constraints
295 void
296 spill(Fn *fn)
298 Blk *b, *s1, *s2, *hd, **bp;
299 int j, l, t, k, lvarg[2];
300 uint n;
301 BSet u[1], v[1], w[1];
302 Ins *i;
303 Phi *p;
304 Mem *m;
305 bits r;
307 tmp = fn->tmp;
308 ntmp = fn->ntmp;
309 bsinit(u, ntmp);
310 bsinit(v, ntmp);
311 bsinit(w, ntmp);
312 bsinit(mask[0], ntmp);
313 bsinit(mask[1], ntmp);
314 locs = fn->slot;
315 slot4 = 0;
316 slot8 = 0;
317 for (t=0; t<ntmp; t++) {
318 k = 0;
319 if (t >= T.fpr0 && t < T.fpr0 + T.nfpr)
320 k = 1;
321 if (t >= Tmp0)
322 k = KBASE(tmp[t].cls);
323 bsset(mask[k], t);
326 for (bp=&fn->rpo[fn->nblk]; bp!=fn->rpo;) {
327 b = *--bp;
328 /* invariant: all bocks with bigger rpo got
329 * their in,out updated. */
331 /* 1. find temporaries in registers at
332 * the end of the block (put them in v) */
333 curi = 0;
334 s1 = b->s1;
335 s2 = b->s2;
336 hd = 0;
337 if (s1 && s1->id <= b->id)
338 hd = s1;
339 if (s2 && s2->id <= b->id)
340 if (!hd || s2->id >= hd->id)
341 hd = s2;
342 if (hd) {
343 /* back-edge */
344 bszero(v);
345 hd->gen->t[0] |= T.rglob; /* don't spill registers */
346 for (k=0; k<2; k++) {
347 n = k == 0 ? T.ngpr : T.nfpr;
348 bscopy(u, b->out);
349 bsinter(u, mask[k]);
350 bscopy(w, u);
351 bsinter(u, hd->gen);
352 bsdiff(w, hd->gen);
353 if (bscount(u) < n) {
354 j = bscount(w); /* live through */
355 l = hd->nlive[k];
356 limit(w, n - (l - j), 0);
357 bsunion(u, w);
358 } else
359 limit(u, n, 0);
360 bsunion(v, u);
362 } else if (s1) {
363 liveon(v, b, s1);
364 if (s2) {
365 liveon(u, b, s2);
366 bscopy(w, u);
367 bsinter(w, v);
368 bsunion(v, u);
370 limit2(v, 0, 0, w);
371 } else {
372 bscopy(v, b->out);
373 if (rtype(b->jmp.arg) == RCall)
374 v->t[0] |= T.retregs(b->jmp.arg, 0);
376 for (t=Tmp0; bsiter(b->out, &t); t++)
377 if (!bshas(v, t))
378 slot(t);
379 bscopy(b->out, v);
381 /* 2. process the block instructions */
382 r = v->t[0];
383 curi = &insb[NIns];
384 for (i=&b->ins[b->nins]; i!=b->ins;) {
385 i--;
386 if (regcpy(i)) {
387 i = dopm(b, i, v);
388 continue;
390 bszero(w);
391 if (!req(i->to, R)) {
392 assert(rtype(i->to) == RTmp);
393 t = i->to.val;
394 if (bshas(v, t))
395 bsclr(v, t);
396 else {
397 /* make sure we have a reg
398 * for the result */
399 bsset(v, t);
400 bsset(w, t);
403 j = T.memargs(i->op);
404 for (n=0; n<2; n++)
405 if (rtype(i->arg[n]) == RMem)
406 j--;
407 for (n=0; n<2; n++)
408 switch (rtype(i->arg[n])) {
409 case RMem:
410 t = i->arg[n].val;
411 m = &fn->mem[t];
412 if (rtype(m->base) == RTmp) {
413 bsset(v, m->base.val);
414 bsset(w, m->base.val);
416 if (rtype(m->index) == RTmp) {
417 bsset(v, m->index.val);
418 bsset(w, m->index.val);
420 break;
421 case RTmp:
422 t = i->arg[n].val;
423 lvarg[n] = bshas(v, t);
424 bsset(v, t);
425 if (j-- <= 0)
426 bsset(w, t);
427 break;
429 bscopy(u, v);
430 limit2(v, 0, 0, w);
431 for (n=0; n<2; n++)
432 if (rtype(i->arg[n]) == RTmp) {
433 t = i->arg[n].val;
434 if (!bshas(v, t)) {
435 /* do not reload if the
436 * the temporary was dead
438 if (!lvarg[n])
439 bsclr(u, t);
440 i->arg[n] = slot(t);
443 reloads(u, v);
444 if (!req(i->to, R)) {
445 t = i->to.val;
446 store(i->to, tmp[t].slot);
447 bsclr(v, t);
449 emiti(*i);
450 r = v->t[0]; /* Tmp0 is NBit */
451 if (r)
452 sethint(v, r);
454 assert(r == T.rglob || b == fn->start);
456 for (p=b->phi; p; p=p->link) {
457 assert(rtype(p->to) == RTmp);
458 t = p->to.val;
459 if (bshas(v, t)) {
460 bsclr(v, t);
461 store(p->to, tmp[t].slot);
462 } else if (bshas(b->in, t))
463 /* only if the phi is live */
464 p->to = slot(p->to.val);
466 bscopy(b->in, v);
467 b->nins = &insb[NIns] - curi;
468 idup(&b->ins, curi, b->nins);
471 /* align the locals to a 16 byte boundary */
472 /* specific to NAlign == 3 */
473 slot8 += slot8 & 3;
474 fn->slot += slot8;
476 if (debug['S']) {
477 fprintf(stderr, "\n> Block information:\n");
478 for (b=fn->start; b; b=b->link) {
479 fprintf(stderr, "\t%-10s (% 5d) ", b->name, b->loop);
480 dumpts(b->out, fn->tmp, stderr);
482 fprintf(stderr, "\n> After spilling:\n");
483 printfn(fn, stderr);