test: use architecture-neutral wrapper for calling vprintf
[qbe.git] / spill.c
blob132e0e94e0c4216d93a587b346efc3dc85fcfa19
1 #include "all.h"
3 static void
4 aggreg(Blk *hd, Blk *b)
6 int k;
8 /* aggregate looping information at
9 * loop headers */
10 bsunion(hd->gen, b->gen);
11 for (k=0; k<2; k++)
12 if (b->nlive[k] > hd->nlive[k])
13 hd->nlive[k] = b->nlive[k];
16 static void
17 tmpuse(Ref r, int use, int loop, Fn *fn)
19 Mem *m;
20 Tmp *t;
22 if (rtype(r) == RMem) {
23 m = &fn->mem[r.val];
24 tmpuse(m->base, 1, loop, fn);
25 tmpuse(m->index, 1, loop, fn);
27 else if (rtype(r) == RTmp && r.val >= Tmp0) {
28 t = &fn->tmp[r.val];
29 t->nuse += use;
30 t->ndef += !use;
31 t->cost += loop;
35 /* evaluate spill costs of temporaries,
36 * this also fills usage information
37 * requires rpo, preds
39 void
40 fillcost(Fn *fn)
42 int n;
43 uint a;
44 Blk *b;
45 Ins *i;
46 Tmp *t;
47 Phi *p;
49 loopiter(fn, aggreg);
50 if (debug['S']) {
51 fprintf(stderr, "\n> Loop information:\n");
52 for (b=fn->start; b; b=b->link) {
53 for (a=0; a<b->npred; ++a)
54 if (b->id <= b->pred[a]->id)
55 break;
56 if (a != b->npred) {
57 fprintf(stderr, "\t%-10s", b->name);
58 fprintf(stderr, " (% 3d ", b->nlive[0]);
59 fprintf(stderr, "% 3d) ", b->nlive[1]);
60 dumpts(b->gen, fn->tmp, stderr);
64 for (t=fn->tmp; t-fn->tmp < fn->ntmp; t++) {
65 t->cost = t-fn->tmp < Tmp0 ? UINT_MAX : 0;
66 t->nuse = 0;
67 t->ndef = 0;
69 for (b=fn->start; b; b=b->link) {
70 for (p=b->phi; p; p=p->link) {
71 t = &fn->tmp[p->to.val];
72 tmpuse(p->to, 0, 0, fn);
73 for (a=0; a<p->narg; a++) {
74 n = p->blk[a]->loop;
75 t->cost += n;
76 tmpuse(p->arg[a], 1, n, fn);
79 n = b->loop;
80 for (i=b->ins; i<&b->ins[b->nins]; i++) {
81 tmpuse(i->to, 0, n, fn);
82 tmpuse(i->arg[0], 1, n, fn);
83 tmpuse(i->arg[1], 1, n, fn);
85 tmpuse(b->jmp.arg, 1, n, fn);
87 if (debug['S']) {
88 fprintf(stderr, "\n> Spill costs:\n");
89 for (n=Tmp0; n<fn->ntmp; n++)
90 fprintf(stderr, "\t%-10s %d\n",
91 fn->tmp[n].name,
92 fn->tmp[n].cost);
93 fprintf(stderr, "\n");
97 static BSet *fst; /* temps to prioritize in registers (for tcmp1) */
98 static Tmp *tmp; /* current temporaries (for tcmpX) */
99 static int ntmp; /* current # of temps (for limit) */
100 static int locs; /* stack size used by locals */
101 static int slot4; /* next slot of 4 bytes */
102 static int slot8; /* ditto, 8 bytes */
103 static BSet mask[2][1]; /* class masks */
105 static int
106 tcmp0(const void *pa, const void *pb)
108 uint ca, cb;
110 ca = tmp[*(int *)pa].cost;
111 cb = tmp[*(int *)pb].cost;
112 return (cb < ca) ? -1 : (cb > ca);
115 static int
116 tcmp1(const void *pa, const void *pb)
118 int c;
120 c = bshas(fst, *(int *)pb) - bshas(fst, *(int *)pa);
121 return c ? c : tcmp0(pa, pb);
124 static Ref
125 slot(int t)
127 int s;
129 assert(t >= Tmp0 && "cannot spill register");
130 s = tmp[t].slot;
131 if (s == -1) {
132 /* specific to NAlign == 3 */
133 /* nice logic to pack stack slots
134 * on demand, there can be only
135 * one hole and slot4 points to it
137 * invariant: slot4 <= slot8
139 if (KWIDE(tmp[t].cls)) {
140 s = slot8;
141 if (slot4 == slot8)
142 slot4 += 2;
143 slot8 += 2;
144 } else {
145 s = slot4;
146 if (slot4 == slot8) {
147 slot8 += 2;
148 slot4 += 1;
149 } else
150 slot4 = slot8;
152 s += locs;
153 tmp[t].slot = s;
155 return SLOT(s);
158 static void
159 limit(BSet *b, int k, BSet *f)
161 static int *tarr, maxt;
162 int i, t, nt;
164 nt = bscount(b);
165 if (nt <= k)
166 return;
167 if (nt > maxt) {
168 free(tarr);
169 tarr = emalloc(nt * sizeof tarr[0]);
170 maxt = nt;
172 for (i=0, t=0; bsiter(b, &t); t++) {
173 bsclr(b, t);
174 tarr[i++] = t;
176 if (nt > 1) {
177 if (!f)
178 qsort(tarr, nt, sizeof tarr[0], tcmp0);
179 else {
180 fst = f;
181 qsort(tarr, nt, sizeof tarr[0], tcmp1);
184 for (i=0; i<k && i<nt; i++)
185 bsset(b, tarr[i]);
186 for (; i<nt; i++)
187 slot(tarr[i]);
190 static void
191 limit2(BSet *b1, int k1, int k2, BSet *fst)
193 BSet b2[1];
195 bsinit(b2, ntmp); /* todo, free those */
196 bscopy(b2, b1);
197 bsinter(b1, mask[0]);
198 bsinter(b2, mask[1]);
199 limit(b1, T.ngpr - k1, fst);
200 limit(b2, T.nfpr - k2, fst);
201 bsunion(b1, b2);
204 static void
205 sethint(BSet *u, bits r)
207 int t;
209 for (t=Tmp0; bsiter(u, &t); t++)
210 tmp[phicls(t, tmp)].hint.m |= r;
213 static void
214 reloads(BSet *u, BSet *v)
216 int t;
218 for (t=Tmp0; bsiter(u, &t); t++)
219 if (!bshas(v, t))
220 emit(Oload, tmp[t].cls, TMP(t), slot(t), R);
223 static void
224 store(Ref r, int s)
226 if (s != -1)
227 emit(Ostorew + tmp[r.val].cls, 0, R, r, SLOT(s));
230 static int
231 regcpy(Ins *i)
233 return i->op == Ocopy && isreg(i->arg[0]);
236 static Ins *
237 dopm(Blk *b, Ins *i, BSet *v)
239 int n, t;
240 BSet u[1];
241 Ins *i1;
242 bits r;
244 bsinit(u, ntmp); /* todo, free those */
245 /* consecutive copies from
246 * registers need to be handled
247 * as one large instruction
249 * fixme: there is an assumption
250 * that calls are always followed
251 * by copy instructions here, this
252 * might not be true if previous
253 * passes change
255 i1 = ++i;
256 do {
257 i--;
258 t = i->to.val;
259 if (!req(i->to, R))
260 if (bshas(v, t)) {
261 bsclr(v, t);
262 store(i->to, tmp[t].slot);
264 bsset(v, i->arg[0].val);
265 } while (i != b->ins && regcpy(i-1));
266 bscopy(u, v);
267 if (i != b->ins && (i-1)->op == Ocall) {
268 v->t[0] &= ~T.retregs((i-1)->arg[1], 0);
269 limit2(v, T.nrsave[0], T.nrsave[1], 0);
270 for (n=0, r=0; T.rsave[n]>=0; n++)
271 r |= BIT(T.rsave[n]);
272 v->t[0] |= T.argregs((i-1)->arg[1], 0);
273 } else {
274 limit2(v, 0, 0, 0);
275 r = v->t[0];
277 sethint(v, r);
278 reloads(u, v);
280 emiti(*--i1);
281 while (i1 != i);
282 return i;
285 static void
286 merge(BSet *u, Blk *bu, BSet *v, Blk *bv)
288 int t;
290 if (bu->loop <= bv->loop)
291 bsunion(u, v);
292 else
293 for (t=0; bsiter(v, &t); t++)
294 if (tmp[t].slot == -1)
295 bsset(u, t);
298 /* spill code insertion
299 * requires spill costs, rpo, liveness
301 * Note: this will replace liveness
302 * information (in, out) with temporaries
303 * that must be in registers at block
304 * borders
306 * Be careful with:
307 * - Ocopy instructions to ensure register
308 * constraints
310 void
311 spill(Fn *fn)
313 Blk *b, *s1, *s2, *hd, **bp;
314 int j, l, t, k, lvarg[2];
315 uint n;
316 BSet u[1], v[1], w[1];
317 Ins *i;
318 Phi *p;
319 Mem *m;
320 bits r;
322 tmp = fn->tmp;
323 ntmp = fn->ntmp;
324 bsinit(u, ntmp);
325 bsinit(v, ntmp);
326 bsinit(w, ntmp);
327 bsinit(mask[0], ntmp);
328 bsinit(mask[1], ntmp);
329 locs = fn->slot;
330 slot4 = 0;
331 slot8 = 0;
332 for (t=0; t<ntmp; t++) {
333 k = 0;
334 if (t >= T.fpr0 && t < T.fpr0 + T.nfpr)
335 k = 1;
336 if (t >= Tmp0)
337 k = KBASE(tmp[t].cls);
338 bsset(mask[k], t);
341 for (bp=&fn->rpo[fn->nblk]; bp!=fn->rpo;) {
342 b = *--bp;
343 /* invariant: all blocks with bigger rpo got
344 * their in,out updated. */
346 /* 1. find temporaries in registers at
347 * the end of the block (put them in v) */
348 curi = 0;
349 s1 = b->s1;
350 s2 = b->s2;
351 hd = 0;
352 if (s1 && s1->id <= b->id)
353 hd = s1;
354 if (s2 && s2->id <= b->id)
355 if (!hd || s2->id >= hd->id)
356 hd = s2;
357 if (hd) {
358 /* back-edge */
359 bszero(v);
360 hd->gen->t[0] |= T.rglob; /* don't spill registers */
361 for (k=0; k<2; k++) {
362 n = k == 0 ? T.ngpr : T.nfpr;
363 bscopy(u, b->out);
364 bsinter(u, mask[k]);
365 bscopy(w, u);
366 bsinter(u, hd->gen);
367 bsdiff(w, hd->gen);
368 if (bscount(u) < n) {
369 j = bscount(w); /* live through */
370 l = hd->nlive[k];
371 limit(w, n - (l - j), 0);
372 bsunion(u, w);
373 } else
374 limit(u, n, 0);
375 bsunion(v, u);
377 } else if (s1) {
378 /* avoid reloading temporaries
379 * in the middle of loops */
380 bszero(v);
381 liveon(w, b, s1);
382 merge(v, b, w, s1);
383 if (s2) {
384 liveon(u, b, s2);
385 merge(v, b, u, s2);
386 bsinter(w, u);
388 limit2(v, 0, 0, w);
389 } else {
390 bscopy(v, b->out);
391 if (rtype(b->jmp.arg) == RCall)
392 v->t[0] |= T.retregs(b->jmp.arg, 0);
394 for (t=Tmp0; bsiter(b->out, &t); t++)
395 if (!bshas(v, t))
396 slot(t);
397 bscopy(b->out, v);
399 /* 2. process the block instructions */
400 curi = &insb[NIns];
401 for (i=&b->ins[b->nins]; i!=b->ins;) {
402 i--;
403 if (regcpy(i)) {
404 i = dopm(b, i, v);
405 continue;
407 bszero(w);
408 if (!req(i->to, R)) {
409 assert(rtype(i->to) == RTmp);
410 t = i->to.val;
411 if (bshas(v, t))
412 bsclr(v, t);
413 else {
414 /* make sure we have a reg
415 * for the result */
416 bsset(v, t);
417 bsset(w, t);
420 j = T.memargs(i->op);
421 for (n=0; n<2; n++)
422 if (rtype(i->arg[n]) == RMem)
423 j--;
424 for (n=0; n<2; n++)
425 switch (rtype(i->arg[n])) {
426 case RMem:
427 t = i->arg[n].val;
428 m = &fn->mem[t];
429 if (rtype(m->base) == RTmp) {
430 bsset(v, m->base.val);
431 bsset(w, m->base.val);
433 if (rtype(m->index) == RTmp) {
434 bsset(v, m->index.val);
435 bsset(w, m->index.val);
437 break;
438 case RTmp:
439 t = i->arg[n].val;
440 lvarg[n] = bshas(v, t);
441 bsset(v, t);
442 if (j-- <= 0)
443 bsset(w, t);
444 break;
446 bscopy(u, v);
447 limit2(v, 0, 0, w);
448 for (n=0; n<2; n++)
449 if (rtype(i->arg[n]) == RTmp) {
450 t = i->arg[n].val;
451 if (!bshas(v, t)) {
452 /* do not reload if the
453 * argument is dead
455 if (!lvarg[n])
456 bsclr(u, t);
457 i->arg[n] = slot(t);
460 reloads(u, v);
461 if (!req(i->to, R)) {
462 t = i->to.val;
463 store(i->to, tmp[t].slot);
464 bsclr(v, t);
466 emiti(*i);
467 r = v->t[0]; /* Tmp0 is NBit */
468 if (r)
469 sethint(v, r);
471 if (b == fn->start)
472 assert(v->t[0] == (T.rglob | fn->reg));
473 else
474 assert(v->t[0] == T.rglob);
476 for (p=b->phi; p; p=p->link) {
477 assert(rtype(p->to) == RTmp);
478 t = p->to.val;
479 if (bshas(v, t)) {
480 bsclr(v, t);
481 store(p->to, tmp[t].slot);
482 } else if (bshas(b->in, t))
483 /* only if the phi is live */
484 p->to = slot(p->to.val);
486 bscopy(b->in, v);
487 b->nins = &insb[NIns] - curi;
488 idup(&b->ins, curi, b->nins);
491 /* align the locals to a 16 byte boundary */
492 /* specific to NAlign == 3 */
493 slot8 += slot8 & 3;
494 fn->slot += slot8;
496 if (debug['S']) {
497 fprintf(stderr, "\n> Block information:\n");
498 for (b=fn->start; b; b=b->link) {
499 fprintf(stderr, "\t%-10s (% 5d) ", b->name, b->loop);
500 dumpts(b->out, fn->tmp, stderr);
502 fprintf(stderr, "\n> After spilling:\n");
503 printfn(fn, stderr);