remove unused parameter from uffind()
[qbe.git] / rega.c
blob9f02a636d06b082a68316806c72f79a7fea2e6ea
1 #include "all.h"
3 #ifdef TEST_PMOV
4 #undef assert
5 #define assert(x) assert_test(#x, x)
6 #endif
8 typedef struct RMap RMap;
10 struct RMap {
11 int t[NIReg+NFReg];
12 int r[NIReg+NFReg];
13 BSet b[1];
14 int n;
17 static bits regu; /* registers used */
18 static Tmp *tmp; /* function temporaries */
19 static Mem *mem; /* function mem references */
20 static struct {
21 Ref src, dst;
22 int cls;
23 } *pm; /* parallel move constructed */
24 static int cpm, npm; /* capacity and size of pm */
26 static int *
27 hint(int t)
29 return &tmp[phicls(t, tmp)].hint.r;
32 static void
33 sethint(int t, int r)
35 bits m;
37 m = tmp[phicls(t, tmp)].hint.m;
38 if (*hint(t) == -1)
39 if (!(BIT(r) & m))
40 *hint(t) = r;
43 static void
44 rcopy(RMap *ma, RMap *mb)
46 memcpy(ma->t, mb->t, sizeof ma->t);
47 memcpy(ma->r, mb->r, sizeof ma->r);
48 bscopy(ma->b, mb->b);
49 ma->n = mb->n;
52 static int
53 rfind(RMap *m, int t)
55 int i;
57 for (i=0; i<m->n; i++)
58 if (m->t[i] == t)
59 return m->r[i];
60 return -1;
63 static Ref
64 rref(RMap *m, int t)
66 int r, s;
68 r = rfind(m, t);
69 if (r == -1) {
70 s = tmp[t].slot;
71 assert(s != -1 && "should have spilled");
72 return SLOT(s);
73 } else
74 return TMP(r);
77 static void
78 radd(RMap *m, int t, int r)
80 assert((t >= Tmp0 || t == r) && "invalid temporary");
81 assert(((RAX <= r && r < RAX + NIReg) || (XMM0 <= r && r < XMM0 + NFReg)) && "invalid register");
82 assert(!bshas(m->b, t) && "temporary has mapping");
83 assert(!bshas(m->b, r) && "register already allocated");
84 assert(m->n <= NIReg+NFReg && "too many mappings");
85 bsset(m->b, t);
86 bsset(m->b, r);
87 m->t[m->n] = t;
88 m->r[m->n] = r;
89 m->n++;
90 regu |= BIT(r);
93 static Ref
94 ralloc(RMap *m, int t)
96 bits regs;
97 int r, r0, r1;
99 if (t < Tmp0) {
100 assert(bshas(m->b, t));
101 return TMP(t);
103 if (bshas(m->b, t)) {
104 r = rfind(m, t);
105 assert(r != -1);
106 return TMP(r);
108 r = *hint(t);
109 if (r == -1 || bshas(m->b, r)) {
110 regs = tmp[phicls(t, tmp)].hint.m;
111 regs |= m->b->t[0];
112 if (KBASE(tmp[t].cls) == 0) {
113 r0 = RAX;
114 r1 = RAX + NIReg;
115 } else {
116 r0 = XMM0;
117 r1 = XMM0 + NFReg;
119 for (r=r0; r<r1; r++)
120 if (!(regs & BIT(r)))
121 goto Found;
122 for (r=r0; r<r1; r++)
123 if (!bshas(m->b, r))
124 goto Found;
125 die("no more regs");
127 Found:
128 radd(m, t, r);
129 sethint(t, r);
130 return TMP(r);
133 static int
134 rfree(RMap *m, int t)
136 int i, r;
138 if (!bshas(m->b, t))
139 return -1;
140 for (i=0; m->t[i] != t; i++)
141 assert(i+1 < m->n);
142 r = m->r[i];
143 bsclr(m->b, t);
144 bsclr(m->b, r);
145 m->n--;
146 memmove(&m->t[i], &m->t[i+1], (m->n-i) * sizeof m->t[0]);
147 memmove(&m->r[i], &m->r[i+1], (m->n-i) * sizeof m->r[0]);
148 return r;
151 static void
152 mdump(RMap *m)
154 int i;
156 for (i=0; i<m->n; i++)
157 if (m->t[i] >= Tmp0)
158 fprintf(stderr, " (%s, R%d)",
159 tmp[m->t[i]].name,
160 m->r[i]);
161 fprintf(stderr, "\n");
164 static void
165 pmadd(Ref src, Ref dst, int k)
167 if (npm == cpm) {
168 cpm = cpm * 2 + 16;
169 pm = realloc(pm, cpm * sizeof pm[0]);
170 if (!pm)
171 die("pmadd, out of memory");
173 pm[npm].src = src;
174 pm[npm].dst = dst;
175 pm[npm].cls = k;
176 npm++;
179 enum PMStat { ToMove, Moving, Moved };
181 static Ref
182 pmrec(enum PMStat *status, int i, int *k)
184 Ref swp, swp1;
185 int j, k1;
187 /* note, this routine might emit
188 * too many large instructions:
190 * , x -- x
191 * x -- x -- x |
192 * ` x -- x
194 * if only the first move is wide
195 * the whole cycle will be wide,
196 * this is safe but not necessary
199 if (req(pm[i].src, pm[i].dst))
200 return R;
201 status[i] = Moving;
202 assert(KBASE(*k) == KBASE(pm[i].cls));
203 assert((Kw|1) == Kl && (Ks|1) == Kd);
204 *k |= KWIDE(pm[i].cls); /* see above */
205 swp = R;
206 for (j=0; j<npm; j++) {
207 if (req(pm[j].src, pm[i].dst))
208 switch (status[j]) {
209 case ToMove:
210 k1 = *k;
211 swp1 = pmrec(status, j, &k1);
212 if (!req(swp1, R)) {
213 assert(req(swp, R));
214 swp = swp1;
215 *k = k1;
217 break;
218 case Moving:
219 assert(req(swp, R));
220 swp = pm[i].dst;
221 break;
222 case Moved:
223 break;
226 status[i] = Moved;
227 if (req(swp, R)) {
228 *curi++ = (Ins){Ocopy, pm[i].dst, {pm[i].src}, pm[i].cls};
229 return R;
230 } else if (!req(swp, pm[i].src)) {
231 *curi++ = (Ins){Oswap, R, {pm[i].src, pm[i].dst}, *k};
232 return swp;
233 } else
234 return R;
238 static void
239 pmgen()
241 int i, k;
242 enum PMStat *status;
244 status = alloc(npm * sizeof status[0]);
245 assert(!npm || status[npm-1] == ToMove);
246 curi = insb;
247 for (i=0; i<npm; i++)
248 if (status[i] == ToMove) {
249 k = pm[i].cls;
250 pmrec(status, i, &k);
254 static void
255 move(int r, Ref to, RMap *m)
257 int n, t, r1;
259 r1 = req(to, R) ? -1 : rfree(m, to.val);
260 if (bshas(m->b, r) && r1 != r) {
261 /* r is used and not by to */
262 for (n=0; m->r[n] != r; n++)
263 assert(n+1 < m->n);
264 t = m->t[n];
265 rfree(m, t);
266 bsset(m->b, r);
267 ralloc(m, t);
268 bsclr(m->b, r);
270 t = req(to, R) ? r : to.val;
271 radd(m, t, r);
274 static int
275 regcpy(Ins *i)
277 return i->op == Ocopy && isreg(i->arg[0]);
280 static Ins *
281 dopm(Blk *b, Ins *i, RMap *m)
283 RMap m0;
284 int n, r, r1, t, s;
285 Ins *i0, *i1, *ip, *ir;
286 bits def;
288 m0 = *m;
289 i1 = ++i;
290 do {
291 i--;
292 move(i->arg[0].val, i->to, m);
293 } while (i != b->ins && regcpy(i-1));
294 assert(m0.n <= m->n);
295 if (i != b->ins && (i-1)->op == Ocall) {
296 def = retregs((i-1)->arg[1], 0);
297 for (r=0; r<NRSave; r++)
298 if (!(BIT(rsave[r]) & def))
299 move(rsave[r], R, m);
301 for (npm=0, n=0; n<m->n; n++) {
302 t = m->t[n];
303 s = tmp[t].slot;
304 r1 = m->r[n];
305 r = rfind(&m0, t);
306 if (r != -1)
307 pmadd(TMP(r1), TMP(r), tmp[t].cls);
308 else if (s != -1)
309 pmadd(TMP(r1), SLOT(s), tmp[t].cls);
311 for (ip=i; ip<i1; ip++) {
312 if (!req(ip->to, R))
313 rfree(m, ip->to.val);
314 r = ip->arg[0].val;
315 if (rfind(m, r) == -1)
316 radd(m, r, r);
318 pmgen();
319 #ifdef TEST_PMOV
320 return 0;
321 #endif
322 n = b->nins - (i1 - i) + (curi - insb);
323 i0 = alloc(n * sizeof(Ins));
324 ip = icpy(ip = i0, b->ins, i - b->ins);
325 ip = icpy(ir = ip, insb, curi - insb);
326 ip = icpy(ip, i1, &b->ins[b->nins] - i1);
327 b->nins = n;
328 b->ins = i0;
329 return ir;
332 static int
333 prio(Ref r1, Ref r2)
335 /* trivial heuristic to begin with,
336 * later we can use the distance to
337 * the definition instruction
339 (void) r2;
340 return *hint(r1.val) != -1;
343 static void
344 insert(Ref *r, Ref **rs, int p)
346 int i;
348 rs[i = p] = r;
349 while (i-- > 0 && prio(*r, *rs[i])) {
350 rs[i+1] = rs[i];
351 rs[i] = r;
355 static void
356 doblk(Blk *b, RMap *cur)
358 int x, r, nr;
359 bits rs;
360 Ins *i;
361 Mem *m;
362 Ref *ra[4];
364 for (r=0; bsiter(b->out, &r) && r<Tmp0; r++)
365 radd(cur, r, r);
366 if (rtype(b->jmp.arg) == RTmp)
367 b->jmp.arg = ralloc(cur, b->jmp.arg.val);
368 for (i=&b->ins[b->nins]; i!=b->ins;) {
369 switch ((--i)->op) {
370 case Ocall:
371 rs = argregs(i->arg[1], 0);
372 for (r=0; r<NRSave; r++)
373 if (!(BIT(rsave[r]) & rs))
374 rfree(cur, rsave[r]);
375 break;
376 case Ocopy:
377 if (isreg(i->arg[0])) {
378 i = dopm(b, i, cur);
379 continue;
381 if (isreg(i->to))
382 if (rtype(i->arg[0]) == RTmp)
383 sethint(i->arg[0].val, i->to.val);
384 /* fall through */
385 default:
386 if (!req(i->to, R)) {
387 assert(rtype(i->to) == RTmp);
388 r = rfree(cur, i->to.val);
389 if (r == -1 && !isreg(i->to)) {
390 *i = (Ins){.op = Onop};
391 continue;
393 if (i->to.val >= Tmp0)
394 i->to = TMP(r);
396 break;
398 for (x=0, nr=0; x<2; x++)
399 switch (rtype(i->arg[x])) {
400 case RMem:
401 m = &mem[i->arg[x].val];
402 if (rtype(m->base) == RTmp)
403 insert(&m->base, ra, nr++);
404 if (rtype(m->index) == RTmp)
405 insert(&m->index, ra, nr++);
406 break;
407 case RTmp:
408 insert(&i->arg[x], ra, nr++);
409 break;
411 for (r=0; r<nr; r++)
412 *ra[r] = ralloc(cur, ra[r]->val);
416 /* register allocation
417 * depends on rpo, phi, cost, (and obviously spill)
419 void
420 rega(Fn *fn)
422 int j, t, r, r1, x, rl[Tmp0];
423 Blk *b, *b1, *s, ***ps, *blist;
424 RMap *end, *beg, cur, old;
425 Ins *i;
426 Phi *p;
427 uint u, n;
428 Ref src, dst;
430 /* 1. setup */
431 regu = 0;
432 tmp = fn->tmp;
433 mem = fn->mem;
434 end = alloc(fn->nblk * sizeof end[0]);
435 beg = alloc(fn->nblk * sizeof beg[0]);
436 for (n=0; n<fn->nblk; n++) {
437 bsinit(end[n].b, fn->ntmp);
438 bsinit(beg[n].b, fn->ntmp);
440 bsinit(cur.b, fn->ntmp);
441 bsinit(old.b, fn->ntmp);
443 for (t=0; t<fn->ntmp; t++)
444 *hint(t) = t < Tmp0 ? t : -1;
445 for (b=fn->start, i=b->ins; i-b->ins < b->nins; i++)
446 if (i->op != Ocopy || !isreg(i->arg[0]))
447 break;
448 else {
449 assert(rtype(i->to) == RTmp);
450 sethint(i->to.val, i->arg[0].val);
453 /* 2. assign registers following post-order */
454 for (n=fn->nblk-1; n!=-1u; n--) {
455 b = fn->rpo[n];
456 cur.n = 0;
457 bszero(cur.b);
458 for (x=0; x<2; x++)
459 for (t=Tmp0; bsiter(b->out, &t); t++)
460 if (x || (r=*hint(t)) != -1)
461 if (x || !bshas(cur.b, r))
462 ralloc(&cur, t);
463 rcopy(&end[n], &cur);
464 doblk(b, &cur);
465 bscopy(b->in, cur.b);
466 for (p=b->phi; p; p=p->link)
467 if (rtype(p->to) == RTmp) {
468 bsclr(b->in, p->to.val);
469 /* heuristic 0:
470 * if the phi destination has an
471 * argument from a frequent block
472 * that was already allocated to
473 * 'r', use 'r' as the new hint
475 memset(rl, 0, sizeof rl);
476 for (u=0; u<p->narg; u++) {
477 t = p->arg[u].val;
478 b1 = p->blk[u];
479 if (rtype(p->arg[u]) == RTmp)
480 if ((r=rfind(&end[b1->id], t)) != -1)
481 rl[r] += b1->loop;
483 for (x=0, j=0; j<Tmp0; j++)
484 if (rl[j] > rl[x])
485 x = j;
486 if (rl[x] >= b->loop)
487 *hint(p->to.val) = x;
489 if (b->npred > 1) {
490 /* heuristic 1:
491 * attempt to satisfy hints
492 * when it's simple and we have
493 * multiple predecessors
495 rcopy(&old, &cur);
496 curi = &insb[NIns];
497 for (j=0; j<old.n; j++) {
498 t = old.t[j];
499 r = *hint(t);
500 r1 = rfind(&cur, t);
501 if (r != -1 && r != r1)
502 if (!bshas(cur.b, r)) {
503 rfree(&cur, t);
504 radd(&cur, t, r);
505 x = tmp[t].cls;
506 emit(Ocopy, x, TMP(r1), TMP(r), R);
509 if ((j = &insb[NIns] - curi)) {
510 b->nins += j;
511 i = alloc(b->nins * sizeof(Ins));
512 icpy(icpy(i, curi, j), b->ins, b->nins-j);
513 b->ins = i;
516 rcopy(&beg[n], &cur);
518 if (debug['R']) {
519 fprintf(stderr, "\n> Register mappings:\n");
520 for (n=0; n<fn->nblk; n++) {
521 b = fn->rpo[n];
522 fprintf(stderr, "\t%-10s beg", b->name);
523 mdump(&beg[n]);
524 fprintf(stderr, "\t end");
525 mdump(&end[n]);
527 fprintf(stderr, "\n");
530 /* 3. compose glue code */
531 blist = 0;
532 for (b=fn->start;; b=b->link) {
533 ps = (Blk**[3]){&b->s1, &b->s2, (Blk*[1]){0}};
534 for (; (s=**ps); ps++) {
535 npm = 0;
536 for (p=s->phi; p; p=p->link) {
537 dst = p->to;
538 assert(rtype(dst)==RSlot || rtype(dst)==RTmp);
539 if (rtype(dst) == RTmp) {
540 r = rfind(&beg[s->id], dst.val);
541 if (r == -1)
542 continue;
543 dst = TMP(r);
545 for (u=0; p->blk[u]!=b; u++)
546 assert(u+1 < p->narg);
547 src = p->arg[u];
548 if (rtype(src) == RTmp)
549 src = rref(&end[b->id], src.val);
550 pmadd(src, dst, p->cls);
552 for (t=Tmp0; bsiter(s->in, &t); t++) {
553 src = rref(&end[b->id], t);
554 dst = rref(&beg[s->id], t);
555 pmadd(src, dst, tmp[t].cls);
557 pmgen();
558 if (curi == insb)
559 continue;
560 b1 = blknew();
561 b1->loop = (b->loop+s->loop) / 2;
562 b1->link = blist;
563 blist = b1;
564 fn->nblk++;
565 sprintf(b1->name, "%s_%s", b->name, s->name);
566 b1->nins = curi - insb;
567 idup(&b1->ins, insb, b1->nins);
568 b1->jmp.type = Jjmp;
569 b1->s1 = s;
570 **ps = b1;
572 if (!b->link) {
573 b->link = blist;
574 break;
577 for (b=fn->start; b; b=b->link)
578 b->phi = 0;
579 fn->reg = regu;
581 if (debug['R']) {
582 fprintf(stderr, "\n> After register allocation:\n");
583 printfn(fn, stderr);