Fix typo
[qbe.git] / rega.c
blob9ceba7076ee5276dd20cc682cc19bc1b2911c213
1 #include "all.h"
3 #ifdef TEST_PMOV
4 #undef assert
5 #define assert(x) assert_test(#x, x)
6 #endif
8 typedef struct RMap RMap;
10 struct RMap {
11 int t[Tmp0];
12 int r[Tmp0];
13 int w[Tmp0]; /* wait list, for unmatched hints */
14 BSet b[1];
15 int n;
18 static bits regu; /* registers used */
19 static Tmp *tmp; /* function temporaries */
20 static Mem *mem; /* function mem references */
21 static struct {
22 Ref src, dst;
23 int cls;
24 } pm[Tmp0]; /* parallel move constructed */
25 static int npm; /* size of pm */
26 static int loop; /* current loop level */
28 static uint stmov; /* stats: added moves */
29 static uint stblk; /* stats: added blocks */
31 static int *
32 hint(int t)
34 return &tmp[phicls(t, tmp)].hint.r;
37 static void
38 sethint(int t, int r)
40 Tmp *p;
42 p = &tmp[phicls(t, tmp)];
43 if (p->hint.r == -1 || p->hint.w > loop) {
44 p->hint.r = r;
45 p->hint.w = loop;
46 tmp[t].visit = -1;
50 static void
51 rcopy(RMap *ma, RMap *mb)
53 memcpy(ma->t, mb->t, sizeof ma->t);
54 memcpy(ma->r, mb->r, sizeof ma->r);
55 memcpy(ma->w, mb->w, sizeof ma->w);
56 bscopy(ma->b, mb->b);
57 ma->n = mb->n;
60 static int
61 rfind(RMap *m, int t)
63 int i;
65 for (i=0; i<m->n; i++)
66 if (m->t[i] == t)
67 return m->r[i];
68 return -1;
71 static Ref
72 rref(RMap *m, int t)
74 int r, s;
76 r = rfind(m, t);
77 if (r == -1) {
78 s = tmp[t].slot;
79 assert(s != -1 && "should have spilled");
80 return SLOT(s);
81 } else
82 return TMP(r);
85 static void
86 radd(RMap *m, int t, int r)
88 assert((t >= Tmp0 || t == r) && "invalid temporary");
89 assert(((T.gpr0 <= r && r < T.gpr0 + T.ngpr)
90 || (T.fpr0 <= r && r < T.fpr0 + T.nfpr))
91 && "invalid register");
92 assert(!bshas(m->b, t) && "temporary has mapping");
93 assert(!bshas(m->b, r) && "register already allocated");
94 assert(m->n <= T.ngpr+T.nfpr && "too many mappings");
95 bsset(m->b, t);
96 bsset(m->b, r);
97 m->t[m->n] = t;
98 m->r[m->n] = r;
99 m->n++;
100 regu |= BIT(r);
103 static Ref
104 ralloctry(RMap *m, int t, int try)
106 bits regs;
107 int h, r, r0, r1;
109 if (t < Tmp0) {
110 assert(bshas(m->b, t));
111 return TMP(t);
113 if (bshas(m->b, t)) {
114 r = rfind(m, t);
115 assert(r != -1);
116 return TMP(r);
118 r = tmp[t].visit;
119 if (r == -1 || bshas(m->b, r))
120 r = *hint(t);
121 if (r == -1 || bshas(m->b, r)) {
122 if (try)
123 return R;
124 regs = tmp[phicls(t, tmp)].hint.m;
125 regs |= m->b->t[0];
126 if (KBASE(tmp[t].cls) == 0) {
127 r0 = T.gpr0;
128 r1 = r0 + T.ngpr;
129 } else {
130 r0 = T.fpr0;
131 r1 = r0 + T.nfpr;
133 for (r=r0; r<r1; r++)
134 if (!(regs & BIT(r)))
135 goto Found;
136 for (r=r0; r<r1; r++)
137 if (!bshas(m->b, r))
138 goto Found;
139 die("no more regs");
141 Found:
142 radd(m, t, r);
143 sethint(t, r);
144 tmp[t].visit = r;
145 h = *hint(t);
146 if (h != -1 && h != r)
147 m->w[h] = t;
148 return TMP(r);
151 static inline Ref
152 ralloc(RMap *m, int t)
154 return ralloctry(m, t, 0);
157 static int
158 rfree(RMap *m, int t)
160 int i, r;
162 assert(t >= Tmp0 || !(BIT(t) & T.rglob));
163 if (!bshas(m->b, t))
164 return -1;
165 for (i=0; m->t[i] != t; i++)
166 assert(i+1 < m->n);
167 r = m->r[i];
168 bsclr(m->b, t);
169 bsclr(m->b, r);
170 m->n--;
171 memmove(&m->t[i], &m->t[i+1], (m->n-i) * sizeof m->t[0]);
172 memmove(&m->r[i], &m->r[i+1], (m->n-i) * sizeof m->r[0]);
173 assert(t >= Tmp0 || t == r);
174 return r;
177 static void
178 mdump(RMap *m)
180 int i;
182 for (i=0; i<m->n; i++)
183 if (m->t[i] >= Tmp0)
184 fprintf(stderr, " (%s, R%d)",
185 tmp[m->t[i]].name,
186 m->r[i]);
187 fprintf(stderr, "\n");
190 static void
191 pmadd(Ref src, Ref dst, int k)
193 if (npm == Tmp0)
194 die("cannot have more moves than registers");
195 pm[npm].src = src;
196 pm[npm].dst = dst;
197 pm[npm].cls = k;
198 npm++;
201 enum PMStat { ToMove, Moving, Moved };
203 static int
204 pmrec(enum PMStat *status, int i, int *k)
206 int j, c;
208 /* note, this routine might emit
209 * too many large instructions
211 if (req(pm[i].src, pm[i].dst)) {
212 status[i] = Moved;
213 return -1;
215 assert(KBASE(pm[i].cls) == KBASE(*k));
216 assert((Kw|Kl) == Kl && (Ks|Kd) == Kd);
217 *k |= pm[i].cls;
218 for (j=0; j<npm; j++)
219 if (req(pm[j].dst, pm[i].src))
220 break;
221 switch (j == npm ? Moved : status[j]) {
222 case Moving:
223 c = j; /* start of cycle */
224 emit(Oswap, *k, R, pm[i].src, pm[i].dst);
225 break;
226 case ToMove:
227 status[i] = Moving;
228 c = pmrec(status, j, k);
229 if (c == i) {
230 c = -1; /* end of cycle */
231 break;
233 if (c != -1) {
234 emit(Oswap, *k, R, pm[i].src, pm[i].dst);
235 break;
237 /* fall through */
238 case Moved:
239 c = -1;
240 emit(Ocopy, pm[i].cls, pm[i].dst, pm[i].src, R);
241 break;
243 status[i] = Moved;
244 return c;
247 static void
248 pmgen()
250 int i;
251 enum PMStat *status;
253 status = alloc(npm * sizeof status[0]);
254 assert(!npm || status[npm-1] == ToMove);
255 for (i=0; i<npm; i++)
256 if (status[i] == ToMove)
257 pmrec(status, i, (int[]){pm[i].cls});
260 static void
261 move(int r, Ref to, RMap *m)
263 int n, t, r1;
265 r1 = req(to, R) ? -1 : rfree(m, to.val);
266 if (bshas(m->b, r)) {
267 /* r is used and not by to */
268 assert(r1 != r);
269 for (n=0; m->r[n] != r; n++)
270 assert(n+1 < m->n);
271 t = m->t[n];
272 rfree(m, t);
273 bsset(m->b, r);
274 ralloc(m, t);
275 bsclr(m->b, r);
277 t = req(to, R) ? r : to.val;
278 radd(m, t, r);
281 static int
282 regcpy(Ins *i)
284 return i->op == Ocopy && isreg(i->arg[0]);
287 static Ins *
288 dopm(Blk *b, Ins *i, RMap *m)
290 RMap m0;
291 int n, r, r1, t, s;
292 Ins *i1, *ip;
293 bits def;
295 m0 = *m; /* okay since we don't use m0.b */
296 m0.b->t = 0;
297 i1 = ++i;
298 do {
299 i--;
300 move(i->arg[0].val, i->to, m);
301 } while (i != b->ins && regcpy(i-1));
302 assert(m0.n <= m->n);
303 if (i != b->ins && (i-1)->op == Ocall) {
304 def = T.retregs((i-1)->arg[1], 0) | T.rglob;
305 for (r=0; T.rsave[r]>=0; r++)
306 if (!(BIT(T.rsave[r]) & def))
307 move(T.rsave[r], R, m);
309 for (npm=0, n=0; n<m->n; n++) {
310 t = m->t[n];
311 s = tmp[t].slot;
312 r1 = m->r[n];
313 r = rfind(&m0, t);
314 if (r != -1)
315 pmadd(TMP(r1), TMP(r), tmp[t].cls);
316 else if (s != -1)
317 pmadd(TMP(r1), SLOT(s), tmp[t].cls);
319 for (ip=i; ip<i1; ip++) {
320 if (!req(ip->to, R))
321 rfree(m, ip->to.val);
322 r = ip->arg[0].val;
323 if (rfind(m, r) == -1)
324 radd(m, r, r);
326 pmgen();
327 return i;
330 static int
331 prio1(Ref r1, Ref r2)
333 /* trivial heuristic to begin with,
334 * later we can use the distance to
335 * the definition instruction
337 (void) r2;
338 return *hint(r1.val) != -1;
341 static void
342 insert(Ref *r, Ref **rs, int p)
344 int i;
346 rs[i = p] = r;
347 while (i-- > 0 && prio1(*r, *rs[i])) {
348 rs[i+1] = rs[i];
349 rs[i] = r;
353 static void
354 doblk(Blk *b, RMap *cur)
356 int t, x, r, rf, rt, nr;
357 bits rs;
358 Ins *i, *i1;
359 Mem *m;
360 Ref *ra[4];
362 for (r=0; bsiter(b->out, &r) && r<Tmp0; r++)
363 radd(cur, r, r);
364 if (rtype(b->jmp.arg) == RTmp)
365 b->jmp.arg = ralloc(cur, b->jmp.arg.val);
366 curi = &insb[NIns];
367 for (i1=&b->ins[b->nins]; i1!=b->ins;) {
368 emiti(*--i1);
369 i = curi;
370 rf = -1;
371 switch (i->op) {
372 case Ocall:
373 rs = T.argregs(i->arg[1], 0) | T.rglob;
374 for (r=0; T.rsave[r]>=0; r++)
375 if (!(BIT(T.rsave[r]) & rs))
376 rfree(cur, T.rsave[r]);
377 break;
378 case Ocopy:
379 if (regcpy(i)) {
380 curi++;
381 i1 = dopm(b, i1, cur);
382 stmov += i+1 - curi;
383 continue;
385 if (isreg(i->to))
386 if (rtype(i->arg[0]) == RTmp)
387 sethint(i->arg[0].val, i->to.val);
388 /* fall through */
389 default:
390 if (!req(i->to, R)) {
391 assert(rtype(i->to) == RTmp);
392 r = i->to.val;
393 if (r < Tmp0 && (BIT(r) & T.rglob))
394 break;
395 rf = rfree(cur, r);
396 if (rf == -1) {
397 assert(!isreg(i->to));
398 curi++;
399 continue;
401 i->to = TMP(rf);
403 break;
405 for (x=0, nr=0; x<2; x++)
406 switch (rtype(i->arg[x])) {
407 case RMem:
408 m = &mem[i->arg[x].val];
409 if (rtype(m->base) == RTmp)
410 insert(&m->base, ra, nr++);
411 if (rtype(m->index) == RTmp)
412 insert(&m->index, ra, nr++);
413 break;
414 case RTmp:
415 insert(&i->arg[x], ra, nr++);
416 break;
418 for (r=0; r<nr; r++)
419 *ra[r] = ralloc(cur, ra[r]->val);
421 /* try to change the register of a hinted
422 * temporary if rf is available */
423 x = 1;
424 if (rf != -1 && (t = cur->w[rf]) != 0)
425 if (!bshas(cur->b, rf) && *hint(t) == rf
426 && (rt = rfree(cur, t)) != -1) {
427 tmp[t].visit = -1;
428 ralloc(cur, t);
429 assert(bshas(cur->b, rf));
430 emit(Ocopy, tmp[t].cls, TMP(rt), TMP(rf), R);
431 stmov += 1;
432 cur->w[rf] = 0;
433 for (r=0; r<nr; r++)
434 if (req(*ra[r], TMP(rt)))
435 *ra[r] = TMP(rf);
436 /* one could iterate this logic with
437 * the newly freed rt, but in this case
438 * the above loop must be changed */
441 b->nins = &insb[NIns] - curi;
442 idup(&b->ins, curi, b->nins);
445 /* qsort() comparison function to peel
446 * loop nests from inside out */
447 static int
448 carve(const void *a, const void *b)
450 Blk *ba, *bb;
452 /* todo, evaluate if this order is really
453 * better than the simple postorder */
454 ba = *(Blk**)a;
455 bb = *(Blk**)b;
456 if (ba->loop == bb->loop)
457 return ba->id > bb->id ? -1 : ba->id < bb->id;
458 return ba->loop > bb->loop ? -1 : +1;
461 /* comparison function to order temporaries
462 * for allocation at the end of blocks */
463 static int
464 prio2(int t1, int t2)
466 if ((tmp[t1].visit ^ tmp[t2].visit) < 0) /* != signs */
467 return tmp[t1].visit != -1 ? +1 : -1;
468 if ((*hint(t1) ^ *hint(t2)) < 0)
469 return *hint(t1) != -1 ? +1 : -1;
470 return tmp[t1].cost - tmp[t2].cost;
473 /* register allocation
474 * depends on rpo, phi, cost, (and obviously spill)
476 void
477 rega(Fn *fn)
479 int j, t, r, x, rl[Tmp0];
480 Blk *b, *b1, *s, ***ps, *blist, **blk, **bp;
481 RMap *end, *beg, cur, old, *m;
482 Ins *i;
483 Phi *p;
484 uint u, n;
485 Ref src, dst;
487 /* 1. setup */
488 stmov = 0;
489 stblk = 0;
490 regu = 0;
491 tmp = fn->tmp;
492 mem = fn->mem;
493 blk = alloc(fn->nblk * sizeof blk[0]);
494 end = alloc(fn->nblk * sizeof end[0]);
495 beg = alloc(fn->nblk * sizeof beg[0]);
496 for (n=0; n<fn->nblk; n++) {
497 bsinit(end[n].b, fn->ntmp);
498 bsinit(beg[n].b, fn->ntmp);
500 bsinit(cur.b, fn->ntmp);
501 bsinit(old.b, fn->ntmp);
503 loop = INT_MAX;
504 for (t=0; t<fn->ntmp; t++) {
505 tmp[t].hint.r = t < Tmp0 ? t : -1;
506 tmp[t].hint.w = loop;
507 tmp[t].visit = -1;
509 for (bp=blk, b=fn->start; b; b=b->link)
510 *bp++ = b;
511 qsort(blk, fn->nblk, sizeof blk[0], carve);
512 for (b=fn->start, i=b->ins; i<&b->ins[b->nins]; i++)
513 if (i->op != Ocopy || !isreg(i->arg[0]))
514 break;
515 else {
516 assert(rtype(i->to) == RTmp);
517 sethint(i->to.val, i->arg[0].val);
520 /* 2. assign registers */
521 for (bp=blk; bp<&blk[fn->nblk]; bp++) {
522 b = *bp;
523 n = b->id;
524 loop = b->loop;
525 cur.n = 0;
526 bszero(cur.b);
527 memset(cur.w, 0, sizeof cur.w);
528 for (x=0, t=Tmp0; bsiter(b->out, &t); t++) {
529 j = x++;
530 rl[j] = t;
531 while (j-- > 0 && prio2(t, rl[j]) > 0) {
532 rl[j+1] = rl[j];
533 rl[j] = t;
536 for (j=0; j<x; j++)
537 ralloctry(&cur, rl[j], 1);
538 for (j=0; j<x; j++)
539 ralloc(&cur, rl[j]);
540 rcopy(&end[n], &cur);
541 doblk(b, &cur);
542 bscopy(b->in, cur.b);
543 for (p=b->phi; p; p=p->link)
544 if (rtype(p->to) == RTmp)
545 bsclr(b->in, p->to.val);
546 rcopy(&beg[n], &cur);
549 /* 3. emit copies shared by multiple edges
550 * to the same block */
551 for (s=fn->start; s; s=s->link) {
552 if (s->npred <= 1)
553 continue;
554 m = &beg[s->id];
556 /* rl maps a register that is live at the
557 * beginning of s to the one used in all
558 * predecessors (if any, -1 otherwise) */
559 memset(rl, 0, sizeof rl);
561 /* to find the register of a phi in a
562 * predecessor, we have to find the
563 * corresponding argument */
564 for (p=s->phi; p; p=p->link) {
565 r = rfind(m, p->to.val);
566 if (r == -1)
567 continue;
568 for (u=0; u<p->narg; u++) {
569 b = p->blk[u];
570 src = p->arg[u];
571 if (rtype(src) != RTmp)
572 continue;
573 x = rfind(&end[b->id], src.val);
574 if (x == -1) /* spilled */
575 continue;
576 rl[r] = (!rl[r] || rl[r] == x) ? x : -1;
578 if (rl[r] == 0)
579 rl[r] = -1;
582 /* process non-phis temporaries */
583 for (j=0; j<m->n; j++) {
584 t = m->t[j];
585 r = m->r[j];
586 if (rl[r] || t < Tmp0 /* todo, remove this */)
587 continue;
588 for (bp=s->pred; bp<&s->pred[s->npred]; bp++) {
589 x = rfind(&end[(*bp)->id], t);
590 if (x == -1) /* spilled */
591 continue;
592 rl[r] = (!rl[r] || rl[r] == x) ? x : -1;
596 npm = 0;
597 for (j=0; j<m->n; j++) {
598 t = m->t[j];
599 r = m->r[j];
600 x = rl[r];
601 assert(x != 0 || t < Tmp0 /* todo, ditto */);
602 if (x > 0 && !bshas(m->b, x)) {
603 pmadd(TMP(x), TMP(r), tmp[t].cls);
604 m->r[j] = x;
607 curi = &insb[NIns];
608 pmgen();
609 j = &insb[NIns] - curi;
610 if (j == 0)
611 continue;
612 stmov += j;
613 s->nins += j;
614 i = alloc(s->nins * sizeof(Ins));
615 icpy(icpy(i, curi, j), s->ins, s->nins-j);
616 s->ins = i;
619 if (debug['R']) {
620 fprintf(stderr, "\n> Register mappings:\n");
621 for (n=0; n<fn->nblk; n++) {
622 b = fn->rpo[n];
623 fprintf(stderr, "\t%-10s beg", b->name);
624 mdump(&beg[n]);
625 fprintf(stderr, "\t end");
626 mdump(&end[n]);
628 fprintf(stderr, "\n");
631 /* 4. emit remaining copies in new blocks */
632 blist = 0;
633 for (b=fn->start;; b=b->link) {
634 ps = (Blk**[3]){&b->s1, &b->s2, (Blk*[1]){0}};
635 for (; (s=**ps); ps++) {
636 npm = 0;
637 for (p=s->phi; p; p=p->link) {
638 dst = p->to;
639 assert(rtype(dst)==RSlot || rtype(dst)==RTmp);
640 if (rtype(dst) == RTmp) {
641 r = rfind(&beg[s->id], dst.val);
642 if (r == -1)
643 continue;
644 dst = TMP(r);
646 for (u=0; p->blk[u]!=b; u++)
647 assert(u+1 < p->narg);
648 src = p->arg[u];
649 if (rtype(src) == RTmp)
650 src = rref(&end[b->id], src.val);
651 pmadd(src, dst, p->cls);
653 for (t=Tmp0; bsiter(s->in, &t); t++) {
654 src = rref(&end[b->id], t);
655 dst = rref(&beg[s->id], t);
656 pmadd(src, dst, tmp[t].cls);
658 curi = &insb[NIns];
659 pmgen();
660 if (curi == &insb[NIns])
661 continue;
662 b1 = blknew();
663 b1->loop = (b->loop+s->loop) / 2;
664 b1->link = blist;
665 blist = b1;
666 fn->nblk++;
667 sprintf(b1->name, "%s_%s", b->name, s->name);
668 b1->nins = &insb[NIns] - curi;
669 stmov += b1->nins;
670 stblk += 1;
671 idup(&b1->ins, curi, b1->nins);
672 b1->jmp.type = Jjmp;
673 b1->s1 = s;
674 **ps = b1;
676 if (!b->link) {
677 b->link = blist;
678 break;
681 for (b=fn->start; b; b=b->link)
682 b->phi = 0;
683 fn->reg = regu;
685 if (debug['R']) {
686 fprintf(stderr, "\n> Register allocation statistics:\n");
687 fprintf(stderr, "\tnew moves: %d\n", stmov);
688 fprintf(stderr, "\tnew blocks: %d\n", stblk);
689 fprintf(stderr, "\n> After register allocation:\n");
690 printfn(fn, stderr);