5 #define assert(x) assert_test(#x, x)
8 typedef struct RMap RMap
;
13 int w
[Tmp0
]; /* wait list, for unmatched hints */
18 static bits regu
; /* registers used */
19 static Tmp
*tmp
; /* function temporaries */
20 static Mem
*mem
; /* function mem references */
24 } pm
[Tmp0
]; /* parallel move constructed */
25 static int npm
; /* size of pm */
26 static int loop
; /* current loop level */
28 static uint stmov
; /* stats: added moves */
29 static uint stblk
; /* stats: added blocks */
34 return &tmp
[phicls(t
, tmp
)].hint
.r
;
42 p
= &tmp
[phicls(t
, tmp
)];
43 if (p
->hint
.r
== -1 || p
->hint
.w
> loop
) {
51 rcopy(RMap
*ma
, RMap
*mb
)
53 memcpy(ma
->t
, mb
->t
, sizeof ma
->t
);
54 memcpy(ma
->r
, mb
->r
, sizeof ma
->r
);
55 memcpy(ma
->w
, mb
->w
, sizeof ma
->w
);
65 for (i
=0; i
<m
->n
; i
++)
79 assert(s
!= -1 && "should have spilled");
86 radd(RMap
*m
, int t
, int r
)
88 assert((t
>= Tmp0
|| t
== r
) && "invalid temporary");
89 assert(((T
.gpr0
<= r
&& r
< T
.gpr0
+ T
.ngpr
)
90 || (T
.fpr0
<= r
&& r
< T
.fpr0
+ T
.nfpr
))
91 && "invalid register");
92 assert(!bshas(m
->b
, t
) && "temporary has mapping");
93 assert(!bshas(m
->b
, r
) && "register already allocated");
94 assert(m
->n
<= T
.ngpr
+T
.nfpr
&& "too many mappings");
104 ralloctry(RMap
*m
, int t
, int try)
110 assert(bshas(m
->b
, t
));
113 if (bshas(m
->b
, t
)) {
119 if (r
== -1 || bshas(m
->b
, r
))
121 if (r
== -1 || bshas(m
->b
, r
)) {
124 regs
= tmp
[phicls(t
, tmp
)].hint
.m
;
126 if (KBASE(tmp
[t
].cls
) == 0) {
133 for (r
=r0
; r
<r1
; r
++)
134 if (!(regs
& BIT(r
)))
136 for (r
=r0
; r
<r1
; r
++)
146 if (h
!= -1 && h
!= r
)
152 ralloc(RMap
*m
, int t
)
154 return ralloctry(m
, t
, 0);
158 rfree(RMap
*m
, int t
)
162 assert(t
>= Tmp0
|| !(BIT(t
) & T
.rglob
));
165 for (i
=0; m
->t
[i
] != t
; i
++)
171 memmove(&m
->t
[i
], &m
->t
[i
+1], (m
->n
-i
) * sizeof m
->t
[0]);
172 memmove(&m
->r
[i
], &m
->r
[i
+1], (m
->n
-i
) * sizeof m
->r
[0]);
173 assert(t
>= Tmp0
|| t
== r
);
182 for (i
=0; i
<m
->n
; i
++)
184 fprintf(stderr
, " (%s, R%d)",
187 fprintf(stderr
, "\n");
191 pmadd(Ref src
, Ref dst
, int k
)
194 die("cannot have more moves than registers");
201 enum PMStat
{ ToMove
, Moving
, Moved
};
204 pmrec(enum PMStat
*status
, int i
, int *k
)
208 /* note, this routine might emit
209 * too many large instructions
211 if (req(pm
[i
].src
, pm
[i
].dst
)) {
215 assert(KBASE(pm
[i
].cls
) == KBASE(*k
));
216 assert((Kw
|Kl
) == Kl
&& (Ks
|Kd
) == Kd
);
218 for (j
=0; j
<npm
; j
++)
219 if (req(pm
[j
].dst
, pm
[i
].src
))
221 switch (j
== npm
? Moved
: status
[j
]) {
223 c
= j
; /* start of cycle */
224 emit(Oswap
, *k
, R
, pm
[i
].src
, pm
[i
].dst
);
228 c
= pmrec(status
, j
, k
);
230 c
= -1; /* end of cycle */
234 emit(Oswap
, *k
, R
, pm
[i
].src
, pm
[i
].dst
);
240 emit(Ocopy
, pm
[i
].cls
, pm
[i
].dst
, pm
[i
].src
, R
);
253 status
= alloc(npm
* sizeof status
[0]);
254 assert(!npm
|| status
[npm
-1] == ToMove
);
255 for (i
=0; i
<npm
; i
++)
256 if (status
[i
] == ToMove
)
257 pmrec(status
, i
, (int[]){pm
[i
].cls
});
261 move(int r
, Ref to
, RMap
*m
)
265 r1
= req(to
, R
) ? -1 : rfree(m
, to
.val
);
266 if (bshas(m
->b
, r
)) {
267 /* r is used and not by to */
269 for (n
=0; m
->r
[n
] != r
; n
++)
277 t
= req(to
, R
) ? r
: to
.val
;
284 return i
->op
== Ocopy
&& isreg(i
->arg
[0]);
288 dopm(Blk
*b
, Ins
*i
, RMap
*m
)
295 m0
= *m
; /* okay since we don't use m0.b */
300 move(i
->arg
[0].val
, i
->to
, m
);
301 } while (i
!= b
->ins
&& regcpy(i
-1));
302 assert(m0
.n
<= m
->n
);
303 if (i
!= b
->ins
&& (i
-1)->op
== Ocall
) {
304 def
= T
.retregs((i
-1)->arg
[1], 0) | T
.rglob
;
305 for (r
=0; T
.rsave
[r
]>=0; r
++)
306 if (!(BIT(T
.rsave
[r
]) & def
))
307 move(T
.rsave
[r
], R
, m
);
309 for (npm
=0, n
=0; n
<m
->n
; n
++) {
315 pmadd(TMP(r1
), TMP(r
), tmp
[t
].cls
);
317 pmadd(TMP(r1
), SLOT(s
), tmp
[t
].cls
);
319 for (ip
=i
; ip
<i1
; ip
++) {
321 rfree(m
, ip
->to
.val
);
323 if (rfind(m
, r
) == -1)
331 prio1(Ref r1
, Ref r2
)
333 /* trivial heuristic to begin with,
334 * later we can use the distance to
335 * the definition instruction
338 return *hint(r1
.val
) != -1;
342 insert(Ref
*r
, Ref
**rs
, int p
)
347 while (i
-- > 0 && prio1(*r
, *rs
[i
])) {
354 doblk(Blk
*b
, RMap
*cur
)
356 int t
, x
, r
, rf
, rt
, nr
;
362 if (rtype(b
->jmp
.arg
) == RTmp
)
363 b
->jmp
.arg
= ralloc(cur
, b
->jmp
.arg
.val
);
365 for (i1
=&b
->ins
[b
->nins
]; i1
!=b
->ins
;) {
371 rs
= T
.argregs(i
->arg
[1], 0) | T
.rglob
;
372 for (r
=0; T
.rsave
[r
]>=0; r
++)
373 if (!(BIT(T
.rsave
[r
]) & rs
))
374 rfree(cur
, T
.rsave
[r
]);
379 i1
= dopm(b
, i1
, cur
);
384 if (rtype(i
->arg
[0]) == RTmp
)
385 sethint(i
->arg
[0].val
, i
->to
.val
);
388 if (!req(i
->to
, R
)) {
389 assert(rtype(i
->to
) == RTmp
);
391 if (r
< Tmp0
&& (BIT(r
) & T
.rglob
))
395 assert(!isreg(i
->to
));
403 for (x
=0, nr
=0; x
<2; x
++)
404 switch (rtype(i
->arg
[x
])) {
406 m
= &mem
[i
->arg
[x
].val
];
407 if (rtype(m
->base
) == RTmp
)
408 insert(&m
->base
, ra
, nr
++);
409 if (rtype(m
->index
) == RTmp
)
410 insert(&m
->index
, ra
, nr
++);
413 insert(&i
->arg
[x
], ra
, nr
++);
417 *ra
[r
] = ralloc(cur
, ra
[r
]->val
);
418 if (i
->op
== Ocopy
&& req(i
->to
, i
->arg
[0]))
421 /* try to change the register of a hinted
422 * temporary if rf is available */
423 if (rf
!= -1 && (t
= cur
->w
[rf
]) != 0)
424 if (!bshas(cur
->b
, rf
) && *hint(t
) == rf
425 && (rt
= rfree(cur
, t
)) != -1) {
428 assert(bshas(cur
->b
, rf
));
429 emit(Ocopy
, tmp
[t
].cls
, TMP(rt
), TMP(rf
), R
);
433 if (req(*ra
[r
], TMP(rt
)))
435 /* one could iterate this logic with
436 * the newly freed rt, but in this case
437 * the above loop must be changed */
440 b
->nins
= &insb
[NIns
] - curi
;
441 idup(&b
->ins
, curi
, b
->nins
);
444 /* qsort() comparison function to peel
445 * loop nests from inside out */
447 carve(const void *a
, const void *b
)
451 /* todo, evaluate if this order is really
452 * better than the simple postorder */
455 if (ba
->loop
== bb
->loop
)
456 return ba
->id
> bb
->id
? -1 : ba
->id
< bb
->id
;
457 return ba
->loop
> bb
->loop
? -1 : +1;
460 /* comparison function to order temporaries
461 * for allocation at the end of blocks */
463 prio2(int t1
, int t2
)
465 if ((tmp
[t1
].visit
^ tmp
[t2
].visit
) < 0) /* != signs */
466 return tmp
[t1
].visit
!= -1 ? +1 : -1;
467 if ((*hint(t1
) ^ *hint(t2
)) < 0)
468 return *hint(t1
) != -1 ? +1 : -1;
469 return tmp
[t1
].cost
- tmp
[t2
].cost
;
472 /* register allocation
473 * depends on rpo, phi, cost, (and obviously spill)
478 int j
, t
, r
, x
, rl
[Tmp0
];
479 Blk
*b
, *b1
, *s
, ***ps
, *blist
, **blk
, **bp
;
480 RMap
*end
, *beg
, cur
, old
, *m
;
492 blk
= alloc(fn
->nblk
* sizeof blk
[0]);
493 end
= alloc(fn
->nblk
* sizeof end
[0]);
494 beg
= alloc(fn
->nblk
* sizeof beg
[0]);
495 for (n
=0; n
<fn
->nblk
; n
++) {
496 bsinit(end
[n
].b
, fn
->ntmp
);
497 bsinit(beg
[n
].b
, fn
->ntmp
);
499 bsinit(cur
.b
, fn
->ntmp
);
500 bsinit(old
.b
, fn
->ntmp
);
503 for (t
=0; t
<fn
->ntmp
; t
++) {
504 tmp
[t
].hint
.r
= t
< Tmp0
? t
: -1;
505 tmp
[t
].hint
.w
= loop
;
508 for (bp
=blk
, b
=fn
->start
; b
; b
=b
->link
)
510 qsort(blk
, fn
->nblk
, sizeof blk
[0], carve
);
511 for (b
=fn
->start
, i
=b
->ins
; i
<&b
->ins
[b
->nins
]; i
++)
512 if (i
->op
!= Ocopy
|| !isreg(i
->arg
[0]))
515 assert(rtype(i
->to
) == RTmp
);
516 sethint(i
->to
.val
, i
->arg
[0].val
);
519 /* 2. assign registers */
520 for (bp
=blk
; bp
<&blk
[fn
->nblk
]; bp
++) {
526 memset(cur
.w
, 0, sizeof cur
.w
);
527 for (x
=0, t
=Tmp0
; bsiter(b
->out
, &t
); t
++) {
530 while (j
-- > 0 && prio2(t
, rl
[j
]) > 0) {
535 for (r
=0; bsiter(b
->out
, &r
) && r
<Tmp0
; r
++)
538 ralloctry(&cur
, rl
[j
], 1);
541 rcopy(&end
[n
], &cur
);
543 bscopy(b
->in
, cur
.b
);
544 for (p
=b
->phi
; p
; p
=p
->link
)
545 if (rtype(p
->to
) == RTmp
)
546 bsclr(b
->in
, p
->to
.val
);
547 rcopy(&beg
[n
], &cur
);
550 /* 3. emit copies shared by multiple edges
551 * to the same block */
552 for (s
=fn
->start
; s
; s
=s
->link
) {
557 /* rl maps a register that is live at the
558 * beginning of s to the one used in all
559 * predecessors (if any, -1 otherwise) */
560 memset(rl
, 0, sizeof rl
);
562 /* to find the register of a phi in a
563 * predecessor, we have to find the
564 * corresponding argument */
565 for (p
=s
->phi
; p
; p
=p
->link
) {
566 if (rtype(p
->to
) != RTmp
567 || (r
=rfind(m
, p
->to
.val
)) == -1)
569 for (u
=0; u
<p
->narg
; u
++) {
572 if (rtype(src
) != RTmp
)
574 x
= rfind(&end
[b
->id
], src
.val
);
575 if (x
== -1) /* spilled */
577 rl
[r
] = (!rl
[r
] || rl
[r
] == x
) ? x
: -1;
583 /* process non-phis temporaries */
584 for (j
=0; j
<m
->n
; j
++) {
587 if (rl
[r
] || t
< Tmp0
/* todo, remove this */)
589 for (bp
=s
->pred
; bp
<&s
->pred
[s
->npred
]; bp
++) {
590 x
= rfind(&end
[(*bp
)->id
], t
);
591 if (x
== -1) /* spilled */
593 rl
[r
] = (!rl
[r
] || rl
[r
] == x
) ? x
: -1;
600 for (j
=0; j
<m
->n
; j
++) {
604 assert(x
!= 0 || t
< Tmp0
/* todo, ditto */);
605 if (x
> 0 && !bshas(m
->b
, x
)) {
606 pmadd(TMP(x
), TMP(r
), tmp
[t
].cls
);
613 j
= &insb
[NIns
] - curi
;
618 i
= alloc(s
->nins
* sizeof(Ins
));
619 icpy(icpy(i
, curi
, j
), s
->ins
, s
->nins
-j
);
624 fprintf(stderr
, "\n> Register mappings:\n");
625 for (n
=0; n
<fn
->nblk
; n
++) {
627 fprintf(stderr
, "\t%-10s beg", b
->name
);
629 fprintf(stderr
, "\t end");
632 fprintf(stderr
, "\n");
635 /* 4. emit remaining copies in new blocks */
637 for (b
=fn
->start
;; b
=b
->link
) {
638 ps
= (Blk
**[3]){&b
->s1
, &b
->s2
, (Blk
*[1]){0}};
639 for (; (s
=**ps
); ps
++) {
641 for (p
=s
->phi
; p
; p
=p
->link
) {
643 assert(rtype(dst
)==RSlot
|| rtype(dst
)==RTmp
);
644 if (rtype(dst
) == RTmp
) {
645 r
= rfind(&beg
[s
->id
], dst
.val
);
650 for (u
=0; p
->blk
[u
]!=b
; u
++)
651 assert(u
+1 < p
->narg
);
653 if (rtype(src
) == RTmp
)
654 src
= rref(&end
[b
->id
], src
.val
);
655 pmadd(src
, dst
, p
->cls
);
657 for (t
=Tmp0
; bsiter(s
->in
, &t
); t
++) {
658 src
= rref(&end
[b
->id
], t
);
659 dst
= rref(&beg
[s
->id
], t
);
660 pmadd(src
, dst
, tmp
[t
].cls
);
664 if (curi
== &insb
[NIns
])
667 b1
->loop
= (b
->loop
+s
->loop
) / 2;
671 (void)!snprintf(b1
->name
, sizeof(b1
->name
), "%s_%s", b
->name
, s
->name
);
672 b1
->nins
= &insb
[NIns
] - curi
;
675 idup(&b1
->ins
, curi
, b1
->nins
);
685 for (b
=fn
->start
; b
; b
=b
->link
)
690 fprintf(stderr
, "\n> Register allocation statistics:\n");
691 fprintf(stderr
, "\tnew moves: %d\n", stmov
);
692 fprintf(stderr
, "\tnew blocks: %d\n", stblk
);
693 fprintf(stderr
, "\n> After register allocation:\n");