5 #define assert(x) assert_test(#x, x)
8 typedef struct RMap RMap
;
13 int w
[Tmp0
]; /* wait list, for unmatched hints */
18 static bits regu
; /* registers used */
19 static Tmp
*tmp
; /* function temporaries */
20 static Mem
*mem
; /* function mem references */
24 } pm
[Tmp0
]; /* parallel move constructed */
25 static int npm
; /* size of pm */
26 static int loop
; /* current loop level */
28 static uint stmov
; /* stats: added moves */
29 static uint stblk
; /* stats: added blocks */
34 return &tmp
[phicls(t
, tmp
)].hint
.r
;
42 p
= &tmp
[phicls(t
, tmp
)];
43 if (p
->hint
.r
== -1 || p
->hint
.w
> loop
) {
51 rcopy(RMap
*ma
, RMap
*mb
)
53 memcpy(ma
->t
, mb
->t
, sizeof ma
->t
);
54 memcpy(ma
->r
, mb
->r
, sizeof ma
->r
);
55 memcpy(ma
->w
, mb
->w
, sizeof ma
->w
);
65 for (i
=0; i
<m
->n
; i
++)
79 assert(s
!= -1 && "should have spilled");
86 radd(RMap
*m
, int t
, int r
)
88 assert((t
>= Tmp0
|| t
== r
) && "invalid temporary");
89 assert(((T
.gpr0
<= r
&& r
< T
.gpr0
+ T
.ngpr
)
90 || (T
.fpr0
<= r
&& r
< T
.fpr0
+ T
.nfpr
))
91 && "invalid register");
92 assert(!bshas(m
->b
, t
) && "temporary has mapping");
93 assert(!bshas(m
->b
, r
) && "register already allocated");
94 assert(m
->n
<= T
.ngpr
+T
.nfpr
&& "too many mappings");
104 ralloctry(RMap
*m
, int t
, int try)
110 assert(bshas(m
->b
, t
));
113 if (bshas(m
->b
, t
)) {
119 if (r
== -1 || bshas(m
->b
, r
))
121 if (r
== -1 || bshas(m
->b
, r
)) {
124 regs
= tmp
[phicls(t
, tmp
)].hint
.m
;
126 if (KBASE(tmp
[t
].cls
) == 0) {
133 for (r
=r0
; r
<r1
; r
++)
134 if (!(regs
& BIT(r
)))
136 for (r
=r0
; r
<r1
; r
++)
146 if (h
!= -1 && h
!= r
)
152 ralloc(RMap
*m
, int t
)
154 return ralloctry(m
, t
, 0);
158 rfree(RMap
*m
, int t
)
162 assert(t
>= Tmp0
|| !(BIT(t
) & T
.rglob
));
165 for (i
=0; m
->t
[i
] != t
; i
++)
171 memmove(&m
->t
[i
], &m
->t
[i
+1], (m
->n
-i
) * sizeof m
->t
[0]);
172 memmove(&m
->r
[i
], &m
->r
[i
+1], (m
->n
-i
) * sizeof m
->r
[0]);
173 assert(t
>= Tmp0
|| t
== r
);
182 for (i
=0; i
<m
->n
; i
++)
184 fprintf(stderr
, " (%s, R%d)",
187 fprintf(stderr
, "\n");
191 pmadd(Ref src
, Ref dst
, int k
)
194 die("cannot have more moves than registers");
201 enum PMStat
{ ToMove
, Moving
, Moved
};
204 pmrec(enum PMStat
*status
, int i
, int *k
)
208 /* note, this routine might emit
209 * too many large instructions
211 if (req(pm
[i
].src
, pm
[i
].dst
)) {
215 assert(KBASE(pm
[i
].cls
) == KBASE(*k
));
216 assert((Kw
|Kl
) == Kl
&& (Ks
|Kd
) == Kd
);
218 for (j
=0; j
<npm
; j
++)
219 if (req(pm
[j
].dst
, pm
[i
].src
))
221 switch (j
== npm
? Moved
: status
[j
]) {
223 c
= j
; /* start of cycle */
224 emit(Oswap
, *k
, R
, pm
[i
].src
, pm
[i
].dst
);
228 c
= pmrec(status
, j
, k
);
230 c
= -1; /* end of cycle */
234 emit(Oswap
, *k
, R
, pm
[i
].src
, pm
[i
].dst
);
240 emit(Ocopy
, pm
[i
].cls
, pm
[i
].dst
, pm
[i
].src
, R
);
253 status
= alloc(npm
* sizeof status
[0]);
254 assert(!npm
|| status
[npm
-1] == ToMove
);
255 for (i
=0; i
<npm
; i
++)
256 if (status
[i
] == ToMove
)
257 pmrec(status
, i
, (int[]){pm
[i
].cls
});
261 move(int r
, Ref to
, RMap
*m
)
265 r1
= req(to
, R
) ? -1 : rfree(m
, to
.val
);
266 if (bshas(m
->b
, r
)) {
267 /* r is used and not by to */
269 for (n
=0; m
->r
[n
] != r
; n
++)
277 t
= req(to
, R
) ? r
: to
.val
;
284 return i
->op
== Ocopy
&& isreg(i
->arg
[0]);
288 dopm(Blk
*b
, Ins
*i
, RMap
*m
)
295 m0
= *m
; /* okay since we don't use m0.b */
300 move(i
->arg
[0].val
, i
->to
, m
);
301 } while (i
!= b
->ins
&& regcpy(i
-1));
302 assert(m0
.n
<= m
->n
);
303 if (i
!= b
->ins
&& (i
-1)->op
== Ocall
) {
304 def
= T
.retregs((i
-1)->arg
[1], 0) | T
.rglob
;
305 for (r
=0; T
.rsave
[r
]>=0; r
++)
306 if (!(BIT(T
.rsave
[r
]) & def
))
307 move(T
.rsave
[r
], R
, m
);
309 for (npm
=0, n
=0; n
<m
->n
; n
++) {
315 pmadd(TMP(r1
), TMP(r
), tmp
[t
].cls
);
317 pmadd(TMP(r1
), SLOT(s
), tmp
[t
].cls
);
319 for (ip
=i
; ip
<i1
; ip
++) {
321 rfree(m
, ip
->to
.val
);
323 if (rfind(m
, r
) == -1)
331 prio1(Ref r1
, Ref r2
)
333 /* trivial heuristic to begin with,
334 * later we can use the distance to
335 * the definition instruction
338 return *hint(r1
.val
) != -1;
342 insert(Ref
*r
, Ref
**rs
, int p
)
347 while (i
-- > 0 && prio1(*r
, *rs
[i
])) {
354 doblk(Blk
*b
, RMap
*cur
)
356 int t
, x
, r
, rf
, rt
, nr
;
362 for (r
=0; bsiter(b
->out
, &r
) && r
<Tmp0
; r
++)
364 if (rtype(b
->jmp
.arg
) == RTmp
)
365 b
->jmp
.arg
= ralloc(cur
, b
->jmp
.arg
.val
);
367 for (i1
=&b
->ins
[b
->nins
]; i1
!=b
->ins
;) {
373 rs
= T
.argregs(i
->arg
[1], 0) | T
.rglob
;
374 for (r
=0; T
.rsave
[r
]>=0; r
++)
375 if (!(BIT(T
.rsave
[r
]) & rs
))
376 rfree(cur
, T
.rsave
[r
]);
381 i1
= dopm(b
, i1
, cur
);
386 if (rtype(i
->arg
[0]) == RTmp
)
387 sethint(i
->arg
[0].val
, i
->to
.val
);
390 if (!req(i
->to
, R
)) {
391 assert(rtype(i
->to
) == RTmp
);
393 if (r
< Tmp0
&& (BIT(r
) & T
.rglob
))
397 assert(!isreg(i
->to
));
405 for (x
=0, nr
=0; x
<2; x
++)
406 switch (rtype(i
->arg
[x
])) {
408 m
= &mem
[i
->arg
[x
].val
];
409 if (rtype(m
->base
) == RTmp
)
410 insert(&m
->base
, ra
, nr
++);
411 if (rtype(m
->index
) == RTmp
)
412 insert(&m
->index
, ra
, nr
++);
415 insert(&i
->arg
[x
], ra
, nr
++);
419 *ra
[r
] = ralloc(cur
, ra
[r
]->val
);
421 /* try to change the register of a hinted
422 * temporary if rf is available */
424 if (rf
!= -1 && (t
= cur
->w
[rf
]) != 0)
425 if (!bshas(cur
->b
, rf
) && *hint(t
) == rf
426 && (rt
= rfree(cur
, t
)) != -1) {
429 assert(bshas(cur
->b
, rf
));
430 emit(Ocopy
, tmp
[t
].cls
, TMP(rt
), TMP(rf
), R
);
434 if (req(*ra
[r
], TMP(rt
)))
436 /* one could iterate this logic with
437 * the newly freed rt, but in this case
438 * the above loop must be changed */
441 b
->nins
= &insb
[NIns
] - curi
;
442 idup(&b
->ins
, curi
, b
->nins
);
445 /* qsort() comparison function to peel
446 * loop nests from inside out */
448 carve(const void *a
, const void *b
)
452 /* todo, evaluate if this order is really
453 * better than the simple postorder */
456 if (ba
->loop
== bb
->loop
)
457 return ba
->id
> bb
->id
? -1 : ba
->id
< bb
->id
;
458 return ba
->loop
> bb
->loop
? -1 : +1;
461 /* comparison function to order temporaries
462 * for allocation at the end of blocks */
464 prio2(int t1
, int t2
)
466 if ((tmp
[t1
].visit
^ tmp
[t2
].visit
) < 0) /* != signs */
467 return tmp
[t1
].visit
!= -1 ? +1 : -1;
468 if ((*hint(t1
) ^ *hint(t2
)) < 0)
469 return *hint(t1
) != -1 ? +1 : -1;
470 return tmp
[t1
].cost
- tmp
[t2
].cost
;
473 /* register allocation
474 * depends on rpo, phi, cost, (and obviously spill)
479 int j
, t
, r
, x
, rl
[Tmp0
];
480 Blk
*b
, *b1
, *s
, ***ps
, *blist
, **blk
, **bp
;
481 RMap
*end
, *beg
, cur
, old
, *m
;
493 blk
= alloc(fn
->nblk
* sizeof blk
[0]);
494 end
= alloc(fn
->nblk
* sizeof end
[0]);
495 beg
= alloc(fn
->nblk
* sizeof beg
[0]);
496 for (n
=0; n
<fn
->nblk
; n
++) {
497 bsinit(end
[n
].b
, fn
->ntmp
);
498 bsinit(beg
[n
].b
, fn
->ntmp
);
500 bsinit(cur
.b
, fn
->ntmp
);
501 bsinit(old
.b
, fn
->ntmp
);
504 for (t
=0; t
<fn
->ntmp
; t
++) {
505 tmp
[t
].hint
.r
= t
< Tmp0
? t
: -1;
506 tmp
[t
].hint
.w
= loop
;
509 for (bp
=blk
, b
=fn
->start
; b
; b
=b
->link
)
511 qsort(blk
, fn
->nblk
, sizeof blk
[0], carve
);
512 for (b
=fn
->start
, i
=b
->ins
; i
<&b
->ins
[b
->nins
]; i
++)
513 if (i
->op
!= Ocopy
|| !isreg(i
->arg
[0]))
516 assert(rtype(i
->to
) == RTmp
);
517 sethint(i
->to
.val
, i
->arg
[0].val
);
520 /* 2. assign registers */
521 for (bp
=blk
; bp
<&blk
[fn
->nblk
]; bp
++) {
527 memset(cur
.w
, 0, sizeof cur
.w
);
528 for (x
=0, t
=Tmp0
; bsiter(b
->out
, &t
); t
++) {
531 while (j
-- > 0 && prio2(t
, rl
[j
]) > 0) {
537 ralloctry(&cur
, rl
[j
], 1);
540 rcopy(&end
[n
], &cur
);
542 bscopy(b
->in
, cur
.b
);
543 for (p
=b
->phi
; p
; p
=p
->link
)
544 if (rtype(p
->to
) == RTmp
)
545 bsclr(b
->in
, p
->to
.val
);
546 rcopy(&beg
[n
], &cur
);
549 /* 3. emit copies shared by multiple edges
550 * to the same block */
551 for (s
=fn
->start
; s
; s
=s
->link
) {
556 /* rl maps a register that is live at the
557 * beginning of s to the one used in all
558 * predecessors (if any, -1 otherwise) */
559 memset(rl
, 0, sizeof rl
);
561 /* to find the register of a phi in a
562 * predecessor, we have to find the
563 * corresponding argument */
564 for (p
=s
->phi
; p
; p
=p
->link
) {
565 r
= rfind(m
, p
->to
.val
);
568 for (u
=0; u
<p
->narg
; u
++) {
571 if (rtype(src
) != RTmp
)
573 x
= rfind(&end
[b
->id
], src
.val
);
574 if (x
== -1) /* spilled */
576 rl
[r
] = (!rl
[r
] || rl
[r
] == x
) ? x
: -1;
582 /* process non-phis temporaries */
583 for (j
=0; j
<m
->n
; j
++) {
586 if (rl
[r
] || t
< Tmp0
/* todo, remove this */)
588 for (bp
=s
->pred
; bp
<&s
->pred
[s
->npred
]; bp
++) {
589 x
= rfind(&end
[(*bp
)->id
], t
);
590 if (x
== -1) /* spilled */
592 rl
[r
] = (!rl
[r
] || rl
[r
] == x
) ? x
: -1;
597 for (j
=0; j
<m
->n
; j
++) {
601 assert(x
!= 0 || t
< Tmp0
/* todo, ditto */);
602 if (x
> 0 && !bshas(m
->b
, x
)) {
603 pmadd(TMP(x
), TMP(r
), tmp
[t
].cls
);
609 j
= &insb
[NIns
] - curi
;
614 i
= alloc(s
->nins
* sizeof(Ins
));
615 icpy(icpy(i
, curi
, j
), s
->ins
, s
->nins
-j
);
620 fprintf(stderr
, "\n> Register mappings:\n");
621 for (n
=0; n
<fn
->nblk
; n
++) {
623 fprintf(stderr
, "\t%-10s beg", b
->name
);
625 fprintf(stderr
, "\t end");
628 fprintf(stderr
, "\n");
631 /* 4. emit remaining copies in new blocks */
633 for (b
=fn
->start
;; b
=b
->link
) {
634 ps
= (Blk
**[3]){&b
->s1
, &b
->s2
, (Blk
*[1]){0}};
635 for (; (s
=**ps
); ps
++) {
637 for (p
=s
->phi
; p
; p
=p
->link
) {
639 assert(rtype(dst
)==RSlot
|| rtype(dst
)==RTmp
);
640 if (rtype(dst
) == RTmp
) {
641 r
= rfind(&beg
[s
->id
], dst
.val
);
646 for (u
=0; p
->blk
[u
]!=b
; u
++)
647 assert(u
+1 < p
->narg
);
649 if (rtype(src
) == RTmp
)
650 src
= rref(&end
[b
->id
], src
.val
);
651 pmadd(src
, dst
, p
->cls
);
653 for (t
=Tmp0
; bsiter(s
->in
, &t
); t
++) {
654 src
= rref(&end
[b
->id
], t
);
655 dst
= rref(&beg
[s
->id
], t
);
656 pmadd(src
, dst
, tmp
[t
].cls
);
660 if (curi
== &insb
[NIns
])
663 b1
->loop
= (b
->loop
+s
->loop
) / 2;
667 sprintf(b1
->name
, "%s_%s", b
->name
, s
->name
);
668 b1
->nins
= &insb
[NIns
] - curi
;
671 idup(&b1
->ins
, curi
, b1
->nins
);
681 for (b
=fn
->start
; b
; b
=b
->link
)
686 fprintf(stderr
, "\n> Register allocation statistics:\n");
687 fprintf(stderr
, "\tnew moves: %d\n", stmov
);
688 fprintf(stderr
, "\tnew blocks: %d\n", stblk
);
689 fprintf(stderr
, "\n> After register allocation:\n");