5 #define assert(x) assert_test(#x, x)
8 typedef struct RMap RMap
;
13 int w
[Tmp0
]; /* wait list, for unmatched hints */
18 static bits regu
; /* registers used */
19 static Tmp
*tmp
; /* function temporaries */
20 static Mem
*mem
; /* function mem references */
24 } pm
[Tmp0
]; /* parallel move constructed */
25 static int npm
; /* size of pm */
26 static int loop
; /* current loop level */
28 static uint stmov
; /* stats: added moves */
29 static uint stblk
; /* stats: added blocks */
34 return &tmp
[phicls(t
, tmp
)].hint
.r
;
42 p
= &tmp
[phicls(t
, tmp
)];
43 if (p
->hint
.r
== -1 || p
->hint
.w
> loop
) {
51 rcopy(RMap
*ma
, RMap
*mb
)
53 memcpy(ma
->t
, mb
->t
, sizeof ma
->t
);
54 memcpy(ma
->r
, mb
->r
, sizeof ma
->r
);
55 memcpy(ma
->w
, mb
->w
, sizeof ma
->w
);
65 for (i
=0; i
<m
->n
; i
++)
79 assert(s
!= -1 && "should have spilled");
86 radd(RMap
*m
, int t
, int r
)
88 assert((t
>= Tmp0
|| t
== r
) && "invalid temporary");
89 assert(((T
.gpr0
<= r
&& r
< T
.gpr0
+ T
.ngpr
)
90 || (T
.fpr0
<= r
&& r
< T
.fpr0
+ T
.nfpr
))
91 && "invalid register");
92 assert(!bshas(m
->b
, t
) && "temporary has mapping");
93 assert(!bshas(m
->b
, r
) && "register already allocated");
94 assert(m
->n
<= T
.ngpr
+T
.nfpr
&& "too many mappings");
104 ralloctry(RMap
*m
, int t
, int try)
110 assert(bshas(m
->b
, t
));
113 if (bshas(m
->b
, t
)) {
119 if (r
== -1 || bshas(m
->b
, r
))
121 if (r
== -1 || bshas(m
->b
, r
)) {
124 regs
= tmp
[phicls(t
, tmp
)].hint
.m
;
126 if (KBASE(tmp
[t
].cls
) == 0) {
133 for (r
=r0
; r
<r1
; r
++)
134 if (!(regs
& BIT(r
)))
136 for (r
=r0
; r
<r1
; r
++)
146 if (h
!= -1 && h
!= r
)
152 ralloc(RMap
*m
, int t
)
154 return ralloctry(m
, t
, 0);
158 rfree(RMap
*m
, int t
)
162 assert(t
>= Tmp0
|| !(BIT(t
) & T
.rglob
));
165 for (i
=0; m
->t
[i
] != t
; i
++)
171 memmove(&m
->t
[i
], &m
->t
[i
+1], (m
->n
-i
) * sizeof m
->t
[0]);
172 memmove(&m
->r
[i
], &m
->r
[i
+1], (m
->n
-i
) * sizeof m
->r
[0]);
173 assert(t
>= Tmp0
|| t
== r
);
182 for (i
=0; i
<m
->n
; i
++)
184 fprintf(stderr
, " (%s, R%d)",
187 fprintf(stderr
, "\n");
191 pmadd(Ref src
, Ref dst
, int k
)
194 die("cannot have more moves than registers");
201 enum PMStat
{ ToMove
, Moving
, Moved
};
204 pmrec(enum PMStat
*status
, int i
, int *k
)
208 /* note, this routine might emit
209 * too many large instructions
211 if (req(pm
[i
].src
, pm
[i
].dst
)) {
215 assert(KBASE(pm
[i
].cls
) == KBASE(*k
));
216 assert((Kw
|Kl
) == Kl
&& (Ks
|Kd
) == Kd
);
218 for (j
=0; j
<npm
; j
++)
219 if (req(pm
[j
].dst
, pm
[i
].src
))
221 switch (j
== npm
? Moved
: status
[j
]) {
223 c
= j
; /* start of cycle */
224 emit(Oswap
, *k
, R
, pm
[i
].src
, pm
[i
].dst
);
228 c
= pmrec(status
, j
, k
);
230 c
= -1; /* end of cycle */
234 emit(Oswap
, *k
, R
, pm
[i
].src
, pm
[i
].dst
);
240 emit(Ocopy
, pm
[i
].cls
, pm
[i
].dst
, pm
[i
].src
, R
);
255 status
= alloc(npm
* sizeof status
[0]);
256 assert(!npm
|| status
[npm
-1] == ToMove
);
257 for (i
=0; i
<npm
; i
++)
258 if (status
[i
] == ToMove
)
259 pmrec(status
, i
, (int[]){pm
[i
].cls
});
263 move(int r
, Ref to
, RMap
*m
)
267 r1
= req(to
, R
) ? -1 : rfree(m
, to
.val
);
268 if (bshas(m
->b
, r
)) {
269 /* r is used and not by to */
271 for (n
=0; m
->r
[n
] != r
; n
++)
279 t
= req(to
, R
) ? r
: to
.val
;
286 return i
->op
== Ocopy
&& isreg(i
->arg
[0]);
290 dopm(Blk
*b
, Ins
*i
, RMap
*m
)
297 m0
= *m
; /* okay since we don't use m0.b */
302 move(i
->arg
[0].val
, i
->to
, m
);
303 } while (i
!= b
->ins
&& regcpy(i
-1));
304 assert(m0
.n
<= m
->n
);
305 if (i
!= b
->ins
&& (i
-1)->op
== Ocall
) {
306 def
= T
.retregs((i
-1)->arg
[1], 0) | T
.rglob
;
307 for (r
=0; T
.rsave
[r
]>=0; r
++)
308 if (!(BIT(T
.rsave
[r
]) & def
))
309 move(T
.rsave
[r
], R
, m
);
311 for (npm
=0, n
=0; n
<m
->n
; n
++) {
317 pmadd(TMP(r1
), TMP(r
), tmp
[t
].cls
);
319 pmadd(TMP(r1
), SLOT(s
), tmp
[t
].cls
);
321 for (ip
=i
; ip
<i1
; ip
++) {
323 rfree(m
, ip
->to
.val
);
325 if (rfind(m
, r
) == -1)
333 prio1(Ref r1
, Ref r2
)
335 /* trivial heuristic to begin with,
336 * later we can use the distance to
337 * the definition instruction
340 return *hint(r1
.val
) != -1;
344 insert(Ref
*r
, Ref
**rs
, int p
)
349 while (i
-- > 0 && prio1(*r
, *rs
[i
])) {
356 doblk(Blk
*b
, RMap
*cur
)
358 int t
, x
, r
, rf
, rt
, nr
;
364 if (rtype(b
->jmp
.arg
) == RTmp
)
365 b
->jmp
.arg
= ralloc(cur
, b
->jmp
.arg
.val
);
367 for (i1
=&b
->ins
[b
->nins
]; i1
!=b
->ins
;) {
373 rs
= T
.argregs(i
->arg
[1], 0) | T
.rglob
;
374 for (r
=0; T
.rsave
[r
]>=0; r
++)
375 if (!(BIT(T
.rsave
[r
]) & rs
))
376 rfree(cur
, T
.rsave
[r
]);
381 i1
= dopm(b
, i1
, cur
);
386 if (rtype(i
->arg
[0]) == RTmp
)
387 sethint(i
->arg
[0].val
, i
->to
.val
);
390 if (!req(i
->to
, R
)) {
391 assert(rtype(i
->to
) == RTmp
);
393 if (r
< Tmp0
&& (BIT(r
) & T
.rglob
))
397 assert(!isreg(i
->to
));
405 for (x
=0, nr
=0; x
<2; x
++)
406 switch (rtype(i
->arg
[x
])) {
408 m
= &mem
[i
->arg
[x
].val
];
409 if (rtype(m
->base
) == RTmp
)
410 insert(&m
->base
, ra
, nr
++);
411 if (rtype(m
->index
) == RTmp
)
412 insert(&m
->index
, ra
, nr
++);
415 insert(&i
->arg
[x
], ra
, nr
++);
419 *ra
[r
] = ralloc(cur
, ra
[r
]->val
);
420 if (i
->op
== Ocopy
&& req(i
->to
, i
->arg
[0]))
423 /* try to change the register of a hinted
424 * temporary if rf is available */
425 if (rf
!= -1 && (t
= cur
->w
[rf
]) != 0)
426 if (!bshas(cur
->b
, rf
) && *hint(t
) == rf
427 && (rt
= rfree(cur
, t
)) != -1) {
430 assert(bshas(cur
->b
, rf
));
431 emit(Ocopy
, tmp
[t
].cls
, TMP(rt
), TMP(rf
), R
);
435 if (req(*ra
[r
], TMP(rt
)))
437 /* one could iterate this logic with
438 * the newly freed rt, but in this case
439 * the above loop must be changed */
442 b
->nins
= &insb
[NIns
] - curi
;
443 idup(&b
->ins
, curi
, b
->nins
);
446 /* qsort() comparison function to peel
447 * loop nests from inside out */
449 carve(const void *a
, const void *b
)
453 /* todo, evaluate if this order is really
454 * better than the simple postorder */
457 if (ba
->loop
== bb
->loop
)
458 return ba
->id
> bb
->id
? -1 : ba
->id
< bb
->id
;
459 return ba
->loop
> bb
->loop
? -1 : +1;
462 /* comparison function to order temporaries
463 * for allocation at the end of blocks */
465 prio2(int t1
, int t2
)
467 if ((tmp
[t1
].visit
^ tmp
[t2
].visit
) < 0) /* != signs */
468 return tmp
[t1
].visit
!= -1 ? +1 : -1;
469 if ((*hint(t1
) ^ *hint(t2
)) < 0)
470 return *hint(t1
) != -1 ? +1 : -1;
471 return tmp
[t1
].cost
- tmp
[t2
].cost
;
474 /* register allocation
475 * depends on rpo, phi, cost, (and obviously spill)
480 int j
, t
, r
, x
, rl
[Tmp0
];
481 Blk
*b
, *b1
, *s
, ***ps
, *blist
, **blk
, **bp
;
482 RMap
*end
, *beg
, cur
, old
, *m
;
494 blk
= alloc(fn
->nblk
* sizeof blk
[0]);
495 end
= alloc(fn
->nblk
* sizeof end
[0]);
496 beg
= alloc(fn
->nblk
* sizeof beg
[0]);
497 for (n
=0; n
<fn
->nblk
; n
++) {
498 bsinit(end
[n
].b
, fn
->ntmp
);
499 bsinit(beg
[n
].b
, fn
->ntmp
);
501 bsinit(cur
.b
, fn
->ntmp
);
502 bsinit(old
.b
, fn
->ntmp
);
505 for (t
=0; t
<fn
->ntmp
; t
++) {
506 tmp
[t
].hint
.r
= t
< Tmp0
? t
: -1;
507 tmp
[t
].hint
.w
= loop
;
510 for (bp
=blk
, b
=fn
->start
; b
; b
=b
->link
)
512 qsort(blk
, fn
->nblk
, sizeof blk
[0], carve
);
513 for (b
=fn
->start
, i
=b
->ins
; i
<&b
->ins
[b
->nins
]; i
++)
514 if (i
->op
!= Ocopy
|| !isreg(i
->arg
[0]))
517 assert(rtype(i
->to
) == RTmp
);
518 sethint(i
->to
.val
, i
->arg
[0].val
);
521 /* 2. assign registers */
522 for (bp
=blk
; bp
<&blk
[fn
->nblk
]; bp
++) {
528 memset(cur
.w
, 0, sizeof cur
.w
);
529 for (x
=0, t
=Tmp0
; bsiter(b
->out
, &t
); t
++) {
532 while (j
-- > 0 && prio2(t
, rl
[j
]) > 0) {
537 for (r
=0; bsiter(b
->out
, &r
) && r
<Tmp0
; r
++)
540 ralloctry(&cur
, rl
[j
], 1);
543 rcopy(&end
[n
], &cur
);
545 bscopy(b
->in
, cur
.b
);
546 for (p
=b
->phi
; p
; p
=p
->link
)
547 if (rtype(p
->to
) == RTmp
)
548 bsclr(b
->in
, p
->to
.val
);
549 rcopy(&beg
[n
], &cur
);
552 /* 3. emit copies shared by multiple edges
553 * to the same block */
554 for (s
=fn
->start
; s
; s
=s
->link
) {
559 /* rl maps a register that is live at the
560 * beginning of s to the one used in all
561 * predecessors (if any, -1 otherwise) */
562 memset(rl
, 0, sizeof rl
);
564 /* to find the register of a phi in a
565 * predecessor, we have to find the
566 * corresponding argument */
567 for (p
=s
->phi
; p
; p
=p
->link
) {
568 if (rtype(p
->to
) != RTmp
569 || (r
=rfind(m
, p
->to
.val
)) == -1)
571 for (u
=0; u
<p
->narg
; u
++) {
574 if (rtype(src
) != RTmp
)
576 x
= rfind(&end
[b
->id
], src
.val
);
577 if (x
== -1) /* spilled */
579 rl
[r
] = (!rl
[r
] || rl
[r
] == x
) ? x
: -1;
585 /* process non-phis temporaries */
586 for (j
=0; j
<m
->n
; j
++) {
589 if (rl
[r
] || t
< Tmp0
/* todo, remove this */)
591 for (bp
=s
->pred
; bp
<&s
->pred
[s
->npred
]; bp
++) {
592 x
= rfind(&end
[(*bp
)->id
], t
);
593 if (x
== -1) /* spilled */
595 rl
[r
] = (!rl
[r
] || rl
[r
] == x
) ? x
: -1;
602 for (j
=0; j
<m
->n
; j
++) {
606 assert(x
!= 0 || t
< Tmp0
/* todo, ditto */);
607 if (x
> 0 && !bshas(m
->b
, x
)) {
608 pmadd(TMP(x
), TMP(r
), tmp
[t
].cls
);
615 j
= &insb
[NIns
] - curi
;
620 i
= alloc(s
->nins
* sizeof(Ins
));
621 icpy(icpy(i
, curi
, j
), s
->ins
, s
->nins
-j
);
626 fprintf(stderr
, "\n> Register mappings:\n");
627 for (n
=0; n
<fn
->nblk
; n
++) {
629 fprintf(stderr
, "\t%-10s beg", b
->name
);
631 fprintf(stderr
, "\t end");
634 fprintf(stderr
, "\n");
637 /* 4. emit remaining copies in new blocks */
639 for (b
=fn
->start
;; b
=b
->link
) {
640 ps
= (Blk
**[3]){&b
->s1
, &b
->s2
, (Blk
*[1]){0}};
641 for (; (s
=**ps
); ps
++) {
643 for (p
=s
->phi
; p
; p
=p
->link
) {
645 assert(rtype(dst
)==RSlot
|| rtype(dst
)==RTmp
);
646 if (rtype(dst
) == RTmp
) {
647 r
= rfind(&beg
[s
->id
], dst
.val
);
652 for (u
=0; p
->blk
[u
]!=b
; u
++)
653 assert(u
+1 < p
->narg
);
655 if (rtype(src
) == RTmp
)
656 src
= rref(&end
[b
->id
], src
.val
);
657 pmadd(src
, dst
, p
->cls
);
659 for (t
=Tmp0
; bsiter(s
->in
, &t
); t
++) {
660 src
= rref(&end
[b
->id
], t
);
661 dst
= rref(&beg
[s
->id
], t
);
662 pmadd(src
, dst
, tmp
[t
].cls
);
666 if (curi
== &insb
[NIns
])
669 b1
->loop
= (b
->loop
+s
->loop
) / 2;
673 strf(b1
->name
, "%s_%s", b
->name
, s
->name
);
674 b1
->nins
= &insb
[NIns
] - curi
;
677 idup(&b1
->ins
, curi
, b1
->nins
);
687 for (b
=fn
->start
; b
; b
=b
->link
)
692 fprintf(stderr
, "\n> Register allocation statistics:\n");
693 fprintf(stderr
, "\tnew moves: %d\n", stmov
);
694 fprintf(stderr
, "\tnew blocks: %d\n", stblk
);
695 fprintf(stderr
, "\n> After register allocation:\n");