/* NOTE(review): this file is a garbled extraction of a C register
 * allocator.  Each physical source line is shattered across several
 * lines and prefixed with its original line number (the leading
 * "5", "8", "17", ...); many original lines are missing entirely.
 * Comments only are added here; every code token is left untouched. */
5 #define assert(x) assert_test(#x, x)
8 typedef struct RMap RMap
;
/* file-scope state of the allocator */
17 static bits regu
; /* registers used */
18 static Tmp
*tmp
; /* function temporaries */
19 static Mem
*mem
; /* function mem references */
/* tail of a struct declaration whose head (orig. lines 20-22) is
 * missing from this extraction */
23 } *pm
; /* parallel move constructed */
24 static int cpm
, npm
; /* capacity and size of pm */
/* NOTE(review): fragments of two hint accessors whose signatures
 * (before orig. lines 29 and 37) are not visible.  Both index the
 * hint record of t's phi class via phicls(t, tmp). */
29 return &tmp
[phicls(t
, tmp
)].hint
.r
;
/* second fragment reads the .m field of the same hint record */
37 m
= tmp
[phicls(t
, tmp
)].hint
.m
;
/* rcopy(ma, mb): copy register map mb into ma.  Only the t[] and
 * r[] array copies (orig. lines 46-47) are visible; the opening
 * brace and the rest of the body are missing from this extraction.
 * The sizeof operands are the arrays themselves, so these copy the
 * full fixed-size arrays, not pointers. */
44 rcopy(RMap
*ma
, RMap
*mb
)
46 memcpy(ma
->t
, mb
->t
, sizeof ma
->t
);
47 memcpy(ma
->r
, mb
->r
, sizeof ma
->r
);
/* loop header scanning the m->n live mappings (loop body and the
 * enclosing function's signature are not visible) */
57 for (i
=0; i
<m
->n
; i
++)
/* spill-slot sanity check from a function whose surrounding code is
 * missing; s presumably holds a spill slot index -- TODO confirm */
71 assert(s
!= -1 && "should have spilled");
/* radd(m, t, r): record in map m that temporary t lives in register
 * r.  Only the precondition asserts (orig. lines 80-84) are visible;
 * the lines that actually append the mapping are missing. */
78 radd(RMap
*m
, int t
, int r
)
/* t must be a real temporary, or a register mapped to itself */
80 assert((t
>= Tmp0
|| t
== r
) && "invalid temporary");
/* r must fall in the integer range [RAX, RAX+NIReg) or the
 * floating range [XMM0, XMM0+NFReg) */
81 assert(((RAX
<= r
&& r
< RAX
+ NIReg
) || (XMM0
<= r
&& r
< XMM0
+ NFReg
)) && "invalid register");
/* neither side may already be present in the map's bitset */
82 assert(!bshas(m
->b
, t
) && "temporary has mapping");
83 assert(!bshas(m
->b
, r
) && "register already allocated");
84 assert(m
->n
<= NIReg
+NFReg
&& "too many mappings");
/* ralloc(m, t): choose a register for temporary t.  Visible
 * fragments: a fast path when t already has a mapping, a fallback
 * to the phi-class hint mask when the hinted register is -1 or
 * taken, a class split on KBASE (0 presumably selects the integer
 * register range -- TODO confirm), and two linear scans over
 * [r0, r1). */
94 ralloc(RMap
*m
, int t
)
100 assert(bshas(m
->b
, t
));
103 if (bshas(m
->b
, t
)) {
109 if (r
== -1 || bshas(m
->b
, r
)) {
/* consult the hint mask of t's phi class */
110 regs
= tmp
[phicls(t
, tmp
)].hint
.m
;
112 if (KBASE(tmp
[t
].cls
) == 0) {
/* first scan: take a register whose bit is clear in regs */
119 for (r
=r0
; r
<r1
; r
++)
120 if (!(regs
& BIT(r
)))
/* second scan: fall back to any register in the range */
122 for (r
=r0
; r
<r1
; r
++)
/* rfree(m, t): remove t's mapping from m.  Visible: linear search
 * for t in m->t[], then compaction of both parallel arrays.
 * memmove (not memcpy) is correct here: source and destination
 * regions overlap by one element. */
134 rfree(RMap
*m
, int t
)
/* no bound check visible: presumably guarded by a bshas test on a
 * missing line -- TODO confirm, else this can run off the array */
140 for (i
=0; m
->t
[i
] != t
; i
++)
146 memmove(&m
->t
[i
], &m
->t
[i
+1], (m
->n
-i
) * sizeof m
->t
[0]);
147 memmove(&m
->r
[i
], &m
->r
[i
+1], (m
->n
-i
) * sizeof m
->r
[0]);
/* debug dump: prints the map's (name, register) pairs to stderr;
 * the fprintf argument list continues on missing lines */
156 for (i
=0; i
<m
->n
; i
++)
158 fprintf(stderr
, " (%s, R%d)",
161 fprintf(stderr
, "\n");
/* pmadd(src, dst, k): append one (src -> dst, class k) entry to the
 * global parallel-move array pm.  Visible: growth via realloc and
 * an out-of-memory abort.  The realloc result overwrites pm
 * directly; that is only acceptable because failure presumably
 * dies (the check on the missing orig. line 170 -- TODO confirm). */
165 pmadd(Ref src
, Ref dst
, int k
)
169 pm
= realloc(pm
, cpm
* sizeof pm
[0]);
171 die("pmadd, out of memory");
/* Status of one parallel-move entry while the move graph is being
 * resolved (the status array is scanned for ToMove entries and
 * passed to pmrec).  Reconstructed: the extraction had fused the
 * original line number "179" into the declaration and shattered it
 * across five lines, making it invalid C.  Enumerator names and
 * implicit values (0, 1, 2) are unchanged. */
enum PMStat {
	ToMove,
	Moving,
	Moved
};
/* pmrec(status, i, k): recursively emit instructions (into *curi)
 * realizing parallel move i, widening *k as needed.  Heavily
 * fragmented; the recursion/return structure between the visible
 * lines is missing. */
182 pmrec(enum PMStat
*status
, int i
, int *k
)
187 /* note, this routine might emit
188 * too many large instructions:
194 * if only the first move is wide
195 * the whole cycle will be wide,
196 * this is safe but not necessary
/* self-move: source equals destination (handling line missing) */
199 if (req(pm
[i
].src
, pm
[i
].dst
))
202 assert(KBASE(*k
) == KBASE(pm
[i
].cls
));
/* relies on the class encoding pairing: narrow | 1 == wide */
203 assert((Kw
|1) == Kl
&& (Ks
|1) == Kd
);
204 *k
|= KWIDE(pm
[i
].cls
); /* see above */
/* look for a move whose source is clobbered by move i's dst */
206 for (j
=0; j
<npm
; j
++) {
207 if (req(pm
[j
].src
, pm
[i
].dst
))
211 swp1
= pmrec(status
, j
, &k1
);
/* plain copy: dst <- src in the move's own class */
228 *curi
++ = (Ins
){Ocopy
, pm
[i
].dst
, {pm
[i
].src
}, pm
[i
].cls
};
/* cycle case: emit a swap in the accumulated width *k */
230 } else if (!req(swp
, pm
[i
].src
)) {
231 *curi
++ = (Ins
){Oswap
, R
, {pm
[i
].src
, pm
[i
].dst
}, *k
};
/* driver: allocate a status array and run pmrec on every pending
 * parallel move that is still ToMove */
244 status
= alloc(npm
* sizeof status
[0]);
/* assumes alloc zero-fills so every entry starts as ToMove (== 0)
 * -- TODO confirm against alloc's contract */
245 assert(!npm
|| status
[npm
-1] == ToMove
);
247 for (i
=0; i
<npm
; i
++)
248 if (status
[i
] == ToMove
) {
250 pmrec(status
, i
, &k
);
/* move(r, to, m): make register r hold `to` (R meaning "just free
 * r").  Visible: release to's current register via rfree, detect
 * that r is occupied by someone else, locate that occupant, and
 * compute the temporary to (re)map.  The eviction code between the
 * fragments is missing. */
255 move(int r
, Ref to
, RMap
*m
)
259 r1
= req(to
, R
) ? -1 : rfree(m
, to
.val
);
260 if (bshas(m
->b
, r
) && r1
!= r
) {
261 /* r is used and not by to */
262 for (n
=0; m
->r
[n
] != r
; n
++)
270 t
= req(to
, R
) ? r
: to
.val
;
/* predicate body: true when instruction i is a copy whose source
 * is a physical register (signature on missing orig. lines) */
277 return i
->op
== Ocopy
&& isreg(i
->arg
[0]);
/* dopm(b, i, m): handle a run of register-copy instructions ending
 * at i in block b, rebuilding the block's instruction array with
 * the generated parallel-move code spliced in.  Heavily fragmented:
 * the loop heads are visible but most bodies are missing. */
281 dopm(Blk
*b
, Ins
*i
, RMap
*m
)
285 Ins
*i0
, *i1
, *ip
, *ir
;
/* walk the copy run backwards, freeing/remapping as we go */
292 move(i
->arg
[0].val
, i
->to
, m
);
293 } while (i
!= b
->ins
&& regcpy(i
-1));
294 assert(m0
.n
<= m
->n
);
/* if the run is preceded by a call, free the caller-save registers
 * not used for the return value */
295 if (i
!= b
->ins
&& (i
-1)->op
== Ocall
) {
296 def
= retregs((i
-1)->arg
[1], 0);
297 for (r
=0; r
<NRSave
; r
++)
298 if (!(BIT(rsave
[r
]) & def
))
299 move(rsave
[r
], R
, m
);
/* build the parallel move set from the surviving mappings */
301 for (npm
=0, n
=0; n
<m
->n
; n
++) {
307 pmadd(TMP(r1
), TMP(r
), tmp
[t
].cls
);
/* spilled case: move to the temporary's stack slot */
309 pmadd(TMP(r1
), SLOT(s
), tmp
[t
].cls
);
311 for (ip
=i
; ip
<i1
; ip
++) {
313 rfree(m
, ip
->to
.val
);
315 if (rfind(m
, r
) == -1)
/* splice: prefix of b->ins, then generated code from insb..curi,
 * then the suffix from i1 to the end of the block */
322 n
= b
->nins
- (i1
- i
) + (curi
- insb
);
323 i0
= alloc(n
* sizeof(Ins
));
324 ip
= icpy(ip
= i0
, b
->ins
, i
- b
->ins
);
325 ip
= icpy(ir
= ip
, insb
, curi
- insb
);
326 ip
= icpy(ip
, i1
, &b
->ins
[b
->nins
] - i1
);
/* priority heuristic fragment: a ref whose temporary carries a
 * register hint ranks higher (enclosing signature missing) */
335 /* trivial heuristic to begin with,
336 * later we can use the distance to
337 * the definition instruction
340 return *hint(r1
.val
) != -1;
/* insert(r, rs, p): insert ref pointer r into array rs keeping it
 * ordered by prio(); only the signature and the shift-loop
 * condition are visible */
344 insert(Ref
*r
, Ref
**rs
, int p
)
349 while (i
-- > 0 && prio(*r
, *rs
[i
])) {
/* doblk(b, cur): allocate registers for one block, walking its
 * instructions backwards from the end.  Fragmented: loop heads and
 * key statements are visible, most bodies are missing. */
356 doblk(Blk
*b
, RMap
*cur
)
/* seed the map with physical registers live out of the block */
364 for (r
=0; bsiter(b
->out
, &r
) && r
<Tmp0
; r
++)
/* allocate the jump argument if it is a temporary */
366 if (rtype(b
->jmp
.arg
) == RTmp
)
367 b
->jmp
.arg
= ralloc(cur
, b
->jmp
.arg
.val
);
/* backward walk over the instructions */
368 for (i
=&b
->ins
[b
->nins
]; i
!=b
->ins
;) {
/* at a call: free caller-save registers not used as arguments */
371 rs
= argregs(i
->arg
[1], 0);
372 for (r
=0; r
<NRSave
; r
++)
373 if (!(BIT(rsave
[r
]) & rs
))
374 rfree(cur
, rsave
[r
]);
377 if (isreg(i
->arg
[0])) {
/* copy from a register: propagate it as a hint */
382 if (rtype(i
->arg
[0]) == RTmp
)
383 sethint(i
->arg
[0].val
, i
->to
.val
);
386 if (!req(i
->to
, R
)) {
387 assert(rtype(i
->to
) == RTmp
);
388 r
= rfree(cur
, i
->to
.val
);
/* destination is dead (no mapping): turn the instruction into a
 * nop instead of allocating for it */
389 if (r
== -1 && !isreg(i
->to
)) {
390 *i
= (Ins
){.op
= Onop
};
393 if (i
->to
.val
>= Tmp0
)
/* collect temporary operands (including those inside memory
 * references) in priority order, then allocate them */
398 for (x
=0, nr
=0; x
<2; x
++)
399 switch (rtype(i
->arg
[x
])) {
401 m
= &mem
[i
->arg
[x
].val
];
402 if (rtype(m
->base
) == RTmp
)
403 insert(&m
->base
, ra
, nr
++);
404 if (rtype(m
->index
) == RTmp
)
405 insert(&m
->index
, ra
, nr
++);
408 insert(&i
->arg
[x
], ra
, nr
++);
412 *ra
[r
] = ralloc(cur
, ra
[r
]->val
);
/* Main register-allocation pass over a function.  Fragmented:
 * per-block maps (end/beg) are built, blocks are processed in
 * reverse post-order via doblk, phi hints are refined, and glue
 * blocks with parallel-move code are synthesized on edges. */
416 /* register allocation
417 * depends on rpo, phi, cost, (and obviously spill)
422 int j
, t
, r
, r1
, x
, rl
[Tmp0
];
423 Blk
*b
, *b1
, *s
, ***ps
, *blist
;
424 RMap
*end
, *beg
, cur
, old
;
/* one end-of-block and one beginning-of-block map per block */
434 end
= alloc(fn
->nblk
* sizeof end
[0]);
435 beg
= alloc(fn
->nblk
* sizeof beg
[0]);
436 for (n
=0; n
<fn
->nblk
; n
++) {
437 bsinit(end
[n
].b
, fn
->ntmp
);
438 bsinit(beg
[n
].b
, fn
->ntmp
);
440 bsinit(cur
.b
, fn
->ntmp
);
441 bsinit(old
.b
, fn
->ntmp
);
/* 1. seed hints: registers hint themselves, temporaries start
 * unhinted; then harvest hints from leading register copies in
 * the start block */
443 for (t
=0; t
<fn
->ntmp
; t
++)
444 *hint(t
) = t
< Tmp0
? t
: -1;
445 for (b
=fn
->start
, i
=b
->ins
; i
-b
->ins
< b
->nins
; i
++)
446 if (i
->op
!= Ocopy
|| !isreg(i
->arg
[0]))
449 assert(rtype(i
->to
) == RTmp
);
450 sethint(i
->to
.val
, i
->arg
[0].val
);
453 /* 2. assign registers following post-order */
454 for (n
=fn
->nblk
-1; n
!=-1u; n
--) {
/* pre-place live-out temporaries, preferring usable hints */
459 for (t
=Tmp0
; bsiter(b
->out
, &t
); t
++)
460 if (x
|| (r
=*hint(t
)) != -1)
461 if (x
|| !bshas(cur
.b
, r
))
463 rcopy(&end
[n
], &cur
);
465 bscopy(b
->in
, cur
.b
);
466 for (p
=b
->phi
; p
; p
=p
->link
)
467 if (rtype(p
->to
) == RTmp
) {
468 bsclr(b
->in
, p
->to
.val
);
470 * if the phi destination has an
471 * argument from a frequent block
472 * that was already allocated to
473 * 'r', use 'r' as the new hint
475 memset(rl
, 0, sizeof rl
);
476 for (u
=0; u
<p
->narg
; u
++) {
479 if (rtype(p
->arg
[u
]) == RTmp
)
480 if ((r
=rfind(&end
[b1
->id
], t
)) != -1)
483 for (x
=0, j
=0; j
<Tmp0
; j
++)
/* adopt the register only if its weight beats the loop depth */
486 if (rl
[x
] >= b
->loop
)
487 *hint(p
->to
.val
) = x
;
491 * attempt to satisfy hints
492 * when it's simple and we have
493 * multiple predecessors
497 for (j
=0; j
<old
.n
; j
++) {
/* emit a copy to move a value into its hinted register */
501 if (r
!= -1 && r
!= r1
)
502 if (!bshas(cur
.b
, r
)) {
506 emit(Ocopy
, x
, TMP(r1
), TMP(r
), R
);
/* prepend any emitted copies to the block's instructions */
509 if ((j
= &insb
[NIns
] - curi
)) {
511 i
= alloc(b
->nins
* sizeof(Ins
));
512 icpy(icpy(i
, curi
, j
), b
->ins
, b
->nins
-j
);
516 rcopy(&beg
[n
], &cur
);
/* debug: dump per-block register mappings */
519 fprintf(stderr
, "\n> Register mappings:\n");
520 for (n
=0; n
<fn
->nblk
; n
++) {
522 fprintf(stderr
, "\t%-10s beg", b
->name
);
524 fprintf(stderr
, "\t end");
527 fprintf(stderr
, "\n");
530 /* 3. compose glue code */
/* iterate over both successors of every block (the trailing
 * null-terminated compound literal ends the inner loop) */
532 for (b
=fn
->start
;; b
=b
->link
) {
533 ps
= (Blk
**[3]){&b
->s1
, &b
->s2
, (Blk
*[1]){0}};
534 for (; (s
=**ps
); ps
++) {
536 for (p
=s
->phi
; p
; p
=p
->link
) {
538 assert(rtype(dst
)==RSlot
|| rtype(dst
)==RTmp
);
539 if (rtype(dst
) == RTmp
) {
540 r
= rfind(&beg
[s
->id
], dst
.val
);
/* find which phi argument comes from predecessor b */
545 for (u
=0; p
->blk
[u
]!=b
; u
++)
546 assert(u
+1 < p
->narg
);
548 if (rtype(src
) == RTmp
)
549 src
= rref(&end
[b
->id
], src
.val
);
550 pmadd(src
, dst
, p
->cls
);
/* temporaries live into s: reconcile end-of-b with beg-of-s */
552 for (t
=Tmp0
; bsiter(s
->in
, &t
); t
++) {
553 src
= rref(&end
[b
->id
], t
);
554 dst
= rref(&beg
[s
->id
], t
);
555 pmadd(src
, dst
, tmp
[t
].cls
);
/* new glue block on the edge b -> s */
561 b1
->loop
= (b
->loop
+s
->loop
) / 2;
/* NOTE(review): unbounded sprintf into b1->name; the buffer size
 * is not visible here -- verify it cannot overflow (snprintf
 * would be safer) */
565 sprintf(b1
->name
, "%s_%s", b
->name
, s
->name
);
566 b1
->nins
= curi
- insb
;
567 idup(&b1
->ins
, insb
, b1
->nins
);
577 for (b
=fn
->start
; b
; b
=b
->link
)
582 fprintf(stderr
, "\n> After register allocation:\n");