4 aggreg(Blk
*hd
, Blk
*b
)
8 /* aggregate looping information at
10 bsunion(hd
->gen
, b
->gen
);
12 if (b
->nlive
[k
] > hd
->nlive
[k
])
13 hd
->nlive
[k
] = b
->nlive
[k
];
17 tmpuse(Ref r
, int use
, int loop
, Fn
*fn
)
22 if (rtype(r
) == RMem
) {
24 tmpuse(m
->base
, 1, loop
, fn
);
25 tmpuse(m
->index
, 1, loop
, fn
);
27 else if (rtype(r
) == RTmp
&& r
.val
>= Tmp0
) {
35 /* evaluate spill costs of temporaries,
36 * this also fills usage information
51 fprintf(stderr
, "\n> Loop information:\n");
52 for (b
=fn
->start
; b
; b
=b
->link
) {
53 for (a
=0; a
<b
->npred
; ++a
)
54 if (b
->id
<= b
->pred
[a
]->id
)
57 fprintf(stderr
, "\t%-10s", b
->name
);
58 fprintf(stderr
, " (% 3d ", b
->nlive
[0]);
59 fprintf(stderr
, "% 3d) ", b
->nlive
[1]);
60 dumpts(b
->gen
, fn
->tmp
, stderr
);
64 for (t
=fn
->tmp
; t
-fn
->tmp
< fn
->ntmp
; t
++) {
65 t
->cost
= t
-fn
->tmp
< Tmp0
? UINT_MAX
: 0;
69 for (b
=fn
->start
; b
; b
=b
->link
) {
70 for (p
=b
->phi
; p
; p
=p
->link
) {
71 t
= &fn
->tmp
[p
->to
.val
];
72 tmpuse(p
->to
, 0, 0, fn
);
73 for (a
=0; a
<p
->narg
; a
++) {
76 tmpuse(p
->arg
[a
], 1, n
, fn
);
80 for (i
=b
->ins
; i
<&b
->ins
[b
->nins
]; i
++) {
81 tmpuse(i
->to
, 0, n
, fn
);
82 tmpuse(i
->arg
[0], 1, n
, fn
);
83 tmpuse(i
->arg
[1], 1, n
, fn
);
85 tmpuse(b
->jmp
.arg
, 1, n
, fn
);
88 fprintf(stderr
, "\n> Spill costs:\n");
89 for (n
=Tmp0
; n
<fn
->ntmp
; n
++)
90 fprintf(stderr
, "\t%-10s %d\n",
93 fprintf(stderr
, "\n");
97 static BSet
*fst
; /* temps to prioritize in registers (for tcmp1) */
98 static Tmp
*tmp
; /* current temporaries (for tcmpX) */
99 static int ntmp
; /* current # of temps (for limit) */
100 static int locs
; /* stack size used by locals */
101 static int slot4
; /* next slot of 4 bytes */
102 static int slot8
; /* ditto, 8 bytes */
103 static BSet mask
[2][1]; /* class masks */
106 tcmp0(const void *pa
, const void *pb
)
110 ca
= tmp
[*(int *)pa
].cost
;
111 cb
= tmp
[*(int *)pb
].cost
;
112 return (cb
< ca
) ? -1 : (cb
> ca
);
116 tcmp1(const void *pa
, const void *pb
)
120 c
= bshas(fst
, *(int *)pb
) - bshas(fst
, *(int *)pa
);
121 return c
? c
: tcmp0(pa
, pb
);
129 assert(t
>= Tmp0
&& "cannot spill register");
132 /* specific to NAlign == 3 */
133 /* nice logic to pack stack slots
134 * on demand, there can be only
135 * one hole and slot4 points to it
137 * invariant: slot4 <= slot8
139 if (KWIDE(tmp
[t
].cls
)) {
146 if (slot4
== slot8
) {
158 /* restricts b to hold at most k
159 * temporaries, preferring those
160 * present in f (if given), then
161 * those with the largest spill
165 limit(BSet
*b
, int k
, BSet
*f
)
167 static int *tarr
, maxt
;
175 tarr
= emalloc(nt
* sizeof tarr
[0]);
178 for (i
=0, t
=0; bsiter(b
, &t
); t
++) {
184 qsort(tarr
, nt
, sizeof tarr
[0], tcmp0
);
187 qsort(tarr
, nt
, sizeof tarr
[0], tcmp1
);
190 for (i
=0; i
<k
&& i
<nt
; i
++)
196 /* spills temporaries to fit the
197 * target limits using the same
198 * preferences as limit(); assumes
199 * that k1 gprs and k2 fprs are
203 limit2(BSet
*b1
, int k1
, int k2
, BSet
*f
)
207 bsinit(b2
, ntmp
); /* todo, free those */
209 bsinter(b1
, mask
[0]);
210 bsinter(b2
, mask
[1]);
211 limit(b1
, T
.ngpr
- k1
, f
);
212 limit(b2
, T
.nfpr
- k2
, f
);
217 sethint(BSet
*u
, bits r
)
221 for (t
=Tmp0
; bsiter(u
, &t
); t
++)
222 tmp
[phicls(t
, tmp
)].hint
.m
|= r
;
225 /* reloads temporaries in u that are
226 * not in v from their slots
229 reloads(BSet
*u
, BSet
*v
)
233 for (t
=Tmp0
; bsiter(u
, &t
); t
++)
235 emit(Oload
, tmp
[t
].cls
, TMP(t
), slot(t
), R
);
242 emit(Ostorew
+ tmp
[r
.val
].cls
, 0, R
, r
, SLOT(s
));
248 return i
->op
== Ocopy
&& isreg(i
->arg
[0]);
252 dopm(Blk
*b
, Ins
*i
, BSet
*v
)
259 bsinit(u
, ntmp
); /* todo, free those */
260 /* consecutive copies from
261 * registers need to be handled
262 * as one large instruction
264 * fixme: there is an assumption
265 * that calls are always followed
266 * by copy instructions here, this
267 * might not be true if previous
277 store(i
->to
, tmp
[t
].slot
);
279 bsset(v
, i
->arg
[0].val
);
280 } while (i
!= b
->ins
&& regcpy(i
-1));
282 if (i
!= b
->ins
&& (i
-1)->op
== Ocall
) {
283 v
->t
[0] &= ~T
.retregs((i
-1)->arg
[1], 0);
284 limit2(v
, T
.nrsave
[0], T
.nrsave
[1], 0);
285 for (n
=0, r
=0; T
.rsave
[n
]>=0; n
++)
286 r
|= BIT(T
.rsave
[n
]);
287 v
->t
[0] |= T
.argregs((i
-1)->arg
[1], 0);
301 merge(BSet
*u
, Blk
*bu
, BSet
*v
, Blk
*bv
)
305 if (bu
->loop
<= bv
->loop
)
308 for (t
=0; bsiter(v
, &t
); t
++)
309 if (tmp
[t
].slot
== -1)
313 /* spill code insertion
314 * requires spill costs, rpo, liveness
316 * Note: this will replace liveness
317 * information (in, out) with temporaries
318 * that must be in registers at block
322 * - Ocopy instructions to ensure register
328 Blk
*b
, *s1
, *s2
, *hd
, **bp
;
329 int j
, l
, t
, k
, lvarg
[2];
331 BSet u
[1], v
[1], w
[1];
342 bsinit(mask
[0], ntmp
);
343 bsinit(mask
[1], ntmp
);
347 for (t
=0; t
<ntmp
; t
++) {
349 if (t
>= T
.fpr0
&& t
< T
.fpr0
+ T
.nfpr
)
352 k
= KBASE(tmp
[t
].cls
);
356 for (bp
=&fn
->rpo
[fn
->nblk
]; bp
!=fn
->rpo
;) {
358 /* invariant: all blocks with bigger rpo got
359 * their in,out updated. */
361 /* 1. find temporaries in registers at
362 * the end of the block (put them in v) */
367 if (s1
&& s1
->id
<= b
->id
)
369 if (s2
&& s2
->id
<= b
->id
)
370 if (!hd
|| s2
->id
>= hd
->id
)
375 hd
->gen
->t
[0] |= T
.rglob
; /* don't spill registers */
376 for (k
=0; k
<2; k
++) {
377 n
= k
== 0 ? T
.ngpr
: T
.nfpr
;
383 if (bscount(u
) < n
) {
384 j
= bscount(w
); /* live through */
386 limit(w
, n
- (l
- j
), 0);
393 /* avoid reloading temporaries
394 * in the middle of loops */
406 if (rtype(b
->jmp
.arg
) == RCall
)
407 v
->t
[0] |= T
.retregs(b
->jmp
.arg
, 0);
409 for (t
=Tmp0
; bsiter(b
->out
, &t
); t
++)
414 /* 2. process the block instructions */
415 if (rtype(b
->jmp
.arg
) == RTmp
) {
417 assert(KBASE(tmp
[t
].cls
) == 0);
418 lvarg
[0] = bshas(v
, t
);
421 limit2(v
, 0, 0, NULL
);
425 b
->jmp
.arg
= slot(t
);
430 for (i
=&b
->ins
[b
->nins
]; i
!=b
->ins
;) {
437 if (!req(i
->to
, R
)) {
438 assert(rtype(i
->to
) == RTmp
);
443 /* make sure we have a reg
445 assert(t
>= Tmp0
&& "dead reg");
450 j
= T
.memargs(i
->op
);
452 if (rtype(i
->arg
[n
]) == RMem
)
455 switch (rtype(i
->arg
[n
])) {
459 if (rtype(m
->base
) == RTmp
) {
460 bsset(v
, m
->base
.val
);
461 bsset(w
, m
->base
.val
);
463 if (rtype(m
->index
) == RTmp
) {
464 bsset(v
, m
->index
.val
);
465 bsset(w
, m
->index
.val
);
470 lvarg
[n
] = bshas(v
, t
);
479 if (rtype(i
->arg
[n
]) == RTmp
) {
482 /* do not reload if the
491 if (!req(i
->to
, R
)) {
493 store(i
->to
, tmp
[t
].slot
);
495 /* in case i->to was a
500 r
= v
->t
[0]; /* Tmp0 is NBit */
505 assert(v
->t
[0] == (T
.rglob
| fn
->reg
));
507 assert(v
->t
[0] == T
.rglob
);
509 for (p
=b
->phi
; p
; p
=p
->link
) {
510 assert(rtype(p
->to
) == RTmp
);
514 store(p
->to
, tmp
[t
].slot
);
515 } else if (bshas(b
->in
, t
))
516 /* only if the phi is live */
517 p
->to
= slot(p
->to
.val
);
520 b
->nins
= &insb
[NIns
] - curi
;
521 idup(&b
->ins
, curi
, b
->nins
);
524 /* align the locals to a 16 byte boundary */
525 /* specific to NAlign == 3 */
530 fprintf(stderr
, "\n> Block information:\n");
531 for (b
=fn
->start
; b
; b
=b
->link
) {
532 fprintf(stderr
, "\t%-10s (% 5d) ", b
->name
, b
->loop
);
533 dumpts(b
->out
, fn
->tmp
, stderr
);
535 fprintf(stderr
, "\n> After spilling:\n");