4 aggreg(Blk
*hd
, Blk
*b
)
8 /* aggregate looping information at
10 bsunion(hd
->gen
, b
->gen
);
12 if (b
->nlive
[k
] > hd
->nlive
[k
])
13 hd
->nlive
[k
] = b
->nlive
[k
];
17 tmpuse(Ref r
, int use
, int loop
, Fn
*fn
)
22 if (rtype(r
) == RMem
) {
24 tmpuse(m
->base
, 1, loop
, fn
);
25 tmpuse(m
->index
, 1, loop
, fn
);
27 else if (rtype(r
) == RTmp
&& r
.val
>= Tmp0
) {
35 /* evaluate spill costs of temporaries,
36 * this also fills usage information
51 fprintf(stderr
, "\n> Loop information:\n");
52 for (b
=fn
->start
; b
; b
=b
->link
) {
53 for (a
=0; a
<b
->npred
; ++a
)
54 if (b
->id
<= b
->pred
[a
]->id
)
57 fprintf(stderr
, "\t%-10s", b
->name
);
58 fprintf(stderr
, " (% 3d ", b
->nlive
[0]);
59 fprintf(stderr
, "% 3d) ", b
->nlive
[1]);
60 dumpts(b
->gen
, fn
->tmp
, stderr
);
64 for (t
=fn
->tmp
; t
-fn
->tmp
< fn
->ntmp
; t
++) {
65 t
->cost
= t
-fn
->tmp
< Tmp0
? UINT_MAX
: 0;
69 for (b
=fn
->start
; b
; b
=b
->link
) {
70 for (p
=b
->phi
; p
; p
=p
->link
) {
71 t
= &fn
->tmp
[p
->to
.val
];
72 tmpuse(p
->to
, 0, 0, fn
);
73 for (a
=0; a
<p
->narg
; a
++) {
76 tmpuse(p
->arg
[a
], 1, n
, fn
);
80 for (i
=b
->ins
; i
-b
->ins
< b
->nins
; i
++) {
81 tmpuse(i
->to
, 0, n
, fn
);
82 tmpuse(i
->arg
[0], 1, n
, fn
);
83 tmpuse(i
->arg
[1], 1, n
, fn
);
85 tmpuse(b
->jmp
.arg
, 1, n
, fn
);
88 fprintf(stderr
, "\n> Spill costs:\n");
89 for (n
=Tmp0
; n
<fn
->ntmp
; n
++)
90 fprintf(stderr
, "\t%-10s %d\n",
93 fprintf(stderr
, "\n");
97 static BSet
*fst
; /* temps to prioritize in registers (for tcmp1) */
98 static Tmp
*tmp
; /* current temporaries (for tcmpX) */
99 static int ntmp
; /* current # of temps (for limit) */
100 static int locs
; /* stack size used by locals */
101 static int slot4
; /* next slot of 4 bytes */
102 static int slot8
; /* ditto, 8 bytes */
103 static BSet mask
[2][1]; /* class masks */
106 tcmp0(const void *pa
, const void *pb
)
110 ca
= tmp
[*(int *)pa
].cost
;
111 cb
= tmp
[*(int *)pb
].cost
;
112 return (cb
< ca
) ? -1 : (cb
> ca
);
116 tcmp1(const void *pa
, const void *pb
)
120 c
= bshas(fst
, *(int *)pb
) - bshas(fst
, *(int *)pa
);
121 return c
? c
: tcmp0(pa
, pb
);
129 assert(t
>= Tmp0
&& "cannot spill register");
132 /* specific to NAlign == 3 */
133 /* nice logic to pack stack slots
134 * on demand, there can be only
135 * one hole and slot4 points to it
137 * invariant: slot4 <= slot8
139 if (KWIDE(tmp
[t
].cls
)) {
146 if (slot4
== slot8
) {
159 limit(BSet
*b
, int k
, BSet
*f
)
161 static int *tarr
, maxt
;
169 tarr
= emalloc(nt
* sizeof tarr
[0]);
172 for (i
=0, t
=0; bsiter(b
, &t
); t
++) {
177 qsort(tarr
, nt
, sizeof tarr
[0], tcmp0
);
180 qsort(tarr
, nt
, sizeof tarr
[0], tcmp1
);
182 for (i
=0; i
<k
&& i
<nt
; i
++)
189 limit2(BSet
*b1
, int k1
, int k2
, BSet
*fst
)
193 bsinit(b2
, ntmp
); /* todo, free those */
195 bsinter(b1
, mask
[0]);
196 bsinter(b2
, mask
[1]);
197 limit(b1
, T
.ngpr
- k1
, fst
);
198 limit(b2
, T
.nfpr
- k2
, fst
);
203 sethint(BSet
*u
, bits r
)
207 for (t
=Tmp0
; bsiter(u
, &t
); t
++)
208 tmp
[phicls(t
, tmp
)].hint
.m
|= r
;
212 reloads(BSet
*u
, BSet
*v
)
216 for (t
=Tmp0
; bsiter(u
, &t
); t
++)
218 emit(Oload
, tmp
[t
].cls
, TMP(t
), slot(t
), R
);
225 emit(Ostorew
+ tmp
[r
.val
].cls
, 0, R
, r
, SLOT(s
));
231 return i
->op
== Ocopy
&& isreg(i
->arg
[0]);
235 dopm(Blk
*b
, Ins
*i
, BSet
*v
)
242 bsinit(u
, ntmp
); /* todo, free those */
243 /* consecutive copies from
244 * registers need to be handled
245 * as one large instruction
247 * fixme: there is an assumption
248 * that calls are always followed
249 * by copy instructions here, this
250 * might not be true if previous
260 store(i
->to
, tmp
[t
].slot
);
262 bsset(v
, i
->arg
[0].val
);
263 } while (i
!= b
->ins
&& regcpy(i
-1));
265 if (i
!= b
->ins
&& (i
-1)->op
== Ocall
) {
266 v
->t
[0] &= ~T
.retregs((i
-1)->arg
[1], 0);
267 limit2(v
, T
.nrsave
[0], T
.nrsave
[1], 0);
268 for (n
=0, r
=0; T
.rsave
[n
]>=0; n
++)
269 r
|= BIT(T
.rsave
[n
]);
270 v
->t
[0] |= T
.argregs((i
-1)->arg
[1], 0);
283 /* spill code insertion
284 * requires spill costs, rpo, liveness
286 * Note: this will replace liveness
287 * information (in, out) with temporaries
288 * that must be in registers at block
292 * - Ocopy instructions to ensure register
298 Blk
*b
, *s1
, *s2
, *hd
, **bp
;
299 int j
, l
, t
, k
, lvarg
[2];
301 BSet u
[1], v
[1], w
[1];
312 bsinit(mask
[0], ntmp
);
313 bsinit(mask
[1], ntmp
);
317 for (t
=0; t
<ntmp
; t
++) {
319 if (t
>= T
.fpr0
&& t
< T
.fpr0
+ T
.nfpr
)
322 k
= KBASE(tmp
[t
].cls
);
326 for (bp
=&fn
->rpo
[fn
->nblk
]; bp
!=fn
->rpo
;) {
328 /* invariant: all bocks with bigger rpo got
329 * their in,out updated. */
331 /* 1. find temporaries in registers at
332 * the end of the block (put them in v) */
337 if (s1
&& s1
->id
<= b
->id
)
339 if (s2
&& s2
->id
<= b
->id
)
340 if (!hd
|| s2
->id
>= hd
->id
)
345 hd
->gen
->t
[0] |= T
.rglob
; /* don't spill registers */
346 for (k
=0; k
<2; k
++) {
347 n
= k
== 0 ? T
.ngpr
: T
.nfpr
;
353 if (bscount(u
) < n
) {
354 j
= bscount(w
); /* live through */
356 limit(w
, n
- (l
- j
), 0);
373 if (rtype(b
->jmp
.arg
) == RCall
)
374 v
->t
[0] |= T
.retregs(b
->jmp
.arg
, 0);
376 for (t
=Tmp0
; bsiter(b
->out
, &t
); t
++)
381 /* 2. process the block instructions */
384 for (i
=&b
->ins
[b
->nins
]; i
!=b
->ins
;) {
391 if (!req(i
->to
, R
)) {
392 assert(rtype(i
->to
) == RTmp
);
397 /* make sure we have a reg
403 j
= T
.memargs(i
->op
);
405 if (rtype(i
->arg
[n
]) == RMem
)
408 switch (rtype(i
->arg
[n
])) {
412 if (rtype(m
->base
) == RTmp
) {
413 bsset(v
, m
->base
.val
);
414 bsset(w
, m
->base
.val
);
416 if (rtype(m
->index
) == RTmp
) {
417 bsset(v
, m
->index
.val
);
418 bsset(w
, m
->index
.val
);
423 lvarg
[n
] = bshas(v
, t
);
432 if (rtype(i
->arg
[n
]) == RTmp
) {
435 /* do not reload if the
436 * the temporary was dead
444 if (!req(i
->to
, R
)) {
446 store(i
->to
, tmp
[t
].slot
);
450 r
= v
->t
[0]; /* Tmp0 is NBit */
454 assert(r
== T
.rglob
|| b
== fn
->start
);
456 for (p
=b
->phi
; p
; p
=p
->link
) {
457 assert(rtype(p
->to
) == RTmp
);
461 store(p
->to
, tmp
[t
].slot
);
462 } else if (bshas(b
->in
, t
))
463 /* only if the phi is live */
464 p
->to
= slot(p
->to
.val
);
467 b
->nins
= &insb
[NIns
] - curi
;
468 idup(&b
->ins
, curi
, b
->nins
);
471 /* align the locals to a 16 byte boundary */
472 /* specific to NAlign == 3 */
477 fprintf(stderr
, "\n> Block information:\n");
478 for (b
=fn
->start
; b
; b
=b
->link
) {
479 fprintf(stderr
, "\t%-10s (% 5d) ", b
->name
, b
->loop
);
480 dumpts(b
->out
, fn
->tmp
, stderr
);
482 fprintf(stderr
, "\n> After spilling:\n");