4 aggreg(Blk
*hd
, Blk
*b
)
8 /* aggregate looping information at
10 bsunion(hd
->gen
, b
->gen
);
12 if (b
->nlive
[k
] > hd
->nlive
[k
])
13 hd
->nlive
[k
] = b
->nlive
[k
];
17 tmpuse(Ref r
, int use
, int loop
, Fn
*fn
)
22 if (rtype(r
) == RMem
) {
24 tmpuse(m
->base
, 1, loop
, fn
);
25 tmpuse(m
->index
, 1, loop
, fn
);
27 else if (rtype(r
) == RTmp
&& r
.val
>= Tmp0
) {
35 /* evaluate spill costs of temporaries,
36 * this also fills usage information
51 fprintf(stderr
, "\n> Loop information:\n");
52 for (b
=fn
->start
; b
; b
=b
->link
) {
53 for (a
=0; a
<b
->npred
; ++a
)
54 if (b
->id
<= b
->pred
[a
]->id
)
57 fprintf(stderr
, "\t%-10s", b
->name
);
58 fprintf(stderr
, " (% 3d ", b
->nlive
[0]);
59 fprintf(stderr
, "% 3d) ", b
->nlive
[1]);
60 dumpts(b
->gen
, fn
->tmp
, stderr
);
64 for (t
=fn
->tmp
; t
-fn
->tmp
< fn
->ntmp
; t
++) {
65 t
->cost
= t
-fn
->tmp
< Tmp0
? UINT_MAX
: 0;
69 for (b
=fn
->start
; b
; b
=b
->link
) {
70 for (p
=b
->phi
; p
; p
=p
->link
) {
71 /* todo, the cost computation
72 * for p->to is not great... */
73 tmpuse(p
->to
, 0, 0, fn
);
74 for (a
=0; a
<p
->narg
; a
++) {
76 assert(b
->npred
==p
->narg
&& "wrong cfg");
78 tmpuse(p
->arg
[a
], 1, n
, fn
);
82 for (i
=b
->ins
; i
-b
->ins
< b
->nins
; i
++) {
83 tmpuse(i
->to
, 0, n
, fn
);
84 tmpuse(i
->arg
[0], 1, n
, fn
);
85 tmpuse(i
->arg
[1], 1, n
, fn
);
87 tmpuse(b
->jmp
.arg
, 1, n
, fn
);
90 fprintf(stderr
, "\n> Spill costs:\n");
91 for (n
=Tmp0
; n
<fn
->ntmp
; n
++)
92 fprintf(stderr
, "\t%-10s %d\n",
95 fprintf(stderr
, "\n");
99 static BSet
*fst
; /* temps to prioritize in registers (for tcmp1) */
100 static Tmp
*tmp
; /* current temporaries (for tcmpX) */
101 static int ntmp
; /* current # of temps (for limit) */
102 static int locs
; /* stack size used by locals */
103 static int slot4
; /* next slot of 4 bytes */
104 static int slot8
; /* ditto, 8 bytes */
105 static BSet mask
[2][1]; /* class masks */
108 tcmp0(const void *pa
, const void *pb
)
112 ca
= tmp
[*(int *)pa
].cost
;
113 cb
= tmp
[*(int *)pb
].cost
;
114 return (cb
< ca
) ? -1 : (cb
> ca
);
118 tcmp1(const void *pa
, const void *pb
)
122 c
= bshas(fst
, *(int *)pb
) - bshas(fst
, *(int *)pa
);
123 return c
? c
: tcmp0(pa
, pb
);
131 assert(t
>= Tmp0
&& "cannot spill register");
134 /* specific to NAlign == 3 */
135 /* nice logic to pack stack slots
136 * on demand, there can be only
137 * one hole and slot4 points to it
139 * invariant: slot4 <= slot8
141 if (KWIDE(tmp
[t
].cls
)) {
148 if (slot4
== slot8
) {
161 limit(BSet
*b
, int k
, BSet
*f
)
163 static int *tarr
, maxt
;
171 tarr
= emalloc(nt
* sizeof tarr
[0]);
174 for (i
=0, t
=0; bsiter(b
, &t
); t
++) {
179 qsort(tarr
, nt
, sizeof tarr
[0], tcmp0
);
182 qsort(tarr
, nt
, sizeof tarr
[0], tcmp1
);
184 for (i
=0; i
<k
&& i
<nt
; i
++)
191 limit2(BSet
*b1
, int k1
, int k2
, BSet
*fst
)
195 bsinit(b2
, ntmp
); /* todo, free those */
197 bsinter(b1
, mask
[0]);
198 bsinter(b2
, mask
[1]);
199 limit(b1
, NIReg
- k1
, fst
);
200 limit(b2
, NFReg
- k2
, fst
);
205 sethint(BSet
*u
, bits r
)
209 for (t
=Tmp0
; bsiter(u
, &t
); t
++)
210 tmp
[phicls(t
, tmp
)].hint
.m
|= r
;
214 reloads(BSet
*u
, BSet
*v
)
218 for (t
=Tmp0
; bsiter(u
, &t
); t
++)
220 emit(Oload
, tmp
[t
].cls
, TMP(t
), slot(t
), R
);
227 emit(Ostorew
+ tmp
[r
.val
].cls
, 0, R
, r
, SLOT(s
));
233 return i
->op
== Ocopy
&& isreg(i
->arg
[0]);
237 dopm(Blk
*b
, Ins
*i
, BSet
*v
)
244 bsinit(u
, ntmp
); /* todo, free those */
245 /* consecutive copies from
246 * registers need to be handled
247 * as one large instruction
249 * fixme: there is an assumption
250 * that calls are always followed
251 * by copy instructions here, this
252 * might not be true if previous
262 store(i
->to
, tmp
[t
].slot
);
264 bsset(v
, i
->arg
[0].val
);
265 } while (i
!= b
->ins
&& regcpy(i
-1));
267 if (i
!= b
->ins
&& (i
-1)->op
== Ocall
) {
268 v
->t
[0] &= ~retregs((i
-1)->arg
[1], 0);
269 limit2(v
, NISave
, NFSave
, 0);
270 for (r
=0, n
=0; n
<NRSave
; n
++)
272 v
->t
[0] |= argregs((i
-1)->arg
[1], 0);
285 /* spill code insertion
286 * requires spill costs, rpo, liveness
288 * Note: this will replace liveness
289 * information (in, out) with temporaries
290 * that must be in registers at block
294 * - Ocopy instructions to ensure register
300 Blk
*b
, *s1
, *s2
, *hd
, **bp
;
301 int j
, n
, l
, t
, k
, lvarg
[2];
302 BSet u
[1], v
[1], w
[1];
313 bsinit(mask
[0], ntmp
);
314 bsinit(mask
[1], ntmp
);
318 for (t
=0; t
<ntmp
; t
++) {
320 if (t
>= XMM0
&& t
< XMM0
+ NFReg
)
323 k
= KBASE(tmp
[t
].cls
);
327 for (bp
=&fn
->rpo
[fn
->nblk
]; bp
!=fn
->rpo
;) {
329 /* invariant: all bocks with bigger rpo got
330 * their in,out updated. */
332 /* 1. find temporaries in registers at
333 * the end of the block (put them in v) */
338 if (s1
&& s1
->id
<= b
->id
)
340 if (s2
&& s2
->id
<= b
->id
)
341 if (!hd
|| s2
->id
>= hd
->id
)
347 hd
->gen
->t
[0] |= RGLOB
; /* don't spill registers */
348 for (k
=0; k
<2; k
++) {
349 n
= k
== 0 ? NIReg
: NFReg
;
355 if ((int)bscount(u
) < n
) { /* fixme */
356 j
= bscount(w
); /* live through */
358 limit(w
, n
- (l
- j
), 0);
375 for (t
=Tmp0
; bsiter(b
->out
, &t
); t
++)
380 /* 2. process the block instructions */
382 for (i
=&b
->ins
[b
->nins
]; i
!=b
->ins
;) {
389 if (!req(i
->to
, R
)) {
390 assert(rtype(i
->to
) == RTmp
);
395 /* make sure we have a reg
401 j
= opdesc
[i
->op
].nmem
;
403 if (rtype(i
->arg
[n
]) == RMem
)
406 switch (rtype(i
->arg
[n
])) {
410 if (rtype(m
->base
) == RTmp
) {
411 bsset(v
, m
->base
.val
);
412 bsset(w
, m
->base
.val
);
414 if (rtype(m
->index
) == RTmp
) {
415 bsset(v
, m
->index
.val
);
416 bsset(w
, m
->index
.val
);
421 lvarg
[n
] = bshas(v
, t
);
430 if (rtype(i
->arg
[n
]) == RTmp
) {
433 /* do not reload if the
434 * the temporary was dead
442 if (!req(i
->to
, R
)) {
444 store(i
->to
, tmp
[t
].slot
);
448 r
= v
->t
[0] & (BIT(Tmp0
)-1);
452 assert(!(r
& ~RGLOB
) || b
==fn
->start
);
454 for (p
=b
->phi
; p
; p
=p
->link
) {
455 assert(rtype(p
->to
) == RTmp
);
459 store(p
->to
, tmp
[t
].slot
);
460 } else if (bshas(b
->in
, t
))
461 /* only if the phi is live */
462 p
->to
= slot(p
->to
.val
);
465 b
->nins
= &insb
[NIns
] - curi
;
466 idup(&b
->ins
, curi
, b
->nins
);
469 /* align the locals to a 16 byte boundary */
470 /* specific to NAlign == 3 */
475 fprintf(stderr
, "\n> Block information:\n");
476 for (b
=fn
->start
; b
; b
=b
->link
) {
477 fprintf(stderr
, "\t%-10s (% 5d) ", b
->name
, b
->loop
);
478 dumpts(b
->out
, fn
->tmp
, stderr
);
480 fprintf(stderr
, "\n> After spilling:\n");