3 typedef struct Range Range
;
4 typedef struct Slot Slot
;
6 /* require use, maintains use counts */
16 /* promote uniform stack slots to temporaries */
18 for (i
=b
->ins
; i
<&b
->ins
[b
->nins
]; i
++) {
19 if (Oalloc
> i
->op
|| i
->op
> Oalloc1
)
21 /* specific to NAlign == 3 */
22 assert(rtype(i
->to
) == RTmp
);
23 t
= &fn
->tmp
[i
->to
.val
];
28 for (u
=t
->use
; u
<&t
->use
[t
->nuse
]; u
++) {
33 if (s
== -1 || s
== loadsz(l
)) {
38 if (req(i
->to
, l
->arg
[1]) && !req(i
->to
, l
->arg
[0]))
39 if (s
== -1 || s
== storesz(l
))
40 if (k
== -1 || k
== optab
[l
->op
].argcls
[0][0]) {
42 k
= optab
[l
->op
].argcls
[0][0];
47 /* get rid of the alloc and replace uses */
48 *i
= (Ins
){.op
= Onop
};
50 ue
= &t
->use
[t
->nuse
];
51 for (u
=t
->use
; u
!=ue
; u
++) {
62 err("slot %%%s is read but never stored to",
63 fn
->tmp
[l
->arg
[0].val
].name
);
64 /* try to turn loads into copies so we
65 * can eliminate them later */
73 if (KBASE(k
) != KBASE(l
->cls
))
80 l
->op
= Oextsb
+ (l
->op
- Oloadsb
);
88 fprintf(stderr
, "\n> After slot promotion:\n");
93 /* [a, b) with 0 <= a */
110 return r
.a
<= n
&& n
< r
.b
;
114 rovlap(Range r0
, Range r1
)
116 return r0
.b
&& r1
.b
&& r0
.a
< r1
.b
&& r1
.a
< r0
.b
;
120 radd(Range
*r
, int n
)
123 *r
= (Range
){n
, n
+1};
131 slot(Slot
**ps
, int64_t *off
, Ref r
, Fn
*fn
, Slot
*sl
)
139 t
= &fn
->tmp
[a
.base
];
148 memr(Ref r
, bits x
, int ip
, Fn
*fn
, Slot
*sl
)
153 if (slot(&s
, &off
, r
, fn
, sl
)) {
162 memw(Ref r
, bits x
, int ip
, Fn
*fn
, Slot
*sl
)
167 if (slot(&s
, &off
, r
, fn
, sl
)) {
175 scmp(const void *pa
, const void *pb
)
179 a
= (Slot
*)pa
, b
= (Slot
*)pb
;
181 return b
->sz
- a
->sz
;
182 return a
->r
.a
- b
->r
.a
;
186 maxrpo(Blk
*hd
, Blk
*b
)
188 if (hd
->loop
< (int)b
->id
)
197 Blk
*b
, **ps
, *succ
[3];
203 int n
, m
, nsl
, ip
, *kill
;
204 uint total
, freed
, fused
;
206 /* minimize the stack usage
207 * by coalescing slots
210 sl
= vnew(0, sizeof sl
[0], PHeap
);
211 for (n
=Tmp0
; n
<fn
->ntmp
; n
++) {
214 if (t
->alias
.type
== ALoc
)
215 if (t
->alias
.slot
== &t
->alias
)
216 if (t
->alias
.u
.loc
.sz
!= -1) {
221 s
->sz
= t
->alias
.u
.loc
.sz
;
222 s
->m
= t
->alias
.u
.loc
.m
;
227 /* one-pass liveness analysis */
228 for (b
=fn
->start
; b
; b
=b
->link
)
230 loopiter(fn
, maxrpo
);
231 br
= vnew(fn
->nblk
, sizeof br
[0], PHeap
);
233 for (n
=fn
->nblk
-1; n
>=0; n
--) {
239 for (s
=sl
; s
<&sl
[nsl
]; s
++) {
241 for (ps
=succ
; *ps
; ps
++) {
243 if (m
> n
&& rin(s
->r
, br
[m
].a
)) {
249 for (i
=&b
->ins
[b
->nins
]; i
!=b
->ins
;) {
251 if (i
->op
== Oargc
) {
252 memr(arg
[1], -1, --ip
, fn
, sl
);
255 x
= BIT(loadsz(i
)) - 1;
256 memr(arg
[0], x
, --ip
, fn
, sl
);
258 if (isstore(i
->op
)) {
259 x
= BIT(storesz(i
)) - 1;
260 memw(arg
[1], x
, ip
--, fn
, sl
);
263 for (s
=sl
; s
<&sl
[nsl
]; s
++)
268 radd(&s
->r
, br
[b
->loop
].b
- 1);
275 /* kill slots with an empty live range */
278 kill
= vnew(0, sizeof kill
[0], PHeap
);
280 for (s
=s0
=sl
; s
<&sl
[nsl
]; s
++) {
291 fputs("\n> Slot coalescing:\n", stderr
);
293 fputs("\tDEAD", stderr
);
295 fprintf(stderr
, " %%%s",
296 fn
->tmp
[kill
[m
]].name
);
301 t
= &fn
->tmp
[kill
[n
]];
302 assert(t
->ndef
== 1 && t
->def
);
303 *t
->def
= (Ins
){.op
= Onop
};
304 for (u
=t
->use
; u
<&t
->use
[t
->nuse
]; u
++) {
305 assert(u
->type
== UIns
);
307 if (!req(i
->to
, R
)) {
308 assert(rtype(i
->to
) == RTmp
);
310 kill
[n
-1] = i
->to
.val
;
312 *i
= (Ins
){.op
= Onop
};
317 /* fuse slots by decreasing size */
318 qsort(sl
, nsl
, sizeof *sl
, scmp
);
320 for (n
=0; n
<nsl
; n
++) {
326 for (s
=s0
+1; s
<&sl
[nsl
]; s
++) {
330 /* O(n) can be approximated
331 * by 'goto Skip;' if need be
333 for (m
=n
; &sl
[m
]<s
; m
++)
335 if (rovlap(sl
[m
].r
, s
->r
))
338 radd(&r
, s
->r
.b
- 1);
345 /* substitute fused slots */
346 for (s
=sl
; s
<&sl
[nsl
]; s
++) {
350 assert(t
->ndef
== 1 && t
->def
);
351 *t
->def
= (Ins
){.op
= Onop
};
352 for (u
=t
->use
; u
<&t
->use
[t
->nuse
]; u
++) {
353 assert(u
->type
== UIns
);
356 if (req(arg
[n
], TMP(s
->t
)))
357 arg
[n
] = TMP(s
->s
->t
);
362 for (s0
=sl
; s0
<&sl
[nsl
]; s0
++) {
365 fprintf(stderr
, "\tLOCL (% 3db) %s:",
366 s0
->sz
, fn
->tmp
[s0
->t
].name
);
367 for (s
=s0
; s
<&sl
[nsl
]; s
++) {
370 fprintf(stderr
, " %%%s", fn
->tmp
[s
->t
].name
);
372 fprintf(stderr
, "[%d,%d)",
373 s
->r
.a
-ip
, s
->r
.b
-ip
);
379 fprintf(stderr
, "\tSUMS %u/%u/%u (freed/fused/total)\n\n",
380 freed
, fused
, total
);