3 typedef struct Range Range
;
4 typedef struct Store Store
;
5 typedef struct Slot Slot
;
7 /* require use, maintains use counts */
17 /* promote uniform stack slots to temporaries */
19 for (i
=b
->ins
; i
<&b
->ins
[b
->nins
]; i
++) {
20 if (Oalloc
> i
->op
|| i
->op
> Oalloc1
)
22 /* specific to NAlign == 3 */
23 assert(rtype(i
->to
) == RTmp
);
24 t
= &fn
->tmp
[i
->to
.val
];
29 for (u
=t
->use
; u
<&t
->use
[t
->nuse
]; u
++) {
34 if (s
== -1 || s
== loadsz(l
)) {
39 if (req(i
->to
, l
->arg
[1]) && !req(i
->to
, l
->arg
[0]))
40 if (s
== -1 || s
== storesz(l
))
41 if (k
== -1 || k
== optab
[l
->op
].argcls
[0][0]) {
43 k
= optab
[l
->op
].argcls
[0][0];
48 /* get rid of the alloc and replace uses */
49 *i
= (Ins
){.op
= Onop
};
51 ue
= &t
->use
[t
->nuse
];
52 for (u
=t
->use
; u
!=ue
; u
++) {
63 err("slot %%%s is read but never stored to",
64 fn
->tmp
[l
->arg
[0].val
].name
);
65 /* try to turn loads into copies so we
66 * can eliminate them later */
74 if (KBASE(k
) != KBASE(l
->cls
))
81 l
->op
= Oextsb
+ (l
->op
- Oloadsb
);
89 fprintf(stderr
, "\n> After slot promotion:\n");
94 /* [a, b) with 0 <= a */
118 return r
.a
<= n
&& n
< r
.b
;
122 rovlap(Range r0
, Range r1
)
124 return r0
.b
&& r1
.b
&& r0
.a
< r1
.b
&& r1
.a
< r0
.b
;
128 radd(Range
*r
, int n
)
131 *r
= (Range
){n
, n
+1};
139 slot(Slot
**ps
, int64_t *off
, Ref r
, Fn
*fn
, Slot
*sl
)
147 t
= &fn
->tmp
[a
.base
];
156 load(Ref r
, bits x
, int ip
, Fn
*fn
, Slot
*sl
)
161 if (slot(&s
, &off
, r
, fn
, sl
)) {
170 store(Ref r
, bits x
, int ip
, Ins
*i
, Fn
*fn
, Slot
*sl
)
175 if (slot(&s
, &off
, r
, fn
, sl
)) {
180 vgrow(&s
->st
, ++s
->nst
);
181 s
->st
[s
->nst
-1].ip
= ip
;
182 s
->st
[s
->nst
-1].i
= i
;
188 scmp(const void *pa
, const void *pb
)
192 a
= (Slot
*)pa
, b
= (Slot
*)pb
;
194 return b
->sz
- a
->sz
;
195 return a
->r
.a
- b
->r
.a
;
199 maxrpo(Blk
*hd
, Blk
*b
)
201 if (hd
->loop
< (int)b
->id
)
210 Blk
*b
, **ps
, *succ
[3];
217 int n
, m
, ip
, sz
, nsl
, nbl
, *stk
;
218 uint total
, freed
, fused
;
220 /* minimize the stack usage
221 * by coalescing slots
224 sl
= vnew(0, sizeof sl
[0], PHeap
);
225 for (n
=Tmp0
; n
<fn
->ntmp
; n
++) {
228 if (t
->alias
.type
== ALoc
)
229 if (t
->alias
.slot
== &t
->alias
)
230 if (t
->bid
== fn
->start
->id
)
231 if (t
->alias
.u
.loc
.sz
!= -1) {
236 s
->sz
= t
->alias
.u
.loc
.sz
;
237 s
->m
= t
->alias
.u
.loc
.m
;
239 s
->st
= vnew(0, sizeof s
->st
[0], PHeap
);
244 /* one-pass liveness analysis */
245 for (b
=fn
->start
; b
; b
=b
->link
)
247 loopiter(fn
, maxrpo
);
249 bl
= vnew(0, sizeof bl
[0], PHeap
);
250 br
= emalloc(fn
->nblk
* sizeof br
[0]);
252 for (n
=fn
->nblk
-1; n
>=0; n
--) {
258 for (s
=sl
; s
<&sl
[nsl
]; s
++) {
260 for (ps
=succ
; *ps
; ps
++) {
262 if (m
> n
&& rin(s
->r
, br
[m
].a
)) {
268 if (b
->jmp
.type
== Jretc
)
269 load(b
->jmp
.arg
, -1, --ip
, fn
, sl
);
270 for (i
=&b
->ins
[b
->nins
]; i
!=b
->ins
;) {
273 if (i
->op
== Oargc
) {
274 load(arg
[1], -1, --ip
, fn
, sl
);
277 x
= BIT(loadsz(i
)) - 1;
278 load(arg
[0], x
, --ip
, fn
, sl
);
280 if (isstore(i
->op
)) {
281 x
= BIT(storesz(i
)) - 1;
282 store(arg
[1], x
, ip
--, i
, fn
, sl
);
284 if (i
->op
== Oblit0
) {
285 assert((i
+1)->op
== Oblit1
);
286 assert(rtype((i
+1)->arg
[0]) == RInt
);
287 sz
= abs(rsval((i
+1)->arg
[0]));
288 x
= sz
>= NBit
? (bits
)-1 : BIT(sz
) - 1;
289 store(arg
[1], x
, ip
--, i
, fn
, sl
);
290 load(arg
[0], x
, ip
, fn
, sl
);
295 for (s
=sl
; s
<&sl
[nsl
]; s
++)
300 radd(&s
->r
, br
[b
->loop
].b
- 1);
307 /* kill dead stores */
308 for (s
=sl
; s
<&sl
[nsl
]; s
++)
309 for (n
=0; n
<s
->nst
; n
++)
310 if (!rin(s
->r
, s
->st
[n
].ip
)) {
313 *(i
+1) = (Ins
){.op
= Onop
};
314 *i
= (Ins
){.op
= Onop
};
317 /* kill slots with an empty live range */
320 stk
= vnew(0, sizeof stk
[0], PHeap
);
322 for (s
=s0
=sl
; s
<&sl
[nsl
]; s
++) {
334 fputs("\n> Slot coalescing:\n", stderr
);
336 fputs("\tkill [", stderr
);
338 fprintf(stderr
, " %%%s",
339 fn
->tmp
[stk
[m
]].name
);
340 fputs(" ]\n", stderr
);
344 t
= &fn
->tmp
[stk
[n
]];
345 assert(t
->ndef
== 1 && t
->def
);
352 *i
= (Ins
){.op
= Onop
};
353 for (u
=t
->use
; u
<&t
->use
[t
->nuse
]; u
++) {
354 if (u
->type
== UJmp
) {
356 assert(isret(b
->jmp
.type
));
361 assert(u
->type
== UIns
);
363 if (!req(i
->to
, R
)) {
364 assert(rtype(i
->to
) == RTmp
);
366 stk
[n
-1] = i
->to
.val
;
367 } else if (isarg(i
->op
)) {
368 assert(i
->op
== Oargc
);
369 i
->arg
[1] = CON_Z
; /* crash */
372 *(i
+1) = (Ins
){.op
= Onop
};
373 *i
= (Ins
){.op
= Onop
};
379 /* fuse slots by decreasing size */
380 qsort(sl
, nsl
, sizeof *sl
, scmp
);
382 for (n
=0; n
<nsl
; n
++) {
388 for (s
=s0
+1; s
<&sl
[nsl
]; s
++) {
392 /* O(n); can be approximated
393 * by 'goto Skip;' if need be
395 for (m
=n
; &sl
[m
]<s
; m
++)
397 if (rovlap(sl
[m
].r
, s
->r
))
400 radd(&r
, s
->r
.b
- 1);
407 /* substitute fused slots */
408 for (s
=sl
; s
<&sl
[nsl
]; s
++) {
410 /* the visit link is stale,
411 * reset it before the slot()
415 assert(t
->ndef
== 1 && t
->def
);
418 *t
->def
= (Ins
){.op
= Onop
};
419 ts
= &fn
->tmp
[s
->s
->t
];
420 assert(t
->bid
== ts
->bid
);
421 if (t
->def
< ts
->def
) {
422 /* make sure the slot we
423 * selected has a def that
424 * dominates its new uses
427 *ts
->def
= (Ins
){.op
= Onop
};
430 for (u
=t
->use
; u
<&t
->use
[t
->nuse
]; u
++) {
431 if (u
->type
== UJmp
) {
433 b
->jmp
.arg
= TMP(s
->s
->t
);
436 assert(u
->type
== UIns
);
439 if (req(arg
[n
], TMP(s
->t
)))
440 arg
[n
] = TMP(s
->s
->t
);
444 /* fix newly overlapping blits */
445 for (n
=0; n
<nbl
; n
++) {
448 if (slot(&s
, &off0
, i
->arg
[0], fn
, sl
))
449 if (slot(&s0
, &off1
, i
->arg
[1], fn
, sl
))
450 if (s
->s
== s0
->s
&& off0
< off1
) {
451 sz
= rsval((i
+1)->arg
[0]);
453 (i
+1)->arg
[0] = INT(-sz
);
459 for (s0
=sl
; s0
<&sl
[nsl
]; s0
++) {
462 fprintf(stderr
, "\tfuse (% 3db) [", s0
->sz
);
463 for (s
=s0
; s
<&sl
[nsl
]; s
++) {
466 fprintf(stderr
, " %%%s", fn
->tmp
[s
->t
].name
);
468 fprintf(stderr
, "[%d,%d)",
469 s
->r
.a
-ip
, s
->r
.b
-ip
);
473 fputs(" ]\n", stderr
);
475 fprintf(stderr
, "\tsums %u/%u/%u (killed/fused/total)\n\n",
476 freed
, fused
, total
);
480 for (s
=sl
; s
<&sl
[nsl
]; s
++)