#define MASK(w) (BIT(8*(w)-1)*2-1) /* must work when w==8 */
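/* assuming BIT(n) expands to a 64-bit constant with only bit n
 * set, MASK(1) == 0xff and MASK(4) == 0xffffffff; writing
 * BIT(8*w-1)*2-1 instead of BIT(8*w)-1 avoids an out-of-range
 * shift when w==8: the multiplication wraps around and still
 * yields an all-ones mask */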
typedef struct Loc Loc;
typedef struct Slice Slice;
typedef struct Insert Insert;
	LRoot,   /* right above the original load */
	LLoad,   /* inserting a load is allowed */
	LNoLoad, /* only scalar operations allowed */
	short cls; /* load class */
static uint inum;    /* current insertion number */
static Insert *ilog; /* global insertion log */
static uint nlog;    /* number of entries in the log */
	case Oloadsb: case Oloadub: return 1;
	case Oloadsh: case Oloaduh: return 2;
	case Oloadsw: case Oloaduw: return 4;
	case Oload: return KWIDE(l->cls) ? 8 : 4;
	case Ostoreb: return 1;
	case Ostoreh: return 2;
	case Ostorew: case Ostores: return 4;
	case Ostorel: case Ostored: return 8;
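/* iins: record a new instruction (op, cls, a0, a1) in the
 * insertion log at location l and return a fresh temporary
 * that will hold its result */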
iins(int cls, int op, Ref a0, Ref a1, Loc *l)
	ist->bid = l->blk->id;
	ist->new.ins = (Ins){op, cls, R, {a0, a1}};
	return ist->new.ins.to = newtmp("ld", cls, curf);
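/* cast: convert *r to class cls, logging the necessary cast
 * and extension instructions via iins() at location l */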
cast(Ref *r, int cls, Loc *l)
	if (rtype(*r) == RCon)
	assert(rtype(*r) == RTmp);
	cls0 = curf->tmp[r->val].cls;
	if (cls0 == cls || (cls == Kw && cls0 == Kl))
	if (KWIDE(cls0) < KWIDE(cls)) {
		*r = iins(Kw, Ocast, *r, R, l);
		*r = iins(Kl, Oextuw, *r, R, l);
		*r = iins(Kd, Ocast, *r, R, l);
	if (cls0 == Kd && cls != Kl)
		*r = iins(Kl, Ocast, *r, R, l);
	if (cls0 != Kd || cls != Kw)
		*r = iins(cls, Ocast, *r, R, l);
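/* mask: and *r with the constant msk, via an Oand logged
 * at location l */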
mask(int cls, Ref *r, bits msk, Loc *l)
	*r = iins(cls, Oand, *r, getcon(msk, curf), l);
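/* load: emit a load of the slice sl at location l, rebuilding
 * the address from the alias base of sl.ref, and mask the
 * loaded value down to the requested bits */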
load(Slice sl, bits msk, Loc *l)
	all = msk == MASK(sl.sz);
	cls = sl.sz > 4 ? Kl : Kw;
	/* sl.ref might not be live here,
	 * but its alias base ref will be
	 * (see killsl() below) */
	if (rtype(r) == RTmp) {
		a = &curf->tmp[r.val].alias;
		r1 = getcon(a->offset, curf);
		r = iins(Kl, Oadd, r, r1, l);
		memset(&c, 0, sizeof c);
		c.bits.i = a->offset;
		r = newcon(&c, curf);
	r = iins(cls, ld, r, R, l);
	mask(cls, &r, msk, l);
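/* killsl: does defining r clobber the alias base that the
 * slice sl is addressed through? */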
killsl(Ref r, Slice sl)
	if (rtype(sl.ref) != RTmp)
	a = &curf->tmp[sl.ref.val].alias;
	default:   die("unreachable");
	case AUnk: return req(TMP(a->base), r);
/* returns a ref containing the contents of the slice
 * passed as argument; all the bits set to 0 in the
 * mask argument are zeroed in the result;
 * the returned ref has an integer class when the
 * mask does not cover all the bits of the slice,
 * otherwise it has class sl.cls;
 * the procedure returns R when it fails */
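/* for instance, asking for a 4-byte slice with msk == 0xffff
 * yields a Kw ref whose upper two bytes are zero, while asking
 * with msk == MASK(4) yields a ref of class sl.cls */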
def(Slice sl, bits msk, Blk *b, Ins *i, Loc *il)
	int off, cls, cls1, op, sz, ld;
	 * -1- b dominates il->blk; so we can use
	 *     temporaries of b in il->blk
	 * -2- if il->type != LNoLoad, then il->blk
	 *     postdominates the original load; so it
	 *     is safe to load in il->blk
	 * -3- if il->type != LNoLoad, then b
	 *     postdominates il->blk (and by 2, the
	 *     original load) */
	assert(dom(b, il->blk));
	if (il->type != LLoad)
		return R;
	return load(sl, msk, il);
	i = &b->ins[b->nins];
	cls = sl.sz > 4 ? Kl : Kw;
		if (killsl(i->to, sl)
		|| (i->op == Ocall && escapes(sl.ref, curf)))
		} else if (isstore(i->op)) {
		} else if (i->op == Oblit1) {
			assert(rtype(i->arg[0]) == RInt);
			sz = abs(rsval(i->arg[0]));
			assert(i->op == Oblit0);
		switch (alias(sl.ref, sl.off, sl.sz, r1, sz, &off, curf)) {
			if (i->op == Oblit0) {
			msk1 = (MASK(sz) << 8*off) & msks;
			msk1 = (MASK(sz) >> 8*off) & msks;
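			/* e.g. with an 8-byte slice, a 4-byte store two
			 * bytes past the slice start covers
			 * msk1 == MASK(4) << 16, i.e. bytes 2 to 5 */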
			if ((msk1 & msk) == 0)
			if (i->op == Oblit0) {
				r = def(sl1, MASK(sz), b, i, il);
			if (op == Oshr && off + sl.sz > 4)
			r1 = getcon(8*off, curf);
			r = iins(cls1, op, r, r1, il);
			if ((msk1 & msk) != msk1 || off + sz < sl.sz)
				mask(cls, &r, msk1 & msk, il);
			/* bits not covered by this store are fetched
			 * recursively and or-ed in */
			if ((msk & ~msk1) != 0) {
				r1 = def(sl, msk & ~msk1, b, i, il);
				r = iins(cls, Oor, r, r1, il);
			cast(&r, sl.cls, il);
	for (ist=ilog; ist<&ilog[nlog]; ++ist)
		if (ist->isphi && ist->bid == b->id)
		if (req(ist->new.phi.m.ref, sl.ref))
		if (ist->new.phi.m.off == sl.off)
		if (ist->new.phi.m.sz == sl.sz) {
			r = ist->new.phi.p->to;
			mask(cls, &r, msk, il);
			cast(&r, sl.cls, il);
	for (p=b->phi; p; p=p->link)
		if (killsl(p->to, sl))
			/* scanning predecessors in that
			 * case would be unsafe */
		assert(bp->loop >= il->blk->loop);
		r1 = def(sl, msk, bp, 0, &l);
	r = newtmp("ld", sl.cls, curf);
	p = alloc(sizeof *p);
	vgrow(&ilog, ++nlog);
	p->arg = vnew(p->narg, sizeof p->arg[0], PFn);
	p->blk = vnew(p->narg, sizeof p->blk[0], PFn);
	for (np=0; np<b->npred; ++np) {
		&& il->type != LNoLoad
		&& bp->loop < il->blk->loop)
		r1 = def(sl, msks, bp, 0, &l);
	mask(cls, &r, msk, il);
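/* icmp: qsort comparator for the insertion log; entries are
 * ordered by block id, phis before instruction insertions,
 * then by instruction offset and insertion number */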
icmp(const void *pa, const void *pb)
	if ((c = a->bid - b->bid))
	if (a->isphi && b->isphi)
	if ((c = a->off - b->off))
	return a->num - b->num;
/* require rpo ssa alias */
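/* driver of the pass: for every load, ask def() for a value
 * that can replace it, then commit the logged phis and
 * instructions block by block */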
	ilog = vnew(0, sizeof ilog[0], PHeap);
	for (b=fn->start; b; b=b->link)
		for (i=b->ins; i<&b->ins[b->nins]; ++i) {
			sl = (Slice){i->arg[0], 0, sz, i->cls};
			l = (Loc){LRoot, i-b->ins, b};
			i->arg[1] = def(sl, MASK(sz), b, i, &l);
	qsort(ilog, nlog, sizeof ilog[0], icmp);
	vgrow(&ilog, nlog+1);
	ilog[nlog].bid = fn->nblk; /* add a sentinel */
	ib = vnew(0, sizeof(Ins), PHeap);
	for (ist=ilog, n=0; n<fn->nblk; ++n) {
		for (; ist->bid == n && ist->isphi; ++ist) {
			ist->new.phi.p->link = b->phi;
			b->phi = ist->new.phi.p;
			if (ist->bid == n && ist->off == ni)
			&& !req(i->arg[1], R)) {
				ext = Oextsb + i->op - Oloadsb;
				i->arg[0] = i->arg[1];
		idup(&b->ins, ib, nt);
		fprintf(stderr, "\n> After load elimination:\n");