#define MASK(w) (BIT(8*(w)-1)*2-1) /* must work when w==8 */
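/* a quick check of the arithmetic (assuming BIT(n) is 1 << n):
 * MASK(2) == 0xffff, and for w==8 the expression avoids the
 * out-of-range 64-bit shift that BIT(8*w)-1 would perform */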

typedef struct Loc Loc;
typedef struct Slice Slice;
typedef struct Insert Insert;

		LRoot,   /* right above the original load */
		LLoad,   /* inserting a load is allowed */
		LNoLoad, /* only scalar operations allowed */
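		/* (a Loc records a block and offset where helper
		 *  instructions may be inserted; the type presumably
		 *  says how much freedom def() has there, LNoLoad
		 *  being the most restrictive) */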

	short cls; /* load class */
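
/* new instructions and phis are not spliced in right away; they are
 * appended to this log and only inserted into the blocks at the end
 * of loadopt(), after the log has been sorted with icmp() */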
static uint inum;    /* current insertion number */
static Insert *ilog; /* global insertion log */
static uint nlog;    /* number of entries in the log */

	case Oloadsb: case Oloadub: return 1;
	case Oloadsh: case Oloaduh: return 2;
	case Oloadsw: case Oloaduw: return 4;
	case Oload: return KWIDE(l->cls) ? 8 : 4;

	case Ostoreb: return 1;
	case Ostoreh: return 2;
	case Ostorew: case Ostores: return 4;
	case Ostorel: case Ostored: return 8;
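
/* log an instruction (op, cls, a0, a1) for insertion at location l and
 * return the fresh temporary that will hold its result */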
iins(int cls, int op, Ref a0, Ref a1, Loc *l)

	ist->bid = l->blk->id;
	ist->new.ins = (Ins){op, cls, R, {a0, a1}};
	return ist->new.ins.to = newtmp("ld", cls, curf);
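
/* convert *r to class cls in place, logging the needed cast and
 * extension instructions at location l; constant refs are left as is */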
cast(Ref *r, int cls, Loc *l)

	if (rtype(*r) == RCon)
	assert(rtype(*r) == RTmp);
	cls0 = curf->tmp[r->val].cls;
	if (cls0 == cls || (cls == Kw && cls0 == Kl))
	if (KWIDE(cls0) < KWIDE(cls)) {
			*r = iins(Kw, Ocast, *r, R, l);
		*r = iins(Kl, Oextuw, *r, R, l);
			*r = iins(Kd, Ocast, *r, R, l);
		if (cls0 == Kd && cls != Kl)
			*r = iins(Kl, Ocast, *r, R, l);
		if (cls0 != Kd || cls != Kw)
			*r = iins(cls, Ocast, *r, R, l);
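
/* and *r with the constant msk so that every bit clear in the mask is
 * zeroed; the Oand is logged at location l */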
mask(int cls, Ref *r, bits msk, Loc *l)

	*r = iins(cls, Oand, *r, getcon(msk, curf), l);
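
/* emit a load of the slice sl at location l, rebasing the address on
 * the slice's alias base if needed; the result is masked down when msk
 * does not cover the whole slice */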
load(Slice sl, bits msk, Loc *l)

	all = msk == MASK(sl.sz);
		cls = sl.sz > 4 ? Kl : Kw;

	/* sl.ref might not be live here,
	 * but its alias base ref will be
	 * (see killsl() below) */
	if (rtype(r) == RTmp) {
		a = &curf->tmp[r.val].alias;
			r1 = getcon(a->offset, curf);
			r = iins(Kl, Oadd, r, r1, l);

			memset(&c, 0, sizeof c);
			c.bits.i = a->offset;
			r = newcon(&c, curf);

	r = iins(cls, ld, r, R, l);
		mask(cls, &r, msk, l);
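
/* return nonzero when r is the alias base of sl.ref, i.e. when a
 * definition of r presumably ends the range over which the slice's
 * address can still be traced */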
killsl(Ref r, Slice sl)

	if (rtype(sl.ref) != RTmp)
	a = &curf->tmp[sl.ref.val].alias;
	default:   die("unreachable");
	case AUnk: return req(TMP(a->base), r);

/* returns a ref containing the contents of the slice
 * passed as argument; all the bits set to 0 in the
 * mask argument are zeroed in the result;
 * the returned ref has an integer class when the
 * mask does not cover all the bits of the slice,
 * otherwise it has class sl.cls;
 * the procedure returns R when it fails */
def(Slice sl, bits msk, Blk *b, Ins *i, Loc *il)

	int off, cls, cls1, op, sz, ld;

	/* invariants:
	 * -1- b dominates il->blk; so we can use
	 *     temporaries of b in il->blk
	 * -2- if il->type != LNoLoad, then il->blk
	 *     postdominates the original load; so it
	 *     is safe to load in il->blk
	 * -3- if il->type != LNoLoad, then b
	 *     postdominates il->blk (and, by 2, the
	 *     original load)
	 */
	assert(dom(b, il->blk));

		if (il->type != LLoad)
			return R;
		return load(sl, msk, il);

		i = &b->ins[b->nins];
	cls = sl.sz > 4 ? Kl : Kw;

		if (killsl(i->to, sl)
		|| (i->op == Ocall && escapes(sl.ref, curf)))

		} else if (isstore(i->op)) {

		switch (alias(sl.ref, sl.sz, r1, sz, &off, curf)) {
				msk1 = (MASK(sz) << 8*off) & msks;
				msk1 = (MASK(sz) >> 8*off) & msks;
			if ((msk1 & msk) == 0)
				if (op == Oshr && off + sl.sz > 4)
				r1 = getcon(8*off, curf);
				r = iins(cls1, op, r, r1, il);
			if ((msk1 & msk) != msk1 || off + sz < sl.sz)
				mask(cls, &r, msk1 & msk, il);
			if ((msk & ~msk1) != 0) {
				r1 = def(sl, msk & ~msk1, b, i, il);
				r = iins(cls, Oor, r, r1, il);
				cast(&r, sl.cls, il);

	for (ist=ilog; ist<&ilog[nlog]; ++ist)
		if (ist->isphi && ist->bid == b->id)
		if (req(ist->new.phi.m.ref, sl.ref))
		if (ist->new.phi.m.sz == sl.sz) {
			r = ist->new.phi.p->to;
				mask(cls, &r, msk, il);
				cast(&r, sl.cls, il);

	for (p=b->phi; p; p=p->link)
		if (killsl(p->to, sl))
			/* scanning predecessors in that
			 * case would be unsafe */
			goto Load;

		assert(bp->loop >= il->blk->loop);
		r1 = def(sl, msk, bp, 0, &l);

	r = newtmp("ld", sl.cls, curf);
	p = alloc(sizeof *p);
	vgrow(&ilog, ++nlog);
	p->arg = vnew(p->narg, sizeof p->arg[0], PFn);
	p->blk = vnew(p->narg, sizeof p->blk[0], PFn);
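	/* fill the phi arguments by recursing into every predecessor;
	 * judging by the loop-depth test below, a predecessor only gets
	 * LLoad freedom when loads are still permitted and it sits in a
	 * strictly shallower loop than the original location */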
	for (np=0; np<b->npred; ++np) {
		&& il->type != LNoLoad
		&& bp->loop < il->blk->loop)
		r1 = def(sl, msks, bp, 0, &l);

		mask(cls, &r, msk, il);
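
/* order the insertion log by block id, presumably with phis sorted
 * before instructions, then by instruction offset and insertion number */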
icmp(const void *pa, const void *pb)

	if ((c = a->bid - b->bid))
	if (a->isphi && b->isphi)
	if ((c = a->off - b->off))
	return a->num - b->num;

/* require rpo, ssa, and alias */
	ilog = vnew(0, sizeof ilog[0], PHeap);
	for (b=fn->start; b; b=b->link)
		for (i=b->ins; i<&b->ins[b->nins]; ++i) {
			sl = (Slice){i->arg[0], sz, i->cls};
			l = (Loc){LRoot, i-b->ins, b};
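			/* def() returns the reconstructed value, or R on
			 * failure; it is stashed in the load's unused arg[1]
			 * and consumed by the rewrite loop below */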
			i->arg[1] = def(sl, MASK(sz), b, i, &l);

	qsort(ilog, nlog, sizeof ilog[0], icmp);
	vgrow(&ilog, nlog+1);
	ilog[nlog].bid = fn->nblk; /* add a sentinel */
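	/* the sentinel's bid matches no real block, so the scans over ist
	 * below can stop without an explicit bounds check */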
	ib = vnew(0, sizeof(Ins), PHeap);
	for (ist=ilog, n=0; n<fn->nblk; ++n) {
		for (; ist->bid == n && ist->isphi; ++ist) {
			ist->new.phi.p->link = b->phi;
			b->phi = ist->new.phi.p;

			if (ist->bid == n && ist->off == ni)

				&& !req(i->arg[1], R)) {
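					/* derive the extension opcode from the
					 * load opcode; this relies on the Oext*
					 * opcodes being declared in the same
					 * order as the Oload* opcodes */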
					ext = Oextsb + i->op - Oloadsb;
					i->arg[0] = i->arg[1];

		idup(&b->ins, ib, nt);

		fprintf(stderr, "\n> After load elimination:\n");