3 #define MASK(w) (BIT(8*(w)-1)*2-1) /* must work when w==8 */
5 typedef struct Loc Loc
;
6 typedef struct Slice Slice
;
7 typedef struct Insert Insert
;
12 LRoot
, /* right above the original load */
13 LLoad
, /* inserting a load is allowed */
14 LNoLoad
, /* only scalar operations allowed */
23 short cls
; /* load class */
41 static uint inum
; /* current insertion number */
42 static Insert
*ilog
; /* global insertion log */
43 static uint nlog
; /* number of entries in the log */
49 case Oloadsb
: case Oloadub
: return 1;
50 case Oloadsh
: case Oloaduh
: return 2;
51 case Oloadsw
: case Oloaduw
: return 4;
52 case Oload
: return KWIDE(l
->cls
) ? 8 : 4;
61 case Ostoreb
: return 1;
62 case Ostoreh
: return 2;
63 case Ostorew
: case Ostores
: return 4;
64 case Ostorel
: case Ostored
: return 8;
70 iins(int cls
, int op
, Ref a0
, Ref a1
, Loc
*l
)
78 ist
->bid
= l
->blk
->id
;
80 ist
->new.ins
= (Ins
){op
, cls
, R
, {a0
, a1
}};
81 return ist
->new.ins
.to
= newtmp("ld", cls
, curf
);
85 cast(Ref
*r
, int cls
, Loc
*l
)
89 if (rtype(*r
) == RCon
)
91 assert(rtype(*r
) == RTmp
);
92 cls0
= curf
->tmp
[r
->val
].cls
;
93 if (cls0
== cls
|| (cls
== Kw
&& cls0
== Kl
))
95 assert(!KWIDE(cls0
) || KWIDE(cls
));
96 if (KWIDE(cls
) == KWIDE(cls0
))
97 *r
= iins(cls
, Ocast
, *r
, R
, l
);
101 *r
= iins(Kw
, Ocast
, *r
, R
, l
);
102 *r
= iins(Kl
, Oextuw
, *r
, R
, l
);
107 mask(int cls
, Ref
*r
, bits msk
, Loc
*l
)
110 *r
= iins(cls
, Oand
, *r
, getcon(msk
, curf
), l
);
114 load(Slice sl
, bits msk
, Loc
*l
)
126 all
= msk
== MASK(sl
.sz
);
130 cls
= sl
.sz
> 4 ? Kl
: Kw
;
132 /* sl.ref might not be live here,
133 * but its alias base ref will be
134 * (see killsl() below) */
135 if (rtype(r
) == RTmp
) {
136 a
= &curf
->tmp
[r
.val
].alias
;
146 r1
= getcon(a
->offset
, curf
);
147 r
= iins(Kl
, Oadd
, r
, r1
, l
);
152 vgrow(&curf
->con
, curf
->ncon
);
153 curf
->con
[c
].type
= CAddr
;
154 curf
->con
[c
].label
= a
->label
;
155 curf
->con
[c
].bits
.i
= a
->offset
;
156 curf
->con
[c
].local
= 0;
161 r
= iins(cls
, ld
, r
, R
, l
);
163 mask(cls
, &r
, msk
, l
);
168 killsl(Ref r
, Slice sl
)
172 if (rtype(sl
.ref
) != RTmp
)
174 a
= &curf
->tmp
[sl
.ref
.val
].alias
;
176 default: die("unreachable");
179 case AUnk
: return req(a
->base
, r
);
185 /* returns a ref containing the contents of the slice
186 * passed as argument, all the bits set to 0 in the
187 * mask argument are zeroed in the result;
188 * the returned ref has an integer class when the
189 * mask does not cover all the bits of the slice,
190 * otherwise, it has class sl.cls
191 * the procedure returns R when it fails */
193 def(Slice sl
, bits msk
, Blk
*b
, Ins
*i
, Loc
*il
)
197 int off
, cls
, cls1
, op
, sz
, ld
;
205 * -1- b dominates il->blk; so we can use
206 * temporaries of b in il->blk
207 * -2- if il->type != LNoLoad, then il->blk
208 * postdominates the original load; so it
209 * is safe to load in il->blk
210 * -3- if il->type != LNoLoad, then b
211 * postdominates il->blk (and by 2, the
214 assert(dom(b
, il
->blk
));
221 if (il
->type
!= LLoad
)
223 return load(sl
, msk
, il
);
227 i
= &b
->ins
[b
->nins
];
228 cls
= sl
.sz
> 4 ? Kl
: Kw
;
233 if (killsl(i
->to
, sl
)
234 || (iscall(i
->op
) && escapes(sl
.ref
, curf
)))
241 } else if (isstore(i
->op
)) {
247 switch (alias(sl
.ref
, sl
.sz
, r1
, sz
, &off
, curf
)) {
251 msk1
= (MASK(sz
) << 8*off
) & msks
;
254 msk1
= (MASK(sz
) >> 8*off
) & msks
;
257 if ((msk1
& msk
) == 0)
261 if (op
== Oshr
&& off
+ sl
.sz
> 4)
264 r1
= getcon(8*off
, curf
);
265 r
= iins(cls1
, op
, r
, r1
, il
);
267 if ((msk1
& msk
) != msk1
|| off
+ sz
< sl
.sz
)
268 mask(cls
, &r
, msk1
& msk
, il
);
269 if ((msk
& ~msk1
) != 0) {
270 r1
= def(sl
, msk
& ~msk1
, b
, i
, il
);
273 r
= iins(cls
, Oor
, r
, r1
, il
);
276 cast(&r
, sl
.cls
, il
);
290 for (ist
=ilog
; ist
<&ilog
[nlog
]; ++ist
)
291 if (ist
->isphi
&& ist
->bid
== b
->id
)
292 if (req(ist
->new.phi
.m
.ref
, sl
.ref
))
293 if (ist
->new.phi
.m
.sz
== sl
.sz
) {
294 r
= ist
->new.phi
.p
->to
;
296 mask(cls
, &r
, msk
, il
);
298 cast(&r
, sl
.cls
, il
);
302 for (p
=b
->phi
; p
; p
=p
->link
)
303 if (killsl(p
->to
, sl
))
304 /* scanning predecessors in that
305 * case would be unsafe */
312 assert(bp
->loop
>= il
->blk
->loop
);
316 r1
= def(sl
, msk
, bp
, 0, &l
);
322 r
= newtmp("ld", sl
.cls
, curf
);
323 p
= alloc(sizeof *p
);
324 vgrow(&ilog
, ++nlog
);
333 for (np
=0; np
<b
->npred
; ++np
) {
336 && il
->type
!= LNoLoad
337 && bp
->loop
< il
->blk
->loop
)
343 r1
= def(sl
, msks
, bp
, 0, &l
);
350 mask(cls
, &r
, msk
, il
);
355 icmp(const void *pa
, const void *pb
)
362 if ((c
= a
->bid
- b
->bid
))
364 if (a
->isphi
&& b
->isphi
)
370 if ((c
= a
->off
- b
->off
))
372 return a
->num
- b
->num
;
375 /* require rpo ssa alias */
388 ilog
= vnew(0, sizeof ilog
[0], Pheap
);
391 for (b
=fn
->start
; b
; b
=b
->link
)
392 for (i
=b
->ins
; i
<&b
->ins
[b
->nins
]; ++i
) {
396 sl
= (Slice
){i
->arg
[0], sz
, i
->cls
};
397 l
= (Loc
){LRoot
, i
-b
->ins
, b
};
398 i
->arg
[1] = def(sl
, MASK(sz
), b
, i
, &l
);
400 qsort(ilog
, nlog
, sizeof ilog
[0], icmp
);
401 vgrow(&ilog
, nlog
+1);
402 ilog
[nlog
].bid
= fn
->nblk
; /* add a sentinel */
403 ib
= vnew(0, sizeof(Ins
), Pheap
);
404 for (ist
=ilog
, n
=0; n
<fn
->nblk
; ++n
) {
406 for (; ist
->bid
== n
&& ist
->isphi
; ++ist
) {
407 ist
->new.phi
.p
->link
= b
->phi
;
408 b
->phi
= ist
->new.phi
.p
;
413 if (ist
->bid
== n
&& ist
->off
== ni
)
420 && !req(i
->arg
[1], R
)) {
421 ext
= Oextsb
+ i
->op
- Oloadsb
;
442 i
->arg
[0] = i
->arg
[1];
450 idup(&b
->ins
, ib
, nt
);
455 fprintf(stderr
, "\n> After load elimination:\n");