#include "llvm/Transforms/Utils/VNCoercion.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicInst.h"

#define DEBUG_TYPE "vncoerce"

namespace llvm {
namespace VNCoercion {

static bool isFirstClassAggregateOrScalableType(Type *Ty) {
  return Ty->isStructTy() || Ty->isArrayTy() || isa<ScalableVectorType>(Ty);
}

/// Return true if coerceAvailableValueToLoadType will succeed.
bool canCoerceMustAliasedValueToLoad(Value *StoredVal, Type *LoadTy,
                                     const DataLayout &DL) {
  Type *StoredTy = StoredVal->getType();

  if (StoredTy == LoadTy)
    return true;
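
  // For illustration (examples implied by the checks below, not exhaustive):
  // an i64 store can feed an i32 or float load, since the store is byte-sized
  // and at least as wide as the load, while first-class aggregates, scalable
  // vectors, and mismatched non-integral pointers are rejected.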

  // If the loaded/stored value is a first class array/struct, or scalable type,
  // don't try to transform them. We need to be able to bitcast to integer.
  if (isFirstClassAggregateOrScalableType(LoadTy) ||
      isFirstClassAggregateOrScalableType(StoredTy))
    return false;

  uint64_t StoreSize = DL.getTypeSizeInBits(StoredTy).getFixedValue();

  // The store size must be byte-aligned to support future type casts.
  if (llvm::alignTo(StoreSize, 8) != StoreSize)
    return false;

  // The store has to be at least as big as the load.
  if (StoreSize < DL.getTypeSizeInBits(LoadTy).getFixedValue())
    return false;

  bool StoredNI = DL.isNonIntegralPointerType(StoredTy->getScalarType());
  bool LoadNI = DL.isNonIntegralPointerType(LoadTy->getScalarType());
  // Don't coerce non-integral pointers to integers or vice versa.
  if (StoredNI != LoadNI) {
    // As a special case, allow coercion of memset used to initialize
    // an array w/null. Despite non-integral pointers not generally having a
    // specific bit pattern, we do assume null is zero.
    if (auto *CI = dyn_cast<Constant>(StoredVal))
      return CI->isNullValue();
    return false;
  } else if (StoredNI && LoadNI &&
             StoredTy->getPointerAddressSpace() !=
                 LoadTy->getPointerAddressSpace()) {
    return false;
  }

  // The implementation below uses inttoptr for vectors of unequal size; we
  // can't allow this for non integral pointers. We could teach it to extract
  // exact subvectors if desired.
  if (StoredNI && StoreSize != DL.getTypeSizeInBits(LoadTy).getFixedValue())
    return false;

  if (StoredTy->isTargetExtTy() || LoadTy->isTargetExtTy())
    return false;

  return true;
}

/// If we saw a store of a value to memory, and
/// then a load from a must-aliased pointer of a different type, try to coerce
/// the stored value. LoadedTy is the type of the load we want to replace.
/// IRB is IRBuilder used to insert new instructions.
///
/// If we can't do it, return null.
Value *coerceAvailableValueToLoadType(Value *StoredVal, Type *LoadedTy,
                                      IRBuilderBase &Helper,
                                      const DataLayout &DL) {
  assert(canCoerceMustAliasedValueToLoad(StoredVal, LoadedTy, DL) &&
         "precondition violation - materialization can't fail");
  if (auto *C = dyn_cast<Constant>(StoredVal))
    StoredVal = ConstantFoldConstant(C, DL);

  // If this is already the right type, just return it.
  Type *StoredValTy = StoredVal->getType();

  uint64_t StoredValSize = DL.getTypeSizeInBits(StoredValTy).getFixedValue();
  uint64_t LoadedValSize = DL.getTypeSizeInBits(LoadedTy).getFixedValue();

  // If the store and reload are the same size, we can always reuse it.
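  // For illustration (not exhaustive): a stored double feeding an i64 load
  // becomes a single bitcast, and a stored i64 feeding a pointer load in an
  // integral address space becomes an inttoptr.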
  if (StoredValSize == LoadedValSize) {
    // Pointer to Pointer -> use bitcast.
    if (StoredValTy->isPtrOrPtrVectorTy() && LoadedTy->isPtrOrPtrVectorTy()) {
      StoredVal = Helper.CreateBitCast(StoredVal, LoadedTy);
    } else {
      // Convert source pointers to integers, which can be bitcast.
      if (StoredValTy->isPtrOrPtrVectorTy()) {
        StoredValTy = DL.getIntPtrType(StoredValTy);
        StoredVal = Helper.CreatePtrToInt(StoredVal, StoredValTy);
      }

      Type *TypeToCastTo = LoadedTy;
      if (TypeToCastTo->isPtrOrPtrVectorTy())
        TypeToCastTo = DL.getIntPtrType(TypeToCastTo);

      if (StoredValTy != TypeToCastTo)
        StoredVal = Helper.CreateBitCast(StoredVal, TypeToCastTo);

      // Cast to pointer if the load needs a pointer type.
      if (LoadedTy->isPtrOrPtrVectorTy())
        StoredVal = Helper.CreateIntToPtr(StoredVal, LoadedTy);
    }

    if (auto *C = dyn_cast<ConstantExpr>(StoredVal))
      StoredVal = ConstantFoldConstant(C, DL);

    return StoredVal;
  }

  // If the loaded value is smaller than the available value, then we can
  // extract out a piece from it. If the available value is too small, then we
  // can't do anything.
  assert(StoredValSize >= LoadedValSize &&
         "canCoerceMustAliasedValueToLoad fail");

  // Convert source pointers to integers, which can be manipulated.
  if (StoredValTy->isPtrOrPtrVectorTy()) {
    StoredValTy = DL.getIntPtrType(StoredValTy);
    StoredVal = Helper.CreatePtrToInt(StoredVal, StoredValTy);
  }

  // Convert vectors and fp to integer, which can be manipulated.
  if (!StoredValTy->isIntegerTy()) {
    StoredValTy = IntegerType::get(StoredValTy->getContext(), StoredValSize);
    StoredVal = Helper.CreateBitCast(StoredVal, StoredValTy);
  }

  // If this is a big-endian system, we need to shift the value down to the low
  // bits so that a truncate will work.
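  // For illustration: on a big-endian target, a stored i32 0xAABBCCDD puts
  // 0xAA in the lowest-addressed byte, so an i16 load of the first two bytes
  // must see 0xAABB; the lshr below by 32 - 16 = 16 bits moves those bytes
  // into the low half before the truncate.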
  if (DL.isBigEndian()) {
    uint64_t ShiftAmt = DL.getTypeStoreSizeInBits(StoredValTy).getFixedValue() -
                        DL.getTypeStoreSizeInBits(LoadedTy).getFixedValue();
    StoredVal = Helper.CreateLShr(
        StoredVal, ConstantInt::get(StoredVal->getType(), ShiftAmt));
  }

  // Truncate the integer to the right size now.
  Type *NewIntTy = IntegerType::get(StoredValTy->getContext(), LoadedValSize);
  StoredVal = Helper.CreateTruncOrBitCast(StoredVal, NewIntTy);

  if (LoadedTy != NewIntTy) {
    // If the result is a pointer, inttoptr.
    if (LoadedTy->isPtrOrPtrVectorTy())
      StoredVal = Helper.CreateIntToPtr(StoredVal, LoadedTy);
    else
      // Otherwise, bitcast.
      StoredVal = Helper.CreateBitCast(StoredVal, LoadedTy);
  }

  if (auto *C = dyn_cast<Constant>(StoredVal))
    StoredVal = ConstantFoldConstant(C, DL);

  return StoredVal;
}

/// This function is called when we have a memdep query of a load that ends up
/// being a clobbering memory write (store, memset, memcpy, memmove). This
/// means that the write *may* provide bits used by the load but we can't be
/// sure because the pointers don't must-alias.
///
/// Check this case to see if there is anything more we can do before we give
/// up. This returns -1 if we have to give up, or a byte number in the stored
/// value of the piece that feeds the load.
static int analyzeLoadFromClobberingWrite(Type *LoadTy, Value *LoadPtr,
                                          Value *WritePtr,
                                          uint64_t WriteSizeInBits,
                                          const DataLayout &DL) {
  // If the loaded/stored value is a first class array/struct, or scalable type,
  // don't try to transform them. We need to be able to bitcast to integer.
  if (isFirstClassAggregateOrScalableType(LoadTy))
    return -1;

  int64_t StoreOffset = 0, LoadOffset = 0;
  Value *StoreBase =
      GetPointerBaseWithConstantOffset(WritePtr, StoreOffset, DL);
  Value *LoadBase = GetPointerBaseWithConstantOffset(LoadPtr, LoadOffset, DL);
  if (StoreBase != LoadBase)
    return -1;

  uint64_t LoadSize = DL.getTypeSizeInBits(LoadTy).getFixedValue();

  if ((WriteSizeInBits & 7) | (LoadSize & 7))
    return -1;
  uint64_t StoreSize = WriteSizeInBits / 8; // Convert to bytes.
  LoadSize /= 8;

  // If the Load isn't completely contained within the stored bits, we don't
  // have all the bits to feed it. We could do something crazy in the future
  // (issue a smaller load then merge the bits in) but this seems unlikely to be
  // valuable.
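  // For illustration: an 8-byte store at offset 0 from a shared base fully
  // covers a 4-byte load at offset 4, and the load reads bytes 4..7 of the
  // stored value, so we return 4; a 4-byte load at offset 6 would run past
  // the end of the store and is rejected.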
  if (StoreOffset > LoadOffset ||
      StoreOffset + int64_t(StoreSize) < LoadOffset + int64_t(LoadSize))
    return -1;

  // Okay, we can do this transformation. Return the number of bytes into the
  // store that the load is.
  return LoadOffset - StoreOffset;
}

/// This function is called when we have a
/// memdep query of a load that ends up being a clobbering store.
int analyzeLoadFromClobberingStore(Type *LoadTy, Value *LoadPtr,
                                   StoreInst *DepSI, const DataLayout &DL) {
  auto *StoredVal = DepSI->getValueOperand();

  // Cannot handle reading from store of first-class aggregate or scalable type.
  if (isFirstClassAggregateOrScalableType(StoredVal->getType()))
    return -1;

  if (!canCoerceMustAliasedValueToLoad(StoredVal, LoadTy, DL))
    return -1;

  Value *StorePtr = DepSI->getPointerOperand();
  uint64_t StoreSize =
      DL.getTypeSizeInBits(DepSI->getValueOperand()->getType()).getFixedValue();
  return analyzeLoadFromClobberingWrite(LoadTy, LoadPtr, StorePtr, StoreSize,
                                        DL);
}

/// This function is called when we have a
/// memdep query of a load that ends up being clobbered by another load. See if
/// the other load can feed into the second load.
int analyzeLoadFromClobberingLoad(Type *LoadTy, Value *LoadPtr, LoadInst *DepLI,
                                  const DataLayout &DL) {
  // Cannot handle reading from store of first-class aggregate yet.
  if (DepLI->getType()->isStructTy() || DepLI->getType()->isArrayTy())
    return -1;

  if (!canCoerceMustAliasedValueToLoad(DepLI, LoadTy, DL))
    return -1;

  Value *DepPtr = DepLI->getPointerOperand();
  uint64_t DepSize = DL.getTypeSizeInBits(DepLI->getType()).getFixedValue();
  return analyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, DepSize, DL);
}

int analyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr,
                                     MemIntrinsic *MI, const DataLayout &DL) {
  // If the mem operation is a non-constant size, we can't handle it.
  ConstantInt *SizeCst = dyn_cast<ConstantInt>(MI->getLength());
  if (!SizeCst)
    return -1;
  uint64_t MemSizeInBits = SizeCst->getZExtValue() * 8;

  // If this is memset, we just need to see if the offset is valid in the size
  // of the memset.
  if (const auto *memset_inst = dyn_cast<MemSetInst>(MI)) {
    if (DL.isNonIntegralPointerType(LoadTy->getScalarType())) {
      auto *CI = dyn_cast<ConstantInt>(memset_inst->getValue());
      if (!CI || !CI->isZero())
        return -1;
    }
    return analyzeLoadFromClobberingWrite(LoadTy, LoadPtr, MI->getDest(),
                                          MemSizeInBits, DL);
  }

  // If we have a memcpy/memmove, the only case we can handle is if this is a
  // copy from constant memory. In that case, we can read directly from the
  // constant memory.
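  // For illustration (hypothetical IR): after "memcpy(p, @g, 16)" where @g is
  // a constant global with a definitive initializer, an i32 load from p+4 can
  // be answered from bytes 4..7 of @g's initializer, provided the constant
  // folder below succeeds.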
  MemTransferInst *MTI = cast<MemTransferInst>(MI);

  Constant *Src = dyn_cast<Constant>(MTI->getSource());
  if (!Src)
    return -1;

  GlobalVariable *GV = dyn_cast<GlobalVariable>(getUnderlyingObject(Src));
  if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer())
    return -1;

  // See if the access is within the bounds of the transfer.
  int Offset = analyzeLoadFromClobberingWrite(LoadTy, LoadPtr, MI->getDest(),
                                              MemSizeInBits, DL);
  if (Offset == -1)
    return Offset;

  // Otherwise, see if we can constant fold a load from the constant with the
  // offset applied as appropriate.
  unsigned IndexSize = DL.getIndexTypeSizeInBits(Src->getType());
  if (ConstantFoldLoadFromConstPtr(Src, LoadTy, APInt(IndexSize, Offset), DL))
    return Offset;
  return -1;
}

static Value *getStoreValueForLoadHelper(Value *SrcVal, unsigned Offset,
                                         Type *LoadTy, IRBuilderBase &Builder,
                                         const DataLayout &DL) {
  LLVMContext &Ctx = SrcVal->getType()->getContext();

  // If two pointers are in the same address space, they have the same size,
  // so we don't need to do any truncation, etc. This avoids introducing
  // ptrtoint instructions for pointers that may be non-integral.
  if (SrcVal->getType()->isPointerTy() && LoadTy->isPointerTy() &&
      cast<PointerType>(SrcVal->getType())->getAddressSpace() ==
          cast<PointerType>(LoadTy)->getAddressSpace()) {
    return SrcVal;
  }

  uint64_t StoreSize =
      (DL.getTypeSizeInBits(SrcVal->getType()).getFixedValue() + 7) / 8;
  uint64_t LoadSize = (DL.getTypeSizeInBits(LoadTy).getFixedValue() + 7) / 8;
  // Compute which bits of the stored value are being used by the load. Convert
  // to an integer type to start with.
  if (SrcVal->getType()->isPtrOrPtrVectorTy())
    SrcVal =
        Builder.CreatePtrToInt(SrcVal, DL.getIntPtrType(SrcVal->getType()));
  if (!SrcVal->getType()->isIntegerTy())
    SrcVal =
        Builder.CreateBitCast(SrcVal, IntegerType::get(Ctx, StoreSize * 8));

  // Shift the bits to the least significant depending on endianness.
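  // For illustration: with an 8-byte store feeding a 2-byte load at Offset 2,
  // little-endian shifts right by 2 * 8 = 16 bits, while big-endian shifts by
  // (8 - 2 - 2) * 8 = 32 bits; either way the loaded bytes land in the low 16
  // bits before the truncate.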
  unsigned ShiftAmt;
  if (DL.isLittleEndian())
    ShiftAmt = Offset * 8;
  else
    ShiftAmt = (StoreSize - LoadSize - Offset) * 8;
  if (ShiftAmt)
    SrcVal = Builder.CreateLShr(SrcVal,
                                ConstantInt::get(SrcVal->getType(), ShiftAmt));

  if (LoadSize != StoreSize)
    SrcVal = Builder.CreateTruncOrBitCast(SrcVal,
                                          IntegerType::get(Ctx, LoadSize * 8));
  return SrcVal;
}

Value *getValueForLoad(Value *SrcVal, unsigned Offset, Type *LoadTy,
                       Instruction *InsertPt, const DataLayout &DL) {

#ifndef NDEBUG
  unsigned SrcValSize = DL.getTypeStoreSize(SrcVal->getType()).getFixedValue();
  unsigned LoadSize = DL.getTypeStoreSize(LoadTy).getFixedValue();
  assert(Offset + LoadSize <= SrcValSize);
#endif
  IRBuilder<> Builder(InsertPt);
  SrcVal = getStoreValueForLoadHelper(SrcVal, Offset, LoadTy, Builder, DL);
  return coerceAvailableValueToLoadType(SrcVal, LoadTy, Builder, DL);
}

Constant *getConstantValueForLoad(Constant *SrcVal, unsigned Offset,
                                  Type *LoadTy, const DataLayout &DL) {
#ifndef NDEBUG
  unsigned SrcValSize = DL.getTypeStoreSize(SrcVal->getType()).getFixedValue();
  unsigned LoadSize = DL.getTypeStoreSize(LoadTy).getFixedValue();
  assert(Offset + LoadSize <= SrcValSize);
#endif
  return ConstantFoldLoadFromConst(SrcVal, LoadTy, APInt(32, Offset), DL);
}

/// This function is called when we have a
/// memdep query of a load that ends up being a clobbering mem intrinsic.
Value *getMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset,
                              Type *LoadTy, Instruction *InsertPt,
                              const DataLayout &DL) {
  LLVMContext &Ctx = LoadTy->getContext();
  uint64_t LoadSize = DL.getTypeSizeInBits(LoadTy).getFixedValue() / 8;
  IRBuilder<> Builder(InsertPt);

  // We know that this method is only called when the mem transfer fully
  // provides the bits for the load.
  if (MemSetInst *MSI = dyn_cast<MemSetInst>(SrcInst)) {
    // memset(P, 'x', 1234) -> splat('x'), even if x is a variable, and
    // independently of what the offset is.
    Value *Val = MSI->getValue();
    if (LoadSize != 1)
      Val =
          Builder.CreateZExtOrBitCast(Val, IntegerType::get(Ctx, LoadSize * 8));
    Value *OneElt = Val;

    // Splat the value out to the right number of bits.
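    // For illustration: for a 4-byte load of a memset byte 0xAB, the doubling
    // path below builds 0xAB -> 0xABAB -> 0xABABABAB in two iterations.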
    for (unsigned NumBytesSet = 1; NumBytesSet != LoadSize;) {
      // If we can double the number of bytes set, do it.
      if (NumBytesSet * 2 <= LoadSize) {
        Value *ShVal = Builder.CreateShl(
            Val, ConstantInt::get(Val->getType(), NumBytesSet * 8));
        Val = Builder.CreateOr(Val, ShVal);
        NumBytesSet <<= 1;
        continue;
      }

      // Otherwise insert one byte at a time.
      Value *ShVal =
          Builder.CreateShl(Val, ConstantInt::get(Val->getType(), 1 * 8));
      Val = Builder.CreateOr(OneElt, ShVal);
      ++NumBytesSet;
    }

    return coerceAvailableValueToLoadType(Val, LoadTy, Builder, DL);
  }

  // Otherwise, this is a memcpy/memmove from a constant global.
  MemTransferInst *MTI = cast<MemTransferInst>(SrcInst);
  Constant *Src = cast<Constant>(MTI->getSource());
  unsigned IndexSize = DL.getIndexTypeSizeInBits(Src->getType());
  return ConstantFoldLoadFromConstPtr(Src, LoadTy, APInt(IndexSize, Offset),
                                      DL);
}

Constant *getConstantMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset,
                                         Type *LoadTy, const DataLayout &DL) {
  LLVMContext &Ctx = LoadTy->getContext();
  uint64_t LoadSize = DL.getTypeSizeInBits(LoadTy).getFixedValue() / 8;

  // We know that this method is only called when the mem transfer fully
  // provides the bits for the load.
  if (MemSetInst *MSI = dyn_cast<MemSetInst>(SrcInst)) {
    auto *Val = dyn_cast<ConstantInt>(MSI->getValue());
    if (!Val)
      return nullptr;
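
    // For illustration: a memset of constant byte 0x01 feeding an i32 load
    // splats to the constant 0x01010101 via APInt::getSplat below.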
    Val = ConstantInt::get(Ctx, APInt::getSplat(LoadSize * 8, Val->getValue()));
    return ConstantFoldLoadFromConst(Val, LoadTy, DL);
  }

  // Otherwise, this is a memcpy/memmove from a constant global.
  MemTransferInst *MTI = cast<MemTransferInst>(SrcInst);
  Constant *Src = cast<Constant>(MTI->getSource());
  unsigned IndexSize = DL.getIndexTypeSizeInBits(Src->getType());
  return ConstantFoldLoadFromConstPtr(Src, LoadTy, APInt(IndexSize, Offset),
                                      DL);
}
} // namespace VNCoercion
} // namespace llvm