#include "llvm/Transforms/Utils/VNCoercion.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Support/Debug.h"

#define DEBUG_TYPE "vncoerce"

namespace llvm {
namespace VNCoercion {

static bool isFirstClassAggregateOrScalableType(Type *Ty) {
  return Ty->isStructTy() || Ty->isArrayTy() || isa<ScalableVectorType>(Ty);
}

/// Return true if coerceAvailableValueToLoadType will succeed.
bool canCoerceMustAliasedValueToLoad(Value *StoredVal, Type *LoadTy,
                                     const DataLayout &DL) {
  Type *StoredTy = StoredVal->getType();

  if (StoredTy == LoadTy)
    return true;

  // If the loaded/stored value is a first class array/struct, or scalable
  // type, don't try to transform them. We need to be able to bitcast to
  // integer.
  if (isFirstClassAggregateOrScalableType(LoadTy) ||
      isFirstClassAggregateOrScalableType(StoredTy))
    return false;

  uint64_t StoreSize = DL.getTypeSizeInBits(StoredTy).getFixedValue();

  // The store size must be byte-aligned to support future type casts.
  if (llvm::alignTo(StoreSize, 8) != StoreSize)
    return false;

  // The store has to be at least as big as the load.
  if (StoreSize < DL.getTypeSizeInBits(LoadTy).getFixedValue())
    return false;

  bool StoredNI = DL.isNonIntegralPointerType(StoredTy->getScalarType());
  bool LoadNI = DL.isNonIntegralPointerType(LoadTy->getScalarType());
  // Don't coerce non-integral pointers to integers or vice versa.
  if (StoredNI != LoadNI) {
    // As a special case, allow coercion of memset used to initialize
    // an array w/null. Despite non-integral pointers not generally having a
    // specific bit pattern, we do assume null is zero.
    if (auto *CI = dyn_cast<Constant>(StoredVal))
      return CI->isNullValue();
    return false;
  } else if (StoredNI && LoadNI &&
             StoredTy->getPointerAddressSpace() !=
                 LoadTy->getPointerAddressSpace()) {
    return false;
  }

  // The implementation below uses inttoptr for vectors of unequal size; we
  // can't allow this for non integral pointers. We could teach it to extract
  // exact subvectors if desired.
  if (StoredNI && StoreSize != DL.getTypeSizeInBits(LoadTy).getFixedValue())
    return false;

  if (StoredTy->isTargetExtTy() || LoadTy->isTargetExtTy())
    return false;

  return true;
}
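
// Illustrative note (not from the upstream file): on a typical 64-bit
// DataLayout, storing an i64 and loading a double can be coerced (equal
// fixed sizes, bitcastable), while storing an i32 and loading an i64 cannot,
// because the store must cover at least as many bits as the load.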

/// If we saw a store of a value to memory, and
/// then a load from a must-aliased pointer of a different type, try to coerce
/// the stored value. LoadedTy is the type of the load we want to replace.
/// IRB is IRBuilder used to insert new instructions.
///
/// If we can't do it, return null.
Value *coerceAvailableValueToLoadType(Value *StoredVal, Type *LoadedTy,
                                      IRBuilderBase &Helper,
                                      const DataLayout &DL) {
  assert(canCoerceMustAliasedValueToLoad(StoredVal, LoadedTy, DL) &&
         "precondition violation - materialization can't fail");
  if (auto *C = dyn_cast<Constant>(StoredVal))
    StoredVal = ConstantFoldConstant(C, DL);

  // If this is already the right type, just return it.
  Type *StoredValTy = StoredVal->getType();

  uint64_t StoredValSize = DL.getTypeSizeInBits(StoredValTy).getFixedValue();
  uint64_t LoadedValSize = DL.getTypeSizeInBits(LoadedTy).getFixedValue();

  // If the store and reload are the same size, we can always reuse it.
  if (StoredValSize == LoadedValSize) {
    // Pointer to Pointer -> use bitcast.
    if (StoredValTy->isPtrOrPtrVectorTy() && LoadedTy->isPtrOrPtrVectorTy()) {
      StoredVal = Helper.CreateBitCast(StoredVal, LoadedTy);
    } else {
      // Convert source pointers to integers, which can be bitcast.
      if (StoredValTy->isPtrOrPtrVectorTy()) {
        StoredValTy = DL.getIntPtrType(StoredValTy);
        StoredVal = Helper.CreatePtrToInt(StoredVal, StoredValTy);
      }

      Type *TypeToCastTo = LoadedTy;
      if (TypeToCastTo->isPtrOrPtrVectorTy())
        TypeToCastTo = DL.getIntPtrType(TypeToCastTo);

      if (StoredValTy != TypeToCastTo)
        StoredVal = Helper.CreateBitCast(StoredVal, TypeToCastTo);

      // Cast to pointer if the load needs a pointer type.
      if (LoadedTy->isPtrOrPtrVectorTy())
        StoredVal = Helper.CreateIntToPtr(StoredVal, LoadedTy);
    }

    if (auto *C = dyn_cast<ConstantExpr>(StoredVal))
      StoredVal = ConstantFoldConstant(C, DL);

    return StoredVal;
  }

  // If the loaded value is smaller than the available value, then we can
  // extract out a piece from it. If the available value is too small, then we
  // can't do anything.
  assert(StoredValSize >= LoadedValSize &&
         "canCoerceMustAliasedValueToLoad fail");

  // Convert source pointers to integers, which can be manipulated.
  if (StoredValTy->isPtrOrPtrVectorTy()) {
    StoredValTy = DL.getIntPtrType(StoredValTy);
    StoredVal = Helper.CreatePtrToInt(StoredVal, StoredValTy);
  }

  // Convert vectors and fp to integer, which can be manipulated.
  if (!StoredValTy->isIntegerTy()) {
    StoredValTy = IntegerType::get(StoredValTy->getContext(), StoredValSize);
    StoredVal = Helper.CreateBitCast(StoredVal, StoredValTy);
  }

  // If this is a big-endian system, we need to shift the value down to the low
  // bits so that a truncate will work.
  if (DL.isBigEndian()) {
    uint64_t ShiftAmt = DL.getTypeStoreSizeInBits(StoredValTy).getFixedValue() -
                        DL.getTypeStoreSizeInBits(LoadedTy).getFixedValue();
    StoredVal = Helper.CreateLShr(
        StoredVal, ConstantInt::get(StoredVal->getType(), ShiftAmt));
  }

  // Truncate the integer to the right size now.
  Type *NewIntTy = IntegerType::get(StoredValTy->getContext(), LoadedValSize);
  StoredVal = Helper.CreateTruncOrBitCast(StoredVal, NewIntTy);

  if (LoadedTy != NewIntTy) {
    // If the result is a pointer, inttoptr.
    if (LoadedTy->isPtrOrPtrVectorTy())
      StoredVal = Helper.CreateIntToPtr(StoredVal, LoadedTy);
    else
      // Otherwise, bitcast.
      StoredVal = Helper.CreateBitCast(StoredVal, LoadedTy);
  }

  if (auto *C = dyn_cast<Constant>(StoredVal))
    StoredVal = ConstantFoldConstant(C, DL);

  return StoredVal;
}
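
// For illustration (hypothetical IR): given
//   store i32 %v, ptr %p
//   %f = load float, ptr %p
// the store and load have equal sizes, so the coercion above is just
//   %f = bitcast i32 %v to float
// with no shift or truncate needed.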

/// This function is called when we have a memdep query of a load that ends up
/// being a clobbering memory write (store, memset, memcpy, memmove). This
/// means that the write *may* provide bits used by the load but we can't be
/// sure because the pointers don't must-alias.
///
/// Check this case to see if there is anything more we can do before we give
/// up. This returns -1 if we have to give up, or a byte number in the stored
/// value of the piece that feeds the load.
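///
/// For example (illustrative): a 16-byte write at (Base + 0) that clobbers a
/// 4-byte load at (Base + 4) yields 4, meaning the load reads bytes [4, 8)
/// of the stored value; a load not fully covered by the write yields -1.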
static int analyzeLoadFromClobberingWrite(Type *LoadTy, Value *LoadPtr,
                                          Value *WritePtr,
                                          uint64_t WriteSizeInBits,
                                          const DataLayout &DL) {
  // If the loaded/stored value is a first class array/struct, or scalable
  // type, don't try to transform them. We need to be able to bitcast to
  // integer.
  if (isFirstClassAggregateOrScalableType(LoadTy))
    return -1;

  int64_t StoreOffset = 0, LoadOffset = 0;
  Value *StoreBase =
      GetPointerBaseWithConstantOffset(WritePtr, StoreOffset, DL);
  Value *LoadBase = GetPointerBaseWithConstantOffset(LoadPtr, LoadOffset, DL);
  if (StoreBase != LoadBase)
    return -1;

  uint64_t LoadSize = DL.getTypeSizeInBits(LoadTy).getFixedValue();

  if ((WriteSizeInBits & 7) | (LoadSize & 7))
    return -1;

  uint64_t StoreSize = WriteSizeInBits / 8; // Convert to bytes.
  LoadSize /= 8;

  // If the Load isn't completely contained within the stored bits, we don't
  // have all the bits to feed it. We could do something crazy in the future
  // (issue a smaller load then merge the bits in) but this seems unlikely to
  // be valuable.
  if (StoreOffset > LoadOffset ||
      StoreOffset + int64_t(StoreSize) < LoadOffset + int64_t(LoadSize))
    return -1;

  // Okay, we can do this transformation. Return the number of bytes into the
  // store that the load is.
  return LoadOffset - StoreOffset;
}

/// This function is called when we have a
/// memdep query of a load that ends up being a clobbering store.
int analyzeLoadFromClobberingStore(Type *LoadTy, Value *LoadPtr,
                                   StoreInst *DepSI, const DataLayout &DL) {
  auto *StoredVal = DepSI->getValueOperand();

  // Cannot handle reading from store of first-class aggregate or scalable
  // type.
  if (isFirstClassAggregateOrScalableType(StoredVal->getType()))
    return -1;

  if (!canCoerceMustAliasedValueToLoad(StoredVal, LoadTy, DL))
    return -1;

  Value *StorePtr = DepSI->getPointerOperand();
  uint64_t StoreSize =
      DL.getTypeSizeInBits(DepSI->getValueOperand()->getType()).getFixedValue();
  return analyzeLoadFromClobberingWrite(LoadTy, LoadPtr, StorePtr, StoreSize,
                                        DL);
}

/// This function is called when we have a
/// memdep query of a load that ends up being clobbered by another load. See
/// if the other load can feed into the second load.
int analyzeLoadFromClobberingLoad(Type *LoadTy, Value *LoadPtr, LoadInst *DepLI,
                                  const DataLayout &DL) {
  // Cannot handle reading from store of first-class aggregate yet.
  if (DepLI->getType()->isStructTy() || DepLI->getType()->isArrayTy())
    return -1;

  if (!canCoerceMustAliasedValueToLoad(DepLI, LoadTy, DL))
    return -1;

  Value *DepPtr = DepLI->getPointerOperand();
  uint64_t DepSize = DL.getTypeSizeInBits(DepLI->getType()).getFixedValue();
  return analyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, DepSize, DL);
}

int analyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr,
                                     MemIntrinsic *MI, const DataLayout &DL) {
  // If the mem operation is a non-constant size, we can't handle it.
  ConstantInt *SizeCst = dyn_cast<ConstantInt>(MI->getLength());
  if (!SizeCst)
    return -1;
  uint64_t MemSizeInBits = SizeCst->getZExtValue() * 8;

  // If this is memset, we just need to see if the offset is valid in the size
  // of the memset.
  if (const auto *memset_inst = dyn_cast<MemSetInst>(MI)) {
    if (DL.isNonIntegralPointerType(LoadTy->getScalarType())) {
      auto *CI = dyn_cast<ConstantInt>(memset_inst->getValue());
      if (!CI || !CI->isZero())
        return -1;
    }
    return analyzeLoadFromClobberingWrite(LoadTy, LoadPtr, MI->getDest(),
                                          MemSizeInBits, DL);
  }

  // If we have a memcpy/memmove, the only case we can handle is if this is a
  // copy from constant memory. In that case, we can read directly from the
  // constant memory.
  MemTransferInst *MTI = cast<MemTransferInst>(MI);

  Constant *Src = dyn_cast<Constant>(MTI->getSource());
  if (!Src)
    return -1;

  GlobalVariable *GV = dyn_cast<GlobalVariable>(getUnderlyingObject(Src));
  if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer())
    return -1;

  // See if the access is within the bounds of the transfer.
  int Offset = analyzeLoadFromClobberingWrite(LoadTy, LoadPtr, MI->getDest(),
                                              MemSizeInBits, DL);
  if (Offset == -1)
    return Offset;

  // Otherwise, see if we can constant fold a load from the constant with the
  // offset applied as appropriate.
  unsigned IndexSize = DL.getIndexTypeSizeInBits(Src->getType());
  if (ConstantFoldLoadFromConstPtr(Src, LoadTy, APInt(IndexSize, Offset), DL))
    return Offset;
  return -1;
}
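
// Illustrative note: given a hypothetical constant global
//   @g = private constant [4 x i32] [i32 1, i32 2, i32 3, i32 4]
// a 16-byte memcpy from @g followed by an i32 load 4 bytes into the
// destination takes the memcpy path above: the analysis returns offset 4,
// and the loaded value constant-folds to i32 2.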

static Value *getStoreValueForLoadHelper(Value *SrcVal, unsigned Offset,
                                         Type *LoadTy, IRBuilderBase &Builder,
                                         const DataLayout &DL) {
  LLVMContext &Ctx = SrcVal->getType()->getContext();

  // If two pointers are in the same address space, they have the same size,
  // so we don't need to do any truncation, etc. This avoids introducing
  // ptrtoint instructions for pointers that may be non-integral.
  if (SrcVal->getType()->isPointerTy() && LoadTy->isPointerTy() &&
      cast<PointerType>(SrcVal->getType())->getAddressSpace() ==
          cast<PointerType>(LoadTy)->getAddressSpace()) {
    return SrcVal;
  }

  uint64_t StoreSize =
      (DL.getTypeSizeInBits(SrcVal->getType()).getFixedValue() + 7) / 8;
  uint64_t LoadSize = (DL.getTypeSizeInBits(LoadTy).getFixedValue() + 7) / 8;
  // Compute which bits of the stored value are being used by the load. Convert
  // to an integer type to start with.
  if (SrcVal->getType()->isPtrOrPtrVectorTy())
    SrcVal =
        Builder.CreatePtrToInt(SrcVal, DL.getIntPtrType(SrcVal->getType()));
  if (!SrcVal->getType()->isIntegerTy())
    SrcVal =
        Builder.CreateBitCast(SrcVal, IntegerType::get(Ctx, StoreSize * 8));
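
  // Worked example (illustrative): StoreSize = 8, LoadSize = 2, Offset = 2.
  // Little-endian: ShiftAmt = 2 * 8 = 16. Big-endian: ShiftAmt =
  // (8 - 2 - 2) * 8 = 32. Either way, the two bytes the load reads land in
  // the least significant bits before the truncate below.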

  // Shift the bits to the least significant depending on endianness.
  unsigned ShiftAmt;
  if (DL.isLittleEndian())
    ShiftAmt = Offset * 8;
  else
    ShiftAmt = (StoreSize - LoadSize - Offset) * 8;

  SrcVal = Builder.CreateLShr(SrcVal,
                              ConstantInt::get(SrcVal->getType(), ShiftAmt));

  if (LoadSize != StoreSize)
    SrcVal = Builder.CreateTruncOrBitCast(SrcVal,
                                          IntegerType::get(Ctx, LoadSize * 8));

  return SrcVal;
}

Value *getValueForLoad(Value *SrcVal, unsigned Offset, Type *LoadTy,
                       Instruction *InsertPt, const DataLayout &DL) {

#ifndef NDEBUG
  unsigned SrcValSize = DL.getTypeStoreSize(SrcVal->getType()).getFixedValue();
  unsigned LoadSize = DL.getTypeStoreSize(LoadTy).getFixedValue();
  assert(Offset + LoadSize <= SrcValSize);
#endif
  IRBuilder<> Builder(InsertPt);
  SrcVal = getStoreValueForLoadHelper(SrcVal, Offset, LoadTy, Builder, DL);
  return coerceAvailableValueToLoadType(SrcVal, LoadTy, Builder, DL);
}
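
// Usage sketch (illustrative, not part of this file): a GVN-style caller
// first runs analyzeLoadFromClobberingStore(LoadTy, LoadPtr, DepSI, DL); if
// that returns a non-negative byte offset, the load can be replaced with
// getValueForLoad(DepSI->getValueOperand(), Offset, LoadTy, InsertPt, DL).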

Constant *getConstantValueForLoad(Constant *SrcVal, unsigned Offset,
                                  Type *LoadTy, const DataLayout &DL) {
#ifndef NDEBUG
  unsigned SrcValSize = DL.getTypeStoreSize(SrcVal->getType()).getFixedValue();
  unsigned LoadSize = DL.getTypeStoreSize(LoadTy).getFixedValue();
  assert(Offset + LoadSize <= SrcValSize);
#endif
  return ConstantFoldLoadFromConst(SrcVal, LoadTy, APInt(32, Offset), DL);
}

/// This function is called when we have a
/// memdep query of a load that ends up being a clobbering mem intrinsic.
Value *getMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset,
                              Type *LoadTy, Instruction *InsertPt,
                              const DataLayout &DL) {
  LLVMContext &Ctx = LoadTy->getContext();
  uint64_t LoadSize = DL.getTypeSizeInBits(LoadTy).getFixedValue() / 8;
  IRBuilder<> Builder(InsertPt);

  // We know that this method is only called when the mem transfer fully
  // provides the bits for the load.
  if (MemSetInst *MSI = dyn_cast<MemSetInst>(SrcInst)) {
    // memset(P, 'x', 1234) -> splat('x'), even if x is a variable, and
    // independently of what the offset is.
    Value *Val = MSI->getValue();
    if (LoadSize != 1)
      Val =
          Builder.CreateZExtOrBitCast(Val, IntegerType::get(Ctx, LoadSize * 8));
    Value *OneElt = Val;

    // Splat the value out to the right number of bits.
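    // Worked example (illustrative): for LoadSize = 5, the loop doubles the
    // set bytes from 1 to 2 to 4, then appends a single byte to reach 5.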
    for (unsigned NumBytesSet = 1; NumBytesSet != LoadSize;) {
      // If we can double the number of bytes set, do it.
      if (NumBytesSet * 2 <= LoadSize) {
        Value *ShVal = Builder.CreateShl(
            Val, ConstantInt::get(Val->getType(), NumBytesSet * 8));
        Val = Builder.CreateOr(Val, ShVal);
        NumBytesSet <<= 1;
        continue;
      }

      // Otherwise insert one byte at a time.
      Value *ShVal =
          Builder.CreateShl(Val, ConstantInt::get(Val->getType(), 1 * 8));
      Val = Builder.CreateOr(OneElt, ShVal);
      ++NumBytesSet;
    }

    return coerceAvailableValueToLoadType(Val, LoadTy, Builder, DL);
  }

  // Otherwise, this is a memcpy/memmove from a constant global.
  MemTransferInst *MTI = cast<MemTransferInst>(SrcInst);
  Constant *Src = cast<Constant>(MTI->getSource());
  unsigned IndexSize = DL.getIndexTypeSizeInBits(Src->getType());
  return ConstantFoldLoadFromConstPtr(Src, LoadTy, APInt(IndexSize, Offset),
                                      DL);
}

Constant *getConstantMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset,
                                         Type *LoadTy, const DataLayout &DL) {
  LLVMContext &Ctx = LoadTy->getContext();
  uint64_t LoadSize = DL.getTypeSizeInBits(LoadTy).getFixedValue() / 8;

  // We know that this method is only called when the mem transfer fully
  // provides the bits for the load.
  if (MemSetInst *MSI = dyn_cast<MemSetInst>(SrcInst)) {
    auto *Val = dyn_cast<ConstantInt>(MSI->getValue());
    if (!Val)
      return nullptr;

    Val = ConstantInt::get(Ctx, APInt::getSplat(LoadSize * 8, Val->getValue()));
    return ConstantFoldLoadFromConst(Val, LoadTy, DL);
  }

  // Otherwise, this is a memcpy/memmove from a constant global.
  MemTransferInst *MTI = cast<MemTransferInst>(SrcInst);
  Constant *Src = cast<Constant>(MTI->getSource());
  unsigned IndexSize = DL.getIndexTypeSizeInBits(Src->getType());
  return ConstantFoldLoadFromConstPtr(Src, LoadTy, APInt(IndexSize, Offset),
                                      DL);
}
} // namespace VNCoercion
} // namespace llvm