//===- InstCombineSimplifyDemanded.cpp ------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains logic for simplifying instructions based on information
// about how they are used.
//
//===----------------------------------------------------------------------===//
#include "InstCombineInternal.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/KnownBits.h"

using namespace llvm;
using namespace llvm::PatternMatch;

#define DEBUG_TYPE "instcombine"
27 struct AMDGPUImageDMaskIntrinsic
{
31 #define GET_AMDGPUImageDMaskIntrinsicTable_IMPL
32 #include "InstCombineTables.inc"
34 } // end anonymous namespace
36 /// Check to see if the specified operand of the specified instruction is a
37 /// constant integer. If so, check to see if there are any bits set in the
38 /// constant that are not demanded. If so, shrink the constant and return true.
39 static bool ShrinkDemandedConstant(Instruction
*I
, unsigned OpNo
,
40 const APInt
&Demanded
) {
41 assert(I
&& "No instruction?");
42 assert(OpNo
< I
->getNumOperands() && "Operand index too large");
44 // The operand must be a constant integer or splat integer.
45 Value
*Op
= I
->getOperand(OpNo
);
47 if (!match(Op
, m_APInt(C
)))
50 // If there are no bits set that aren't demanded, nothing to do.
51 if (C
->isSubsetOf(Demanded
))
54 // This instruction is producing bits that are not demanded. Shrink the RHS.
55 I
->setOperand(OpNo
, ConstantInt::get(Op
->getType(), *C
& Demanded
));
62 /// Inst is an integer instruction that SimplifyDemandedBits knows about. See if
63 /// the instruction has any properties that allow us to simplify its operands.
64 bool InstCombiner::SimplifyDemandedInstructionBits(Instruction
&Inst
) {
65 unsigned BitWidth
= Inst
.getType()->getScalarSizeInBits();
66 KnownBits
Known(BitWidth
);
67 APInt
DemandedMask(APInt::getAllOnesValue(BitWidth
));
69 Value
*V
= SimplifyDemandedUseBits(&Inst
, DemandedMask
, Known
,
72 if (V
== &Inst
) return true;
73 replaceInstUsesWith(Inst
, V
);
77 /// This form of SimplifyDemandedBits simplifies the specified instruction
78 /// operand if possible, updating it in place. It returns true if it made any
79 /// change and false otherwise.
80 bool InstCombiner::SimplifyDemandedBits(Instruction
*I
, unsigned OpNo
,
81 const APInt
&DemandedMask
,
84 Use
&U
= I
->getOperandUse(OpNo
);
85 Value
*NewVal
= SimplifyDemandedUseBits(U
.get(), DemandedMask
, Known
,
87 if (!NewVal
) return false;
93 /// This function attempts to replace V with a simpler value based on the
94 /// demanded bits. When this function is called, it is known that only the bits
95 /// set in DemandedMask of the result of V are ever used downstream.
96 /// Consequently, depending on the mask and V, it may be possible to replace V
97 /// with a constant or one of its operands. In such cases, this function does
98 /// the replacement and returns true. In all other cases, it returns false after
99 /// analyzing the expression and setting KnownOne and known to be one in the
100 /// expression. Known.Zero contains all the bits that are known to be zero in
101 /// the expression. These are provided to potentially allow the caller (which
102 /// might recursively be SimplifyDemandedBits itself) to simplify the
104 /// Known.One and Known.Zero always follow the invariant that:
105 /// Known.One & Known.Zero == 0.
106 /// That is, a bit can't be both 1 and 0. Note that the bits in Known.One and
107 /// Known.Zero may only be accurate for those bits set in DemandedMask. Note
108 /// also that the bitwidth of V, DemandedMask, Known.Zero and Known.One must all
111 /// This returns null if it did not change anything and it permits no
112 /// simplification. This returns V itself if it did some simplification of V's
113 /// operands based on the information about what bits are demanded. This returns
114 /// some other non-null value if it found out that V is equal to another value
115 /// in the context where the specified bits are demanded, but not for all users.
116 Value
*InstCombiner::SimplifyDemandedUseBits(Value
*V
, APInt DemandedMask
,
117 KnownBits
&Known
, unsigned Depth
,
119 assert(V
!= nullptr && "Null pointer of Value???");
120 assert(Depth
<= 6 && "Limit Search Depth");
121 uint32_t BitWidth
= DemandedMask
.getBitWidth();
122 Type
*VTy
= V
->getType();
124 (!VTy
->isIntOrIntVectorTy() || VTy
->getScalarSizeInBits() == BitWidth
) &&
125 Known
.getBitWidth() == BitWidth
&&
126 "Value *V, DemandedMask and Known must have same BitWidth");
128 if (isa
<Constant
>(V
)) {
129 computeKnownBits(V
, Known
, Depth
, CxtI
);
134 if (DemandedMask
.isNullValue()) // Not demanding any bits from V.
135 return UndefValue::get(VTy
);
137 if (Depth
== 6) // Limit search depth.
140 Instruction
*I
= dyn_cast
<Instruction
>(V
);
142 computeKnownBits(V
, Known
, Depth
, CxtI
);
143 return nullptr; // Only analyze instructions.
146 // If there are multiple uses of this value and we aren't at the root, then
147 // we can't do any simplifications of the operands, because DemandedMask
148 // only reflects the bits demanded by *one* of the users.
149 if (Depth
!= 0 && !I
->hasOneUse())
150 return SimplifyMultipleUseDemandedBits(I
, DemandedMask
, Known
, Depth
, CxtI
);
152 KnownBits
LHSKnown(BitWidth
), RHSKnown(BitWidth
);
154 // If this is the root being simplified, allow it to have multiple uses,
155 // just set the DemandedMask to all bits so that we can try to simplify the
156 // operands. This allows visitTruncInst (for example) to simplify the
157 // operand of a trunc without duplicating all the logic below.
158 if (Depth
== 0 && !V
->hasOneUse())
159 DemandedMask
.setAllBits();
161 switch (I
->getOpcode()) {
163 computeKnownBits(I
, Known
, Depth
, CxtI
);
165 case Instruction::And
: {
166 // If either the LHS or the RHS are Zero, the result is zero.
167 if (SimplifyDemandedBits(I
, 1, DemandedMask
, RHSKnown
, Depth
+ 1) ||
168 SimplifyDemandedBits(I
, 0, DemandedMask
& ~RHSKnown
.Zero
, LHSKnown
,
171 assert(!RHSKnown
.hasConflict() && "Bits known to be one AND zero?");
172 assert(!LHSKnown
.hasConflict() && "Bits known to be one AND zero?");
174 // Output known-0 are known to be clear if zero in either the LHS | RHS.
175 APInt IKnownZero
= RHSKnown
.Zero
| LHSKnown
.Zero
;
176 // Output known-1 bits are only known if set in both the LHS & RHS.
177 APInt IKnownOne
= RHSKnown
.One
& LHSKnown
.One
;
179 // If the client is only demanding bits that we know, return the known
181 if (DemandedMask
.isSubsetOf(IKnownZero
|IKnownOne
))
182 return Constant::getIntegerValue(VTy
, IKnownOne
);
184 // If all of the demanded bits are known 1 on one side, return the other.
185 // These bits cannot contribute to the result of the 'and'.
186 if (DemandedMask
.isSubsetOf(LHSKnown
.Zero
| RHSKnown
.One
))
187 return I
->getOperand(0);
188 if (DemandedMask
.isSubsetOf(RHSKnown
.Zero
| LHSKnown
.One
))
189 return I
->getOperand(1);
191 // If the RHS is a constant, see if we can simplify it.
192 if (ShrinkDemandedConstant(I
, 1, DemandedMask
& ~LHSKnown
.Zero
))
195 Known
.Zero
= std::move(IKnownZero
);
196 Known
.One
= std::move(IKnownOne
);
199 case Instruction::Or
: {
200 // If either the LHS or the RHS are One, the result is One.
201 if (SimplifyDemandedBits(I
, 1, DemandedMask
, RHSKnown
, Depth
+ 1) ||
202 SimplifyDemandedBits(I
, 0, DemandedMask
& ~RHSKnown
.One
, LHSKnown
,
205 assert(!RHSKnown
.hasConflict() && "Bits known to be one AND zero?");
206 assert(!LHSKnown
.hasConflict() && "Bits known to be one AND zero?");
208 // Output known-0 bits are only known if clear in both the LHS & RHS.
209 APInt IKnownZero
= RHSKnown
.Zero
& LHSKnown
.Zero
;
210 // Output known-1 are known. to be set if s.et in either the LHS | RHS.
211 APInt IKnownOne
= RHSKnown
.One
| LHSKnown
.One
;
213 // If the client is only demanding bits that we know, return the known
215 if (DemandedMask
.isSubsetOf(IKnownZero
|IKnownOne
))
216 return Constant::getIntegerValue(VTy
, IKnownOne
);
218 // If all of the demanded bits are known zero on one side, return the other.
219 // These bits cannot contribute to the result of the 'or'.
220 if (DemandedMask
.isSubsetOf(LHSKnown
.One
| RHSKnown
.Zero
))
221 return I
->getOperand(0);
222 if (DemandedMask
.isSubsetOf(RHSKnown
.One
| LHSKnown
.Zero
))
223 return I
->getOperand(1);
225 // If the RHS is a constant, see if we can simplify it.
226 if (ShrinkDemandedConstant(I
, 1, DemandedMask
))
229 Known
.Zero
= std::move(IKnownZero
);
230 Known
.One
= std::move(IKnownOne
);
233 case Instruction::Xor
: {
234 if (SimplifyDemandedBits(I
, 1, DemandedMask
, RHSKnown
, Depth
+ 1) ||
235 SimplifyDemandedBits(I
, 0, DemandedMask
, LHSKnown
, Depth
+ 1))
237 assert(!RHSKnown
.hasConflict() && "Bits known to be one AND zero?");
238 assert(!LHSKnown
.hasConflict() && "Bits known to be one AND zero?");
240 // Output known-0 bits are known if clear or set in both the LHS & RHS.
241 APInt IKnownZero
= (RHSKnown
.Zero
& LHSKnown
.Zero
) |
242 (RHSKnown
.One
& LHSKnown
.One
);
243 // Output known-1 are known to be set if set in only one of the LHS, RHS.
244 APInt IKnownOne
= (RHSKnown
.Zero
& LHSKnown
.One
) |
245 (RHSKnown
.One
& LHSKnown
.Zero
);
247 // If the client is only demanding bits that we know, return the known
249 if (DemandedMask
.isSubsetOf(IKnownZero
|IKnownOne
))
250 return Constant::getIntegerValue(VTy
, IKnownOne
);
252 // If all of the demanded bits are known zero on one side, return the other.
253 // These bits cannot contribute to the result of the 'xor'.
254 if (DemandedMask
.isSubsetOf(RHSKnown
.Zero
))
255 return I
->getOperand(0);
256 if (DemandedMask
.isSubsetOf(LHSKnown
.Zero
))
257 return I
->getOperand(1);
259 // If all of the demanded bits are known to be zero on one side or the
260 // other, turn this into an *inclusive* or.
261 // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
262 if (DemandedMask
.isSubsetOf(RHSKnown
.Zero
| LHSKnown
.Zero
)) {
264 BinaryOperator::CreateOr(I
->getOperand(0), I
->getOperand(1),
266 return InsertNewInstWith(Or
, *I
);
269 // If all of the demanded bits on one side are known, and all of the set
270 // bits on that side are also known to be set on the other side, turn this
271 // into an AND, as we know the bits will be cleared.
272 // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
273 if (DemandedMask
.isSubsetOf(RHSKnown
.Zero
|RHSKnown
.One
) &&
274 RHSKnown
.One
.isSubsetOf(LHSKnown
.One
)) {
275 Constant
*AndC
= Constant::getIntegerValue(VTy
,
276 ~RHSKnown
.One
& DemandedMask
);
277 Instruction
*And
= BinaryOperator::CreateAnd(I
->getOperand(0), AndC
);
278 return InsertNewInstWith(And
, *I
);
281 // If the RHS is a constant, see if we can simplify it.
282 // FIXME: for XOR, we prefer to force bits to 1 if they will make a -1.
283 if (ShrinkDemandedConstant(I
, 1, DemandedMask
))
286 // If our LHS is an 'and' and if it has one use, and if any of the bits we
287 // are flipping are known to be set, then the xor is just resetting those
288 // bits to zero. We can just knock out bits from the 'and' and the 'xor',
289 // simplifying both of them.
290 if (Instruction
*LHSInst
= dyn_cast
<Instruction
>(I
->getOperand(0)))
291 if (LHSInst
->getOpcode() == Instruction::And
&& LHSInst
->hasOneUse() &&
292 isa
<ConstantInt
>(I
->getOperand(1)) &&
293 isa
<ConstantInt
>(LHSInst
->getOperand(1)) &&
294 (LHSKnown
.One
& RHSKnown
.One
& DemandedMask
) != 0) {
295 ConstantInt
*AndRHS
= cast
<ConstantInt
>(LHSInst
->getOperand(1));
296 ConstantInt
*XorRHS
= cast
<ConstantInt
>(I
->getOperand(1));
297 APInt NewMask
= ~(LHSKnown
.One
& RHSKnown
.One
& DemandedMask
);
300 ConstantInt::get(I
->getType(), NewMask
& AndRHS
->getValue());
301 Instruction
*NewAnd
= BinaryOperator::CreateAnd(I
->getOperand(0), AndC
);
302 InsertNewInstWith(NewAnd
, *I
);
305 ConstantInt::get(I
->getType(), NewMask
& XorRHS
->getValue());
306 Instruction
*NewXor
= BinaryOperator::CreateXor(NewAnd
, XorC
);
307 return InsertNewInstWith(NewXor
, *I
);
310 // Output known-0 bits are known if clear or set in both the LHS & RHS.
311 Known
.Zero
= std::move(IKnownZero
);
312 // Output known-1 are known to be set if set in only one of the LHS, RHS.
313 Known
.One
= std::move(IKnownOne
);
316 case Instruction::Select
: {
318 SelectPatternFlavor SPF
= matchSelectPattern(I
, LHS
, RHS
).Flavor
;
319 if (SPF
== SPF_UMAX
) {
320 // UMax(A, C) == A if ...
321 // The lowest non-zero bit of DemandMask is higher than the highest
322 // non-zero bit of C.
324 unsigned CTZ
= DemandedMask
.countTrailingZeros();
325 if (match(RHS
, m_APInt(C
)) && CTZ
>= C
->getActiveBits())
327 } else if (SPF
== SPF_UMIN
) {
328 // UMin(A, C) == A if ...
329 // The lowest non-zero bit of DemandMask is higher than the highest
331 // This comes from using DeMorgans on the above umax example.
333 unsigned CTZ
= DemandedMask
.countTrailingZeros();
334 if (match(RHS
, m_APInt(C
)) &&
335 CTZ
>= C
->getBitWidth() - C
->countLeadingOnes())
339 // If this is a select as part of any other min/max pattern, don't simplify
340 // any further in case we break the structure.
341 if (SPF
!= SPF_UNKNOWN
)
344 if (SimplifyDemandedBits(I
, 2, DemandedMask
, RHSKnown
, Depth
+ 1) ||
345 SimplifyDemandedBits(I
, 1, DemandedMask
, LHSKnown
, Depth
+ 1))
347 assert(!RHSKnown
.hasConflict() && "Bits known to be one AND zero?");
348 assert(!LHSKnown
.hasConflict() && "Bits known to be one AND zero?");
350 // If the operands are constants, see if we can simplify them.
351 if (ShrinkDemandedConstant(I
, 1, DemandedMask
) ||
352 ShrinkDemandedConstant(I
, 2, DemandedMask
))
355 // Only known if known in both the LHS and RHS.
356 Known
.One
= RHSKnown
.One
& LHSKnown
.One
;
357 Known
.Zero
= RHSKnown
.Zero
& LHSKnown
.Zero
;
360 case Instruction::ZExt
:
361 case Instruction::Trunc
: {
362 unsigned SrcBitWidth
= I
->getOperand(0)->getType()->getScalarSizeInBits();
364 APInt InputDemandedMask
= DemandedMask
.zextOrTrunc(SrcBitWidth
);
365 KnownBits
InputKnown(SrcBitWidth
);
366 if (SimplifyDemandedBits(I
, 0, InputDemandedMask
, InputKnown
, Depth
+ 1))
368 assert(InputKnown
.getBitWidth() == SrcBitWidth
&& "Src width changed?");
369 Known
= InputKnown
.zextOrTrunc(BitWidth
,
370 true /* ExtendedBitsAreKnownZero */);
371 assert(!Known
.hasConflict() && "Bits known to be one AND zero?");
374 case Instruction::BitCast
:
375 if (!I
->getOperand(0)->getType()->isIntOrIntVectorTy())
376 return nullptr; // vector->int or fp->int?
378 if (VectorType
*DstVTy
= dyn_cast
<VectorType
>(I
->getType())) {
379 if (VectorType
*SrcVTy
=
380 dyn_cast
<VectorType
>(I
->getOperand(0)->getType())) {
381 if (DstVTy
->getNumElements() != SrcVTy
->getNumElements())
382 // Don't touch a bitcast between vectors of different element counts.
385 // Don't touch a scalar-to-vector bitcast.
387 } else if (I
->getOperand(0)->getType()->isVectorTy())
388 // Don't touch a vector-to-scalar bitcast.
391 if (SimplifyDemandedBits(I
, 0, DemandedMask
, Known
, Depth
+ 1))
393 assert(!Known
.hasConflict() && "Bits known to be one AND zero?");
395 case Instruction::SExt
: {
396 // Compute the bits in the result that are not present in the input.
397 unsigned SrcBitWidth
= I
->getOperand(0)->getType()->getScalarSizeInBits();
399 APInt InputDemandedBits
= DemandedMask
.trunc(SrcBitWidth
);
401 // If any of the sign extended bits are demanded, we know that the sign
403 if (DemandedMask
.getActiveBits() > SrcBitWidth
)
404 InputDemandedBits
.setBit(SrcBitWidth
-1);
406 KnownBits
InputKnown(SrcBitWidth
);
407 if (SimplifyDemandedBits(I
, 0, InputDemandedBits
, InputKnown
, Depth
+ 1))
410 // If the input sign bit is known zero, or if the NewBits are not demanded
411 // convert this into a zero extension.
412 if (InputKnown
.isNonNegative() ||
413 DemandedMask
.getActiveBits() <= SrcBitWidth
) {
414 // Convert to ZExt cast.
415 CastInst
*NewCast
= new ZExtInst(I
->getOperand(0), VTy
, I
->getName());
416 return InsertNewInstWith(NewCast
, *I
);
419 // If the sign bit of the input is known set or clear, then we know the
420 // top bits of the result.
421 Known
= InputKnown
.sext(BitWidth
);
422 assert(!Known
.hasConflict() && "Bits known to be one AND zero?");
425 case Instruction::Add
:
426 case Instruction::Sub
: {
427 /// If the high-bits of an ADD/SUB are not demanded, then we do not care
428 /// about the high bits of the operands.
429 unsigned NLZ
= DemandedMask
.countLeadingZeros();
430 // Right fill the mask of bits for this ADD/SUB to demand the most
431 // significant bit and all those below it.
432 APInt
DemandedFromOps(APInt::getLowBitsSet(BitWidth
, BitWidth
-NLZ
));
433 if (ShrinkDemandedConstant(I
, 0, DemandedFromOps
) ||
434 SimplifyDemandedBits(I
, 0, DemandedFromOps
, LHSKnown
, Depth
+ 1) ||
435 ShrinkDemandedConstant(I
, 1, DemandedFromOps
) ||
436 SimplifyDemandedBits(I
, 1, DemandedFromOps
, RHSKnown
, Depth
+ 1)) {
438 // Disable the nsw and nuw flags here: We can no longer guarantee that
439 // we won't wrap after simplification. Removing the nsw/nuw flags is
440 // legal here because the top bit is not demanded.
441 BinaryOperator
&BinOP
= *cast
<BinaryOperator
>(I
);
442 BinOP
.setHasNoSignedWrap(false);
443 BinOP
.setHasNoUnsignedWrap(false);
448 // If we are known to be adding/subtracting zeros to every bit below
449 // the highest demanded bit, we just return the other side.
450 if (DemandedFromOps
.isSubsetOf(RHSKnown
.Zero
))
451 return I
->getOperand(0);
452 // We can't do this with the LHS for subtraction, unless we are only
453 // demanding the LSB.
454 if ((I
->getOpcode() == Instruction::Add
||
455 DemandedFromOps
.isOneValue()) &&
456 DemandedFromOps
.isSubsetOf(LHSKnown
.Zero
))
457 return I
->getOperand(1);
459 // Otherwise just compute the known bits of the result.
460 bool NSW
= cast
<OverflowingBinaryOperator
>(I
)->hasNoSignedWrap();
461 Known
= KnownBits::computeForAddSub(I
->getOpcode() == Instruction::Add
,
462 NSW
, LHSKnown
, RHSKnown
);
465 case Instruction::Shl
: {
467 if (match(I
->getOperand(1), m_APInt(SA
))) {
469 if (match(I
->getOperand(0), m_Shr(m_Value(), m_APInt(ShrAmt
))))
470 if (Instruction
*Shr
= dyn_cast
<Instruction
>(I
->getOperand(0)))
471 if (Value
*R
= simplifyShrShlDemandedBits(Shr
, *ShrAmt
, I
, *SA
,
472 DemandedMask
, Known
))
475 uint64_t ShiftAmt
= SA
->getLimitedValue(BitWidth
-1);
476 APInt
DemandedMaskIn(DemandedMask
.lshr(ShiftAmt
));
478 // If the shift is NUW/NSW, then it does demand the high bits.
479 ShlOperator
*IOp
= cast
<ShlOperator
>(I
);
480 if (IOp
->hasNoSignedWrap())
481 DemandedMaskIn
.setHighBits(ShiftAmt
+1);
482 else if (IOp
->hasNoUnsignedWrap())
483 DemandedMaskIn
.setHighBits(ShiftAmt
);
485 if (SimplifyDemandedBits(I
, 0, DemandedMaskIn
, Known
, Depth
+ 1))
487 assert(!Known
.hasConflict() && "Bits known to be one AND zero?");
488 Known
.Zero
<<= ShiftAmt
;
489 Known
.One
<<= ShiftAmt
;
490 // low bits known zero.
492 Known
.Zero
.setLowBits(ShiftAmt
);
496 case Instruction::LShr
: {
498 if (match(I
->getOperand(1), m_APInt(SA
))) {
499 uint64_t ShiftAmt
= SA
->getLimitedValue(BitWidth
-1);
501 // Unsigned shift right.
502 APInt
DemandedMaskIn(DemandedMask
.shl(ShiftAmt
));
504 // If the shift is exact, then it does demand the low bits (and knows that
506 if (cast
<LShrOperator
>(I
)->isExact())
507 DemandedMaskIn
.setLowBits(ShiftAmt
);
509 if (SimplifyDemandedBits(I
, 0, DemandedMaskIn
, Known
, Depth
+ 1))
511 assert(!Known
.hasConflict() && "Bits known to be one AND zero?");
512 Known
.Zero
.lshrInPlace(ShiftAmt
);
513 Known
.One
.lshrInPlace(ShiftAmt
);
515 Known
.Zero
.setHighBits(ShiftAmt
); // high bits known zero.
519 case Instruction::AShr
: {
520 // If this is an arithmetic shift right and only the low-bit is set, we can
521 // always convert this into a logical shr, even if the shift amount is
522 // variable. The low bit of the shift cannot be an input sign bit unless
523 // the shift amount is >= the size of the datatype, which is undefined.
524 if (DemandedMask
.isOneValue()) {
525 // Perform the logical shift right.
526 Instruction
*NewVal
= BinaryOperator::CreateLShr(
527 I
->getOperand(0), I
->getOperand(1), I
->getName());
528 return InsertNewInstWith(NewVal
, *I
);
531 // If the sign bit is the only bit demanded by this ashr, then there is no
532 // need to do it, the shift doesn't change the high bit.
533 if (DemandedMask
.isSignMask())
534 return I
->getOperand(0);
537 if (match(I
->getOperand(1), m_APInt(SA
))) {
538 uint32_t ShiftAmt
= SA
->getLimitedValue(BitWidth
-1);
540 // Signed shift right.
541 APInt
DemandedMaskIn(DemandedMask
.shl(ShiftAmt
));
542 // If any of the high bits are demanded, we should set the sign bit as
544 if (DemandedMask
.countLeadingZeros() <= ShiftAmt
)
545 DemandedMaskIn
.setSignBit();
547 // If the shift is exact, then it does demand the low bits (and knows that
549 if (cast
<AShrOperator
>(I
)->isExact())
550 DemandedMaskIn
.setLowBits(ShiftAmt
);
552 if (SimplifyDemandedBits(I
, 0, DemandedMaskIn
, Known
, Depth
+ 1))
555 unsigned SignBits
= ComputeNumSignBits(I
->getOperand(0), Depth
+ 1, CxtI
);
557 assert(!Known
.hasConflict() && "Bits known to be one AND zero?");
558 // Compute the new bits that are at the top now plus sign bits.
559 APInt
HighBits(APInt::getHighBitsSet(
560 BitWidth
, std::min(SignBits
+ ShiftAmt
- 1, BitWidth
)));
561 Known
.Zero
.lshrInPlace(ShiftAmt
);
562 Known
.One
.lshrInPlace(ShiftAmt
);
564 // If the input sign bit is known to be zero, or if none of the top bits
565 // are demanded, turn this into an unsigned shift right.
566 assert(BitWidth
> ShiftAmt
&& "Shift amount not saturated?");
567 if (Known
.Zero
[BitWidth
-ShiftAmt
-1] ||
568 !DemandedMask
.intersects(HighBits
)) {
569 BinaryOperator
*LShr
= BinaryOperator::CreateLShr(I
->getOperand(0),
571 LShr
->setIsExact(cast
<BinaryOperator
>(I
)->isExact());
572 return InsertNewInstWith(LShr
, *I
);
573 } else if (Known
.One
[BitWidth
-ShiftAmt
-1]) { // New bits are known one.
574 Known
.One
|= HighBits
;
579 case Instruction::UDiv
: {
580 // UDiv doesn't demand low bits that are zero in the divisor.
582 if (match(I
->getOperand(1), m_APInt(SA
))) {
583 // If the shift is exact, then it does demand the low bits.
584 if (cast
<UDivOperator
>(I
)->isExact())
587 // FIXME: Take the demanded mask of the result into account.
588 unsigned RHSTrailingZeros
= SA
->countTrailingZeros();
589 APInt DemandedMaskIn
=
590 APInt::getHighBitsSet(BitWidth
, BitWidth
- RHSTrailingZeros
);
591 if (SimplifyDemandedBits(I
, 0, DemandedMaskIn
, LHSKnown
, Depth
+ 1))
594 // Propagate zero bits from the input.
595 Known
.Zero
.setHighBits(std::min(
596 BitWidth
, LHSKnown
.Zero
.countLeadingOnes() + RHSTrailingZeros
));
600 case Instruction::SRem
:
601 if (ConstantInt
*Rem
= dyn_cast
<ConstantInt
>(I
->getOperand(1))) {
602 // X % -1 demands all the bits because we don't want to introduce
603 // INT_MIN % -1 (== undef) by accident.
604 if (Rem
->isMinusOne())
606 APInt RA
= Rem
->getValue().abs();
607 if (RA
.isPowerOf2()) {
608 if (DemandedMask
.ult(RA
)) // srem won't affect demanded bits
609 return I
->getOperand(0);
611 APInt LowBits
= RA
- 1;
612 APInt Mask2
= LowBits
| APInt::getSignMask(BitWidth
);
613 if (SimplifyDemandedBits(I
, 0, Mask2
, LHSKnown
, Depth
+ 1))
616 // The low bits of LHS are unchanged by the srem.
617 Known
.Zero
= LHSKnown
.Zero
& LowBits
;
618 Known
.One
= LHSKnown
.One
& LowBits
;
620 // If LHS is non-negative or has all low bits zero, then the upper bits
622 if (LHSKnown
.isNonNegative() || LowBits
.isSubsetOf(LHSKnown
.Zero
))
623 Known
.Zero
|= ~LowBits
;
625 // If LHS is negative and not all low bits are zero, then the upper bits
627 if (LHSKnown
.isNegative() && LowBits
.intersects(LHSKnown
.One
))
628 Known
.One
|= ~LowBits
;
630 assert(!Known
.hasConflict() && "Bits known to be one AND zero?");
635 // The sign bit is the LHS's sign bit, except when the result of the
636 // remainder is zero.
637 if (DemandedMask
.isSignBitSet()) {
638 computeKnownBits(I
->getOperand(0), LHSKnown
, Depth
+ 1, CxtI
);
639 // If it's known zero, our sign bit is also zero.
640 if (LHSKnown
.isNonNegative())
641 Known
.makeNonNegative();
644 case Instruction::URem
: {
645 KnownBits
Known2(BitWidth
);
646 APInt AllOnes
= APInt::getAllOnesValue(BitWidth
);
647 if (SimplifyDemandedBits(I
, 0, AllOnes
, Known2
, Depth
+ 1) ||
648 SimplifyDemandedBits(I
, 1, AllOnes
, Known2
, Depth
+ 1))
651 unsigned Leaders
= Known2
.countMinLeadingZeros();
652 Known
.Zero
= APInt::getHighBitsSet(BitWidth
, Leaders
) & DemandedMask
;
655 case Instruction::Call
:
656 if (IntrinsicInst
*II
= dyn_cast
<IntrinsicInst
>(I
)) {
657 switch (II
->getIntrinsicID()) {
659 case Intrinsic::bswap
: {
660 // If the only bits demanded come from one byte of the bswap result,
661 // just shift the input byte into position to eliminate the bswap.
662 unsigned NLZ
= DemandedMask
.countLeadingZeros();
663 unsigned NTZ
= DemandedMask
.countTrailingZeros();
665 // Round NTZ down to the next byte. If we have 11 trailing zeros, then
666 // we need all the bits down to bit 8. Likewise, round NLZ. If we
667 // have 14 leading zeros, round to 8.
670 // If we need exactly one byte, we can do this transformation.
671 if (BitWidth
-NLZ
-NTZ
== 8) {
672 unsigned ResultBit
= NTZ
;
673 unsigned InputBit
= BitWidth
-NTZ
-8;
675 // Replace this with either a left or right shift to get the byte into
678 if (InputBit
> ResultBit
)
679 NewVal
= BinaryOperator::CreateLShr(II
->getArgOperand(0),
680 ConstantInt::get(I
->getType(), InputBit
-ResultBit
));
682 NewVal
= BinaryOperator::CreateShl(II
->getArgOperand(0),
683 ConstantInt::get(I
->getType(), ResultBit
-InputBit
));
685 return InsertNewInstWith(NewVal
, *I
);
688 // TODO: Could compute known zero/one bits based on the input.
691 case Intrinsic::fshr
:
692 case Intrinsic::fshl
: {
694 if (!match(I
->getOperand(2), m_APInt(SA
)))
697 // Normalize to funnel shift left. APInt shifts of BitWidth are well-
698 // defined, so no need to special-case zero shifts here.
699 uint64_t ShiftAmt
= SA
->urem(BitWidth
);
700 if (II
->getIntrinsicID() == Intrinsic::fshr
)
701 ShiftAmt
= BitWidth
- ShiftAmt
;
703 APInt
DemandedMaskLHS(DemandedMask
.lshr(ShiftAmt
));
704 APInt
DemandedMaskRHS(DemandedMask
.shl(BitWidth
- ShiftAmt
));
705 if (SimplifyDemandedBits(I
, 0, DemandedMaskLHS
, LHSKnown
, Depth
+ 1) ||
706 SimplifyDemandedBits(I
, 1, DemandedMaskRHS
, RHSKnown
, Depth
+ 1))
709 Known
.Zero
= LHSKnown
.Zero
.shl(ShiftAmt
) |
710 RHSKnown
.Zero
.lshr(BitWidth
- ShiftAmt
);
711 Known
.One
= LHSKnown
.One
.shl(ShiftAmt
) |
712 RHSKnown
.One
.lshr(BitWidth
- ShiftAmt
);
715 case Intrinsic::x86_mmx_pmovmskb
:
716 case Intrinsic::x86_sse_movmsk_ps
:
717 case Intrinsic::x86_sse2_movmsk_pd
:
718 case Intrinsic::x86_sse2_pmovmskb_128
:
719 case Intrinsic::x86_avx_movmsk_ps_256
:
720 case Intrinsic::x86_avx_movmsk_pd_256
:
721 case Intrinsic::x86_avx2_pmovmskb
: {
722 // MOVMSK copies the vector elements' sign bits to the low bits
723 // and zeros the high bits.
725 if (II
->getIntrinsicID() == Intrinsic::x86_mmx_pmovmskb
) {
726 ArgWidth
= 8; // Arg is x86_mmx, but treated as <8 x i8>.
728 auto Arg
= II
->getArgOperand(0);
729 auto ArgType
= cast
<VectorType
>(Arg
->getType());
730 ArgWidth
= ArgType
->getNumElements();
733 // If we don't need any of low bits then return zero,
734 // we know that DemandedMask is non-zero already.
735 APInt DemandedElts
= DemandedMask
.zextOrTrunc(ArgWidth
);
736 if (DemandedElts
.isNullValue())
737 return ConstantInt::getNullValue(VTy
);
739 // We know that the upper bits are set to zero.
740 Known
.Zero
.setBitsFrom(ArgWidth
);
743 case Intrinsic::x86_sse42_crc32_64_64
:
744 Known
.Zero
.setBitsFrom(32);
748 computeKnownBits(V
, Known
, Depth
, CxtI
);
752 // If the client is only demanding bits that we know, return the known
754 if (DemandedMask
.isSubsetOf(Known
.Zero
|Known
.One
))
755 return Constant::getIntegerValue(VTy
, Known
.One
);
759 /// Helper routine of SimplifyDemandedUseBits. It computes Known
760 /// bits. It also tries to handle simplifications that can be done based on
761 /// DemandedMask, but without modifying the Instruction.
762 Value
*InstCombiner::SimplifyMultipleUseDemandedBits(Instruction
*I
,
763 const APInt
&DemandedMask
,
767 unsigned BitWidth
= DemandedMask
.getBitWidth();
768 Type
*ITy
= I
->getType();
770 KnownBits
LHSKnown(BitWidth
);
771 KnownBits
RHSKnown(BitWidth
);
773 // Despite the fact that we can't simplify this instruction in all User's
774 // context, we can at least compute the known bits, and we can
775 // do simplifications that apply to *just* the one user if we know that
776 // this instruction has a simpler value in that context.
777 switch (I
->getOpcode()) {
778 case Instruction::And
: {
779 // If either the LHS or the RHS are Zero, the result is zero.
780 computeKnownBits(I
->getOperand(1), RHSKnown
, Depth
+ 1, CxtI
);
781 computeKnownBits(I
->getOperand(0), LHSKnown
, Depth
+ 1,
784 // Output known-0 are known to be clear if zero in either the LHS | RHS.
785 APInt IKnownZero
= RHSKnown
.Zero
| LHSKnown
.Zero
;
786 // Output known-1 bits are only known if set in both the LHS & RHS.
787 APInt IKnownOne
= RHSKnown
.One
& LHSKnown
.One
;
789 // If the client is only demanding bits that we know, return the known
791 if (DemandedMask
.isSubsetOf(IKnownZero
|IKnownOne
))
792 return Constant::getIntegerValue(ITy
, IKnownOne
);
794 // If all of the demanded bits are known 1 on one side, return the other.
795 // These bits cannot contribute to the result of the 'and' in this
797 if (DemandedMask
.isSubsetOf(LHSKnown
.Zero
| RHSKnown
.One
))
798 return I
->getOperand(0);
799 if (DemandedMask
.isSubsetOf(RHSKnown
.Zero
| LHSKnown
.One
))
800 return I
->getOperand(1);
802 Known
.Zero
= std::move(IKnownZero
);
803 Known
.One
= std::move(IKnownOne
);
806 case Instruction::Or
: {
807 // We can simplify (X|Y) -> X or Y in the user's context if we know that
808 // only bits from X or Y are demanded.
810 // If either the LHS or the RHS are One, the result is One.
811 computeKnownBits(I
->getOperand(1), RHSKnown
, Depth
+ 1, CxtI
);
812 computeKnownBits(I
->getOperand(0), LHSKnown
, Depth
+ 1,
815 // Output known-0 bits are only known if clear in both the LHS & RHS.
816 APInt IKnownZero
= RHSKnown
.Zero
& LHSKnown
.Zero
;
817 // Output known-1 are known to be set if set in either the LHS | RHS.
818 APInt IKnownOne
= RHSKnown
.One
| LHSKnown
.One
;
820 // If the client is only demanding bits that we know, return the known
822 if (DemandedMask
.isSubsetOf(IKnownZero
|IKnownOne
))
823 return Constant::getIntegerValue(ITy
, IKnownOne
);
825 // If all of the demanded bits are known zero on one side, return the
826 // other. These bits cannot contribute to the result of the 'or' in this
828 if (DemandedMask
.isSubsetOf(LHSKnown
.One
| RHSKnown
.Zero
))
829 return I
->getOperand(0);
830 if (DemandedMask
.isSubsetOf(RHSKnown
.One
| LHSKnown
.Zero
))
831 return I
->getOperand(1);
833 Known
.Zero
= std::move(IKnownZero
);
834 Known
.One
= std::move(IKnownOne
);
837 case Instruction::Xor
: {
838 // We can simplify (X^Y) -> X or Y in the user's context if we know that
839 // only bits from X or Y are demanded.
841 computeKnownBits(I
->getOperand(1), RHSKnown
, Depth
+ 1, CxtI
);
842 computeKnownBits(I
->getOperand(0), LHSKnown
, Depth
+ 1,
845 // Output known-0 bits are known if clear or set in both the LHS & RHS.
846 APInt IKnownZero
= (RHSKnown
.Zero
& LHSKnown
.Zero
) |
847 (RHSKnown
.One
& LHSKnown
.One
);
848 // Output known-1 are known to be set if set in only one of the LHS, RHS.
849 APInt IKnownOne
= (RHSKnown
.Zero
& LHSKnown
.One
) |
850 (RHSKnown
.One
& LHSKnown
.Zero
);
852 // If the client is only demanding bits that we know, return the known
854 if (DemandedMask
.isSubsetOf(IKnownZero
|IKnownOne
))
855 return Constant::getIntegerValue(ITy
, IKnownOne
);
857 // If all of the demanded bits are known zero on one side, return the
859 if (DemandedMask
.isSubsetOf(RHSKnown
.Zero
))
860 return I
->getOperand(0);
861 if (DemandedMask
.isSubsetOf(LHSKnown
.Zero
))
862 return I
->getOperand(1);
864 // Output known-0 bits are known if clear or set in both the LHS & RHS.
865 Known
.Zero
= std::move(IKnownZero
);
866 // Output known-1 are known to be set if set in only one of the LHS, RHS.
867 Known
.One
= std::move(IKnownOne
);
871 // Compute the Known bits to simplify things downstream.
872 computeKnownBits(I
, Known
, Depth
, CxtI
);
874 // If this user is only demanding bits that we know, return the known
876 if (DemandedMask
.isSubsetOf(Known
.Zero
|Known
.One
))
877 return Constant::getIntegerValue(ITy
, Known
.One
);
886 /// Helper routine of SimplifyDemandedUseBits. It tries to simplify
887 /// "E1 = (X lsr C1) << C2", where the C1 and C2 are constant, into
888 /// "E2 = X << (C2 - C1)" or "E2 = X >> (C1 - C2)", depending on the sign
891 /// Suppose E1 and E2 are generally different in bits S={bm, bm+1,
892 /// ..., bn}, without considering the specific value X is holding.
893 /// This transformation is legal iff one of following conditions is hold:
894 /// 1) All the bit in S are 0, in this case E1 == E2.
895 /// 2) We don't care those bits in S, per the input DemandedMask.
896 /// 3) Combination of 1) and 2). Some bits in S are 0, and we don't care the
899 /// Currently we only test condition 2).
901 /// As with SimplifyDemandedUseBits, it returns NULL if the simplification was
904 InstCombiner::simplifyShrShlDemandedBits(Instruction
*Shr
, const APInt
&ShrOp1
,
905 Instruction
*Shl
, const APInt
&ShlOp1
,
906 const APInt
&DemandedMask
,
908 if (!ShlOp1
|| !ShrOp1
)
909 return nullptr; // No-op.
911 Value
*VarX
= Shr
->getOperand(0);
912 Type
*Ty
= VarX
->getType();
913 unsigned BitWidth
= Ty
->getScalarSizeInBits();
914 if (ShlOp1
.uge(BitWidth
) || ShrOp1
.uge(BitWidth
))
915 return nullptr; // Undef.
917 unsigned ShlAmt
= ShlOp1
.getZExtValue();
918 unsigned ShrAmt
= ShrOp1
.getZExtValue();
920 Known
.One
.clearAllBits();
921 Known
.Zero
.setLowBits(ShlAmt
- 1);
922 Known
.Zero
&= DemandedMask
;
924 APInt
BitMask1(APInt::getAllOnesValue(BitWidth
));
925 APInt
BitMask2(APInt::getAllOnesValue(BitWidth
));
927 bool isLshr
= (Shr
->getOpcode() == Instruction::LShr
);
928 BitMask1
= isLshr
? (BitMask1
.lshr(ShrAmt
) << ShlAmt
) :
929 (BitMask1
.ashr(ShrAmt
) << ShlAmt
);
931 if (ShrAmt
<= ShlAmt
) {
932 BitMask2
<<= (ShlAmt
- ShrAmt
);
934 BitMask2
= isLshr
? BitMask2
.lshr(ShrAmt
- ShlAmt
):
935 BitMask2
.ashr(ShrAmt
- ShlAmt
);
938 // Check if condition-2 (see the comment to this function) is satified.
939 if ((BitMask1
& DemandedMask
) == (BitMask2
& DemandedMask
)) {
940 if (ShrAmt
== ShlAmt
)
943 if (!Shr
->hasOneUse())
947 if (ShrAmt
< ShlAmt
) {
948 Constant
*Amt
= ConstantInt::get(VarX
->getType(), ShlAmt
- ShrAmt
);
949 New
= BinaryOperator::CreateShl(VarX
, Amt
);
950 BinaryOperator
*Orig
= cast
<BinaryOperator
>(Shl
);
951 New
->setHasNoSignedWrap(Orig
->hasNoSignedWrap());
952 New
->setHasNoUnsignedWrap(Orig
->hasNoUnsignedWrap());
954 Constant
*Amt
= ConstantInt::get(VarX
->getType(), ShrAmt
- ShlAmt
);
955 New
= isLshr
? BinaryOperator::CreateLShr(VarX
, Amt
) :
956 BinaryOperator::CreateAShr(VarX
, Amt
);
957 if (cast
<BinaryOperator
>(Shr
)->isExact())
958 New
->setIsExact(true);
961 return InsertNewInstWith(New
, *Shl
);
967 /// Implement SimplifyDemandedVectorElts for amdgcn buffer and image intrinsics.
969 /// Note: This only supports non-TFE/LWE image intrinsic calls; those have
971 Value
*InstCombiner::simplifyAMDGCNMemoryIntrinsicDemanded(IntrinsicInst
*II
,
974 unsigned VWidth
= II
->getType()->getVectorNumElements();
978 ConstantInt
*NewDMask
= nullptr;
981 // Pretend that a prefix of elements is demanded to simplify the code
983 DemandedElts
= (1 << DemandedElts
.getActiveBits()) - 1;
985 ConstantInt
*DMask
= cast
<ConstantInt
>(II
->getArgOperand(DMaskIdx
));
986 unsigned DMaskVal
= DMask
->getZExtValue() & 0xf;
988 // Mask off values that are undefined because the dmask doesn't cover them
989 DemandedElts
&= (1 << countPopulation(DMaskVal
)) - 1;
991 unsigned NewDMaskVal
= 0;
992 unsigned OrigLoadIdx
= 0;
993 for (unsigned SrcIdx
= 0; SrcIdx
< 4; ++SrcIdx
) {
994 const unsigned Bit
= 1 << SrcIdx
;
995 if (!!(DMaskVal
& Bit
)) {
996 if (!!DemandedElts
[OrigLoadIdx
])
1002 if (DMaskVal
!= NewDMaskVal
)
1003 NewDMask
= ConstantInt::get(DMask
->getType(), NewDMaskVal
);
1006 unsigned NewNumElts
= DemandedElts
.countPopulation();
1008 return UndefValue::get(II
->getType());
1010 if (NewNumElts
>= VWidth
&& DemandedElts
.isMask()) {
1012 II
->setArgOperand(DMaskIdx
, NewDMask
);
1016 // Determine the overload types of the original intrinsic.
1017 auto IID
= II
->getIntrinsicID();
1018 SmallVector
<Intrinsic::IITDescriptor
, 16> Table
;
1019 getIntrinsicInfoTableEntries(IID
, Table
);
1020 ArrayRef
<Intrinsic::IITDescriptor
> TableRef
= Table
;
1022 // Validate function argument and return types, extracting overloaded types
1024 FunctionType
*FTy
= II
->getCalledFunction()->getFunctionType();
1025 SmallVector
<Type
*, 6> OverloadTys
;
1026 Intrinsic::matchIntrinsicSignature(FTy
, TableRef
, OverloadTys
);
1028 Module
*M
= II
->getParent()->getParent()->getParent();
1029 Type
*EltTy
= II
->getType()->getVectorElementType();
1030 Type
*NewTy
= (NewNumElts
== 1) ? EltTy
: VectorType::get(EltTy
, NewNumElts
);
1032 OverloadTys
[0] = NewTy
;
1033 Function
*NewIntrin
= Intrinsic::getDeclaration(M
, IID
, OverloadTys
);
1035 SmallVector
<Value
*, 16> Args
;
1036 for (unsigned I
= 0, E
= II
->getNumArgOperands(); I
!= E
; ++I
)
1037 Args
.push_back(II
->getArgOperand(I
));
1040 Args
[DMaskIdx
] = NewDMask
;
1042 IRBuilderBase::InsertPointGuard
Guard(Builder
);
1043 Builder
.SetInsertPoint(II
);
1045 CallInst
*NewCall
= Builder
.CreateCall(NewIntrin
, Args
);
1046 NewCall
->takeName(II
);
1047 NewCall
->copyMetadata(*II
);
1049 if (NewNumElts
== 1) {
1050 return Builder
.CreateInsertElement(UndefValue::get(II
->getType()), NewCall
,
1051 DemandedElts
.countTrailingZeros());
1054 SmallVector
<uint32_t, 8> EltMask
;
1055 unsigned NewLoadIdx
= 0;
1056 for (unsigned OrigLoadIdx
= 0; OrigLoadIdx
< VWidth
; ++OrigLoadIdx
) {
1057 if (!!DemandedElts
[OrigLoadIdx
])
1058 EltMask
.push_back(NewLoadIdx
++);
1060 EltMask
.push_back(NewNumElts
);
1064 Builder
.CreateShuffleVector(NewCall
, UndefValue::get(NewTy
), EltMask
);
1069 /// The specified value produces a vector with any number of elements.
1070 /// DemandedElts contains the set of elements that are actually used by the
1071 /// caller. This method analyzes which elements of the operand are undef and
1072 /// returns that information in UndefElts.
1074 /// If the information about demanded elements can be used to simplify the
1075 /// operation, the operation is simplified, then the resultant value is
1076 /// returned. This returns null if no change was made.
1077 Value
*InstCombiner::SimplifyDemandedVectorElts(Value
*V
, APInt DemandedElts
,
1080 unsigned VWidth
= V
->getType()->getVectorNumElements();
1081 APInt
EltMask(APInt::getAllOnesValue(VWidth
));
1082 assert((DemandedElts
& ~EltMask
) == 0 && "Invalid DemandedElts!");
1084 if (isa
<UndefValue
>(V
)) {
1085 // If the entire vector is undefined, just return this info.
1086 UndefElts
= EltMask
;
1090 if (DemandedElts
.isNullValue()) { // If nothing is demanded, provide undef.
1091 UndefElts
= EltMask
;
1092 return UndefValue::get(V
->getType());
1097 if (auto *C
= dyn_cast
<Constant
>(V
)) {
1098 // Check if this is identity. If so, return 0 since we are not simplifying
1100 if (DemandedElts
.isAllOnesValue())
1103 Type
*EltTy
= cast
<VectorType
>(V
->getType())->getElementType();
1104 Constant
*Undef
= UndefValue::get(EltTy
);
1105 SmallVector
<Constant
*, 16> Elts
;
1106 for (unsigned i
= 0; i
!= VWidth
; ++i
) {
1107 if (!DemandedElts
[i
]) { // If not demanded, set to undef.
1108 Elts
.push_back(Undef
);
1109 UndefElts
.setBit(i
);
1113 Constant
*Elt
= C
->getAggregateElement(i
);
1114 if (!Elt
) return nullptr;
1116 if (isa
<UndefValue
>(Elt
)) { // Already undef.
1117 Elts
.push_back(Undef
);
1118 UndefElts
.setBit(i
);
1119 } else { // Otherwise, defined.
1120 Elts
.push_back(Elt
);
1124 // If we changed the constant, return it.
1125 Constant
*NewCV
= ConstantVector::get(Elts
);
1126 return NewCV
!= C
? NewCV
: nullptr;
1129 // Limit search depth.
1133 // If multiple users are using the root value, proceed with
1134 // simplification conservatively assuming that all elements
1136 if (!V
->hasOneUse()) {
1137 // Quit if we find multiple users of a non-root value though.
1138 // They'll be handled when it's their turn to be visited by
1139 // the main instcombine process.
1141 // TODO: Just compute the UndefElts information recursively.
1144 // Conservatively assume that all elements are needed.
1145 DemandedElts
= EltMask
;
1148 Instruction
*I
= dyn_cast
<Instruction
>(V
);
1149 if (!I
) return nullptr; // Only analyze instructions.
1151 bool MadeChange
= false;
1152 auto simplifyAndSetOp
= [&](Instruction
*Inst
, unsigned OpNum
,
1153 APInt Demanded
, APInt
&Undef
) {
1154 auto *II
= dyn_cast
<IntrinsicInst
>(Inst
);
1155 Value
*Op
= II
? II
->getArgOperand(OpNum
) : Inst
->getOperand(OpNum
);
1156 if (Value
*V
= SimplifyDemandedVectorElts(Op
, Demanded
, Undef
, Depth
+ 1)) {
1158 II
->setArgOperand(OpNum
, V
);
1160 Inst
->setOperand(OpNum
, V
);
1165 APInt
UndefElts2(VWidth
, 0);
1166 APInt
UndefElts3(VWidth
, 0);
1167 switch (I
->getOpcode()) {
1170 case Instruction::GetElementPtr
: {
1171 // The LangRef requires that struct geps have all constant indices. As
1172 // such, we can't convert any operand to partial undef.
1173 auto mayIndexStructType
= [](GetElementPtrInst
&GEP
) {
1174 for (auto I
= gep_type_begin(GEP
), E
= gep_type_end(GEP
);
1180 if (mayIndexStructType(cast
<GetElementPtrInst
>(*I
)))
1183 // Conservatively track the demanded elements back through any vector
1184 // operands we may have. We know there must be at least one, or we
1185 // wouldn't have a vector result to get here. Note that we intentionally
1186 // merge the undef bits here since gepping with either an undef base or
1187 // index results in undef.
1188 for (unsigned i
= 0; i
< I
->getNumOperands(); i
++) {
1189 if (isa
<UndefValue
>(I
->getOperand(i
))) {
1190 // If the entire vector is undefined, just return this info.
1191 UndefElts
= EltMask
;
1194 if (I
->getOperand(i
)->getType()->isVectorTy()) {
1195 APInt
UndefEltsOp(VWidth
, 0);
1196 simplifyAndSetOp(I
, i
, DemandedElts
, UndefEltsOp
);
1197 UndefElts
|= UndefEltsOp
;
1203 case Instruction::InsertElement
: {
1204 // If this is a variable index, we don't know which element it overwrites.
1205 // demand exactly the same input as we produce.
1206 ConstantInt
*Idx
= dyn_cast
<ConstantInt
>(I
->getOperand(2));
1208 // Note that we can't propagate undef elt info, because we don't know
1209 // which elt is getting updated.
1210 simplifyAndSetOp(I
, 0, DemandedElts
, UndefElts2
);
1214 // The element inserted overwrites whatever was there, so the input demanded
1215 // set is simpler than the output set.
1216 unsigned IdxNo
= Idx
->getZExtValue();
1217 APInt PreInsertDemandedElts
= DemandedElts
;
1219 PreInsertDemandedElts
.clearBit(IdxNo
);
1221 simplifyAndSetOp(I
, 0, PreInsertDemandedElts
, UndefElts
);
1223 // If this is inserting an element that isn't demanded, remove this
1225 if (IdxNo
>= VWidth
|| !DemandedElts
[IdxNo
]) {
1227 return I
->getOperand(0);
1230 // The inserted element is defined.
1231 UndefElts
.clearBit(IdxNo
);
1234 case Instruction::ShuffleVector
: {
1235 ShuffleVectorInst
*Shuffle
= cast
<ShuffleVectorInst
>(I
);
1236 unsigned LHSVWidth
=
1237 Shuffle
->getOperand(0)->getType()->getVectorNumElements();
1238 APInt
LeftDemanded(LHSVWidth
, 0), RightDemanded(LHSVWidth
, 0);
1239 for (unsigned i
= 0; i
< VWidth
; i
++) {
1240 if (DemandedElts
[i
]) {
1241 unsigned MaskVal
= Shuffle
->getMaskValue(i
);
1242 if (MaskVal
!= -1u) {
1243 assert(MaskVal
< LHSVWidth
* 2 &&
1244 "shufflevector mask index out of range!");
1245 if (MaskVal
< LHSVWidth
)
1246 LeftDemanded
.setBit(MaskVal
);
1248 RightDemanded
.setBit(MaskVal
- LHSVWidth
);
1253 APInt
LHSUndefElts(LHSVWidth
, 0);
1254 simplifyAndSetOp(I
, 0, LeftDemanded
, LHSUndefElts
);
1256 APInt
RHSUndefElts(LHSVWidth
, 0);
1257 simplifyAndSetOp(I
, 1, RightDemanded
, RHSUndefElts
);
1259 bool NewUndefElts
= false;
1260 unsigned LHSIdx
= -1u, LHSValIdx
= -1u;
1261 unsigned RHSIdx
= -1u, RHSValIdx
= -1u;
1262 bool LHSUniform
= true;
1263 bool RHSUniform
= true;
1264 for (unsigned i
= 0; i
< VWidth
; i
++) {
1265 unsigned MaskVal
= Shuffle
->getMaskValue(i
);
1266 if (MaskVal
== -1u) {
1267 UndefElts
.setBit(i
);
1268 } else if (!DemandedElts
[i
]) {
1269 NewUndefElts
= true;
1270 UndefElts
.setBit(i
);
1271 } else if (MaskVal
< LHSVWidth
) {
1272 if (LHSUndefElts
[MaskVal
]) {
1273 NewUndefElts
= true;
1274 UndefElts
.setBit(i
);
1276 LHSIdx
= LHSIdx
== -1u ? i
: LHSVWidth
;
1277 LHSValIdx
= LHSValIdx
== -1u ? MaskVal
: LHSVWidth
;
1278 LHSUniform
= LHSUniform
&& (MaskVal
== i
);
1281 if (RHSUndefElts
[MaskVal
- LHSVWidth
]) {
1282 NewUndefElts
= true;
1283 UndefElts
.setBit(i
);
1285 RHSIdx
= RHSIdx
== -1u ? i
: LHSVWidth
;
1286 RHSValIdx
= RHSValIdx
== -1u ? MaskVal
- LHSVWidth
: LHSVWidth
;
1287 RHSUniform
= RHSUniform
&& (MaskVal
- LHSVWidth
== i
);
1292 // Try to transform shuffle with constant vector and single element from
1293 // this constant vector to single insertelement instruction.
1294 // shufflevector V, C, <v1, v2, .., ci, .., vm> ->
1295 // insertelement V, C[ci], ci-n
1296 if (LHSVWidth
== Shuffle
->getType()->getNumElements()) {
1297 Value
*Op
= nullptr;
1298 Constant
*Value
= nullptr;
1301 // Find constant vector with the single element in shuffle (LHS or RHS).
1302 if (LHSIdx
< LHSVWidth
&& RHSUniform
) {
1303 if (auto *CV
= dyn_cast
<ConstantVector
>(Shuffle
->getOperand(0))) {
1304 Op
= Shuffle
->getOperand(1);
1305 Value
= CV
->getOperand(LHSValIdx
);
1309 if (RHSIdx
< LHSVWidth
&& LHSUniform
) {
1310 if (auto *CV
= dyn_cast
<ConstantVector
>(Shuffle
->getOperand(1))) {
1311 Op
= Shuffle
->getOperand(0);
1312 Value
= CV
->getOperand(RHSValIdx
);
1316 // Found constant vector with single element - convert to insertelement.
1318 Instruction
*New
= InsertElementInst::Create(
1319 Op
, Value
, ConstantInt::get(Type::getInt32Ty(I
->getContext()), Idx
),
1320 Shuffle
->getName());
1321 InsertNewInstWith(New
, *Shuffle
);
1326 // Add additional discovered undefs.
1327 SmallVector
<Constant
*, 16> Elts
;
1328 for (unsigned i
= 0; i
< VWidth
; ++i
) {
1330 Elts
.push_back(UndefValue::get(Type::getInt32Ty(I
->getContext())));
1332 Elts
.push_back(ConstantInt::get(Type::getInt32Ty(I
->getContext()),
1333 Shuffle
->getMaskValue(i
)));
1335 I
->setOperand(2, ConstantVector::get(Elts
));
1340 case Instruction::Select
: {
1341 // If this is a vector select, try to transform the select condition based
1342 // on the current demanded elements.
1343 SelectInst
*Sel
= cast
<SelectInst
>(I
);
1344 if (Sel
->getCondition()->getType()->isVectorTy()) {
1345 // TODO: We are not doing anything with UndefElts based on this call.
1346 // It is overwritten below based on the other select operands. If an
1347 // element of the select condition is known undef, then we are free to
1348 // choose the output value from either arm of the select. If we know that
1349 // one of those values is undef, then the output can be undef.
1350 simplifyAndSetOp(I
, 0, DemandedElts
, UndefElts
);
1353 // Next, see if we can transform the arms of the select.
1354 APInt
DemandedLHS(DemandedElts
), DemandedRHS(DemandedElts
);
1355 if (auto *CV
= dyn_cast
<ConstantVector
>(Sel
->getCondition())) {
1356 for (unsigned i
= 0; i
< VWidth
; i
++) {
1357 // isNullValue() always returns false when called on a ConstantExpr.
1358 // Skip constant expressions to avoid propagating incorrect information.
1359 Constant
*CElt
= CV
->getAggregateElement(i
);
1360 if (isa
<ConstantExpr
>(CElt
))
1362 // TODO: If a select condition element is undef, we can demand from
1363 // either side. If one side is known undef, choosing that side would
1365 if (CElt
->isNullValue())
1366 DemandedLHS
.clearBit(i
);
1368 DemandedRHS
.clearBit(i
);
1372 simplifyAndSetOp(I
, 1, DemandedLHS
, UndefElts2
);
1373 simplifyAndSetOp(I
, 2, DemandedRHS
, UndefElts3
);
1375 // Output elements are undefined if the element from each arm is undefined.
1376 // TODO: This can be improved. See comment in select condition handling.
1377 UndefElts
= UndefElts2
& UndefElts3
;
1380 case Instruction::BitCast
: {
1381 // Vector->vector casts only.
1382 VectorType
*VTy
= dyn_cast
<VectorType
>(I
->getOperand(0)->getType());
1384 unsigned InVWidth
= VTy
->getNumElements();
1385 APInt
InputDemandedElts(InVWidth
, 0);
1386 UndefElts2
= APInt(InVWidth
, 0);
1389 if (VWidth
== InVWidth
) {
1390 // If we are converting from <4 x i32> -> <4 x f32>, we demand the same
1391 // elements as are demanded of us.
1393 InputDemandedElts
= DemandedElts
;
1394 } else if ((VWidth
% InVWidth
) == 0) {
1395 // If the number of elements in the output is a multiple of the number of
1396 // elements in the input then an input element is live if any of the
1397 // corresponding output elements are live.
1398 Ratio
= VWidth
/ InVWidth
;
1399 for (unsigned OutIdx
= 0; OutIdx
!= VWidth
; ++OutIdx
)
1400 if (DemandedElts
[OutIdx
])
1401 InputDemandedElts
.setBit(OutIdx
/ Ratio
);
1402 } else if ((InVWidth
% VWidth
) == 0) {
1403 // If the number of elements in the input is a multiple of the number of
1404 // elements in the output then an input element is live if the
1405 // corresponding output element is live.
1406 Ratio
= InVWidth
/ VWidth
;
1407 for (unsigned InIdx
= 0; InIdx
!= InVWidth
; ++InIdx
)
1408 if (DemandedElts
[InIdx
/ Ratio
])
1409 InputDemandedElts
.setBit(InIdx
);
1411 // Unsupported so far.
1415 simplifyAndSetOp(I
, 0, InputDemandedElts
, UndefElts2
);
1417 if (VWidth
== InVWidth
) {
1418 UndefElts
= UndefElts2
;
1419 } else if ((VWidth
% InVWidth
) == 0) {
1420 // If the number of elements in the output is a multiple of the number of
1421 // elements in the input then an output element is undef if the
1422 // corresponding input element is undef.
1423 for (unsigned OutIdx
= 0; OutIdx
!= VWidth
; ++OutIdx
)
1424 if (UndefElts2
[OutIdx
/ Ratio
])
1425 UndefElts
.setBit(OutIdx
);
1426 } else if ((InVWidth
% VWidth
) == 0) {
1427 // If the number of elements in the input is a multiple of the number of
1428 // elements in the output then an output element is undef if all of the
1429 // corresponding input elements are undef.
1430 for (unsigned OutIdx
= 0; OutIdx
!= VWidth
; ++OutIdx
) {
1431 APInt SubUndef
= UndefElts2
.lshr(OutIdx
* Ratio
).zextOrTrunc(Ratio
);
1432 if (SubUndef
.countPopulation() == Ratio
)
1433 UndefElts
.setBit(OutIdx
);
1436 llvm_unreachable("Unimp");
1440 case Instruction::FPTrunc
:
1441 case Instruction::FPExt
:
1442 simplifyAndSetOp(I
, 0, DemandedElts
, UndefElts
);
1445 case Instruction::Call
: {
1446 IntrinsicInst
*II
= dyn_cast
<IntrinsicInst
>(I
);
1448 switch (II
->getIntrinsicID()) {
1449 case Intrinsic::masked_gather
: // fallthrough
1450 case Intrinsic::masked_load
: {
1451 // Subtlety: If we load from a pointer, the pointer must be valid
1452 // regardless of whether the element is demanded. Doing otherwise risks
1453 // segfaults which didn't exist in the original program.
1454 APInt
DemandedPtrs(APInt::getAllOnesValue(VWidth
)),
1455 DemandedPassThrough(DemandedElts
);
1456 if (auto *CV
= dyn_cast
<ConstantVector
>(II
->getOperand(2)))
1457 for (unsigned i
= 0; i
< VWidth
; i
++) {
1458 Constant
*CElt
= CV
->getAggregateElement(i
);
1459 if (CElt
->isNullValue())
1460 DemandedPtrs
.clearBit(i
);
1461 else if (CElt
->isAllOnesValue())
1462 DemandedPassThrough
.clearBit(i
);
1464 if (II
->getIntrinsicID() == Intrinsic::masked_gather
)
1465 simplifyAndSetOp(II
, 0, DemandedPtrs
, UndefElts2
);
1466 simplifyAndSetOp(II
, 3, DemandedPassThrough
, UndefElts3
);
1468 // Output elements are undefined if the element from both sources are.
1469 // TODO: can strengthen via mask as well.
1470 UndefElts
= UndefElts2
& UndefElts3
;
1473 case Intrinsic::x86_xop_vfrcz_ss
:
1474 case Intrinsic::x86_xop_vfrcz_sd
:
1475 // The instructions for these intrinsics are speced to zero upper bits not
1476 // pass them through like other scalar intrinsics. So we shouldn't just
1477 // use Arg0 if DemandedElts[0] is clear like we do for other intrinsics.
1478 // Instead we should return a zero vector.
1479 if (!DemandedElts
[0]) {
1481 return ConstantAggregateZero::get(II
->getType());
1484 // Only the lower element is used.
1486 simplifyAndSetOp(II
, 0, DemandedElts
, UndefElts
);
1488 // Only the lower element is undefined. The high elements are zero.
1489 UndefElts
= UndefElts
[0];
1492 // Unary scalar-as-vector operations that work column-wise.
1493 case Intrinsic::x86_sse_rcp_ss
:
1494 case Intrinsic::x86_sse_rsqrt_ss
:
1495 simplifyAndSetOp(II
, 0, DemandedElts
, UndefElts
);
1497 // If lowest element of a scalar op isn't used then use Arg0.
1498 if (!DemandedElts
[0]) {
1500 return II
->getArgOperand(0);
1502 // TODO: If only low elt lower SQRT to FSQRT (with rounding/exceptions
1506 // Binary scalar-as-vector operations that work column-wise. The high
1507 // elements come from operand 0. The low element is a function of both
1509 case Intrinsic::x86_sse_min_ss
:
1510 case Intrinsic::x86_sse_max_ss
:
1511 case Intrinsic::x86_sse_cmp_ss
:
1512 case Intrinsic::x86_sse2_min_sd
:
1513 case Intrinsic::x86_sse2_max_sd
:
1514 case Intrinsic::x86_sse2_cmp_sd
: {
1515 simplifyAndSetOp(II
, 0, DemandedElts
, UndefElts
);
1517 // If lowest element of a scalar op isn't used then use Arg0.
1518 if (!DemandedElts
[0]) {
1520 return II
->getArgOperand(0);
1523 // Only lower element is used for operand 1.
1525 simplifyAndSetOp(II
, 1, DemandedElts
, UndefElts2
);
1527 // Lower element is undefined if both lower elements are undefined.
1528 // Consider things like undef&0. The result is known zero, not undef.
1530 UndefElts
.clearBit(0);
1535 // Binary scalar-as-vector operations that work column-wise. The high
1536 // elements come from operand 0 and the low element comes from operand 1.
1537 case Intrinsic::x86_sse41_round_ss
:
1538 case Intrinsic::x86_sse41_round_sd
: {
1539 // Don't use the low element of operand 0.
1540 APInt DemandedElts2
= DemandedElts
;
1541 DemandedElts2
.clearBit(0);
1542 simplifyAndSetOp(II
, 0, DemandedElts2
, UndefElts
);
1544 // If lowest element of a scalar op isn't used then use Arg0.
1545 if (!DemandedElts
[0]) {
1547 return II
->getArgOperand(0);
1550 // Only lower element is used for operand 1.
1552 simplifyAndSetOp(II
, 1, DemandedElts
, UndefElts2
);
1554 // Take the high undef elements from operand 0 and take the lower element
1556 UndefElts
.clearBit(0);
1557 UndefElts
|= UndefElts2
[0];
1561 // Three input scalar-as-vector operations that work column-wise. The high
1562 // elements come from operand 0 and the low element is a function of all
1564 case Intrinsic::x86_avx512_mask_add_ss_round
:
1565 case Intrinsic::x86_avx512_mask_div_ss_round
:
1566 case Intrinsic::x86_avx512_mask_mul_ss_round
:
1567 case Intrinsic::x86_avx512_mask_sub_ss_round
:
1568 case Intrinsic::x86_avx512_mask_max_ss_round
:
1569 case Intrinsic::x86_avx512_mask_min_ss_round
:
1570 case Intrinsic::x86_avx512_mask_add_sd_round
:
1571 case Intrinsic::x86_avx512_mask_div_sd_round
:
1572 case Intrinsic::x86_avx512_mask_mul_sd_round
:
1573 case Intrinsic::x86_avx512_mask_sub_sd_round
:
1574 case Intrinsic::x86_avx512_mask_max_sd_round
:
1575 case Intrinsic::x86_avx512_mask_min_sd_round
:
1576 simplifyAndSetOp(II
, 0, DemandedElts
, UndefElts
);
1578 // If lowest element of a scalar op isn't used then use Arg0.
1579 if (!DemandedElts
[0]) {
1581 return II
->getArgOperand(0);
1584 // Only lower element is used for operand 1 and 2.
1586 simplifyAndSetOp(II
, 1, DemandedElts
, UndefElts2
);
1587 simplifyAndSetOp(II
, 2, DemandedElts
, UndefElts3
);
1589 // Lower element is undefined if all three lower elements are undefined.
1590 // Consider things like undef&0. The result is known zero, not undef.
1591 if (!UndefElts2
[0] || !UndefElts3
[0])
1592 UndefElts
.clearBit(0);
1596 case Intrinsic::x86_sse2_packssdw_128
:
1597 case Intrinsic::x86_sse2_packsswb_128
:
1598 case Intrinsic::x86_sse2_packuswb_128
:
1599 case Intrinsic::x86_sse41_packusdw
:
1600 case Intrinsic::x86_avx2_packssdw
:
1601 case Intrinsic::x86_avx2_packsswb
:
1602 case Intrinsic::x86_avx2_packusdw
:
1603 case Intrinsic::x86_avx2_packuswb
:
1604 case Intrinsic::x86_avx512_packssdw_512
:
1605 case Intrinsic::x86_avx512_packsswb_512
:
1606 case Intrinsic::x86_avx512_packusdw_512
:
1607 case Intrinsic::x86_avx512_packuswb_512
: {
1608 auto *Ty0
= II
->getArgOperand(0)->getType();
1609 unsigned InnerVWidth
= Ty0
->getVectorNumElements();
1610 assert(VWidth
== (InnerVWidth
* 2) && "Unexpected input size");
1612 unsigned NumLanes
= Ty0
->getPrimitiveSizeInBits() / 128;
1613 unsigned VWidthPerLane
= VWidth
/ NumLanes
;
1614 unsigned InnerVWidthPerLane
= InnerVWidth
/ NumLanes
;
1616 // Per lane, pack the elements of the first input and then the second.
1618 // v8i16 PACK(v4i32 X, v4i32 Y) - (X[0..3],Y[0..3])
1619 // v32i8 PACK(v16i16 X, v16i16 Y) - (X[0..7],Y[0..7]),(X[8..15],Y[8..15])
1620 for (int OpNum
= 0; OpNum
!= 2; ++OpNum
) {
1621 APInt
OpDemandedElts(InnerVWidth
, 0);
1622 for (unsigned Lane
= 0; Lane
!= NumLanes
; ++Lane
) {
1623 unsigned LaneIdx
= Lane
* VWidthPerLane
;
1624 for (unsigned Elt
= 0; Elt
!= InnerVWidthPerLane
; ++Elt
) {
1625 unsigned Idx
= LaneIdx
+ Elt
+ InnerVWidthPerLane
* OpNum
;
1626 if (DemandedElts
[Idx
])
1627 OpDemandedElts
.setBit((Lane
* InnerVWidthPerLane
) + Elt
);
1631 // Demand elements from the operand.
1632 APInt
OpUndefElts(InnerVWidth
, 0);
1633 simplifyAndSetOp(II
, OpNum
, OpDemandedElts
, OpUndefElts
);
1635 // Pack the operand's UNDEF elements, one lane at a time.
1636 OpUndefElts
= OpUndefElts
.zext(VWidth
);
1637 for (unsigned Lane
= 0; Lane
!= NumLanes
; ++Lane
) {
1638 APInt LaneElts
= OpUndefElts
.lshr(InnerVWidthPerLane
* Lane
);
1639 LaneElts
= LaneElts
.getLoBits(InnerVWidthPerLane
);
1640 LaneElts
<<= InnerVWidthPerLane
* (2 * Lane
+ OpNum
);
1641 UndefElts
|= LaneElts
;
1648 case Intrinsic::x86_ssse3_pshuf_b_128
:
1649 case Intrinsic::x86_avx2_pshuf_b
:
1650 case Intrinsic::x86_avx512_pshuf_b_512
:
1652 case Intrinsic::x86_avx_vpermilvar_ps
:
1653 case Intrinsic::x86_avx_vpermilvar_ps_256
:
1654 case Intrinsic::x86_avx512_vpermilvar_ps_512
:
1655 case Intrinsic::x86_avx_vpermilvar_pd
:
1656 case Intrinsic::x86_avx_vpermilvar_pd_256
:
1657 case Intrinsic::x86_avx512_vpermilvar_pd_512
:
1659 case Intrinsic::x86_avx2_permd
:
1660 case Intrinsic::x86_avx2_permps
: {
1661 simplifyAndSetOp(II
, 1, DemandedElts
, UndefElts
);
1665 // SSE4A instructions leave the upper 64-bits of the 128-bit result
1666 // in an undefined state.
1667 case Intrinsic::x86_sse4a_extrq
:
1668 case Intrinsic::x86_sse4a_extrqi
:
1669 case Intrinsic::x86_sse4a_insertq
:
1670 case Intrinsic::x86_sse4a_insertqi
:
1671 UndefElts
.setHighBits(VWidth
/ 2);
1673 case Intrinsic::amdgcn_buffer_load
:
1674 case Intrinsic::amdgcn_buffer_load_format
:
1675 case Intrinsic::amdgcn_raw_buffer_load
:
1676 case Intrinsic::amdgcn_raw_buffer_load_format
:
1677 case Intrinsic::amdgcn_struct_buffer_load
:
1678 case Intrinsic::amdgcn_struct_buffer_load_format
:
1679 return simplifyAMDGCNMemoryIntrinsicDemanded(II
, DemandedElts
);
1681 if (getAMDGPUImageDMaskIntrinsic(II
->getIntrinsicID()))
1682 return simplifyAMDGCNMemoryIntrinsicDemanded(II
, DemandedElts
, 0);
1686 } // switch on IntrinsicID
1689 } // switch on Opcode
1691 // TODO: We bail completely on integer div/rem and shifts because they have
1692 // UB/poison potential, but that should be refined.
1694 if (match(I
, m_BinOp(BO
)) && !BO
->isIntDivRem() && !BO
->isShift()) {
1695 simplifyAndSetOp(I
, 0, DemandedElts
, UndefElts
);
1696 simplifyAndSetOp(I
, 1, DemandedElts
, UndefElts2
);
1698 // Any change to an instruction with potential poison must clear those flags
1699 // because we can not guarantee those constraints now. Other analysis may
1700 // determine that it is safe to re-apply the flags.
1702 BO
->dropPoisonGeneratingFlags();
1704 // Output elements are undefined if both are undefined. Consider things
1705 // like undef & 0. The result is known zero, not undef.
1706 UndefElts
&= UndefElts2
;
1709 // If we've proven all of the lanes undef, return an undef value.
1710 // TODO: Intersect w/demanded lanes
1711 if (UndefElts
.isAllOnesValue())
1712 return UndefValue::get(I
->getType());;
1714 return MadeChange
? I
: nullptr;