//===- InstCombineSimplifyDemanded.cpp ------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains logic for simplifying instructions based on information
// about how they are used.
//
//===----------------------------------------------------------------------===//

#include "InstCombineInternal.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/KnownBits.h"
using namespace llvm;
using namespace llvm::PatternMatch;

#define DEBUG_TYPE "instcombine"
namespace {

struct AMDGPUImageDMaskIntrinsic {
  unsigned Intr;
};

#define GET_AMDGPUImageDMaskIntrinsicTable_IMPL
#include "InstCombineTables.inc"

} // end anonymous namespace
/// Check to see if the specified operand of the specified instruction is a
/// constant integer. If so, check to see if there are any bits set in the
/// constant that are not demanded. If so, shrink the constant and return true.
static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo,
                                   const APInt &Demanded) {
  assert(I && "No instruction?");
  assert(OpNo < I->getNumOperands() && "Operand index too large");

  // The operand must be a constant integer or splat integer.
  Value *Op = I->getOperand(OpNo);
  const APInt *C;
  if (!match(Op, m_APInt(C)))
    return false;

  // If there are no bits set that aren't demanded, nothing to do.
  if (C->isSubsetOf(Demanded))
    return false;

  // This instruction is producing bits that are not demanded. Shrink the RHS.
  I->setOperand(OpNo, ConstantInt::get(Op->getType(), *C & Demanded));

  return true;
}
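// An illustrative example (assumed, not part of the original source): given
//   %r = and i32 %x, 255        ; OpNo == 1, Demanded == 0x0F
// the constant 255 sets bits outside Demanded, so it is shrunk in place to
//   %r = and i32 %x, 15
// without changing any demanded bit of the result.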
/// Inst is an integer instruction that SimplifyDemandedBits knows about. See if
/// the instruction has any properties that allow us to simplify its operands.
bool InstCombiner::SimplifyDemandedInstructionBits(Instruction &Inst) {
  unsigned BitWidth = Inst.getType()->getScalarSizeInBits();
  KnownBits Known(BitWidth);
  APInt DemandedMask(APInt::getAllOnesValue(BitWidth));

  Value *V = SimplifyDemandedUseBits(&Inst, DemandedMask, Known,
                                     0, &Inst);
  if (!V) return false;
  if (V == &Inst) return true;
  replaceInstUsesWith(Inst, V);
  return true;
}
/// This form of SimplifyDemandedBits simplifies the specified instruction
/// operand if possible, updating it in place. It returns true if it made any
/// change and false otherwise.
bool InstCombiner::SimplifyDemandedBits(Instruction *I, unsigned OpNo,
                                        const APInt &DemandedMask,
                                        KnownBits &Known, unsigned Depth) {
  Use &U = I->getOperandUse(OpNo);
  Value *NewVal = SimplifyDemandedUseBits(U.get(), DemandedMask, Known,
                                          Depth, I);
  if (!NewVal) return false;
  U = NewVal;
  return true;
}
/// This function attempts to replace V with a simpler value based on the
/// demanded bits. When this function is called, it is known that only the bits
/// set in DemandedMask of the result of V are ever used downstream.
/// Consequently, depending on the mask and V, it may be possible to replace V
/// with a constant or one of its operands. In such cases, this function does
/// the replacement and returns true. In all other cases, it returns false
/// after analyzing the expression: Known.One contains all the bits that are
/// known to be one in the expression, and Known.Zero contains all the bits
/// that are known to be zero in the expression. These are provided to
/// potentially allow the caller (which might recursively be
/// SimplifyDemandedBits itself) to simplify the expression.
///
/// Known.One and Known.Zero always follow the invariant that:
///   Known.One & Known.Zero == 0.
/// That is, a bit can't be both 1 and 0. Note that the bits in Known.One and
/// Known.Zero may only be accurate for those bits set in DemandedMask. Note
/// also that the bitwidth of V, DemandedMask, Known.Zero and Known.One must
/// all be the same.
///
/// This returns null if it did not change anything and it permits no
/// simplification. This returns V itself if it did some simplification of V's
/// operands based on the information about what bits are demanded. This
/// returns some other non-null value if it found out that V is equal to
/// another value in the context where the specified bits are demanded, but
/// not for all users.
Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
                                             KnownBits &Known, unsigned Depth,
                                             Instruction *CxtI) {
  assert(V != nullptr && "Null pointer of Value???");
  assert(Depth <= 6 && "Limit Search Depth");
  uint32_t BitWidth = DemandedMask.getBitWidth();
  Type *VTy = V->getType();
  assert(
      (!VTy->isIntOrIntVectorTy() || VTy->getScalarSizeInBits() == BitWidth) &&
      Known.getBitWidth() == BitWidth &&
      "Value *V, DemandedMask and Known must have same BitWidth");

  if (isa<Constant>(V)) {
    computeKnownBits(V, Known, Depth, CxtI);
    return nullptr;
  }

  Known.resetAll();
  if (DemandedMask.isNullValue()) // Not demanding any bits from V.
    return UndefValue::get(VTy);

  if (Depth == 6) // Limit search depth.
    return nullptr;

  Instruction *I = dyn_cast<Instruction>(V);
  if (!I) {
    computeKnownBits(V, Known, Depth, CxtI);
    return nullptr; // Only analyze instructions.
  }
  // If there are multiple uses of this value and we aren't at the root, then
  // we can't do any simplifications of the operands, because DemandedMask
  // only reflects the bits demanded by *one* of the users.
  if (Depth != 0 && !I->hasOneUse())
    return SimplifyMultipleUseDemandedBits(I, DemandedMask, Known, Depth, CxtI);

  KnownBits LHSKnown(BitWidth), RHSKnown(BitWidth);

  // If this is the root being simplified, allow it to have multiple uses,
  // just set the DemandedMask to all bits so that we can try to simplify the
  // operands. This allows visitTruncInst (for example) to simplify the
  // operand of a trunc without duplicating all the logic below.
  if (Depth == 0 && !V->hasOneUse())
    DemandedMask.setAllBits();

  switch (I->getOpcode()) {
  default:
    computeKnownBits(I, Known, Depth, CxtI);
    break;
  case Instruction::And: {
    // If either the LHS or the RHS are Zero, the result is zero.
    if (SimplifyDemandedBits(I, 1, DemandedMask, RHSKnown, Depth + 1) ||
        SimplifyDemandedBits(I, 0, DemandedMask & ~RHSKnown.Zero, LHSKnown,
                             Depth + 1))
      return I;
    assert(!RHSKnown.hasConflict() && "Bits known to be one AND zero?");
    assert(!LHSKnown.hasConflict() && "Bits known to be one AND zero?");

    // Output known-0 bits are known to be clear if clear in either the LHS or
    // the RHS.
    APInt IKnownZero = RHSKnown.Zero | LHSKnown.Zero;
    // Output known-1 bits are only known if set in both the LHS & RHS.
    APInt IKnownOne = RHSKnown.One & LHSKnown.One;

    // If the client is only demanding bits that we know, return the known
    // constant.
    if (DemandedMask.isSubsetOf(IKnownZero|IKnownOne))
      return Constant::getIntegerValue(VTy, IKnownOne);

    // If all of the demanded bits are known 1 on one side, return the other.
    // These bits cannot contribute to the result of the 'and'.
    if (DemandedMask.isSubsetOf(LHSKnown.Zero | RHSKnown.One))
      return I->getOperand(0);
    if (DemandedMask.isSubsetOf(RHSKnown.Zero | LHSKnown.One))
      return I->getOperand(1);

    // If the RHS is a constant, see if we can simplify it.
    if (ShrinkDemandedConstant(I, 1, DemandedMask & ~LHSKnown.Zero))
      return I;

    Known.Zero = std::move(IKnownZero);
    Known.One  = std::move(IKnownOne);
    break;
  }
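  // An illustrative example (assumed, not from the original source): for
  //   %a = and i32 %x, 240
  // with DemandedMask == 0xF, every demanded bit is known zero (240 has no
  // bits below bit 4), so the isSubsetOf check above replaces the 'and' with
  // a constant that is zero in all demanded bits.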
  case Instruction::Or: {
    // If either the LHS or the RHS are One, the result is One.
    if (SimplifyDemandedBits(I, 1, DemandedMask, RHSKnown, Depth + 1) ||
        SimplifyDemandedBits(I, 0, DemandedMask & ~RHSKnown.One, LHSKnown,
                             Depth + 1))
      return I;
    assert(!RHSKnown.hasConflict() && "Bits known to be one AND zero?");
    assert(!LHSKnown.hasConflict() && "Bits known to be one AND zero?");

    // Output known-0 bits are only known if clear in both the LHS & RHS.
    APInt IKnownZero = RHSKnown.Zero & LHSKnown.Zero;
    // Output known-1 bits are known to be set if set in either the LHS or RHS.
    APInt IKnownOne = RHSKnown.One | LHSKnown.One;

    // If the client is only demanding bits that we know, return the known
    // constant.
    if (DemandedMask.isSubsetOf(IKnownZero|IKnownOne))
      return Constant::getIntegerValue(VTy, IKnownOne);

    // If all of the demanded bits are known zero on one side, return the other.
    // These bits cannot contribute to the result of the 'or'.
    if (DemandedMask.isSubsetOf(LHSKnown.One | RHSKnown.Zero))
      return I->getOperand(0);
    if (DemandedMask.isSubsetOf(RHSKnown.One | LHSKnown.Zero))
      return I->getOperand(1);

    // If the RHS is a constant, see if we can simplify it.
    if (ShrinkDemandedConstant(I, 1, DemandedMask))
      return I;

    Known.Zero = std::move(IKnownZero);
    Known.One  = std::move(IKnownOne);
    break;
  }
  case Instruction::Xor: {
    if (SimplifyDemandedBits(I, 1, DemandedMask, RHSKnown, Depth + 1) ||
        SimplifyDemandedBits(I, 0, DemandedMask, LHSKnown, Depth + 1))
      return I;
    assert(!RHSKnown.hasConflict() && "Bits known to be one AND zero?");
    assert(!LHSKnown.hasConflict() && "Bits known to be one AND zero?");

    // Output known-0 bits are known if clear or set in both the LHS & RHS.
    APInt IKnownZero = (RHSKnown.Zero & LHSKnown.Zero) |
                       (RHSKnown.One & LHSKnown.One);
    // Output known-1 are known to be set if set in only one of the LHS, RHS.
    APInt IKnownOne = (RHSKnown.Zero & LHSKnown.One) |
                      (RHSKnown.One & LHSKnown.Zero);

    // If the client is only demanding bits that we know, return the known
    // constant.
    if (DemandedMask.isSubsetOf(IKnownZero|IKnownOne))
      return Constant::getIntegerValue(VTy, IKnownOne);

    // If all of the demanded bits are known zero on one side, return the other.
    // These bits cannot contribute to the result of the 'xor'.
    if (DemandedMask.isSubsetOf(RHSKnown.Zero))
      return I->getOperand(0);
    if (DemandedMask.isSubsetOf(LHSKnown.Zero))
      return I->getOperand(1);

    // If all of the demanded bits are known to be zero on one side or the
    // other, turn this into an *inclusive* or.
    // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
    if (DemandedMask.isSubsetOf(RHSKnown.Zero | LHSKnown.Zero)) {
      Instruction *Or =
          BinaryOperator::CreateOr(I->getOperand(0), I->getOperand(1),
                                   I->getName());
      return InsertNewInstWith(Or, *I);
    }

    // If all of the demanded bits on one side are known, and all of the set
    // bits on that side are also known to be set on the other side, turn this
    // into an AND, as we know the bits will be cleared.
    // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
    if (DemandedMask.isSubsetOf(RHSKnown.Zero|RHSKnown.One) &&
        RHSKnown.One.isSubsetOf(LHSKnown.One)) {
      Constant *AndC = Constant::getIntegerValue(VTy,
                                                 ~RHSKnown.One & DemandedMask);
      Instruction *And = BinaryOperator::CreateAnd(I->getOperand(0), AndC);
      return InsertNewInstWith(And, *I);
    }

    // If the RHS is a constant, see if we can simplify it.
    // FIXME: for XOR, we prefer to force bits to 1 if they will make a -1.
    if (ShrinkDemandedConstant(I, 1, DemandedMask))
      return I;

    // If our LHS is an 'and' and if it has one use, and if any of the bits we
    // are flipping are known to be set, then the xor is just resetting those
    // bits to zero. We can just knock out bits from the 'and' and the 'xor',
    // simplifying both of them.
    if (Instruction *LHSInst = dyn_cast<Instruction>(I->getOperand(0)))
      if (LHSInst->getOpcode() == Instruction::And && LHSInst->hasOneUse() &&
          isa<ConstantInt>(I->getOperand(1)) &&
          isa<ConstantInt>(LHSInst->getOperand(1)) &&
          (LHSKnown.One & RHSKnown.One & DemandedMask) != 0) {
        ConstantInt *AndRHS = cast<ConstantInt>(LHSInst->getOperand(1));
        ConstantInt *XorRHS = cast<ConstantInt>(I->getOperand(1));
        APInt NewMask = ~(LHSKnown.One & RHSKnown.One & DemandedMask);

        Constant *AndC =
            ConstantInt::get(I->getType(), NewMask & AndRHS->getValue());
        Instruction *NewAnd = BinaryOperator::CreateAnd(I->getOperand(0), AndC);
        InsertNewInstWith(NewAnd, *I);

        Constant *XorC =
            ConstantInt::get(I->getType(), NewMask & XorRHS->getValue());
        Instruction *NewXor = BinaryOperator::CreateXor(NewAnd, XorC);
        return InsertNewInstWith(NewXor, *I);
      }

    // Output known-0 bits are known if clear or set in both the LHS & RHS.
    Known.Zero = std::move(IKnownZero);
    // Output known-1 are known to be set if set in only one of the LHS, RHS.
    Known.One  = std::move(IKnownOne);
    break;
  }
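  // An illustrative example (assumed, not from the original source): given
  //   %a = or i32 %b, 12
  //   %x = xor i32 %a, 12
  // RHSKnown is fully known (the constant 12) and its one bits are known one
  // in the LHS, so the xor is rewritten to clear those bits instead:
  //   %x = and i32 %a, -13      ; i.e. (%b | 12) ^ 12 == (%b | 12) & ~12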
  case Instruction::Select: {
    Value *LHS, *RHS;
    SelectPatternFlavor SPF = matchSelectPattern(I, LHS, RHS).Flavor;
    if (SPF == SPF_UMAX) {
      // UMax(A, C) == A if ...
      // The lowest non-zero bit of DemandMask is higher than the highest
      // non-zero bit of C.
      const APInt *C;
      unsigned CTZ = DemandedMask.countTrailingZeros();
      if (match(RHS, m_APInt(C)) && CTZ >= C->getActiveBits())
        return LHS;
    } else if (SPF == SPF_UMIN) {
      // UMin(A, C) == A if ...
      // The lowest non-zero bit of DemandMask is higher than the highest
      // non-one bit of C.
      // This comes from using DeMorgans on the above umax example.
      const APInt *C;
      unsigned CTZ = DemandedMask.countTrailingZeros();
      if (match(RHS, m_APInt(C)) &&
          CTZ >= C->getBitWidth() - C->countLeadingOnes())
        return LHS;
    }

    // If this is a select as part of any other min/max pattern, don't simplify
    // any further in case we break the structure.
    if (SPF != SPF_UNKNOWN)
      return nullptr;

    if (SimplifyDemandedBits(I, 2, DemandedMask, RHSKnown, Depth + 1) ||
        SimplifyDemandedBits(I, 1, DemandedMask, LHSKnown, Depth + 1))
      return I;
    assert(!RHSKnown.hasConflict() && "Bits known to be one AND zero?");
    assert(!LHSKnown.hasConflict() && "Bits known to be one AND zero?");

    // If the operands are constants, see if we can simplify them.
    if (ShrinkDemandedConstant(I, 1, DemandedMask) ||
        ShrinkDemandedConstant(I, 2, DemandedMask))
      return I;

    // Only known if known in both the LHS and RHS.
    Known.One  = RHSKnown.One  & LHSKnown.One;
    Known.Zero = RHSKnown.Zero & LHSKnown.Zero;
    break;
  }
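  // An illustrative example (assumed, not from the original source):
  //   %c = icmp ugt i32 %a, 16
  //   %m = select i1 %c, i32 %a, i32 16    ; UMax(%a, 16)
  // With DemandedMask == 0xE0, CTZ == 5 >= getActiveBits(16) == 5, so every
  // demanded bit of the umax matches %a and the select is replaced by %a.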
  case Instruction::ZExt:
  case Instruction::Trunc: {
    unsigned SrcBitWidth = I->getOperand(0)->getType()->getScalarSizeInBits();

    APInt InputDemandedMask = DemandedMask.zextOrTrunc(SrcBitWidth);
    KnownBits InputKnown(SrcBitWidth);
    if (SimplifyDemandedBits(I, 0, InputDemandedMask, InputKnown, Depth + 1))
      return I;
    Known = InputKnown.zextOrTrunc(BitWidth);
    // Any top bits are known to be zero.
    if (BitWidth > SrcBitWidth)
      Known.Zero.setBitsFrom(SrcBitWidth);
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
    break;
  }
  case Instruction::BitCast:
    if (!I->getOperand(0)->getType()->isIntOrIntVectorTy())
      return nullptr; // vector->int or fp->int?

    if (VectorType *DstVTy = dyn_cast<VectorType>(I->getType())) {
      if (VectorType *SrcVTy =
            dyn_cast<VectorType>(I->getOperand(0)->getType())) {
        if (DstVTy->getNumElements() != SrcVTy->getNumElements())
          // Don't touch a bitcast between vectors of different element counts.
          return nullptr;
      } else
        // Don't touch a scalar-to-vector bitcast.
        return nullptr;
    } else if (I->getOperand(0)->getType()->isVectorTy())
      // Don't touch a vector-to-scalar bitcast.
      return nullptr;

    if (SimplifyDemandedBits(I, 0, DemandedMask, Known, Depth + 1))
      return I;
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
    break;
  case Instruction::SExt: {
    // Compute the bits in the result that are not present in the input.
    unsigned SrcBitWidth = I->getOperand(0)->getType()->getScalarSizeInBits();

    APInt InputDemandedBits = DemandedMask.trunc(SrcBitWidth);

    // If any of the sign extended bits are demanded, we know that the sign
    // bit is demanded.
    if (DemandedMask.getActiveBits() > SrcBitWidth)
      InputDemandedBits.setBit(SrcBitWidth-1);

    KnownBits InputKnown(SrcBitWidth);
    if (SimplifyDemandedBits(I, 0, InputDemandedBits, InputKnown, Depth + 1))
      return I;

    // If the input sign bit is known zero, or if the NewBits are not demanded,
    // convert this into a zero extension.
    if (InputKnown.isNonNegative() ||
        DemandedMask.getActiveBits() <= SrcBitWidth) {
      // Convert to ZExt cast.
      CastInst *NewCast = new ZExtInst(I->getOperand(0), VTy, I->getName());
      return InsertNewInstWith(NewCast, *I);
    }

    // If the sign bit of the input is known set or clear, then we know the
    // top bits of the result.
    Known = InputKnown.sext(BitWidth);
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
    break;
  }
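  // An illustrative example (assumed, not from the original source): for
  //   %e = sext i16 %x to i32
  // with DemandedMask == 0xFFFF, none of the extended bits are demanded
  // (getActiveBits() <= 16), so the sext is rewritten to
  //   %e = zext i16 %x to i32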
  case Instruction::Add:
  case Instruction::Sub: {
    /// If the high-bits of an ADD/SUB are not demanded, then we do not care
    /// about the high bits of the operands.
    unsigned NLZ = DemandedMask.countLeadingZeros();
    // Right fill the mask of bits for this ADD/SUB to demand the most
    // significant bit and all those below it.
    APInt DemandedFromOps(APInt::getLowBitsSet(BitWidth, BitWidth-NLZ));
    if (ShrinkDemandedConstant(I, 0, DemandedFromOps) ||
        SimplifyDemandedBits(I, 0, DemandedFromOps, LHSKnown, Depth + 1) ||
        ShrinkDemandedConstant(I, 1, DemandedFromOps) ||
        SimplifyDemandedBits(I, 1, DemandedFromOps, RHSKnown, Depth + 1)) {
      // Disable the nsw and nuw flags here: We can no longer guarantee that
      // we won't wrap after simplification. Removing the nsw/nuw flags is
      // legal here because the top bit is not demanded.
      BinaryOperator &BinOP = *cast<BinaryOperator>(I);
      BinOP.setHasNoSignedWrap(false);
      BinOP.setHasNoUnsignedWrap(false);
      return I;
    }

    // If we are known to be adding/subtracting zeros to every bit below
    // the highest demanded bit, we just return the other side.
    if (DemandedFromOps.isSubsetOf(RHSKnown.Zero))
      return I->getOperand(0);
    // We can't do this with the LHS for subtraction, unless we are only
    // demanding the LSB.
    if ((I->getOpcode() == Instruction::Add ||
         DemandedFromOps.isOneValue()) &&
        DemandedFromOps.isSubsetOf(LHSKnown.Zero))
      return I->getOperand(1);

    // Otherwise just compute the known bits of the result.
    bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
    Known = KnownBits::computeForAddSub(I->getOpcode() == Instruction::Add,
                                        NSW, LHSKnown, RHSKnown);
    break;
  }
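  // An illustrative example (assumed, not from the original source): given
  //   %y = shl i32 %a, 16
  //   %s = add i32 %x, %y
  // with DemandedMask == 0xFF, the RHS is known zero in all demanded bits,
  // so the add is replaced by %x (adding zeros below the highest demanded
  // bit).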
  case Instruction::Shl: {
    const APInt *SA;
    if (match(I->getOperand(1), m_APInt(SA))) {
      const APInt *ShrAmt;
      if (match(I->getOperand(0), m_Shr(m_Value(), m_APInt(ShrAmt))))
        if (Instruction *Shr = dyn_cast<Instruction>(I->getOperand(0)))
          if (Value *R = simplifyShrShlDemandedBits(Shr, *ShrAmt, I, *SA,
                                                    DemandedMask, Known))
            return R;

      uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1);
      APInt DemandedMaskIn(DemandedMask.lshr(ShiftAmt));

      // If the shift is NUW/NSW, then it does demand the high bits.
      ShlOperator *IOp = cast<ShlOperator>(I);
      if (IOp->hasNoSignedWrap())
        DemandedMaskIn.setHighBits(ShiftAmt+1);
      else if (IOp->hasNoUnsignedWrap())
        DemandedMaskIn.setHighBits(ShiftAmt);

      if (SimplifyDemandedBits(I, 0, DemandedMaskIn, Known, Depth + 1))
        return I;
      assert(!Known.hasConflict() && "Bits known to be one AND zero?");
      Known.Zero <<= ShiftAmt;
      Known.One  <<= ShiftAmt;
      // low bits known zero.
      if (ShiftAmt)
        Known.Zero.setLowBits(ShiftAmt);
    }
    break;
  }
  case Instruction::LShr: {
    const APInt *SA;
    if (match(I->getOperand(1), m_APInt(SA))) {
      uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1);

      // Unsigned shift right.
      APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt));

      // If the shift is exact, then it does demand the low bits (and knows
      // that they are zero).
      if (cast<LShrOperator>(I)->isExact())
        DemandedMaskIn.setLowBits(ShiftAmt);

      if (SimplifyDemandedBits(I, 0, DemandedMaskIn, Known, Depth + 1))
        return I;
      assert(!Known.hasConflict() && "Bits known to be one AND zero?");
      Known.Zero.lshrInPlace(ShiftAmt);
      Known.One.lshrInPlace(ShiftAmt);
      if (ShiftAmt)
        Known.Zero.setHighBits(ShiftAmt);  // high bits known zero.
    }
    break;
  }
  case Instruction::AShr: {
    // If this is an arithmetic shift right and only the low-bit is set, we can
    // always convert this into a logical shr, even if the shift amount is
    // variable. The low bit of the shift cannot be an input sign bit unless
    // the shift amount is >= the size of the datatype, which is undefined.
    if (DemandedMask.isOneValue()) {
      // Perform the logical shift right.
      Instruction *NewVal = BinaryOperator::CreateLShr(
                        I->getOperand(0), I->getOperand(1), I->getName());
      return InsertNewInstWith(NewVal, *I);
    }

    // If the sign bit is the only bit demanded by this ashr, then there is no
    // need to do it, the shift doesn't change the high bit.
    if (DemandedMask.isSignMask())
      return I->getOperand(0);

    const APInt *SA;
    if (match(I->getOperand(1), m_APInt(SA))) {
      uint32_t ShiftAmt = SA->getLimitedValue(BitWidth-1);

      // Signed shift right.
      APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt));
      // If any of the high bits are demanded, we should set the sign bit as
      // demanded.
      if (DemandedMask.countLeadingZeros() <= ShiftAmt)
        DemandedMaskIn.setSignBit();

      // If the shift is exact, then it does demand the low bits (and knows
      // that they are zero).
      if (cast<AShrOperator>(I)->isExact())
        DemandedMaskIn.setLowBits(ShiftAmt);

      if (SimplifyDemandedBits(I, 0, DemandedMaskIn, Known, Depth + 1))
        return I;

      unsigned SignBits = ComputeNumSignBits(I->getOperand(0), Depth + 1, CxtI);

      assert(!Known.hasConflict() && "Bits known to be one AND zero?");
      // Compute the new bits that are at the top now plus sign bits.
      APInt HighBits(APInt::getHighBitsSet(
          BitWidth, std::min(SignBits + ShiftAmt - 1, BitWidth)));
      Known.Zero.lshrInPlace(ShiftAmt);
      Known.One.lshrInPlace(ShiftAmt);

      // If the input sign bit is known to be zero, or if none of the top bits
      // are demanded, turn this into an unsigned shift right.
      assert(BitWidth > ShiftAmt && "Shift amount not saturated?");
      if (Known.Zero[BitWidth-ShiftAmt-1] ||
          !DemandedMask.intersects(HighBits)) {
        BinaryOperator *LShr = BinaryOperator::CreateLShr(I->getOperand(0),
                                                          I->getOperand(1));
        LShr->setIsExact(cast<BinaryOperator>(I)->isExact());
        return InsertNewInstWith(LShr, *I);
      } else if (Known.One[BitWidth-ShiftAmt-1]) { // New bits are known one.
        Known.One |= HighBits;
      }
    }
    break;
  }
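  // An illustrative example (assumed, not from the original source): given
  //   %y = lshr i32 %x, 30      ; the top two bits of %y are known zero
  //   %a = ashr i32 %y, 1
  // the shifted-in sign bit is known zero, so the ashr is replaced by
  //   %a = lshr i32 %y, 1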
  case Instruction::UDiv: {
    // UDiv doesn't demand low bits that are zero in the divisor.
    const APInt *SA;
    if (match(I->getOperand(1), m_APInt(SA))) {
      // If the division is exact, then it does demand the low bits.
      if (cast<UDivOperator>(I)->isExact())
        break;

      // FIXME: Take the demanded mask of the result into account.
      unsigned RHSTrailingZeros = SA->countTrailingZeros();
      APInt DemandedMaskIn =
          APInt::getHighBitsSet(BitWidth, BitWidth - RHSTrailingZeros);
      if (SimplifyDemandedBits(I, 0, DemandedMaskIn, LHSKnown, Depth + 1))
        return I;

      // Propagate zero bits from the input.
      Known.Zero.setHighBits(std::min(
          BitWidth, LHSKnown.Zero.countLeadingOnes() + RHSTrailingZeros));
    }
    break;
  }
  case Instruction::SRem:
    if (ConstantInt *Rem = dyn_cast<ConstantInt>(I->getOperand(1))) {
      // X % -1 demands all the bits because we don't want to introduce
      // INT_MIN % -1 (== undef) by accident.
      if (Rem->isMinusOne())
        break;
      APInt RA = Rem->getValue().abs();
      if (RA.isPowerOf2()) {
        if (DemandedMask.ult(RA))    // srem won't affect demanded bits
          return I->getOperand(0);

        APInt LowBits = RA - 1;
        APInt Mask2 = LowBits | APInt::getSignMask(BitWidth);
        if (SimplifyDemandedBits(I, 0, Mask2, LHSKnown, Depth + 1))
          return I;

        // The low bits of LHS are unchanged by the srem.
        Known.Zero = LHSKnown.Zero & LowBits;
        Known.One = LHSKnown.One & LowBits;

        // If LHS is non-negative or has all low bits zero, then the upper bits
        // are all zero.
        if (LHSKnown.isNonNegative() || LowBits.isSubsetOf(LHSKnown.Zero))
          Known.Zero |= ~LowBits;

        // If LHS is negative and not all low bits are zero, then the upper
        // bits are all one.
        if (LHSKnown.isNegative() && LowBits.intersects(LHSKnown.One))
          Known.One |= ~LowBits;

        assert(!Known.hasConflict() && "Bits known to be one AND zero?");
        break;
      }
    }

    // The sign bit is the LHS's sign bit, except when the result of the
    // remainder is zero.
    if (DemandedMask.isSignBitSet()) {
      computeKnownBits(I->getOperand(0), LHSKnown, Depth + 1, CxtI);
      // If it's known zero, our sign bit is also zero.
      if (LHSKnown.isNonNegative())
        Known.makeNonNegative();
    }
    break;
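  // An illustrative example (assumed, not from the original source): for
  //   %r = srem i32 %x, 8
  // with DemandedMask == 7, the remainder leaves the low three bits of %x
  // unchanged (DemandedMask.ult(8)), so %r is replaced by %x.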
  case Instruction::URem: {
    KnownBits Known2(BitWidth);
    APInt AllOnes = APInt::getAllOnesValue(BitWidth);
    if (SimplifyDemandedBits(I, 0, AllOnes, Known2, Depth + 1) ||
        SimplifyDemandedBits(I, 1, AllOnes, Known2, Depth + 1))
      return I;

    unsigned Leaders = Known2.countMinLeadingZeros();
    Known.Zero = APInt::getHighBitsSet(BitWidth, Leaders) & DemandedMask;
    break;
  }
  case Instruction::Call:
    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
      switch (II->getIntrinsicID()) {
      default: break;
      case Intrinsic::bswap: {
        // If the only bits demanded come from one byte of the bswap result,
        // just shift the input byte into position to eliminate the bswap.
        unsigned NLZ = DemandedMask.countLeadingZeros();
        unsigned NTZ = DemandedMask.countTrailingZeros();

        // Round NTZ down to the next byte. If we have 11 trailing zeros, then
        // we need all the bits down to bit 8. Likewise, round NLZ. If we
        // have 14 leading zeros, round to 8.
        NLZ &= ~7;
        NTZ &= ~7;
        // If we need exactly one byte, we can do this transformation.
        if (BitWidth-NLZ-NTZ == 8) {
          unsigned ResultBit = NTZ;
          unsigned InputBit = BitWidth-NTZ-8;

          // Replace this with either a left or right shift to get the byte
          // into the right place.
          Instruction *NewVal;
          if (InputBit > ResultBit)
            NewVal = BinaryOperator::CreateLShr(II->getArgOperand(0),
                     ConstantInt::get(I->getType(), InputBit-ResultBit));
          else
            NewVal = BinaryOperator::CreateShl(II->getArgOperand(0),
                     ConstantInt::get(I->getType(), ResultBit-InputBit));
          NewVal->takeName(I);
          return InsertNewInstWith(NewVal, *I);
        }

        // TODO: Could compute known zero/one bits based on the input.
        break;
      }
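      // An illustrative example (assumed, not from the original source): for
      //   %b = call i32 @llvm.bswap.i32(i32 %x)
      // with DemandedMask == 0x0000FF00, NLZ == 16 and NTZ == 8, so exactly
      // one byte is demanded. That byte of %b comes from bits 16..23 of %x
      // (InputBit == 16 > ResultBit == 8), so the bswap is replaced by
      //   %b = lshr i32 %x, 8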
      case Intrinsic::fshr:
      case Intrinsic::fshl: {
        const APInt *SA;
        if (!match(I->getOperand(2), m_APInt(SA)))
          break;

        // Normalize to funnel shift left. APInt shifts of BitWidth are well-
        // defined, so no need to special-case zero shifts here.
        uint64_t ShiftAmt = SA->urem(BitWidth);
        if (II->getIntrinsicID() == Intrinsic::fshr)
          ShiftAmt = BitWidth - ShiftAmt;

        APInt DemandedMaskLHS(DemandedMask.lshr(ShiftAmt));
        APInt DemandedMaskRHS(DemandedMask.shl(BitWidth - ShiftAmt));
        if (SimplifyDemandedBits(I, 0, DemandedMaskLHS, LHSKnown, Depth + 1) ||
            SimplifyDemandedBits(I, 1, DemandedMaskRHS, RHSKnown, Depth + 1))
          return I;

        Known.Zero = LHSKnown.Zero.shl(ShiftAmt) |
                     RHSKnown.Zero.lshr(BitWidth - ShiftAmt);
        Known.One = LHSKnown.One.shl(ShiftAmt) |
                    RHSKnown.One.lshr(BitWidth - ShiftAmt);
        break;
      }
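      // An illustrative example (assumed, not from the original source): for
      //   %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 8)
      // the result is (%x << 8) | (%y >> 24), so DemandedMaskLHS shifts the
      // demanded bits down by 8 (the bits of %x that land 8 positions
      // higher), and DemandedMaskRHS keeps only the demanded bits that come
      // from the top 8 bits of %y.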
      case Intrinsic::x86_mmx_pmovmskb:
      case Intrinsic::x86_sse_movmsk_ps:
      case Intrinsic::x86_sse2_movmsk_pd:
      case Intrinsic::x86_sse2_pmovmskb_128:
      case Intrinsic::x86_avx_movmsk_ps_256:
      case Intrinsic::x86_avx_movmsk_pd_256:
      case Intrinsic::x86_avx2_pmovmskb: {
        // MOVMSK copies the vector elements' sign bits to the low bits
        // and zeros the high bits.
        unsigned ArgWidth;
        if (II->getIntrinsicID() == Intrinsic::x86_mmx_pmovmskb) {
          ArgWidth = 8; // Arg is x86_mmx, but treated as <8 x i8>.
        } else {
          auto Arg = II->getArgOperand(0);
          auto ArgType = cast<VectorType>(Arg->getType());
          ArgWidth = ArgType->getNumElements();
        }

        // If we don't need any of the low bits then return zero;
        // we know that DemandedMask is non-zero already.
        APInt DemandedElts = DemandedMask.zextOrTrunc(ArgWidth);
        if (DemandedElts.isNullValue())
          return ConstantInt::getNullValue(VTy);

        // We know that the upper bits are set to zero.
        Known.Zero.setBitsFrom(ArgWidth);
        return nullptr;
      }
      case Intrinsic::x86_sse42_crc32_64_64:
        Known.Zero.setBitsFrom(32);
        return nullptr;
      }
    }
    computeKnownBits(V, Known, Depth, CxtI);
    break;
  }

  // If the client is only demanding bits that we know, return the known
  // constant.
  if (DemandedMask.isSubsetOf(Known.Zero|Known.One))
    return Constant::getIntegerValue(VTy, Known.One);

  return nullptr;
}
/// Helper routine of SimplifyDemandedUseBits. It computes Known
/// bits. It also tries to handle simplifications that can be done based on
/// DemandedMask, but without modifying the Instruction.
Value *InstCombiner::SimplifyMultipleUseDemandedBits(Instruction *I,
                                                     const APInt &DemandedMask,
                                                     KnownBits &Known,
                                                     unsigned Depth,
                                                     Instruction *CxtI) {
  unsigned BitWidth = DemandedMask.getBitWidth();
  Type *ITy = I->getType();

  KnownBits LHSKnown(BitWidth);
  KnownBits RHSKnown(BitWidth);

  // Despite the fact that we can't simplify this instruction in all User's
  // context, we can at least compute the known bits, and we can
  // do simplifications that apply to *just* the one user if we know that
  // this instruction has a simpler value in that context.
  switch (I->getOpcode()) {
  case Instruction::And: {
    // If either the LHS or the RHS are Zero, the result is zero.
    computeKnownBits(I->getOperand(1), RHSKnown, Depth + 1, CxtI);
    computeKnownBits(I->getOperand(0), LHSKnown, Depth + 1, CxtI);

    // Output known-0 bits are known to be clear if clear in either the LHS or
    // the RHS.
    APInt IKnownZero = RHSKnown.Zero | LHSKnown.Zero;
    // Output known-1 bits are only known if set in both the LHS & RHS.
    APInt IKnownOne = RHSKnown.One & LHSKnown.One;

    // If the client is only demanding bits that we know, return the known
    // constant.
    if (DemandedMask.isSubsetOf(IKnownZero|IKnownOne))
      return Constant::getIntegerValue(ITy, IKnownOne);

    // If all of the demanded bits are known 1 on one side, return the other.
    // These bits cannot contribute to the result of the 'and' in this
    // context.
    if (DemandedMask.isSubsetOf(LHSKnown.Zero | RHSKnown.One))
      return I->getOperand(0);
    if (DemandedMask.isSubsetOf(RHSKnown.Zero | LHSKnown.One))
      return I->getOperand(1);

    Known.Zero = std::move(IKnownZero);
    Known.One  = std::move(IKnownOne);
    break;
  }
  case Instruction::Or: {
    // We can simplify (X|Y) -> X or Y in the user's context if we know that
    // only bits from X or Y are demanded.

    // If either the LHS or the RHS are One, the result is One.
    computeKnownBits(I->getOperand(1), RHSKnown, Depth + 1, CxtI);
    computeKnownBits(I->getOperand(0), LHSKnown, Depth + 1, CxtI);

    // Output known-0 bits are only known if clear in both the LHS & RHS.
    APInt IKnownZero = RHSKnown.Zero & LHSKnown.Zero;
    // Output known-1 bits are known to be set if set in either the LHS or RHS.
    APInt IKnownOne = RHSKnown.One | LHSKnown.One;

    // If the client is only demanding bits that we know, return the known
    // constant.
    if (DemandedMask.isSubsetOf(IKnownZero|IKnownOne))
      return Constant::getIntegerValue(ITy, IKnownOne);

    // If all of the demanded bits are known zero on one side, return the
    // other. These bits cannot contribute to the result of the 'or' in this
    // context.
    if (DemandedMask.isSubsetOf(LHSKnown.One | RHSKnown.Zero))
      return I->getOperand(0);
    if (DemandedMask.isSubsetOf(RHSKnown.One | LHSKnown.Zero))
      return I->getOperand(1);

    Known.Zero = std::move(IKnownZero);
    Known.One  = std::move(IKnownOne);
    break;
  }
  case Instruction::Xor: {
    // We can simplify (X^Y) -> X or Y in the user's context if we know that
    // only bits from X or Y are demanded.

    computeKnownBits(I->getOperand(1), RHSKnown, Depth + 1, CxtI);
    computeKnownBits(I->getOperand(0), LHSKnown, Depth + 1, CxtI);

    // Output known-0 bits are known if clear or set in both the LHS & RHS.
    APInt IKnownZero = (RHSKnown.Zero & LHSKnown.Zero) |
                       (RHSKnown.One & LHSKnown.One);
    // Output known-1 are known to be set if set in only one of the LHS, RHS.
    APInt IKnownOne = (RHSKnown.Zero & LHSKnown.One) |
                      (RHSKnown.One & LHSKnown.Zero);

    // If the client is only demanding bits that we know, return the known
    // constant.
    if (DemandedMask.isSubsetOf(IKnownZero|IKnownOne))
      return Constant::getIntegerValue(ITy, IKnownOne);

    // If all of the demanded bits are known zero on one side, return the
    // other.
    if (DemandedMask.isSubsetOf(RHSKnown.Zero))
      return I->getOperand(0);
    if (DemandedMask.isSubsetOf(LHSKnown.Zero))
      return I->getOperand(1);

    // Output known-0 bits are known if clear or set in both the LHS & RHS.
    Known.Zero = std::move(IKnownZero);
    // Output known-1 are known to be set if set in only one of the LHS, RHS.
    Known.One  = std::move(IKnownOne);
    break;
  }
  default:
    // Compute the Known bits to simplify things downstream.
    computeKnownBits(I, Known, Depth, CxtI);

    // If this user is only demanding bits that we know, return the known
    // constant.
    if (DemandedMask.isSubsetOf(Known.Zero|Known.One))
      return Constant::getIntegerValue(ITy, Known.One);

    break;
  }

  return nullptr;
}
/// Helper routine of SimplifyDemandedUseBits. It tries to simplify
/// "E1 = (X lsr C1) << C2", where the C1 and C2 are constant, into
/// "E2 = X << (C2 - C1)" or "E2 = X >> (C1 - C2)", depending on the sign
/// of "C2 - C1".
///
/// Suppose E1 and E2 are generally different in bits S={bm, bm+1,
/// ..., bn}, without considering the specific value X is holding.
/// This transformation is legal iff one of the following conditions holds:
///  1) All the bits in S are 0, in this case E1 == E2.
///  2) We don't care about those bits in S, per the input DemandedMask.
///  3) Combination of 1) and 2): some bits in S are 0, and we don't care
///     about the rest.
///
/// Currently we only test condition 2).
///
/// As with SimplifyDemandedUseBits, it returns NULL if the simplification was
/// not successful.
Value *
InstCombiner::simplifyShrShlDemandedBits(Instruction *Shr, const APInt &ShrOp1,
                                         Instruction *Shl, const APInt &ShlOp1,
                                         const APInt &DemandedMask,
                                         KnownBits &Known) {
  if (!ShlOp1 || !ShrOp1)
    return nullptr; // No-op.

  Value *VarX = Shr->getOperand(0);
  Type *Ty = VarX->getType();
  unsigned BitWidth = Ty->getScalarSizeInBits();
  if (ShlOp1.uge(BitWidth) || ShrOp1.uge(BitWidth))
    return nullptr; // Undef.

  unsigned ShlAmt = ShlOp1.getZExtValue();
  unsigned ShrAmt = ShrOp1.getZExtValue();

  Known.One.clearAllBits();
  Known.Zero.setLowBits(ShlAmt - 1);
  Known.Zero &= DemandedMask;

  APInt BitMask1(APInt::getAllOnesValue(BitWidth));
  APInt BitMask2(APInt::getAllOnesValue(BitWidth));

  bool isLshr = (Shr->getOpcode() == Instruction::LShr);
  BitMask1 = isLshr ? (BitMask1.lshr(ShrAmt) << ShlAmt) :
                      (BitMask1.ashr(ShrAmt) << ShlAmt);

  if (ShrAmt <= ShlAmt) {
    BitMask2 <<= (ShlAmt - ShrAmt);
  } else {
    BitMask2 = isLshr ? BitMask2.lshr(ShrAmt - ShlAmt):
                        BitMask2.ashr(ShrAmt - ShlAmt);
  }

  // Check if condition-2 (see the comment to this function) is satisfied.
  if ((BitMask1 & DemandedMask) == (BitMask2 & DemandedMask)) {
    if (ShrAmt == ShlAmt)
      return VarX;

    if (!Shr->hasOneUse())
      return nullptr;

    BinaryOperator *New;
    if (ShrAmt < ShlAmt) {
      Constant *Amt = ConstantInt::get(VarX->getType(), ShlAmt - ShrAmt);
      New = BinaryOperator::CreateShl(VarX, Amt);
      BinaryOperator *Orig = cast<BinaryOperator>(Shl);
      New->setHasNoSignedWrap(Orig->hasNoSignedWrap());
      New->setHasNoUnsignedWrap(Orig->hasNoUnsignedWrap());
    } else {
      Constant *Amt = ConstantInt::get(VarX->getType(), ShrAmt - ShlAmt);
      New = isLshr ? BinaryOperator::CreateLShr(VarX, Amt) :
                     BinaryOperator::CreateAShr(VarX, Amt);
      if (cast<BinaryOperator>(Shr)->isExact())
        New->setIsExact(true);
    }

    return InsertNewInstWith(New, *Shl);
  }

  return nullptr;
}
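// An illustrative example (assumed, not from the original source):
//   E1 = (X lshr 1) << 2   and   E2 = X << 1
// agree on every bit except bit 1, which is zero in E1 but X[0] in E2. If
// DemandedMask does not include bit 1, condition 2) holds and the shift pair
// is replaced by the single shift X << 1.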
/// Implement SimplifyDemandedVectorElts for amdgcn buffer and image intrinsics.
///
/// Note: This only supports non-TFE/LWE image intrinsic calls; those have
///       struct returns.
Value *InstCombiner::simplifyAMDGCNMemoryIntrinsicDemanded(IntrinsicInst *II,
                                                           APInt DemandedElts,
                                                           int DMaskIdx) {
  unsigned VWidth = II->getType()->getVectorNumElements();
  if (VWidth == 1)
    return nullptr;

  ConstantInt *NewDMask = nullptr;

  if (DMaskIdx < 0) {
    // Pretend that a prefix of elements is demanded to simplify the code
    // below.
    DemandedElts = (1 << DemandedElts.getActiveBits()) - 1;
  } else {
    ConstantInt *DMask = dyn_cast<ConstantInt>(II->getArgOperand(DMaskIdx));
    if (!DMask)
      return nullptr; // non-constant dmask is not supported by codegen

    unsigned DMaskVal = DMask->getZExtValue() & 0xf;

    // Mask off values that are undefined because the dmask doesn't cover them
    DemandedElts &= (1 << countPopulation(DMaskVal)) - 1;

    unsigned NewDMaskVal = 0;
    unsigned OrigLoadIdx = 0;
    for (unsigned SrcIdx = 0; SrcIdx < 4; ++SrcIdx) {
      const unsigned Bit = 1 << SrcIdx;
      if (!!(DMaskVal & Bit)) {
        if (!!DemandedElts[OrigLoadIdx])
          NewDMaskVal |= Bit;
        OrigLoadIdx++;
      }
    }

    if (DMaskVal != NewDMaskVal)
      NewDMask = ConstantInt::get(DMask->getType(), NewDMaskVal);
  }

  // TODO: Handle 3 vectors when supported in code gen.
  unsigned NewNumElts = PowerOf2Ceil(DemandedElts.countPopulation());
  if (!NewNumElts)
    return UndefValue::get(II->getType());

  if (NewNumElts >= VWidth && DemandedElts.isMask()) {
    if (NewDMask)
      II->setArgOperand(DMaskIdx, NewDMask);
    return nullptr;
  }

  // Determine the overload types of the original intrinsic.
  auto IID = II->getIntrinsicID();
  SmallVector<Intrinsic::IITDescriptor, 16> Table;
  getIntrinsicInfoTableEntries(IID, Table);
  ArrayRef<Intrinsic::IITDescriptor> TableRef = Table;

  FunctionType *FTy = II->getCalledFunction()->getFunctionType();
  SmallVector<Type *, 6> OverloadTys;
  Intrinsic::matchIntrinsicType(FTy->getReturnType(), TableRef, OverloadTys);
  for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i)
    Intrinsic::matchIntrinsicType(FTy->getParamType(i), TableRef, OverloadTys);

  // Get the new return type overload of the intrinsic.
  Module *M = II->getParent()->getParent()->getParent();
  Type *EltTy = II->getType()->getVectorElementType();
  Type *NewTy = (NewNumElts == 1) ? EltTy : VectorType::get(EltTy, NewNumElts);

  OverloadTys[0] = NewTy;
  Function *NewIntrin = Intrinsic::getDeclaration(M, IID, OverloadTys);

  SmallVector<Value *, 16> Args;
  for (unsigned I = 0, E = II->getNumArgOperands(); I != E; ++I)
    Args.push_back(II->getArgOperand(I));

  if (NewDMask)
    Args[DMaskIdx] = NewDMask;

  IRBuilderBase::InsertPointGuard Guard(Builder);
  Builder.SetInsertPoint(II);

  CallInst *NewCall = Builder.CreateCall(NewIntrin, Args);
  NewCall->takeName(II);
  NewCall->copyMetadata(*II);

  if (NewNumElts == 1) {
    return Builder.CreateInsertElement(UndefValue::get(II->getType()), NewCall,
                                       DemandedElts.countTrailingZeros());
  }

  SmallVector<uint32_t, 8> EltMask;
  unsigned NewLoadIdx = 0;
  for (unsigned OrigLoadIdx = 0; OrigLoadIdx < VWidth; ++OrigLoadIdx) {
    if (!!DemandedElts[OrigLoadIdx])
      EltMask.push_back(NewLoadIdx++);
    else
      EltMask.push_back(NewNumElts);
  }

  Value *Shuffle =
      Builder.CreateShuffleVector(NewCall, UndefValue::get(NewTy), EltMask);

  return Shuffle;
}
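// An illustrative example (assumed, not from the original source): for an
// image load such as
//   %v = call <4 x float> @llvm.amdgcn.image.load.*(..., dmask = 0xf, ...)
// where only element 0 is demanded, the dmask is shrunk to 0x1, the call is
// rebuilt with a scalar float overload, and the original vector shape is
// recreated with an insertelement (or a shufflevector for wider results).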
/// The specified value produces a vector with any number of elements.
/// DemandedElts contains the set of elements that are actually used by the
/// caller. This method analyzes which elements of the operand are undef and
/// returns that information in UndefElts.
///
/// If the information about demanded elements can be used to simplify the
/// operation, the operation is simplified, then the resultant value is
/// returned. This returns null if no change was made.
Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
                                                APInt &UndefElts,
                                                unsigned Depth) {
  unsigned VWidth = V->getType()->getVectorNumElements();
  APInt EltMask(APInt::getAllOnesValue(VWidth));
  assert((DemandedElts & ~EltMask) == 0 && "Invalid DemandedElts!");

  if (isa<UndefValue>(V)) {
    // If the entire vector is undefined, just return this info.
    UndefElts = EltMask;
    return nullptr;
  }

  if (DemandedElts.isNullValue()) { // If nothing is demanded, provide undef.
    UndefElts = EltMask;
    return UndefValue::get(V->getType());
  }
  UndefElts = 0;

  if (auto *C = dyn_cast<Constant>(V)) {
    // Check if this is identity. If so, return 0 since we are not simplifying
    // anything.
    if (DemandedElts.isAllOnesValue())
      return nullptr;

    Type *EltTy = cast<VectorType>(V->getType())->getElementType();
    Constant *Undef = UndefValue::get(EltTy);
    SmallVector<Constant*, 16> Elts;
    for (unsigned i = 0; i != VWidth; ++i) {
      if (!DemandedElts[i]) {   // If not demanded, set to undef.
        Elts.push_back(Undef);
        UndefElts.setBit(i);
        continue;
      }

      Constant *Elt = C->getAggregateElement(i);
      if (!Elt) return nullptr;

      if (isa<UndefValue>(Elt)) {   // Already undef.
        Elts.push_back(Undef);
        UndefElts.setBit(i);
      } else {                      // Otherwise, defined.
        Elts.push_back(Elt);
      }
    }

    // If we changed the constant, return it.
    Constant *NewCV = ConstantVector::get(Elts);
    return NewCV != C ? NewCV : nullptr;
  }
  // Limit search depth.
  if (Depth == 10)
    return nullptr;

  // If multiple users are using the root value, proceed with
  // simplification conservatively assuming that all elements
  // are needed.
  if (!V->hasOneUse()) {
    // Quit if we find multiple users of a non-root value though.
    // They'll be handled when it's their turn to be visited by
    // the main instcombine process.
    if (Depth != 0)
      // TODO: Just compute the UndefElts information recursively.
      return nullptr;

    // Conservatively assume that all elements are needed.
    DemandedElts = EltMask;
  }

  Instruction *I = dyn_cast<Instruction>(V);
  if (!I) return nullptr;        // Only analyze instructions.
  bool MadeChange = false;
  auto simplifyAndSetOp = [&](Instruction *Inst, unsigned OpNum,
                              APInt Demanded, APInt &Undef) {
    auto *II = dyn_cast<IntrinsicInst>(Inst);
    Value *Op = II ? II->getArgOperand(OpNum) : Inst->getOperand(OpNum);
    if (Value *V = SimplifyDemandedVectorElts(Op, Demanded, Undef, Depth + 1)) {
      if (II)
        II->setArgOperand(OpNum, V);
      else
        Inst->setOperand(OpNum, V);
      MadeChange = true;
    }
  };

  APInt UndefElts2(VWidth, 0);
  APInt UndefElts3(VWidth, 0);
  switch (I->getOpcode()) {
  default: break;
  case Instruction::GetElementPtr: {
    // Conservatively track the demanded elements back through any vector
    // operands we may have. We know there must be at least one, or we
    // wouldn't have a vector result to get here. Note that we intentionally
    // merge the undef bits here since gepping with either an undef base or
    // index results in undef.
    for (unsigned i = 0; i < I->getNumOperands(); i++)
      if (I->getOperand(i)->getType()->isVectorTy())
        simplifyAndSetOp(I, i, DemandedElts, UndefElts);
    break;
  }
  case Instruction::InsertElement: {
    // If this is a variable index, we don't know which element it overwrites,
    // so demand exactly the same input as we produce.
    ConstantInt *Idx = dyn_cast<ConstantInt>(I->getOperand(2));
    if (!Idx) {
      // Note that we can't propagate undef elt info, because we don't know
      // which elt is getting updated.
      simplifyAndSetOp(I, 0, DemandedElts, UndefElts2);
      break;
    }

    // The element inserted overwrites whatever was there, so the input demanded
    // set is simpler than the output set.
    unsigned IdxNo = Idx->getZExtValue();
    APInt PreInsertDemandedElts = DemandedElts;
    if (IdxNo < VWidth)
      PreInsertDemandedElts.clearBit(IdxNo);

    simplifyAndSetOp(I, 0, PreInsertDemandedElts, UndefElts);

    // If this is inserting an element that isn't demanded, remove this
    // insertelement.
    if (IdxNo >= VWidth || !DemandedElts[IdxNo]) {
      Worklist.Add(I);
      return I->getOperand(0);
    }

    // The inserted element is defined.
    UndefElts.clearBit(IdxNo);
    break;
  }
  case Instruction::ShuffleVector: {
    ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(I);
    unsigned LHSVWidth =
      Shuffle->getOperand(0)->getType()->getVectorNumElements();
    APInt LeftDemanded(LHSVWidth, 0), RightDemanded(LHSVWidth, 0);
    for (unsigned i = 0; i < VWidth; i++) {
      if (DemandedElts[i]) {
        unsigned MaskVal = Shuffle->getMaskValue(i);
        if (MaskVal != -1u) {
          assert(MaskVal < LHSVWidth * 2 &&
                 "shufflevector mask index out of range!");
          if (MaskVal < LHSVWidth)
            LeftDemanded.setBit(MaskVal);
          else
            RightDemanded.setBit(MaskVal - LHSVWidth);
        }
      }
    }

    APInt LHSUndefElts(LHSVWidth, 0);
    simplifyAndSetOp(I, 0, LeftDemanded, LHSUndefElts);

    APInt RHSUndefElts(LHSVWidth, 0);
    simplifyAndSetOp(I, 1, RightDemanded, RHSUndefElts);

    bool NewUndefElts = false;
    unsigned LHSIdx = -1u, LHSValIdx = -1u;
    unsigned RHSIdx = -1u, RHSValIdx = -1u;
    bool LHSUniform = true;
    bool RHSUniform = true;
    for (unsigned i = 0; i < VWidth; i++) {
      unsigned MaskVal = Shuffle->getMaskValue(i);
      if (MaskVal == -1u) {
        UndefElts.setBit(i);
      } else if (!DemandedElts[i]) {
        NewUndefElts = true;
        UndefElts.setBit(i);
      } else if (MaskVal < LHSVWidth) {
        if (LHSUndefElts[MaskVal]) {
          NewUndefElts = true;
          UndefElts.setBit(i);
        } else {
          LHSIdx = LHSIdx == -1u ? i : LHSVWidth;
          LHSValIdx = LHSValIdx == -1u ? MaskVal : LHSVWidth;
          LHSUniform = LHSUniform && (MaskVal == i);
        }
      } else {
        if (RHSUndefElts[MaskVal - LHSVWidth]) {
          NewUndefElts = true;
          UndefElts.setBit(i);
        } else {
          RHSIdx = RHSIdx == -1u ? i : LHSVWidth;
          RHSValIdx = RHSValIdx == -1u ? MaskVal - LHSVWidth : LHSVWidth;
          RHSUniform = RHSUniform && (MaskVal - LHSVWidth == i);
        }
      }
    }

    // Try to transform shuffle with constant vector and single element from
    // this constant vector to single insertelement instruction.
    // shufflevector V, C, <v1, v2, .., ci, .., vm> ->
    // insertelement V, C[ci], ci-n
    if (LHSVWidth == Shuffle->getType()->getNumElements()) {
      Value *Op = nullptr;
      Constant *Value = nullptr;
      unsigned Idx = -1u;

      // Find constant vector with the single element in shuffle (LHS or RHS).
      if (LHSIdx < LHSVWidth && RHSUniform) {
        if (auto *CV = dyn_cast<ConstantVector>(Shuffle->getOperand(0))) {
          Op = Shuffle->getOperand(1);
          Value = CV->getOperand(LHSValIdx);
          Idx = LHSIdx;
        }
      }
      if (RHSIdx < LHSVWidth && LHSUniform) {
        if (auto *CV = dyn_cast<ConstantVector>(Shuffle->getOperand(1))) {
          Op = Shuffle->getOperand(0);
          Value = CV->getOperand(RHSValIdx);
          Idx = RHSIdx;
        }
      }
      // Found constant vector with single element - convert to insertelement.
      if (Op && Value) {
        Instruction *New = InsertElementInst::Create(
            Op, Value, ConstantInt::get(Type::getInt32Ty(I->getContext()), Idx),
            Shuffle->getName());
        InsertNewInstWith(New, *Shuffle);
        return New;
      }
    }
    if (NewUndefElts) {
      // Add additional discovered undefs.
      SmallVector<Constant*, 16> Elts;
      for (unsigned i = 0; i < VWidth; ++i) {
        if (UndefElts[i])
          Elts.push_back(UndefValue::get(Type::getInt32Ty(I->getContext())));
        else
          Elts.push_back(ConstantInt::get(Type::getInt32Ty(I->getContext()),
                                          Shuffle->getMaskValue(i)));
      }
      I->setOperand(2, ConstantVector::get(Elts));
      MadeChange = true;
    }
    break;
  }
  case Instruction::Select: {
    // If this is a vector select, try to transform the select condition based
    // on the current demanded elements.
    SelectInst *Sel = cast<SelectInst>(I);
    if (Sel->getCondition()->getType()->isVectorTy()) {
      // TODO: We are not doing anything with UndefElts based on this call.
      // It is overwritten below based on the other select operands. If an
      // element of the select condition is known undef, then we are free to
      // choose the output value from either arm of the select. If we know that
      // one of those values is undef, then the output can be undef.
      simplifyAndSetOp(I, 0, DemandedElts, UndefElts);
    }

    // Next, see if we can transform the arms of the select.
    APInt DemandedLHS(DemandedElts), DemandedRHS(DemandedElts);
    if (auto *CV = dyn_cast<ConstantVector>(Sel->getCondition())) {
      for (unsigned i = 0; i < VWidth; i++) {
        // isNullValue() always returns false when called on a ConstantExpr.
        // Skip constant expressions to avoid propagating incorrect information.
        Constant *CElt = CV->getAggregateElement(i);
        if (isa<ConstantExpr>(CElt))
          continue;
        // TODO: If a select condition element is undef, we can demand from
        // either side. If one side is known undef, choosing that side would
        // propagate undef.
        if (CElt->isNullValue())
          DemandedLHS.clearBit(i);
        else
          DemandedRHS.clearBit(i);
      }
    }

    simplifyAndSetOp(I, 1, DemandedLHS, UndefElts2);
    simplifyAndSetOp(I, 2, DemandedRHS, UndefElts3);

    // Output elements are undefined if the element from each arm is undefined.
    // TODO: This can be improved. See comment in select condition handling.
    UndefElts = UndefElts2 & UndefElts3;
    break;
  }
  case Instruction::BitCast: {
    // Vector->vector casts only.
    VectorType *VTy = dyn_cast<VectorType>(I->getOperand(0)->getType());
    if (!VTy) break;
    unsigned InVWidth = VTy->getNumElements();
    APInt InputDemandedElts(InVWidth, 0);
    UndefElts2 = APInt(InVWidth, 0);
    unsigned Ratio;

    if (VWidth == InVWidth) {
      // If we are converting from <4 x i32> -> <4 x f32>, we demand the same
      // elements as are demanded of us.
      Ratio = 1;
      InputDemandedElts = DemandedElts;
    } else if ((VWidth % InVWidth) == 0) {
      // If the number of elements in the output is a multiple of the number of
      // elements in the input then an input element is live if any of the
      // corresponding output elements are live.
      Ratio = VWidth / InVWidth;
      for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx)
        if (DemandedElts[OutIdx])
          InputDemandedElts.setBit(OutIdx / Ratio);
    } else if ((InVWidth % VWidth) == 0) {
      // If the number of elements in the input is a multiple of the number of
      // elements in the output then an input element is live if the
      // corresponding output element is live.
      Ratio = InVWidth / VWidth;
      for (unsigned InIdx = 0; InIdx != InVWidth; ++InIdx)
        if (DemandedElts[InIdx / Ratio])
          InputDemandedElts.setBit(InIdx);
    } else {
      // Unsupported so far.
      break;
    }

    simplifyAndSetOp(I, 0, InputDemandedElts, UndefElts2);

    if (VWidth == InVWidth) {
      UndefElts = UndefElts2;
    } else if ((VWidth % InVWidth) == 0) {
      // If the number of elements in the output is a multiple of the number of
      // elements in the input then an output element is undef if the
      // corresponding input element is undef.
      for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx)
        if (UndefElts2[OutIdx / Ratio])
          UndefElts.setBit(OutIdx);
    } else if ((InVWidth % VWidth) == 0) {
      // If the number of elements in the input is a multiple of the number of
      // elements in the output then an output element is undef if all of the
      // corresponding input elements are undef.
      for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx) {
        APInt SubUndef = UndefElts2.lshr(OutIdx * Ratio).zextOrTrunc(Ratio);
        if (SubUndef.countPopulation() == Ratio)
          UndefElts.setBit(OutIdx);
      }
    } else {
      llvm_unreachable("Unimp");
    }
    break;
  }
  case Instruction::FPTrunc:
  case Instruction::FPExt:
    simplifyAndSetOp(I, 0, DemandedElts, UndefElts);
    break;
  case Instruction::Call: {
    IntrinsicInst *II = dyn_cast<IntrinsicInst>(I);
    if (!II) break;
    switch (II->getIntrinsicID()) {
    case Intrinsic::x86_xop_vfrcz_ss:
    case Intrinsic::x86_xop_vfrcz_sd:
      // The instructions for these intrinsics are specified to zero the upper
      // bits, not pass them through like other scalar intrinsics. So we
      // shouldn't just use Arg0 if DemandedElts[0] is clear like we do for
      // other intrinsics. Instead we should return a zero vector.
      if (!DemandedElts[0]) {
        Worklist.Add(II);
        return ConstantAggregateZero::get(II->getType());
      }

      // Only the lower element is used.
      DemandedElts = 1;
      simplifyAndSetOp(II, 0, DemandedElts, UndefElts);

      // Only the lower element is undefined. The high elements are zero.
      UndefElts = UndefElts[0];
      break;

    // Unary scalar-as-vector operations that work column-wise.
    case Intrinsic::x86_sse_rcp_ss:
    case Intrinsic::x86_sse_rsqrt_ss:
      simplifyAndSetOp(II, 0, DemandedElts, UndefElts);

      // If lowest element of a scalar op isn't used then use Arg0.
      if (!DemandedElts[0]) {
        Worklist.Add(II);
        return II->getArgOperand(0);
      }
      // TODO: If only the low elt is demanded, lower SQRT to FSQRT (with
      // rounding/exceptions checks).
      break;

    // Binary scalar-as-vector operations that work column-wise. The high
    // elements come from operand 0. The low element is a function of both
    // operands.
    case Intrinsic::x86_sse_min_ss:
    case Intrinsic::x86_sse_max_ss:
    case Intrinsic::x86_sse_cmp_ss:
    case Intrinsic::x86_sse2_min_sd:
    case Intrinsic::x86_sse2_max_sd:
    case Intrinsic::x86_sse2_cmp_sd: {
      simplifyAndSetOp(II, 0, DemandedElts, UndefElts);

      // If lowest element of a scalar op isn't used then use Arg0.
      if (!DemandedElts[0]) {
        Worklist.Add(II);
        return II->getArgOperand(0);
      }

      // Only lower element is used for operand 1.
      DemandedElts = 1;
      simplifyAndSetOp(II, 1, DemandedElts, UndefElts2);

      // Lower element is undefined if both lower elements are undefined.
      // Consider things like undef&0. The result is known zero, not undef.
      if (!UndefElts2[0])
        UndefElts.clearBit(0);

      break;
    }

    // Binary scalar-as-vector operations that work column-wise. The high
    // elements come from operand 0 and the low element comes from operand 1.
    case Intrinsic::x86_sse41_round_ss:
    case Intrinsic::x86_sse41_round_sd: {
      // Don't use the low element of operand 0.
      APInt DemandedElts2 = DemandedElts;
      DemandedElts2.clearBit(0);
      simplifyAndSetOp(II, 0, DemandedElts2, UndefElts);

      // If lowest element of a scalar op isn't used then use Arg0.
      if (!DemandedElts[0]) {
        Worklist.Add(II);
        return II->getArgOperand(0);
      }

      // Only lower element is used for operand 1.
      DemandedElts = 1;
      simplifyAndSetOp(II, 1, DemandedElts, UndefElts2);

      // Take the high undef elements from operand 0 and take the lower element
      // from operand 1.
      UndefElts.clearBit(0);
      UndefElts |= UndefElts2[0];
      break;
    }

    // Three input scalar-as-vector operations that work column-wise. The high
    // elements come from operand 0 and the low element is a function of all
    // three inputs.
    case Intrinsic::x86_avx512_mask_add_ss_round:
    case Intrinsic::x86_avx512_mask_div_ss_round:
    case Intrinsic::x86_avx512_mask_mul_ss_round:
    case Intrinsic::x86_avx512_mask_sub_ss_round:
    case Intrinsic::x86_avx512_mask_max_ss_round:
    case Intrinsic::x86_avx512_mask_min_ss_round:
    case Intrinsic::x86_avx512_mask_add_sd_round:
    case Intrinsic::x86_avx512_mask_div_sd_round:
    case Intrinsic::x86_avx512_mask_mul_sd_round:
    case Intrinsic::x86_avx512_mask_sub_sd_round:
    case Intrinsic::x86_avx512_mask_max_sd_round:
    case Intrinsic::x86_avx512_mask_min_sd_round:
      simplifyAndSetOp(II, 0, DemandedElts, UndefElts);

      // If lowest element of a scalar op isn't used then use Arg0.
      if (!DemandedElts[0]) {
        Worklist.Add(II);
        return II->getArgOperand(0);
      }

      // Only lower element is used for operand 1 and 2.
      DemandedElts = 1;
      simplifyAndSetOp(II, 1, DemandedElts, UndefElts2);
      simplifyAndSetOp(II, 2, DemandedElts, UndefElts3);

      // Lower element is undefined if all three lower elements are undefined.
      // Consider things like undef&0. The result is known zero, not undef.
      if (!UndefElts2[0] || !UndefElts3[0])
        UndefElts.clearBit(0);

      break;

    case Intrinsic::x86_sse2_packssdw_128:
    case Intrinsic::x86_sse2_packsswb_128:
    case Intrinsic::x86_sse2_packuswb_128:
    case Intrinsic::x86_sse41_packusdw:
    case Intrinsic::x86_avx2_packssdw:
    case Intrinsic::x86_avx2_packsswb:
    case Intrinsic::x86_avx2_packusdw:
    case Intrinsic::x86_avx2_packuswb:
    case Intrinsic::x86_avx512_packssdw_512:
    case Intrinsic::x86_avx512_packsswb_512:
    case Intrinsic::x86_avx512_packusdw_512:
    case Intrinsic::x86_avx512_packuswb_512: {
      auto *Ty0 = II->getArgOperand(0)->getType();
      unsigned InnerVWidth = Ty0->getVectorNumElements();
      assert(VWidth == (InnerVWidth * 2) && "Unexpected input size");

      unsigned NumLanes = Ty0->getPrimitiveSizeInBits() / 128;
      unsigned VWidthPerLane = VWidth / NumLanes;
      unsigned InnerVWidthPerLane = InnerVWidth / NumLanes;

      // Per lane, pack the elements of the first input and then the second.
      // e.g.
      // v8i16 PACK(v4i32 X, v4i32 Y) - (X[0..3],Y[0..3])
      // v32i8 PACK(v16i16 X, v16i16 Y) - (X[0..7],Y[0..7]),(X[8..15],Y[8..15])
      for (int OpNum = 0; OpNum != 2; ++OpNum) {
        APInt OpDemandedElts(InnerVWidth, 0);
        for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
          unsigned LaneIdx = Lane * VWidthPerLane;
          for (unsigned Elt = 0; Elt != InnerVWidthPerLane; ++Elt) {
            unsigned Idx = LaneIdx + Elt + InnerVWidthPerLane * OpNum;
            if (DemandedElts[Idx])
              OpDemandedElts.setBit((Lane * InnerVWidthPerLane) + Elt);
          }
        }

        // Demand elements from the operand.
        APInt OpUndefElts(InnerVWidth, 0);
        simplifyAndSetOp(II, OpNum, OpDemandedElts, OpUndefElts);

        // Pack the operand's UNDEF elements, one lane at a time.
        OpUndefElts = OpUndefElts.zext(VWidth);
        for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
          APInt LaneElts = OpUndefElts.lshr(InnerVWidthPerLane * Lane);
          LaneElts = LaneElts.getLoBits(InnerVWidthPerLane);
          LaneElts <<= InnerVWidthPerLane * (2 * Lane + OpNum);
          UndefElts |= LaneElts;
        }
      }
      break;
    }

    // PSHUFB
    case Intrinsic::x86_ssse3_pshuf_b_128:
    case Intrinsic::x86_avx2_pshuf_b:
    case Intrinsic::x86_avx512_pshuf_b_512:
    // PERMILVAR
    case Intrinsic::x86_avx_vpermilvar_ps:
    case Intrinsic::x86_avx_vpermilvar_ps_256:
    case Intrinsic::x86_avx512_vpermilvar_ps_512:
    case Intrinsic::x86_avx_vpermilvar_pd:
    case Intrinsic::x86_avx_vpermilvar_pd_256:
    case Intrinsic::x86_avx512_vpermilvar_pd_512:
    // PERMV
    case Intrinsic::x86_avx2_permd:
    case Intrinsic::x86_avx2_permps: {
      simplifyAndSetOp(II, 1, DemandedElts, UndefElts);
      break;
    }

    // SSE4A instructions leave the upper 64-bits of the 128-bit result
    // in an undefined state.
    case Intrinsic::x86_sse4a_extrq:
    case Intrinsic::x86_sse4a_extrqi:
    case Intrinsic::x86_sse4a_insertq:
    case Intrinsic::x86_sse4a_insertqi:
      UndefElts.setHighBits(VWidth / 2);
      break;
    case Intrinsic::amdgcn_buffer_load:
    case Intrinsic::amdgcn_buffer_load_format:
    case Intrinsic::amdgcn_raw_buffer_load:
    case Intrinsic::amdgcn_raw_buffer_load_format:
    case Intrinsic::amdgcn_struct_buffer_load:
    case Intrinsic::amdgcn_struct_buffer_load_format:
      return simplifyAMDGCNMemoryIntrinsicDemanded(II, DemandedElts);
    default: {
      if (getAMDGPUImageDMaskIntrinsic(II->getIntrinsicID())) {
        Value *TFC = II->getArgOperand(II->getNumOperands() - 2);
        assert(!isa<ConstantInt>(TFC) ||
               dyn_cast<ConstantInt>(TFC)->getZExtValue() == 0);

        return simplifyAMDGCNMemoryIntrinsicDemanded(II, DemandedElts, 0);
      }
      break;
    }
    } // switch on IntrinsicID
    break;
  }
  } // switch on Opcode
  // TODO: We bail completely on integer div/rem and shifts because they have
  // UB/poison potential, but that should be refined.
  BinaryOperator *BO;
  if (match(I, m_BinOp(BO)) && !BO->isIntDivRem() && !BO->isShift()) {
    simplifyAndSetOp(I, 0, DemandedElts, UndefElts);
    simplifyAndSetOp(I, 1, DemandedElts, UndefElts2);

    // Any change to an instruction with potential poison must clear those flags
    // because we cannot guarantee those constraints now. Other analysis may
    // determine that it is safe to re-apply the flags.
    if (MadeChange)
      BO->dropPoisonGeneratingFlags();

    // Output elements are undefined if both are undefined. Consider things
    // like undef & 0. The result is known zero, not undef.
    UndefElts &= UndefElts2;
  }

  return MadeChange ? I : nullptr;
}