//===- ValueTracking.cpp - Walk computations to compute properties --------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains routines that help analyze properties that chains of
// computations have.
//
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumeBundleQueries.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/DomConditionCache.h"
#include "llvm/Analysis/GuardUtils.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/Analysis/WithCache.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/EHPersonalities.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/TargetParser/RISCVTargetParser.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <optional>
#include <utility>

using namespace llvm;
using namespace llvm::PatternMatch;

// Controls the number of uses of the value searched for possible
// dominating comparisons.
static cl::opt<unsigned> DomConditionsMaxUses("dom-conditions-max-uses",
                                              cl::Hidden, cl::init(20));
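// Being a regular cl::opt, the cap above can be tuned per run for
// experimentation, e.g. `opt -passes=instcombine -dom-conditions-max-uses=40`.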
/// Returns the bitwidth of the given scalar or pointer type. For vector types,
/// returns the element type's bitwidth.
static unsigned getBitWidth(Type *Ty, const DataLayout &DL) {
  if (unsigned BitWidth = Ty->getScalarSizeInBits())
    return BitWidth;

  return DL.getPointerTypeSizeInBits(Ty);
}
// Given the provided Value and, potentially, a context instruction, return
// the preferred context instruction (if any).
static const Instruction *safeCxtI(const Value *V, const Instruction *CxtI) {
  // If we've been provided with a context instruction, then use that (provided
  // it has been inserted).
  if (CxtI && CxtI->getParent())
    return CxtI;

  // If the value is really an already-inserted instruction, then use that.
  CxtI = dyn_cast<Instruction>(V);
  if (CxtI && CxtI->getParent())
    return CxtI;

  return nullptr;
}
static bool getShuffleDemandedElts(const ShuffleVectorInst *Shuf,
                                   const APInt &DemandedElts,
                                   APInt &DemandedLHS, APInt &DemandedRHS) {
  if (isa<ScalableVectorType>(Shuf->getType())) {
    assert(DemandedElts == APInt(1, 1));
    DemandedLHS = DemandedRHS = DemandedElts;
    return true;
  }

  int NumElts =
      cast<FixedVectorType>(Shuf->getOperand(0)->getType())->getNumElements();
  return llvm::getShuffleDemandedElts(NumElts, Shuf->getShuffleMask(),
                                      DemandedElts, DemandedLHS, DemandedRHS);
}
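// Worked example for getShuffleDemandedElts above: given
//   %s = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 3>
// demanding only element 1 of %s (mask value 3) demands element 1 of %b and
// nothing from %a, so DemandedLHS == 0b00 and DemandedRHS == 0b10.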
static void computeKnownBits(const Value *V, const APInt &DemandedElts,
                             KnownBits &Known, unsigned Depth,
                             const SimplifyQuery &Q);
void llvm::computeKnownBits(const Value *V, KnownBits &Known, unsigned Depth,
                            const SimplifyQuery &Q) {
  // Since the number of lanes in a scalable vector is unknown at compile time,
  // we track one bit which is implicitly broadcast to all lanes. This means
  // that all lanes in a scalable vector are considered demanded.
  auto *FVTy = dyn_cast<FixedVectorType>(V->getType());
  APInt DemandedElts =
      FVTy ? APInt::getAllOnes(FVTy->getNumElements()) : APInt(1, 1);
  ::computeKnownBits(V, DemandedElts, Known, Depth, Q);
}
void llvm::computeKnownBits(const Value *V, KnownBits &Known,
                            const DataLayout &DL, unsigned Depth,
                            AssumptionCache *AC, const Instruction *CxtI,
                            const DominatorTree *DT, bool UseInstrInfo) {
  computeKnownBits(V, Known, Depth,
                   SimplifyQuery(DL, DT, AC, safeCxtI(V, CxtI), UseInstrInfo));
}
KnownBits llvm::computeKnownBits(const Value *V, const DataLayout &DL,
                                 unsigned Depth, AssumptionCache *AC,
                                 const Instruction *CxtI,
                                 const DominatorTree *DT, bool UseInstrInfo) {
  return computeKnownBits(
      V, Depth, SimplifyQuery(DL, DT, AC, safeCxtI(V, CxtI), UseInstrInfo));
}
KnownBits llvm::computeKnownBits(const Value *V, const APInt &DemandedElts,
                                 const DataLayout &DL, unsigned Depth,
                                 AssumptionCache *AC, const Instruction *CxtI,
                                 const DominatorTree *DT, bool UseInstrInfo) {
  return computeKnownBits(
      V, DemandedElts, Depth,
      SimplifyQuery(DL, DT, AC, safeCxtI(V, CxtI), UseInstrInfo));
}
static bool haveNoCommonBitsSetSpecialCases(const Value *LHS, const Value *RHS,
                                            const SimplifyQuery &SQ) {
  // Look for an inverted mask: (X & ~M) op (Y & M).
  {
    Value *M;
    if (match(LHS, m_c_And(m_Not(m_Value(M)), m_Value())) &&
        match(RHS, m_c_And(m_Specific(M), m_Value())) &&
        isGuaranteedNotToBeUndef(M, SQ.AC, SQ.CxtI, SQ.DT))
      return true;
  }

  // X op (Y & ~X)
  if (match(RHS, m_c_And(m_Not(m_Specific(LHS)), m_Value())) &&
      isGuaranteedNotToBeUndef(LHS, SQ.AC, SQ.CxtI, SQ.DT))
    return true;

  // X op ((X & Y) ^ Y) -- this is the canonical form of the previous pattern
  // for constant Y.
  Value *Y;
  if (match(RHS,
            m_c_Xor(m_c_And(m_Specific(LHS), m_Value(Y)), m_Deferred(Y))) &&
      isGuaranteedNotToBeUndef(LHS, SQ.AC, SQ.CxtI, SQ.DT) &&
      isGuaranteedNotToBeUndef(Y, SQ.AC, SQ.CxtI, SQ.DT))
    return true;

  // Peek through extends to find a 'not' of the other side:
  // (ext Y) op ext(~Y)
  if (match(LHS, m_ZExtOrSExt(m_Value(Y))) &&
      match(RHS, m_ZExtOrSExt(m_Not(m_Specific(Y)))) &&
      isGuaranteedNotToBeUndef(Y, SQ.AC, SQ.CxtI, SQ.DT))
    return true;

  // Look for: (A & B) op ~(A | B)
  {
    Value *A, *B;
    if (match(LHS, m_And(m_Value(A), m_Value(B))) &&
        match(RHS, m_Not(m_c_Or(m_Specific(A), m_Specific(B)))) &&
        isGuaranteedNotToBeUndef(A, SQ.AC, SQ.CxtI, SQ.DT) &&
        isGuaranteedNotToBeUndef(B, SQ.AC, SQ.CxtI, SQ.DT))
      return true;
  }

  // Look for: (X << V) op (Y >> (BitWidth - V))
  // or        (X >> V) op (Y << (BitWidth - V))
  {
    const Value *V;
    const APInt *R;
    if (((match(RHS, m_Shl(m_Value(), m_Sub(m_APInt(R), m_Value(V)))) &&
          match(LHS, m_LShr(m_Value(), m_Specific(V)))) ||
         (match(RHS, m_LShr(m_Value(), m_Sub(m_APInt(R), m_Value(V)))) &&
          match(LHS, m_Shl(m_Value(), m_Specific(V))))) &&
        R->uge(LHS->getType()->getScalarSizeInBits()))
      return true;
  }

  return false;
}
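// Illustration: for LHS = (X & ~M) and RHS = (Y & M) the two values occupy
// disjoint bit positions, so `add LHS, RHS` can never produce a carry and can
// be treated like a disjoint `or` -- the kind of fact haveNoCommonBitsSet is
// typically used to prove.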
bool llvm::haveNoCommonBitsSet(const WithCache<const Value *> &LHSCache,
                               const WithCache<const Value *> &RHSCache,
                               const SimplifyQuery &SQ) {
  const Value *LHS = LHSCache.getValue();
  const Value *RHS = RHSCache.getValue();

  assert(LHS->getType() == RHS->getType() &&
         "LHS and RHS should have the same type");
  assert(LHS->getType()->isIntOrIntVectorTy() &&
         "LHS and RHS should be integers");

  if (haveNoCommonBitsSetSpecialCases(LHS, RHS, SQ) ||
      haveNoCommonBitsSetSpecialCases(RHS, LHS, SQ))
    return true;

  return KnownBits::haveNoCommonBitsSet(LHSCache.getKnownBits(SQ),
                                        RHSCache.getKnownBits(SQ));
}
bool llvm::isOnlyUsedInZeroComparison(const Instruction *I) {
  return !I->user_empty() && all_of(I->users(), [](const User *U) {
    return match(U, m_ICmp(m_Value(), m_Zero()));
  });
}
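// Example of a value that satisfies these predicates:
//   %len = call i64 @strlen(ptr %p)
//   %cmp = icmp eq i64 %len, 0
// Every user of %len compares it against zero, which lets callers (e.g.
// libcall simplification) replace the strlen with a cheaper emptiness check.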
bool llvm::isOnlyUsedInZeroEqualityComparison(const Instruction *I) {
  return !I->user_empty() && all_of(I->users(), [](const User *U) {
    CmpPredicate P;
    return match(U, m_ICmp(P, m_Value(), m_Zero())) && ICmpInst::isEquality(P);
  });
}
bool llvm::isKnownToBeAPowerOfTwo(const Value *V, const DataLayout &DL,
                                  bool OrZero, unsigned Depth,
                                  AssumptionCache *AC, const Instruction *CxtI,
                                  const DominatorTree *DT, bool UseInstrInfo) {
  return ::isKnownToBeAPowerOfTwo(
      V, OrZero, Depth,
      SimplifyQuery(DL, DT, AC, safeCxtI(V, CxtI), UseInstrInfo));
}
static bool isKnownNonZero(const Value *V, const APInt &DemandedElts,
                           const SimplifyQuery &Q, unsigned Depth);
bool llvm::isKnownNonNegative(const Value *V, const SimplifyQuery &SQ,
                              unsigned Depth) {
  return computeKnownBits(V, Depth, SQ).isNonNegative();
}
bool llvm::isKnownPositive(const Value *V, const SimplifyQuery &SQ,
                           unsigned Depth) {
  if (auto *CI = dyn_cast<ConstantInt>(V))
    return CI->getValue().isStrictlyPositive();

  // If `isKnownNonNegative` ever becomes more sophisticated, make sure to keep
  // this updated.
  KnownBits Known = computeKnownBits(V, Depth, SQ);
  return Known.isNonNegative() &&
         (Known.isNonZero() || isKnownNonZero(V, SQ, Depth));
}
bool llvm::isKnownNegative(const Value *V, const SimplifyQuery &SQ,
                           unsigned Depth) {
  return computeKnownBits(V, Depth, SQ).isNegative();
}
static bool isKnownNonEqual(const Value *V1, const Value *V2,
                            const APInt &DemandedElts, unsigned Depth,
                            const SimplifyQuery &Q);
bool llvm::isKnownNonEqual(const Value *V1, const Value *V2,
                           const SimplifyQuery &Q, unsigned Depth) {
  // We don't support looking through casts.
  if (V1 == V2 || V1->getType() != V2->getType())
    return false;
  auto *FVTy = dyn_cast<FixedVectorType>(V1->getType());
  APInt DemandedElts =
      FVTy ? APInt::getAllOnes(FVTy->getNumElements()) : APInt(1, 1);
  return ::isKnownNonEqual(V1, V2, DemandedElts, Depth, Q);
}
bool llvm::MaskedValueIsZero(const Value *V, const APInt &Mask,
                             const SimplifyQuery &SQ, unsigned Depth) {
  KnownBits Known(Mask.getBitWidth());
  computeKnownBits(V, Known, Depth, SQ);
  return Mask.isSubsetOf(Known.Zero);
}
static unsigned ComputeNumSignBits(const Value *V, const APInt &DemandedElts,
                                   unsigned Depth, const SimplifyQuery &Q);

static unsigned ComputeNumSignBits(const Value *V, unsigned Depth,
                                   const SimplifyQuery &Q) {
  auto *FVTy = dyn_cast<FixedVectorType>(V->getType());
  APInt DemandedElts =
      FVTy ? APInt::getAllOnes(FVTy->getNumElements()) : APInt(1, 1);
  return ComputeNumSignBits(V, DemandedElts, Depth, Q);
}
unsigned llvm::ComputeNumSignBits(const Value *V, const DataLayout &DL,
                                  unsigned Depth, AssumptionCache *AC,
                                  const Instruction *CxtI,
                                  const DominatorTree *DT, bool UseInstrInfo) {
  return ::ComputeNumSignBits(
      V, Depth, SimplifyQuery(DL, DT, AC, safeCxtI(V, CxtI), UseInstrInfo));
}
unsigned llvm::ComputeMaxSignificantBits(const Value *V, const DataLayout &DL,
                                         unsigned Depth, AssumptionCache *AC,
                                         const Instruction *CxtI,
                                         const DominatorTree *DT) {
  unsigned SignBits = ComputeNumSignBits(V, DL, Depth, AC, CxtI, DT);
  return V->getType()->getScalarSizeInBits() - SignBits + 1;
}
static void computeKnownBitsAddSub(bool Add, const Value *Op0, const Value *Op1,
                                   bool NSW, bool NUW,
                                   const APInt &DemandedElts,
                                   KnownBits &KnownOut, KnownBits &Known2,
                                   unsigned Depth, const SimplifyQuery &Q) {
  computeKnownBits(Op1, DemandedElts, KnownOut, Depth + 1, Q);

  // If one operand is unknown and we have no nowrap information,
  // the result will be unknown independently of the second operand.
  if (KnownOut.isUnknown() && !NSW && !NUW)
    return;

  computeKnownBits(Op0, DemandedElts, Known2, Depth + 1, Q);
  KnownOut = KnownBits::computeForAddSub(Add, NSW, NUW, Known2, KnownOut);
}
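// Worked example for computeForAddSub above: if Op0 has known low bits ...10
// and Op1 has known low bits ...01, the sum has known low bits ...11 (the two
// known low bits cannot generate a carry), while higher bits stay unknown
// unless more bits of the operands are known.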
static void computeKnownBitsMul(const Value *Op0, const Value *Op1, bool NSW,
                                bool NUW, const APInt &DemandedElts,
                                KnownBits &Known, KnownBits &Known2,
                                unsigned Depth, const SimplifyQuery &Q) {
  computeKnownBits(Op1, DemandedElts, Known, Depth + 1, Q);
  computeKnownBits(Op0, DemandedElts, Known2, Depth + 1, Q);

  bool isKnownNegative = false;
  bool isKnownNonNegative = false;
  // If the multiplication is known not to overflow, compute the sign bit.
  if (NSW) {
    if (Op0 == Op1) {
      // The product of a number with itself is non-negative.
      isKnownNonNegative = true;
    } else {
      bool isKnownNonNegativeOp1 = Known.isNonNegative();
      bool isKnownNonNegativeOp0 = Known2.isNonNegative();
      bool isKnownNegativeOp1 = Known.isNegative();
      bool isKnownNegativeOp0 = Known2.isNegative();
      // The product of two numbers with the same sign is non-negative.
      isKnownNonNegative = (isKnownNegativeOp1 && isKnownNegativeOp0) ||
                           (isKnownNonNegativeOp1 && isKnownNonNegativeOp0);
      if (!isKnownNonNegative && NUW) {
        // mul nuw nsw with a factor > 1 is non-negative.
        KnownBits One = KnownBits::makeConstant(APInt(Known.getBitWidth(), 1));
        isKnownNonNegative = KnownBits::sgt(Known, One).value_or(false) ||
                             KnownBits::sgt(Known2, One).value_or(false);
      }

      // The product of a negative number and a non-negative number is either
      // negative or zero.
      if (!isKnownNonNegative)
        isKnownNegative =
            (isKnownNegativeOp1 && isKnownNonNegativeOp0 &&
             Known2.isNonZero()) ||
            (isKnownNegativeOp0 && isKnownNonNegativeOp1 && Known.isNonZero());
    }
  }

  bool SelfMultiply = Op0 == Op1;
  if (SelfMultiply)
    SelfMultiply &=
        isGuaranteedNotToBeUndef(Op0, Q.AC, Q.CxtI, Q.DT, Depth + 1);
  Known = KnownBits::mul(Known, Known2, SelfMultiply);

  // Only make use of no-wrap flags if we failed to compute the sign bit
  // directly. This matters if the multiplication always overflows, in
  // which case we prefer to follow the result of the direct computation,
  // though as the program is invoking undefined behaviour we can choose
  // whatever we like here.
  if (isKnownNonNegative && !Known.isNegative())
    Known.makeNonNegative();
  else if (isKnownNegative && !Known.isNonNegative())
    Known.makeNegative();
}
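// Example for the !range handling below: metadata !{i64 64, i64 65} pins the
// loaded value to the constant 64, so every bit becomes known. A wider range
// such as [96, 112) only fixes the bits common to all values in the range,
// i.e. the high prefix 0110....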
void llvm::computeKnownBitsFromRangeMetadata(const MDNode &Ranges,
                                             KnownBits &Known) {
  unsigned BitWidth = Known.getBitWidth();
  unsigned NumRanges = Ranges.getNumOperands() / 2;
  assert(NumRanges >= 1);

  Known.Zero.setAllBits();
  Known.One.setAllBits();

  for (unsigned i = 0; i < NumRanges; ++i) {
    ConstantInt *Lower =
        mdconst::extract<ConstantInt>(Ranges.getOperand(2 * i + 0));
    ConstantInt *Upper =
        mdconst::extract<ConstantInt>(Ranges.getOperand(2 * i + 1));
    ConstantRange Range(Lower->getValue(), Upper->getValue());

    // The first CommonPrefixBits of all values in Range are equal.
    unsigned CommonPrefixBits =
        (Range.getUnsignedMax() ^ Range.getUnsignedMin()).countl_zero();
    APInt Mask = APInt::getHighBitsSet(BitWidth, CommonPrefixBits);
    APInt UnsignedMax = Range.getUnsignedMax().zextOrTrunc(BitWidth);
    Known.One &= UnsignedMax & Mask;
    Known.Zero &= ~UnsignedMax & Mask;
  }
}
static bool isEphemeralValueOf(const Instruction *I, const Value *E) {
  SmallVector<const Value *, 16> WorkSet(1, I);
  SmallPtrSet<const Value *, 32> Visited;
  SmallPtrSet<const Value *, 16> EphValues;

  // The instruction defining an assumption's condition itself is always
  // considered ephemeral to that assumption (even if it has other
  // non-ephemeral users). See r246696's test case for an example.
  if (is_contained(I->operands(), E))
    return true;

  while (!WorkSet.empty()) {
    const Value *V = WorkSet.pop_back_val();
    if (!Visited.insert(V).second)
      continue;

    // If all uses of this value are ephemeral, then so is this value.
    if (llvm::all_of(V->users(), [&](const User *U) {
                       return EphValues.count(U);
                     })) {
      if (V == E)
        return true;

      if (V == I || (isa<Instruction>(V) &&
                     !cast<Instruction>(V)->mayHaveSideEffects() &&
                     !cast<Instruction>(V)->isTerminator())) {
        EphValues.insert(V);
        if (const User *U = dyn_cast<User>(V))
          append_range(WorkSet, U->operands());
      }
    }
  }

  return false;
}
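// Example: in
//   %c = icmp ugt i32 %x, 10
//   call void @llvm.assume(i1 %c)
// the icmp only feeds the assume, so it is ephemeral to it; the assume must
// not be used to justify transformations of the icmp itself.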
// Is this an intrinsic that cannot be speculated but also cannot trap?
bool llvm::isAssumeLikeIntrinsic(const Instruction *I) {
  if (const IntrinsicInst *CI = dyn_cast<IntrinsicInst>(I))
    return CI->isAssumeLikeIntrinsic();

  return false;
}
bool llvm::isValidAssumeForContext(const Instruction *Inv,
                                   const Instruction *CxtI,
                                   const DominatorTree *DT,
                                   bool AllowEphemerals) {
  // There are two restrictions on the use of an assume:
  //  1. The assume must dominate the context (or the control flow must
  //     reach the assume whenever it reaches the context).
  //  2. The context must not be in the assume's set of ephemeral values
  //     (otherwise we will use the assume to prove that the condition
  //     feeding the assume is trivially true, thus causing the removal of
  //     the assume).

  if (Inv->getParent() == CxtI->getParent()) {
    // If Inv and CxtI are in the same block, check if the assume (Inv) is first
    // in the block.
    if (Inv->comesBefore(CxtI))
      return true;

    // Don't let an assume affect itself - this would cause the problems
    // `isEphemeralValueOf` is trying to prevent, and it would also make
    // the loop below go out of bounds.
    if (!AllowEphemerals && Inv == CxtI)
      return false;

    // The context comes first, but they're both in the same block.
    // Make sure there is nothing in between that might interrupt
    // the control flow, not even CxtI itself.
    // We limit the scan distance between the assume and its context instruction
    // to avoid a compile-time explosion. This limit is chosen arbitrarily, so
    // it can be adjusted if needed (could be turned into a cl::opt).
    auto Range = make_range(CxtI->getIterator(), Inv->getIterator());
    if (!isGuaranteedToTransferExecutionToSuccessor(Range, 15))
      return false;

    return AllowEphemerals || !isEphemeralValueOf(Inv, CxtI);
  }

  // Inv and CxtI are in different blocks.
  if (DT) {
    if (DT->dominates(Inv, CxtI))
      return true;
  } else if (Inv->getParent() == CxtI->getParent()->getSinglePredecessor() ||
             Inv->getParent()->isEntryBlock()) {
    // We don't have a DT, but this trivially dominates.
    return true;
  }

  return false;
}
// TODO: cmpExcludesZero misses many cases where `RHS` is non-constant but
// we still have enough information about `RHS` to conclude non-zero. For
// example Pred=EQ, RHS=isKnownNonZero. cmpExcludesZero is called in loops
// so the extra compile time may not be worth it, but possibly a second API
// should be created for use outside of loops.
static bool cmpExcludesZero(CmpInst::Predicate Pred, const Value *RHS) {
  // v u> y implies v != 0.
  if (Pred == ICmpInst::ICMP_UGT)
    return true;

  // Special-case v != 0 to also handle v != null.
  if (Pred == ICmpInst::ICMP_NE)
    return match(RHS, m_Zero());

  // All other predicates - rely on generic ConstantRange handling.
  const APInt *C;
  auto Zero = APInt::getZero(RHS->getType()->getScalarSizeInBits());
  if (match(RHS, m_APInt(C))) {
    ConstantRange TrueValues = ConstantRange::makeExactICmpRegion(Pred, *C);
    return !TrueValues.contains(Zero);
  }

  auto *VC = dyn_cast<ConstantDataVector>(RHS);
  if (VC == nullptr)
    return false;

  for (unsigned ElemIdx = 0, NElem = VC->getNumElements(); ElemIdx < NElem;
       ++ElemIdx) {
    ConstantRange TrueValues = ConstantRange::makeExactICmpRegion(
        Pred, VC->getElementAsAPInt(ElemIdx));
    if (TrueValues.contains(Zero))
      return false;
  }
  return true;
}
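// Example: `icmp ugt i32 %v, %y` excludes zero for %v, because %v u> %y >= 0
// implies %v >= 1, whereas `icmp uge` with a non-constant RHS does not.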
static void breakSelfRecursivePHI(const Use *U, const PHINode *PHI,
                                  Value *&ValOut, Instruction *&CtxIOut,
                                  const PHINode **PhiOut = nullptr) {
  ValOut = U->get();
  if (ValOut == PHI)
    return;
  CtxIOut = PHI->getIncomingBlock(*U)->getTerminator();
  if (PhiOut)
    *PhiOut = PHI;

  Value *V;
  // If the Use is a select of this phi, compute analysis on other arm to break
  // recursion.
  if (match(ValOut, m_Select(m_Value(), m_Specific(PHI), m_Value(V))) ||
      match(ValOut, m_Select(m_Value(), m_Value(V), m_Specific(PHI))))
    ValOut = V;

  // Likewise, if this phi's incoming value is a 2-operand phi, compute analysis
  // on the other incoming value to break recursion.
  // TODO: We could handle any number of incoming edges as long as we only have
  // two unique values.
  if (auto *IncPhi = dyn_cast<PHINode>(ValOut);
      IncPhi && IncPhi->getNumIncomingValues() == 2) {
    for (int Idx = 0; Idx < 2; ++Idx) {
      if (IncPhi->getIncomingValue(Idx) == PHI) {
        ValOut = IncPhi->getIncomingValue(1 - Idx);
        if (PhiOut)
          *PhiOut = IncPhi;
        CtxIOut = IncPhi->getIncomingBlock(1 - Idx)->getTerminator();
        break;
      }
    }
  }
}
*V
, const SimplifyQuery
&Q
) {
609 // Use of assumptions is context-sensitive. If we don't have a context, we
611 if (!Q
.AC
|| !Q
.CxtI
)
614 for (AssumptionCache::ResultElem
&Elem
: Q
.AC
->assumptionsFor(V
)) {
618 AssumeInst
*I
= cast
<AssumeInst
>(Elem
.Assume
);
619 assert(I
->getFunction() == Q
.CxtI
->getFunction() &&
620 "Got assumption for the wrong function!");
622 if (Elem
.Index
!= AssumptionCache::ExprResultIdx
) {
623 if (!V
->getType()->isPointerTy())
625 if (RetainedKnowledge RK
= getKnowledgeFromBundle(
626 *I
, I
->bundle_op_info_begin()[Elem
.Index
])) {
628 (RK
.AttrKind
== Attribute::NonNull
||
629 (RK
.AttrKind
== Attribute::Dereferenceable
&&
630 !NullPointerIsDefined(Q
.CxtI
->getFunction(),
631 V
->getType()->getPointerAddressSpace()))) &&
632 isValidAssumeForContext(I
, Q
.CxtI
, Q
.DT
))
638 // Warning: This loop can end up being somewhat performance sensitive.
639 // We're running this loop for once for each value queried resulting in a
640 // runtime of ~O(#assumes * #values).
644 auto m_V
= m_CombineOr(m_Specific(V
), m_PtrToInt(m_Specific(V
)));
645 if (!match(I
->getArgOperand(0), m_c_ICmp(Pred
, m_V
, m_Value(RHS
))))
648 if (cmpExcludesZero(Pred
, RHS
) && isValidAssumeForContext(I
, Q
.CxtI
, Q
.DT
))
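// Example of the facts recovered below: after
//   call void @llvm.assume(i1 (icmp ult i32 %v, 16))
// the top 28 bits of %v are known zero; after assume((%v & 7) == 5) the low
// three bits of %v are known to be 0b101.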
static void computeKnownBitsFromCmp(const Value *V, CmpInst::Predicate Pred,
                                    Value *LHS, Value *RHS, KnownBits &Known,
                                    const SimplifyQuery &Q) {
  if (RHS->getType()->isPointerTy()) {
    // Handle comparison of pointer to null explicitly, as it will not be
    // covered by the m_APInt() logic below.
    if (LHS == V && match(RHS, m_Zero())) {
      switch (Pred) {
      case ICmpInst::ICMP_EQ:
        Known.setAllZero();
        break;
      case ICmpInst::ICMP_SGE:
      case ICmpInst::ICMP_SGT:
        Known.makeNonNegative();
        break;
      case ICmpInst::ICMP_SLT:
        Known.makeNegative();
        break;
      default:
        break;
      }
    }
    return;
  }

  unsigned BitWidth = Known.getBitWidth();
  auto m_V =
      m_CombineOr(m_Specific(V), m_PtrToIntSameSize(Q.DL, m_Specific(V)));

  Value *Y;
  const APInt *Mask, *C;
  uint64_t ShAmt;

  switch (Pred) {
  case ICmpInst::ICMP_EQ:
    // assume(V = C)
    if (match(LHS, m_V) && match(RHS, m_APInt(C))) {
      Known = Known.unionWith(KnownBits::makeConstant(*C));
      // assume(V & Mask = C)
    } else if (match(LHS, m_c_And(m_V, m_Value(Y))) &&
               match(RHS, m_APInt(C))) {
      // For one bits in Mask, we can propagate bits from C to V.
      Known.One |= *C;
      if (match(Y, m_APInt(Mask)))
        Known.Zero |= ~*C & *Mask;
      // assume(V | Mask = C)
    } else if (match(LHS, m_c_Or(m_V, m_Value(Y))) && match(RHS, m_APInt(C))) {
      // For zero bits in Mask, we can propagate bits from C to V.
      Known.Zero |= ~*C;
      if (match(Y, m_APInt(Mask)))
        Known.One |= *C & ~*Mask;
      // assume(V ^ Mask = C)
    } else if (match(LHS, m_Xor(m_V, m_APInt(Mask))) &&
               match(RHS, m_APInt(C))) {
      // Equivalent to assume(V == Mask ^ C)
      Known = Known.unionWith(KnownBits::makeConstant(*C ^ *Mask));
      // assume(V << ShAmt = C)
    } else if (match(LHS, m_Shl(m_V, m_ConstantInt(ShAmt))) &&
               match(RHS, m_APInt(C)) && ShAmt < BitWidth) {
      // For those bits in C that are known, we can propagate them to known
      // bits in V shifted to the right by ShAmt.
      KnownBits RHSKnown = KnownBits::makeConstant(*C);
      RHSKnown.Zero.lshrInPlace(ShAmt);
      RHSKnown.One.lshrInPlace(ShAmt);
      Known = Known.unionWith(RHSKnown);
      // assume(V >> ShAmt = C)
    } else if (match(LHS, m_Shr(m_V, m_ConstantInt(ShAmt))) &&
               match(RHS, m_APInt(C)) && ShAmt < BitWidth) {
      KnownBits RHSKnown = KnownBits::makeConstant(*C);
      // For those bits in RHS that are known, we can propagate them to known
      // bits in V shifted to the left by ShAmt.
      Known.Zero |= RHSKnown.Zero << ShAmt;
      Known.One |= RHSKnown.One << ShAmt;
    }
    break;
  case ICmpInst::ICMP_NE: {
    // assume (V & B != 0) where B is a power of 2
    const APInt *BPow2;
    if (match(LHS, m_And(m_V, m_Power2(BPow2))) && match(RHS, m_Zero()))
      Known.One |= *BPow2;
    break;
  }
  default:
    if (match(RHS, m_APInt(C))) {
      const APInt *Offset = nullptr;
      if (match(LHS, m_CombineOr(m_V, m_AddLike(m_V, m_APInt(Offset))))) {
        ConstantRange LHSRange = ConstantRange::makeAllowedICmpRegion(Pred, *C);
        if (Offset)
          LHSRange = LHSRange.sub(*Offset);
        Known = Known.unionWith(LHSRange.toKnownBits());
      }
      if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE) {
        // X & Y u> C     -> X u> C && Y u> C
        // X nuw- Y u> C  -> X u> C
        if (match(LHS, m_c_And(m_V, m_Value())) ||
            match(LHS, m_NUWSub(m_V, m_Value())))
          Known.One.setHighBits(
              (*C + (Pred == ICmpInst::ICMP_UGT)).countLeadingOnes());
      }
      if (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_ULE) {
        // X | Y u< C    -> X u< C && Y u< C
        // X nuw+ Y u< C -> X u< C && Y u< C
        if (match(LHS, m_c_Or(m_V, m_Value())) ||
            match(LHS, m_c_NUWAdd(m_V, m_Value()))) {
          Known.Zero.setHighBits(
              (*C - (Pred == ICmpInst::ICMP_ULT)).countLeadingZeros());
        }
      }
    }
    break;
  }
}
*V
, ICmpInst
*Cmp
,
769 const SimplifyQuery
&SQ
, bool Invert
) {
770 ICmpInst::Predicate Pred
=
771 Invert
? Cmp
->getInversePredicate() : Cmp
->getPredicate();
772 Value
*LHS
= Cmp
->getOperand(0);
773 Value
*RHS
= Cmp
->getOperand(1);
775 // Handle icmp pred (trunc V), C
776 if (match(LHS
, m_Trunc(m_Specific(V
)))) {
777 KnownBits
DstKnown(LHS
->getType()->getScalarSizeInBits());
778 computeKnownBitsFromCmp(LHS
, Pred
, LHS
, RHS
, DstKnown
, SQ
);
779 Known
= Known
.unionWith(DstKnown
.anyext(Known
.getBitWidth()));
783 computeKnownBitsFromCmp(V
, Pred
, LHS
, RHS
, Known
, SQ
);
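// Example for the trunc handling above: for i8 %v, the condition
// `icmp ult (trunc i8 %v to i4), 4` only constrains the low four bits of %v;
// the anyext keeps the high bits unknown while recording that bits 2 and 3
// are zero.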
static void computeKnownBitsFromCond(const Value *V, Value *Cond,
                                     KnownBits &Known, unsigned Depth,
                                     const SimplifyQuery &SQ, bool Invert) {
  Value *A, *B;
  if (Depth < MaxAnalysisRecursionDepth &&
      match(Cond, m_LogicalOp(m_Value(A), m_Value(B)))) {
    KnownBits Known2(Known.getBitWidth());
    KnownBits Known3(Known.getBitWidth());
    computeKnownBitsFromCond(V, A, Known2, Depth + 1, SQ, Invert);
    computeKnownBitsFromCond(V, B, Known3, Depth + 1, SQ, Invert);
    if (Invert ? match(Cond, m_LogicalOr(m_Value(), m_Value()))
               : match(Cond, m_LogicalAnd(m_Value(), m_Value())))
      Known2 = Known2.unionWith(Known3);
    else
      Known2 = Known2.intersectWith(Known3);
    Known = Known.unionWith(Known2);
  }

  if (auto *Cmp = dyn_cast<ICmpInst>(Cond))
    computeKnownBitsFromICmpCond(V, Cmp, Known, SQ, Invert);
}
void llvm::computeKnownBitsFromContext(const Value *V, KnownBits &Known,
                                       unsigned Depth, const SimplifyQuery &Q) {
  // Handle injected condition.
  if (Q.CC && Q.CC->AffectedValues.contains(V))
    computeKnownBitsFromCond(V, Q.CC->Cond, Known, Depth, Q, Q.CC->Invert);

  if (!Q.CxtI)
    return;

  if (Q.DC && Q.DT) {
    // Handle dominating conditions.
    for (BranchInst *BI : Q.DC->conditionsFor(V)) {
      BasicBlockEdge Edge0(BI->getParent(), BI->getSuccessor(0));
      if (Q.DT->dominates(Edge0, Q.CxtI->getParent()))
        computeKnownBitsFromCond(V, BI->getCondition(), Known, Depth, Q,
                                 /*Invert*/ false);

      BasicBlockEdge Edge1(BI->getParent(), BI->getSuccessor(1));
      if (Q.DT->dominates(Edge1, Q.CxtI->getParent()))
        computeKnownBitsFromCond(V, BI->getCondition(), Known, Depth, Q,
                                 /*Invert*/ true);
    }

    if (Known.hasConflict())
      Known.resetAll();
  }

  if (!Q.AC)
    return;

  unsigned BitWidth = Known.getBitWidth();

  // Note that the patterns below need to be kept in sync with the code
  // in AssumptionCache::updateAffectedValues.

  for (AssumptionCache::ResultElem &Elem : Q.AC->assumptionsFor(V)) {
    if (!Elem.Assume)
      continue;

    AssumeInst *I = cast<AssumeInst>(Elem.Assume);
    assert(I->getParent()->getParent() == Q.CxtI->getParent()->getParent() &&
           "Got assumption for the wrong function!");

    if (Elem.Index != AssumptionCache::ExprResultIdx) {
      if (!V->getType()->isPointerTy())
        continue;
      if (RetainedKnowledge RK = getKnowledgeFromBundle(
              *I, I->bundle_op_info_begin()[Elem.Index])) {
        // Allow AllowEphemerals in isValidAssumeForContext, as the CxtI might
        // be the producer of the pointer in the bundle. At the moment, align
        // assumptions aren't optimized away.
        if (RK.WasOn == V && RK.AttrKind == Attribute::Alignment &&
            isPowerOf2_64(RK.ArgValue) &&
            isValidAssumeForContext(I, Q.CxtI, Q.DT, /*AllowEphemerals*/ true))
          Known.Zero.setLowBits(Log2_64(RK.ArgValue));
      }
      continue;
    }

    // Warning: This loop can end up being somewhat performance sensitive.
    // We're running this loop for once for each value queried resulting in a
    // runtime of ~O(#assumes * #values).

    Value *Arg = I->getArgOperand(0);

    if (Arg == V && isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
      assert(BitWidth == 1 && "assume operand is not i1?");
      (void)BitWidth;
      Known.setAllOnes();
      return;
    }
    if (match(Arg, m_Not(m_Specific(V))) &&
        isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
      assert(BitWidth == 1 && "assume operand is not i1?");
      (void)BitWidth;
      Known.setAllZero();
      return;
    }

    // The remaining tests are all recursive, so bail out if we hit the limit.
    if (Depth == MaxAnalysisRecursionDepth)
      continue;

    ICmpInst *Cmp = dyn_cast<ICmpInst>(Arg);
    if (!Cmp)
      continue;

    if (!isValidAssumeForContext(I, Q.CxtI, Q.DT))
      continue;

    computeKnownBitsFromICmpCond(V, Cmp, Known, Q, /*Invert=*/false);
  }

  // Conflicting assumption: Undefined behavior will occur on this execution
  // path.
  if (Known.hasConflict())
    Known.resetAll();
}
/// Compute known bits from a shift operator, including those with a
/// non-constant shift amount. Known is the output of this function. Known2 is a
/// pre-allocated temporary with the same bit width as Known and on return
/// contains the known bits of the shift value source. KF is an
/// operator-specific function that, given the known bits and a shift amount,
/// computes the implied known bits of the shift operator's result for that
/// shift amount. The results from calling KF are conservatively combined for
/// all permitted shift amounts.
static void computeKnownBitsFromShiftOperator(
    const Operator *I, const APInt &DemandedElts, KnownBits &Known,
    KnownBits &Known2, unsigned Depth, const SimplifyQuery &Q,
    function_ref<KnownBits(const KnownBits &, const KnownBits &, bool)> KF) {
  computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);
  computeKnownBits(I->getOperand(1), DemandedElts, Known, Depth + 1, Q);
  // To limit compile-time impact, only query isKnownNonZero() if we know at
  // least something about the shift amount.
  bool ShAmtNonZero =
      Known.isNonZero() ||
      (Known.getMaxValue().ult(Known.getBitWidth()) &&
       isKnownNonZero(I->getOperand(1), DemandedElts, Q, Depth + 1));
  Known = KF(Known2, Known, ShAmtNonZero);
}
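// Example: for `shl i8 %x, %amt` with %amt known to be the constant 3, KF
// receives the known bits of %x plus the constant amount, and KnownBits::shl
// reports at least three trailing zero bits in the result.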
static KnownBits
getKnownBitsFromAndXorOr(const Operator *I, const APInt &DemandedElts,
                         const KnownBits &KnownLHS, const KnownBits &KnownRHS,
                         unsigned Depth, const SimplifyQuery &Q) {
  unsigned BitWidth = KnownLHS.getBitWidth();
  KnownBits KnownOut(BitWidth);
  bool IsAnd = false;
  bool HasKnownOne = !KnownLHS.One.isZero() || !KnownRHS.One.isZero();
  Value *X = nullptr, *Y = nullptr;

  switch (I->getOpcode()) {
  case Instruction::And:
    KnownOut = KnownLHS & KnownRHS;
    IsAnd = true;
    // and(x, -x) is a common idiom that will clear all but the lowest set
    // bit. If we have a single known bit in x, we can clear all bits
    // above it.
    // TODO: instcombine often reassociates independent `and` which can hide
    // this pattern. Try to match and(x, and(-x, y)) / and(and(x, y), -x).
    if (HasKnownOne && match(I, m_c_And(m_Value(X), m_Neg(m_Deferred(X))))) {
      // -(-x) == x so using whichever (LHS/RHS) gets us a better result.
      if (KnownLHS.countMaxTrailingZeros() <= KnownRHS.countMaxTrailingZeros())
        KnownOut = KnownLHS.blsi();
      else
        KnownOut = KnownRHS.blsi();
    }
    break;
  case Instruction::Or:
    KnownOut = KnownLHS | KnownRHS;
    break;
  case Instruction::Xor:
    KnownOut = KnownLHS ^ KnownRHS;
    // xor(x, x-1) is a common idiom that will clear all but the lowest set
    // bit. If we have a single known bit in x, we can clear all bits
    // above it.
    // TODO: xor(x, x-1) is often rewritten as xor(x, x-C) where C !=
    // -1 but for the purpose of demanded bits (xor(x, x-C) &
    // Demanded) == (xor(x, x-1) & Demanded). Extend the xor pattern
    // to use arbitrary C if xor(x, x-C) is the same as xor(x, x-1).
    if (HasKnownOne &&
        match(I, m_c_Xor(m_Value(X), m_Add(m_Deferred(X), m_AllOnes())))) {
      const KnownBits &XBits = I->getOperand(0) == X ? KnownLHS : KnownRHS;
      KnownOut = XBits.blsmsk();
    }
    break;
  default:
    llvm_unreachable("Invalid Op used in 'analyzeKnownBitsFromAndXorOr'");
  }

  // and(x, add (x, -1)) is a common idiom that always clears the low bit;
  // xor/or(x, add (x, -1)) is an idiom that will always set the low bit.
  // here we handle the more general case of adding any odd number by
  // matching the form and/xor/or(x, add(x, y)) where y is odd.
  // TODO: This could be generalized to clearing any bit set in y where the
  // following bit is known to be unset in y.
  if (!KnownOut.Zero[0] && !KnownOut.One[0] &&
      (match(I, m_c_BinOp(m_Value(X), m_c_Add(m_Deferred(X), m_Value(Y)))) ||
       match(I, m_c_BinOp(m_Value(X), m_Sub(m_Deferred(X), m_Value(Y)))) ||
       match(I, m_c_BinOp(m_Value(X), m_Sub(m_Value(Y), m_Deferred(X)))))) {
    KnownBits KnownY(BitWidth);
    computeKnownBits(Y, DemandedElts, KnownY, Depth + 1, Q);
    if (KnownY.countMinTrailingOnes() > 0) {
      if (IsAnd)
        KnownOut.Zero.setBit(0);
      else
        KnownOut.One.setBit(0);
    }
  }
  return KnownOut;
}
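// Worked example for the idioms above: if the lowest set bit of %x is known to
// be bit 3, then and(%x, -%x) is exactly 0b1000 (blsi) and xor(%x, %x - 1) is
// the low-bit mask 0b1111 (blsmsk).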
static KnownBits computeKnownBitsForHorizontalOperation(
    const Operator *I, const APInt &DemandedElts, unsigned Depth,
    const SimplifyQuery &Q,
    const function_ref<KnownBits(const KnownBits &, const KnownBits &)>
        KnownBitsFunc) {
  APInt DemandedEltsLHS, DemandedEltsRHS;
  getHorizDemandedEltsForFirstOperand(Q.DL.getTypeSizeInBits(I->getType()),
                                      DemandedElts, DemandedEltsLHS,
                                      DemandedEltsRHS);

  const auto ComputeForSingleOpFunc =
      [Depth, &Q, KnownBitsFunc](const Value *Op, APInt &DemandedEltsOp) {
        return KnownBitsFunc(
            computeKnownBits(Op, DemandedEltsOp, Depth + 1, Q),
            computeKnownBits(Op, DemandedEltsOp << 1, Depth + 1, Q));
      };

  if (DemandedEltsRHS.isZero())
    return ComputeForSingleOpFunc(I->getOperand(0), DemandedEltsLHS);
  if (DemandedEltsLHS.isZero())
    return ComputeForSingleOpFunc(I->getOperand(1), DemandedEltsRHS);

  return ComputeForSingleOpFunc(I->getOperand(0), DemandedEltsLHS)
      .intersectWith(ComputeForSingleOpFunc(I->getOperand(1), DemandedEltsRHS));
}
// Public so this can be used in `SimplifyDemandedUseBits`.
KnownBits llvm::analyzeKnownBitsFromAndXorOr(const Operator *I,
                                             const KnownBits &KnownLHS,
                                             const KnownBits &KnownRHS,
                                             unsigned Depth,
                                             const SimplifyQuery &SQ) {
  auto *FVTy = dyn_cast<FixedVectorType>(I->getType());
  APInt DemandedElts =
      FVTy ? APInt::getAllOnes(FVTy->getNumElements()) : APInt(1, 1);

  return getKnownBitsFromAndXorOr(I, DemandedElts, KnownLHS, KnownRHS, Depth,
                                  SQ);
}
ConstantRange llvm::getVScaleRange(const Function *F, unsigned BitWidth) {
  Attribute Attr = F->getFnAttribute(Attribute::VScaleRange);
  // Without vscale_range, we only know that vscale is non-zero.
  if (!Attr.isValid())
    return ConstantRange(APInt(BitWidth, 1), APInt::getZero(BitWidth));

  unsigned AttrMin = Attr.getVScaleRangeMin();
  // Minimum is larger than vscale width, result is always poison.
  if ((unsigned)llvm::bit_width(AttrMin) > BitWidth)
    return ConstantRange::getEmpty(BitWidth);

  APInt Min(BitWidth, AttrMin);
  std::optional<unsigned> AttrMax = Attr.getVScaleRangeMax();
  if (!AttrMax || (unsigned)llvm::bit_width(*AttrMax) > BitWidth)
    return ConstantRange(Min, APInt::getZero(BitWidth));

  return ConstantRange(Min, APInt(BitWidth, *AttrMax) + 1);
}
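// Example: with `vscale_range(2,16)` on the function, the returned range is
// [2, 17), i.e. vscale is known to be between 2 and 16 inclusive.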
void llvm::adjustKnownBitsForSelectArm(KnownBits &Known, Value *Cond,
                                       Value *Arm, bool Invert, unsigned Depth,
                                       const SimplifyQuery &Q) {
  // If we have a constant arm, we are done.
  if (Known.isConstant())
    return;

  // See what condition implies about the bits of the select arm.
  KnownBits CondRes(Known.getBitWidth());
  computeKnownBitsFromCond(Arm, Cond, CondRes, Depth + 1, Q, Invert);
  // If we don't get any information from the condition, no reason to
  // proceed.
  if (CondRes.isUnknown())
    return;

  // We can have conflict if the condition is dead. I.e if we have
  // (x | 64) < 32 ? (x | 64) : y
  // we will have conflict at bit 6 from the condition/the `or`.
  // In that case just return. It's not particularly important
  // what we do, as this select is going to be simplified soon.
  CondRes = CondRes.unionWith(Known);
  if (CondRes.hasConflict())
    return;

  // Finally make sure the information we found is valid. This is relatively
  // expensive so it's left for the very end.
  if (!isGuaranteedNotToBeUndef(Arm, Q.AC, Q.CxtI, Q.DT, Depth + 1))
    return;

  // Finally, we know we get information from the condition and it's valid,
  // so return it.
  Known = CondRes;
}
// Match a signed min+max clamp pattern like smax(smin(In, CHigh), CLow).
// Returns the input and lower/upper bounds.
static bool isSignedMinMaxClamp(const Value *Select, const Value *&In,
                                const APInt *&CLow, const APInt *&CHigh) {
  assert(isa<Operator>(Select) &&
         cast<Operator>(Select)->getOpcode() == Instruction::Select &&
         "Input should be a Select!");

  const Value *LHS = nullptr, *RHS = nullptr;
  SelectPatternFlavor SPF = matchSelectPattern(Select, LHS, RHS).Flavor;
  if (SPF != SPF_SMAX && SPF != SPF_SMIN)
    return false;

  if (!match(RHS, m_APInt(CLow)))
    return false;

  const Value *LHS2 = nullptr, *RHS2 = nullptr;
  SelectPatternFlavor SPF2 = matchSelectPattern(LHS, LHS2, RHS2).Flavor;
  if (getInverseMinMaxFlavor(SPF) != SPF2)
    return false;

  if (!match(RHS2, m_APInt(CHigh)))
    return false;

  if (SPF == SPF_SMIN)
    std::swap(CLow, CHigh);

  In = LHS2;
  return CLow->sle(*CHigh);
}
static bool isSignedMinMaxIntrinsicClamp(const IntrinsicInst *II,
                                         const APInt *&CLow,
                                         const APInt *&CHigh) {
  assert((II->getIntrinsicID() == Intrinsic::smin ||
          II->getIntrinsicID() == Intrinsic::smax) &&
         "Must be smin/smax");

  Intrinsic::ID InverseID = getInverseMinMaxIntrinsic(II->getIntrinsicID());
  auto *InnerII = dyn_cast<IntrinsicInst>(II->getArgOperand(0));
  if (!InnerII || InnerII->getIntrinsicID() != InverseID ||
      !match(II->getArgOperand(1), m_APInt(CLow)) ||
      !match(InnerII->getArgOperand(1), m_APInt(CHigh)))
    return false;

  if (II->getIntrinsicID() == Intrinsic::smin)
    std::swap(CLow, CHigh);
  return CLow->sle(*CHigh);
}
static void unionWithMinMaxIntrinsicClamp(const IntrinsicInst *II,
                                          KnownBits &Known) {
  const APInt *CLow, *CHigh;
  if (isSignedMinMaxIntrinsicClamp(II, CLow, CHigh))
    Known = Known.unionWith(
        ConstantRange::getNonEmpty(*CLow, *CHigh + 1).toKnownBits());
}
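// Example for the clamp helpers above: in smax(smin(%x, 255), 0) on i32, the
// value is clamped to [0, 255], so unionWithMinMaxIntrinsicClamp can mark all
// bits above bit 7 as known zero.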
1152 static void computeKnownBitsFromOperator(const Operator
*I
,
1153 const APInt
&DemandedElts
,
1154 KnownBits
&Known
, unsigned Depth
,
1155 const SimplifyQuery
&Q
) {
1156 unsigned BitWidth
= Known
.getBitWidth();
1158 KnownBits
Known2(BitWidth
);
1159 switch (I
->getOpcode()) {
1161 case Instruction::Load
:
1163 Q
.IIQ
.getMetadata(cast
<LoadInst
>(I
), LLVMContext::MD_range
))
1164 computeKnownBitsFromRangeMetadata(*MD
, Known
);
1166 case Instruction::And
:
1167 computeKnownBits(I
->getOperand(1), DemandedElts
, Known
, Depth
+ 1, Q
);
1168 computeKnownBits(I
->getOperand(0), DemandedElts
, Known2
, Depth
+ 1, Q
);
1170 Known
= getKnownBitsFromAndXorOr(I
, DemandedElts
, Known2
, Known
, Depth
, Q
);
1172 case Instruction::Or
:
1173 computeKnownBits(I
->getOperand(1), DemandedElts
, Known
, Depth
+ 1, Q
);
1174 computeKnownBits(I
->getOperand(0), DemandedElts
, Known2
, Depth
+ 1, Q
);
1176 Known
= getKnownBitsFromAndXorOr(I
, DemandedElts
, Known2
, Known
, Depth
, Q
);
1178 case Instruction::Xor
:
1179 computeKnownBits(I
->getOperand(1), DemandedElts
, Known
, Depth
+ 1, Q
);
1180 computeKnownBits(I
->getOperand(0), DemandedElts
, Known2
, Depth
+ 1, Q
);
1182 Known
= getKnownBitsFromAndXorOr(I
, DemandedElts
, Known2
, Known
, Depth
, Q
);
1184 case Instruction::Mul
: {
1185 bool NSW
= Q
.IIQ
.hasNoSignedWrap(cast
<OverflowingBinaryOperator
>(I
));
1186 bool NUW
= Q
.IIQ
.hasNoUnsignedWrap(cast
<OverflowingBinaryOperator
>(I
));
1187 computeKnownBitsMul(I
->getOperand(0), I
->getOperand(1), NSW
, NUW
,
1188 DemandedElts
, Known
, Known2
, Depth
, Q
);
1191 case Instruction::UDiv
: {
1192 computeKnownBits(I
->getOperand(0), DemandedElts
, Known
, Depth
+ 1, Q
);
1193 computeKnownBits(I
->getOperand(1), DemandedElts
, Known2
, Depth
+ 1, Q
);
1195 KnownBits::udiv(Known
, Known2
, Q
.IIQ
.isExact(cast
<BinaryOperator
>(I
)));
1198 case Instruction::SDiv
: {
1199 computeKnownBits(I
->getOperand(0), DemandedElts
, Known
, Depth
+ 1, Q
);
1200 computeKnownBits(I
->getOperand(1), DemandedElts
, Known2
, Depth
+ 1, Q
);
1202 KnownBits::sdiv(Known
, Known2
, Q
.IIQ
.isExact(cast
<BinaryOperator
>(I
)));
1205 case Instruction::Select
: {
1206 auto ComputeForArm
= [&](Value
*Arm
, bool Invert
) {
1207 KnownBits
Res(Known
.getBitWidth());
1208 computeKnownBits(Arm
, DemandedElts
, Res
, Depth
+ 1, Q
);
1209 adjustKnownBitsForSelectArm(Res
, I
->getOperand(0), Arm
, Invert
, Depth
, Q
);
1212 // Only known if known in both the LHS and RHS.
1214 ComputeForArm(I
->getOperand(1), /*Invert=*/false)
1215 .intersectWith(ComputeForArm(I
->getOperand(2), /*Invert=*/true));
1218 case Instruction::FPTrunc
:
1219 case Instruction::FPExt
:
1220 case Instruction::FPToUI
:
1221 case Instruction::FPToSI
:
1222 case Instruction::SIToFP
:
1223 case Instruction::UIToFP
:
1224 break; // Can't work with floating point.
1225 case Instruction::PtrToInt
:
1226 case Instruction::IntToPtr
:
1227 // Fall through and handle them the same as zext/trunc.
1229 case Instruction::ZExt
:
1230 case Instruction::Trunc
: {
1231 Type
*SrcTy
= I
->getOperand(0)->getType();
1233 unsigned SrcBitWidth
;
1234 // Note that we handle pointer operands here because of inttoptr/ptrtoint
1235 // which fall through here.
1236 Type
*ScalarTy
= SrcTy
->getScalarType();
1237 SrcBitWidth
= ScalarTy
->isPointerTy() ?
1238 Q
.DL
.getPointerTypeSizeInBits(ScalarTy
) :
1239 Q
.DL
.getTypeSizeInBits(ScalarTy
);
1241 assert(SrcBitWidth
&& "SrcBitWidth can't be zero");
1242 Known
= Known
.anyextOrTrunc(SrcBitWidth
);
1243 computeKnownBits(I
->getOperand(0), DemandedElts
, Known
, Depth
+ 1, Q
);
1244 if (auto *Inst
= dyn_cast
<PossiblyNonNegInst
>(I
);
1245 Inst
&& Inst
->hasNonNeg() && !Known
.isNegative())
1246 Known
.makeNonNegative();
1247 Known
= Known
.zextOrTrunc(BitWidth
);
1250 case Instruction::BitCast
: {
1251 Type
*SrcTy
= I
->getOperand(0)->getType();
1252 if (SrcTy
->isIntOrPtrTy() &&
1253 // TODO: For now, not handling conversions like:
1254 // (bitcast i64 %x to <2 x i32>)
1255 !I
->getType()->isVectorTy()) {
1256 computeKnownBits(I
->getOperand(0), Known
, Depth
+ 1, Q
);
1261 // Handle bitcast from floating point to integer.
1262 if (match(I
, m_ElementWiseBitCast(m_Value(V
))) &&
1263 V
->getType()->isFPOrFPVectorTy()) {
1264 Type
*FPType
= V
->getType()->getScalarType();
1265 KnownFPClass Result
=
1266 computeKnownFPClass(V
, DemandedElts
, fcAllFlags
, Depth
+ 1, Q
);
1267 FPClassTest FPClasses
= Result
.KnownFPClasses
;
1269 // TODO: Treat it as zero/poison if the use of I is unreachable.
1270 if (FPClasses
== fcNone
)
1273 if (Result
.isKnownNever(fcNormal
| fcSubnormal
| fcNan
)) {
1274 Known
.Zero
.setAllBits();
1275 Known
.One
.setAllBits();
1277 if (FPClasses
& fcInf
)
1278 Known
= Known
.intersectWith(KnownBits::makeConstant(
1279 APFloat::getInf(FPType
->getFltSemantics()).bitcastToAPInt()));
1281 if (FPClasses
& fcZero
)
1282 Known
= Known
.intersectWith(KnownBits::makeConstant(
1283 APInt::getZero(FPType
->getScalarSizeInBits())));
1285 Known
.Zero
.clearSignBit();
1286 Known
.One
.clearSignBit();
1289 if (Result
.SignBit
) {
1290 if (*Result
.SignBit
)
1291 Known
.makeNegative();
1293 Known
.makeNonNegative();
1299 // Handle cast from vector integer type to scalar or vector integer.
1300 auto *SrcVecTy
= dyn_cast
<FixedVectorType
>(SrcTy
);
1301 if (!SrcVecTy
|| !SrcVecTy
->getElementType()->isIntegerTy() ||
1302 !I
->getType()->isIntOrIntVectorTy() ||
1303 isa
<ScalableVectorType
>(I
->getType()))
1306 // Look through a cast from narrow vector elements to wider type.
1307 // Examples: v4i32 -> v2i64, v3i8 -> v24
1308 unsigned SubBitWidth
= SrcVecTy
->getScalarSizeInBits();
1309 if (BitWidth
% SubBitWidth
== 0) {
1310 // Known bits are automatically intersected across demanded elements of a
1311 // vector. So for example, if a bit is computed as known zero, it must be
1312 // zero across all demanded elements of the vector.
1314 // For this bitcast, each demanded element of the output is sub-divided
1315 // across a set of smaller vector elements in the source vector. To get
1316 // the known bits for an entire element of the output, compute the known
1317 // bits for each sub-element sequentially. This is done by shifting the
1318 // one-set-bit demanded elements parameter across the sub-elements for
1319 // consecutive calls to computeKnownBits. We are using the demanded
1320 // elements parameter as a mask operator.
1322 // The known bits of each sub-element are then inserted into place
1323 // (dependent on endian) to form the full result of known bits.
1324 unsigned NumElts
= DemandedElts
.getBitWidth();
1325 unsigned SubScale
= BitWidth
/ SubBitWidth
;
1326 APInt SubDemandedElts
= APInt::getZero(NumElts
* SubScale
);
1327 for (unsigned i
= 0; i
!= NumElts
; ++i
) {
1328 if (DemandedElts
[i
])
1329 SubDemandedElts
.setBit(i
* SubScale
);
1332 KnownBits
KnownSrc(SubBitWidth
);
1333 for (unsigned i
= 0; i
!= SubScale
; ++i
) {
1334 computeKnownBits(I
->getOperand(0), SubDemandedElts
.shl(i
), KnownSrc
,
1336 unsigned ShiftElt
= Q
.DL
.isLittleEndian() ? i
: SubScale
- 1 - i
;
1337 Known
.insertBits(KnownSrc
, ShiftElt
* SubBitWidth
);
1342 case Instruction::SExt
: {
1343 // Compute the bits in the result that are not present in the input.
1344 unsigned SrcBitWidth
= I
->getOperand(0)->getType()->getScalarSizeInBits();
1346 Known
= Known
.trunc(SrcBitWidth
);
1347 computeKnownBits(I
->getOperand(0), DemandedElts
, Known
, Depth
+ 1, Q
);
1348 // If the sign bit of the input is known set or clear, then we know the
1349 // top bits of the result.
1350 Known
= Known
.sext(BitWidth
);
1353 case Instruction::Shl
: {
1354 bool NUW
= Q
.IIQ
.hasNoUnsignedWrap(cast
<OverflowingBinaryOperator
>(I
));
1355 bool NSW
= Q
.IIQ
.hasNoSignedWrap(cast
<OverflowingBinaryOperator
>(I
));
1356 auto KF
= [NUW
, NSW
](const KnownBits
&KnownVal
, const KnownBits
&KnownAmt
,
1357 bool ShAmtNonZero
) {
1358 return KnownBits::shl(KnownVal
, KnownAmt
, NUW
, NSW
, ShAmtNonZero
);
1360 computeKnownBitsFromShiftOperator(I
, DemandedElts
, Known
, Known2
, Depth
, Q
,
1362 // Trailing zeros of a right-shifted constant never decrease.
1364 if (match(I
->getOperand(0), m_APInt(C
)))
1365 Known
.Zero
.setLowBits(C
->countr_zero());
1368 case Instruction::LShr
: {
1369 bool Exact
= Q
.IIQ
.isExact(cast
<BinaryOperator
>(I
));
1370 auto KF
= [Exact
](const KnownBits
&KnownVal
, const KnownBits
&KnownAmt
,
1371 bool ShAmtNonZero
) {
1372 return KnownBits::lshr(KnownVal
, KnownAmt
, ShAmtNonZero
, Exact
);
1374 computeKnownBitsFromShiftOperator(I
, DemandedElts
, Known
, Known2
, Depth
, Q
,
1376 // Leading zeros of a left-shifted constant never decrease.
1378 if (match(I
->getOperand(0), m_APInt(C
)))
1379 Known
.Zero
.setHighBits(C
->countl_zero());
1382 case Instruction::AShr
: {
1383 bool Exact
= Q
.IIQ
.isExact(cast
<BinaryOperator
>(I
));
1384 auto KF
= [Exact
](const KnownBits
&KnownVal
, const KnownBits
&KnownAmt
,
1385 bool ShAmtNonZero
) {
1386 return KnownBits::ashr(KnownVal
, KnownAmt
, ShAmtNonZero
, Exact
);
1388 computeKnownBitsFromShiftOperator(I
, DemandedElts
, Known
, Known2
, Depth
, Q
,
1392 case Instruction::Sub
: {
1393 bool NSW
= Q
.IIQ
.hasNoSignedWrap(cast
<OverflowingBinaryOperator
>(I
));
1394 bool NUW
= Q
.IIQ
.hasNoUnsignedWrap(cast
<OverflowingBinaryOperator
>(I
));
1395 computeKnownBitsAddSub(false, I
->getOperand(0), I
->getOperand(1), NSW
, NUW
,
1396 DemandedElts
, Known
, Known2
, Depth
, Q
);
1399 case Instruction::Add
: {
1400 bool NSW
= Q
.IIQ
.hasNoSignedWrap(cast
<OverflowingBinaryOperator
>(I
));
1401 bool NUW
= Q
.IIQ
.hasNoUnsignedWrap(cast
<OverflowingBinaryOperator
>(I
));
1402 computeKnownBitsAddSub(true, I
->getOperand(0), I
->getOperand(1), NSW
, NUW
,
1403 DemandedElts
, Known
, Known2
, Depth
, Q
);
1406 case Instruction::SRem
:
1407 computeKnownBits(I
->getOperand(0), DemandedElts
, Known
, Depth
+ 1, Q
);
1408 computeKnownBits(I
->getOperand(1), DemandedElts
, Known2
, Depth
+ 1, Q
);
1409 Known
= KnownBits::srem(Known
, Known2
);
1412 case Instruction::URem
:
1413 computeKnownBits(I
->getOperand(0), DemandedElts
, Known
, Depth
+ 1, Q
);
1414 computeKnownBits(I
->getOperand(1), DemandedElts
, Known2
, Depth
+ 1, Q
);
1415 Known
= KnownBits::urem(Known
, Known2
);
1417 case Instruction::Alloca
:
1418 Known
.Zero
.setLowBits(Log2(cast
<AllocaInst
>(I
)->getAlign()));
1420 case Instruction::GetElementPtr
: {
1421 // Analyze all of the subscripts of this getelementptr instruction
1422 // to determine if we can prove known low zero bits.
1423 computeKnownBits(I
->getOperand(0), Known
, Depth
+ 1, Q
);
1424 // Accumulate the constant indices in a separate variable
1425 // to minimize the number of calls to computeForAddSub.
1426 APInt
AccConstIndices(BitWidth
, 0, /*IsSigned*/ true);
1428 gep_type_iterator GTI
= gep_type_begin(I
);
1429 for (unsigned i
= 1, e
= I
->getNumOperands(); i
!= e
; ++i
, ++GTI
) {
1430 // TrailZ can only become smaller, short-circuit if we hit zero.
1431 if (Known
.isUnknown())
1434 Value
*Index
= I
->getOperand(i
);
1436 // Handle case when index is zero.
1437 Constant
*CIndex
= dyn_cast
<Constant
>(Index
);
1438 if (CIndex
&& CIndex
->isZeroValue())
1441 if (StructType
*STy
= GTI
.getStructTypeOrNull()) {
1442 // Handle struct member offset arithmetic.
1445 "Access to structure field must be known at compile time");
1447 if (CIndex
->getType()->isVectorTy())
1448 Index
= CIndex
->getSplatValue();
1450 unsigned Idx
= cast
<ConstantInt
>(Index
)->getZExtValue();
1451 const StructLayout
*SL
= Q
.DL
.getStructLayout(STy
);
1452 uint64_t Offset
= SL
->getElementOffset(Idx
);
1453 AccConstIndices
+= Offset
;
1457 // Handle array index arithmetic.
1458 Type
*IndexedTy
= GTI
.getIndexedType();
1459 if (!IndexedTy
->isSized()) {
1464 unsigned IndexBitWidth
= Index
->getType()->getScalarSizeInBits();
1465 KnownBits
IndexBits(IndexBitWidth
);
1466 computeKnownBits(Index
, IndexBits
, Depth
+ 1, Q
);
1467 TypeSize IndexTypeSize
= GTI
.getSequentialElementStride(Q
.DL
);
1468 uint64_t TypeSizeInBytes
= IndexTypeSize
.getKnownMinValue();
1469 KnownBits
ScalingFactor(IndexBitWidth
);
1470 // Multiply by current sizeof type.
1471 // &A[i] == A + i * sizeof(*A[i]).
1472 if (IndexTypeSize
.isScalable()) {
1473 // For scalable types the only thing we know about sizeof is
1474 // that this is a multiple of the minimum size.
1475 ScalingFactor
.Zero
.setLowBits(llvm::countr_zero(TypeSizeInBytes
));
1476 } else if (IndexBits
.isConstant()) {
1477 APInt IndexConst
= IndexBits
.getConstant();
1478 APInt
ScalingFactor(IndexBitWidth
, TypeSizeInBytes
);
1479 IndexConst
*= ScalingFactor
;
1480 AccConstIndices
+= IndexConst
.sextOrTrunc(BitWidth
);
1484 KnownBits::makeConstant(APInt(IndexBitWidth
, TypeSizeInBytes
));
1486 IndexBits
= KnownBits::mul(IndexBits
, ScalingFactor
);
1488 // If the offsets have a different width from the pointer, according
1489 // to the language reference we need to sign-extend or truncate them
1490 // to the width of the pointer.
1491 IndexBits
= IndexBits
.sextOrTrunc(BitWidth
);
1493 // Note that inbounds does *not* guarantee nsw for the addition, as only
1494 // the offset is signed, while the base address is unsigned.
1495 Known
= KnownBits::add(Known
, IndexBits
);
1497 if (!Known
.isUnknown() && !AccConstIndices
.isZero()) {
1498 KnownBits Index
= KnownBits::makeConstant(AccConstIndices
);
1499 Known
= KnownBits::add(Known
, Index
);
1503 case Instruction::PHI
: {
1504 const PHINode
*P
= cast
<PHINode
>(I
);
1505 BinaryOperator
*BO
= nullptr;
1506 Value
*R
= nullptr, *L
= nullptr;
1507 if (matchSimpleRecurrence(P
, BO
, R
, L
)) {
1508 // Handle the case of a simple two-predecessor recurrence PHI.
1509 // There's a lot more that could theoretically be done here, but
1510 // this is sufficient to catch some interesting cases.
1511 unsigned Opcode
= BO
->getOpcode();
1514 // If this is a shift recurrence, we know the bits being shifted in. We
1515 // can combine that with information about the start value of the
1516 // recurrence to conclude facts about the result. If this is a udiv
1517 // recurrence, we know that the result can never exceed either the
1518 // numerator or the start value, whichever is greater.
1519 case Instruction::LShr
:
1520 case Instruction::AShr
:
1521 case Instruction::Shl
:
1522 case Instruction::UDiv
:
1523 if (BO
->getOperand(0) != I
)
1527 // For a urem recurrence, the result can never exceed the start value. The
1528 // phi could either be the numerator or the denominator.
1529 case Instruction::URem
: {
1530 // We have matched a recurrence of the form:
1531 // %iv = [R, %entry], [%iv.next, %backedge]
1532 // %iv.next = shift_op %iv, L
1534 // Recurse with the phi context to avoid concern about whether facts
1535 // inferred hold at original context instruction. TODO: It may be
1536 // correct to use the original context. IF warranted, explore and
1537 // add sufficient tests to cover.
1538 SimplifyQuery RecQ
= Q
.getWithoutCondContext();
1540 computeKnownBits(R
, DemandedElts
, Known2
, Depth
+ 1, RecQ
);
1542 case Instruction::Shl
:
1543 // A shl recurrence will only increase the tailing zeros
1544 Known
.Zero
.setLowBits(Known2
.countMinTrailingZeros());
1546 case Instruction::LShr
:
1547 case Instruction::UDiv
:
1548 case Instruction::URem
:
1549 // lshr, udiv, and urem recurrences will preserve the leading zeros of
1551 Known
.Zero
.setHighBits(Known2
.countMinLeadingZeros());
1553 case Instruction::AShr
:
1554 // An ashr recurrence will extend the initial sign bit
1555 Known
.Zero
.setHighBits(Known2
.countMinLeadingZeros());
1556 Known
.One
.setHighBits(Known2
.countMinLeadingOnes());
1562 // Check for operations that have the property that if
1563 // both their operands have low zero bits, the result
1564 // will have low zero bits.
1565 case Instruction::Add
:
1566 case Instruction::Sub
:
1567 case Instruction::And
:
1568 case Instruction::Or
:
1569 case Instruction::Mul
: {
1570 // Change the context instruction to the "edge" that flows into the
1571 // phi. This is important because that is where the value is actually
1572 // "evaluated" even though it is used later somewhere else. (see also
1574 SimplifyQuery RecQ
= Q
.getWithoutCondContext();
1576 unsigned OpNum
= P
->getOperand(0) == R
? 0 : 1;
1577 Instruction
*RInst
= P
->getIncomingBlock(OpNum
)->getTerminator();
1578 Instruction
*LInst
= P
->getIncomingBlock(1 - OpNum
)->getTerminator();
1580 // Ok, we have a PHI of the form L op= R. Check for low
1583 computeKnownBits(R
, DemandedElts
, Known2
, Depth
+ 1, RecQ
);
1585 // We need to take the minimum number of known bits
1586 KnownBits
Known3(BitWidth
);
1588 computeKnownBits(L
, DemandedElts
, Known3
, Depth
+ 1, RecQ
);
1590 Known
.Zero
.setLowBits(std::min(Known2
.countMinTrailingZeros(),
1591 Known3
.countMinTrailingZeros()));
1593 auto *OverflowOp
= dyn_cast
<OverflowingBinaryOperator
>(BO
);
1594 if (!OverflowOp
|| !Q
.IIQ
.hasNoSignedWrap(OverflowOp
))
        // If the initial value of the recurrence is nonnegative, and we are
        // adding a nonnegative number with nsw, the result can only be
        // nonnegative or poison regardless of the number of times we execute
        // the add in the phi recurrence. If the initial value is negative and
        // we are adding a negative number with nsw, the result can only be
        // negative or poison. Similar arguments apply to sub and mul.
        switch (Opcode) {
        // (add non-negative, non-negative) --> non-negative
        // (add negative, negative) --> negative
        case Instruction::Add: {
          if (Known2.isNonNegative() && Known3.isNonNegative())
            Known.makeNonNegative();
          else if (Known2.isNegative() && Known3.isNegative())
            Known.makeNegative();
          break;
        }

        // (sub nsw non-negative, negative) --> non-negative
        // (sub nsw negative, non-negative) --> negative
        case Instruction::Sub: {
          if (BO->getOperand(0) != I)
            break;
          if (Known2.isNonNegative() && Known3.isNegative())
            Known.makeNonNegative();
          else if (Known2.isNegative() && Known3.isNonNegative())
            Known.makeNegative();
          break;
        }

        // (mul nsw non-negative, non-negative) --> non-negative
        case Instruction::Mul:
          if (Known2.isNonNegative() && Known3.isNonNegative())
            Known.makeNonNegative();
          break;

        default:
          break;
        }
        break;
      }

      default:
        break;
      }
    }
    // Unreachable blocks may have zero-operand PHI nodes.
    if (P->getNumIncomingValues() == 0)
      break;

    // Otherwise take the intersection of the known bit sets of the operands
    // (i.e. the union of their possible values), taking conservative care to
    // avoid excessive recursion.
    if (Depth < MaxAnalysisRecursionDepth - 1 && Known.isUnknown()) {
      // Skip if every incoming value references ourselves.
      if (isa_and_nonnull<UndefValue>(P->hasConstantValue()))
        break;

      Known.Zero.setAllBits();
      Known.One.setAllBits();
      for (const Use &U : P->operands()) {
        Value *IncValue;
        const PHINode *CxtPhi;
        Instruction *CxtI;
        breakSelfRecursivePHI(&U, P, IncValue, CxtI, &CxtPhi);
        // Skip direct self references.
        if (IncValue == P)
          continue;

        // Change the context instruction to the "edge" that flows into the
        // phi. This is important because that is where the value is actually
        // "evaluated" even though it is used later somewhere else. (see also
        // D69571).
        SimplifyQuery RecQ = Q.getWithoutCondContext().getWithInstruction(CxtI);

        Known2 = KnownBits(BitWidth);

        // Recurse, but cap the recursion to one level, because we don't
        // want to waste time spinning around in loops.
        // TODO: See if we can base the recursion limiter on the number of
        // incoming phi edges so we don't overly clamp analysis.
        computeKnownBits(IncValue, DemandedElts, Known2,
                         MaxAnalysisRecursionDepth - 1, RecQ);
        // See if we can further use a conditional branch into the phi
        // to help us determine the range of the value.
        if (!Known2.isConstant()) {
          CmpPredicate Pred;
          const APInt *RHSC;
          BasicBlock *TrueSucc, *FalseSucc;
          // TODO: Use RHS Value and compute range from its known bits.
          if (match(RecQ.CxtI,
                    m_Br(m_c_ICmp(Pred, m_Specific(IncValue), m_APInt(RHSC)),
                         m_BasicBlock(TrueSucc), m_BasicBlock(FalseSucc)))) {
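            // For example (illustrative): if the incoming edge is guarded by
            //   br (icmp ult %inc, 16), %phi.bb, %other.bb
            // then on that edge %inc is known to be below 16, so its high
            // bits (from bit 4 up) can be unioned in as known zero.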
            // Check for cases of duplicate successors.
            if ((TrueSucc == CxtPhi->getParent()) !=
                (FalseSucc == CxtPhi->getParent())) {
              // If we're using the false successor, invert the predicate.
              if (FalseSucc == CxtPhi->getParent())
                Pred = CmpInst::getInversePredicate(Pred);
              // Get the known bits implied by the incoming phi condition.
              auto CR = ConstantRange::makeExactICmpRegion(Pred, *RHSC);
              KnownBits KnownUnion = Known2.unionWith(CR.toKnownBits());
              // We can have conflicts here if we are analyzing dead code (it
              // is impossible to reach this BB based on the icmp).
              if (KnownUnion.hasConflict()) {
                // No reason to continue analyzing in a known dead region, so
                // just resetAll and break. This will cause us to also exit
                // the outer loop.
                Known.resetAll();
                break;
              }
              Known2 = KnownUnion;
            }
          }
        }

        Known = Known.intersectWith(Known2);
        // If all bits have been ruled out, there's no need to check more
        // operands.
        if (Known.isUnknown())
          break;
      }
    }
    break;
  }
  case Instruction::Call:
  case Instruction::Invoke: {
    // If range metadata is attached to this call, set known bits from that,
    // and then intersect with known bits based on other properties of the
    // function.
    if (MDNode *MD =
            Q.IIQ.getMetadata(cast<Instruction>(I), LLVMContext::MD_range))
      computeKnownBitsFromRangeMetadata(*MD, Known);

    const auto *CB = cast<CallBase>(I);

    if (std::optional<ConstantRange> Range = CB->getRange())
      Known = Known.unionWith(Range->toKnownBits());

    if (const Value *RV = CB->getReturnedArgOperand()) {
      if (RV->getType() == I->getType()) {
        computeKnownBits(RV, Known2, Depth + 1, Q);
        Known = Known.unionWith(Known2);
        // If the function doesn't return properly for all input values
        // (e.g. unreachable exits) then there might be conflicts between the
        // argument value and the range metadata. Simply discard the known bits
        // in case of conflicts.
        if (Known.hasConflict())
          Known.resetAll();
      }
    }
    if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
      switch (II->getIntrinsicID()) {
      default:
        break;
      case Intrinsic::abs: {
        computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);
        bool IntMinIsPoison = match(II->getArgOperand(1), m_One());
        Known = Known2.abs(IntMinIsPoison);
        break;
      }
      case Intrinsic::bitreverse:
        computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);
        Known.Zero |= Known2.Zero.reverseBits();
        Known.One |= Known2.One.reverseBits();
        break;
      case Intrinsic::bswap:
        computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);
        Known.Zero |= Known2.Zero.byteSwap();
        Known.One |= Known2.One.byteSwap();
        break;
      case Intrinsic::ctlz: {
        computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);
        // If we have a known 1, its position is our upper bound.
        unsigned PossibleLZ = Known2.countMaxLeadingZeros();
        // If this call is poison for 0 input, the result will be less than 2^n.
        if (II->getArgOperand(1) == ConstantInt::getTrue(II->getContext()))
          PossibleLZ = std::min(PossibleLZ, BitWidth - 1);
        unsigned LowBits = llvm::bit_width(PossibleLZ);
        Known.Zero.setBitsFrom(LowBits);
        break;
      }
      case Intrinsic::cttz: {
        computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);
        // If we have a known 1, its position is our upper bound.
        unsigned PossibleTZ = Known2.countMaxTrailingZeros();
        // If this call is poison for 0 input, the result will be less than 2^n.
        if (II->getArgOperand(1) == ConstantInt::getTrue(II->getContext()))
          PossibleTZ = std::min(PossibleTZ, BitWidth - 1);
        unsigned LowBits = llvm::bit_width(PossibleTZ);
        Known.Zero.setBitsFrom(LowBits);
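        // For example (illustrative): if the operand has at most 10 trailing
        // zeros, cttz returns at most 10, which fits in 4 bits, so every
        // result bit from bit 4 upward is known zero.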
        break;
      }
      case Intrinsic::ctpop: {
        computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);
        // We can bound the space the count needs. Also, bits known to be zero
        // can't contribute to the population.
        unsigned BitsPossiblySet = Known2.countMaxPopulation();
        unsigned LowBits = llvm::bit_width(BitsPossiblySet);
        Known.Zero.setBitsFrom(LowBits);
        // TODO: we could bound KnownOne using the lower bound on the number
        // of bits which might be set provided by popcnt KnownOne2.
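        // For example (illustrative): if at most 9 bits of the operand can be
        // one, the population count is at most 9, which fits in 4 bits, so
        // the result bits from bit 4 upward are known zero.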
        break;
      }
      case Intrinsic::fshr:
      case Intrinsic::fshl: {
        const APInt *SA;
        if (!match(I->getOperand(2), m_APInt(SA)))
          break;

        // Normalize to funnel shift left.
        uint64_t ShiftAmt = SA->urem(BitWidth);
        if (II->getIntrinsicID() == Intrinsic::fshr)
          ShiftAmt = BitWidth - ShiftAmt;

        KnownBits Known3(BitWidth);
        computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);
        computeKnownBits(I->getOperand(1), DemandedElts, Known3, Depth + 1, Q);

        Known.Zero =
            Known2.Zero.shl(ShiftAmt) | Known3.Zero.lshr(BitWidth - ShiftAmt);
        Known.One =
            Known2.One.shl(ShiftAmt) | Known3.One.lshr(BitWidth - ShiftAmt);
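        // For example (illustrative): fshr on i32 with a shift amount of 8 is
        // normalized to fshl by 32 - 8 = 24, so the result takes its high 8
        // bits from the low byte of operand 0 and its low 24 bits from the
        // high 24 bits of operand 1.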
        break;
      }
      case Intrinsic::uadd_sat:
        computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
        computeKnownBits(I->getOperand(1), DemandedElts, Known2, Depth + 1, Q);
        Known = KnownBits::uadd_sat(Known, Known2);
        break;
      case Intrinsic::usub_sat:
        computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
        computeKnownBits(I->getOperand(1), DemandedElts, Known2, Depth + 1, Q);
        Known = KnownBits::usub_sat(Known, Known2);
        break;
      case Intrinsic::sadd_sat:
        computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
        computeKnownBits(I->getOperand(1), DemandedElts, Known2, Depth + 1, Q);
        Known = KnownBits::sadd_sat(Known, Known2);
        break;
      case Intrinsic::ssub_sat:
        computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
        computeKnownBits(I->getOperand(1), DemandedElts, Known2, Depth + 1, Q);
        Known = KnownBits::ssub_sat(Known, Known2);
        break;
      // Vec reverse preserves bits from input vec.
      case Intrinsic::vector_reverse:
        computeKnownBits(I->getOperand(0), DemandedElts.reverseBits(), Known,
                         Depth + 1, Q);
        break;
      // For min/max/and/or reduce, any bit common to each element in the
      // input vec is set in the output.
      case Intrinsic::vector_reduce_and:
      case Intrinsic::vector_reduce_or:
      case Intrinsic::vector_reduce_umax:
      case Intrinsic::vector_reduce_umin:
      case Intrinsic::vector_reduce_smax:
      case Intrinsic::vector_reduce_smin:
        computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
        break;
      case Intrinsic::vector_reduce_xor: {
        computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
        // The zeros common to all elements are zero in the output.
        // If the number of elements is odd, then the common ones remain. If
        // the number of elements is even, then the common ones become zeros.
        auto *VecTy = cast<VectorType>(I->getOperand(0)->getType());
        // Even, so the ones become zeros.
        bool EvenCnt = VecTy->getElementCount().isKnownEven();
        if (EvenCnt)
          Known.Zero |= Known.One;
        // Maybe even element count so need to clear ones.
        if (VecTy->isScalableTy() || EvenCnt)
          Known.One.clearAllBits();
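        // For example (illustrative): xor-reducing <4 x i8> where every lane
        // has bit 0 set gives bit 0 = 1 ^ 1 ^ 1 ^ 1 = 0, while reducing
        // <3 x i8> keeps bit 0 = 1; only an odd, fixed element count lets
        // the common ones survive.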
        break;
      }
      case Intrinsic::umin:
        computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
        computeKnownBits(I->getOperand(1), DemandedElts, Known2, Depth + 1, Q);
        Known = KnownBits::umin(Known, Known2);
        break;
      case Intrinsic::umax:
        computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
        computeKnownBits(I->getOperand(1), DemandedElts, Known2, Depth + 1, Q);
        Known = KnownBits::umax(Known, Known2);
        break;
      case Intrinsic::smin:
        computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
        computeKnownBits(I->getOperand(1), DemandedElts, Known2, Depth + 1, Q);
        Known = KnownBits::smin(Known, Known2);
        unionWithMinMaxIntrinsicClamp(II, Known);
        break;
      case Intrinsic::smax:
        computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
        computeKnownBits(I->getOperand(1), DemandedElts, Known2, Depth + 1, Q);
        Known = KnownBits::smax(Known, Known2);
        unionWithMinMaxIntrinsicClamp(II, Known);
        break;
      case Intrinsic::ptrmask: {
        computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);

        const Value *Mask = I->getOperand(1);
        Known2 = KnownBits(Mask->getType()->getScalarSizeInBits());
        computeKnownBits(Mask, DemandedElts, Known2, Depth + 1, Q);
        // TODO: 1-extend would be more precise.
        Known &= Known2.anyextOrTrunc(BitWidth);
        break;
      }
      case Intrinsic::x86_sse2_pmulh_w:
      case Intrinsic::x86_avx2_pmulh_w:
      case Intrinsic::x86_avx512_pmulh_w_512:
        computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
        computeKnownBits(I->getOperand(1), DemandedElts, Known2, Depth + 1, Q);
        Known = KnownBits::mulhs(Known, Known2);
        break;
      case Intrinsic::x86_sse2_pmulhu_w:
      case Intrinsic::x86_avx2_pmulhu_w:
      case Intrinsic::x86_avx512_pmulhu_w_512:
        computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
        computeKnownBits(I->getOperand(1), DemandedElts, Known2, Depth + 1, Q);
        Known = KnownBits::mulhu(Known, Known2);
        break;
      case Intrinsic::x86_sse42_crc32_64_64:
        Known.Zero.setBitsFrom(32);
        break;
      case Intrinsic::x86_ssse3_phadd_d_128:
      case Intrinsic::x86_ssse3_phadd_w_128:
      case Intrinsic::x86_avx2_phadd_d:
      case Intrinsic::x86_avx2_phadd_w: {
        Known = computeKnownBitsForHorizontalOperation(
            I, DemandedElts, Depth, Q,
            [](const KnownBits &KnownLHS, const KnownBits &KnownRHS) {
              return KnownBits::add(KnownLHS, KnownRHS);
            });
        break;
      }
      case Intrinsic::x86_ssse3_phadd_sw_128:
      case Intrinsic::x86_avx2_phadd_sw: {
        Known = computeKnownBitsForHorizontalOperation(I, DemandedElts, Depth,
                                                       Q, KnownBits::sadd_sat);
        break;
      }
      case Intrinsic::x86_ssse3_phsub_d_128:
      case Intrinsic::x86_ssse3_phsub_w_128:
      case Intrinsic::x86_avx2_phsub_d:
      case Intrinsic::x86_avx2_phsub_w: {
        Known = computeKnownBitsForHorizontalOperation(
            I, DemandedElts, Depth, Q,
            [](const KnownBits &KnownLHS, const KnownBits &KnownRHS) {
              return KnownBits::sub(KnownLHS, KnownRHS);
            });
        break;
      }
      case Intrinsic::x86_ssse3_phsub_sw_128:
      case Intrinsic::x86_avx2_phsub_sw: {
        Known = computeKnownBitsForHorizontalOperation(I, DemandedElts, Depth,
                                                       Q, KnownBits::ssub_sat);
        break;
      }
      case Intrinsic::riscv_vsetvli:
      case Intrinsic::riscv_vsetvlimax: {
        bool HasAVL = II->getIntrinsicID() == Intrinsic::riscv_vsetvli;
        const ConstantRange Range = getVScaleRange(II->getFunction(), BitWidth);
        uint64_t SEW = RISCVVType::decodeVSEW(
            cast<ConstantInt>(II->getArgOperand(HasAVL))->getZExtValue());
        RISCVII::VLMUL VLMUL = static_cast<RISCVII::VLMUL>(
            cast<ConstantInt>(II->getArgOperand(1 + HasAVL))->getZExtValue());
        uint64_t MaxVLEN =
            Range.getUnsignedMax().getZExtValue() * RISCV::RVVBitsPerBlock;
        uint64_t MaxVL = MaxVLEN / RISCVVType::getSEWLMULRatio(SEW, VLMUL);

        // The result of vsetvli must not be larger than AVL.
        if (HasAVL)
          if (auto *CI = dyn_cast<ConstantInt>(II->getArgOperand(0)))
            MaxVL = std::min(MaxVL, CI->getZExtValue());

        unsigned KnownZeroFirstBit = Log2_32(MaxVL) + 1;
        if (BitWidth > KnownZeroFirstBit)
          Known.Zero.setBitsFrom(KnownZeroFirstBit);
        break;
      }
      case Intrinsic::vscale: {
        if (!II->getParent() || !II->getFunction())
          break;

        Known = getVScaleRange(II->getFunction(), BitWidth).toKnownBits();
        break;
      }
      }
    }
    break;
  }
  case Instruction::ShuffleVector: {
    auto *Shuf = dyn_cast<ShuffleVectorInst>(I);
    // FIXME: Do we need to handle ConstantExpr involving shufflevectors?
    if (!Shuf) {
      Known.resetAll();
      return;
    }
    // For undef elements, we don't know anything about the common state of
    // the shuffle result.
    APInt DemandedLHS, DemandedRHS;
    if (!getShuffleDemandedElts(Shuf, DemandedElts, DemandedLHS, DemandedRHS)) {
      Known.resetAll();
      return;
    }
    Known.One.setAllBits();
    Known.Zero.setAllBits();
    if (!!DemandedLHS) {
      const Value *LHS = Shuf->getOperand(0);
      computeKnownBits(LHS, DemandedLHS, Known, Depth + 1, Q);
      // If we don't know any bits, early out.
      if (Known.isUnknown())
        break;
    }
    if (!!DemandedRHS) {
      const Value *RHS = Shuf->getOperand(1);
      computeKnownBits(RHS, DemandedRHS, Known2, Depth + 1, Q);
      Known = Known.intersectWith(Known2);
    }
    break;
  }
  case Instruction::InsertElement: {
    if (isa<ScalableVectorType>(I->getType())) {
      Known.resetAll();
      return;
    }
    const Value *Vec = I->getOperand(0);
    const Value *Elt = I->getOperand(1);
    auto *CIdx = dyn_cast<ConstantInt>(I->getOperand(2));
    unsigned NumElts = DemandedElts.getBitWidth();
    APInt DemandedVecElts = DemandedElts;
    bool NeedsElt = true;
    // If we know the index we are inserting to, clear it from the Vec check.
    if (CIdx && CIdx->getValue().ult(NumElts)) {
      DemandedVecElts.clearBit(CIdx->getZExtValue());
      NeedsElt = DemandedElts[CIdx->getZExtValue()];
    }

    Known.One.setAllBits();
    Known.Zero.setAllBits();
    if (NeedsElt) {
      computeKnownBits(Elt, Known, Depth + 1, Q);
      // If we don't know any bits, early out.
      if (Known.isUnknown())
        break;
    }

    if (!DemandedVecElts.isZero()) {
      computeKnownBits(Vec, DemandedVecElts, Known2, Depth + 1, Q);
      Known = Known.intersectWith(Known2);
    }
    break;
  }
  case Instruction::ExtractElement: {
    // Look through extract element. If the index is non-constant or
    // out-of-range demand all elements, otherwise just the extracted element.
    const Value *Vec = I->getOperand(0);
    const Value *Idx = I->getOperand(1);
    auto *CIdx = dyn_cast<ConstantInt>(Idx);
    if (isa<ScalableVectorType>(Vec->getType())) {
      // FIXME: there's probably *something* we can do with scalable vectors
      Known.resetAll();
      return;
    }
    unsigned NumElts = cast<FixedVectorType>(Vec->getType())->getNumElements();
    APInt DemandedVecElts = APInt::getAllOnes(NumElts);
    if (CIdx && CIdx->getValue().ult(NumElts))
      DemandedVecElts = APInt::getOneBitSet(NumElts, CIdx->getZExtValue());
    computeKnownBits(Vec, DemandedVecElts, Known, Depth + 1, Q);
    break;
  }
  case Instruction::ExtractValue:
    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I->getOperand(0))) {
      const ExtractValueInst *EVI = cast<ExtractValueInst>(I);
      if (EVI->getNumIndices() != 1) break;
      if (EVI->getIndices()[0] == 0) {
        switch (II->getIntrinsicID()) {
        default:
          break;
        case Intrinsic::uadd_with_overflow:
        case Intrinsic::sadd_with_overflow:
          computeKnownBitsAddSub(
              true, II->getArgOperand(0), II->getArgOperand(1), /*NSW=*/false,
              /* NUW=*/false, DemandedElts, Known, Known2, Depth, Q);
          break;
        case Intrinsic::usub_with_overflow:
        case Intrinsic::ssub_with_overflow:
          computeKnownBitsAddSub(
              false, II->getArgOperand(0), II->getArgOperand(1), /*NSW=*/false,
              /* NUW=*/false, DemandedElts, Known, Known2, Depth, Q);
          break;
        case Intrinsic::umul_with_overflow:
        case Intrinsic::smul_with_overflow:
          computeKnownBitsMul(II->getArgOperand(0), II->getArgOperand(1), false,
                              false, DemandedElts, Known, Known2, Depth, Q);
          break;
        }
      }
    }
    break;
  case Instruction::Freeze:
    if (isGuaranteedNotToBePoison(I->getOperand(0), Q.AC, Q.CxtI, Q.DT,
                                  Depth + 1))
      computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
    break;
  }
}
/// Determine which bits of V are known to be either zero or one and return
/// them in the Known bit set.
KnownBits llvm::computeKnownBits(const Value *V, const APInt &DemandedElts,
                                 unsigned Depth, const SimplifyQuery &Q) {
  KnownBits Known(getBitWidth(V->getType(), Q.DL));
  ::computeKnownBits(V, DemandedElts, Known, Depth, Q);
  return Known;
}

/// Determine which bits of V are known to be either zero or one and return
/// them in the Known bit set.
KnownBits llvm::computeKnownBits(const Value *V, unsigned Depth,
                                 const SimplifyQuery &Q) {
  KnownBits Known(getBitWidth(V->getType(), Q.DL));
  computeKnownBits(V, Known, Depth, Q);
  return Known;
}
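// Illustrative usage sketch (assumes a SimplifyQuery Q has already been set
// up for the current function and context instruction):
//   KnownBits KB = llvm::computeKnownBits(V, /*Depth=*/0, Q);
// after which callers consult KB.Zero, KB.One, or helpers such as
// KB.isNonNegative().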
/// Determine which bits of V are known to be either zero or one and return
/// them in the Known bit set.
///
/// NOTE: we cannot consider 'undef' to be "IsZero" here. The problem is that
/// we cannot optimize based on the assumption that it is zero without changing
/// it to be an explicit zero. If we don't change it to zero, other code could
/// be optimized based on the contradictory assumption that it is non-zero.
/// Because instcombine aggressively folds operations with undef args anyway,
/// this won't lose us code quality.
///
/// This function is defined on values with integer type, values with pointer
/// type, and vectors of integers. In the case where V is a vector, the known
/// zero and known one values are the same width as the vector element, and a
/// bit is set only if it is true for all of the demanded elements in the
/// vector specified by DemandedElts.
void computeKnownBits(const Value *V, const APInt &DemandedElts,
                      KnownBits &Known, unsigned Depth,
                      const SimplifyQuery &Q) {
  if (!DemandedElts) {
    // No demanded elts, better to assume we don't know anything.
    Known.resetAll();
    return;
  }

  assert(V && "No Value?");
  assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");

  Type *Ty = V->getType();
  unsigned BitWidth = Known.getBitWidth();

  assert((Ty->isIntOrIntVectorTy(BitWidth) || Ty->isPtrOrPtrVectorTy()) &&
         "Not integer or pointer type!");
  if (auto *FVTy = dyn_cast<FixedVectorType>(Ty)) {
    assert(
        FVTy->getNumElements() == DemandedElts.getBitWidth() &&
        "DemandedElt width should equal the fixed vector number of elements");
  } else {
    assert(DemandedElts == APInt(1, 1) &&
           "DemandedElt width should be 1 for scalars or scalable vectors");
  }

  Type *ScalarTy = Ty->getScalarType();
  if (ScalarTy->isPointerTy()) {
    assert(BitWidth == Q.DL.getPointerTypeSizeInBits(ScalarTy) &&
           "V and Known should have same BitWidth");
  } else {
    assert(BitWidth == Q.DL.getTypeSizeInBits(ScalarTy) &&
           "V and Known should have same BitWidth");
  }
  const APInt *C;
  if (match(V, m_APInt(C))) {
    // We know all of the bits for a scalar constant or a splat vector constant!
    Known = KnownBits::makeConstant(*C);
    return;
  }
  // Null and aggregate-zero are all-zeros.
  if (isa<ConstantPointerNull>(V) || isa<ConstantAggregateZero>(V)) {
    Known.setAllZero();
    return;
  }
  // Handle a constant vector by taking the intersection of the known bits of
  // each element.
  if (const ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(V)) {
    assert(!isa<ScalableVectorType>(V->getType()));
    // We know that CDV must be a vector of integers. Take the intersection of
    // the bits of each element.
    Known.Zero.setAllBits(); Known.One.setAllBits();
    for (unsigned i = 0, e = CDV->getNumElements(); i != e; ++i) {
      if (!DemandedElts[i])
        continue;
      APInt Elt = CDV->getElementAsAPInt(i);
      Known.Zero &= ~Elt;
      Known.One &= Elt;
    }
    if (Known.hasConflict())
      Known.resetAll();
    return;
  }

  if (const auto *CV = dyn_cast<ConstantVector>(V)) {
    assert(!isa<ScalableVectorType>(V->getType()));
    // We know that CV must be a vector of integers. Take the intersection of
    // the bits of each element.
    Known.Zero.setAllBits(); Known.One.setAllBits();
    for (unsigned i = 0, e = CV->getNumOperands(); i != e; ++i) {
      if (!DemandedElts[i])
        continue;
      Constant *Element = CV->getAggregateElement(i);
      if (isa<PoisonValue>(Element))
        continue;
      auto *ElementCI = dyn_cast_or_null<ConstantInt>(Element);
      if (!ElementCI) {
        Known.resetAll();
        return;
      }
      const APInt &Elt = ElementCI->getValue();
      Known.Zero &= ~Elt;
      Known.One &= Elt;
    }
    if (Known.hasConflict())
      Known.resetAll();
    return;
  }
  // Start out not knowing anything.
  Known.resetAll();

  // We can't imply anything about undefs.
  if (isa<UndefValue>(V))
    return;

  // There's no point in looking through other users of ConstantData for
  // assumptions. Confirm that we've handled them all.
  assert(!isa<ConstantData>(V) && "Unhandled constant data!");

  if (const auto *A = dyn_cast<Argument>(V))
    if (std::optional<ConstantRange> Range = A->getRange())
      Known = Range->toKnownBits();

  // All recursive calls that increase depth must come after this.
  if (Depth == MaxAnalysisRecursionDepth)
    return;

  // A weak GlobalAlias is totally unknown. A non-weak GlobalAlias has
  // the bits of its aliasee.
  if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
    if (!GA->isInterposable())
      computeKnownBits(GA->getAliasee(), Known, Depth + 1, Q);
    return;
  }

  if (const Operator *I = dyn_cast<Operator>(V))
    computeKnownBitsFromOperator(I, DemandedElts, Known, Depth, Q);
  else if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
    if (std::optional<ConstantRange> CR = GV->getAbsoluteSymbolRange())
      Known = CR->toKnownBits();
  }

  // Aligned pointers have trailing zeros - refine Known.Zero set
  if (isa<PointerType>(V->getType())) {
    Align Alignment = V->getPointerAlignment(Q.DL);
    Known.Zero.setLowBits(Log2(Alignment));
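    // For example (illustrative): a pointer known to be 16-byte aligned has
    // its low four bits known zero.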
  }

  // computeKnownBitsFromContext strictly refines Known.
  // Therefore, we run them after computeKnownBitsFromOperator.

  // Check whether we can determine known bits from context such as assumes.
  computeKnownBitsFromContext(V, Known, Depth, Q);
}
/// Try to detect a recurrence in which the value of the induction variable is
/// always a power of two (or zero).
static bool isPowerOfTwoRecurrence(const PHINode *PN, bool OrZero,
                                   unsigned Depth, SimplifyQuery &Q) {
  BinaryOperator *BO = nullptr;
  Value *Start = nullptr, *Step = nullptr;
  if (!matchSimpleRecurrence(PN, BO, Start, Step))
    return false;

  // Initial value must be a power of two.
  for (const Use &U : PN->operands()) {
    if (U.get() == Start) {
      // Initial value comes from a different BB, need to adjust context
      // instruction for analysis.
      Q.CxtI = PN->getIncomingBlock(U)->getTerminator();
      if (!isKnownToBeAPowerOfTwo(Start, OrZero, Depth, Q))
        return false;
    }
  }

  // Except for Mul, the induction variable must be on the left side of the
  // increment expression, otherwise its value can be arbitrary.
  if (BO->getOpcode() != Instruction::Mul && BO->getOperand(1) != Step)
    return false;

  Q.CxtI = BO->getParent()->getTerminator();
  switch (BO->getOpcode()) {
  case Instruction::Mul:
    // Power of two is closed under multiplication.
    return (OrZero || Q.IIQ.hasNoUnsignedWrap(BO) ||
            Q.IIQ.hasNoSignedWrap(BO)) &&
           isKnownToBeAPowerOfTwo(Step, OrZero, Depth, Q);
  case Instruction::SDiv:
    // Start value must not be signmask for signed division, so simply being a
    // power of two is not sufficient, and it has to be a constant.
    if (!match(Start, m_Power2()) || match(Start, m_SignMask()))
      return false;
    [[fallthrough]];
  case Instruction::UDiv:
    // Divisor must be a power of two.
    // If OrZero is false, cannot guarantee the induction variable is non-zero
    // after division, same for Shr, unless it is exact division.
    return (OrZero || Q.IIQ.isExact(BO)) &&
           isKnownToBeAPowerOfTwo(Step, false, Depth, Q);
  case Instruction::Shl:
    return OrZero || Q.IIQ.hasNoUnsignedWrap(BO) || Q.IIQ.hasNoSignedWrap(BO);
  case Instruction::AShr:
    if (!match(Start, m_Power2()) || match(Start, m_SignMask()))
      return false;
    [[fallthrough]];
  case Instruction::LShr:
    return OrZero || Q.IIQ.isExact(BO);
  default:
    return false;
  }
}
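// For example (illustrative): the recurrence
//   %iv = phi i32 [ 1, %entry ], [ %iv.next, %loop ]
//   %iv.next = shl nuw i32 %iv, 1
// starts at a power of two, and a shl with nuw can only keep producing powers
// of two (overflowing would be poison), so it is accepted above.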
/// Return true if we can infer that \p V is known to be a power of 2 from
/// dominating condition \p Cond (e.g., ctpop(V) == 1).
static bool isImpliedToBeAPowerOfTwoFromCond(const Value *V, bool OrZero,
                                             const Value *Cond,
                                             bool CondIsTrue) {
  CmpPredicate Pred;
  const APInt *RHSC;
  if (!match(Cond, m_ICmp(Pred, m_Intrinsic<Intrinsic::ctpop>(m_Specific(V)),
                          m_APInt(RHSC))))
    return false;
  if (!CondIsTrue)
    Pred = ICmpInst::getInversePredicate(Pred);
  // ctpop(V) u< 2
  if (OrZero && Pred == ICmpInst::ICMP_ULT && *RHSC == 2)
    return true;
  // ctpop(V) == 1
  return Pred == ICmpInst::ICMP_EQ && *RHSC == 1;
}
/// Return true if the given value is known to have exactly one
/// bit set when defined. For vectors return true if every element is known to
/// be a power of two when defined. Supports values with integer or pointer
/// types and vectors of integers.
bool llvm::isKnownToBeAPowerOfTwo(const Value *V, bool OrZero, unsigned Depth,
                                  const SimplifyQuery &Q) {
  assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");

  if (isa<Constant>(V))
    return OrZero ? match(V, m_Power2OrZero()) : match(V, m_Power2());

  // i1 is by definition a power of 2 or zero.
  if (OrZero && V->getType()->getScalarSizeInBits() == 1)
    return true;

  // Try to infer from assumptions.
  if (Q.AC && Q.CxtI) {
    for (auto &AssumeVH : Q.AC->assumptionsFor(V)) {
      if (!AssumeVH)
        continue;
      CallInst *I = cast<CallInst>(AssumeVH);
      if (isImpliedToBeAPowerOfTwoFromCond(V, OrZero, I->getArgOperand(0),
                                           /*CondIsTrue=*/true) &&
          isValidAssumeForContext(I, Q.CxtI, Q.DT))
        return true;
    }
  }
  // Handle dominating conditions.
  if (Q.DC && Q.CxtI && Q.DT) {
    for (BranchInst *BI : Q.DC->conditionsFor(V)) {
      Value *Cond = BI->getCondition();

      BasicBlockEdge Edge0(BI->getParent(), BI->getSuccessor(0));
      if (isImpliedToBeAPowerOfTwoFromCond(V, OrZero, Cond,
                                           /*CondIsTrue=*/true) &&
          Q.DT->dominates(Edge0, Q.CxtI->getParent()))
        return true;

      BasicBlockEdge Edge1(BI->getParent(), BI->getSuccessor(1));
      if (isImpliedToBeAPowerOfTwoFromCond(V, OrZero, Cond,
                                           /*CondIsTrue=*/false) &&
          Q.DT->dominates(Edge1, Q.CxtI->getParent()))
        return true;
    }
  }

  auto *I = dyn_cast<Instruction>(V);
  if (!I)
    return false;

  if (Q.CxtI && match(V, m_VScale())) {
    const Function *F = Q.CxtI->getFunction();
    // The vscale_range indicates vscale is a power-of-two.
    return F->hasFnAttribute(Attribute::VScaleRange);
  }

  // 1 << X is clearly a power of two if the one is not shifted off the end. If
  // it is shifted off the end then the result is undefined.
  if (match(I, m_Shl(m_One(), m_Value())))
    return true;

  // (signmask) >>l X is clearly a power of two if the one is not shifted off
  // the bottom. If it is shifted off the bottom then the result is undefined.
  if (match(I, m_LShr(m_SignMask(), m_Value())))
    return true;

  // The remaining tests are all recursive, so bail out if we hit the limit.
  if (Depth++ == MaxAnalysisRecursionDepth)
    return false;
->getOpcode()) {
2426 case Instruction::ZExt
:
2427 return isKnownToBeAPowerOfTwo(I
->getOperand(0), OrZero
, Depth
, Q
);
2428 case Instruction::Trunc
:
2429 return OrZero
&& isKnownToBeAPowerOfTwo(I
->getOperand(0), OrZero
, Depth
, Q
);
2430 case Instruction::Shl
:
2431 if (OrZero
|| Q
.IIQ
.hasNoUnsignedWrap(I
) || Q
.IIQ
.hasNoSignedWrap(I
))
2432 return isKnownToBeAPowerOfTwo(I
->getOperand(0), OrZero
, Depth
, Q
);
2434 case Instruction::LShr
:
2435 if (OrZero
|| Q
.IIQ
.isExact(cast
<BinaryOperator
>(I
)))
2436 return isKnownToBeAPowerOfTwo(I
->getOperand(0), OrZero
, Depth
, Q
);
2438 case Instruction::UDiv
:
2439 if (Q
.IIQ
.isExact(cast
<BinaryOperator
>(I
)))
2440 return isKnownToBeAPowerOfTwo(I
->getOperand(0), OrZero
, Depth
, Q
);
2442 case Instruction::Mul
:
2443 return isKnownToBeAPowerOfTwo(I
->getOperand(1), OrZero
, Depth
, Q
) &&
2444 isKnownToBeAPowerOfTwo(I
->getOperand(0), OrZero
, Depth
, Q
) &&
2445 (OrZero
|| isKnownNonZero(I
, Q
, Depth
));
2446 case Instruction::And
:
2447 // A power of two and'd with anything is a power of two or zero.
2449 (isKnownToBeAPowerOfTwo(I
->getOperand(1), /*OrZero*/ true, Depth
, Q
) ||
2450 isKnownToBeAPowerOfTwo(I
->getOperand(0), /*OrZero*/ true, Depth
, Q
)))
2452 // X & (-X) is always a power of two or zero.
2453 if (match(I
->getOperand(0), m_Neg(m_Specific(I
->getOperand(1)))) ||
2454 match(I
->getOperand(1), m_Neg(m_Specific(I
->getOperand(0)))))
2455 return OrZero
|| isKnownNonZero(I
->getOperand(0), Q
, Depth
);
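    // For example (illustrative): X = 0b0110 gives -X = 0b1010, so
    // X & -X = 0b0010, i.e. the lowest set bit, which is always a power of
    // two (or zero when X == 0).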
    return false;
  case Instruction::Add: {
    // Adding a power-of-two or zero to the same power-of-two or zero yields
    // either the original power-of-two, a larger power-of-two or zero.
    const OverflowingBinaryOperator *VOBO = cast<OverflowingBinaryOperator>(V);
    if (OrZero || Q.IIQ.hasNoUnsignedWrap(VOBO) ||
        Q.IIQ.hasNoSignedWrap(VOBO)) {
      if (match(I->getOperand(0),
                m_c_And(m_Specific(I->getOperand(1)), m_Value())) &&
          isKnownToBeAPowerOfTwo(I->getOperand(1), OrZero, Depth, Q))
        return true;
      if (match(I->getOperand(1),
                m_c_And(m_Specific(I->getOperand(0)), m_Value())) &&
          isKnownToBeAPowerOfTwo(I->getOperand(0), OrZero, Depth, Q))
        return true;

      unsigned BitWidth = V->getType()->getScalarSizeInBits();
      KnownBits LHSBits(BitWidth);
      computeKnownBits(I->getOperand(0), LHSBits, Depth, Q);

      KnownBits RHSBits(BitWidth);
      computeKnownBits(I->getOperand(1), RHSBits, Depth, Q);
      // If i8 V is a power of two or zero:
      //  ZeroBits: 1 1 1 0 1 1 1 1
      // ~ZeroBits: 0 0 0 1 0 0 0 0
      if ((~(LHSBits.Zero & RHSBits.Zero)).isPowerOf2())
        // If OrZero isn't set, we cannot give back a zero result.
        // Make sure either the LHS or RHS has a bit set.
        if (OrZero || RHSBits.One.getBoolValue() || LHSBits.One.getBoolValue())
          return true;
    }

    // LShr(UINT_MAX, Y) + 1 is a power of two (if add is nuw) or zero.
    if (OrZero || Q.IIQ.hasNoUnsignedWrap(VOBO))
      if (match(I, m_Add(m_LShr(m_AllOnes(), m_Value()), m_One())))
        return true;
    return false;
  }
  case Instruction::Select:
    return isKnownToBeAPowerOfTwo(I->getOperand(1), OrZero, Depth, Q) &&
           isKnownToBeAPowerOfTwo(I->getOperand(2), OrZero, Depth, Q);
  case Instruction::PHI: {
    // A PHI node is a power of two if all incoming values are powers of two,
    // or if it is an induction variable where in each step its value is a
    // power of two.
    auto *PN = cast<PHINode>(I);
    SimplifyQuery RecQ = Q.getWithoutCondContext();

    // Check if it is an induction variable and always power of two.
    if (isPowerOfTwoRecurrence(PN, OrZero, Depth, RecQ))
      return true;

    // Recursively check all incoming values. Limit recursion to 2 levels, so
    // that search complexity is limited to number of operands^2.
    unsigned NewDepth = std::max(Depth, MaxAnalysisRecursionDepth - 1);
    return llvm::all_of(PN->operands(), [&](const Use &U) {
      // Value is power of 2 if it is coming from PHI node itself by induction.
      if (U.get() == PN)
        return true;

      // Change the context instruction to the incoming block where it is
      // evaluated.
      RecQ.CxtI = PN->getIncomingBlock(U)->getTerminator();
      return isKnownToBeAPowerOfTwo(U.get(), OrZero, NewDepth, RecQ);
    });
  }
  case Instruction::Invoke:
  case Instruction::Call: {
    if (auto *II = dyn_cast<IntrinsicInst>(I)) {
      switch (II->getIntrinsicID()) {
      case Intrinsic::umax:
      case Intrinsic::smax:
      case Intrinsic::umin:
      case Intrinsic::smin:
        return isKnownToBeAPowerOfTwo(II->getArgOperand(1), OrZero, Depth, Q) &&
               isKnownToBeAPowerOfTwo(II->getArgOperand(0), OrZero, Depth, Q);
      // bswap/bitreverse just move around bits, but don't change any 1s/0s
      // and thus don't change pow2/non-pow2 status.
      case Intrinsic::bitreverse:
      case Intrinsic::bswap:
        return isKnownToBeAPowerOfTwo(II->getArgOperand(0), OrZero, Depth, Q);
      case Intrinsic::fshr:
      case Intrinsic::fshl:
        // If Op0 == Op1, this is a rotate. is_pow2(rotate(x, y)) == is_pow2(x)
        if (II->getArgOperand(0) == II->getArgOperand(1))
          return isKnownToBeAPowerOfTwo(II->getArgOperand(0), OrZero, Depth, Q);
        break;
      default:
        break;
      }
    }
    return false;
  }
  default:
    return false;
  }
}
/// Test whether a GEP's result is known to be non-null.
///
/// Uses properties inherent in a GEP to try to determine whether it is known
/// to be non-null.
///
/// Currently this routine does not support vector GEPs.
static bool isGEPKnownNonNull(const GEPOperator *GEP, unsigned Depth,
                              const SimplifyQuery &Q) {
  const Function *F = nullptr;
  if (const Instruction *I = dyn_cast<Instruction>(GEP))
    F = I->getFunction();

  // If the GEP is nuw, or is inbounds in an address space where null is not a
  // valid pointer, then the GEP can only be null when the base pointer is null
  // and the offset is zero.
  if (!GEP->hasNoUnsignedWrap() &&
      !(GEP->isInBounds() &&
        !NullPointerIsDefined(F, GEP->getPointerAddressSpace())))
    return false;

  // FIXME: Support vector-GEPs.
  assert(GEP->getType()->isPointerTy() && "We only support plain pointer GEP");

  // If the base pointer is non-null, we cannot walk to a null address with an
  // inbounds GEP in address space zero.
  if (isKnownNonZero(GEP->getPointerOperand(), Q, Depth))
    return true;

  // Walk the GEP operands and see if any operand introduces a non-zero offset.
  // If so, then the GEP cannot produce a null pointer, as doing so would
  // inherently violate the inbounds contract within address space zero.
  for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP);
       GTI != GTE; ++GTI) {
    // Struct types are easy -- they must always be indexed by a constant.
    if (StructType *STy = GTI.getStructTypeOrNull()) {
      ConstantInt *OpC = cast<ConstantInt>(GTI.getOperand());
      unsigned ElementIdx = OpC->getZExtValue();
      const StructLayout *SL = Q.DL.getStructLayout(STy);
      uint64_t ElementOffset = SL->getElementOffset(ElementIdx);
      if (ElementOffset > 0)
        return true;
      continue;
    }

    // If we have a zero-sized type, the index doesn't matter. Keep looping.
    if (GTI.getSequentialElementStride(Q.DL).isZero())
      continue;

    // Fast path the constant operand case both for efficiency and so we don't
    // increment Depth when just zipping down an all-constant GEP.
    if (ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand())) {
      if (!OpC->isZero())
        return true;
      continue;
    }

    // We post-increment Depth here because while isKnownNonZero increments it
    // as well, when we pop back up that increment won't persist. We don't want
    // to recurse 10k times just because we have 10k GEP operands. We don't
    // bail completely out because we want to handle constant GEPs regardless
    // of depth.
    if (Depth++ >= MaxAnalysisRecursionDepth)
      continue;

    if (isKnownNonZero(GTI.getOperand(), Q, Depth))
      return true;
  }

  return false;
}
static bool isKnownNonNullFromDominatingCondition(const Value *V,
                                                  const Instruction *CtxI,
                                                  const DominatorTree *DT) {
  assert(!isa<Constant>(V) && "Called for constant?");

  if (!CtxI || !DT)
    return false;

  unsigned NumUsesExplored = 0;
  for (auto &U : V->uses()) {
    // Avoid massive lists.
    if (NumUsesExplored >= DomConditionsMaxUses)
      break;
    NumUsesExplored++;

    const Instruction *UI = cast<Instruction>(U.getUser());
    // If the value is used as an argument to a call or invoke, then argument
    // attributes may provide an answer about null-ness.
    if (V->getType()->isPointerTy()) {
      if (const auto *CB = dyn_cast<CallBase>(UI)) {
        if (CB->isArgOperand(&U) &&
            CB->paramHasNonNullAttr(CB->getArgOperandNo(&U),
                                    /*AllowUndefOrPoison=*/false) &&
            DT->dominates(CB, CtxI))
          return true;
      }
    }

    // If the value is used as a load/store, then the pointer must be non-null.
    if (V == getLoadStorePointerOperand(UI)) {
      if (!NullPointerIsDefined(UI->getFunction(),
                                V->getType()->getPointerAddressSpace()) &&
          DT->dominates(UI, CtxI))
        return true;
    }

    if ((match(UI, m_IDiv(m_Value(), m_Specific(V))) ||
         match(UI, m_IRem(m_Value(), m_Specific(V)))) &&
        isValidAssumeForContext(UI, CtxI, DT))
      return true;

    // Consider only compare instructions uniquely controlling a branch.
    Value *RHS;
    CmpPredicate Pred;
    if (!match(UI, m_c_ICmp(Pred, m_Specific(V), m_Value(RHS))))
      continue;

    bool NonNullIfTrue;
    if (cmpExcludesZero(Pred, RHS))
      NonNullIfTrue = true;
    else if (cmpExcludesZero(CmpInst::getInversePredicate(Pred), RHS))
      NonNullIfTrue = false;
    else
      continue;

    SmallVector<const User *, 4> WorkList;
    SmallPtrSet<const User *, 4> Visited;
    for (const auto *CmpU : UI->users()) {
      assert(WorkList.empty() && "Should be!");
      if (Visited.insert(CmpU).second)
        WorkList.push_back(CmpU);

      while (!WorkList.empty()) {
        auto *Curr = WorkList.pop_back_val();

        // If a user is an AND, add all its users to the work list. We only
        // propagate the "pred != null" condition through AND because it is
        // only correct to assume that all conditions of AND are met in the
        // true branch.
        // TODO: Support similar logic of OR and EQ predicate?
        if (NonNullIfTrue)
          if (match(Curr, m_LogicalAnd(m_Value(), m_Value()))) {
            for (const auto *CurrU : Curr->users())
              if (Visited.insert(CurrU).second)
                WorkList.push_back(CurrU);
            continue;
          }

        if (const BranchInst *BI = dyn_cast<BranchInst>(Curr)) {
          assert(BI->isConditional() && "uses a comparison!");

          BasicBlock *NonNullSuccessor =
              BI->getSuccessor(NonNullIfTrue ? 0 : 1);
          BasicBlockEdge Edge(BI->getParent(), NonNullSuccessor);
          if (Edge.isSingleEdge() && DT->dominates(Edge, CtxI->getParent()))
            return true;
        } else if (NonNullIfTrue && isGuard(Curr) &&
                   DT->dominates(cast<Instruction>(Curr), CtxI)) {
          return true;
        }
      }
    }
  }

  return false;
}
/// Does the 'Range' metadata (which must be a valid MD_range operand list)
/// ensure that the value it's attached to is never \p Value? 'RangeType' is
/// the type of the value described by the range.
static bool rangeMetadataExcludesValue(const MDNode *Ranges,
                                       const APInt &Value) {
  const unsigned NumRanges = Ranges->getNumOperands() / 2;
  assert(NumRanges >= 1);
  for (unsigned i = 0; i < NumRanges; ++i) {
    ConstantInt *Lower =
        mdconst::extract<ConstantInt>(Ranges->getOperand(2 * i + 0));
    ConstantInt *Upper =
        mdconst::extract<ConstantInt>(Ranges->getOperand(2 * i + 1));
    ConstantRange Range(Lower->getValue(), Upper->getValue());
    if (Range.contains(Value))
      return false;
  }
  return true;
}
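// For example (illustrative): range metadata pairs describe half-open
// intervals, so !range !{i32 1, i32 256} restricts a value to [1, 256) and
// therefore excludes zero.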
/// Try to detect a recurrence that monotonically increases/decreases from a
/// non-zero starting value. These are common as induction variables.
static bool isNonZeroRecurrence(const PHINode *PN) {
  BinaryOperator *BO = nullptr;
  Value *Start = nullptr, *Step = nullptr;
  const APInt *StartC, *StepC;
  if (!matchSimpleRecurrence(PN, BO, Start, Step) ||
      !match(Start, m_APInt(StartC)) || StartC->isZero())
    return false;

  switch (BO->getOpcode()) {
  case Instruction::Add:
    // Starting from non-zero and stepping away from zero can never wrap back
    // around to zero.
    return BO->hasNoUnsignedWrap() ||
           (BO->hasNoSignedWrap() && match(Step, m_APInt(StepC)) &&
            StartC->isNegative() == StepC->isNegative());
  case Instruction::Mul:
    return (BO->hasNoUnsignedWrap() || BO->hasNoSignedWrap()) &&
           match(Step, m_APInt(StepC)) && !StepC->isZero();
  case Instruction::Shl:
    return BO->hasNoUnsignedWrap() || BO->hasNoSignedWrap();
  case Instruction::AShr:
  case Instruction::LShr:
    return BO->isExact();
  default:
    return false;
  }
}
static bool matchOpWithOpEqZero(Value *Op0, Value *Op1) {
  return match(Op0, m_ZExtOrSExt(m_SpecificICmp(ICmpInst::ICMP_EQ,
                                                m_Specific(Op1), m_Zero()))) ||
         match(Op1, m_ZExtOrSExt(m_SpecificICmp(ICmpInst::ICMP_EQ,
                                                m_Specific(Op0), m_Zero())));
}
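// For example (illustrative): this matches the pair
//   %c = icmp eq i32 %x, 0
//   %y = zext i1 %c to i32
// against (%x, %y). %y is 1 exactly when %x is 0, so expressions such as
// %x + %y, %x - %y, %x | %y and %x ^ %y can never be zero, which is what the
// callers below rely on.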
static bool isNonZeroAdd(const APInt &DemandedElts, unsigned Depth,
                         const SimplifyQuery &Q, unsigned BitWidth, Value *X,
                         Value *Y, bool NSW, bool NUW) {
  // (X + (X == 0)) is non zero
  if (matchOpWithOpEqZero(X, Y))
    return true;

  if (NUW)
    return isKnownNonZero(Y, DemandedElts, Q, Depth) ||
           isKnownNonZero(X, DemandedElts, Q, Depth);

  KnownBits XKnown = computeKnownBits(X, DemandedElts, Depth, Q);
  KnownBits YKnown = computeKnownBits(Y, DemandedElts, Depth, Q);

  // If X and Y are both non-negative (as signed values) then their sum is not
  // zero unless both X and Y are zero.
  if (XKnown.isNonNegative() && YKnown.isNonNegative())
    if (isKnownNonZero(Y, DemandedElts, Q, Depth) ||
        isKnownNonZero(X, DemandedElts, Q, Depth))
      return true;

  // If X and Y are both negative (as signed values) then their sum is not
  // zero unless both X and Y equal INT_MIN.
  if (XKnown.isNegative() && YKnown.isNegative()) {
    APInt Mask = APInt::getSignedMaxValue(BitWidth);
    // The sign bit of X is set. If some other bit is set then X is not equal
    // to INT_MIN.
    if (XKnown.One.intersects(Mask))
      return true;
    // The sign bit of Y is set. If some other bit is set then Y is not equal
    // to INT_MIN.
    if (YKnown.One.intersects(Mask))
      return true;
  }

  // The sum of a non-negative number and a power of two is not zero.
  if (XKnown.isNonNegative() &&
      isKnownToBeAPowerOfTwo(Y, /*OrZero*/ false, Depth, Q))
    return true;
  if (YKnown.isNonNegative() &&
      isKnownToBeAPowerOfTwo(X, /*OrZero*/ false, Depth, Q))
    return true;

  return KnownBits::add(XKnown, YKnown, NSW, NUW).isNonZero();
}
static bool isNonZeroSub(const APInt &DemandedElts, unsigned Depth,
                         const SimplifyQuery &Q, unsigned BitWidth, Value *X,
                         Value *Y) {
  // (X - (X == 0)) is non zero
  // ((X == 0) - X) is non zero
  if (matchOpWithOpEqZero(X, Y))
    return true;

  // TODO: Move this case into isKnownNonEqual().
  if (auto *C = dyn_cast<Constant>(X))
    if (C->isNullValue() && isKnownNonZero(Y, DemandedElts, Q, Depth))
      return true;

  return ::isKnownNonEqual(X, Y, DemandedElts, Depth, Q);
}
static bool isNonZeroMul(const APInt &DemandedElts, unsigned Depth,
                         const SimplifyQuery &Q, unsigned BitWidth, Value *X,
                         Value *Y, bool NSW, bool NUW) {
  // If X and Y are non-zero then so is X * Y as long as the multiplication
  // does not overflow.
  if (NSW || NUW)
    return isKnownNonZero(X, DemandedElts, Q, Depth) &&
           isKnownNonZero(Y, DemandedElts, Q, Depth);

  // If either X or Y is odd, then if the other is non-zero the result can't
  // be zero.
  KnownBits XKnown = computeKnownBits(X, DemandedElts, Depth, Q);
  if (XKnown.One[0])
    return isKnownNonZero(Y, DemandedElts, Q, Depth);

  KnownBits YKnown = computeKnownBits(Y, DemandedElts, Depth, Q);
  if (YKnown.One[0])
    return XKnown.isNonZero() || isKnownNonZero(X, DemandedElts, Q, Depth);

  // If there exists any subset of X (sX) and subset of Y (sY) s.t sX * sY is
  // non-zero, then X * Y is non-zero. We can find sX and sY by just taking
  // the lowest known One of X and Y. If they are non-zero, the result
  // must be non-zero. We can check if LSB(X) * LSB(Y) != 0 by doing
  // X.CountTrailingZeros + Y.CountTrailingZeros < BitWidth.
  return (XKnown.countMaxTrailingZeros() + YKnown.countMaxTrailingZeros()) <
         BitWidth;
}
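// For example (illustrative): with i8 operands, if X has at most 3 trailing
// zeros and Y has at most 4, the lowest set bit of X * Y is at position at
// most 3 + 4 = 7, which still fits in 8 bits, so the product is non-zero.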
static bool isNonZeroShift(const Operator *I, const APInt &DemandedElts,
                           unsigned Depth, const SimplifyQuery &Q,
                           const KnownBits &KnownVal) {
  auto ShiftOp = [&](const APInt &Lhs, const APInt &Rhs) {
    switch (I->getOpcode()) {
    case Instruction::Shl:
      return Lhs.shl(Rhs);
    case Instruction::LShr:
      return Lhs.lshr(Rhs);
    case Instruction::AShr:
      return Lhs.ashr(Rhs);
    default:
      llvm_unreachable("Unknown Shift Opcode");
    }
  };

  auto InvShiftOp = [&](const APInt &Lhs, const APInt &Rhs) {
    switch (I->getOpcode()) {
    case Instruction::Shl:
      return Lhs.lshr(Rhs);
    case Instruction::LShr:
    case Instruction::AShr:
      return Lhs.shl(Rhs);
    default:
      llvm_unreachable("Unknown Shift Opcode");
    }
  };

  if (KnownVal.isUnknown())
    return false;

  KnownBits KnownCnt =
      computeKnownBits(I->getOperand(1), DemandedElts, Depth, Q);
  APInt MaxShift = KnownCnt.getMaxValue();
  unsigned NumBits = KnownVal.getBitWidth();
  if (MaxShift.uge(NumBits))
    return false;

  if (!ShiftOp(KnownVal.One, MaxShift).isZero())
    return true;

  // If all of the bits shifted out are known to be zero, and Val is known
  // non-zero then at least one non-zero bit must remain.
  if (InvShiftOp(KnownVal.Zero, NumBits - MaxShift)
          .eq(InvShiftOp(APInt::getAllOnes(NumBits), NumBits - MaxShift)) &&
      isKnownNonZero(I->getOperand(0), DemandedElts, Q, Depth))
    return true;

  return false;
}
static bool isKnownNonZeroFromOperator(const Operator *I,
                                       const APInt &DemandedElts,
                                       unsigned Depth, const SimplifyQuery &Q) {
  unsigned BitWidth = getBitWidth(I->getType()->getScalarType(), Q.DL);
  switch (I->getOpcode()) {
  case Instruction::Alloca:
    // Alloca never returns null, malloc might.
    return I->getType()->getPointerAddressSpace() == 0;
  case Instruction::GetElementPtr:
    if (I->getType()->isPointerTy())
      return isGEPKnownNonNull(cast<GEPOperator>(I), Depth, Q);
    break;
  case Instruction::BitCast: {
    // We need to be a bit careful here. We can only peek through the bitcast
    // if the scalar size of elements in the operand is smaller than, and a
    // multiple of, the size they are casting to. Take three cases:
    //
    // 1) Unsafe:
    //        bitcast <2 x i16> %NonZero to <4 x i8>
    //
    //    %NonZero can have 2 non-zero i16 elements, but isKnownNonZero on a
    //    <4 x i8> requires that all 4 i8 elements be non-zero, which isn't
    //    guaranteed (imagine just the sign bit set in the 2 i16 elements).
    //
    // 2) Unsafe:
    //        bitcast <4 x i3> %NonZero to <3 x i4>
    //
    //    Even though the scalar size of the src (`i3`) is smaller than the
    //    scalar size of the dst `i4`, because `i4` is not a multiple of `i3`
    //    it's possible for the `3 x i4` elements to be zero because there are
    //    some elements in the destination that don't contain any full src
    //    element.
    //
    // 3) Safe:
    //        bitcast <4 x i8> %NonZero to <2 x i16>
    //
    //    This is always safe as non-zero in the 4 i8 elements implies
    //    non-zero in the combination of any two adjacent ones. Since i16 is a
    //    multiple of i8, each i16 is guaranteed to hold 2 full i8 elements.
    //    This all implies the 2 i16 elements are non-zero.
    Type *FromTy = I->getOperand(0)->getType();
    if ((FromTy->isIntOrIntVectorTy() || FromTy->isPtrOrPtrVectorTy()) &&
        (BitWidth % getBitWidth(FromTy->getScalarType(), Q.DL)) == 0)
      return isKnownNonZero(I->getOperand(0), Q, Depth);
  } break;
  case Instruction::IntToPtr:
    // Note that we have to take special care to avoid looking through
    // truncating casts, e.g., int2ptr/ptr2int with appropriate sizes, as well
    // as casts that can alter the value, e.g., AddrSpaceCasts.
    if (!isa<ScalableVectorType>(I->getType()) &&
        Q.DL.getTypeSizeInBits(I->getOperand(0)->getType()).getFixedValue() <=
            Q.DL.getTypeSizeInBits(I->getType()).getFixedValue())
      return isKnownNonZero(I->getOperand(0), DemandedElts, Q, Depth);
    break;
  case Instruction::PtrToInt:
    // Similar to int2ptr above, we can look through ptr2int here if the cast
    // is a no-op or an extend and not a truncate.
    if (!isa<ScalableVectorType>(I->getType()) &&
        Q.DL.getTypeSizeInBits(I->getOperand(0)->getType()).getFixedValue() <=
            Q.DL.getTypeSizeInBits(I->getType()).getFixedValue())
      return isKnownNonZero(I->getOperand(0), DemandedElts, Q, Depth);
    break;
  case Instruction::Trunc:
    // nuw/nsw trunc preserves zero/non-zero status of input.
    if (auto *TI = dyn_cast<TruncInst>(I))
      if (TI->hasNoSignedWrap() || TI->hasNoUnsignedWrap())
        return isKnownNonZero(TI->getOperand(0), DemandedElts, Q, Depth);
    break;
  case Instruction::Sub:
    return isNonZeroSub(DemandedElts, Depth, Q, BitWidth, I->getOperand(0),
                        I->getOperand(1));
  case Instruction::Xor:
    // (X ^ (X == 0)) is non zero
    if (matchOpWithOpEqZero(I->getOperand(0), I->getOperand(1)))
      return true;
    break;
  case Instruction::Or:
    // (X | (X == 0)) is non zero
    if (matchOpWithOpEqZero(I->getOperand(0), I->getOperand(1)))
      return true;
    // X | Y != 0 if X != 0 or Y != 0.
    return isKnownNonZero(I->getOperand(1), DemandedElts, Q, Depth) ||
           isKnownNonZero(I->getOperand(0), DemandedElts, Q, Depth);
  case Instruction::SExt:
  case Instruction::ZExt:
    // ext X != 0 if X != 0.
    return isKnownNonZero(I->getOperand(0), DemandedElts, Q, Depth);
  case Instruction::Shl: {
    // shl nsw/nuw can't remove any non-zero bits.
    const OverflowingBinaryOperator *BO = cast<OverflowingBinaryOperator>(I);
    if (Q.IIQ.hasNoUnsignedWrap(BO) || Q.IIQ.hasNoSignedWrap(BO))
      return isKnownNonZero(I->getOperand(0), DemandedElts, Q, Depth);

    // shl X, Y != 0 if X is odd. Note that the value of the shift is undefined
    // if the lowest bit is shifted off the end.
    KnownBits Known(BitWidth);
    computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth, Q);
    if (Known.One[0])
      return true;

    return isNonZeroShift(I, DemandedElts, Depth, Q, Known);
  }
  case Instruction::LShr:
  case Instruction::AShr: {
    // shr exact can only shift out zero bits.
    const PossiblyExactOperator *BO = cast<PossiblyExactOperator>(I);
    if (BO->isExact())
      return isKnownNonZero(I->getOperand(0), DemandedElts, Q, Depth);

    // shr X, Y != 0 if X is negative. Note that the value of the shift is not
    // defined if the sign bit is shifted off the end.
    KnownBits Known =
        computeKnownBits(I->getOperand(0), DemandedElts, Depth, Q);
    if (Known.isNegative())
      return true;

    return isNonZeroShift(I, DemandedElts, Depth, Q, Known);
  }
  case Instruction::UDiv:
  case Instruction::SDiv: {
    // div exact can only produce a zero if the dividend is zero.
    if (cast<PossiblyExactOperator>(I)->isExact())
      return isKnownNonZero(I->getOperand(0), DemandedElts, Q, Depth);

    KnownBits XKnown =
        computeKnownBits(I->getOperand(0), DemandedElts, Depth, Q);
    // If X is fully unknown we won't be able to figure anything out so don't
    // bother computing known bits for Y.
    if (XKnown.isUnknown())
      return false;

    KnownBits YKnown =
        computeKnownBits(I->getOperand(1), DemandedElts, Depth, Q);
    if (I->getOpcode() == Instruction::SDiv) {
      // For signed division we need to compare abs values of the operands.
      XKnown = XKnown.abs(/*IntMinIsPoison*/ false);
      YKnown = YKnown.abs(/*IntMinIsPoison*/ false);
    }
    // If X u>= Y then div is non zero (0/0 is UB).
    std::optional<bool> XUgeY = KnownBits::uge(XKnown, YKnown);
    // If X is totally unknown or X u< Y we won't be able to prove non-zero
    // with compute known bits so just return early.
    return XUgeY && *XUgeY;
  }
  case Instruction::Add: {
    // If Add has the nuw flag, then if either X or Y is non-zero the result
    // is non-zero.
    auto *BO = cast<OverflowingBinaryOperator>(I);
    return isNonZeroAdd(DemandedElts, Depth, Q, BitWidth, I->getOperand(0),
                        I->getOperand(1), Q.IIQ.hasNoSignedWrap(BO),
                        Q.IIQ.hasNoUnsignedWrap(BO));
  }
  case Instruction::Mul: {
    const OverflowingBinaryOperator *BO = cast<OverflowingBinaryOperator>(I);
    return isNonZeroMul(DemandedElts, Depth, Q, BitWidth, I->getOperand(0),
                        I->getOperand(1), Q.IIQ.hasNoSignedWrap(BO),
                        Q.IIQ.hasNoUnsignedWrap(BO));
  }
  case Instruction::Select: {
    // (C ? X : Y) != 0 if X != 0 and Y != 0.

    // First check if the arm is non-zero using `isKnownNonZero`. If that
    // fails, then see if the select condition implies the arm is non-zero.
    // For example (X != 0 ? X : Y), we know the true arm is non-zero as the
    // `X` "return" is dominated by `X != 0`.
    auto SelectArmIsNonZero = [&](bool IsTrueArm) {
      Value *Op;
      Op = IsTrueArm ? I->getOperand(1) : I->getOperand(2);
      // Op is trivially non-zero.
      if (isKnownNonZero(Op, DemandedElts, Q, Depth))
        return true;

      // The condition of the select dominates the true/false arm. Check if
      // the condition implies that a given arm is non-zero.
      Value *X;
      CmpPredicate Pred;
      if (!match(I->getOperand(0), m_c_ICmp(Pred, m_Specific(Op), m_Value(X))))
        return false;

      if (!IsTrueArm)
        Pred = ICmpInst::getInversePredicate(Pred);

      return cmpExcludesZero(Pred, X);
    };

    if (SelectArmIsNonZero(/* IsTrueArm */ true) &&
        SelectArmIsNonZero(/* IsTrueArm */ false))
      return true;
    break;
  }
  case Instruction::PHI: {
    auto *PN = cast<PHINode>(I);
    if (Q.IIQ.UseInstrInfo && isNonZeroRecurrence(PN))
      return true;

    // Check if all incoming values are non-zero using recursion.
    SimplifyQuery RecQ = Q.getWithoutCondContext();
    unsigned NewDepth = std::max(Depth, MaxAnalysisRecursionDepth - 1);
    return llvm::all_of(PN->operands(), [&](const Use &U) {
      if (U.get() == PN)
        return true;
      RecQ.CxtI = PN->getIncomingBlock(U)->getTerminator();
      // Check if the branch on the phi excludes zero.
      CmpPredicate Pred;
      Value *X;
      BasicBlock *TrueSucc, *FalseSucc;
      if (match(RecQ.CxtI,
                m_Br(m_c_ICmp(Pred, m_Specific(U.get()), m_Value(X)),
                     m_BasicBlock(TrueSucc), m_BasicBlock(FalseSucc)))) {
        // Check for cases of duplicate successors.
        if ((TrueSucc == PN->getParent()) != (FalseSucc == PN->getParent())) {
          // If we're using the false successor, invert the predicate.
          if (FalseSucc == PN->getParent())
            Pred = CmpInst::getInversePredicate(Pred);
          if (cmpExcludesZero(Pred, X))
            return true;
        }
      }
      // Finally recurse on the edge and check it directly.
      return isKnownNonZero(U.get(), DemandedElts, RecQ, NewDepth);
    });
  }
  case Instruction::InsertElement: {
    if (isa<ScalableVectorType>(I->getType()))
      break;

    const Value *Vec = I->getOperand(0);
    const Value *Elt = I->getOperand(1);
    auto *CIdx = dyn_cast<ConstantInt>(I->getOperand(2));

    unsigned NumElts = DemandedElts.getBitWidth();
    APInt DemandedVecElts = DemandedElts;
    bool SkipElt = false;
    // If we know the index we are inserting to, clear it from the Vec check.
    if (CIdx && CIdx->getValue().ult(NumElts)) {
      DemandedVecElts.clearBit(CIdx->getZExtValue());
      SkipElt = !DemandedElts[CIdx->getZExtValue()];
    }

    // Result is non-zero if Elt is non-zero and the rest of the demanded elts
    // in Vec are non-zero.
    return (SkipElt || isKnownNonZero(Elt, Q, Depth)) &&
           (DemandedVecElts.isZero() ||
            isKnownNonZero(Vec, DemandedVecElts, Q, Depth));
  }
  case Instruction::ExtractElement:
    if (const auto *EEI = dyn_cast<ExtractElementInst>(I)) {
      const Value *Vec = EEI->getVectorOperand();
      const Value *Idx = EEI->getIndexOperand();
      auto *CIdx = dyn_cast<ConstantInt>(Idx);
      if (auto *VecTy = dyn_cast<FixedVectorType>(Vec->getType())) {
        unsigned NumElts = VecTy->getNumElements();
        APInt DemandedVecElts = APInt::getAllOnes(NumElts);
        if (CIdx && CIdx->getValue().ult(NumElts))
          DemandedVecElts = APInt::getOneBitSet(NumElts, CIdx->getZExtValue());
        return isKnownNonZero(Vec, DemandedVecElts, Q, Depth);
      }
    }
    break;
  case Instruction::ShuffleVector: {
    auto *Shuf = dyn_cast<ShuffleVectorInst>(I);
    if (!Shuf)
      break;
    APInt DemandedLHS, DemandedRHS;
    // For undef elements, we don't know anything about the common state of
    // the shuffle result.
    if (!getShuffleDemandedElts(Shuf, DemandedElts, DemandedLHS, DemandedRHS))
      break;
    // If demanded elements for both vecs are non-zero, the shuffle is non-zero.
    return (DemandedRHS.isZero() ||
            isKnownNonZero(Shuf->getOperand(1), DemandedRHS, Q, Depth)) &&
           (DemandedLHS.isZero() ||
            isKnownNonZero(Shuf->getOperand(0), DemandedLHS, Q, Depth));
  }
3195 case Instruction::Freeze
:
3196 return isKnownNonZero(I
->getOperand(0), Q
, Depth
) &&
3197 isGuaranteedNotToBePoison(I
->getOperand(0), Q
.AC
, Q
.CxtI
, Q
.DT
,
3199 case Instruction::Load
: {
3200 auto *LI
= cast
<LoadInst
>(I
);
3201 // A Load tagged with nonnull or dereferenceable with null pointer undefined
3203 if (auto *PtrT
= dyn_cast
<PointerType
>(I
->getType())) {
3204 if (Q
.IIQ
.getMetadata(LI
, LLVMContext::MD_nonnull
) ||
3205 (Q
.IIQ
.getMetadata(LI
, LLVMContext::MD_dereferenceable
) &&
3206 !NullPointerIsDefined(LI
->getFunction(), PtrT
->getAddressSpace())))
3208 } else if (MDNode
*Ranges
= Q
.IIQ
.getMetadata(LI
, LLVMContext::MD_range
)) {
3209 return rangeMetadataExcludesValue(Ranges
, APInt::getZero(BitWidth
));
3212 // No need to fall through to computeKnownBits as range metadata is already
3213 // handled in isKnownNonZero.
3216 case Instruction::ExtractValue
: {
3217 const WithOverflowInst
*WO
;
3218 if (match(I
, m_ExtractValue
<0>(m_WithOverflowInst(WO
)))) {
3219 switch (WO
->getBinaryOp()) {
3222 case Instruction::Add
:
3223 return isNonZeroAdd(DemandedElts
, Depth
, Q
, BitWidth
,
3224 WO
->getArgOperand(0), WO
->getArgOperand(1),
3227 case Instruction::Sub
:
3228 return isNonZeroSub(DemandedElts
, Depth
, Q
, BitWidth
,
3229 WO
->getArgOperand(0), WO
->getArgOperand(1));
3230 case Instruction::Mul
:
3231 return isNonZeroMul(DemandedElts
, Depth
, Q
, BitWidth
,
3232 WO
->getArgOperand(0), WO
->getArgOperand(1),
3233 /*NSW=*/false, /*NUW=*/false);
3239 case Instruction::Call
:
3240 case Instruction::Invoke
: {
3241 const auto *Call
= cast
<CallBase
>(I
);
3242 if (I
->getType()->isPointerTy()) {
3243 if (Call
->isReturnNonNull())
3245 if (const auto *RP
= getArgumentAliasingToReturnedPointer(Call
, true))
3246 return isKnownNonZero(RP
, Q
, Depth
);
3248 if (MDNode
*Ranges
= Q
.IIQ
.getMetadata(Call
, LLVMContext::MD_range
))
3249 return rangeMetadataExcludesValue(Ranges
, APInt::getZero(BitWidth
));
3250 if (std::optional
<ConstantRange
> Range
= Call
->getRange()) {
3251 const APInt
ZeroValue(Range
->getBitWidth(), 0);
3252 if (!Range
->contains(ZeroValue
))
3255 if (const Value
*RV
= Call
->getReturnedArgOperand())
3256 if (RV
->getType() == I
->getType() && isKnownNonZero(RV
, Q
, Depth
))
3260 if (auto *II
= dyn_cast
<IntrinsicInst
>(I
)) {
3261 switch (II
->getIntrinsicID()) {
3262 case Intrinsic::sshl_sat
:
3263 case Intrinsic::ushl_sat
:
3264 case Intrinsic::abs
:
3265 case Intrinsic::bitreverse
:
3266 case Intrinsic::bswap
:
3267 case Intrinsic::ctpop
:
3268 return isKnownNonZero(II
->getArgOperand(0), DemandedElts
, Q
, Depth
);
3269 // NB: We don't do usub_sat here as in any case we can prove its
3270 // non-zero, we will fold it to `sub nuw` in InstCombine.
3271 case Intrinsic::ssub_sat
:
3272 return isNonZeroSub(DemandedElts
, Depth
, Q
, BitWidth
,
3273 II
->getArgOperand(0), II
->getArgOperand(1));
3274 case Intrinsic::sadd_sat
:
3275 return isNonZeroAdd(DemandedElts
, Depth
, Q
, BitWidth
,
3276 II
->getArgOperand(0), II
->getArgOperand(1),
3277 /*NSW=*/true, /* NUW=*/false);
3278 // Vec reverse preserves zero/non-zero status from input vec.
3279 case Intrinsic::vector_reverse
:
3280 return isKnownNonZero(II
->getArgOperand(0), DemandedElts
.reverseBits(),
3282 // umin/smin/smax/smin/or of all non-zero elements is always non-zero.
3283 case Intrinsic::vector_reduce_or
:
3284 case Intrinsic::vector_reduce_umax
:
3285 case Intrinsic::vector_reduce_umin
:
3286 case Intrinsic::vector_reduce_smax
:
3287 case Intrinsic::vector_reduce_smin
:
3288 return isKnownNonZero(II
->getArgOperand(0), Q
, Depth
);
3289 case Intrinsic::umax
:
3290 case Intrinsic::uadd_sat
:
3291 // umax(X, (X != 0)) is non zero
3292 // X +usat (X != 0) is non zero
3293 if (matchOpWithOpEqZero(II
->getArgOperand(0), II
->getArgOperand(1)))
3296 return isKnownNonZero(II
->getArgOperand(1), DemandedElts
, Q
, Depth
) ||
3297 isKnownNonZero(II
->getArgOperand(0), DemandedElts
, Q
, Depth
);
3298 case Intrinsic::smax
: {
3299 // If either arg is strictly positive the result is non-zero. Otherwise
3300 // the result is non-zero if both ops are non-zero.
3301 auto IsNonZero
= [&](Value
*Op
, std::optional
<bool> &OpNonZero
,
3302 const KnownBits
&OpKnown
) {
3303 if (!OpNonZero
.has_value())
3304 OpNonZero
= OpKnown
.isNonZero() ||
3305 isKnownNonZero(Op
, DemandedElts
, Q
, Depth
);
3308 // Avoid re-computing isKnownNonZero.
3309 std::optional
<bool> Op0NonZero
, Op1NonZero
;
3310 KnownBits Op1Known
=
3311 computeKnownBits(II
->getArgOperand(1), DemandedElts
, Depth
, Q
);
3312 if (Op1Known
.isNonNegative() &&
3313 IsNonZero(II
->getArgOperand(1), Op1NonZero
, Op1Known
))
3315 KnownBits Op0Known
=
3316 computeKnownBits(II
->getArgOperand(0), DemandedElts
, Depth
, Q
);
3317 if (Op0Known
.isNonNegative() &&
3318 IsNonZero(II
->getArgOperand(0), Op0NonZero
, Op0Known
))
3320 return IsNonZero(II
->getArgOperand(1), Op1NonZero
, Op1Known
) &&
3321 IsNonZero(II
->getArgOperand(0), Op0NonZero
, Op0Known
);
3323 case Intrinsic::smin
: {
3324 // If either arg is negative the result is non-zero. Otherwise
3325 // the result is non-zero if both ops are non-zero.
3326 KnownBits Op1Known
=
3327 computeKnownBits(II
->getArgOperand(1), DemandedElts
, Depth
, Q
);
3328 if (Op1Known
.isNegative())
3330 KnownBits Op0Known
=
3331 computeKnownBits(II
->getArgOperand(0), DemandedElts
, Depth
, Q
);
3332 if (Op0Known
.isNegative())
3335 if (Op1Known
.isNonZero() && Op0Known
.isNonZero())
3339 case Intrinsic::umin
:
3340 return isKnownNonZero(II
->getArgOperand(0), DemandedElts
, Q
, Depth
) &&
3341 isKnownNonZero(II
->getArgOperand(1), DemandedElts
, Q
, Depth
);
3342 case Intrinsic::cttz
:
3343 return computeKnownBits(II
->getArgOperand(0), DemandedElts
, Depth
, Q
)
3345 case Intrinsic::ctlz
:
3346 return computeKnownBits(II
->getArgOperand(0), DemandedElts
, Depth
, Q
)
3348 case Intrinsic::fshr
:
3349 case Intrinsic::fshl
:
3350 // If Op0 == Op1, this is a rotate. rotate(x, y) != 0 iff x != 0.
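      // Illustrative IR example (added for clarity, not from the upstream source):
      // a funnel shift with equal data operands is a rotate, and rotating a
      // non-zero value by any amount keeps it non-zero:
      //   %r = call i8 @llvm.fshl.i8(i8 %x, i8 %x, i8 %amt)
      // If %x is known non-zero, %r is known non-zero as well.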
3351 if (II
->getArgOperand(0) == II
->getArgOperand(1))
3352 return isKnownNonZero(II
->getArgOperand(0), DemandedElts
, Q
, Depth
);
3354 case Intrinsic::vscale
:
3356 case Intrinsic::experimental_get_vector_length
:
3357 return isKnownNonZero(I
->getOperand(0), Q
, Depth
);
3368 KnownBits
Known(BitWidth
);
3369 computeKnownBits(I
, DemandedElts
, Known
, Depth
, Q
);
3370 return Known
.One
!= 0;
3373 /// Return true if the given value is known to be non-zero when defined. For
3374 /// vectors, return true if every demanded element is known to be non-zero when
3375 /// defined. For pointers, if the context instruction and dominator tree are
3376 /// specified, perform context-sensitive analysis and return true if the
3377 /// pointer couldn't possibly be null at the specified instruction.
3378 /// Supports values with integer or pointer type and vectors of integers.
3379 bool isKnownNonZero(const Value *V, const APInt &DemandedElts,
3380                     const SimplifyQuery &Q, unsigned Depth) {
3381   Type *Ty = V->getType();
3384   assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");
3386   if (auto *FVTy = dyn_cast<FixedVectorType>(Ty)) {
3388         FVTy->getNumElements() == DemandedElts.getBitWidth() &&
3389         "DemandedElt width should equal the fixed vector number of elements");
3391     assert(DemandedElts == APInt(1, 1) &&
3392            "DemandedElt width should be 1 for scalars");
3396   if (auto *C = dyn_cast<Constant>(V)) {
3397     if (C->isNullValue())
3399     if (isa<ConstantInt>(C))
3400       // Must be non-zero due to null test above.
3403     // For constant vectors, check that all elements are poison or known
3404     // non-zero to determine that the whole vector is known non-zero.
3405     if (auto *VecTy = dyn_cast<FixedVectorType>(Ty)) {
3406       for (unsigned i = 0, e = VecTy->getNumElements(); i != e; ++i) {
3407         if (!DemandedElts[i])
3409         Constant *Elt = C->getAggregateElement(i);
3410         if (!Elt || Elt->isNullValue())
3412         if (!isa<PoisonValue>(Elt) && !isa<ConstantInt>(Elt))
3418     // Constant ptrauth can be null, iff the base pointer can be.
3419     if (auto *CPA = dyn_cast<ConstantPtrAuth>(V))
3420       return isKnownNonZero(CPA->getPointer(), DemandedElts, Q, Depth);
3422     // A global variable in address space 0 is non null unless extern weak
3423     // or an absolute symbol reference. Other address spaces may have null as a
3424     // valid address for a global, so we can't assume anything.
3425     if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
3426       if (!GV->isAbsoluteSymbolRef() && !GV->hasExternalWeakLinkage() &&
3427           GV->getType()->getAddressSpace() == 0)
3431     // For constant expressions, fall through to the Operator code below.
3432     if (!isa<ConstantExpr>(V))
3436   if (const auto *A = dyn_cast<Argument>(V))
3437     if (std::optional<ConstantRange> Range = A->getRange()) {
3438       const APInt ZeroValue(Range->getBitWidth(), 0);
3439       if (!Range->contains(ZeroValue))
3443   if (!isa<Constant>(V) && isKnownNonZeroFromAssume(V, Q))
3446   // Some of the tests below are recursive, so bail out if we hit the limit.
3447   if (Depth++ >= MaxAnalysisRecursionDepth)
3450   // Check for pointer simplifications.
3452   if (PointerType *PtrTy = dyn_cast<PointerType>(Ty)) {
3453     // A byval, inalloca may not be null in a non-default address space. A
3454     // nonnull argument is assumed never 0.
3455     if (const Argument *A = dyn_cast<Argument>(V)) {
3456       if (((A->hasPassPointeeByValueCopyAttr() &&
3457             !NullPointerIsDefined(A->getParent(), PtrTy->getAddressSpace())) ||
3458            A->hasNonNullAttr()))
3463   if (const auto *I = dyn_cast<Operator>(V))
3464     if (isKnownNonZeroFromOperator(I, DemandedElts, Depth, Q))
3467   if (!isa<Constant>(V) &&
3468       isKnownNonNullFromDominatingCondition(V, Q.CxtI, Q.DT))
3474 bool llvm::isKnownNonZero(const Value *V, const SimplifyQuery &Q,
3476   auto *FVTy = dyn_cast<FixedVectorType>(V->getType());
3477   APInt DemandedElts =
3478       FVTy ? APInt::getAllOnes(FVTy->getNumElements()) : APInt(1, 1);
3479   return ::isKnownNonZero(V, DemandedElts, Q, Depth);
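// Usage sketch (illustrative only; `Divisor` and `Q` are hypothetical names,
// not from the upstream source): a caller with a SimplifyQuery in hand can use
// the convenience overload, which demands every lane of a fixed vector:
//   if (isKnownNonZero(Divisor, Q))
//     ; // safe to treat the divisor as non-zero here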
3482 /// If the pair of operators are the same invertible function, return the
3483 /// operands of the function corresponding to each input. Otherwise,
3484 /// return std::nullopt. An invertible function is one that is 1-to-1 and maps
3485 /// every input value to exactly one output value. This is equivalent to
3486 /// saying that Op1 and Op2 are equal exactly when the specified pair of
3487 /// operands are equal (except that Op1 and Op2 may be poison more often).
3488 static std::optional<std::pair<Value *, Value *>>
3489 getInvertibleOperands(const Operator *Op1,
3490                       const Operator *Op2) {
3491   if (Op1->getOpcode() != Op2->getOpcode())
3492     return std::nullopt;
3494   auto getOperands = [&](unsigned OpNum) -> auto {
3495     return std::make_pair(Op1->getOperand(OpNum), Op2->getOperand(OpNum));
3498   switch (Op1->getOpcode()) {
3501   case Instruction::Or:
3502     if (!cast<PossiblyDisjointInst>(Op1)->isDisjoint() ||
3503         !cast<PossiblyDisjointInst>(Op2)->isDisjoint())
3506   case Instruction::Xor:
3507   case Instruction::Add: {
3509     if (match(Op2, m_c_BinOp(m_Specific(Op1->getOperand(0)), m_Value(Other))))
3510       return std::make_pair(Op1->getOperand(1), Other);
3511     if (match(Op2, m_c_BinOp(m_Specific(Op1->getOperand(1)), m_Value(Other))))
3512       return std::make_pair(Op1->getOperand(0), Other);
3515   case Instruction::Sub:
3516     if (Op1->getOperand(0) == Op2->getOperand(0))
3517       return getOperands(1);
3518     if (Op1->getOperand(1) == Op2->getOperand(1))
3519       return getOperands(0);
3521   case Instruction::Mul: {
3522     // invertible if A * B == (A * B) mod 2^N where A and B are integers
3523     // and N is the bitwidth. The nsw case is non-obvious, but proven by
3524     // alive2: https://alive2.llvm.org/ce/z/Z6D5qK
3525     auto *OBO1 = cast<OverflowingBinaryOperator>(Op1);
3526     auto *OBO2 = cast<OverflowingBinaryOperator>(Op2);
3527     if ((!OBO1->hasNoUnsignedWrap() || !OBO2->hasNoUnsignedWrap()) &&
3528         (!OBO1->hasNoSignedWrap() || !OBO2->hasNoSignedWrap()))
3531     // Assume operand order has been canonicalized
3532     if (Op1->getOperand(1) == Op2->getOperand(1) &&
3533         isa<ConstantInt>(Op1->getOperand(1)) &&
3534         !cast<ConstantInt>(Op1->getOperand(1))->isZero())
3535       return getOperands(0);
3538   case Instruction::Shl: {
3539     // Same as multiplies, with the difference that we don't need to check
3540     // for a non-zero multiply. Shifts always multiply by non-zero.
3541     auto *OBO1 = cast<OverflowingBinaryOperator>(Op1);
3542     auto *OBO2 = cast<OverflowingBinaryOperator>(Op2);
3543     if ((!OBO1->hasNoUnsignedWrap() || !OBO2->hasNoUnsignedWrap()) &&
3544         (!OBO1->hasNoSignedWrap() || !OBO2->hasNoSignedWrap()))
3547     if (Op1->getOperand(1) == Op2->getOperand(1))
3548       return getOperands(0);
3551   case Instruction::AShr:
3552   case Instruction::LShr: {
3553     auto *PEO1 = cast<PossiblyExactOperator>(Op1);
3554     auto *PEO2 = cast<PossiblyExactOperator>(Op2);
3555     if (!PEO1->isExact() || !PEO2->isExact())
3558     if (Op1->getOperand(1) == Op2->getOperand(1))
3559       return getOperands(0);
3562   case Instruction::SExt:
3563   case Instruction::ZExt:
3564     if (Op1->getOperand(0)->getType() == Op2->getOperand(0)->getType())
3565       return getOperands(0);
3567   case Instruction::PHI: {
3568     const PHINode *PN1 = cast<PHINode>(Op1);
3569     const PHINode *PN2 = cast<PHINode>(Op2);
3571     // If PN1 and PN2 are both recurrences, can we prove the entire recurrences
3572     // are a single invertible function of the start values? Note that repeated
3573     // application of an invertible function is also invertible.
3574     BinaryOperator *BO1 = nullptr;
3575     Value *Start1 = nullptr, *Step1 = nullptr;
3576     BinaryOperator *BO2 = nullptr;
3577     Value *Start2 = nullptr, *Step2 = nullptr;
3578     if (PN1->getParent() != PN2->getParent() ||
3579         !matchSimpleRecurrence(PN1, BO1, Start1, Step1) ||
3580         !matchSimpleRecurrence(PN2, BO2, Start2, Step2))
3583     auto Values = getInvertibleOperands(cast<Operator>(BO1),
3584                                         cast<Operator>(BO2));
3588     // We have to be careful of mutually defined recurrences here. Ex:
3589     // * X_i = X_(i-1) OP Y_(i-1), and Y_i = X_(i-1) OP V
3590     // * X_i = Y_i = X_(i-1) OP Y_(i-1)
3591     // The invertibility of these is complicated, and not worth reasoning
3593     if (Values->first != PN1 || Values->second != PN2)
3596     return std::make_pair(Start1, Start2);
3599   return std::nullopt;
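// Worked example (added for clarity, not from the upstream source): for
//   %a = add i32 %x, %c
//   %b = add i32 %y, %c
// both operators are the same invertible function "+ %c", so the returned
// pair is (%x, %y): %a != %b exactly when %x != %y (modulo extra poison).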
3602 /// Return true if V1 == (binop V2, X), where X is known non-zero.
3603 /// Only handle a small subset of binops where (binop V2, X) with non-zero X
3604 /// implies V2 != V1.
3605 static bool isModifyingBinopOfNonZero(const Value *V1, const Value *V2,
3606                                       const APInt &DemandedElts, unsigned Depth,
3607                                       const SimplifyQuery &Q) {
3608   const BinaryOperator *BO = dyn_cast<BinaryOperator>(V1);
3611   switch (BO->getOpcode()) {
3614   case Instruction::Or:
3615     if (!cast<PossiblyDisjointInst>(V1)->isDisjoint())
3618   case Instruction::Xor:
3619   case Instruction::Add:
3620     Value *Op = nullptr;
3621     if (V2 == BO->getOperand(0))
3622       Op = BO->getOperand(1);
3623     else if (V2 == BO->getOperand(1))
3624       Op = BO->getOperand(0);
3627     return isKnownNonZero(Op, DemandedElts, Q, Depth + 1);
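// Worked example (illustrative, not from the upstream source):
//   %v1 = add i8 %v2, %x
// In 8-bit modular arithmetic %v2 + %x == %v2 only when %x == 0, so if %x is
// known non-zero then %v1 != %v2, which is what this helper reports.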
3632 /// Return true if V2 == V1 * C, where V1 is known non-zero, C is not 0/1 and
3633 /// the multiplication is nuw or nsw.
3634 static bool isNonEqualMul(const Value *V1, const Value *V2,
3635                           const APInt &DemandedElts, unsigned Depth,
3636                           const SimplifyQuery &Q) {
3637   if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(V2)) {
3639     return match(OBO, m_Mul(m_Specific(V1), m_APInt(C))) &&
3640            (OBO->hasNoUnsignedWrap() || OBO->hasNoSignedWrap()) &&
3641            !C->isZero() && !C->isOne() &&
3642            isKnownNonZero(V1, DemandedElts, Q, Depth + 1);
3647 /// Return true if V2 == V1 << C, where V1 is known non-zero, C is not 0 and
3648 /// the shift is nuw or nsw.
3649 static bool isNonEqualShl(const Value *V1, const Value *V2,
3650                           const APInt &DemandedElts, unsigned Depth,
3651                           const SimplifyQuery &Q) {
3652   if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(V2)) {
3654     return match(OBO, m_Shl(m_Specific(V1), m_APInt(C))) &&
3655            (OBO->hasNoUnsignedWrap() || OBO->hasNoSignedWrap()) &&
3656            !C->isZero() && isKnownNonZero(V1, DemandedElts, Q, Depth + 1);
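// Worked example (illustrative, not from the upstream source):
//   %v2 = mul nsw i32 %v1, 3
// With %v1 known non-zero, the no-wrap flag rules out wrapping back onto %v1
// and the multiplier 3 is neither 0 nor 1, so %v1 != %v2. The shl form is
// analogous, e.g. %v2 = shl nuw i32 %v1, 1 with %v1 known non-zero.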
3661 static bool isNonEqualPHIs(const PHINode *PN1, const PHINode *PN2,
3662                            const APInt &DemandedElts, unsigned Depth,
3663                            const SimplifyQuery &Q) {
3664   // Check two PHIs are in same block.
3665   if (PN1->getParent() != PN2->getParent())
3668   SmallPtrSet<const BasicBlock *, 8> VisitedBBs;
3669   bool UsedFullRecursion = false;
3670   for (const BasicBlock *IncomBB : PN1->blocks()) {
3671     if (!VisitedBBs.insert(IncomBB).second)
3672       continue; // Don't reprocess blocks that we have dealt with already.
3673     const Value *IV1 = PN1->getIncomingValueForBlock(IncomBB);
3674     const Value *IV2 = PN2->getIncomingValueForBlock(IncomBB);
3675     const APInt *C1, *C2;
3676     if (match(IV1, m_APInt(C1)) && match(IV2, m_APInt(C2)) && *C1 != *C2)
3679     // Only one pair of phi operands is allowed for full recursion.
3680     if (UsedFullRecursion)
3683     SimplifyQuery RecQ = Q.getWithoutCondContext();
3684     RecQ.CxtI = IncomBB->getTerminator();
3685     if (!isKnownNonEqual(IV1, IV2, DemandedElts, Depth + 1, RecQ))
3687     UsedFullRecursion = true;
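// Worked example (illustrative, not from the upstream source): two phis in
// the same block whose incoming values are distinct constants on every edge
// are known non-equal, e.g.
//   %p = phi i32 [ 0, %a ], [ 2, %b ]
//   %q = phi i32 [ 1, %a ], [ 3, %b ]
// Non-constant incoming pairs fall back to a single full isKnownNonEqual query.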
3692 static bool isNonEqualSelect(const Value *V1, const Value *V2,
3693                              const APInt &DemandedElts, unsigned Depth,
3694                              const SimplifyQuery &Q) {
3695   const SelectInst *SI1 = dyn_cast<SelectInst>(V1);
3699   if (const SelectInst *SI2 = dyn_cast<SelectInst>(V2)) {
3700     const Value *Cond1 = SI1->getCondition();
3701     const Value *Cond2 = SI2->getCondition();
3703       return isKnownNonEqual(SI1->getTrueValue(), SI2->getTrueValue(),
3704                              DemandedElts, Depth + 1, Q) &&
3705              isKnownNonEqual(SI1->getFalseValue(), SI2->getFalseValue(),
3706                              DemandedElts, Depth + 1, Q);
3708   return isKnownNonEqual(SI1->getTrueValue(), V2, DemandedElts, Depth + 1, Q) &&
3709          isKnownNonEqual(SI1->getFalseValue(), V2, DemandedElts, Depth + 1, Q);
3712 // Check to see if A is both a GEP and the incoming value for a PHI in the
3713 // loop, and B is either a ptr or another GEP. If the PHI has 2 incoming
3714 // values, one being the recursive GEP A and the other a ptr at the same base
3715 // and at the same or higher offset than B, then we only increment the
3716 // pointer further in the loop if the recursive GEP's offset is greater than 0.
3717 static bool isNonEqualPointersWithRecursiveGEP(const Value *A, const Value *B,
3718                                                const SimplifyQuery &Q) {
3719   if (!A->getType()->isPointerTy() || !B->getType()->isPointerTy())
3722   auto *GEPA = dyn_cast<GEPOperator>(A);
3723   if (!GEPA || GEPA->getNumIndices() != 1 || !isa<Constant>(GEPA->idx_begin()))
3726   // Handle 2 incoming PHI values with one being a recursive GEP.
3727   auto *PN = dyn_cast<PHINode>(GEPA->getPointerOperand());
3728   if (!PN || PN->getNumIncomingValues() != 2)
3731   // Search for the recursive GEP as an incoming operand, and record that as
3733   Value *Start = nullptr;
3734   Value *Step = const_cast<Value *>(A);
3735   if (PN->getIncomingValue(0) == Step)
3736     Start = PN->getIncomingValue(1);
3737   else if (PN->getIncomingValue(1) == Step)
3738     Start = PN->getIncomingValue(0);
3742   // Other incoming node base should match the B base.
3743   // StartOffset >= OffsetB && StepOffset > 0?
3744   // StartOffset <= OffsetB && StepOffset < 0?
3745   // Is non-equal if above are true.
3746   // We use stripAndAccumulateInBoundsConstantOffsets to restrict the
3747   // optimisation to inbounds GEPs only.
3748   unsigned IndexWidth = Q.DL.getIndexTypeSizeInBits(Start->getType());
3749   APInt StartOffset(IndexWidth, 0);
3750   Start = Start->stripAndAccumulateInBoundsConstantOffsets(Q.DL, StartOffset);
3751   APInt StepOffset(IndexWidth, 0);
3752   Step = Step->stripAndAccumulateInBoundsConstantOffsets(Q.DL, StepOffset);
3754   // Check if Base Pointer of Step matches the PHI.
3757   APInt OffsetB(IndexWidth, 0);
3758   B = B->stripAndAccumulateInBoundsConstantOffsets(Q.DL, OffsetB);
3759   return Start == B &&
3760          ((StartOffset.sge(OffsetB) && StepOffset.isStrictlyPositive()) ||
3761           (StartOffset.sle(OffsetB) && StepOffset.isNegative()));
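// Illustrative IR shape handled here (added commentary, not upstream code):
//   loop:
//     %p = phi ptr [ %start, %entry ], [ %a, %loop ]
//     %a = getelementptr inbounds i8, ptr %p, i64 4
// If %start and B strip to the same base, %start's offset is >= B's offset,
// and the step offset (4) is strictly positive, then %a can never equal B.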
3764 /// Return true if it is known that V1 != V2.
3765 static bool isKnownNonEqual(const Value *V1, const Value *V2,
3766                             const APInt &DemandedElts, unsigned Depth,
3767                             const SimplifyQuery &Q) {
3770   if (V1->getType() != V2->getType())
3771     // We can't look through casts yet.
3774   if (Depth >= MaxAnalysisRecursionDepth)
3777   // See if we can recurse through (exactly one of) our operands. This
3778   // requires our operation be 1-to-1 and map every input value to exactly
3779   // one output value. Such an operation is invertible.
3780   auto *O1 = dyn_cast<Operator>(V1);
3781   auto *O2 = dyn_cast<Operator>(V2);
3782   if (O1 && O2 && O1->getOpcode() == O2->getOpcode()) {
3783     if (auto Values = getInvertibleOperands(O1, O2))
3784       return isKnownNonEqual(Values->first, Values->second, DemandedElts,
3787     if (const PHINode *PN1 = dyn_cast<PHINode>(V1)) {
3788       const PHINode *PN2 = cast<PHINode>(V2);
3789       // FIXME: This is missing a generalization to handle the case where one is
3790       // a PHI and another one isn't.
3791       if (isNonEqualPHIs(PN1, PN2, DemandedElts, Depth, Q))
3796   if (isModifyingBinopOfNonZero(V1, V2, DemandedElts, Depth, Q) ||
3797       isModifyingBinopOfNonZero(V2, V1, DemandedElts, Depth, Q))
3800   if (isNonEqualMul(V1, V2, DemandedElts, Depth, Q) ||
3801       isNonEqualMul(V2, V1, DemandedElts, Depth, Q))
3804   if (isNonEqualShl(V1, V2, DemandedElts, Depth, Q) ||
3805       isNonEqualShl(V2, V1, DemandedElts, Depth, Q))
3808   if (V1->getType()->isIntOrIntVectorTy()) {
3809     // Are any known bits in V1 contradictory to known bits in V2? If V1
3810     // has a known zero where V2 has a known one, they must not be equal.
3811     KnownBits Known1 = computeKnownBits(V1, DemandedElts, Depth, Q);
3812     if (!Known1.isUnknown()) {
3813       KnownBits Known2 = computeKnownBits(V2, DemandedElts, Depth, Q);
3814       if (Known1.Zero.intersects(Known2.One) ||
3815           Known2.Zero.intersects(Known1.One))
3820   if (isNonEqualSelect(V1, V2, DemandedElts, Depth, Q) ||
3821       isNonEqualSelect(V2, V1, DemandedElts, Depth, Q))
3824   if (isNonEqualPointersWithRecursiveGEP(V1, V2, Q) ||
3825       isNonEqualPointersWithRecursiveGEP(V2, V1, Q))
3829   // PtrToInts are NonEqual if their Ptrs are NonEqual.
3830   // Check PtrToInt type matches the pointer size.
3831   if (match(V1, m_PtrToIntSameSize(Q.DL, m_Value(A))) &&
3832       match(V2, m_PtrToIntSameSize(Q.DL, m_Value(B))))
3833     return isKnownNonEqual(A, B, DemandedElts, Depth + 1, Q);
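// Worked example (illustrative, not from the upstream source): if
//   %v1 = shl i32 %x, 1     ; low bit known zero
//   %v2 = or  i32 %y, 1     ; low bit known one
// then Known1.Zero intersects Known2.One at bit 0, so %v1 != %v2.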
3838 /// For vector constants, loop over the elements and find the constant with the
3839 /// minimum number of sign bits. Return 0 if the value is not a vector constant
3840 /// or if any element was not analyzed; otherwise, return the count for the
3841 /// element with the minimum number of sign bits.
3842 static unsigned computeNumSignBitsVectorConstant(const Value *V,
3843                                                  const APInt &DemandedElts,
3845   const auto *CV = dyn_cast<Constant>(V);
3846   if (!CV || !isa<FixedVectorType>(CV->getType()))
3849   unsigned MinSignBits = TyBits;
3850   unsigned NumElts = cast<FixedVectorType>(CV->getType())->getNumElements();
3851   for (unsigned i = 0; i != NumElts; ++i) {
3852     if (!DemandedElts[i])
3854     // If we find a non-ConstantInt, bail out.
3855     auto *Elt = dyn_cast_or_null<ConstantInt>(CV->getAggregateElement(i));
3859     MinSignBits = std::min(MinSignBits, Elt->getValue().getNumSignBits());
3865 static unsigned ComputeNumSignBitsImpl(const Value *V,
3866                                        const APInt &DemandedElts,
3867                                        unsigned Depth, const SimplifyQuery &Q);
3869 static unsigned ComputeNumSignBits(const Value *V, const APInt &DemandedElts,
3870                                    unsigned Depth, const SimplifyQuery &Q) {
3871   unsigned Result = ComputeNumSignBitsImpl(V, DemandedElts, Depth, Q);
3872   assert(Result > 0 && "At least one sign bit needs to be present!");
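// Worked example (illustrative, not from the upstream source): for
//   %s = ashr i32 %x, 24
// the shift copies the sign bit into the top 24 positions, so at least 25
// bits are known equal to the sign bit; the recursive cases below combine
// such facts and fall back to computeKnownBits when nothing else applies.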
3876 /// Return the number of times the sign bit of the register is replicated into
3877 /// the other bits. We know that at least 1 bit is always equal to the sign bit
3878 /// (itself), but other cases can give us information. For example, immediately
3879 /// after an "ashr X, 2", we know that the top 3 bits are all equal to each
3880 /// other, so we return 3. For vectors, return the number of sign bits for the
3881 /// vector element with the minimum number of known sign bits of the demanded
3882 /// elements in the vector specified by DemandedElts.
3883 static unsigned ComputeNumSignBitsImpl(const Value
*V
,
3884 const APInt
&DemandedElts
,
3885 unsigned Depth
, const SimplifyQuery
&Q
) {
3886 Type
*Ty
= V
->getType();
3888 assert(Depth
<= MaxAnalysisRecursionDepth
&& "Limit Search Depth");
3890 if (auto *FVTy
= dyn_cast
<FixedVectorType
>(Ty
)) {
3892 FVTy
->getNumElements() == DemandedElts
.getBitWidth() &&
3893 "DemandedElt width should equal the fixed vector number of elements");
3895 assert(DemandedElts
== APInt(1, 1) &&
3896 "DemandedElt width should be 1 for scalars");
3900 // We return the minimum number of sign bits that are guaranteed to be present
3901 // in V, so for undef we have to conservatively return 1. We don't have the
3902 // same behavior for poison though -- that's a FIXME today.
3904 Type
*ScalarTy
= Ty
->getScalarType();
3905 unsigned TyBits
= ScalarTy
->isPointerTy() ?
3906 Q
.DL
.getPointerTypeSizeInBits(ScalarTy
) :
3907 Q
.DL
.getTypeSizeInBits(ScalarTy
);
3910 unsigned FirstAnswer
= 1;
3912 // Note that ConstantInt is handled by the general computeKnownBits case
3915 if (Depth
== MaxAnalysisRecursionDepth
)
3918 if (auto *U
= dyn_cast
<Operator
>(V
)) {
3919 switch (Operator::getOpcode(V
)) {
3921 case Instruction::SExt
:
3922 Tmp
= TyBits
- U
->getOperand(0)->getType()->getScalarSizeInBits();
3923 return ComputeNumSignBits(U
->getOperand(0), DemandedElts
, Depth
+ 1, Q
) +
3926 case Instruction::SDiv
: {
3927 const APInt
*Denominator
;
3928 // sdiv X, C -> adds log(C) sign bits.
3929 if (match(U
->getOperand(1), m_APInt(Denominator
))) {
3931 // Ignore non-positive denominator.
3932 if (!Denominator
->isStrictlyPositive())
3935 // Calculate the incoming numerator bits.
3937 ComputeNumSignBits(U
->getOperand(0), DemandedElts
, Depth
+ 1, Q
);
3939 // Add floor(log(C)) bits to the numerator bits.
3940 return std::min(TyBits
, NumBits
+ Denominator
->logBase2());
3945 case Instruction::SRem
: {
3946 Tmp
= ComputeNumSignBits(U
->getOperand(0), DemandedElts
, Depth
+ 1, Q
);
3948 const APInt
*Denominator
;
3949 // srem X, C -> we know that the result is within [-C+1,C) when C is a
3950 // positive constant. This let us put a lower bound on the number of sign
3952 if (match(U
->getOperand(1), m_APInt(Denominator
))) {
3954 // Ignore non-positive denominator.
3955 if (Denominator
->isStrictlyPositive()) {
3956 // Calculate the leading sign bit constraints by examining the
3957 // denominator. Given that the denominator is positive, there are two
3960 // 1. The numerator is positive. The result range is [0,C) and
3961 // [0,C) u< (1 << ceilLogBase2(C)).
3963 // 2. The numerator is negative. Then the result range is (-C,0] and
3964 // integers in (-C,0] are either 0 or >u (-1 << ceilLogBase2(C)).
3966 // Thus a lower bound on the number of sign bits is `TyBits -
3967 // ceilLogBase2(C)`.
3969 unsigned ResBits
= TyBits
- Denominator
->ceilLogBase2();
3970 Tmp
= std::max(Tmp
, ResBits
);
3976 case Instruction::AShr
: {
3977 Tmp
= ComputeNumSignBits(U
->getOperand(0), DemandedElts
, Depth
+ 1, Q
);
3978 // ashr X, C -> adds C sign bits. Vectors too.
3980 if (match(U
->getOperand(1), m_APInt(ShAmt
))) {
3981 if (ShAmt
->uge(TyBits
))
3982 break; // Bad shift.
3983 unsigned ShAmtLimited
= ShAmt
->getZExtValue();
3984 Tmp
+= ShAmtLimited
;
3985 if (Tmp
> TyBits
) Tmp
= TyBits
;
3989 case Instruction::Shl
: {
3992 if (match(U
->getOperand(1), m_APInt(ShAmt
))) {
3993 // shl destroys sign bits.
3994 if (ShAmt
->uge(TyBits
))
3995 break; // Bad shift.
3996 // We can look through a zext (more or less treating it as a sext) if
3997 // all extended bits are shifted out.
3998 if (match(U
->getOperand(0), m_ZExt(m_Value(X
))) &&
3999 ShAmt
->uge(TyBits
- X
->getType()->getScalarSizeInBits())) {
4000 Tmp
= ComputeNumSignBits(X
, DemandedElts
, Depth
+ 1, Q
);
4001 Tmp
+= TyBits
- X
->getType()->getScalarSizeInBits();
4004 ComputeNumSignBits(U
->getOperand(0), DemandedElts
, Depth
+ 1, Q
);
4005 if (ShAmt
->uge(Tmp
))
4006 break; // Shifted all sign bits out.
4007 Tmp2
= ShAmt
->getZExtValue();
4012 case Instruction::And
:
4013 case Instruction::Or
:
4014 case Instruction::Xor
: // NOT is handled here.
4015 // Logical binary ops preserve the number of sign bits at the worst.
4016 Tmp
= ComputeNumSignBits(U
->getOperand(0), DemandedElts
, Depth
+ 1, Q
);
4018 Tmp2
= ComputeNumSignBits(U
->getOperand(1), DemandedElts
, Depth
+ 1, Q
);
4019 FirstAnswer
= std::min(Tmp
, Tmp2
);
4020 // We computed what we know about the sign bits as our first
4021 // answer. Now proceed to the generic code that uses
4022 // computeKnownBits, and pick whichever answer is better.
4026 case Instruction::Select
: {
4027 // If we have a clamp pattern, we know that the number of sign bits will
4028 // be the minimum of the clamp min/max range.
4030 const APInt
*CLow
, *CHigh
;
4031 if (isSignedMinMaxClamp(U
, X
, CLow
, CHigh
))
4032 return std::min(CLow
->getNumSignBits(), CHigh
->getNumSignBits());
4034 Tmp
= ComputeNumSignBits(U
->getOperand(1), DemandedElts
, Depth
+ 1, Q
);
4037 Tmp2
= ComputeNumSignBits(U
->getOperand(2), DemandedElts
, Depth
+ 1, Q
);
4038 return std::min(Tmp
, Tmp2
);
4041 case Instruction::Add
:
4042 // Add can have at most one carry bit. Thus we know that the output
4043 // is, at worst, one more bit than the inputs.
4044 Tmp
= ComputeNumSignBits(U
->getOperand(0), Depth
+ 1, Q
);
4045 if (Tmp
== 1) break;
4047 // Special case decrementing a value (ADD X, -1):
4048 if (const auto *CRHS
= dyn_cast
<Constant
>(U
->getOperand(1)))
4049 if (CRHS
->isAllOnesValue()) {
4050 KnownBits
Known(TyBits
);
4051 computeKnownBits(U
->getOperand(0), DemandedElts
, Known
, Depth
+ 1, Q
);
4053 // If the input is known to be 0 or 1, the output is 0/-1, which is
4054 // all sign bits set.
4055 if ((Known
.Zero
| 1).isAllOnes())
4058 // If we are subtracting one from a positive number, there is no carry
4059 // out of the result.
4060 if (Known
.isNonNegative())
4064 Tmp2
= ComputeNumSignBits(U
->getOperand(1), DemandedElts
, Depth
+ 1, Q
);
4067 return std::min(Tmp
, Tmp2
) - 1;
4069 case Instruction::Sub
:
4070 Tmp2
= ComputeNumSignBits(U
->getOperand(1), DemandedElts
, Depth
+ 1, Q
);
4075 if (const auto *CLHS
= dyn_cast
<Constant
>(U
->getOperand(0)))
4076 if (CLHS
->isNullValue()) {
4077 KnownBits
Known(TyBits
);
4078 computeKnownBits(U
->getOperand(1), DemandedElts
, Known
, Depth
+ 1, Q
);
4079 // If the input is known to be 0 or 1, the output is 0/-1, which is
4080 // all sign bits set.
4081 if ((Known
.Zero
| 1).isAllOnes())
4084 // If the input is known to be positive (the sign bit is known clear),
4085 // the output of the NEG has the same number of sign bits as the
4087 if (Known
.isNonNegative())
4090 // Otherwise, we treat this like a SUB.
4093 // Sub can have at most one carry bit. Thus we know that the output
4094 // is, at worst, one more bit than the inputs.
4095 Tmp
= ComputeNumSignBits(U
->getOperand(0), DemandedElts
, Depth
+ 1, Q
);
4098 return std::min(Tmp
, Tmp2
) - 1;
4100 case Instruction::Mul
: {
4101 // The output of the Mul can be at most twice the valid bits in the
4103 unsigned SignBitsOp0
=
4104 ComputeNumSignBits(U
->getOperand(0), DemandedElts
, Depth
+ 1, Q
);
4105 if (SignBitsOp0
== 1)
4107 unsigned SignBitsOp1
=
4108 ComputeNumSignBits(U
->getOperand(1), DemandedElts
, Depth
+ 1, Q
);
4109 if (SignBitsOp1
== 1)
4111 unsigned OutValidBits
=
4112 (TyBits
- SignBitsOp0
+ 1) + (TyBits
- SignBitsOp1
+ 1);
4113 return OutValidBits
> TyBits
? 1 : TyBits
- OutValidBits
+ 1;
4116 case Instruction::PHI
: {
4117 const PHINode
*PN
= cast
<PHINode
>(U
);
4118 unsigned NumIncomingValues
= PN
->getNumIncomingValues();
4119 // Don't analyze large in-degree PHIs.
4120 if (NumIncomingValues
> 4) break;
4121 // Unreachable blocks may have zero-operand PHI nodes.
4122 if (NumIncomingValues
== 0) break;
4124 // Take the minimum of all incoming values. This can't infinitely loop
4125 // because of our depth threshold.
4126 SimplifyQuery RecQ
= Q
.getWithoutCondContext();
4128 for (unsigned i
= 0, e
= NumIncomingValues
; i
!= e
; ++i
) {
4129 if (Tmp
== 1) return Tmp
;
4130 RecQ
.CxtI
= PN
->getIncomingBlock(i
)->getTerminator();
4131 Tmp
= std::min(Tmp
, ComputeNumSignBits(PN
->getIncomingValue(i
),
4132 DemandedElts
, Depth
+ 1, RecQ
));
4137 case Instruction::Trunc
: {
4138 // If the input contained enough sign bits that some remain after the
4139 // truncation, then we can make use of that. Otherwise we don't know
4141 Tmp
= ComputeNumSignBits(U
->getOperand(0), Depth
+ 1, Q
);
4142 unsigned OperandTyBits
= U
->getOperand(0)->getType()->getScalarSizeInBits();
4143 if (Tmp
> (OperandTyBits
- TyBits
))
4144 return Tmp
- (OperandTyBits
- TyBits
);
4149 case Instruction::ExtractElement
:
4150 // Look through extract element. At the moment we keep this simple and
4151 // skip tracking the specific element. But at least we might find
4152 // information valid for all elements of the vector (for example if vector
4153 // is sign extended, shifted, etc).
4154 return ComputeNumSignBits(U
->getOperand(0), Depth
+ 1, Q
);
4156 case Instruction::ShuffleVector
: {
4157 // Collect the minimum number of sign bits that are shared by every vector
4158 // element referenced by the shuffle.
4159 auto *Shuf
= dyn_cast
<ShuffleVectorInst
>(U
);
4161 // FIXME: Add support for shufflevector constant expressions.
4164 APInt DemandedLHS
, DemandedRHS
;
4165 // For undef elements, we don't know anything about the common state of
4166 // the shuffle result.
4167 if (!getShuffleDemandedElts(Shuf
, DemandedElts
, DemandedLHS
, DemandedRHS
))
4169 Tmp
= std::numeric_limits
<unsigned>::max();
4170 if (!!DemandedLHS
) {
4171 const Value
*LHS
= Shuf
->getOperand(0);
4172 Tmp
= ComputeNumSignBits(LHS
, DemandedLHS
, Depth
+ 1, Q
);
4174 // If we don't know anything, early out and try computeKnownBits
4178 if (!!DemandedRHS
) {
4179 const Value
*RHS
= Shuf
->getOperand(1);
4180 Tmp2
= ComputeNumSignBits(RHS
, DemandedRHS
, Depth
+ 1, Q
);
4181 Tmp
= std::min(Tmp
, Tmp2
);
4183 // If we don't know anything, early out and try computeKnownBits
4187 assert(Tmp
<= TyBits
&& "Failed to determine minimum sign bits");
4190 case Instruction::Call
: {
4191 if (const auto *II
= dyn_cast
<IntrinsicInst
>(U
)) {
4192 switch (II
->getIntrinsicID()) {
4195 case Intrinsic::abs
:
4197 ComputeNumSignBits(U
->getOperand(0), DemandedElts
, Depth
+ 1, Q
);
4201 // Absolute value reduces number of sign bits by at most 1.
4203 case Intrinsic::smin
:
4204 case Intrinsic::smax
: {
4205 const APInt
*CLow
, *CHigh
;
4206 if (isSignedMinMaxIntrinsicClamp(II
, CLow
, CHigh
))
4207 return std::min(CLow
->getNumSignBits(), CHigh
->getNumSignBits());
4215 // Finally, if we can prove that the top bits of the result are 0's or 1's,
4216 // use this information.
4218 // If we can examine all elements of a vector constant successfully, we're
4219 // done (we can't do any better than that). If not, keep trying.
4220 if (unsigned VecSignBits
=
4221 computeNumSignBitsVectorConstant(V
, DemandedElts
, TyBits
))
4224 KnownBits
Known(TyBits
);
4225 computeKnownBits(V
, DemandedElts
, Known
, Depth
, Q
);
4227 // If we know that the sign bit is either zero or one, determine the number of
4228 // identical bits in the top of the input value.
4229 return std::max(FirstAnswer
, Known
.countMinSignBits());
4232 Intrinsic::ID
llvm::getIntrinsicForCallSite(const CallBase
&CB
,
4233 const TargetLibraryInfo
*TLI
) {
4234 const Function
*F
= CB
.getCalledFunction();
4236 return Intrinsic::not_intrinsic
;
4238 if (F
->isIntrinsic())
4239 return F
->getIntrinsicID();
4241 // We are going to infer semantics of a library function based on mapping it
4242 // to an LLVM intrinsic. Check that the library function is available from
4243 // this callbase and in this environment.
4245 if (F
->hasLocalLinkage() || !TLI
|| !TLI
->getLibFunc(CB
, Func
) ||
4246 !CB
.onlyReadsMemory())
4247 return Intrinsic::not_intrinsic
;
4255 return Intrinsic::sin
;
4259 return Intrinsic::cos
;
4263 return Intrinsic::tan
;
4267 return Intrinsic::asin
;
4271 return Intrinsic::acos
;
4275 return Intrinsic::atan
;
4277 case LibFunc_atan2f
:
4278 case LibFunc_atan2l
:
4279 return Intrinsic::atan2
;
4283 return Intrinsic::sinh
;
4287 return Intrinsic::cosh
;
4291 return Intrinsic::tanh
;
4295 return Intrinsic::exp
;
4299 return Intrinsic::exp2
;
4301 case LibFunc_exp10f
:
4302 case LibFunc_exp10l
:
4303 return Intrinsic::exp10
;
4307 return Intrinsic::log
;
4309 case LibFunc_log10f
:
4310 case LibFunc_log10l
:
4311 return Intrinsic::log10
;
4315 return Intrinsic::log2
;
4319 return Intrinsic::fabs
;
4323 return Intrinsic::minnum
;
4327 return Intrinsic::maxnum
;
4328 case LibFunc_copysign
:
4329 case LibFunc_copysignf
:
4330 case LibFunc_copysignl
:
4331 return Intrinsic::copysign
;
4333 case LibFunc_floorf
:
4334 case LibFunc_floorl
:
4335 return Intrinsic::floor
;
4339 return Intrinsic::ceil
;
4341 case LibFunc_truncf
:
4342 case LibFunc_truncl
:
4343 return Intrinsic::trunc
;
4347 return Intrinsic::rint
;
4348 case LibFunc_nearbyint
:
4349 case LibFunc_nearbyintf
:
4350 case LibFunc_nearbyintl
:
4351 return Intrinsic::nearbyint
;
4353 case LibFunc_roundf
:
4354 case LibFunc_roundl
:
4355 return Intrinsic::round
;
4356 case LibFunc_roundeven
:
4357 case LibFunc_roundevenf
:
4358 case LibFunc_roundevenl
:
4359 return Intrinsic::roundeven
;
4363 return Intrinsic::pow
;
4367 return Intrinsic::sqrt
;
4370 return Intrinsic::not_intrinsic
;
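// Usage sketch (illustrative only; `CB` and `TLI` are hypothetical names): a
// readonly call to sinf, in a module whose TargetLibraryInfo reports sinf as
// available, maps to the llvm.sin intrinsic:
//   Intrinsic::ID IID = getIntrinsicForCallSite(*CB, &TLI);
//   // IID == Intrinsic::sin for such a call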
4373 /// Return true if it's possible to assume IEEE treatment of input denormals in
4374 /// \p F for \p Val.
4375 static bool inputDenormalIsIEEE(const Function
&F
, const Type
*Ty
) {
4376 Ty
= Ty
->getScalarType();
4377 return F
.getDenormalMode(Ty
->getFltSemantics()).Input
== DenormalMode::IEEE
;
4380 static bool inputDenormalIsIEEEOrPosZero(const Function
&F
, const Type
*Ty
) {
4381 Ty
= Ty
->getScalarType();
4382 DenormalMode Mode
= F
.getDenormalMode(Ty
->getFltSemantics());
4383 return Mode
.Input
== DenormalMode::IEEE
||
4384 Mode
.Input
== DenormalMode::PositiveZero
;
4387 static bool outputDenormalIsIEEEOrPosZero(const Function
&F
, const Type
*Ty
) {
4388 Ty
= Ty
->getScalarType();
4389 DenormalMode Mode
= F
.getDenormalMode(Ty
->getFltSemantics());
4390 return Mode
.Output
== DenormalMode::IEEE
||
4391 Mode
.Output
== DenormalMode::PositiveZero
;
4394 bool KnownFPClass::isKnownNeverLogicalZero(const Function
&F
, Type
*Ty
) const {
4395 return isKnownNeverZero() &&
4396 (isKnownNeverSubnormal() || inputDenormalIsIEEE(F
, Ty
));
4399 bool KnownFPClass::isKnownNeverLogicalNegZero(const Function
&F
,
4401 return isKnownNeverNegZero() &&
4402 (isKnownNeverNegSubnormal() || inputDenormalIsIEEEOrPosZero(F
, Ty
));
4405 bool KnownFPClass::isKnownNeverLogicalPosZero(const Function
&F
,
4407 if (!isKnownNeverPosZero())
4410 // If we know there are no denormals, nothing can be flushed to zero.
4411 if (isKnownNeverSubnormal())
4414 DenormalMode Mode
= F
.getDenormalMode(Ty
->getScalarType()->getFltSemantics());
4415 switch (Mode
.Input
) {
4416 case DenormalMode::IEEE
:
4418 case DenormalMode::PreserveSign
:
4419 // Negative subnormal won't flush to +0
4420 return isKnownNeverPosSubnormal();
4421 case DenormalMode::PositiveZero
:
4423 // Both positive and negative subnormal could flush to +0
4427 llvm_unreachable("covered switch over denormal mode");
4430 void KnownFPClass::propagateDenormal(const KnownFPClass
&Src
, const Function
&F
,
4432 KnownFPClasses
= Src
.KnownFPClasses
;
4433 // If we aren't assuming the source can't be a zero, we don't have to check if
4434 // a denormal input could be flushed.
4435 if (!Src
.isKnownNeverPosZero() && !Src
.isKnownNeverNegZero())
4438 // If we know the input can't be a denormal, it can't be flushed to 0.
4439 if (Src
.isKnownNeverSubnormal())
4442 DenormalMode Mode
= F
.getDenormalMode(Ty
->getScalarType()->getFltSemantics());
4444 if (!Src
.isKnownNeverPosSubnormal() && Mode
!= DenormalMode::getIEEE())
4445 KnownFPClasses
|= fcPosZero
;
4447 if (!Src
.isKnownNeverNegSubnormal() && Mode
!= DenormalMode::getIEEE()) {
4448 if (Mode
!= DenormalMode::getPositiveZero())
4449 KnownFPClasses
|= fcNegZero
;
4451 if (Mode
.Input
== DenormalMode::PositiveZero
||
4452 Mode
.Output
== DenormalMode::PositiveZero
||
4453 Mode
.Input
== DenormalMode::Dynamic
||
4454 Mode
.Output
== DenormalMode::Dynamic
)
4455 KnownFPClasses
|= fcPosZero
;
4459 void KnownFPClass::propagateCanonicalizingSrc(const KnownFPClass
&Src
,
4460 const Function
&F
, Type
*Ty
) {
4461 propagateDenormal(Src
, F
, Ty
);
4462 propagateNaN(Src
, /*PreserveSign=*/true);
4465 /// Given an exploded icmp instruction, return true if the comparison only
4466 /// checks the sign bit. If it only checks the sign bit, set TrueIfSigned if
4467 /// the result of the comparison is true when the input value is signed.
4468 bool llvm::isSignBitCheck(ICmpInst::Predicate Pred, const APInt &RHS,
4469                           bool &TrueIfSigned) {
4471   case ICmpInst::ICMP_SLT: // True if LHS s< 0
4472     TrueIfSigned = true;
4473     return RHS.isZero();
4474   case ICmpInst::ICMP_SLE: // True if LHS s<= -1
4475     TrueIfSigned = true;
4476     return RHS.isAllOnes();
4477   case ICmpInst::ICMP_SGT: // True if LHS s> -1
4478     TrueIfSigned = false;
4479     return RHS.isAllOnes();
4480   case ICmpInst::ICMP_SGE: // True if LHS s>= 0
4481     TrueIfSigned = false;
4482     return RHS.isZero();
4483   case ICmpInst::ICMP_UGT:
4484     // True if LHS u> RHS and RHS == sign-bit-mask - 1
4485     TrueIfSigned = true;
4486     return RHS.isMaxSignedValue();
4487   case ICmpInst::ICMP_UGE:
4488     // True if LHS u>= RHS and RHS == sign-bit-mask (2^7, 2^15, 2^31, etc)
4489     TrueIfSigned = true;
4490     return RHS.isMinSignedValue();
4491   case ICmpInst::ICMP_ULT:
4492     // True if LHS u< RHS and RHS == sign-bit-mask (2^7, 2^15, 2^31, etc)
4493     TrueIfSigned = false;
4494     return RHS.isMinSignedValue();
4495   case ICmpInst::ICMP_ULE:
4496     // True if LHS u<= RHS and RHS == sign-bit-mask - 1
4497     TrueIfSigned = false;
4498     return RHS.isMaxSignedValue();
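// Worked example (illustrative, not from the upstream source): for i8,
//   icmp ugt i8 %x, 127   ; RHS == sign-bit-mask - 1
// is true exactly when %x has its sign bit set, so the function returns true
// and sets TrueIfSigned to true.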
4504 /// Returns a pair of values, which if passed to llvm.is.fpclass, returns the
4505 /// same result as an fcmp with the given operands.
4506 std::pair
<Value
*, FPClassTest
> llvm::fcmpToClassTest(FCmpInst::Predicate Pred
,
4508 Value
*LHS
, Value
*RHS
,
4509 bool LookThroughSrc
) {
4510 const APFloat
*ConstRHS
;
4511 if (!match(RHS
, m_APFloatAllowPoison(ConstRHS
)))
4512 return {nullptr, fcAllFlags
};
4514 return fcmpToClassTest(Pred
, F
, LHS
, ConstRHS
, LookThroughSrc
);
4517 std::pair
<Value
*, FPClassTest
>
4518 llvm::fcmpToClassTest(FCmpInst::Predicate Pred
, const Function
&F
, Value
*LHS
,
4519 const APFloat
*ConstRHS
, bool LookThroughSrc
) {
4521 auto [Src
, ClassIfTrue
, ClassIfFalse
] =
4522 fcmpImpliesClass(Pred
, F
, LHS
, *ConstRHS
, LookThroughSrc
);
4523 if (Src
&& ClassIfTrue
== ~ClassIfFalse
)
4524 return {Src
, ClassIfTrue
};
4525 return {nullptr, fcAllFlags
};
4528 /// Return the return value for fcmpImpliesClass for a compare that produces an
4529 /// exact class test.
4530 static std::tuple
<Value
*, FPClassTest
, FPClassTest
> exactClass(Value
*V
,
4535 std::tuple
<Value
*, FPClassTest
, FPClassTest
>
4536 llvm::fcmpImpliesClass(CmpInst::Predicate Pred
, const Function
&F
, Value
*LHS
,
4537 FPClassTest RHSClass
, bool LookThroughSrc
) {
4538 assert(RHSClass
!= fcNone
);
4541 if (Pred
== FCmpInst::FCMP_TRUE
)
4542 return exactClass(Src
, fcAllFlags
);
4544 if (Pred
== FCmpInst::FCMP_FALSE
)
4545 return exactClass(Src
, fcNone
);
4547 const FPClassTest OrigClass
= RHSClass
;
4549 const bool IsNegativeRHS
= (RHSClass
& fcNegative
) == RHSClass
;
4550 const bool IsPositiveRHS
= (RHSClass
& fcPositive
) == RHSClass
;
4551 const bool IsNaN
= (RHSClass
& ~fcNan
) == fcNone
;
4554 // fcmp o__ x, nan -> false
4555 // fcmp u__ x, nan -> true
4556 return exactClass(Src
, CmpInst::isOrdered(Pred
) ? fcNone
: fcAllFlags
);
4559 // fcmp ord x, zero|normal|subnormal|inf -> ~fcNan
4560 if (Pred
== FCmpInst::FCMP_ORD
)
4561 return exactClass(Src
, ~fcNan
);
4563 // fcmp uno x, zero|normal|subnormal|inf -> fcNan
4564 if (Pred
== FCmpInst::FCMP_UNO
)
4565 return exactClass(Src
, fcNan
);
4567 const bool IsFabs
= LookThroughSrc
&& match(LHS
, m_FAbs(m_Value(Src
)));
4569 RHSClass
= llvm::inverse_fabs(RHSClass
);
4571 const bool IsZero
= (OrigClass
& fcZero
) == OrigClass
;
4573 assert(Pred
!= FCmpInst::FCMP_ORD
&& Pred
!= FCmpInst::FCMP_UNO
);
4574 // Compares with fcNone are only exactly equal to fcZero if input denormals
4576 // TODO: Handle DAZ by expanding masks to cover subnormal cases.
4577 if (!inputDenormalIsIEEE(F
, LHS
->getType()))
4578 return {nullptr, fcAllFlags
, fcAllFlags
};
4581 case FCmpInst::FCMP_OEQ
: // Match x == 0.0
4582 return exactClass(Src
, fcZero
);
4583 case FCmpInst::FCMP_UEQ
: // Match isnan(x) || (x == 0.0)
4584 return exactClass(Src
, fcZero
| fcNan
);
4585 case FCmpInst::FCMP_UNE
: // Match (x != 0.0)
4586 return exactClass(Src
, ~fcZero
);
4587 case FCmpInst::FCMP_ONE
: // Match !isnan(x) && x != 0.0
4588 return exactClass(Src
, ~fcNan
& ~fcZero
);
4589 case FCmpInst::FCMP_ORD
:
4590 // Canonical form of ord/uno is with a zero. We could also handle
4591 // non-canonical other non-NaN constants or LHS == RHS.
4592 return exactClass(Src
, ~fcNan
);
4593 case FCmpInst::FCMP_UNO
:
4594 return exactClass(Src
, fcNan
);
4595 case FCmpInst::FCMP_OGT
: // x > 0
4596 return exactClass(Src
, fcPosSubnormal
| fcPosNormal
| fcPosInf
);
4597 case FCmpInst::FCMP_UGT
: // isnan(x) || x > 0
4598 return exactClass(Src
, fcPosSubnormal
| fcPosNormal
| fcPosInf
| fcNan
);
4599 case FCmpInst::FCMP_OGE
: // x >= 0
4600 return exactClass(Src
, fcPositive
| fcNegZero
);
4601 case FCmpInst::FCMP_UGE
: // isnan(x) || x >= 0
4602 return exactClass(Src
, fcPositive
| fcNegZero
| fcNan
);
4603 case FCmpInst::FCMP_OLT
: // x < 0
4604 return exactClass(Src
, fcNegSubnormal
| fcNegNormal
| fcNegInf
);
4605 case FCmpInst::FCMP_ULT
: // isnan(x) || x < 0
4606 return exactClass(Src
, fcNegSubnormal
| fcNegNormal
| fcNegInf
| fcNan
);
4607 case FCmpInst::FCMP_OLE
: // x <= 0
4608 return exactClass(Src
, fcNegative
| fcPosZero
);
4609 case FCmpInst::FCMP_ULE
: // isnan(x) || x <= 0
4610 return exactClass(Src
, fcNegative
| fcPosZero
| fcNan
);
4612 llvm_unreachable("all compare types are handled");
4615 return {nullptr, fcAllFlags
, fcAllFlags
};
4618 const bool IsDenormalRHS
= (OrigClass
& fcSubnormal
) == OrigClass
;
4620 const bool IsInf
= (OrigClass
& fcInf
) == OrigClass
;
4622 FPClassTest Mask
= fcAllFlags
;
4625 case FCmpInst::FCMP_OEQ
:
4626 case FCmpInst::FCMP_UNE
: {
4627 // Match __builtin_isinf patterns
4629 // fcmp oeq x, +inf -> is_fpclass x, fcPosInf
4630 // fcmp oeq fabs(x), +inf -> is_fpclass x, fcInf
4631 // fcmp oeq x, -inf -> is_fpclass x, fcNegInf
4632 // fcmp oeq fabs(x), -inf -> is_fpclass x, 0 -> false
4634 // fcmp une x, +inf -> is_fpclass x, ~fcPosInf
4635 // fcmp une fabs(x), +inf -> is_fpclass x, ~fcInf
4636 // fcmp une x, -inf -> is_fpclass x, ~fcNegInf
4637 // fcmp une fabs(x), -inf -> is_fpclass x, fcAllFlags -> true
4638 if (IsNegativeRHS
) {
4649 case FCmpInst::FCMP_ONE
:
4650 case FCmpInst::FCMP_UEQ
: {
4651 // Match __builtin_isinf patterns
4652 // fcmp one x, -inf -> is_fpclass x, fcNegInf
4653 // fcmp one fabs(x), -inf -> is_fpclass x, ~fcNegInf & ~fcNan
4654 // fcmp one x, +inf -> is_fpclass x, ~fcNegInf & ~fcNan
4655 // fcmp one fabs(x), +inf -> is_fpclass x, ~fcInf & fcNan
4657 // fcmp ueq x, +inf -> is_fpclass x, fcPosInf|fcNan
4658 // fcmp ueq (fabs x), +inf -> is_fpclass x, fcInf|fcNan
4659 // fcmp ueq x, -inf -> is_fpclass x, fcNegInf|fcNan
4660 // fcmp ueq fabs(x), -inf -> is_fpclass x, fcNan
4661 if (IsNegativeRHS
) {
4662 Mask
= ~fcNegInf
& ~fcNan
;
4666 Mask
= ~fcPosInf
& ~fcNan
;
4673 case FCmpInst::FCMP_OLT
:
4674 case FCmpInst::FCMP_UGE
: {
4675 if (IsNegativeRHS
) {
4676 // No value is ordered and less than negative infinity.
4677 // All values are unordered with or at least negative infinity.
4678 // fcmp olt x, -inf -> false
4679 // fcmp uge x, -inf -> true
4684 // fcmp olt fabs(x), +inf -> fcFinite
4685 // fcmp uge fabs(x), +inf -> ~fcFinite
4686 // fcmp olt x, +inf -> fcFinite|fcNegInf
4687 // fcmp uge x, +inf -> ~(fcFinite|fcNegInf)
4693 case FCmpInst::FCMP_OGE
:
4694 case FCmpInst::FCMP_ULT
: {
4695 if (IsNegativeRHS
) {
4696 // fcmp oge x, -inf -> ~fcNan
4697 // fcmp oge fabs(x), -inf -> ~fcNan
4698 // fcmp ult x, -inf -> fcNan
4699 // fcmp ult fabs(x), -inf -> fcNan
4704 // fcmp oge fabs(x), +inf -> fcInf
4705 // fcmp oge x, +inf -> fcPosInf
4706 // fcmp ult fabs(x), +inf -> ~fcInf
4707 // fcmp ult x, +inf -> ~fcPosInf
4713 case FCmpInst::FCMP_OGT
:
4714 case FCmpInst::FCMP_ULE
: {
4715 if (IsNegativeRHS
) {
4716 // fcmp ogt x, -inf -> fcmp one x, -inf
4717 // fcmp ogt fabs(x), -inf -> fcmp ord x, x
4718 // fcmp ule x, -inf -> fcmp ueq x, -inf
4719 // fcmp ule fabs(x), -inf -> fcmp uno x, x
4720 Mask
= IsFabs
? ~fcNan
: ~(fcNegInf
| fcNan
);
4724 // No value is ordered and greater than infinity.
4728 case FCmpInst::FCMP_OLE
:
4729 case FCmpInst::FCMP_UGT
: {
4730 if (IsNegativeRHS
) {
4731 Mask
= IsFabs
? fcNone
: fcNegInf
;
4735 // fcmp ole x, +inf -> fcmp ord x, x
4736 // fcmp ole fabs(x), +inf -> fcmp ord x, x
4737 // fcmp ole x, -inf -> fcmp oeq x, -inf
4738 // fcmp ole fabs(x), -inf -> false
4743 llvm_unreachable("all compare types are handled");
4746 // Invert the comparison for the unordered cases.
4747 if (FCmpInst::isUnordered(Pred
))
4750 return exactClass(Src
, Mask
);
4753 if (Pred
== FCmpInst::FCMP_OEQ
)
4754 return {Src
, RHSClass
, fcAllFlags
};
4756 if (Pred
== FCmpInst::FCMP_UEQ
) {
4757 FPClassTest Class
= RHSClass
| fcNan
;
4758 return {Src
, Class
, ~fcNan
};
4761 if (Pred
== FCmpInst::FCMP_ONE
)
4762 return {Src
, ~fcNan
, RHSClass
| fcNan
};
4764 if (Pred
== FCmpInst::FCMP_UNE
)
4765 return {Src
, fcAllFlags
, RHSClass
};
4767 assert((RHSClass
== fcNone
|| RHSClass
== fcPosNormal
||
4768 RHSClass
== fcNegNormal
|| RHSClass
== fcNormal
||
4769 RHSClass
== fcPosSubnormal
|| RHSClass
== fcNegSubnormal
||
4770 RHSClass
== fcSubnormal
) &&
4771 "should have been recognized as an exact class test");
4773 if (IsNegativeRHS
) {
4774 // TODO: Handle fneg(fabs)
4776 // fabs(x) o> -k -> fcmp ord x, x
4777 // fabs(x) u> -k -> true
4778 // fabs(x) o< -k -> false
4779 // fabs(x) u< -k -> fcmp uno x, x
4781 case FCmpInst::FCMP_OGT
:
4782 case FCmpInst::FCMP_OGE
:
4783 return {Src
, ~fcNan
, fcNan
};
4784 case FCmpInst::FCMP_UGT
:
4785 case FCmpInst::FCMP_UGE
:
4786 return {Src
, fcAllFlags
, fcNone
};
4787 case FCmpInst::FCMP_OLT
:
4788 case FCmpInst::FCMP_OLE
:
4789 return {Src
, fcNone
, fcAllFlags
};
4790 case FCmpInst::FCMP_ULT
:
4791 case FCmpInst::FCMP_ULE
:
4792 return {Src
, fcNan
, ~fcNan
};
4797 return {nullptr, fcAllFlags
, fcAllFlags
};
4800 FPClassTest ClassesLE
= fcNegInf
| fcNegNormal
;
4801 FPClassTest ClassesGE
= fcPositive
| fcNegZero
| fcNegSubnormal
;
4804 ClassesLE
|= fcNegSubnormal
;
4806 ClassesGE
|= fcNegNormal
;
4809 case FCmpInst::FCMP_OGT
:
4810 case FCmpInst::FCMP_OGE
:
4811 return {Src
, ClassesGE
, ~ClassesGE
| RHSClass
};
4812 case FCmpInst::FCMP_UGT
:
4813 case FCmpInst::FCMP_UGE
:
4814 return {Src
, ClassesGE
| fcNan
, ~(ClassesGE
| fcNan
) | RHSClass
};
4815 case FCmpInst::FCMP_OLT
:
4816 case FCmpInst::FCMP_OLE
:
4817 return {Src
, ClassesLE
, ~ClassesLE
| RHSClass
};
4818 case FCmpInst::FCMP_ULT
:
4819 case FCmpInst::FCMP_ULE
:
4820 return {Src
, ClassesLE
| fcNan
, ~(ClassesLE
| fcNan
) | RHSClass
};
4824 } else if (IsPositiveRHS
) {
4825 FPClassTest ClassesGE
= fcPosNormal
| fcPosInf
;
4826 FPClassTest ClassesLE
= fcNegative
| fcPosZero
| fcPosSubnormal
;
4828 ClassesGE
|= fcPosSubnormal
;
4830 ClassesLE
|= fcPosNormal
;
4833 ClassesGE
= llvm::inverse_fabs(ClassesGE
);
4834 ClassesLE
= llvm::inverse_fabs(ClassesLE
);
4838 case FCmpInst::FCMP_OGT
:
4839 case FCmpInst::FCMP_OGE
:
4840 return {Src
, ClassesGE
, ~ClassesGE
| RHSClass
};
4841 case FCmpInst::FCMP_UGT
:
4842 case FCmpInst::FCMP_UGE
:
4843 return {Src
, ClassesGE
| fcNan
, ~(ClassesGE
| fcNan
) | RHSClass
};
4844 case FCmpInst::FCMP_OLT
:
4845 case FCmpInst::FCMP_OLE
:
4846 return {Src
, ClassesLE
, ~ClassesLE
| RHSClass
};
4847 case FCmpInst::FCMP_ULT
:
4848 case FCmpInst::FCMP_ULE
:
4849 return {Src
, ClassesLE
| fcNan
, ~(ClassesLE
| fcNan
) | RHSClass
};
4855 return {nullptr, fcAllFlags
, fcAllFlags
};
4858 std::tuple
<Value
*, FPClassTest
, FPClassTest
>
4859 llvm::fcmpImpliesClass(CmpInst::Predicate Pred
, const Function
&F
, Value
*LHS
,
4860 const APFloat
&ConstRHS
, bool LookThroughSrc
) {
4861 // We can refine checks against smallest normal / largest denormal to an
4862 // exact class test.
4863 if (!ConstRHS
.isNegative() && ConstRHS
.isSmallestNormalized()) {
4865 const bool IsFabs
= LookThroughSrc
&& match(LHS
, m_FAbs(m_Value(Src
)));
4868 // Match pattern that's used in __builtin_isnormal.
4870 case FCmpInst::FCMP_OLT
:
4871 case FCmpInst::FCMP_UGE
: {
4872 // fcmp olt x, smallest_normal -> fcNegInf|fcNegNormal|fcSubnormal|fcZero
4873 // fcmp olt fabs(x), smallest_normal -> fcSubnormal|fcZero
4874 // fcmp uge x, smallest_normal -> fcNan|fcPosNormal|fcPosInf
4875 // fcmp uge fabs(x), smallest_normal -> ~(fcSubnormal|fcZero)
4876 Mask
= fcZero
| fcSubnormal
;
4878 Mask
|= fcNegNormal
| fcNegInf
;
4882 case FCmpInst::FCMP_OGE
:
4883 case FCmpInst::FCMP_ULT
: {
4884 // fcmp oge x, smallest_normal -> fcPosNormal | fcPosInf
4885 // fcmp oge fabs(x), smallest_normal -> fcInf | fcNormal
4886 // fcmp ult x, smallest_normal -> ~(fcPosNormal | fcPosInf)
4887 // fcmp ult fabs(x), smallest_normal -> ~(fcInf | fcNormal)
4888 Mask
= fcPosInf
| fcPosNormal
;
4890 Mask
|= fcNegInf
| fcNegNormal
;
4894 return fcmpImpliesClass(Pred
, F
, LHS
, ConstRHS
.classify(),
4898 // Invert the comparison for the unordered cases.
4899 if (FCmpInst::isUnordered(Pred
))
4902 return exactClass(Src
, Mask
);
4905 return fcmpImpliesClass(Pred
, F
, LHS
, ConstRHS
.classify(), LookThroughSrc
);
4908 std::tuple
<Value
*, FPClassTest
, FPClassTest
>
4909 llvm::fcmpImpliesClass(CmpInst::Predicate Pred
, const Function
&F
, Value
*LHS
,
4910 Value
*RHS
, bool LookThroughSrc
) {
4911 const APFloat
*ConstRHS
;
4912 if (!match(RHS
, m_APFloatAllowPoison(ConstRHS
)))
4913 return {nullptr, fcAllFlags
, fcAllFlags
};
4915 // TODO: Just call computeKnownFPClass for RHS to handle non-constants.
4916 return fcmpImpliesClass(Pred
, F
, LHS
, *ConstRHS
, LookThroughSrc
);
static void computeKnownFPClassFromCond(const Value *V, Value *Cond,
                                        unsigned Depth, bool CondIsTrue,
                                        const Instruction *CxtI,
                                        KnownFPClass &KnownFromContext) {
  Value *A, *B;
  if (Depth < MaxAnalysisRecursionDepth &&
      (CondIsTrue ? match(Cond, m_LogicalAnd(m_Value(A), m_Value(B)))
                  : match(Cond, m_LogicalOr(m_Value(A), m_Value(B))))) {
    computeKnownFPClassFromCond(V, A, Depth + 1, CondIsTrue, CxtI,
                                KnownFromContext);
    computeKnownFPClassFromCond(V, B, Depth + 1, CondIsTrue, CxtI,
                                KnownFromContext);
    return;
  }
  CmpInst::Predicate Pred;
  Value *LHS;
  uint64_t ClassVal = 0;
  const APFloat *CRHS;
  const APInt *RHS;
  if (match(Cond, m_FCmp(Pred, m_Value(LHS), m_APFloat(CRHS)))) {
    auto [CmpVal, MaskIfTrue, MaskIfFalse] = fcmpImpliesClass(
        Pred, *CxtI->getParent()->getParent(), LHS, *CRHS, LHS != V);
    if (CmpVal == V)
      KnownFromContext.knownNot(~(CondIsTrue ? MaskIfTrue : MaskIfFalse));
  } else if (match(Cond, m_Intrinsic<Intrinsic::is_fpclass>(
                             m_Specific(V), m_ConstantInt(ClassVal)))) {
    FPClassTest Mask = static_cast<FPClassTest>(ClassVal);
    KnownFromContext.knownNot(CondIsTrue ? ~Mask : Mask);
  } else if (match(Cond, m_ICmp(Pred, m_ElementWiseBitCast(m_Specific(V)),
                                m_APInt(RHS)))) {
    bool TrueIfSigned;
    if (!isSignBitCheck(Pred, *RHS, TrueIfSigned))
      return;
    if (TrueIfSigned == CondIsTrue)
      KnownFromContext.signBitMustBeOne();
    else
      KnownFromContext.signBitMustBeZero();
  }
}
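
// Example of a branch condition this refines (illustrative only):
//
//   %cmp = fcmp ogt double %x, 0.0
//   br i1 %cmp, label %then, label %else
//
// With CondIsTrue set (the %then edge), %x is known not to be a NaN, a zero,
// or any negative class; on the %else edge the complementary mask from
// fcmpImpliesClass applies instead.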
static KnownFPClass computeKnownFPClassFromContext(const Value *V,
                                                   const SimplifyQuery &Q) {
  KnownFPClass KnownFromContext;

  if (!Q.CxtI)
    return KnownFromContext;

  if (Q.DC && Q.DT) {
    // Handle dominating conditions.
    for (BranchInst *BI : Q.DC->conditionsFor(V)) {
      Value *Cond = BI->getCondition();

      BasicBlockEdge Edge0(BI->getParent(), BI->getSuccessor(0));
      if (Q.DT->dominates(Edge0, Q.CxtI->getParent()))
        computeKnownFPClassFromCond(V, Cond, /*Depth=*/0, /*CondIsTrue=*/true,
                                    Q.CxtI, KnownFromContext);

      BasicBlockEdge Edge1(BI->getParent(), BI->getSuccessor(1));
      if (Q.DT->dominates(Edge1, Q.CxtI->getParent()))
        computeKnownFPClassFromCond(V, Cond, /*Depth=*/0, /*CondIsTrue=*/false,
                                    Q.CxtI, KnownFromContext);
    }
  }

  if (!Q.AC)
    return KnownFromContext;

  // Try to restrict the floating-point classes based on information from
  // assumptions.
  for (auto &AssumeVH : Q.AC->assumptionsFor(V)) {
    if (!AssumeVH)
      continue;
    CallInst *I = cast<CallInst>(AssumeVH);

    assert(I->getFunction() == Q.CxtI->getParent()->getParent() &&
           "Got assumption for the wrong function!");
    assert(I->getIntrinsicID() == Intrinsic::assume &&
           "must be an assume intrinsic");

    if (!isValidAssumeForContext(I, Q.CxtI, Q.DT))
      continue;

    computeKnownFPClassFromCond(V, I->getArgOperand(0), /*Depth=*/0,
                                /*CondIsTrue=*/true, Q.CxtI, KnownFromContext);
  }

  return KnownFromContext;
}
void computeKnownFPClass(const Value *V, const APInt &DemandedElts,
                         FPClassTest InterestedClasses, KnownFPClass &Known,
                         unsigned Depth, const SimplifyQuery &Q);

static void computeKnownFPClass(const Value *V, KnownFPClass &Known,
                                FPClassTest InterestedClasses, unsigned Depth,
                                const SimplifyQuery &Q) {
  auto *FVTy = dyn_cast<FixedVectorType>(V->getType());
  APInt DemandedElts =
      FVTy ? APInt::getAllOnes(FVTy->getNumElements()) : APInt(1, 1);
  computeKnownFPClass(V, DemandedElts, InterestedClasses, Known, Depth, Q);
}
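
// Minimal usage sketch for the explicit-DemandedElts entry point (assumed
// context; V and Q are hypothetical values provided by the caller):
//
//   KnownFPClass Known;
//   APInt DemandedElts = APInt::getAllOnes(4); // e.g. for a <4 x float> value
//   ::computeKnownFPClass(V, DemandedElts, fcNan | fcInf, Known,
//                         /*Depth=*/0, Q);
//   if (Known.isKnownNeverNaN() && Known.isKnownNeverInfinity()) {
//     // The demanded lanes are known to never be NaN or infinite.
//   }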
static void computeKnownFPClassForFPTrunc(const Operator *Op,
                                          const APInt &DemandedElts,
                                          FPClassTest InterestedClasses,
                                          KnownFPClass &Known, unsigned Depth,
                                          const SimplifyQuery &Q) {
  if ((InterestedClasses &
       (KnownFPClass::OrderedLessThanZeroMask | fcNan)) == fcNone)
    return;

  KnownFPClass KnownSrc;
  computeKnownFPClass(Op->getOperand(0), DemandedElts, InterestedClasses,
                      KnownSrc, Depth + 1, Q);

  // Sign should be preserved
  // TODO: Handle cannot be ordered greater than zero
  if (KnownSrc.cannotBeOrderedLessThanZero())
    Known.knownNot(KnownFPClass::OrderedLessThanZeroMask);

  Known.propagateNaN(KnownSrc, true);

  // Infinity needs a range check.
}
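
// Illustrative consequence of the helper above (not from the source): for
//   %t = fptrunc double %x to float
// NaN-ness propagates from %x, and a source that cannot be ordered less than
// zero keeps that property, but infinities are not inferred here because a
// large finite double may overflow to an infinite float.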
void computeKnownFPClass(const Value *V, const APInt &DemandedElts,
                         FPClassTest InterestedClasses, KnownFPClass &Known,
                         unsigned Depth, const SimplifyQuery &Q) {
  assert(Known.isUnknown() && "should not be called with known information");

  if (!DemandedElts) {
    // No demanded elts, better to assume we don't know anything.
    Known.resetAll();
    return;
  }

  assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");

  if (auto *CFP = dyn_cast<ConstantFP>(V)) {
    Known.KnownFPClasses = CFP->getValueAPF().classify();
    Known.SignBit = CFP->isNegative();
    return;
  }

  if (isa<ConstantAggregateZero>(V)) {
    Known.KnownFPClasses = fcPosZero;
    Known.SignBit = false;
    return;
  }

  if (isa<PoisonValue>(V)) {
    Known.KnownFPClasses = fcNone;
    Known.SignBit = false;
    return;
  }

  // Try to handle fixed width vector constants
  auto *VFVTy = dyn_cast<FixedVectorType>(V->getType());
  const Constant *CV = dyn_cast<Constant>(V);
  if (VFVTy && CV) {
    Known.KnownFPClasses = fcNone;
    bool SignBitAllZero = true;
    bool SignBitAllOne = true;

    // For vectors, verify that each element is not NaN.
    unsigned NumElts = VFVTy->getNumElements();
    for (unsigned i = 0; i != NumElts; ++i) {
      if (!DemandedElts[i])
        continue;

      Constant *Elt = CV->getAggregateElement(i);
      if (!Elt) {
        Known = KnownFPClass();
        return;
      }
      if (isa<PoisonValue>(Elt))
        continue;
      auto *CElt = dyn_cast<ConstantFP>(Elt);
      if (!CElt) {
        Known = KnownFPClass();
        return;
      }

      const APFloat &C = CElt->getValueAPF();
      Known.KnownFPClasses |= C.classify();
      if (C.isNegative())
        SignBitAllZero = false;
      else
        SignBitAllOne = false;
    }
    if (SignBitAllOne != SignBitAllZero)
      Known.SignBit = SignBitAllOne;
    return;
  }

  FPClassTest KnownNotFromFlags = fcNone;
  if (const auto *CB = dyn_cast<CallBase>(V))
    KnownNotFromFlags |= CB->getRetNoFPClass();
  else if (const auto *Arg = dyn_cast<Argument>(V))
    KnownNotFromFlags |= Arg->getNoFPClass();

  const Operator *Op = dyn_cast<Operator>(V);
  if (const FPMathOperator *FPOp = dyn_cast_or_null<FPMathOperator>(Op)) {
    if (FPOp->hasNoNaNs())
      KnownNotFromFlags |= fcNan;
    if (FPOp->hasNoInfs())
      KnownNotFromFlags |= fcInf;
  }

  KnownFPClass AssumedClasses = computeKnownFPClassFromContext(V, Q);
  KnownNotFromFlags |= ~AssumedClasses.KnownFPClasses;

  // We no longer need to find out about these bits from inputs if we can
  // assume this from flags/attributes.
  InterestedClasses &= ~KnownNotFromFlags;

  auto ClearClassesFromFlags = make_scope_exit([=, &Known] {
    Known.knownNot(KnownNotFromFlags);
    if (!Known.SignBit && AssumedClasses.SignBit) {
      if (*AssumedClasses.SignBit)
        Known.signBitMustBeOne();
      else
        Known.signBitMustBeZero();
    }
  });

  if (!Op)
    return;

  // All recursive calls that increase depth must come after this.
  if (Depth == MaxAnalysisRecursionDepth)
    return;

  const unsigned Opc = Op->getOpcode();
  switch (Opc) {
  case Instruction::FNeg: {
    computeKnownFPClass(Op->getOperand(0), DemandedElts, InterestedClasses,
                        Known, Depth + 1, Q);
    Known.fneg();
    break;
  }
  case Instruction::Select: {
    Value *Cond = Op->getOperand(0);
    Value *LHS = Op->getOperand(1);
    Value *RHS = Op->getOperand(2);

    FPClassTest FilterLHS = fcAllFlags;
    FPClassTest FilterRHS = fcAllFlags;

    Value *TestedValue = nullptr;
    FPClassTest MaskIfTrue = fcAllFlags;
    FPClassTest MaskIfFalse = fcAllFlags;
    uint64_t ClassVal = 0;
    const Function *F = cast<Instruction>(Op)->getFunction();
    CmpInst::Predicate Pred;
    Value *CmpLHS, *CmpRHS;
    if (F && match(Cond, m_FCmp(Pred, m_Value(CmpLHS), m_Value(CmpRHS)))) {
      // If the select filters out a value based on the class, it no longer
      // participates in the class of the result

      // TODO: In some degenerate cases we can infer something if we try again
      // without looking through sign operations.
      bool LookThroughFAbsFNeg = CmpLHS != LHS && CmpLHS != RHS;
      std::tie(TestedValue, MaskIfTrue, MaskIfFalse) =
          fcmpImpliesClass(Pred, *F, CmpLHS, CmpRHS, LookThroughFAbsFNeg);
    } else if (match(Cond,
                     m_Intrinsic<Intrinsic::is_fpclass>(
                         m_Value(TestedValue), m_ConstantInt(ClassVal)))) {
      FPClassTest TestedMask = static_cast<FPClassTest>(ClassVal);
      MaskIfTrue = TestedMask;
      MaskIfFalse = ~TestedMask;
    }

    if (TestedValue == LHS) {
      // match !isnan(x) ? x : y
      FilterLHS = MaskIfTrue;
    } else if (TestedValue == RHS) { // && IsExactClass
      // match !isnan(x) ? y : x
      FilterRHS = MaskIfFalse;
    }

    KnownFPClass Known2;
    computeKnownFPClass(LHS, DemandedElts, InterestedClasses & FilterLHS, Known,
                        Depth + 1, Q);
    Known.KnownFPClasses &= FilterLHS;

    computeKnownFPClass(RHS, DemandedElts, InterestedClasses & FilterRHS,
                        Known2, Depth + 1, Q);
    Known2.KnownFPClasses &= FilterRHS;

    Known |= Known2;
    break;
  }
  case Instruction::Call: {
    const CallInst *II = cast<CallInst>(Op);
    const Intrinsic::ID IID = II->getIntrinsicID();
    switch (IID) {
    case Intrinsic::fabs: {
      if ((InterestedClasses & (fcNan | fcPositive)) != fcNone) {
        // If we only care about the sign bit we don't need to inspect the
        // operand.
        computeKnownFPClass(II->getArgOperand(0), DemandedElts,
                            InterestedClasses, Known, Depth + 1, Q);
      }

      Known.fabs();
      break;
    }
    case Intrinsic::copysign: {
      KnownFPClass KnownSign;

      computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedClasses,
                          Known, Depth + 1, Q);
      computeKnownFPClass(II->getArgOperand(1), DemandedElts, InterestedClasses,
                          KnownSign, Depth + 1, Q);
      Known.copysign(KnownSign);
      break;
    }
    case Intrinsic::fma:
    case Intrinsic::fmuladd: {
      if ((InterestedClasses & fcNegative) == fcNone)
        break;

      if (II->getArgOperand(0) != II->getArgOperand(1))
        break;

      // The multiply cannot be -0 and therefore the add can't be -0
      Known.knownNot(fcNegZero);

      // x * x + y is non-negative if y is non-negative.
      KnownFPClass KnownAddend;
      computeKnownFPClass(II->getArgOperand(2), DemandedElts, InterestedClasses,
                          KnownAddend, Depth + 1, Q);

      if (KnownAddend.cannotBeOrderedLessThanZero())
        Known.knownNot(fcNegative);
      break;
    }
    case Intrinsic::sqrt:
    case Intrinsic::experimental_constrained_sqrt: {
      KnownFPClass KnownSrc;
      FPClassTest InterestedSrcs = InterestedClasses;
      if (InterestedClasses & fcNan)
        InterestedSrcs |= KnownFPClass::OrderedLessThanZeroMask;

      computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedSrcs,
                          KnownSrc, Depth + 1, Q);

      if (KnownSrc.isKnownNeverPosInfinity())
        Known.knownNot(fcPosInf);
      if (KnownSrc.isKnownNever(fcSNan))
        Known.knownNot(fcSNan);

      // Any negative value besides -0 returns a nan.
      if (KnownSrc.isKnownNeverNaN() && KnownSrc.cannotBeOrderedLessThanZero())
        Known.knownNot(fcNan);

      // The only negative value that can be returned is -0 for -0 inputs.
      Known.knownNot(fcNegInf | fcNegSubnormal | fcNegNormal);

      // If the input denormal mode could be PreserveSign, a negative
      // subnormal input could produce a negative zero output.
      const Function *F = II->getFunction();
      if (Q.IIQ.hasNoSignedZeros(II) ||
          (F && KnownSrc.isKnownNeverLogicalNegZero(*F, II->getType())))
        Known.knownNot(fcNegZero);

      break;
    }
    case Intrinsic::sin:
    case Intrinsic::cos: {
      // Return NaN on infinite inputs.
      KnownFPClass KnownSrc;
      computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedClasses,
                          KnownSrc, Depth + 1, Q);
      Known.knownNot(fcInf);
      if (KnownSrc.isKnownNeverNaN() && KnownSrc.isKnownNeverInfinity())
        Known.knownNot(fcNan);
      break;
    }
    case Intrinsic::maxnum:
    case Intrinsic::minnum:
    case Intrinsic::minimum:
    case Intrinsic::maximum: {
      KnownFPClass KnownLHS, KnownRHS;
      computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedClasses,
                          KnownLHS, Depth + 1, Q);
      computeKnownFPClass(II->getArgOperand(1), DemandedElts, InterestedClasses,
                          KnownRHS, Depth + 1, Q);

      bool NeverNaN = KnownLHS.isKnownNeverNaN() || KnownRHS.isKnownNeverNaN();
      Known = KnownLHS | KnownRHS;

      // If either operand is not NaN, the result is not NaN.
      if (NeverNaN && (IID == Intrinsic::minnum || IID == Intrinsic::maxnum))
        Known.knownNot(fcNan);

      if (IID == Intrinsic::maxnum) {
        // If at least one operand is known to be positive, the result must be
        // positive.
        if ((KnownLHS.cannotBeOrderedLessThanZero() &&
             KnownLHS.isKnownNeverNaN()) ||
            (KnownRHS.cannotBeOrderedLessThanZero() &&
             KnownRHS.isKnownNeverNaN()))
          Known.knownNot(KnownFPClass::OrderedLessThanZeroMask);
      } else if (IID == Intrinsic::maximum) {
        // If at least one operand is known to be positive, the result must be
        // positive.
        if (KnownLHS.cannotBeOrderedLessThanZero() ||
            KnownRHS.cannotBeOrderedLessThanZero())
          Known.knownNot(KnownFPClass::OrderedLessThanZeroMask);
      } else if (IID == Intrinsic::minnum) {
        // If at least one operand is known to be negative, the result must be
        // negative.
        if ((KnownLHS.cannotBeOrderedGreaterThanZero() &&
             KnownLHS.isKnownNeverNaN()) ||
            (KnownRHS.cannotBeOrderedGreaterThanZero() &&
             KnownRHS.isKnownNeverNaN()))
          Known.knownNot(KnownFPClass::OrderedGreaterThanZeroMask);
      } else {
        // If at least one operand is known to be negative, the result must be
        // negative.
        if (KnownLHS.cannotBeOrderedGreaterThanZero() ||
            KnownRHS.cannotBeOrderedGreaterThanZero())
          Known.knownNot(KnownFPClass::OrderedGreaterThanZeroMask);
      }

      // Fixup zero handling if denormals could be returned as a zero.
      //
      // As there's no spec for denormal flushing, be conservative with the
      // treatment of denormals that could be flushed to zero. For older
      // subtargets on AMDGPU the min/max instructions would not flush the
      // output and return the original value.
      //
      if ((Known.KnownFPClasses & fcZero) != fcNone &&
          !Known.isKnownNeverSubnormal()) {
        const Function *Parent = II->getFunction();
        if (!Parent)
          break;

        DenormalMode Mode = Parent->getDenormalMode(
            II->getType()->getScalarType()->getFltSemantics());
        if (Mode != DenormalMode::getIEEE())
          Known.KnownFPClasses |= fcZero;
      }

      if (Known.isKnownNeverNaN()) {
        if (KnownLHS.SignBit && KnownRHS.SignBit &&
            *KnownLHS.SignBit == *KnownRHS.SignBit) {
          if (*KnownLHS.SignBit)
            Known.signBitMustBeOne();
          else
            Known.signBitMustBeZero();
        } else if ((IID == Intrinsic::maximum || IID == Intrinsic::minimum) ||
                   ((KnownLHS.isKnownNeverNegZero() ||
                     KnownRHS.isKnownNeverPosZero()) &&
                    (KnownLHS.isKnownNeverPosZero() ||
                     KnownRHS.isKnownNeverNegZero()))) {
          if ((IID == Intrinsic::maximum || IID == Intrinsic::maxnum) &&
              (KnownLHS.SignBit == false || KnownRHS.SignBit == false))
            Known.signBitMustBeZero();
          else if ((IID == Intrinsic::minimum || IID == Intrinsic::minnum) &&
                   (KnownLHS.SignBit == true || KnownRHS.SignBit == true))
            Known.signBitMustBeOne();
        }
      }
      break;
    }
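
    // Worked example for the min/max handling above (illustrative only): in
    //   %r = call float @llvm.maxnum.f32(float %x, float 1.0)
    // the RHS is never NaN and cannot be ordered less than zero, so the
    // result is known not to be -inf or a negative normal/subnormal value,
    // regardless of what is known about %x.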
    case Intrinsic::canonicalize: {
      KnownFPClass KnownSrc;
      computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedClasses,
                          KnownSrc, Depth + 1, Q);

      // This is essentially a stronger form of
      // propagateCanonicalizingSrc. Other "canonicalizing" operations don't
      // actually have an IR canonicalization guarantee.

      // Canonicalize may flush denormals to zero, so we have to consider the
      // denormal mode to preserve known-not-0 knowledge.
      Known.KnownFPClasses = KnownSrc.KnownFPClasses | fcZero | fcQNan;

      // Stronger version of propagateNaN
      // Canonicalize is guaranteed to quiet signaling nans.
      if (KnownSrc.isKnownNeverNaN())
        Known.knownNot(fcNan);
      else
        Known.knownNot(fcSNan);

      const Function *F = II->getFunction();
      if (!F)
        break;

      // If the parent function flushes denormals, the canonical output cannot
      // be a denormal.
      const fltSemantics &FPType =
          II->getType()->getScalarType()->getFltSemantics();
      DenormalMode DenormMode = F->getDenormalMode(FPType);
      if (DenormMode == DenormalMode::getIEEE()) {
        if (KnownSrc.isKnownNever(fcPosZero))
          Known.knownNot(fcPosZero);
        if (KnownSrc.isKnownNever(fcNegZero))
          Known.knownNot(fcNegZero);
        break;
      }

      if (DenormMode.inputsAreZero() || DenormMode.outputsAreZero())
        Known.knownNot(fcSubnormal);

      if (DenormMode.Input == DenormalMode::PositiveZero ||
          (DenormMode.Output == DenormalMode::PositiveZero &&
           DenormMode.Input == DenormalMode::IEEE))
        Known.knownNot(fcNegZero);

      break;
    }
    case Intrinsic::vector_reduce_fmax:
    case Intrinsic::vector_reduce_fmin:
    case Intrinsic::vector_reduce_fmaximum:
    case Intrinsic::vector_reduce_fminimum: {
      // reduce min/max will choose an element from one of the vector elements,
      // so we can infer and class information that is common to all elements.
      Known = computeKnownFPClass(II->getArgOperand(0), II->getFastMathFlags(),
                                  InterestedClasses, Depth + 1, Q);
      // Can only propagate sign if output is never NaN.
      if (!Known.isKnownNeverNaN())
        Known.SignBit.reset();
      break;
    }
      // reverse preserves all characteristics of the input vec's element.
    case Intrinsic::vector_reverse:
      Known = computeKnownFPClass(
          II->getArgOperand(0), DemandedElts.reverseBits(),
          II->getFastMathFlags(), InterestedClasses, Depth + 1, Q);
      break;
    case Intrinsic::trunc:
    case Intrinsic::floor:
    case Intrinsic::ceil:
    case Intrinsic::rint:
    case Intrinsic::nearbyint:
    case Intrinsic::round:
    case Intrinsic::roundeven: {
      KnownFPClass KnownSrc;
      FPClassTest InterestedSrcs = InterestedClasses;
      if (InterestedSrcs & fcPosFinite)
        InterestedSrcs |= fcPosFinite;
      if (InterestedSrcs & fcNegFinite)
        InterestedSrcs |= fcNegFinite;
      computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedSrcs,
                          KnownSrc, Depth + 1, Q);

      // Integer results cannot be subnormal.
      Known.knownNot(fcSubnormal);

      Known.propagateNaN(KnownSrc, true);

      // Pass through infinities, except PPC_FP128 is a special case for
      // intrinsics other than trunc.
      if (IID == Intrinsic::trunc || !V->getType()->isMultiUnitFPType()) {
        if (KnownSrc.isKnownNeverPosInfinity())
          Known.knownNot(fcPosInf);
        if (KnownSrc.isKnownNeverNegInfinity())
          Known.knownNot(fcNegInf);
      }

      // Negative round ups to 0 produce -0
      if (KnownSrc.isKnownNever(fcPosFinite))
        Known.knownNot(fcPosFinite);
      if (KnownSrc.isKnownNever(fcNegFinite))
        Known.knownNot(fcNegFinite);

      break;
    }
    case Intrinsic::exp:
    case Intrinsic::exp2:
    case Intrinsic::exp10: {
      Known.knownNot(fcNegative);
      if ((InterestedClasses & fcNan) == fcNone)
        break;

      KnownFPClass KnownSrc;
      computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedClasses,
                          KnownSrc, Depth + 1, Q);
      if (KnownSrc.isKnownNeverNaN()) {
        Known.knownNot(fcNan);
        Known.signBitMustBeZero();
      }

      break;
    }
    case Intrinsic::fptrunc_round: {
      computeKnownFPClassForFPTrunc(Op, DemandedElts, InterestedClasses, Known,
                                    Depth, Q);
      break;
    }
    case Intrinsic::log:
    case Intrinsic::log10:
    case Intrinsic::log2:
    case Intrinsic::experimental_constrained_log:
    case Intrinsic::experimental_constrained_log10:
    case Intrinsic::experimental_constrained_log2: {
      // log(+inf) -> +inf
      // log([+-]0.0) -> -inf
      // log(-inf) -> nan
      // log(-x) -> nan
      if ((InterestedClasses & (fcNan | fcInf)) == fcNone)
        break;

      FPClassTest InterestedSrcs = InterestedClasses;
      if ((InterestedClasses & fcNegInf) != fcNone)
        InterestedSrcs |= fcZero | fcSubnormal;
      if ((InterestedClasses & fcNan) != fcNone)
        InterestedSrcs |= fcNan | (fcNegative & ~fcNan);

      KnownFPClass KnownSrc;
      computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedSrcs,
                          KnownSrc, Depth + 1, Q);

      if (KnownSrc.isKnownNeverPosInfinity())
        Known.knownNot(fcPosInf);

      if (KnownSrc.isKnownNeverNaN() && KnownSrc.cannotBeOrderedLessThanZero())
        Known.knownNot(fcNan);

      const Function *F = II->getFunction();
      if (F && KnownSrc.isKnownNeverLogicalZero(*F, II->getType()))
        Known.knownNot(fcNegInf);

      break;
    }
    case Intrinsic::powi: {
      if ((InterestedClasses & fcNegative) == fcNone)
        break;

      const Value *Exp = II->getArgOperand(1);
      Type *ExpTy = Exp->getType();
      unsigned BitWidth = ExpTy->getScalarType()->getIntegerBitWidth();
      KnownBits ExponentKnownBits(BitWidth);
      computeKnownBits(Exp, isa<VectorType>(ExpTy) ? DemandedElts : APInt(1, 1),
                       ExponentKnownBits, Depth + 1, Q);

      if (ExponentKnownBits.Zero[0]) { // Is even
        Known.knownNot(fcNegative);
        break;
      }

      // Given that exp is an integer, here are the
      // ways that pow can return a negative value:
      //
      //   pow(-x, exp)   --> negative if exp is odd and x is negative.
      //   pow(-0, exp)   --> -inf if exp is negative odd.
      //   pow(-0, exp)   --> -0 if exp is positive odd.
      //   pow(-inf, exp) --> -0 if exp is negative odd.
      //   pow(-inf, exp) --> -inf if exp is positive odd.
      KnownFPClass KnownSrc;
      computeKnownFPClass(II->getArgOperand(0), DemandedElts, fcNegative,
                          KnownSrc, Depth + 1, Q);
      if (KnownSrc.isKnownNever(fcNegative))
        Known.knownNot(fcNegative);
      break;
    }
    case Intrinsic::ldexp: {
      KnownFPClass KnownSrc;
      computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedClasses,
                          KnownSrc, Depth + 1, Q);
      Known.propagateNaN(KnownSrc, /*PropagateSign=*/true);

      // Sign is preserved, but underflows may produce zeroes.
      if (KnownSrc.isKnownNever(fcNegative))
        Known.knownNot(fcNegative);
      else if (KnownSrc.cannotBeOrderedLessThanZero())
        Known.knownNot(KnownFPClass::OrderedLessThanZeroMask);

      if (KnownSrc.isKnownNever(fcPositive))
        Known.knownNot(fcPositive);
      else if (KnownSrc.cannotBeOrderedGreaterThanZero())
        Known.knownNot(KnownFPClass::OrderedGreaterThanZeroMask);

      // Can refine inf/zero handling based on the exponent operand.
      const FPClassTest ExpInfoMask = fcZero | fcSubnormal | fcInf;
      if ((InterestedClasses & ExpInfoMask) == fcNone)
        break;
      if ((KnownSrc.KnownFPClasses & ExpInfoMask) == fcNone)
        break;

      const fltSemantics &Flt =
          II->getType()->getScalarType()->getFltSemantics();
      unsigned Precision = APFloat::semanticsPrecision(Flt);
      const Value *ExpArg = II->getArgOperand(1);
      ConstantRange ExpRange = computeConstantRange(
          ExpArg, true, Q.IIQ.UseInstrInfo, Q.AC, Q.CxtI, Q.DT, Depth + 1);

      const int MantissaBits = Precision - 1;
      if (ExpRange.getSignedMin().sge(static_cast<int64_t>(MantissaBits)))
        Known.knownNot(fcSubnormal);

      const Function *F = II->getFunction();
      const APInt *ConstVal = ExpRange.getSingleElement();
      if (ConstVal && ConstVal->isZero()) {
        // ldexp(x, 0) -> x, so propagate everything.
        Known.propagateCanonicalizingSrc(KnownSrc, *F, II->getType());
      } else if (ExpRange.isAllNegative()) {
        // If we know the power is <= 0, can't introduce inf
        if (KnownSrc.isKnownNeverPosInfinity())
          Known.knownNot(fcPosInf);
        if (KnownSrc.isKnownNeverNegInfinity())
          Known.knownNot(fcNegInf);
      } else if (ExpRange.isAllNonNegative()) {
        // If we know the power is >= 0, can't introduce subnormal or zero
        if (KnownSrc.isKnownNeverPosSubnormal())
          Known.knownNot(fcPosSubnormal);
        if (KnownSrc.isKnownNeverNegSubnormal())
          Known.knownNot(fcNegSubnormal);
        if (F && KnownSrc.isKnownNeverLogicalPosZero(*F, II->getType()))
          Known.knownNot(fcPosZero);
        if (F && KnownSrc.isKnownNeverLogicalNegZero(*F, II->getType()))
          Known.knownNot(fcNegZero);
      }

      break;
    }
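
    // Illustrative example for the exponent-range reasoning above: in
    //   %r = call float @llvm.ldexp.f32.i32(float %x, i32 %e)
    // with %e known to lie in [23, 100], the signed minimum of the exponent
    // range is >= the float's 23 fractional mantissa bits, so the result
    // cannot be subnormal; if the range is all non-negative, no zeroes or
    // subnormals can be introduced beyond those already possible from %x.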
    case Intrinsic::arithmetic_fence: {
      computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedClasses,
                          Known, Depth + 1, Q);
      break;
    }
    case Intrinsic::experimental_constrained_sitofp:
    case Intrinsic::experimental_constrained_uitofp:
      // Cannot produce nan
      Known.knownNot(fcNan);

      // sitofp and uitofp turn into +0.0 for zero.
      Known.knownNot(fcNegZero);

      // Integers cannot be subnormal
      Known.knownNot(fcSubnormal);

      if (IID == Intrinsic::experimental_constrained_uitofp)
        Known.signBitMustBeZero();

      // TODO: Copy inf handling from instructions
      break;
    default:
      break;
    }

    break;
  }
  case Instruction::FAdd:
  case Instruction::FSub: {
    KnownFPClass KnownLHS, KnownRHS;
    bool WantNegative =
        Op->getOpcode() == Instruction::FAdd &&
        (InterestedClasses & KnownFPClass::OrderedLessThanZeroMask) != fcNone;
    bool WantNaN = (InterestedClasses & fcNan) != fcNone;
    bool WantNegZero = (InterestedClasses & fcNegZero) != fcNone;

    if (!WantNaN && !WantNegative && !WantNegZero)
      break;

    FPClassTest InterestedSrcs = InterestedClasses;
    if (WantNegative)
      InterestedSrcs |= KnownFPClass::OrderedLessThanZeroMask;
    if (InterestedClasses & fcNan)
      InterestedSrcs |= fcInf;
    computeKnownFPClass(Op->getOperand(1), DemandedElts, InterestedSrcs,
                        KnownRHS, Depth + 1, Q);

    if ((WantNaN && KnownRHS.isKnownNeverNaN()) ||
        (WantNegative && KnownRHS.cannotBeOrderedLessThanZero()) ||
        WantNegZero || Opc == Instruction::FSub) {

      // RHS is canonically cheaper to compute. Skip inspecting the LHS if
      // there's no point.
      computeKnownFPClass(Op->getOperand(0), DemandedElts, InterestedSrcs,
                          KnownLHS, Depth + 1, Q);
      // Adding positive and negative infinity produces NaN.
      // TODO: Check sign of infinities.
      if (KnownLHS.isKnownNeverNaN() && KnownRHS.isKnownNeverNaN() &&
          (KnownLHS.isKnownNeverInfinity() || KnownRHS.isKnownNeverInfinity()))
        Known.knownNot(fcNan);

      // FIXME: Context function should always be passed in separately
      const Function *F = cast<Instruction>(Op)->getFunction();

      if (Op->getOpcode() == Instruction::FAdd) {
        if (KnownLHS.cannotBeOrderedLessThanZero() &&
            KnownRHS.cannotBeOrderedLessThanZero())
          Known.knownNot(KnownFPClass::OrderedLessThanZeroMask);
        if (!F)
          break;

        // (fadd x, 0.0) is guaranteed to return +0.0, not -0.0.
        if ((KnownLHS.isKnownNeverLogicalNegZero(*F, Op->getType()) ||
             KnownRHS.isKnownNeverLogicalNegZero(*F, Op->getType())) &&
            // Make sure output negative denormal can't flush to -0
            outputDenormalIsIEEEOrPosZero(*F, Op->getType()))
          Known.knownNot(fcNegZero);
      } else {
        if (!F)
          break;

        // Only fsub -0, +0 can return -0
        if ((KnownLHS.isKnownNeverLogicalNegZero(*F, Op->getType()) ||
             KnownRHS.isKnownNeverLogicalPosZero(*F, Op->getType())) &&
            // Make sure output negative denormal can't flush to -0
            outputDenormalIsIEEEOrPosZero(*F, Op->getType()))
          Known.knownNot(fcNegZero);
      }
    }

    break;
  }
  case Instruction::FMul: {
    // X * X is always non-negative or a NaN.
    if (Op->getOperand(0) == Op->getOperand(1))
      Known.knownNot(fcNegative);

    if ((InterestedClasses & fcNan) != fcNan)
      break;

    // fcSubnormal is only needed in case of DAZ.
    const FPClassTest NeedForNan = fcNan | fcInf | fcZero | fcSubnormal;

    KnownFPClass KnownLHS, KnownRHS;
    computeKnownFPClass(Op->getOperand(1), DemandedElts, NeedForNan, KnownRHS,
                        Depth + 1, Q);
    if (!KnownRHS.isKnownNeverNaN())
      break;

    computeKnownFPClass(Op->getOperand(0), DemandedElts, NeedForNan, KnownLHS,
                        Depth + 1, Q);
    if (!KnownLHS.isKnownNeverNaN())
      break;

    if (KnownLHS.SignBit && KnownRHS.SignBit) {
      if (*KnownLHS.SignBit == *KnownRHS.SignBit)
        Known.signBitMustBeZero();
      else
        Known.signBitMustBeOne();
    }

    // If 0 * +/-inf produces NaN.
    if (KnownLHS.isKnownNeverInfinity() && KnownRHS.isKnownNeverInfinity()) {
      Known.knownNot(fcNan);
      break;
    }

    const Function *F = cast<Instruction>(Op)->getFunction();
    if (!F)
      break;

    if ((KnownRHS.isKnownNeverInfinity() ||
         KnownLHS.isKnownNeverLogicalZero(*F, Op->getType())) &&
        (KnownLHS.isKnownNeverInfinity() ||
         KnownRHS.isKnownNeverLogicalZero(*F, Op->getType())))
      Known.knownNot(fcNan);

    break;
  }
  case Instruction::FDiv:
  case Instruction::FRem: {
    if (Op->getOperand(0) == Op->getOperand(1)) {
      // TODO: Could filter out snan if we inspect the operand
      if (Op->getOpcode() == Instruction::FDiv) {
        // X / X is always exactly 1.0 or a NaN.
        Known.KnownFPClasses = fcNan | fcPosNormal;
      } else {
        // X % X is always exactly [+-]0.0 or a NaN.
        Known.KnownFPClasses = fcNan | fcZero;
      }

      break;
    }

    const bool WantNan = (InterestedClasses & fcNan) != fcNone;
    const bool WantNegative = (InterestedClasses & fcNegative) != fcNone;
    const bool WantPositive =
        Opc == Instruction::FRem && (InterestedClasses & fcPositive) != fcNone;
    if (!WantNan && !WantNegative && !WantPositive)
      break;

    KnownFPClass KnownLHS, KnownRHS;

    computeKnownFPClass(Op->getOperand(1), DemandedElts,
                        fcNan | fcInf | fcZero | fcNegative, KnownRHS,
                        Depth + 1, Q);

    bool KnowSomethingUseful =
        KnownRHS.isKnownNeverNaN() || KnownRHS.isKnownNever(fcNegative);

    if (KnowSomethingUseful || WantPositive) {
      const FPClassTest InterestedLHS =
          WantPositive ? fcAllFlags
                       : fcNan | fcInf | fcZero | fcSubnormal | fcNegative;

      computeKnownFPClass(Op->getOperand(0), DemandedElts,
                          InterestedClasses & InterestedLHS, KnownLHS,
                          Depth + 1, Q);
    }

    const Function *F = cast<Instruction>(Op)->getFunction();

    if (Op->getOpcode() == Instruction::FDiv) {
      // Only 0/0, Inf/Inf produce NaN.
      if (KnownLHS.isKnownNeverNaN() && KnownRHS.isKnownNeverNaN() &&
          (KnownLHS.isKnownNeverInfinity() ||
           KnownRHS.isKnownNeverInfinity()) &&
          ((F && KnownLHS.isKnownNeverLogicalZero(*F, Op->getType())) ||
           (F && KnownRHS.isKnownNeverLogicalZero(*F, Op->getType())))) {
        Known.knownNot(fcNan);
      }

      // X / -0.0 is -Inf (or NaN).
      // +X / +X is +X
      if (KnownLHS.isKnownNever(fcNegative) && KnownRHS.isKnownNever(fcNegative))
        Known.knownNot(fcNegative);
    } else {
      // Inf REM x and x REM 0 produce NaN.
      if (KnownLHS.isKnownNeverNaN() && KnownRHS.isKnownNeverNaN() &&
          KnownLHS.isKnownNeverInfinity() && F &&
          KnownRHS.isKnownNeverLogicalZero(*F, Op->getType())) {
        Known.knownNot(fcNan);
      }

      // The sign for frem is the same as the first operand.
      if (KnownLHS.cannotBeOrderedLessThanZero())
        Known.knownNot(KnownFPClass::OrderedLessThanZeroMask);
      if (KnownLHS.cannotBeOrderedGreaterThanZero())
        Known.knownNot(KnownFPClass::OrderedGreaterThanZeroMask);

      // See if we can be more aggressive about the sign of 0.
      if (KnownLHS.isKnownNever(fcNegative))
        Known.knownNot(fcNegative);
      if (KnownLHS.isKnownNever(fcPositive))
        Known.knownNot(fcPositive);
    }

    break;
  }
  case Instruction::FPExt: {
    // Infinity, nan and zero propagate from source.
    computeKnownFPClass(Op->getOperand(0), DemandedElts, InterestedClasses,
                        Known, Depth + 1, Q);

    const fltSemantics &DstTy =
        Op->getType()->getScalarType()->getFltSemantics();
    const fltSemantics &SrcTy =
        Op->getOperand(0)->getType()->getScalarType()->getFltSemantics();

    // All subnormal inputs should be in the normal range in the result type.
    if (APFloat::isRepresentableAsNormalIn(SrcTy, DstTy)) {
      if (Known.KnownFPClasses & fcPosSubnormal)
        Known.KnownFPClasses |= fcPosNormal;
      if (Known.KnownFPClasses & fcNegSubnormal)
        Known.KnownFPClasses |= fcNegNormal;
      Known.knownNot(fcSubnormal);
    }

    // Sign bit of a nan isn't guaranteed.
    if (!Known.isKnownNeverNaN())
      Known.SignBit = std::nullopt;
    break;
  }
  case Instruction::FPTrunc: {
    computeKnownFPClassForFPTrunc(Op, DemandedElts, InterestedClasses, Known,
                                  Depth, Q);
    break;
  }
  case Instruction::SIToFP:
  case Instruction::UIToFP: {
    // Cannot produce nan
    Known.knownNot(fcNan);

    // Integers cannot be subnormal
    Known.knownNot(fcSubnormal);

    // sitofp and uitofp turn into +0.0 for zero.
    Known.knownNot(fcNegZero);
    if (Op->getOpcode() == Instruction::UIToFP)
      Known.signBitMustBeZero();

    if (InterestedClasses & fcInf) {
      // Get width of largest magnitude integer (remove a bit if signed).
      // This still works for a signed minimum value because the largest FP
      // value is scaled by some fraction close to 2.0 (1.0 + 0.xxxx).
      int IntSize = Op->getOperand(0)->getType()->getScalarSizeInBits();
      if (Op->getOpcode() == Instruction::SIToFP)
        --IntSize;

      // If the exponent of the largest finite FP value can hold the largest
      // integer, the result of the cast must be finite.
      Type *FPTy = Op->getType()->getScalarType();
      if (ilogb(APFloat::getLargest(FPTy->getFltSemantics())) >= IntSize)
        Known.knownNot(fcInf);
    }

    break;
  }
  case Instruction::ExtractElement: {
    // Look through extract element. If the index is non-constant or
    // out-of-range demand all elements, otherwise just the extracted element.
    const Value *Vec = Op->getOperand(0);
    const Value *Idx = Op->getOperand(1);
    auto *CIdx = dyn_cast<ConstantInt>(Idx);

    if (auto *VecTy = dyn_cast<FixedVectorType>(Vec->getType())) {
      unsigned NumElts = VecTy->getNumElements();
      APInt DemandedVecElts = APInt::getAllOnes(NumElts);
      if (CIdx && CIdx->getValue().ult(NumElts))
        DemandedVecElts = APInt::getOneBitSet(NumElts, CIdx->getZExtValue());
      return computeKnownFPClass(Vec, DemandedVecElts, InterestedClasses, Known,
                                 Depth + 1, Q);
    }

    break;
  }
  case Instruction::InsertElement: {
    if (isa<ScalableVectorType>(Op->getType()))
      return;

    const Value *Vec = Op->getOperand(0);
    const Value *Elt = Op->getOperand(1);
    auto *CIdx = dyn_cast<ConstantInt>(Op->getOperand(2));
    unsigned NumElts = DemandedElts.getBitWidth();
    APInt DemandedVecElts = DemandedElts;
    bool NeedsElt = true;
    // If we know the index we are inserting to, clear it from Vec check.
    if (CIdx && CIdx->getValue().ult(NumElts)) {
      DemandedVecElts.clearBit(CIdx->getZExtValue());
      NeedsElt = DemandedElts[CIdx->getZExtValue()];
    }

    // Do we demand the inserted element?
    if (NeedsElt) {
      computeKnownFPClass(Elt, Known, InterestedClasses, Depth + 1, Q);
      // If we don't know any bits, early out.
      if (Known.isUnknown())
        break;
    } else {
      Known.KnownFPClasses = fcNone;
    }

    // Do we need anymore elements from Vec?
    if (!DemandedVecElts.isZero()) {
      KnownFPClass Known2;
      computeKnownFPClass(Vec, DemandedVecElts, InterestedClasses, Known2,
                          Depth + 1, Q);
      Known |= Known2;
    }

    break;
  }
  case Instruction::ShuffleVector: {
    // For undef elements, we don't know anything about the common state of
    // the shuffle result.
    APInt DemandedLHS, DemandedRHS;
    auto *Shuf = dyn_cast<ShuffleVectorInst>(Op);
    if (!Shuf || !getShuffleDemandedElts(Shuf, DemandedElts, DemandedLHS,
                                         DemandedRHS))
      return;

    if (!!DemandedLHS) {
      const Value *LHS = Shuf->getOperand(0);
      computeKnownFPClass(LHS, DemandedLHS, InterestedClasses, Known,
                          Depth + 1, Q);

      // If we don't know any bits, early out.
      if (Known.isUnknown())
        break;
    } else {
      Known.KnownFPClasses = fcNone;
    }

    if (!!DemandedRHS) {
      KnownFPClass Known2;
      const Value *RHS = Shuf->getOperand(1);
      computeKnownFPClass(RHS, DemandedRHS, InterestedClasses, Known2,
                          Depth + 1, Q);
      Known |= Known2;
    }

    break;
  }
  case Instruction::ExtractValue: {
    const ExtractValueInst *Extract = cast<ExtractValueInst>(Op);
    ArrayRef<unsigned> Indices = Extract->getIndices();
    const Value *Src = Extract->getAggregateOperand();
    if (isa<StructType>(Src->getType()) && Indices.size() == 1 &&
        Indices[0] == 0) {
      if (const auto *II = dyn_cast<IntrinsicInst>(Src)) {
        switch (II->getIntrinsicID()) {
        case Intrinsic::frexp: {
          Known.knownNot(fcSubnormal);

          KnownFPClass KnownSrc;
          computeKnownFPClass(II->getArgOperand(0), DemandedElts,
                              InterestedClasses, KnownSrc, Depth + 1, Q);

          const Function *F = cast<Instruction>(Op)->getFunction();

          if (KnownSrc.isKnownNever(fcNegative))
            Known.knownNot(fcNegative);
          else {
            if (F && KnownSrc.isKnownNeverLogicalNegZero(*F, Op->getType()))
              Known.knownNot(fcNegZero);
            if (KnownSrc.isKnownNever(fcNegInf))
              Known.knownNot(fcNegInf);
          }

          if (KnownSrc.isKnownNever(fcPositive))
            Known.knownNot(fcPositive);
          else {
            if (F && KnownSrc.isKnownNeverLogicalPosZero(*F, Op->getType()))
              Known.knownNot(fcPosZero);
            if (KnownSrc.isKnownNever(fcPosInf))
              Known.knownNot(fcPosInf);
          }

          Known.propagateNaN(KnownSrc);
          return;
        }
        default:
          break;
        }
      }
    }

    computeKnownFPClass(Src, DemandedElts, InterestedClasses, Known, Depth + 1,
                        Q);
    break;
  }
  case Instruction::PHI: {
    const PHINode *P = cast<PHINode>(Op);
    // Unreachable blocks may have zero-operand PHI nodes.
    if (P->getNumIncomingValues() == 0)
      break;

    // Otherwise take the unions of the known bit sets of the operands,
    // taking conservative care to avoid excessive recursion.
    const unsigned PhiRecursionLimit = MaxAnalysisRecursionDepth - 2;

    if (Depth < PhiRecursionLimit) {
      // Skip if every incoming value references to ourself.
      if (isa_and_nonnull<UndefValue>(P->hasConstantValue()))
        break;

      bool First = true;

      for (const Use &U : P->operands()) {
        Value *IncValue;
        Instruction *CxtI;
        breakSelfRecursivePHI(&U, P, IncValue, CxtI);
        // Skip direct self references.
        if (IncValue == P)
          continue;

        KnownFPClass KnownSrc;
        // Recurse, but cap the recursion to two levels, because we don't want
        // to waste time spinning around in loops. We need at least depth 2 to
        // detect known sign bits.
        computeKnownFPClass(IncValue, DemandedElts, InterestedClasses, KnownSrc,
                            PhiRecursionLimit,
                            Q.getWithoutCondContext().getWithInstruction(CxtI));

        if (First) {
          Known = KnownSrc;
          First = false;
        } else {
          Known |= KnownSrc;
        }

        if (Known.KnownFPClasses == fcAllFlags)
          break;
      }
    }

    break;
  }
  case Instruction::BitCast: {
    const Value *Src;
    if (!match(Op, m_ElementWiseBitCast(m_Value(Src))) ||
        !Src->getType()->isIntOrIntVectorTy())
      break;

    const Type *Ty = Op->getType()->getScalarType();
    KnownBits Bits(Ty->getScalarSizeInBits());
    computeKnownBits(Src, DemandedElts, Bits, Depth + 1, Q);

    // Transfer information from the sign bit.
    if (Bits.isNonNegative())
      Known.signBitMustBeZero();
    else if (Bits.isNegative())
      Known.signBitMustBeOne();

    if (Ty->isIEEE()) {
      // IEEE floats are NaN when all bits of the exponent plus at least one of
      // the fraction bits are 1. This means:
      //   - If we assume unknown bits are 0 and the value is NaN, it will
      //     always be NaN
      //   - If we assume unknown bits are 1 and the value is not NaN, it can
      //     never be NaN
      if (APFloat(Ty->getFltSemantics(), Bits.One).isNaN())
        Known.KnownFPClasses = fcNan;
      else if (!APFloat(Ty->getFltSemantics(), ~Bits.Zero).isNaN())
        Known.knownNot(fcNan);

      // Build KnownBits representing Inf and check if it must be equal or
      // unequal to this value.
      auto InfKB = KnownBits::makeConstant(
          APFloat::getInf(Ty->getFltSemantics()).bitcastToAPInt());
      InfKB.Zero.clearSignBit();
      if (const auto InfResult = KnownBits::eq(Bits, InfKB)) {
        assert(!InfResult.value());
        Known.knownNot(fcInf);
      } else if (Bits == InfKB) {
        Known.KnownFPClasses = fcInf;
      }

      // Build KnownBits representing Zero and check if it must be equal or
      // unequal to this value.
      auto ZeroKB = KnownBits::makeConstant(
          APFloat::getZero(Ty->getFltSemantics()).bitcastToAPInt());
      ZeroKB.Zero.clearSignBit();
      if (const auto ZeroResult = KnownBits::eq(Bits, ZeroKB)) {
        assert(!ZeroResult.value());
        Known.knownNot(fcZero);
      } else if (Bits == ZeroKB) {
        Known.KnownFPClasses = fcZero;
      }
    }

    break;
  }
  default:
    break;
  }
}
KnownFPClass llvm::computeKnownFPClass(const Value *V,
                                       const APInt &DemandedElts,
                                       FPClassTest InterestedClasses,
                                       unsigned Depth,
                                       const SimplifyQuery &SQ) {
  KnownFPClass KnownClasses;
  ::computeKnownFPClass(V, DemandedElts, InterestedClasses, KnownClasses, Depth,
                        SQ);
  return KnownClasses;
}

KnownFPClass llvm::computeKnownFPClass(const Value *V,
                                       FPClassTest InterestedClasses,
                                       unsigned Depth,
                                       const SimplifyQuery &SQ) {
  KnownFPClass Known;
  ::computeKnownFPClass(V, Known, InterestedClasses, Depth, SQ);
  return Known;
}
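
// Minimal usage sketch for the public wrappers above (assumed context; V and
// Q are hypothetical values provided by the caller):
//
//   KnownFPClass KFC = computeKnownFPClass(V, fcNan | fcInf, /*Depth=*/0, Q);
//   bool FiniteNonNaN = KFC.isKnownNeverNaN() && KFC.isKnownNeverInfinity();
//
// Passing a narrower InterestedClasses mask (here fcNan | fcInf) lets the
// analysis skip work that only matters for classes the caller ignores.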
Value *llvm::isBytewiseValue(Value *V, const DataLayout &DL) {

  // All byte-wide stores are splatable, even of arbitrary variables.
  if (V->getType()->isIntegerTy(8))
    return V;

  LLVMContext &Ctx = V->getContext();

  // Undef don't care.
  auto *UndefInt8 = UndefValue::get(Type::getInt8Ty(Ctx));
  if (isa<UndefValue>(V))
    return UndefInt8;

  // Return poison for zero-sized type.
  if (DL.getTypeStoreSize(V->getType()).isZero())
    return PoisonValue::get(Type::getInt8Ty(Ctx));

  Constant *C = dyn_cast<Constant>(V);
  if (!C) {
    // Conceptually, we could handle things like:
    //   %a = zext i8 %X to i16
    //   %b = shl i16 %a, 8
    //   %c = or i16 %a, %b
    // but until there is an example that actually needs this, it doesn't seem
    // worth worrying about.
    return nullptr;
  }

  // Handle 'null' ConstantArrayZero etc.
  if (C->isNullValue())
    return Constant::getNullValue(Type::getInt8Ty(Ctx));

  // Constant floating-point values can be handled as integer values if the
  // corresponding integer value is "byteable". An important case is 0.0.
  if (ConstantFP *CFP = dyn_cast<ConstantFP>(C)) {
    Type *Ty = nullptr;
    if (CFP->getType()->isHalfTy())
      Ty = Type::getInt16Ty(Ctx);
    else if (CFP->getType()->isFloatTy())
      Ty = Type::getInt32Ty(Ctx);
    else if (CFP->getType()->isDoubleTy())
      Ty = Type::getInt64Ty(Ctx);
    // Don't handle long double formats, which have strange constraints.
    return Ty ? isBytewiseValue(ConstantExpr::getBitCast(CFP, Ty), DL)
              : nullptr;
  }

  // We can handle constant integers that are multiple of 8 bits.
  if (ConstantInt *CI = dyn_cast<ConstantInt>(C)) {
    if (CI->getBitWidth() % 8 == 0) {
      assert(CI->getBitWidth() > 8 && "8 bits should be handled above!");
      if (!CI->getValue().isSplat(8))
        return nullptr;
      return ConstantInt::get(Ctx, CI->getValue().trunc(8));
    }
  }

  if (auto *CE = dyn_cast<ConstantExpr>(C)) {
    if (CE->getOpcode() == Instruction::IntToPtr) {
      if (auto *PtrTy = dyn_cast<PointerType>(CE->getType())) {
        unsigned BitWidth = DL.getPointerSizeInBits(PtrTy->getAddressSpace());
        if (Constant *Op = ConstantFoldIntegerCast(
                CE->getOperand(0), Type::getIntNTy(Ctx, BitWidth), false, DL))
          return isBytewiseValue(Op, DL);
      }
    }
  }

  auto Merge = [&](Value *LHS, Value *RHS) -> Value * {
    if (LHS == RHS)
      return LHS;
    if (!LHS || !RHS)
      return nullptr;
    if (LHS == UndefInt8)
      return RHS;
    if (RHS == UndefInt8)
      return LHS;
    return nullptr;
  };

  if (ConstantDataSequential *CA = dyn_cast<ConstantDataSequential>(C)) {
    Value *Val = UndefInt8;
    for (unsigned I = 0, E = CA->getNumElements(); I != E; ++I)
      if (!(Val = Merge(Val, isBytewiseValue(CA->getElementAsConstant(I), DL))))
        return nullptr;
    return Val;
  }

  if (isa<ConstantAggregate>(C)) {
    Value *Val = UndefInt8;
    for (Value *Op : C->operands())
      if (!(Val = Merge(Val, isBytewiseValue(Op, DL))))
        return nullptr;
    return Val;
  }

  // Don't try to handle the handful of other constants.
  return nullptr;
}
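
// Illustrative results of isBytewiseValue (not exhaustive):
//   i32 0x01010101 -> i8 1       (the value splats to a single byte)
//   double 0.0     -> i8 0       (handled by the null-value case above)
//   i16 0x0102     -> nullptr    (bytes differ, not splatable)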
// This is the recursive version of BuildSubAggregate. It takes a few different
// arguments. Idxs is the index within the nested struct From that we are
// looking at now (which is of type IndexedType). IdxSkip is the number of
// indices from Idxs that should be left out when inserting into the resulting
// struct. To is the result struct built so far, new insertvalue instructions
// build on that.
static Value *BuildSubAggregate(Value *From, Value *To, Type *IndexedType,
                                SmallVectorImpl<unsigned> &Idxs,
                                unsigned IdxSkip,
                                BasicBlock::iterator InsertBefore) {
  StructType *STy = dyn_cast<StructType>(IndexedType);
  if (STy) {
    // Save the original To argument so we can modify it
    Value *OrigTo = To;
    // General case, the type indexed by Idxs is a struct
    for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
      // Process each struct element recursively
      Idxs.push_back(i);
      Value *PrevTo = To;
      To = BuildSubAggregate(From, To, STy->getElementType(i), Idxs, IdxSkip,
                             InsertBefore);
      Idxs.pop_back();
      if (!To) {
        // Couldn't find any inserted value for this index? Cleanup
        while (PrevTo != OrigTo) {
          InsertValueInst *Del = cast<InsertValueInst>(PrevTo);
          PrevTo = Del->getAggregateOperand();
          Del->eraseFromParent();
        }
        // Stop processing elements
        break;
      }
    }
    // If we successfully found a value for each of our subaggregates
    if (To)
      return To;
  }
  // Base case, the type indexed by SourceIdxs is not a struct, or not all of
  // the struct's elements had a value that was inserted directly. In the latter
  // case, perhaps we can't determine each of the subelements individually, but
  // we might be able to find the complete struct somewhere.

  // Find the value that is at that particular spot
  Value *V = FindInsertedValue(From, Idxs);

  if (!V)
    return nullptr;

  // Insert the value in the new (sub) aggregate
  return InsertValueInst::Create(To, V, ArrayRef(Idxs).slice(IdxSkip), "tmp",
                                 InsertBefore);
}

// This helper takes a nested struct and extracts a part of it (which is again a
// struct) into a new value. For example, given the struct:
// { a, { b, { c, d }, e } }
// and the indices "1, 1" this returns
// { c, d }.
//
// It does this by inserting an insertvalue for each element in the resulting
// struct, as opposed to just inserting a single struct. This will only work if
// each of the elements of the substruct are known (ie, inserted into From by an
// insertvalue instruction somewhere).
//
// All inserted insertvalue instructions are inserted before InsertBefore
static Value *BuildSubAggregate(Value *From, ArrayRef<unsigned> idx_range,
                                BasicBlock::iterator InsertBefore) {
  Type *IndexedType = ExtractValueInst::getIndexedType(From->getType(),
                                                       idx_range);
  Value *To = PoisonValue::get(IndexedType);
  SmallVector<unsigned, 10> Idxs(idx_range);
  unsigned IdxSkip = Idxs.size();

  return BuildSubAggregate(From, To, IndexedType, Idxs, IdxSkip, InsertBefore);
}
/// Given an aggregate and a sequence of indices, see if the scalar value
/// indexed is already around as a register, for example if it was inserted
/// directly into the aggregate.
///
/// If InsertBefore is not null, this function will duplicate (modified)
/// insertvalues when a part of a nested struct is extracted.
Value *
llvm::FindInsertedValue(Value *V, ArrayRef<unsigned> idx_range,
                        std::optional<BasicBlock::iterator> InsertBefore) {
  // Nothing to index? Just return V then (this is useful at the end of our
  // recursion).
  if (idx_range.empty())
    return V;
  // We have indices, so V should have an indexable type.
  assert((V->getType()->isStructTy() || V->getType()->isArrayTy()) &&
         "Not looking at a struct or array?");
  assert(ExtractValueInst::getIndexedType(V->getType(), idx_range) &&
         "Invalid indices for type?");

  if (Constant *C = dyn_cast<Constant>(V)) {
    C = C->getAggregateElement(idx_range[0]);
    if (!C) return nullptr;
    return FindInsertedValue(C, idx_range.slice(1), InsertBefore);
  }

  if (InsertValueInst *I = dyn_cast<InsertValueInst>(V)) {
    // Loop the indices for the insertvalue instruction in parallel with the
    // requested indices
    const unsigned *req_idx = idx_range.begin();
    for (const unsigned *i = I->idx_begin(), *e = I->idx_end();
         i != e; ++i, ++req_idx) {
      if (req_idx == idx_range.end()) {
        // We can't handle this without inserting insertvalues
        if (!InsertBefore)
          return nullptr;

        // The requested index identifies a part of a nested aggregate. Handle
        // this specially. For example,
        // %A = insertvalue { i32, {i32, i32 } } undef, i32 10, 1, 0
        // %B = insertvalue { i32, {i32, i32 } } %A, i32 11, 1, 1
        // %C = extractvalue {i32, { i32, i32 } } %B, 1
        // This can be changed into
        // %A = insertvalue {i32, i32 } undef, i32 10, 0
        // %C = insertvalue {i32, i32 } %A, i32 11, 1
        // which allows the unused 0,0 element from the nested struct to be
        // removed.
        return BuildSubAggregate(V, ArrayRef(idx_range.begin(), req_idx),
                                 *InsertBefore);
      }

      // This insert value inserts something else than what we are looking for.
      // See if the (aggregate) value inserted into has the value we are
      // looking for, then.
      if (*req_idx != *i)
        return FindInsertedValue(I->getAggregateOperand(), idx_range,
                                 InsertBefore);
    }
    // If we end up here, the indices of the insertvalue match with those
    // requested (though possibly only partially). Now we recursively look at
    // the inserted value, passing any remaining indices.
    return FindInsertedValue(I->getInsertedValueOperand(),
                             ArrayRef(req_idx, idx_range.end()), InsertBefore);
  }

  if (ExtractValueInst *I = dyn_cast<ExtractValueInst>(V)) {
    // If we're extracting a value from an aggregate that was extracted from
    // something else, we can extract from that something else directly instead.
    // However, we will need to chain I's indices with the requested indices.

    // Calculate the number of indices required
    unsigned size = I->getNumIndices() + idx_range.size();
    // Allocate some space to put the new indices in
    SmallVector<unsigned, 5> Idxs;
    Idxs.reserve(size);
    // Add indices from the extract value instruction
    Idxs.append(I->idx_begin(), I->idx_end());

    // Add requested indices
    Idxs.append(idx_range.begin(), idx_range.end());

    assert(Idxs.size() == size
           && "Number of indices added not correct?");

    return FindInsertedValue(I->getAggregateOperand(), Idxs, InsertBefore);
  }
  // Otherwise, we don't know (such as, extracting from a function return value
  // or load instruction)
  return nullptr;
}
bool llvm::isGEPBasedOnPointerToString(const GEPOperator *GEP,
                                       unsigned CharSize) {
  // Make sure the GEP has exactly three arguments.
  if (GEP->getNumOperands() != 3)
    return false;

  // Make sure the index-ee is a pointer to array of \p CharSize integers.
  ArrayType *AT = dyn_cast<ArrayType>(GEP->getSourceElementType());
  if (!AT || !AT->getElementType()->isIntegerTy(CharSize))
    return false;

  // Check to make sure that the first operand of the GEP is an integer and
  // has value 0 so that we are sure we're indexing into the initializer.
  const ConstantInt *FirstIdx = dyn_cast<ConstantInt>(GEP->getOperand(1));
  if (!FirstIdx || !FirstIdx->isZero())
    return false;

  return true;
}
// If V refers to an initialized global constant, set Slice either to
// its initializer if the size of its elements equals ElementSize, or,
// for ElementSize == 8, to its representation as an array of unsiged
// char. Return true on success.
// Offset is in the unit "nr of ElementSize sized elements".
bool llvm::getConstantDataArrayInfo(const Value *V,
                                    ConstantDataArraySlice &Slice,
                                    unsigned ElementSize, uint64_t Offset) {
  assert(V && "V should not be null.");
  assert((ElementSize % 8) == 0 &&
         "ElementSize expected to be a multiple of the size of a byte.");
  unsigned ElementSizeInBytes = ElementSize / 8;

  // Drill down into the pointer expression V, ignoring any intervening
  // casts, and determine the identity of the object it references along
  // with the cumulative byte offset into it.
  const GlobalVariable *GV =
    dyn_cast<GlobalVariable>(getUnderlyingObject(V));
  if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer())
    // Fail if V is not based on constant global object.
    return false;

  const DataLayout &DL = GV->getDataLayout();
  APInt Off(DL.getIndexTypeSizeInBits(V->getType()), 0);

  if (GV != V->stripAndAccumulateConstantOffsets(DL, Off,
                                                 /*AllowNonInbounds*/ true))
    // Fail if a constant offset could not be determined.
    return false;

  uint64_t StartIdx = Off.getLimitedValue();
  if (StartIdx == UINT64_MAX)
    // Fail if the constant offset is excessive.
    return false;

  // Off/StartIdx is in the unit of bytes. So we need to convert to number of
  // elements. Simply bail out if that isn't possible.
  if ((StartIdx % ElementSizeInBytes) != 0)
    return false;

  Offset += StartIdx / ElementSizeInBytes;
  ConstantDataArray *Array = nullptr;
  ArrayType *ArrayTy = nullptr;

  if (GV->getInitializer()->isNullValue()) {
    Type *GVTy = GV->getValueType();
    uint64_t SizeInBytes = DL.getTypeStoreSize(GVTy).getFixedValue();
    uint64_t Length = SizeInBytes / ElementSizeInBytes;

    Slice.Array = nullptr;
    Slice.Offset = 0;
    // Return an empty Slice for undersized constants to let callers
    // transform even undefined library calls into simpler, well-defined
    // expressions. This is preferable to making the calls although it
    // prevents sanitizers from detecting such calls.
    Slice.Length = Length < Offset ? 0 : Length - Offset;
    return true;
  }

  auto *Init = const_cast<Constant *>(GV->getInitializer());
  if (auto *ArrayInit = dyn_cast<ConstantDataArray>(Init)) {
    Type *InitElTy = ArrayInit->getElementType();
    if (InitElTy->isIntegerTy(ElementSize)) {
      // If Init is an initializer for an array of the expected type
      // and size, use it as is.
      Array = ArrayInit;
      ArrayTy = ArrayInit->getType();
    }
  }

  if (!Array) {
    if (ElementSize != 8)
      // TODO: Handle conversions to larger integral types.
      return false;

    // Otherwise extract the portion of the initializer starting
    // at Offset as an array of bytes, and reset Offset.
    Init = ReadByteArrayFromGlobal(GV, Offset);
    if (!Init)
      return false;

    Offset = 0;
    Array = dyn_cast<ConstantDataArray>(Init);
    ArrayTy = dyn_cast<ArrayType>(Init->getType());
  }

  uint64_t NumElts = ArrayTy->getArrayNumElements();
  if (Offset > NumElts)
    return false;

  Slice.Array = Array;
  Slice.Offset = Offset;
  Slice.Length = NumElts - Offset;
  return true;
}
/// Extract bytes from the initializer of the constant array V, which need
/// not be a nul-terminated string. On success, store the bytes in Str and
/// return true. When TrimAtNul is set, Str will contain only the bytes up
/// to but not including the first nul. Return false on failure.
bool llvm::getConstantStringInfo(const Value *V, StringRef &Str,
                                 bool TrimAtNul) {
  ConstantDataArraySlice Slice;
  if (!getConstantDataArrayInfo(V, Slice, 8))
    return false;

  if (Slice.Array == nullptr) {
    if (TrimAtNul) {
      // Return a nul-terminated string even for an empty Slice. This is
      // safe because all existing SimplifyLibcalls callers require string
      // arguments and the behavior of the functions they fold is undefined
      // otherwise. Folding the calls this way is preferable to making
      // the undefined library calls, even though it prevents sanitizers
      // from reporting such calls.
      Str = StringRef();
      return true;
    }
    if (Slice.Length == 1) {
      Str = StringRef("", 1);
      return true;
    }
    // We cannot instantiate a StringRef as we do not have an appropriate string
    // of 0s at hand.
    return false;
  }

  // Start out with the entire array in the StringRef.
  Str = Slice.Array->getAsString();
  // Skip over 'offset' bytes.
  Str = Str.substr(Slice.Offset);

  if (TrimAtNul) {
    // Trim off the \0 and anything after it. If the array is not nul
    // terminated, we just return the whole end of string. The client may know
    // some other way that the string is length-bound.
    Str = Str.substr(0, Str.find('\0'));
  }
  return true;
}
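
// Usage sketch (illustrative; the global and variable names are hypothetical):
//
//   @.str = private constant [6 x i8] c"hello\00"
//
//   StringRef S;
//   if (getConstantStringInfo(PtrIntoStr, S))
//     ; // S == "hello" (the trailing nul is trimmed when TrimAtNul is set)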
// These next two are very similar to the above, but also look through PHI
// nodes.
// TODO: See if we can integrate these two together.

/// If we can compute the length of the string pointed to by
/// the specified pointer, return 'len+1'.  If we can't, return 0.
static uint64_t GetStringLengthH(const Value *V,
                                 SmallPtrSetImpl<const PHINode*> &PHIs,
                                 unsigned CharSize) {
  // Look through noop bitcast instructions.
  V = V->stripPointerCasts();

  // If this is a PHI node, there are two cases: either we have already seen it
  // or we haven't.
  if (const PHINode *PN = dyn_cast<PHINode>(V)) {
    if (!PHIs.insert(PN).second)
      return ~0ULL;  // already in the set.

    // If it was new, see if all the input strings are the same length.
    uint64_t LenSoFar = ~0ULL;
    for (Value *IncValue : PN->incoming_values()) {
      uint64_t Len = GetStringLengthH(IncValue, PHIs, CharSize);
      if (Len == 0) return 0; // Unknown length -> unknown.

      if (Len == ~0ULL) continue;

      if (Len != LenSoFar && LenSoFar != ~0ULL)
        return 0;    // Disagree -> unknown.
      LenSoFar = Len;
    }

    // Success, all agree.
    return LenSoFar;
  }

  // strlen(select(c,x,y)) -> strlen(x) ^ strlen(y)
  if (const SelectInst *SI = dyn_cast<SelectInst>(V)) {
    uint64_t Len1 = GetStringLengthH(SI->getTrueValue(), PHIs, CharSize);
    if (Len1 == 0) return 0;
    uint64_t Len2 = GetStringLengthH(SI->getFalseValue(), PHIs, CharSize);
    if (Len2 == 0) return 0;
    if (Len1 == ~0ULL) return Len2;
    if (Len2 == ~0ULL) return Len1;
    if (Len1 != Len2) return 0;
    return Len1;
  }

  // Otherwise, see if we can read the string.
  ConstantDataArraySlice Slice;
  if (!getConstantDataArrayInfo(V, Slice, CharSize))
    return 0;

  if (Slice.Array == nullptr)
    // Zeroinitializer (including an empty one).
    return 1;

  // Search for the first nul character. Return a conservative result even
  // when there is no nul. This is safe since otherwise the string function
  // being folded such as strlen is undefined, and can be preferable to
  // making the undefined library call.
  unsigned NullIndex = 0;
  for (unsigned E = Slice.Length; NullIndex < E; ++NullIndex) {
    if (Slice.Array->getElementAsInteger(Slice.Offset + NullIndex) == 0)
      break;
  }

  return NullIndex + 1;
}

/// If we can compute the length of the string pointed to by
/// the specified pointer, return 'len+1'.  If we can't, return 0.
uint64_t llvm::GetStringLength(const Value *V, unsigned CharSize) {
  if (!V->getType()->isPointerTy())
    return 0;

  SmallPtrSet<const PHINode*, 32> PHIs;
  uint64_t Len = GetStringLengthH(V, PHIs, CharSize);
  // If Len is ~0ULL, we had an infinite phi cycle: this is dead code, so return
  // an empty string as a length.
  return Len == ~0ULL ? 1 : Len;
}
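
// Illustrative result: for a pointer to the hypothetical global
//   @.str = private constant [6 x i8] c"hello\00"
// GetStringLength returns 6 (strlen("hello") + 1); it returns 0 whenever the
// length cannot be determined.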
const Value *
llvm::getArgumentAliasingToReturnedPointer(const CallBase *Call,
                                           bool MustPreserveNullness) {
  assert(Call &&
         "getArgumentAliasingToReturnedPointer only works on nonnull calls");
  if (const Value *RV = Call->getReturnedArgOperand())
    return RV;
  // This can be used only as an aliasing property.
  if (isIntrinsicReturningPointerAliasingArgumentWithoutCapturing(
          Call, MustPreserveNullness))
    return Call->getArgOperand(0);
  return nullptr;
}
bool llvm::isIntrinsicReturningPointerAliasingArgumentWithoutCapturing(
    const CallBase *Call, bool MustPreserveNullness) {
  switch (Call->getIntrinsicID()) {
  case Intrinsic::launder_invariant_group:
  case Intrinsic::strip_invariant_group:
  case Intrinsic::aarch64_irg:
  case Intrinsic::aarch64_tagp:
  // The amdgcn_make_buffer_rsrc function does not alter the address of the
  // input pointer (and thus preserve null-ness for the purposes of escape
  // analysis, which is where the MustPreserveNullness flag comes in to play).
  // However, it will not necessarily map ptr addrspace(N) null to ptr
  // addrspace(8) null, aka the "null descriptor", which has "all loads return
  // 0, all stores are dropped" semantics. Given the context of this intrinsic
  // list, no one should be relying on such a strict interpretation of
  // MustPreserveNullness (and, at time of writing, they are not), but we
  // document this fact out of an abundance of caution.
  case Intrinsic::amdgcn_make_buffer_rsrc:
    return true;
  case Intrinsic::ptrmask:
    return !MustPreserveNullness;
  case Intrinsic::threadlocal_address:
    // The underlying variable changes with thread ID. The Thread ID may change
    // at coroutine suspend points.
    return !Call->getParent()->getParent()->isPresplitCoroutine();
  default:
    return false;
  }
}
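// Illustrative example (not from the original source): in
//   %q = call ptr @llvm.launder.invariant.group.p0(ptr %p)
// %q aliases %p even though the call carries no 'returned' attribute, which is
// why this intrinsic list must stay in sync with CaptureTracking.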
/// \p PN defines a loop-variant pointer to an object.  Check if the
/// previous iteration of the loop was referring to the same object as \p PN.
static bool isSameUnderlyingObjectInLoop(const PHINode *PN,
                                         const LoopInfo *LI) {
  // Find the loop-defined value.
  Loop *L = LI->getLoopFor(PN->getParent());
  if (PN->getNumIncomingValues() != 2)
    return true;

  // Find the value from previous iteration.
  auto *PrevValue = dyn_cast<Instruction>(PN->getIncomingValue(0));
  if (!PrevValue || LI->getLoopFor(PrevValue->getParent()) != L)
    PrevValue = dyn_cast<Instruction>(PN->getIncomingValue(1));
  if (!PrevValue || LI->getLoopFor(PrevValue->getParent()) != L)
    return true;

  // If a new pointer is loaded in the loop, the pointer references a different
  // object in every iteration.  E.g.:
  //    for (i)
  //       int *p = a[i];
  if (auto *Load = dyn_cast<LoadInst>(PrevValue))
    if (!L->isLoopInvariant(Load->getPointerOperand()))
      return false;

  return true;
}
const Value *llvm::getUnderlyingObject(const Value *V, unsigned MaxLookup) {
  for (unsigned Count = 0; MaxLookup == 0 || Count < MaxLookup; ++Count) {
    if (auto *GEP = dyn_cast<GEPOperator>(V)) {
      const Value *PtrOp = GEP->getPointerOperand();
      if (!PtrOp->getType()->isPointerTy()) // Only handle scalar pointer base.
        return V;
      V = PtrOp;
    } else if (Operator::getOpcode(V) == Instruction::BitCast ||
               Operator::getOpcode(V) == Instruction::AddrSpaceCast) {
      Value *NewV = cast<Operator>(V)->getOperand(0);
      if (!NewV->getType()->isPointerTy())
        return V;
      V = NewV;
    } else if (auto *GA = dyn_cast<GlobalAlias>(V)) {
      if (GA->isInterposable())
        return V;
      V = GA->getAliasee();
    } else {
      if (auto *PHI = dyn_cast<PHINode>(V)) {
        // Look through single-arg phi nodes created by LCSSA.
        if (PHI->getNumIncomingValues() == 1) {
          V = PHI->getIncomingValue(0);
          continue;
        }
      } else if (auto *Call = dyn_cast<CallBase>(V)) {
        // CaptureTracking can know about special capturing properties of some
        // intrinsics like launder.invariant.group, that can't be expressed with
        // the attributes, but have properties like returning aliasing pointer.
        // Because some analysis may assume that nocaptured pointer is not
        // returned from some special intrinsic (because function would have to
        // be marked with returns attribute), it is crucial to use this function
        // because it should be in sync with CaptureTracking. Not using it may
        // cause weird miscompilations where 2 aliasing pointers are assumed to
        // noalias.
        if (auto *RP = getArgumentAliasingToReturnedPointer(Call, false)) {
          V = RP;
          continue;
        }
      }

      return V;
    }
    assert(V->getType()->isPointerTy() && "Unexpected operand type!");
  }
  return V;
}
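// Illustrative example (not from the original source): for
//   %a = alloca [16 x i8]
//   %p = getelementptr i8, ptr %a, i64 4
// getUnderlyingObject(%p) strips the GEP and returns the alloca %a.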
void llvm::getUnderlyingObjects(const Value *V,
                                SmallVectorImpl<const Value *> &Objects,
                                const LoopInfo *LI, unsigned MaxLookup) {
  SmallPtrSet<const Value *, 4> Visited;
  SmallVector<const Value *, 4> Worklist;
  Worklist.push_back(V);
  do {
    const Value *P = Worklist.pop_back_val();
    P = getUnderlyingObject(P, MaxLookup);

    if (!Visited.insert(P).second)
      continue;

    if (auto *SI = dyn_cast<SelectInst>(P)) {
      Worklist.push_back(SI->getTrueValue());
      Worklist.push_back(SI->getFalseValue());
      continue;
    }

    if (auto *PN = dyn_cast<PHINode>(P)) {
      // If this PHI changes the underlying object in every iteration of the
      // loop, don't look through it.  Consider:
      //   for (i) {
      //     Prev = Curr;     // Prev = PHI (Prev_0, Curr)
      //     Curr = A[i];
      //   }
      //
      // Prev is tracking Curr one iteration behind so they refer to different
      // underlying objects.
      if (!LI || !LI->isLoopHeader(PN->getParent()) ||
          isSameUnderlyingObjectInLoop(PN, LI))
        append_range(Worklist, PN->incoming_values());
      else
        Objects.push_back(P);
      continue;
    }

    Objects.push_back(P);
  } while (!Worklist.empty());
}
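// Illustrative example (not from the original source): for
//   %p = select i1 %c, ptr %a, ptr %b
// getUnderlyingObjects(%p, Objects) looks through both select arms and
// collects the underlying objects of %a and %b, whereas getUnderlyingObject
// would stop at the select itself.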
const Value *llvm::getUnderlyingObjectAggressive(const Value *V) {
  const unsigned MaxVisited = 8;

  SmallPtrSet<const Value *, 8> Visited;
  SmallVector<const Value *, 8> Worklist;
  Worklist.push_back(V);
  const Value *Object = nullptr;
  // Used as fallback if we can't find a common underlying object through
  // looking through phis and selects.
  bool First = true;
  const Value *FirstObject = getUnderlyingObject(V);
  do {
    const Value *P = Worklist.pop_back_val();
    P = First ? FirstObject : getUnderlyingObject(P);
    First = false;

    if (!Visited.insert(P).second)
      continue;

    if (Visited.size() == MaxVisited)
      return FirstObject;

    if (auto *SI = dyn_cast<SelectInst>(P)) {
      Worklist.push_back(SI->getTrueValue());
      Worklist.push_back(SI->getFalseValue());
      continue;
    }

    if (auto *PN = dyn_cast<PHINode>(P)) {
      append_range(Worklist, PN->incoming_values());
      continue;
    }

    if (!Object)
      Object = P;
    else if (Object != P)
      return FirstObject;
  } while (!Worklist.empty());

  return Object ? Object : FirstObject;
}
/// This is the function that does the work of looking through basic
/// ptrtoint+arithmetic+inttoptr sequences.
static const Value *getUnderlyingObjectFromInt(const Value *V) {
  do {
    if (const Operator *U = dyn_cast<Operator>(V)) {
      // If we find a ptrtoint, we can transfer control back to the
      // regular getUnderlyingObjectFromInt.
      if (U->getOpcode() == Instruction::PtrToInt)
        return U->getOperand(0);
      // If we find an add of a constant, a multiplied value, or a phi, it's
      // likely that the other operand will lead us to the base
      // object. We don't have to worry about the case where the
      // object address is somehow being computed by the multiply,
      // because our callers only care when the result is an
      // identifiable object.
      if (U->getOpcode() != Instruction::Add ||
          (!isa<ConstantInt>(U->getOperand(1)) &&
           Operator::getOpcode(U->getOperand(1)) != Instruction::Mul &&
           !isa<PHINode>(U->getOperand(1))))
        return V;
      V = U->getOperand(0);
    } else {
      return V;
    }
    assert(V->getType()->isIntegerTy() && "Unexpected operand type!");
  } while (true);
}
/// This is a wrapper around getUnderlyingObjects and adds support for basic
/// ptrtoint+arithmetic+inttoptr sequences.
/// It returns false if an unidentified object is found in getUnderlyingObjects.
bool llvm::getUnderlyingObjectsForCodeGen(const Value *V,
                                          SmallVectorImpl<Value *> &Objects) {
  SmallPtrSet<const Value *, 16> Visited;
  SmallVector<const Value *, 4> Working(1, V);
  do {
    V = Working.pop_back_val();

    SmallVector<const Value *, 4> Objs;
    getUnderlyingObjects(V, Objs);

    for (const Value *V : Objs) {
      if (!Visited.insert(V).second)
        continue;
      if (Operator::getOpcode(V) == Instruction::IntToPtr) {
        const Value *O =
            getUnderlyingObjectFromInt(cast<User>(V)->getOperand(0));
        if (O->getType()->isPointerTy()) {
          Working.push_back(O);
          continue;
        }
      }
      // If getUnderlyingObjects fails to find an identifiable object,
      // getUnderlyingObjectsForCodeGen also fails for safety.
      if (!isIdentifiedObject(V)) {
        Objects.clear();
        return false;
      }
      Objects.push_back(const_cast<Value *>(V));
    }
  } while (!Working.empty());
  return true;
}
AllocaInst *llvm::findAllocaForValue(Value *V, bool OffsetZero) {
  AllocaInst *Result = nullptr;
  SmallPtrSet<Value *, 4> Visited;
  SmallVector<Value *, 4> Worklist;

  auto AddWork = [&](Value *V) {
    if (Visited.insert(V).second)
      Worklist.push_back(V);
  };

  AddWork(V);
  do {
    V = Worklist.pop_back_val();
    assert(Visited.count(V));

    if (AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
      if (Result && Result != AI)
        return nullptr;
      Result = AI;
    } else if (CastInst *CI = dyn_cast<CastInst>(V)) {
      AddWork(CI->getOperand(0));
    } else if (PHINode *PN = dyn_cast<PHINode>(V)) {
      for (Value *IncValue : PN->incoming_values())
        AddWork(IncValue);
    } else if (auto *SI = dyn_cast<SelectInst>(V)) {
      AddWork(SI->getTrueValue());
      AddWork(SI->getFalseValue());
    } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(V)) {
      if (OffsetZero && !GEP->hasAllZeroIndices())
        return nullptr;
      AddWork(GEP->getPointerOperand());
    } else if (CallBase *CB = dyn_cast<CallBase>(V)) {
      Value *Returned = CB->getReturnedArgOperand();
      if (Returned)
        AddWork(Returned);
      else
        return nullptr;
    } else {
      return nullptr;
    }
  } while (!Worklist.empty());

  return Result;
}
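// Illustrative example (not from the original source): for
//   %a = alloca i32
//   %p = getelementptr i8, ptr %a, i64 0
// findAllocaForValue(%p, /*OffsetZero=*/true) returns %a; with a non-zero GEP
// index and OffsetZero set, it would return nullptr instead.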
static bool onlyUsedByLifetimeMarkersOrDroppableInstsHelper(
    const Value *V, bool AllowLifetime, bool AllowDroppable) {
  for (const User *U : V->users()) {
    const IntrinsicInst *II = dyn_cast<IntrinsicInst>(U);
    if (!II)
      return false;

    if (AllowLifetime && II->isLifetimeStartOrEnd())
      continue;

    if (AllowDroppable && II->isDroppable())
      continue;

    return false;
  }
  return true;
}

bool llvm::onlyUsedByLifetimeMarkers(const Value *V) {
  return onlyUsedByLifetimeMarkersOrDroppableInstsHelper(
      V, /* AllowLifetime */ true, /* AllowDroppable */ false);
}

bool llvm::onlyUsedByLifetimeMarkersOrDroppableInsts(const Value *V) {
  return onlyUsedByLifetimeMarkersOrDroppableInstsHelper(
      V, /* AllowLifetime */ true, /* AllowDroppable */ true);
}
bool llvm::isNotCrossLaneOperation(const Instruction *I) {
  if (auto *II = dyn_cast<IntrinsicInst>(I))
    return isTriviallyVectorizable(II->getIntrinsicID());
  auto *Shuffle = dyn_cast<ShuffleVectorInst>(I);
  return (!Shuffle || Shuffle->isSelect()) &&
         !isa<CallBase, BitCastInst, ExtractElementInst>(I);
}
bool llvm::isSafeToSpeculativelyExecute(const Instruction *Inst,
                                        const Instruction *CtxI,
                                        AssumptionCache *AC,
                                        const DominatorTree *DT,
                                        const TargetLibraryInfo *TLI,
                                        bool UseVariableInfo) {
  return isSafeToSpeculativelyExecuteWithOpcode(Inst->getOpcode(), Inst, CtxI,
                                                AC, DT, TLI, UseVariableInfo);
}
bool llvm::isSafeToSpeculativelyExecuteWithOpcode(
    unsigned Opcode, const Instruction *Inst, const Instruction *CtxI,
    AssumptionCache *AC, const DominatorTree *DT, const TargetLibraryInfo *TLI,
    bool UseVariableInfo) {
#ifndef NDEBUG
  if (Inst->getOpcode() != Opcode) {
    // Check that the operands are actually compatible with the Opcode override.
    auto hasEqualReturnAndLeadingOperandTypes =
        [](const Instruction *Inst, unsigned NumLeadingOperands) {
          if (Inst->getNumOperands() < NumLeadingOperands)
            return false;
          const Type *ExpectedType = Inst->getType();
          for (unsigned ItOp = 0; ItOp < NumLeadingOperands; ++ItOp)
            if (Inst->getOperand(ItOp)->getType() != ExpectedType)
              return false;
          return true;
        };
    assert(!Instruction::isBinaryOp(Opcode) ||
           hasEqualReturnAndLeadingOperandTypes(Inst, 2));
    assert(!Instruction::isUnaryOp(Opcode) ||
           hasEqualReturnAndLeadingOperandTypes(Inst, 1));
  }
#endif

  switch (Opcode) {
  default:
    return true;
  case Instruction::UDiv:
  case Instruction::URem: {
    // x / y is undefined if y == 0.
    const APInt *V;
    if (match(Inst->getOperand(1), m_APInt(V)))
      return *V != 0;
    return false;
  }
  case Instruction::SDiv:
  case Instruction::SRem: {
    // x / y is undefined if y == 0 or x == INT_MIN and y == -1
    const APInt *Numerator, *Denominator;
    if (!match(Inst->getOperand(1), m_APInt(Denominator)))
      return false;
    // We cannot hoist this division if the denominator is 0.
    if (*Denominator == 0)
      return false;
    // It's safe to hoist if the denominator is not 0 or -1.
    if (!Denominator->isAllOnes())
      return true;
    // At this point we know that the denominator is -1.  It is safe to hoist as
    // long we know that the numerator is not INT_MIN.
    if (match(Inst->getOperand(0), m_APInt(Numerator)))
      return !Numerator->isMinSignedValue();
    // The numerator *might* be MinSignedValue.
    return false;
  }
  case Instruction::Load: {
    if (!UseVariableInfo)
      return false;

    const LoadInst *LI = dyn_cast<LoadInst>(Inst);
    if (!LI)
      return false;
    if (mustSuppressSpeculation(*LI))
      return false;
    const DataLayout &DL = LI->getDataLayout();
    return isDereferenceableAndAlignedPointer(LI->getPointerOperand(),
                                              LI->getType(), LI->getAlign(), DL,
                                              CtxI, AC, DT, TLI);
  }
  case Instruction::Call: {
    auto *CI = dyn_cast<const CallInst>(Inst);
    if (!CI)
      return false;
    const Function *Callee = CI->getCalledFunction();

    // The called function could have undefined behavior or side-effects, even
    // if marked readnone nounwind.
    return Callee && Callee->isSpeculatable();
  }
  case Instruction::VAArg:
  case Instruction::Alloca:
  case Instruction::Invoke:
  case Instruction::CallBr:
  case Instruction::PHI:
  case Instruction::Store:
  case Instruction::Ret:
  case Instruction::Br:
  case Instruction::IndirectBr:
  case Instruction::Switch:
  case Instruction::Unreachable:
  case Instruction::Fence:
  case Instruction::AtomicRMW:
  case Instruction::AtomicCmpXchg:
  case Instruction::LandingPad:
  case Instruction::Resume:
  case Instruction::CatchSwitch:
  case Instruction::CatchPad:
  case Instruction::CatchRet:
  case Instruction::CleanupPad:
  case Instruction::CleanupRet:
    return false; // Misc instructions which have effects
  }
}
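// Illustrative example for the UDiv/URem case above (not from the original
// source): 'udiv i32 %x, 7' is safe to speculate because the divisor is a
// non-zero constant, while 'udiv i32 %x, %y' is not, since %y might be zero
// at runtime.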
bool llvm::mayHaveNonDefUseDependency(const Instruction &I) {
  if (I.mayReadOrWriteMemory())
    // Memory dependency possible
    return true;
  if (!isSafeToSpeculativelyExecute(&I))
    // Can't move above a maythrow call or infinite loop.  Or if an
    // inalloca alloca, above a stacksave call.
    return true;
  if (!isGuaranteedToTransferExecutionToSuccessor(&I))
    // 1) Can't reorder two inf-loop calls, even if readonly
    // 2) Also can't reorder an inf-loop call below an instruction which isn't
    //    safe to speculatively execute.  (Inverse of above)
    return true;
  return false;
}
/// Convert ConstantRange OverflowResult into ValueTracking OverflowResult.
static OverflowResult mapOverflowResult(ConstantRange::OverflowResult OR) {
  switch (OR) {
  case ConstantRange::OverflowResult::MayOverflow:
    return OverflowResult::MayOverflow;
  case ConstantRange::OverflowResult::AlwaysOverflowsLow:
    return OverflowResult::AlwaysOverflowsLow;
  case ConstantRange::OverflowResult::AlwaysOverflowsHigh:
    return OverflowResult::AlwaysOverflowsHigh;
  case ConstantRange::OverflowResult::NeverOverflows:
    return OverflowResult::NeverOverflows;
  }
  llvm_unreachable("Unknown OverflowResult");
}
/// Combine constant ranges from computeConstantRange() and computeKnownBits().
ConstantRange
llvm::computeConstantRangeIncludingKnownBits(const WithCache<const Value *> &V,
                                             bool ForSigned,
                                             const SimplifyQuery &SQ) {
  ConstantRange CR1 =
      ConstantRange::fromKnownBits(V.getKnownBits(SQ), ForSigned);
  ConstantRange CR2 = computeConstantRange(V, ForSigned, SQ.IIQ.UseInstrInfo);
  ConstantRange::PreferredRangeType RangeType =
      ForSigned ? ConstantRange::Signed : ConstantRange::Unsigned;
  return CR1.intersectWith(CR2, RangeType);
}
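// Illustrative example (not from the original source): if the known bits prove
// the top 24 bits of an i32 are zero (range [0, 256)) and
// computeConstantRange() proves the value lies in [10, 1000), the intersection
// [10, 256) is tighter than either input alone.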
OverflowResult llvm::computeOverflowForUnsignedMul(const Value *LHS,
                                                   const Value *RHS,
                                                   const SimplifyQuery &SQ,
                                                   bool IsNSW) {
  KnownBits LHSKnown = computeKnownBits(LHS, /*Depth=*/0, SQ);
  KnownBits RHSKnown = computeKnownBits(RHS, /*Depth=*/0, SQ);

  // mul nsw of two non-negative numbers is also nuw.
  if (IsNSW && LHSKnown.isNonNegative() && RHSKnown.isNonNegative())
    return OverflowResult::NeverOverflows;

  ConstantRange LHSRange = ConstantRange::fromKnownBits(LHSKnown, false);
  ConstantRange RHSRange = ConstantRange::fromKnownBits(RHSKnown, false);
  return mapOverflowResult(LHSRange.unsignedMulMayOverflow(RHSRange));
}
OverflowResult llvm::computeOverflowForSignedMul(const Value *LHS,
                                                 const Value *RHS,
                                                 const SimplifyQuery &SQ) {
  // Multiplying n * m significant bits yields a result of n + m significant
  // bits. If the total number of significant bits does not exceed the
  // result bit width (minus 1), there is no overflow.
  // This means if we have enough leading sign bits in the operands
  // we can guarantee that the result does not overflow.
  // Ref: "Hacker's Delight" by Henry Warren
  unsigned BitWidth = LHS->getType()->getScalarSizeInBits();

  // Note that underestimating the number of sign bits gives a more
  // conservative answer.
  unsigned SignBits =
      ::ComputeNumSignBits(LHS, 0, SQ) + ::ComputeNumSignBits(RHS, 0, SQ);

  // First handle the easy case: if we have enough sign bits there's
  // definitely no overflow.
  if (SignBits > BitWidth + 1)
    return OverflowResult::NeverOverflows;

  // There are two ambiguous cases where there can be no overflow:
  //   SignBits == BitWidth + 1    and
  //   SignBits == BitWidth
  // The second case is difficult to check, therefore we only handle the
  // first case.
  if (SignBits == BitWidth + 1) {
    // It overflows only when both arguments are negative and the true
    // product is exactly the minimum negative number.
    // E.g. mul i16 with 17 sign bits: 0xff00 * 0xff80 = 0x8000
    // For simplicity we just check if at least one side is not negative.
    KnownBits LHSKnown = computeKnownBits(LHS, /*Depth=*/0, SQ);
    KnownBits RHSKnown = computeKnownBits(RHS, /*Depth=*/0, SQ);
    if (LHSKnown.isNonNegative() || RHSKnown.isNonNegative())
      return OverflowResult::NeverOverflows;
  }
  return OverflowResult::MayOverflow;
}
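// Worked example (illustrative, not from the original source): for i16
// operands with 9 known sign bits each, SignBits = 18 > 16 + 1, so
// NeverOverflows is returned. With exactly 17 sign bits, an overflowing
// product such as 0xff00 * 0xff80 == 0x8000 requires both operands to be
// negative, which the non-negativity check above rules out whenever either
// side is known non-negative.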
OverflowResult
llvm::computeOverflowForUnsignedAdd(const WithCache<const Value *> &LHS,
                                    const WithCache<const Value *> &RHS,
                                    const SimplifyQuery &SQ) {
  ConstantRange LHSRange =
      computeConstantRangeIncludingKnownBits(LHS, /*ForSigned=*/false, SQ);
  ConstantRange RHSRange =
      computeConstantRangeIncludingKnownBits(RHS, /*ForSigned=*/false, SQ);
  return mapOverflowResult(LHSRange.unsignedAddMayOverflow(RHSRange));
}
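// Illustrative example (not from the original source): adding two i8 values
// each known to be below 0x40 gives operand ranges within [0, 0x40), so
// unsignedAddMayOverflow() reports NeverOverflows; if both operands could be
// as large as 0xFF, the result is MayOverflow.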
7253 static OverflowResult
7254 computeOverflowForSignedAdd(const WithCache
<const Value
*> &LHS
,
7255 const WithCache
<const Value
*> &RHS
,
7256 const AddOperator
*Add
, const SimplifyQuery
&SQ
) {
7257 if (Add
&& Add
->hasNoSignedWrap()) {
7258 return OverflowResult::NeverOverflows
;
7261 // If LHS and RHS each have at least two sign bits, the addition will look
7267 // If the carry into the most significant position is 0, X and Y can't both
7268 // be 1 and therefore the carry out of the addition is also 0.
7270 // If the carry into the most significant position is 1, X and Y can't both
7271 // be 0 and therefore the carry out of the addition is also 1.
7273 // Since the carry into the most significant position is always equal to
7274 // the carry out of the addition, there is no signed overflow.
7275 if (::ComputeNumSignBits(LHS
, 0, SQ
) > 1 &&
7276 ::ComputeNumSignBits(RHS
, 0, SQ
) > 1)
7277 return OverflowResult::NeverOverflows
;
7279 ConstantRange LHSRange
=
7280 computeConstantRangeIncludingKnownBits(LHS
, /*ForSigned=*/true, SQ
);
7281 ConstantRange RHSRange
=
7282 computeConstantRangeIncludingKnownBits(RHS
, /*ForSigned=*/true, SQ
);
7284 mapOverflowResult(LHSRange
.signedAddMayOverflow(RHSRange
));
7285 if (OR
!= OverflowResult::MayOverflow
)
7288 // The remaining code needs Add to be available. Early returns if not so.
7290 return OverflowResult::MayOverflow
;
7292 // If the sign of Add is the same as at least one of the operands, this add
7293 // CANNOT overflow. If this can be determined from the known bits of the
7294 // operands the above signedAddMayOverflow() check will have already done so.
7295 // The only other way to improve on the known bits is from an assumption, so
7296 // call computeKnownBitsFromContext() directly.
7297 bool LHSOrRHSKnownNonNegative
=
7298 (LHSRange
.isAllNonNegative() || RHSRange
.isAllNonNegative());
7299 bool LHSOrRHSKnownNegative
=
7300 (LHSRange
.isAllNegative() || RHSRange
.isAllNegative());
7301 if (LHSOrRHSKnownNonNegative
|| LHSOrRHSKnownNegative
) {
7302 KnownBits
AddKnown(LHSRange
.getBitWidth());
7303 computeKnownBitsFromContext(Add
, AddKnown
, /*Depth=*/0, SQ
);
7304 if ((AddKnown
.isNonNegative() && LHSOrRHSKnownNonNegative
) ||
7305 (AddKnown
.isNegative() && LHSOrRHSKnownNegative
))
7306 return OverflowResult::NeverOverflows
;
7309 return OverflowResult::MayOverflow
;
7312 OverflowResult
llvm::computeOverflowForUnsignedSub(const Value
*LHS
,
7314 const SimplifyQuery
&SQ
) {
7316 // The remainder of a value can't have greater magnitude than itself,
7317 // so the subtraction can't overflow.
7320 // In the minimal case, this would simplify to "?", so there's no subtract
7321 // at all. But if this analysis is used to peek through casts, for example,
7322 // then determining no-overflow may allow other transforms.
7324 // TODO: There are other patterns like this.
7325 // See simplifyICmpWithBinOpOnLHS() for candidates.
7326 if (match(RHS
, m_URem(m_Specific(LHS
), m_Value())) ||
7327 match(RHS
, m_NUWSub(m_Specific(LHS
), m_Value())))
7328 if (isGuaranteedNotToBeUndef(LHS
, SQ
.AC
, SQ
.CxtI
, SQ
.DT
))
7329 return OverflowResult::NeverOverflows
;
7331 if (auto C
= isImpliedByDomCondition(CmpInst::ICMP_UGE
, LHS
, RHS
, SQ
.CxtI
,
7334 return OverflowResult::NeverOverflows
;
7335 return OverflowResult::AlwaysOverflowsLow
;
7338 ConstantRange LHSRange
=
7339 computeConstantRangeIncludingKnownBits(LHS
, /*ForSigned=*/false, SQ
);
7340 ConstantRange RHSRange
=
7341 computeConstantRangeIncludingKnownBits(RHS
, /*ForSigned=*/false, SQ
);
7342 return mapOverflowResult(LHSRange
.unsignedSubMayOverflow(RHSRange
));
7345 OverflowResult
llvm::computeOverflowForSignedSub(const Value
*LHS
,
7347 const SimplifyQuery
&SQ
) {
7349 // The remainder of a value can't have greater magnitude than itself,
7350 // so the subtraction can't overflow.
7353 // In the minimal case, this would simplify to "?", so there's no subtract
7354 // at all. But if this analysis is used to peek through casts, for example,
7355 // then determining no-overflow may allow other transforms.
7356 if (match(RHS
, m_SRem(m_Specific(LHS
), m_Value())) ||
7357 match(RHS
, m_NSWSub(m_Specific(LHS
), m_Value())))
7358 if (isGuaranteedNotToBeUndef(LHS
, SQ
.AC
, SQ
.CxtI
, SQ
.DT
))
7359 return OverflowResult::NeverOverflows
;
7361 // If LHS and RHS each have at least two sign bits, the subtraction
7363 if (::ComputeNumSignBits(LHS
, 0, SQ
) > 1 &&
7364 ::ComputeNumSignBits(RHS
, 0, SQ
) > 1)
7365 return OverflowResult::NeverOverflows
;
7367 ConstantRange LHSRange
=
7368 computeConstantRangeIncludingKnownBits(LHS
, /*ForSigned=*/true, SQ
);
7369 ConstantRange RHSRange
=
7370 computeConstantRangeIncludingKnownBits(RHS
, /*ForSigned=*/true, SQ
);
7371 return mapOverflowResult(LHSRange
.signedSubMayOverflow(RHSRange
));
7374 bool llvm::isOverflowIntrinsicNoWrap(const WithOverflowInst
*WO
,
7375 const DominatorTree
&DT
) {
7376 SmallVector
<const BranchInst
*, 2> GuardingBranches
;
7377 SmallVector
<const ExtractValueInst
*, 2> Results
;
7379 for (const User
*U
: WO
->users()) {
7380 if (const auto *EVI
= dyn_cast
<ExtractValueInst
>(U
)) {
7381 assert(EVI
->getNumIndices() == 1 && "Obvious from CI's type");
7383 if (EVI
->getIndices()[0] == 0)
7384 Results
.push_back(EVI
);
7386 assert(EVI
->getIndices()[0] == 1 && "Obvious from CI's type");
7388 for (const auto *U
: EVI
->users())
7389 if (const auto *B
= dyn_cast
<BranchInst
>(U
)) {
7390 assert(B
->isConditional() && "How else is it using an i1?");
7391 GuardingBranches
.push_back(B
);
7395 // We are using the aggregate directly in a way we don't want to analyze
7396 // here (storing it to a global, say).
7401 auto AllUsesGuardedByBranch
= [&](const BranchInst
*BI
) {
7402 BasicBlockEdge
NoWrapEdge(BI
->getParent(), BI
->getSuccessor(1));
7403 if (!NoWrapEdge
.isSingleEdge())
7406 // Check if all users of the add are provably no-wrap.
7407 for (const auto *Result
: Results
) {
7408 // If the extractvalue itself is not executed on overflow, the we don't
7409 // need to check each use separately, since domination is transitive.
7410 if (DT
.dominates(NoWrapEdge
, Result
->getParent()))
7413 for (const auto &RU
: Result
->uses())
7414 if (!DT
.dominates(NoWrapEdge
, RU
))
7421 return llvm::any_of(GuardingBranches
, AllUsesGuardedByBranch
);
7424 /// Shifts return poison if shiftwidth is larger than the bitwidth.
7425 static bool shiftAmountKnownInRange(const Value
*ShiftAmount
) {
7426 auto *C
= dyn_cast
<Constant
>(ShiftAmount
);
7430 // Shifts return poison if shiftwidth is larger than the bitwidth.
7431 SmallVector
<const Constant
*, 4> ShiftAmounts
;
7432 if (auto *FVTy
= dyn_cast
<FixedVectorType
>(C
->getType())) {
7433 unsigned NumElts
= FVTy
->getNumElements();
7434 for (unsigned i
= 0; i
< NumElts
; ++i
)
7435 ShiftAmounts
.push_back(C
->getAggregateElement(i
));
7436 } else if (isa
<ScalableVectorType
>(C
->getType()))
7437 return false; // Can't tell, just return false to be safe
7439 ShiftAmounts
.push_back(C
);
7441 bool Safe
= llvm::all_of(ShiftAmounts
, [](const Constant
*C
) {
7442 auto *CI
= dyn_cast_or_null
<ConstantInt
>(C
);
7443 return CI
&& CI
->getValue().ult(C
->getType()->getIntegerBitWidth());
7449 enum class UndefPoisonKind
{
7450 PoisonOnly
= (1 << 0),
7451 UndefOnly
= (1 << 1),
7452 UndefOrPoison
= PoisonOnly
| UndefOnly
,
7455 static bool includesPoison(UndefPoisonKind Kind
) {
7456 return (unsigned(Kind
) & unsigned(UndefPoisonKind::PoisonOnly
)) != 0;
7459 static bool includesUndef(UndefPoisonKind Kind
) {
7460 return (unsigned(Kind
) & unsigned(UndefPoisonKind::UndefOnly
)) != 0;
7463 static bool canCreateUndefOrPoison(const Operator
*Op
, UndefPoisonKind Kind
,
7464 bool ConsiderFlagsAndMetadata
) {
7466 if (ConsiderFlagsAndMetadata
&& includesPoison(Kind
) &&
7467 Op
->hasPoisonGeneratingAnnotations())
7470 unsigned Opcode
= Op
->getOpcode();
7472 // Check whether opcode is a poison/undef-generating operation
7474 case Instruction::Shl
:
7475 case Instruction::AShr
:
7476 case Instruction::LShr
:
7477 return includesPoison(Kind
) && !shiftAmountKnownInRange(Op
->getOperand(1));
7478 case Instruction::FPToSI
:
7479 case Instruction::FPToUI
:
7480 // fptosi/ui yields poison if the resulting value does not fit in the
7481 // destination type.
7483 case Instruction::Call
:
7484 if (auto *II
= dyn_cast
<IntrinsicInst
>(Op
)) {
7485 switch (II
->getIntrinsicID()) {
7486 // TODO: Add more intrinsics.
7487 case Intrinsic::ctlz
:
7488 case Intrinsic::cttz
:
7489 case Intrinsic::abs
:
7490 if (cast
<ConstantInt
>(II
->getArgOperand(1))->isNullValue())
7493 case Intrinsic::ctpop
:
7494 case Intrinsic::bswap
:
7495 case Intrinsic::bitreverse
:
7496 case Intrinsic::fshl
:
7497 case Intrinsic::fshr
:
7498 case Intrinsic::smax
:
7499 case Intrinsic::smin
:
7500 case Intrinsic::umax
:
7501 case Intrinsic::umin
:
7502 case Intrinsic::ptrmask
:
7503 case Intrinsic::fptoui_sat
:
7504 case Intrinsic::fptosi_sat
:
7505 case Intrinsic::sadd_with_overflow
:
7506 case Intrinsic::ssub_with_overflow
:
7507 case Intrinsic::smul_with_overflow
:
7508 case Intrinsic::uadd_with_overflow
:
7509 case Intrinsic::usub_with_overflow
:
7510 case Intrinsic::umul_with_overflow
:
7511 case Intrinsic::sadd_sat
:
7512 case Intrinsic::uadd_sat
:
7513 case Intrinsic::ssub_sat
:
7514 case Intrinsic::usub_sat
:
7516 case Intrinsic::sshl_sat
:
7517 case Intrinsic::ushl_sat
:
7518 return includesPoison(Kind
) &&
7519 !shiftAmountKnownInRange(II
->getArgOperand(1));
7520 case Intrinsic::fma
:
7521 case Intrinsic::fmuladd
:
7522 case Intrinsic::sqrt
:
7523 case Intrinsic::powi
:
7524 case Intrinsic::sin
:
7525 case Intrinsic::cos
:
7526 case Intrinsic::pow
:
7527 case Intrinsic::log
:
7528 case Intrinsic::log10
:
7529 case Intrinsic::log2
:
7530 case Intrinsic::exp
:
7531 case Intrinsic::exp2
:
7532 case Intrinsic::exp10
:
7533 case Intrinsic::fabs
:
7534 case Intrinsic::copysign
:
7535 case Intrinsic::floor
:
7536 case Intrinsic::ceil
:
7537 case Intrinsic::trunc
:
7538 case Intrinsic::rint
:
7539 case Intrinsic::nearbyint
:
7540 case Intrinsic::round
:
7541 case Intrinsic::roundeven
:
7542 case Intrinsic::fptrunc_round
:
7543 case Intrinsic::canonicalize
:
7544 case Intrinsic::arithmetic_fence
:
7545 case Intrinsic::minnum
:
7546 case Intrinsic::maxnum
:
7547 case Intrinsic::minimum
:
7548 case Intrinsic::maximum
:
7549 case Intrinsic::is_fpclass
:
7550 case Intrinsic::ldexp
:
7551 case Intrinsic::frexp
:
7553 case Intrinsic::lround
:
7554 case Intrinsic::llround
:
7555 case Intrinsic::lrint
:
7556 case Intrinsic::llrint
:
7557 // If the value doesn't fit an unspecified value is returned (but this
7563 case Instruction::CallBr
:
7564 case Instruction::Invoke
: {
7565 const auto *CB
= cast
<CallBase
>(Op
);
7566 return !CB
->hasRetAttr(Attribute::NoUndef
);
7568 case Instruction::InsertElement
:
7569 case Instruction::ExtractElement
: {
7570 // If index exceeds the length of the vector, it returns poison
7571 auto *VTy
= cast
<VectorType
>(Op
->getOperand(0)->getType());
7572 unsigned IdxOp
= Op
->getOpcode() == Instruction::InsertElement
? 2 : 1;
7573 auto *Idx
= dyn_cast
<ConstantInt
>(Op
->getOperand(IdxOp
));
7574 if (includesPoison(Kind
))
7576 Idx
->getValue().uge(VTy
->getElementCount().getKnownMinValue());
7579 case Instruction::ShuffleVector
: {
7580 ArrayRef
<int> Mask
= isa
<ConstantExpr
>(Op
)
7581 ? cast
<ConstantExpr
>(Op
)->getShuffleMask()
7582 : cast
<ShuffleVectorInst
>(Op
)->getShuffleMask();
7583 return includesPoison(Kind
) && is_contained(Mask
, PoisonMaskElem
);
7585 case Instruction::FNeg
:
7586 case Instruction::PHI
:
7587 case Instruction::Select
:
7588 case Instruction::URem
:
7589 case Instruction::SRem
:
7590 case Instruction::ExtractValue
:
7591 case Instruction::InsertValue
:
7592 case Instruction::Freeze
:
7593 case Instruction::ICmp
:
7594 case Instruction::FCmp
:
7595 case Instruction::FAdd
:
7596 case Instruction::FSub
:
7597 case Instruction::FMul
:
7598 case Instruction::FDiv
:
7599 case Instruction::FRem
:
7601 case Instruction::GetElementPtr
:
7602 // inbounds is handled above
7603 // TODO: what about inrange on constexpr?
7606 const auto *CE
= dyn_cast
<ConstantExpr
>(Op
);
7607 if (isa
<CastInst
>(Op
) || (CE
&& CE
->isCast()))
7609 else if (Instruction::isBinaryOp(Opcode
))
7611 // Be conservative and return true.
7617 bool llvm::canCreateUndefOrPoison(const Operator
*Op
,
7618 bool ConsiderFlagsAndMetadata
) {
7619 return ::canCreateUndefOrPoison(Op
, UndefPoisonKind::UndefOrPoison
,
7620 ConsiderFlagsAndMetadata
);
7623 bool llvm::canCreatePoison(const Operator
*Op
, bool ConsiderFlagsAndMetadata
) {
7624 return ::canCreateUndefOrPoison(Op
, UndefPoisonKind::PoisonOnly
,
7625 ConsiderFlagsAndMetadata
);
7628 static bool directlyImpliesPoison(const Value
*ValAssumedPoison
, const Value
*V
,
7630 if (ValAssumedPoison
== V
)
7633 const unsigned MaxDepth
= 2;
7634 if (Depth
>= MaxDepth
)
7637 if (const auto *I
= dyn_cast
<Instruction
>(V
)) {
7638 if (any_of(I
->operands(), [=](const Use
&Op
) {
7639 return propagatesPoison(Op
) &&
7640 directlyImpliesPoison(ValAssumedPoison
, Op
, Depth
+ 1);
7644 // V = extractvalue V0, idx
7645 // V2 = extractvalue V0, idx2
7646 // V0's elements are all poison or not. (e.g., add_with_overflow)
7647 const WithOverflowInst
*II
;
7648 if (match(I
, m_ExtractValue(m_WithOverflowInst(II
))) &&
7649 (match(ValAssumedPoison
, m_ExtractValue(m_Specific(II
))) ||
7650 llvm::is_contained(II
->args(), ValAssumedPoison
)))
7656 static bool impliesPoison(const Value
*ValAssumedPoison
, const Value
*V
,
7658 if (isGuaranteedNotToBePoison(ValAssumedPoison
))
7661 if (directlyImpliesPoison(ValAssumedPoison
, V
, /* Depth */ 0))
7664 const unsigned MaxDepth
= 2;
7665 if (Depth
>= MaxDepth
)
7668 const auto *I
= dyn_cast
<Instruction
>(ValAssumedPoison
);
7669 if (I
&& !canCreatePoison(cast
<Operator
>(I
))) {
7670 return all_of(I
->operands(), [=](const Value
*Op
) {
7671 return impliesPoison(Op
, V
, Depth
+ 1);
7677 bool llvm::impliesPoison(const Value
*ValAssumedPoison
, const Value
*V
) {
7678 return ::impliesPoison(ValAssumedPoison
, V
, /* Depth */ 0);
7681 static bool programUndefinedIfUndefOrPoison(const Value
*V
, bool PoisonOnly
);
7683 static bool isGuaranteedNotToBeUndefOrPoison(
7684 const Value
*V
, AssumptionCache
*AC
, const Instruction
*CtxI
,
7685 const DominatorTree
*DT
, unsigned Depth
, UndefPoisonKind Kind
) {
7686 if (Depth
>= MaxAnalysisRecursionDepth
)
7689 if (isa
<MetadataAsValue
>(V
))
7692 if (const auto *A
= dyn_cast
<Argument
>(V
)) {
7693 if (A
->hasAttribute(Attribute::NoUndef
) ||
7694 A
->hasAttribute(Attribute::Dereferenceable
) ||
7695 A
->hasAttribute(Attribute::DereferenceableOrNull
))
7699 if (auto *C
= dyn_cast
<Constant
>(V
)) {
7700 if (isa
<PoisonValue
>(C
))
7701 return !includesPoison(Kind
);
7703 if (isa
<UndefValue
>(C
))
7704 return !includesUndef(Kind
);
7706 if (isa
<ConstantInt
>(C
) || isa
<GlobalVariable
>(C
) || isa
<ConstantFP
>(V
) ||
7707 isa
<ConstantPointerNull
>(C
) || isa
<Function
>(C
))
7710 if (C
->getType()->isVectorTy() && !isa
<ConstantExpr
>(C
)) {
7711 if (includesUndef(Kind
) && C
->containsUndefElement())
7713 if (includesPoison(Kind
) && C
->containsPoisonElement())
7715 return !C
->containsConstantExpression();
7719 // Strip cast operations from a pointer value.
7720 // Note that stripPointerCastsSameRepresentation can strip off getelementptr
7721 // inbounds with zero offset. To guarantee that the result isn't poison, the
7722 // stripped pointer is checked as it has to be pointing into an allocated
7723 // object or be null `null` to ensure `inbounds` getelement pointers with a
7724 // zero offset could not produce poison.
7725 // It can strip off addrspacecast that do not change bit representation as
7726 // well. We believe that such addrspacecast is equivalent to no-op.
7727 auto *StrippedV
= V
->stripPointerCastsSameRepresentation();
7728 if (isa
<AllocaInst
>(StrippedV
) || isa
<GlobalVariable
>(StrippedV
) ||
7729 isa
<Function
>(StrippedV
) || isa
<ConstantPointerNull
>(StrippedV
))
7732 auto OpCheck
= [&](const Value
*V
) {
7733 return isGuaranteedNotToBeUndefOrPoison(V
, AC
, CtxI
, DT
, Depth
+ 1, Kind
);
7736 if (auto *Opr
= dyn_cast
<Operator
>(V
)) {
7737 // If the value is a freeze instruction, then it can never
7738 // be undef or poison.
7739 if (isa
<FreezeInst
>(V
))
7742 if (const auto *CB
= dyn_cast
<CallBase
>(V
)) {
7743 if (CB
->hasRetAttr(Attribute::NoUndef
) ||
7744 CB
->hasRetAttr(Attribute::Dereferenceable
) ||
7745 CB
->hasRetAttr(Attribute::DereferenceableOrNull
))
7749 if (const auto *PN
= dyn_cast
<PHINode
>(V
)) {
7750 unsigned Num
= PN
->getNumIncomingValues();
7751 bool IsWellDefined
= true;
7752 for (unsigned i
= 0; i
< Num
; ++i
) {
7753 auto *TI
= PN
->getIncomingBlock(i
)->getTerminator();
7754 if (!isGuaranteedNotToBeUndefOrPoison(PN
->getIncomingValue(i
), AC
, TI
,
7755 DT
, Depth
+ 1, Kind
)) {
7756 IsWellDefined
= false;
7762 } else if (!::canCreateUndefOrPoison(Opr
, Kind
,
7763 /*ConsiderFlagsAndMetadata*/ true) &&
7764 all_of(Opr
->operands(), OpCheck
))
7768 if (auto *I
= dyn_cast
<LoadInst
>(V
))
7769 if (I
->hasMetadata(LLVMContext::MD_noundef
) ||
7770 I
->hasMetadata(LLVMContext::MD_dereferenceable
) ||
7771 I
->hasMetadata(LLVMContext::MD_dereferenceable_or_null
))
7774 if (programUndefinedIfUndefOrPoison(V
, !includesUndef(Kind
)))
7777 // CxtI may be null or a cloned instruction.
7778 if (!CtxI
|| !CtxI
->getParent() || !DT
)
7781 auto *DNode
= DT
->getNode(CtxI
->getParent());
7783 // Unreachable block
7786 // If V is used as a branch condition before reaching CtxI, V cannot be
7790 // CtxI ; V cannot be undef or poison here
7791 auto *Dominator
= DNode
->getIDom();
7792 // This check is purely for compile time reasons: we can skip the IDom walk
7793 // if what we are checking for includes undef and the value is not an integer.
7794 if (!includesUndef(Kind
) || V
->getType()->isIntegerTy())
7796 auto *TI
= Dominator
->getBlock()->getTerminator();
7798 Value
*Cond
= nullptr;
7799 if (auto BI
= dyn_cast_or_null
<BranchInst
>(TI
)) {
7800 if (BI
->isConditional())
7801 Cond
= BI
->getCondition();
7802 } else if (auto SI
= dyn_cast_or_null
<SwitchInst
>(TI
)) {
7803 Cond
= SI
->getCondition();
7809 else if (!includesUndef(Kind
) && isa
<Operator
>(Cond
)) {
7810 // For poison, we can analyze further
7811 auto *Opr
= cast
<Operator
>(Cond
);
7812 if (any_of(Opr
->operands(), [V
](const Use
&U
) {
7813 return V
== U
&& propagatesPoison(U
);
7819 Dominator
= Dominator
->getIDom();
7822 if (getKnowledgeValidInContext(V
, {Attribute::NoUndef
}, CtxI
, DT
, AC
))
7828 bool llvm::isGuaranteedNotToBeUndefOrPoison(const Value
*V
, AssumptionCache
*AC
,
7829 const Instruction
*CtxI
,
7830 const DominatorTree
*DT
,
7832 return ::isGuaranteedNotToBeUndefOrPoison(V
, AC
, CtxI
, DT
, Depth
,
7833 UndefPoisonKind::UndefOrPoison
);
7836 bool llvm::isGuaranteedNotToBePoison(const Value
*V
, AssumptionCache
*AC
,
7837 const Instruction
*CtxI
,
7838 const DominatorTree
*DT
, unsigned Depth
) {
7839 return ::isGuaranteedNotToBeUndefOrPoison(V
, AC
, CtxI
, DT
, Depth
,
7840 UndefPoisonKind::PoisonOnly
);
7843 bool llvm::isGuaranteedNotToBeUndef(const Value
*V
, AssumptionCache
*AC
,
7844 const Instruction
*CtxI
,
7845 const DominatorTree
*DT
, unsigned Depth
) {
7846 return ::isGuaranteedNotToBeUndefOrPoison(V
, AC
, CtxI
, DT
, Depth
,
7847 UndefPoisonKind::UndefOnly
);
7850 /// Return true if undefined behavior would provably be executed on the path to
7851 /// OnPathTo if Root produced a posion result. Note that this doesn't say
7852 /// anything about whether OnPathTo is actually executed or whether Root is
7853 /// actually poison. This can be used to assess whether a new use of Root can
7854 /// be added at a location which is control equivalent with OnPathTo (such as
7855 /// immediately before it) without introducing UB which didn't previously
7856 /// exist. Note that a false result conveys no information.
7857 bool llvm::mustExecuteUBIfPoisonOnPathTo(Instruction
*Root
,
7858 Instruction
*OnPathTo
,
7859 DominatorTree
*DT
) {
7860 // Basic approach is to assume Root is poison, propagate poison forward
7861 // through all users we can easily track, and then check whether any of those
7862 // users are provable UB and must execute before out exiting block might
7865 // The set of all recursive users we've visited (which are assumed to all be
7866 // poison because of said visit)
7867 SmallSet
<const Value
*, 16> KnownPoison
;
7868 SmallVector
<const Instruction
*, 16> Worklist
;
7869 Worklist
.push_back(Root
);
7870 while (!Worklist
.empty()) {
7871 const Instruction
*I
= Worklist
.pop_back_val();
7873 // If we know this must trigger UB on a path leading our target.
7874 if (mustTriggerUB(I
, KnownPoison
) && DT
->dominates(I
, OnPathTo
))
7877 // If we can't analyze propagation through this instruction, just skip it
7878 // and transitive users. Safe as false is a conservative result.
7879 if (I
!= Root
&& !any_of(I
->operands(), [&KnownPoison
](const Use
&U
) {
7880 return KnownPoison
.contains(U
) && propagatesPoison(U
);
7884 if (KnownPoison
.insert(I
).second
)
7885 for (const User
*User
: I
->users())
7886 Worklist
.push_back(cast
<Instruction
>(User
));
7889 // Might be non-UB, or might have a path we couldn't prove must execute on
7890 // way to exiting bb.
7894 OverflowResult
llvm::computeOverflowForSignedAdd(const AddOperator
*Add
,
7895 const SimplifyQuery
&SQ
) {
7896 return ::computeOverflowForSignedAdd(Add
->getOperand(0), Add
->getOperand(1),
7901 llvm::computeOverflowForSignedAdd(const WithCache
<const Value
*> &LHS
,
7902 const WithCache
<const Value
*> &RHS
,
7903 const SimplifyQuery
&SQ
) {
7904 return ::computeOverflowForSignedAdd(LHS
, RHS
, nullptr, SQ
);
7907 bool llvm::isGuaranteedToTransferExecutionToSuccessor(const Instruction
*I
) {
7908 // Note: An atomic operation isn't guaranteed to return in a reasonable amount
7909 // of time because it's possible for another thread to interfere with it for an
7910 // arbitrary length of time, but programs aren't allowed to rely on that.
7912 // If there is no successor, then execution can't transfer to it.
7913 if (isa
<ReturnInst
>(I
))
7915 if (isa
<UnreachableInst
>(I
))
7918 // Note: Do not add new checks here; instead, change Instruction::mayThrow or
7919 // Instruction::willReturn.
7921 // FIXME: Move this check into Instruction::willReturn.
7922 if (isa
<CatchPadInst
>(I
)) {
7923 switch (classifyEHPersonality(I
->getFunction()->getPersonalityFn())) {
7925 // A catchpad may invoke exception object constructors and such, which
7926 // in some languages can be arbitrary code, so be conservative by default.
7928 case EHPersonality::CoreCLR
:
7929 // For CoreCLR, it just involves a type test.
7934 // An instruction that returns without throwing must transfer control flow
7936 return !I
->mayThrow() && I
->willReturn();
7939 bool llvm::isGuaranteedToTransferExecutionToSuccessor(const BasicBlock
*BB
) {
7940 // TODO: This is slightly conservative for invoke instruction since exiting
7941 // via an exception *is* normal control for them.
7942 for (const Instruction
&I
: *BB
)
7943 if (!isGuaranteedToTransferExecutionToSuccessor(&I
))
7948 bool llvm::isGuaranteedToTransferExecutionToSuccessor(
7949 BasicBlock::const_iterator Begin
, BasicBlock::const_iterator End
,
7950 unsigned ScanLimit
) {
7951 return isGuaranteedToTransferExecutionToSuccessor(make_range(Begin
, End
),
7955 bool llvm::isGuaranteedToTransferExecutionToSuccessor(
7956 iterator_range
<BasicBlock::const_iterator
> Range
, unsigned ScanLimit
) {
7957 assert(ScanLimit
&& "scan limit must be non-zero");
7958 for (const Instruction
&I
: Range
) {
7959 if (isa
<DbgInfoIntrinsic
>(I
))
7961 if (--ScanLimit
== 0)
7963 if (!isGuaranteedToTransferExecutionToSuccessor(&I
))
7969 bool llvm::isGuaranteedToExecuteForEveryIteration(const Instruction
*I
,
7971 // The loop header is guaranteed to be executed for every iteration.
7973 // FIXME: Relax this constraint to cover all basic blocks that are
7974 // guaranteed to be executed at every iteration.
7975 if (I
->getParent() != L
->getHeader()) return false;
7977 for (const Instruction
&LI
: *L
->getHeader()) {
7978 if (&LI
== I
) return true;
7979 if (!isGuaranteedToTransferExecutionToSuccessor(&LI
)) return false;
7981 llvm_unreachable("Instruction not contained in its own parent basic block.");
7984 bool llvm::propagatesPoison(const Use
&PoisonOp
) {
7985 const Operator
*I
= cast
<Operator
>(PoisonOp
.getUser());
7986 switch (I
->getOpcode()) {
7987 case Instruction::Freeze
:
7988 case Instruction::PHI
:
7989 case Instruction::Invoke
:
7991 case Instruction::Select
:
7992 return PoisonOp
.getOperandNo() == 0;
7993 case Instruction::Call
:
7994 if (auto *II
= dyn_cast
<IntrinsicInst
>(I
)) {
7995 switch (II
->getIntrinsicID()) {
7996 // TODO: Add more intrinsics.
7997 case Intrinsic::sadd_with_overflow
:
7998 case Intrinsic::ssub_with_overflow
:
7999 case Intrinsic::smul_with_overflow
:
8000 case Intrinsic::uadd_with_overflow
:
8001 case Intrinsic::usub_with_overflow
:
8002 case Intrinsic::umul_with_overflow
:
8003 // If an input is a vector containing a poison element, the
8004 // two output vectors (calculated results, overflow bits)'
8005 // corresponding lanes are poison.
8007 case Intrinsic::ctpop
:
8008 case Intrinsic::ctlz
:
8009 case Intrinsic::cttz
:
8010 case Intrinsic::abs
:
8011 case Intrinsic::smax
:
8012 case Intrinsic::smin
:
8013 case Intrinsic::umax
:
8014 case Intrinsic::umin
:
8015 case Intrinsic::bitreverse
:
8016 case Intrinsic::bswap
:
8017 case Intrinsic::sadd_sat
:
8018 case Intrinsic::ssub_sat
:
8019 case Intrinsic::sshl_sat
:
8020 case Intrinsic::uadd_sat
:
8021 case Intrinsic::usub_sat
:
8022 case Intrinsic::ushl_sat
:
8027 case Instruction::ICmp
:
8028 case Instruction::FCmp
:
8029 case Instruction::GetElementPtr
:
8032 if (isa
<BinaryOperator
>(I
) || isa
<UnaryOperator
>(I
) || isa
<CastInst
>(I
))
8035 // Be conservative and return false.
8040 /// Enumerates all operands of \p I that are guaranteed to not be undef or
8041 /// poison. If the callback \p Handle returns true, stop processing and return
8042 /// true. Otherwise, return false.
8043 template <typename CallableT
>
8044 static bool handleGuaranteedWellDefinedOps(const Instruction
*I
,
8045 const CallableT
&Handle
) {
8046 switch (I
->getOpcode()) {
8047 case Instruction::Store
:
8048 if (Handle(cast
<StoreInst
>(I
)->getPointerOperand()))
8052 case Instruction::Load
:
8053 if (Handle(cast
<LoadInst
>(I
)->getPointerOperand()))
8057 // Since dereferenceable attribute imply noundef, atomic operations
8058 // also implicitly have noundef pointers too
8059 case Instruction::AtomicCmpXchg
:
8060 if (Handle(cast
<AtomicCmpXchgInst
>(I
)->getPointerOperand()))
8064 case Instruction::AtomicRMW
:
8065 if (Handle(cast
<AtomicRMWInst
>(I
)->getPointerOperand()))
8069 case Instruction::Call
:
8070 case Instruction::Invoke
: {
8071 const CallBase
*CB
= cast
<CallBase
>(I
);
8072 if (CB
->isIndirectCall() && Handle(CB
->getCalledOperand()))
8074 for (unsigned i
= 0; i
< CB
->arg_size(); ++i
)
8075 if ((CB
->paramHasAttr(i
, Attribute::NoUndef
) ||
8076 CB
->paramHasAttr(i
, Attribute::Dereferenceable
) ||
8077 CB
->paramHasAttr(i
, Attribute::DereferenceableOrNull
)) &&
8078 Handle(CB
->getArgOperand(i
)))
8082 case Instruction::Ret
:
8083 if (I
->getFunction()->hasRetAttribute(Attribute::NoUndef
) &&
8084 Handle(I
->getOperand(0)))
8087 case Instruction::Switch
:
8088 if (Handle(cast
<SwitchInst
>(I
)->getCondition()))
8091 case Instruction::Br
: {
8092 auto *BR
= cast
<BranchInst
>(I
);
8093 if (BR
->isConditional() && Handle(BR
->getCondition()))
8104 void llvm::getGuaranteedWellDefinedOps(
8105 const Instruction
*I
, SmallVectorImpl
<const Value
*> &Operands
) {
8106 handleGuaranteedWellDefinedOps(I
, [&](const Value
*V
) {
8107 Operands
.push_back(V
);
8112 /// Enumerates all operands of \p I that are guaranteed to not be poison.
8113 template <typename CallableT
>
8114 static bool handleGuaranteedNonPoisonOps(const Instruction
*I
,
8115 const CallableT
&Handle
) {
8116 if (handleGuaranteedWellDefinedOps(I
, Handle
))
8118 switch (I
->getOpcode()) {
8119 // Divisors of these operations are allowed to be partially undef.
8120 case Instruction::UDiv
:
8121 case Instruction::SDiv
:
8122 case Instruction::URem
:
8123 case Instruction::SRem
:
8124 return Handle(I
->getOperand(1));
8130 void llvm::getGuaranteedNonPoisonOps(const Instruction
*I
,
8131 SmallVectorImpl
<const Value
*> &Operands
) {
8132 handleGuaranteedNonPoisonOps(I
, [&](const Value
*V
) {
8133 Operands
.push_back(V
);
8138 bool llvm::mustTriggerUB(const Instruction
*I
,
8139 const SmallPtrSetImpl
<const Value
*> &KnownPoison
) {
8140 return handleGuaranteedNonPoisonOps(
8141 I
, [&](const Value
*V
) { return KnownPoison
.count(V
); });
8144 static bool programUndefinedIfUndefOrPoison(const Value
*V
,
8146 // We currently only look for uses of values within the same basic
8147 // block, as that makes it easier to guarantee that the uses will be
8148 // executed given that Inst is executed.
8150 // FIXME: Expand this to consider uses beyond the same basic block. To do
8151 // this, look out for the distinction between post-dominance and strong
8153 const BasicBlock
*BB
= nullptr;
8154 BasicBlock::const_iterator Begin
;
8155 if (const auto *Inst
= dyn_cast
<Instruction
>(V
)) {
8156 BB
= Inst
->getParent();
8157 Begin
= Inst
->getIterator();
8159 } else if (const auto *Arg
= dyn_cast
<Argument
>(V
)) {
8160 if (Arg
->getParent()->isDeclaration())
8162 BB
= &Arg
->getParent()->getEntryBlock();
8163 Begin
= BB
->begin();
8168 // Limit number of instructions we look at, to avoid scanning through large
8169 // blocks. The current limit is chosen arbitrarily.
8170 unsigned ScanLimit
= 32;
8171 BasicBlock::const_iterator End
= BB
->end();
8174 // Since undef does not propagate eagerly, be conservative & just check
8175 // whether a value is directly passed to an instruction that must take
8176 // well-defined operands.
8178 for (const auto &I
: make_range(Begin
, End
)) {
8179 if (isa
<DbgInfoIntrinsic
>(I
))
8181 if (--ScanLimit
== 0)
8184 if (handleGuaranteedWellDefinedOps(&I
, [V
](const Value
*WellDefinedOp
) {
8185 return WellDefinedOp
== V
;
8189 if (!isGuaranteedToTransferExecutionToSuccessor(&I
))
8195 // Set of instructions that we have proved will yield poison if Inst
8197 SmallSet
<const Value
*, 16> YieldsPoison
;
8198 SmallSet
<const BasicBlock
*, 4> Visited
;
8200 YieldsPoison
.insert(V
);
8204 for (const auto &I
: make_range(Begin
, End
)) {
8205 if (isa
<DbgInfoIntrinsic
>(I
))
8207 if (--ScanLimit
== 0)
8209 if (mustTriggerUB(&I
, YieldsPoison
))
8211 if (!isGuaranteedToTransferExecutionToSuccessor(&I
))
8214 // If an operand is poison and propagates it, mark I as yielding poison.
8215 for (const Use
&Op
: I
.operands()) {
8216 if (YieldsPoison
.count(Op
) && propagatesPoison(Op
)) {
8217 YieldsPoison
.insert(&I
);
8222 // Special handling for select, which returns poison if its operand 0 is
8223 // poison (handled in the loop above) *or* if both its true/false operands
8224 // are poison (handled here).
8225 if (I
.getOpcode() == Instruction::Select
&&
8226 YieldsPoison
.count(I
.getOperand(1)) &&
8227 YieldsPoison
.count(I
.getOperand(2))) {
8228 YieldsPoison
.insert(&I
);
8232 BB
= BB
->getSingleSuccessor();
8233 if (!BB
|| !Visited
.insert(BB
).second
)
8236 Begin
= BB
->getFirstNonPHIIt();
8242 bool llvm::programUndefinedIfUndefOrPoison(const Instruction
*Inst
) {
8243 return ::programUndefinedIfUndefOrPoison(Inst
, false);
8246 bool llvm::programUndefinedIfPoison(const Instruction
*Inst
) {
8247 return ::programUndefinedIfUndefOrPoison(Inst
, true);
8250 static bool isKnownNonNaN(const Value
*V
, FastMathFlags FMF
) {
8254 if (auto *C
= dyn_cast
<ConstantFP
>(V
))
8257 if (auto *C
= dyn_cast
<ConstantDataVector
>(V
)) {
8258 if (!C
->getElementType()->isFloatingPointTy())
8260 for (unsigned I
= 0, E
= C
->getNumElements(); I
< E
; ++I
) {
8261 if (C
->getElementAsAPFloat(I
).isNaN())
8267 if (isa
<ConstantAggregateZero
>(V
))
8273 static bool isKnownNonZero(const Value
*V
) {
8274 if (auto *C
= dyn_cast
<ConstantFP
>(V
))
8275 return !C
->isZero();
8277 if (auto *C
= dyn_cast
<ConstantDataVector
>(V
)) {
8278 if (!C
->getElementType()->isFloatingPointTy())
8280 for (unsigned I
= 0, E
= C
->getNumElements(); I
< E
; ++I
) {
8281 if (C
->getElementAsAPFloat(I
).isZero())
8290 /// Match clamp pattern for float types without care about NaNs or signed zeros.
8291 /// Given non-min/max outer cmp/select from the clamp pattern this
8292 /// function recognizes if it can be substitued by a "canonical" min/max
8294 static SelectPatternResult
matchFastFloatClamp(CmpInst::Predicate Pred
,
8295 Value
*CmpLHS
, Value
*CmpRHS
,
8296 Value
*TrueVal
, Value
*FalseVal
,
8297 Value
*&LHS
, Value
*&RHS
) {
8299 // X < C1 ? C1 : Min(X, C2) --> Max(C1, Min(X, C2))
8300 // X > C1 ? C1 : Max(X, C2) --> Min(C1, Max(X, C2))
8301 // and return description of the outer Max/Min.
8303 // First, check if select has inverse order:
8304 if (CmpRHS
== FalseVal
) {
8305 std::swap(TrueVal
, FalseVal
);
8306 Pred
= CmpInst::getInversePredicate(Pred
);
8309 // Assume success now. If there's no match, callers should not use these anyway.
8314 if (CmpRHS
!= TrueVal
|| !match(CmpRHS
, m_APFloat(FC1
)) || !FC1
->isFinite())
8315 return {SPF_UNKNOWN
, SPNB_NA
, false};
8319 case CmpInst::FCMP_OLT
:
8320 case CmpInst::FCMP_OLE
:
8321 case CmpInst::FCMP_ULT
:
8322 case CmpInst::FCMP_ULE
:
8323 if (match(FalseVal
, m_OrdOrUnordFMin(m_Specific(CmpLHS
), m_APFloat(FC2
))) &&
8325 return {SPF_FMAXNUM
, SPNB_RETURNS_ANY
, false};
8327 case CmpInst::FCMP_OGT
:
8328 case CmpInst::FCMP_OGE
:
8329 case CmpInst::FCMP_UGT
:
8330 case CmpInst::FCMP_UGE
:
8331 if (match(FalseVal
, m_OrdOrUnordFMax(m_Specific(CmpLHS
), m_APFloat(FC2
))) &&
8333 return {SPF_FMINNUM
, SPNB_RETURNS_ANY
, false};
8339 return {SPF_UNKNOWN
, SPNB_NA
, false};
8342 /// Recognize variations of:
8343 /// CLAMP(v,l,h) ==> ((v) < (l) ? (l) : ((v) > (h) ? (h) : (v)))
8344 static SelectPatternResult
matchClamp(CmpInst::Predicate Pred
,
8345 Value
*CmpLHS
, Value
*CmpRHS
,
8346 Value
*TrueVal
, Value
*FalseVal
) {
8347 // Swap the select operands and predicate to match the patterns below.
8348 if (CmpRHS
!= TrueVal
) {
8349 Pred
= ICmpInst::getSwappedPredicate(Pred
);
8350 std::swap(TrueVal
, FalseVal
);
8353 if (CmpRHS
== TrueVal
&& match(CmpRHS
, m_APInt(C1
))) {
8355 // (X <s C1) ? C1 : SMIN(X, C2) ==> SMAX(SMIN(X, C2), C1)
8356 if (match(FalseVal
, m_SMin(m_Specific(CmpLHS
), m_APInt(C2
))) &&
8357 C1
->slt(*C2
) && Pred
== CmpInst::ICMP_SLT
)
8358 return {SPF_SMAX
, SPNB_NA
, false};
8360 // (X >s C1) ? C1 : SMAX(X, C2) ==> SMIN(SMAX(X, C2), C1)
8361 if (match(FalseVal
, m_SMax(m_Specific(CmpLHS
), m_APInt(C2
))) &&
8362 C1
->sgt(*C2
) && Pred
== CmpInst::ICMP_SGT
)
8363 return {SPF_SMIN
, SPNB_NA
, false};
8365 // (X <u C1) ? C1 : UMIN(X, C2) ==> UMAX(UMIN(X, C2), C1)
8366 if (match(FalseVal
, m_UMin(m_Specific(CmpLHS
), m_APInt(C2
))) &&
8367 C1
->ult(*C2
) && Pred
== CmpInst::ICMP_ULT
)
8368 return {SPF_UMAX
, SPNB_NA
, false};
8370 // (X >u C1) ? C1 : UMAX(X, C2) ==> UMIN(UMAX(X, C2), C1)
8371 if (match(FalseVal
, m_UMax(m_Specific(CmpLHS
), m_APInt(C2
))) &&
8372 C1
->ugt(*C2
) && Pred
== CmpInst::ICMP_UGT
)
8373 return {SPF_UMIN
, SPNB_NA
, false};
8375 return {SPF_UNKNOWN
, SPNB_NA
, false};
/// Recognize variations of:
///   a < c ? min(a,b) : min(b,c) ==> min(min(a,b),min(b,c))
static SelectPatternResult matchMinMaxOfMinMax(CmpInst::Predicate Pred,
                                               Value *CmpLHS, Value *CmpRHS,
                                               Value *TVal, Value *FVal,
                                               unsigned Depth) {
  // TODO: Allow FP min/max with nnan/nsz.
  assert(CmpInst::isIntPredicate(Pred) && "Expected integer comparison");

  Value *A = nullptr, *B = nullptr;
  SelectPatternResult L = matchSelectPattern(TVal, A, B, nullptr, Depth + 1);
  if (!SelectPatternResult::isMinOrMax(L.Flavor))
    return {SPF_UNKNOWN, SPNB_NA, false};

  Value *C = nullptr, *D = nullptr;
  SelectPatternResult R = matchSelectPattern(FVal, C, D, nullptr, Depth + 1);
  if (L.Flavor != R.Flavor)
    return {SPF_UNKNOWN, SPNB_NA, false};

  // We have something like: x Pred y ? min(a, b) : min(c, d).
  // Try to match the compare to the min/max operations of the select operands.
  // First, make sure we have the right compare predicate.
  switch (L.Flavor) {
  case SPF_SMIN:
    if (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE) {
      Pred = ICmpInst::getSwappedPredicate(Pred);
      std::swap(CmpLHS, CmpRHS);
    }
    if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE)
      break;
    return {SPF_UNKNOWN, SPNB_NA, false};
  case SPF_SMAX:
    if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE) {
      Pred = ICmpInst::getSwappedPredicate(Pred);
      std::swap(CmpLHS, CmpRHS);
    }
    if (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE)
      break;
    return {SPF_UNKNOWN, SPNB_NA, false};
  case SPF_UMIN:
    if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE) {
      Pred = ICmpInst::getSwappedPredicate(Pred);
      std::swap(CmpLHS, CmpRHS);
    }
    if (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_ULE)
      break;
    return {SPF_UNKNOWN, SPNB_NA, false};
  case SPF_UMAX:
    if (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_ULE) {
      Pred = ICmpInst::getSwappedPredicate(Pred);
      std::swap(CmpLHS, CmpRHS);
    }
    if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE)
      break;
    return {SPF_UNKNOWN, SPNB_NA, false};
  default:
    return {SPF_UNKNOWN, SPNB_NA, false};
  }

  // If there is a common operand in the already matched min/max and the other
  // min/max operands match the compare operands (either directly or inverted),
  // then this is min/max of the same flavor.

  // a pred c ? m(a, b) : m(c, b) --> m(m(a, b), m(c, b))
  // ~c pred ~a ? m(a, b) : m(c, b) --> m(m(a, b), m(c, b))
  if (D == B &&
      ((CmpLHS == A && CmpRHS == C) || (match(C, m_Not(m_Specific(CmpLHS))) &&
                                        match(A, m_Not(m_Specific(CmpRHS))))))
    return {L.Flavor, SPNB_NA, false};
  // a pred d ? m(a, b) : m(b, d) --> m(m(a, b), m(b, d))
  // ~d pred ~a ? m(a, b) : m(b, d) --> m(m(a, b), m(b, d))
  if (C == B &&
      ((CmpLHS == A && CmpRHS == D) || (match(D, m_Not(m_Specific(CmpLHS))) &&
                                        match(A, m_Not(m_Specific(CmpRHS))))))
    return {L.Flavor, SPNB_NA, false};
  // b pred c ? m(a, b) : m(c, a) --> m(m(a, b), m(c, a))
  // ~c pred ~b ? m(a, b) : m(c, a) --> m(m(a, b), m(c, a))
  if (D == A &&
      ((CmpLHS == B && CmpRHS == C) || (match(C, m_Not(m_Specific(CmpLHS))) &&
                                        match(B, m_Not(m_Specific(CmpRHS))))))
    return {L.Flavor, SPNB_NA, false};
  // b pred d ? m(a, b) : m(a, d) --> m(m(a, b), m(a, d))
  // ~d pred ~b ? m(a, b) : m(a, d) --> m(m(a, b), m(a, d))
  if (C == A &&
      ((CmpLHS == B && CmpRHS == D) || (match(D, m_Not(m_Specific(CmpLHS))) &&
                                        match(B, m_Not(m_Specific(CmpRHS))))))
    return {L.Flavor, SPNB_NA, false};

  return {SPF_UNKNOWN, SPNB_NA, false};
}
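
// Illustrative example (added; %a, %b, %c are hypothetical values): for IR
// like
//   %c1 = icmp slt i32 %a, %b
//   %m1 = select i1 %c1, i32 %a, i32 %b        ; smin(a, b)
//   %c2 = icmp slt i32 %b, %c
//   %m2 = select i1 %c2, i32 %b, i32 %c        ; smin(b, c)
//   %cc = icmp slt i32 %a, %c
//   %r  = select i1 %cc, i32 %m1, i32 %m2
// matchMinMaxOfMinMax reports SPF_SMIN for %r: both arms share %b, and the
// compare operands %a/%c line up with the non-shared min operands.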
/// If the input value is the result of a 'not' op, constant integer, or vector
/// splat of a constant integer, return the bitwise-not source value.
/// TODO: This could be extended to handle non-splat vector integer constants.
static Value *getNotValue(Value *V) {
  Value *NotV;
  if (match(V, m_Not(m_Value(NotV))))
    return NotV;

  const APInt *C;
  if (match(V, m_APInt(C)))
    return ConstantInt::get(V->getType(), ~(*C));

  return nullptr;
}
/// Match non-obvious integer minimum and maximum sequences.
static SelectPatternResult matchMinMax(CmpInst::Predicate Pred,
                                       Value *CmpLHS, Value *CmpRHS,
                                       Value *TrueVal, Value *FalseVal,
                                       Value *&LHS, Value *&RHS,
                                       unsigned Depth) {
  // Assume success. If there's no match, callers should not use these anyway.
  LHS = TrueVal;
  RHS = FalseVal;

  SelectPatternResult SPR = matchClamp(Pred, CmpLHS, CmpRHS, TrueVal, FalseVal);
  if (SPR.Flavor != SelectPatternFlavor::SPF_UNKNOWN)
    return SPR;

  SPR = matchMinMaxOfMinMax(Pred, CmpLHS, CmpRHS, TrueVal, FalseVal, Depth);
  if (SPR.Flavor != SelectPatternFlavor::SPF_UNKNOWN)
    return SPR;

  // Look through 'not' ops to find disguised min/max.
  // (X > Y) ? ~X : ~Y ==> (~X < ~Y) ? ~X : ~Y ==> MIN(~X, ~Y)
  // (X < Y) ? ~X : ~Y ==> (~X > ~Y) ? ~X : ~Y ==> MAX(~X, ~Y)
  if (CmpLHS == getNotValue(TrueVal) && CmpRHS == getNotValue(FalseVal)) {
    switch (Pred) {
    case CmpInst::ICMP_SGT: return {SPF_SMIN, SPNB_NA, false};
    case CmpInst::ICMP_SLT: return {SPF_SMAX, SPNB_NA, false};
    case CmpInst::ICMP_UGT: return {SPF_UMIN, SPNB_NA, false};
    case CmpInst::ICMP_ULT: return {SPF_UMAX, SPNB_NA, false};
    default: break;
    }
  }

  // (X > Y) ? ~Y : ~X ==> (~X < ~Y) ? ~Y : ~X ==> MAX(~Y, ~X)
  // (X < Y) ? ~Y : ~X ==> (~X > ~Y) ? ~Y : ~X ==> MIN(~Y, ~X)
  if (CmpLHS == getNotValue(FalseVal) && CmpRHS == getNotValue(TrueVal)) {
    switch (Pred) {
    case CmpInst::ICMP_SGT: return {SPF_SMAX, SPNB_NA, false};
    case CmpInst::ICMP_SLT: return {SPF_SMIN, SPNB_NA, false};
    case CmpInst::ICMP_UGT: return {SPF_UMAX, SPNB_NA, false};
    case CmpInst::ICMP_ULT: return {SPF_UMIN, SPNB_NA, false};
    default: break;
    }
  }

  if (Pred != CmpInst::ICMP_SGT && Pred != CmpInst::ICMP_SLT)
    return {SPF_UNKNOWN, SPNB_NA, false};

  const APInt *C1;
  if (!match(CmpRHS, m_APInt(C1)))
    return {SPF_UNKNOWN, SPNB_NA, false};

  // An unsigned min/max can be written with a signed compare.
  const APInt *C2;
  if ((CmpLHS == TrueVal && match(FalseVal, m_APInt(C2))) ||
      (CmpLHS == FalseVal && match(TrueVal, m_APInt(C2)))) {
    // Is the sign bit set?
    // (X <s 0) ? X : MAXVAL ==> (X >u MAXVAL) ? X : MAXVAL ==> UMAX
    // (X <s 0) ? MAXVAL : X ==> (X >u MAXVAL) ? MAXVAL : X ==> UMIN
    if (Pred == CmpInst::ICMP_SLT && C1->isZero() && C2->isMaxSignedValue())
      return {CmpLHS == TrueVal ? SPF_UMAX : SPF_UMIN, SPNB_NA, false};

    // Is the sign bit clear?
    // (X >s -1) ? MINVAL : X ==> (X <u MINVAL) ? MINVAL : X ==> UMAX
    // (X >s -1) ? X : MINVAL ==> (X <u MINVAL) ? X : MINVAL ==> UMIN
    if (Pred == CmpInst::ICMP_SGT && C1->isAllOnes() && C2->isMinSignedValue())
      return {CmpLHS == FalseVal ? SPF_UMAX : SPF_UMIN, SPNB_NA, false};
  }

  return {SPF_UNKNOWN, SPNB_NA, false};
}
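
// Illustrative example (added; %x and %y are hypothetical values): the
// "disguised min/max through 'not'" case above fires on IR such as
//   %nx = xor i32 %x, -1
//   %ny = xor i32 %y, -1
//   %c  = icmp sgt i32 %x, %y
//   %r  = select i1 %c, i32 %nx, i32 %ny
// Since x >s y implies ~x <s ~y, %r is smin(~x, ~y) and SPF_SMIN is returned.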
bool llvm::isKnownNegation(const Value *X, const Value *Y, bool NeedNSW,
                           bool AllowPoison) {
  assert(X && Y && "Invalid operand");

  auto IsNegationOf = [&](const Value *X, const Value *Y) {
    if (!match(X, m_Neg(m_Specific(Y))))
      return false;

    auto *BO = cast<BinaryOperator>(X);
    if (NeedNSW && !BO->hasNoSignedWrap())
      return false;

    auto *Zero = cast<Constant>(BO->getOperand(0));
    if (!AllowPoison && !Zero->isNullValue())
      return false;

    return true;
  };

  // X = -Y or Y = -X
  if (IsNegationOf(X, Y) || IsNegationOf(Y, X))
    return true;

  // X = sub (A, B), Y = sub (B, A) || X = sub nsw (A, B), Y = sub nsw (B, A)
  Value *A, *B;
  return (!NeedNSW && (match(X, m_Sub(m_Value(A), m_Value(B))) &&
                       match(Y, m_Sub(m_Specific(B), m_Specific(A))))) ||
         (NeedNSW && (match(X, m_NSWSub(m_Value(A), m_Value(B))) &&
                      match(Y, m_NSWSub(m_Specific(B), m_Specific(A)))));
}
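
// Illustrative example (added; %v, %p, %q are hypothetical values):
//   %x = sub nsw i32 0, %v
// makes isKnownNegation(%x, %v, /*NeedNSW=*/true) return true, while
//   %a = sub i32 %p, %q
//   %b = sub i32 %q, %p
// makes isKnownNegation(%a, %b) return true only when NeedNSW is false,
// because neither sub carries the nsw flag.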
bool llvm::isKnownInversion(const Value *X, const Value *Y) {
  // Handle X = icmp pred A, B, Y = icmp pred A, C.
  Value *A, *B, *C;
  CmpPredicate Pred1, Pred2;
  if (!match(X, m_ICmp(Pred1, m_Value(A), m_Value(B))) ||
      !match(Y, m_c_ICmp(Pred2, m_Specific(A), m_Value(C))))
    return false;

  // They must both have samesign flag or not.
  if (cast<ICmpInst>(X)->hasSameSign() != cast<ICmpInst>(Y)->hasSameSign())
    return false;

  if (B == C)
    return Pred1 == ICmpInst::getInversePredicate(Pred2);

  // Try to infer the relationship from constant ranges.
  const APInt *RHSC1, *RHSC2;
  if (!match(B, m_APInt(RHSC1)) || !match(C, m_APInt(RHSC2)))
    return false;

  // Sign bits of two RHSCs should match.
  if (cast<ICmpInst>(X)->hasSameSign() &&
      RHSC1->isNonNegative() != RHSC2->isNonNegative())
    return false;

  const auto CR1 = ConstantRange::makeExactICmpRegion(Pred1, *RHSC1);
  const auto CR2 = ConstantRange::makeExactICmpRegion(Pred2, *RHSC2);

  return CR1.inverse() == CR2;
}
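
// Illustrative example (added; %a is a hypothetical value): isKnownInversion
// returns true for
//   %x = icmp slt i32 %a, 10
//   %y = icmp sge i32 %a, 10
// (inverse predicates on identical operands), and also for
//   %x = icmp ult i32 %a, 5
//   %y = icmp ugt i32 %a, 4
// because the exact icmp regions [0, 5) and [5, UINT_MAX] are inverses.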
SelectPatternResult llvm::getSelectPattern(CmpInst::Predicate Pred,
                                           SelectPatternNaNBehavior NaNBehavior,
                                           bool Ordered) {
  switch (Pred) {
  default:
    return {SPF_UNKNOWN, SPNB_NA, false}; // Equality.
  case ICmpInst::ICMP_UGT:
  case ICmpInst::ICMP_UGE:
    return {SPF_UMAX, SPNB_NA, false};
  case ICmpInst::ICMP_SGT:
  case ICmpInst::ICMP_SGE:
    return {SPF_SMAX, SPNB_NA, false};
  case ICmpInst::ICMP_ULT:
  case ICmpInst::ICMP_ULE:
    return {SPF_UMIN, SPNB_NA, false};
  case ICmpInst::ICMP_SLT:
  case ICmpInst::ICMP_SLE:
    return {SPF_SMIN, SPNB_NA, false};
  case FCmpInst::FCMP_UGT:
  case FCmpInst::FCMP_UGE:
  case FCmpInst::FCMP_OGT:
  case FCmpInst::FCMP_OGE:
    return {SPF_FMAXNUM, NaNBehavior, Ordered};
  case FCmpInst::FCMP_ULT:
  case FCmpInst::FCMP_ULE:
  case FCmpInst::FCMP_OLT:
  case FCmpInst::FCMP_OLE:
    return {SPF_FMINNUM, NaNBehavior, Ordered};
  }
}
std::optional<std::pair<CmpPredicate, Constant *>>
llvm::getFlippedStrictnessPredicateAndConstant(CmpPredicate Pred, Constant *C) {
  assert(ICmpInst::isRelational(Pred) && ICmpInst::isIntPredicate(Pred) &&
         "Only for relational integer predicates.");
  if (isa<UndefValue>(C))
    return std::nullopt;

  Type *Type = C->getType();
  bool IsSigned = ICmpInst::isSigned(Pred);

  CmpInst::Predicate UnsignedPred = ICmpInst::getUnsignedPredicate(Pred);
  bool WillIncrement =
      UnsignedPred == ICmpInst::ICMP_ULE || UnsignedPred == ICmpInst::ICMP_UGT;

  // Check if the constant operand can be safely incremented/decremented
  // without overflowing/underflowing.
  auto ConstantIsOk = [WillIncrement, IsSigned](ConstantInt *C) {
    return WillIncrement ? !C->isMaxValue(IsSigned) : !C->isMinValue(IsSigned);
  };

  Constant *SafeReplacementConstant = nullptr;
  if (auto *CI = dyn_cast<ConstantInt>(C)) {
    // Bail out if the constant can't be safely incremented/decremented.
    if (!ConstantIsOk(CI))
      return std::nullopt;
  } else if (auto *FVTy = dyn_cast<FixedVectorType>(Type)) {
    unsigned NumElts = FVTy->getNumElements();
    for (unsigned i = 0; i != NumElts; ++i) {
      Constant *Elt = C->getAggregateElement(i);
      if (!Elt)
        return std::nullopt;

      if (isa<UndefValue>(Elt))
        continue;

      // Bail out if we can't determine if this constant is min/max or if we
      // know that this constant is min/max.
      auto *CI = dyn_cast<ConstantInt>(Elt);
      if (!CI || !ConstantIsOk(CI))
        return std::nullopt;

      if (!SafeReplacementConstant)
        SafeReplacementConstant = CI;
    }
  } else if (isa<VectorType>(C->getType())) {
    // Handle scalable splat
    Value *SplatC = C->getSplatValue();
    auto *CI = dyn_cast_or_null<ConstantInt>(SplatC);
    // Bail out if the constant can't be safely incremented/decremented.
    if (!CI || !ConstantIsOk(CI))
      return std::nullopt;
  } else {
    // ConstantExpr?
    return std::nullopt;
  }

  // It may not be safe to change a compare predicate in the presence of
  // undefined elements, so replace those elements with the first safe constant
  // that we found.
  // TODO: in case of poison, it is safe; let's replace undefs only.
  if (C->containsUndefOrPoisonElement()) {
    assert(SafeReplacementConstant && "Replacement constant not set");
    C = Constant::replaceUndefsWith(C, SafeReplacementConstant);
  }

  CmpInst::Predicate NewPred = CmpInst::getFlippedStrictnessPredicate(Pred);

  // Increment or decrement the constant.
  Constant *OneOrNegOne = ConstantInt::get(Type, WillIncrement ? 1 : -1, true);
  Constant *NewC = ConstantExpr::getAdd(C, OneOrNegOne);

  return std::make_pair(NewPred, NewC);
}
static SelectPatternResult matchSelectPattern(CmpInst::Predicate Pred,
                                              FastMathFlags FMF,
                                              Value *CmpLHS, Value *CmpRHS,
                                              Value *TrueVal, Value *FalseVal,
                                              Value *&LHS, Value *&RHS,
                                              unsigned Depth) {
  bool HasMismatchedZeros = false;
  if (CmpInst::isFPPredicate(Pred)) {
    // IEEE-754 ignores the sign of 0.0 in comparisons. So if the select has one
    // 0.0 operand, set the compare's 0.0 operands to that same value for the
    // purpose of identifying min/max. Disregard vector constants with undefined
    // elements because those can not be back-propagated for analysis.
    Value *OutputZeroVal = nullptr;
    if (match(TrueVal, m_AnyZeroFP()) && !match(FalseVal, m_AnyZeroFP()) &&
        !cast<Constant>(TrueVal)->containsUndefOrPoisonElement())
      OutputZeroVal = TrueVal;
    else if (match(FalseVal, m_AnyZeroFP()) && !match(TrueVal, m_AnyZeroFP()) &&
             !cast<Constant>(FalseVal)->containsUndefOrPoisonElement())
      OutputZeroVal = FalseVal;

    if (OutputZeroVal) {
      if (match(CmpLHS, m_AnyZeroFP()) && CmpLHS != OutputZeroVal) {
        HasMismatchedZeros = true;
        CmpLHS = OutputZeroVal;
      }
      if (match(CmpRHS, m_AnyZeroFP()) && CmpRHS != OutputZeroVal) {
        HasMismatchedZeros = true;
        CmpRHS = OutputZeroVal;
      }
    }
  }

  LHS = CmpLHS;
  RHS = CmpRHS;

  // Signed zero may return inconsistent results between implementations.
  //  (0.0 <= -0.0) ? 0.0 : -0.0 // Returns 0.0
  //  minNum(0.0, -0.0)          // May return -0.0 or 0.0 (IEEE 754-2008 5.3.1)
  // Therefore, we behave conservatively and only proceed if at least one of the
  // operands is known to not be zero or if we don't care about signed zero.
  switch (Pred) {
  default: break;
  case CmpInst::FCMP_OGT: case CmpInst::FCMP_OLT:
  case CmpInst::FCMP_UGT: case CmpInst::FCMP_ULT:
    if (!HasMismatchedZeros)
      break;
    [[fallthrough]];
  case CmpInst::FCMP_OGE: case CmpInst::FCMP_OLE:
  case CmpInst::FCMP_UGE: case CmpInst::FCMP_ULE:
    if (!FMF.noSignedZeros() && !isKnownNonZero(CmpLHS) &&
        !isKnownNonZero(CmpRHS))
      return {SPF_UNKNOWN, SPNB_NA, false};
  }

  SelectPatternNaNBehavior NaNBehavior = SPNB_NA;
  bool Ordered = false;
  // When given one NaN and one non-NaN input:
  //   - maxnum/minnum (C99 fmaxf()/fminf()) return the non-NaN input.
  //   - A simple C99 (a < b ? a : b) construction will return 'b' (as the
  //     ordered comparison fails), which could be NaN or non-NaN.
  // so here we discover exactly what NaN behavior is required/accepted.
  if (CmpInst::isFPPredicate(Pred)) {
    bool LHSSafe = isKnownNonNaN(CmpLHS, FMF);
    bool RHSSafe = isKnownNonNaN(CmpRHS, FMF);

    if (LHSSafe && RHSSafe) {
      // Both operands are known non-NaN.
      NaNBehavior = SPNB_RETURNS_ANY;
    } else if (CmpInst::isOrdered(Pred)) {
      // An ordered comparison will return false when given a NaN, so it
      // returns the RHS.
      Ordered = true;
      if (LHSSafe)
        // LHS is non-NaN, so if RHS is NaN then NaN will be returned.
        NaNBehavior = SPNB_RETURNS_NAN;
      else if (RHSSafe)
        NaNBehavior = SPNB_RETURNS_OTHER;
      else
        // Completely unsafe.
        return {SPF_UNKNOWN, SPNB_NA, false};
    } else {
      Ordered = false;
      // An unordered comparison will return true when given a NaN, so it
      // returns the LHS.
      if (LHSSafe)
        // LHS is non-NaN, so if RHS is NaN then non-NaN will be returned.
        NaNBehavior = SPNB_RETURNS_OTHER;
      else if (RHSSafe)
        NaNBehavior = SPNB_RETURNS_NAN;
      else
        // Completely unsafe.
        return {SPF_UNKNOWN, SPNB_NA, false};
    }
  }

  if (TrueVal == CmpRHS && FalseVal == CmpLHS) {
    std::swap(CmpLHS, CmpRHS);
    Pred = CmpInst::getSwappedPredicate(Pred);
    if (NaNBehavior == SPNB_RETURNS_NAN)
      NaNBehavior = SPNB_RETURNS_OTHER;
    else if (NaNBehavior == SPNB_RETURNS_OTHER)
      NaNBehavior = SPNB_RETURNS_NAN;
    Ordered = !Ordered;
  }

  // ([if]cmp X, Y) ? X : Y
  if (TrueVal == CmpLHS && FalseVal == CmpRHS)
    return getSelectPattern(Pred, NaNBehavior, Ordered);

  if (isKnownNegation(TrueVal, FalseVal)) {
    // Sign-extending LHS does not change its sign, so TrueVal/FalseVal can
    // match against either LHS or sext(LHS).
    auto MaybeSExtCmpLHS =
        m_CombineOr(m_Specific(CmpLHS), m_SExt(m_Specific(CmpLHS)));
    auto ZeroOrAllOnes = m_CombineOr(m_ZeroInt(), m_AllOnes());
    auto ZeroOrOne = m_CombineOr(m_ZeroInt(), m_One());
    if (match(TrueVal, MaybeSExtCmpLHS)) {
      // Set the return values. If the compare uses the negated value (-X >s 0),
      // swap the return values because the negated value is always 'RHS'.
      LHS = TrueVal;
      RHS = FalseVal;
      if (match(CmpLHS, m_Neg(m_Specific(FalseVal))))
        std::swap(LHS, RHS);

      // (X >s 0) ? X : -X or (X >s -1) ? X : -X --> ABS(X)
      // (-X >s 0) ? -X : X or (-X >s -1) ? -X : X --> ABS(X)
      if (Pred == ICmpInst::ICMP_SGT && match(CmpRHS, ZeroOrAllOnes))
        return {SPF_ABS, SPNB_NA, false};

      // (X >=s 0) ? X : -X or (X >=s 1) ? X : -X --> ABS(X)
      if (Pred == ICmpInst::ICMP_SGE && match(CmpRHS, ZeroOrOne))
        return {SPF_ABS, SPNB_NA, false};

      // (X <s 0) ? X : -X or (X <s 1) ? X : -X --> NABS(X)
      // (-X <s 0) ? -X : X or (-X <s 1) ? -X : X --> NABS(X)
      if (Pred == ICmpInst::ICMP_SLT && match(CmpRHS, ZeroOrOne))
        return {SPF_NABS, SPNB_NA, false};
    }
    else if (match(FalseVal, MaybeSExtCmpLHS)) {
      // Set the return values. If the compare uses the negated value (-X >s 0),
      // swap the return values because the negated value is always 'RHS'.
      LHS = FalseVal;
      RHS = TrueVal;
      if (match(CmpLHS, m_Neg(m_Specific(TrueVal))))
        std::swap(LHS, RHS);

      // (X >s 0) ? -X : X or (X >s -1) ? -X : X --> NABS(X)
      // (-X >s 0) ? X : -X or (-X >s -1) ? X : -X --> NABS(X)
      if (Pred == ICmpInst::ICMP_SGT && match(CmpRHS, ZeroOrAllOnes))
        return {SPF_NABS, SPNB_NA, false};

      // (X <s 0) ? -X : X or (X <s 1) ? -X : X --> ABS(X)
      // (-X <s 0) ? X : -X or (-X <s 1) ? X : -X --> ABS(X)
      if (Pred == ICmpInst::ICMP_SLT && match(CmpRHS, ZeroOrOne))
        return {SPF_ABS, SPNB_NA, false};
    }
  }

  if (CmpInst::isIntPredicate(Pred))
    return matchMinMax(Pred, CmpLHS, CmpRHS, TrueVal, FalseVal, LHS, RHS, Depth);

  // According to (IEEE 754-2008 5.3.1), minNum(0.0, -0.0) and similar
  // may return either -0.0 or 0.0, so fcmp/select pair has stricter
  // semantics than minNum. Be conservative in such case.
  if (NaNBehavior != SPNB_RETURNS_ANY ||
      (!FMF.noSignedZeros() && !isKnownNonZero(CmpLHS) &&
       !isKnownNonZero(CmpRHS)))
    return {SPF_UNKNOWN, SPNB_NA, false};

  return matchFastFloatClamp(Pred, CmpLHS, CmpRHS, TrueVal, FalseVal, LHS, RHS);
}
static Value *lookThroughCastConst(CmpInst *CmpI, Type *SrcTy, Constant *C,
                                   Instruction::CastOps *CastOp) {
  const DataLayout &DL = CmpI->getDataLayout();

  Constant *CastedTo = nullptr;
  switch (*CastOp) {
  case Instruction::ZExt:
    if (CmpI->isUnsigned())
      CastedTo = ConstantExpr::getTrunc(C, SrcTy);
    break;
  case Instruction::SExt:
    if (CmpI->isSigned())
      CastedTo = ConstantExpr::getTrunc(C, SrcTy, true);
    break;
  case Instruction::Trunc:
    Constant *CmpConst;
    if (match(CmpI->getOperand(1), m_Constant(CmpConst)) &&
        CmpConst->getType() == SrcTy) {
      // Here we have the following case:
      //
      //   %cond = cmp iN %x, CmpConst
      //   %tr = trunc iN %x to iK
      //   %narrowsel = select i1 %cond, iK %t, iK C
      //
      // We can always move trunc after select operation:
      //
      //   %cond = cmp iN %x, CmpConst
      //   %widesel = select i1 %cond, iN %x, iN CmpConst
      //   %tr = trunc iN %widesel to iK
      //
      // Note that C could be extended in any way because we don't care about
      // upper bits after truncation. It can't be abs pattern, because it would
      // look like:
      //
      //   select i1 %cond, x, -x.
      //
      // So only min/max pattern could be matched. Such match requires widened C
      // == CmpConst. That is why set widened C = CmpConst, condition trunc
      // CmpConst == C is checked below.
      CastedTo = CmpConst;
    } else {
      unsigned ExtOp = CmpI->isSigned() ? Instruction::SExt : Instruction::ZExt;
      CastedTo = ConstantFoldCastOperand(ExtOp, C, SrcTy, DL);
    }
    break;
  case Instruction::FPTrunc:
    CastedTo = ConstantFoldCastOperand(Instruction::FPExt, C, SrcTy, DL);
    break;
  case Instruction::FPExt:
    CastedTo = ConstantFoldCastOperand(Instruction::FPTrunc, C, SrcTy, DL);
    break;
  case Instruction::FPToUI:
    CastedTo = ConstantFoldCastOperand(Instruction::UIToFP, C, SrcTy, DL);
    break;
  case Instruction::FPToSI:
    CastedTo = ConstantFoldCastOperand(Instruction::SIToFP, C, SrcTy, DL);
    break;
  case Instruction::UIToFP:
    CastedTo = ConstantFoldCastOperand(Instruction::FPToUI, C, SrcTy, DL);
    break;
  case Instruction::SIToFP:
    CastedTo = ConstantFoldCastOperand(Instruction::FPToSI, C, SrcTy, DL);
    break;
  default:
    break;
  }

  if (!CastedTo)
    return nullptr;

  // Make sure the cast doesn't lose any information.
  Constant *CastedBack =
      ConstantFoldCastOperand(*CastOp, CastedTo, C->getType(), DL);
  if (CastedBack && CastedBack != C)
    return nullptr;

  return CastedTo;
}

/// Helps to match a select pattern in case of a type mismatch.
///
/// The function processes the case when type of true and false values of a
/// select instruction differs from type of the cmp instruction operands because
/// of a cast instruction. The function checks if it is legal to move the cast
/// operation after "select". If yes, it returns the new second value of
/// "select" (with the assumption that cast is moved):
/// 1. As operand of cast instruction when both values of "select" are same cast
/// instructions.
/// 2. As restored constant (by applying reverse cast operation) when the first
/// value of the "select" is a cast operation and the second value is a
/// constant. It is implemented in lookThroughCastConst().
/// 3. As one operand is cast instruction and the other is not. The operands in
/// sel(cmp) are in different type integer.
/// NOTE: We return only the new second value because the first value could be
/// accessed as operand of cast instruction.
static Value *lookThroughCast(CmpInst *CmpI, Value *V1, Value *V2,
                              Instruction::CastOps *CastOp) {
  auto *Cast1 = dyn_cast<CastInst>(V1);
  if (!Cast1)
    return nullptr;

  *CastOp = Cast1->getOpcode();
  Type *SrcTy = Cast1->getSrcTy();
  if (auto *Cast2 = dyn_cast<CastInst>(V2)) {
    // If V1 and V2 are both the same cast from the same type, look through V1.
    if (*CastOp == Cast2->getOpcode() && SrcTy == Cast2->getSrcTy())
      return Cast2->getOperand(0);
    return nullptr;
  }

  auto *C = dyn_cast<Constant>(V2);
  if (C)
    return lookThroughCastConst(CmpI, SrcTy, C, CastOp);

  Value *CastedTo = nullptr;
  if (*CastOp == Instruction::Trunc) {
    if (match(CmpI->getOperand(1), m_ZExtOrSExt(m_Specific(V2)))) {
      // Here we have the following case:
      //   %y_ext = sext iK %y to iN
      //   %cond = cmp iN %x, %y_ext
      //   %tr = trunc iN %x to iK
      //   %narrowsel = select i1 %cond, iK %tr, iK %y
      //
      // We can always move trunc after select operation:
      //   %y_ext = sext iK %y to iN
      //   %cond = cmp iN %x, %y_ext
      //   %widesel = select i1 %cond, iN %x, iN %y_ext
      //   %tr = trunc iN %widesel to iK
      assert(V2->getType() == Cast1->getType() &&
             "V2 and Cast1 should be the same type.");
      CastedTo = CmpI->getOperand(1);
    }
  }

  return CastedTo;
}

SelectPatternResult llvm::matchSelectPattern(Value *V, Value *&LHS, Value *&RHS,
                                             Instruction::CastOps *CastOp,
                                             unsigned Depth) {
  if (Depth >= MaxAnalysisRecursionDepth)
    return {SPF_UNKNOWN, SPNB_NA, false};

  SelectInst *SI = dyn_cast<SelectInst>(V);
  if (!SI) return {SPF_UNKNOWN, SPNB_NA, false};

  CmpInst *CmpI = dyn_cast<CmpInst>(SI->getCondition());
  if (!CmpI) return {SPF_UNKNOWN, SPNB_NA, false};

  Value *TrueVal = SI->getTrueValue();
  Value *FalseVal = SI->getFalseValue();

  return llvm::matchDecomposedSelectPattern(CmpI, TrueVal, FalseVal, LHS, RHS,
                                            CastOp, Depth);
}

SelectPatternResult llvm::matchDecomposedSelectPattern(
    CmpInst *CmpI, Value *TrueVal, Value *FalseVal, Value *&LHS, Value *&RHS,
    Instruction::CastOps *CastOp, unsigned Depth) {
  CmpInst::Predicate Pred = CmpI->getPredicate();
  Value *CmpLHS = CmpI->getOperand(0);
  Value *CmpRHS = CmpI->getOperand(1);
  FastMathFlags FMF;
  if (isa<FPMathOperator>(CmpI))
    FMF = CmpI->getFastMathFlags();

  // Bail out early.
  if (CmpI->isEquality())
    return {SPF_UNKNOWN, SPNB_NA, false};

  // Deal with type mismatches.
  if (CastOp && CmpLHS->getType() != TrueVal->getType()) {
    if (Value *C = lookThroughCast(CmpI, TrueVal, FalseVal, CastOp)) {
      // If this is a potential fmin/fmax with a cast to integer, then ignore
      // -0.0 because there is no corresponding integer value.
      if (*CastOp == Instruction::FPToSI || *CastOp == Instruction::FPToUI)
        FMF.setNoSignedZeros();
      return ::matchSelectPattern(Pred, FMF, CmpLHS, CmpRHS,
                                  cast<CastInst>(TrueVal)->getOperand(0), C,
                                  LHS, RHS, Depth);
    }
    if (Value *C = lookThroughCast(CmpI, FalseVal, TrueVal, CastOp)) {
      // If this is a potential fmin/fmax with a cast to integer, then ignore
      // -0.0 because there is no corresponding integer value.
      if (*CastOp == Instruction::FPToSI || *CastOp == Instruction::FPToUI)
        FMF.setNoSignedZeros();
      return ::matchSelectPattern(Pred, FMF, CmpLHS, CmpRHS,
                                  C, cast<CastInst>(FalseVal)->getOperand(0),
                                  LHS, RHS, Depth);
    }
  }
  return ::matchSelectPattern(Pred, FMF, CmpLHS, CmpRHS, TrueVal, FalseVal,
                              LHS, RHS, Depth);
}
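
// Illustrative example (added; %x and %y are hypothetical values): for
//   %c = fcmp nnan olt float %x, %y
//   %s = select i1 %c, float %x, float %y
// matchDecomposedSelectPattern reports SPF_FMINNUM with NaN behavior
// SPNB_RETURNS_ANY (the nnan flag makes both inputs known non-NaN), and
// LHS/RHS are set to %x and %y.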
CmpInst::Predicate llvm::getMinMaxPred(SelectPatternFlavor SPF, bool Ordered) {
  if (SPF == SPF_SMIN) return ICmpInst::ICMP_SLT;
  if (SPF == SPF_UMIN) return ICmpInst::ICMP_ULT;
  if (SPF == SPF_SMAX) return ICmpInst::ICMP_SGT;
  if (SPF == SPF_UMAX) return ICmpInst::ICMP_UGT;
  if (SPF == SPF_FMINNUM)
    return Ordered ? FCmpInst::FCMP_OLT : FCmpInst::FCMP_ULT;
  if (SPF == SPF_FMAXNUM)
    return Ordered ? FCmpInst::FCMP_OGT : FCmpInst::FCMP_UGT;
  llvm_unreachable("unhandled!");
}

Intrinsic::ID llvm::getMinMaxIntrinsic(SelectPatternFlavor SPF) {
  switch (SPF) {
  case SelectPatternFlavor::SPF_UMIN:
    return Intrinsic::umin;
  case SelectPatternFlavor::SPF_UMAX:
    return Intrinsic::umax;
  case SelectPatternFlavor::SPF_SMIN:
    return Intrinsic::smin;
  case SelectPatternFlavor::SPF_SMAX:
    return Intrinsic::smax;
  default:
    llvm_unreachable("Unexpected SPF");
  }
}

SelectPatternFlavor llvm::getInverseMinMaxFlavor(SelectPatternFlavor SPF) {
  if (SPF == SPF_SMIN) return SPF_SMAX;
  if (SPF == SPF_UMIN) return SPF_UMAX;
  if (SPF == SPF_SMAX) return SPF_SMIN;
  if (SPF == SPF_UMAX) return SPF_UMIN;
  llvm_unreachable("unhandled!");
}

Intrinsic::ID llvm::getInverseMinMaxIntrinsic(Intrinsic::ID MinMaxID) {
  switch (MinMaxID) {
  case Intrinsic::smax: return Intrinsic::smin;
  case Intrinsic::smin: return Intrinsic::smax;
  case Intrinsic::umax: return Intrinsic::umin;
  case Intrinsic::umin: return Intrinsic::umax;
  // Please note that the next four intrinsics may produce the same result for
  // the original and the inverted case even if X != Y, because NaN is handled
  // specially.
  case Intrinsic::maximum: return Intrinsic::minimum;
  case Intrinsic::minimum: return Intrinsic::maximum;
  case Intrinsic::maxnum: return Intrinsic::minnum;
  case Intrinsic::minnum: return Intrinsic::maxnum;
  default: llvm_unreachable("Unexpected intrinsic");
  }
}

APInt llvm::getMinMaxLimit(SelectPatternFlavor SPF, unsigned BitWidth) {
  switch (SPF) {
  case SPF_SMAX: return APInt::getSignedMaxValue(BitWidth);
  case SPF_SMIN: return APInt::getSignedMinValue(BitWidth);
  case SPF_UMAX: return APInt::getMaxValue(BitWidth);
  case SPF_UMIN: return APInt::getMinValue(BitWidth);
  default: llvm_unreachable("Unexpected flavor");
  }
}
std::pair<Intrinsic::ID, bool>
llvm::canConvertToMinOrMaxIntrinsic(ArrayRef<Value *> VL) {
  // Check if VL contains select instructions that can be folded into a min/max
  // vector intrinsic and return the intrinsic if it is possible.
  // TODO: Support floating point min/max.
  bool AllCmpSingleUse = true;
  SelectPatternResult SelectPattern;
  SelectPattern.Flavor = SPF_UNKNOWN;
  if (all_of(VL, [&SelectPattern, &AllCmpSingleUse](Value *I) {
        Value *LHS, *RHS;
        auto CurrentPattern = matchSelectPattern(I, LHS, RHS);
        if (!SelectPatternResult::isMinOrMax(CurrentPattern.Flavor))
          return false;
        if (SelectPattern.Flavor != SPF_UNKNOWN &&
            SelectPattern.Flavor != CurrentPattern.Flavor)
          return false;
        SelectPattern = CurrentPattern;
        AllCmpSingleUse &=
            match(I, m_Select(m_OneUse(m_Value()), m_Value(), m_Value()));
        return true;
      })) {
    switch (SelectPattern.Flavor) {
    case SPF_SMIN:
      return {Intrinsic::smin, AllCmpSingleUse};
    case SPF_UMIN:
      return {Intrinsic::umin, AllCmpSingleUse};
    case SPF_SMAX:
      return {Intrinsic::smax, AllCmpSingleUse};
    case SPF_UMAX:
      return {Intrinsic::umax, AllCmpSingleUse};
    case SPF_FMAXNUM:
      return {Intrinsic::maxnum, AllCmpSingleUse};
    case SPF_FMINNUM:
      return {Intrinsic::minnum, AllCmpSingleUse};
    default:
      llvm_unreachable("unexpected select pattern flavor");
    }
  }
  return {Intrinsic::not_intrinsic, false};
}
bool llvm::matchSimpleRecurrence(const PHINode *P, BinaryOperator *&BO,
                                 Value *&Start, Value *&Step) {
  // Handle the case of a simple two-predecessor recurrence PHI.
  // There's a lot more that could theoretically be done here, but
  // this is sufficient to catch some interesting cases.
  if (P->getNumIncomingValues() != 2)
    return false;

  for (unsigned i = 0; i != 2; ++i) {
    Value *L = P->getIncomingValue(i);
    Value *R = P->getIncomingValue(!i);
    auto *LU = dyn_cast<BinaryOperator>(L);
    if (!LU)
      continue;
    unsigned Opcode = LU->getOpcode();

    switch (Opcode) {
    default:
      continue;
    // TODO: Expand list -- xor, gep, uadd.sat etc.
    case Instruction::LShr:
    case Instruction::AShr:
    case Instruction::Shl:
    case Instruction::Add:
    case Instruction::Sub:
    case Instruction::UDiv:
    case Instruction::URem:
    case Instruction::And:
    case Instruction::Or:
    case Instruction::Mul:
    case Instruction::FMul: {
      Value *LL = LU->getOperand(0);
      Value *LR = LU->getOperand(1);
      // Find a recurrence.
      if (LL == P)
        L = LR;
      else if (LR == P)
        L = LL;
      else
        continue; // Check for recurrence with L and R flipped.

      break; // Match!
    }
    };

    // We have matched a recurrence of the form:
    //   %iv = [R, %entry], [%iv.next, %backedge]
    //   %iv.next = binop %iv, L
    // OR
    //   %iv = [R, %entry], [%iv.next, %backedge]
    //   %iv.next = binop L, %iv
    BO = LU;
    Start = R;
    Step = L;
    return true;
  }
  return false;
}
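
// Illustrative example (added): for the loop-header PHI
//   %iv      = phi i32 [ 0, %entry ], [ %iv.next, %backedge ]
//   %iv.next = add i32 %iv, 4
// matchSimpleRecurrence sets BO = %iv.next, Start = 0 and Step = 4.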
bool llvm::matchSimpleRecurrence(const BinaryOperator *I, PHINode *&P,
                                 Value *&Start, Value *&Step) {
  BinaryOperator *BO = nullptr;
  P = dyn_cast<PHINode>(I->getOperand(0));
  if (!P)
    P = dyn_cast<PHINode>(I->getOperand(1));
  return P && matchSimpleRecurrence(P, BO, Start, Step) && BO == I;
}
/// Return true if "icmp Pred LHS RHS" is always true.
static bool isTruePredicate(CmpInst::Predicate Pred, const Value *LHS,
                            const Value *RHS) {
  if (ICmpInst::isTrueWhenEqual(Pred) && LHS == RHS)
    return true;

  switch (Pred) {
  default:
    return false;

  case CmpInst::ICMP_SLE: {
    const APInt *C;

    // LHS s<= LHS +_{nsw} C   if C >= 0
    // LHS s<= LHS | C         if C >= 0
    if (match(RHS, m_NSWAdd(m_Specific(LHS), m_APInt(C))) ||
        match(RHS, m_Or(m_Specific(LHS), m_APInt(C))))
      return !C->isNegative();

    // LHS s<= smax(LHS, V) for any V
    if (match(RHS, m_c_SMax(m_Specific(LHS), m_Value())))
      return true;

    // smin(RHS, V) s<= RHS for any V
    if (match(LHS, m_c_SMin(m_Specific(RHS), m_Value())))
      return true;

    // Match A to (X +_{nsw} CA) and B to (X +_{nsw} CB)
    const Value *X;
    const APInt *CLHS, *CRHS;
    if (match(LHS, m_NSWAddLike(m_Value(X), m_APInt(CLHS))) &&
        match(RHS, m_NSWAddLike(m_Specific(X), m_APInt(CRHS))))
      return CLHS->sle(*CRHS);

    return false;
  }

  case CmpInst::ICMP_ULE: {
    // LHS u<= LHS +_{nuw} V for any V
    if (match(RHS, m_c_Add(m_Specific(LHS), m_Value())) &&
        cast<OverflowingBinaryOperator>(RHS)->hasNoUnsignedWrap())
      return true;

    // LHS u<= LHS | V for any V
    if (match(RHS, m_c_Or(m_Specific(LHS), m_Value())))
      return true;

    // LHS u<= umax(LHS, V) for any V
    if (match(RHS, m_c_UMax(m_Specific(LHS), m_Value())))
      return true;

    // RHS >> V u<= RHS for any V
    if (match(LHS, m_LShr(m_Specific(RHS), m_Value())))
      return true;

    // RHS u/ C_ugt_1 u<= RHS
    const APInt *C;
    if (match(LHS, m_UDiv(m_Specific(RHS), m_APInt(C))) && C->ugt(1))
      return true;

    // RHS & V u<= RHS for any V
    if (match(LHS, m_c_And(m_Specific(RHS), m_Value())))
      return true;

    // umin(RHS, V) u<= RHS for any V
    if (match(LHS, m_c_UMin(m_Specific(RHS), m_Value())))
      return true;

    // Match A to (X +_{nuw} CA) and B to (X +_{nuw} CB)
    const Value *X;
    const APInt *CLHS, *CRHS;
    if (match(LHS, m_NUWAddLike(m_Value(X), m_APInt(CLHS))) &&
        match(RHS, m_NUWAddLike(m_Specific(X), m_APInt(CRHS))))
      return CLHS->ule(*CRHS);

    return false;
  }
  }
}
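
// Illustrative examples (added; %x and %y are hypothetical values):
// isTruePredicate(ICMP_ULE, %x, (or %x, %y)) is true for any %y because OR
// can only set bits, and isTruePredicate(ICMP_SLE, %x, (add nsw %x, 3)) is
// true because adding a non-negative constant with nsw cannot decrease the
// signed value.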
/// Return true if "icmp Pred BLHS BRHS" is true whenever "icmp Pred
/// ALHS ARHS" is true.  Otherwise, return std::nullopt.
static std::optional<bool>
isImpliedCondOperands(CmpInst::Predicate Pred, const Value *ALHS,
                      const Value *ARHS, const Value *BLHS, const Value *BRHS) {
  switch (Pred) {
  default:
    return std::nullopt;

  case CmpInst::ICMP_SLT:
  case CmpInst::ICMP_SLE:
    if (isTruePredicate(CmpInst::ICMP_SLE, BLHS, ALHS) &&
        isTruePredicate(CmpInst::ICMP_SLE, ARHS, BRHS))
      return true;
    return std::nullopt;

  case CmpInst::ICMP_SGT:
  case CmpInst::ICMP_SGE:
    if (isTruePredicate(CmpInst::ICMP_SLE, ALHS, BLHS) &&
        isTruePredicate(CmpInst::ICMP_SLE, BRHS, ARHS))
      return true;
    return std::nullopt;

  case CmpInst::ICMP_ULT:
  case CmpInst::ICMP_ULE:
    if (isTruePredicate(CmpInst::ICMP_ULE, BLHS, ALHS) &&
        isTruePredicate(CmpInst::ICMP_ULE, ARHS, BRHS))
      return true;
    return std::nullopt;

  case CmpInst::ICMP_UGT:
  case CmpInst::ICMP_UGE:
    if (isTruePredicate(CmpInst::ICMP_ULE, ALHS, BLHS) &&
        isTruePredicate(CmpInst::ICMP_ULE, BRHS, ARHS))
      return true;
    return std::nullopt;
  }
}
/// Return true if "icmp LPred X, LCR" implies "icmp RPred X, RCR" is true.
/// Return false if "icmp LPred X, LCR" implies "icmp RPred X, RCR" is false.
/// Otherwise, return std::nullopt if we can't infer anything.
static std::optional<bool>
isImpliedCondCommonOperandWithCR(CmpPredicate LPred, const ConstantRange &LCR,
                                 CmpPredicate RPred, const ConstantRange &RCR) {
  auto CRImpliesPred = [&](ConstantRange CR,
                           CmpInst::Predicate Pred) -> std::optional<bool> {
    // If all true values for lhs and true for rhs, lhs implies rhs
    if (CR.icmp(Pred, RCR))
      return true;

    // If there is no overlap, lhs implies not rhs
    if (CR.icmp(CmpInst::getInversePredicate(Pred), RCR))
      return false;

    return std::nullopt;
  };
  if (auto Res = CRImpliesPred(ConstantRange::makeAllowedICmpRegion(LPred, LCR),
                               RPred))
    return Res;
  if (LPred.hasSameSign() ^ RPred.hasSameSign()) {
    LPred = LPred.hasSameSign() ? ICmpInst::getFlippedSignednessPredicate(LPred)
                                : static_cast<CmpInst::Predicate>(LPred);
    RPred = RPred.hasSameSign() ? ICmpInst::getFlippedSignednessPredicate(RPred)
                                : static_cast<CmpInst::Predicate>(RPred);
    return CRImpliesPred(ConstantRange::makeAllowedICmpRegion(LPred, LCR),
                         RPred);
  }
  return std::nullopt;
}
/// Return true if LHS implies RHS (expanded to its components as "R0 RPred R1")
/// is true.  Return false if LHS implies RHS is false. Otherwise, return
/// std::nullopt if we can't infer anything.
static std::optional<bool>
isImpliedCondICmps(const ICmpInst *LHS, CmpPredicate RPred, const Value *R0,
                   const Value *R1, const DataLayout &DL, bool LHSIsTrue) {
  Value *L0 = LHS->getOperand(0);
  Value *L1 = LHS->getOperand(1);

  // The rest of the logic assumes the LHS condition is true.  If that's not the
  // case, invert the predicate to make it so.
  CmpPredicate LPred =
      LHSIsTrue ? LHS->getCmpPredicate() : LHS->getInverseCmpPredicate();

  // We can have non-canonical operands, so try to normalize any common operand
  // to L0/R0.
  if (L0 == R1) {
    std::swap(R0, R1);
    RPred = ICmpInst::getSwappedCmpPredicate(RPred);
  }
  if (R0 == L1) {
    std::swap(L0, L1);
    LPred = ICmpInst::getSwappedCmpPredicate(LPred);
  }
  if (L1 == R1) {
    // If we have L0 == R0 and L1 == R1, then make L1/R1 the constants.
    if (L0 != R0 || match(L0, m_ImmConstant())) {
      std::swap(L0, L1);
      LPred = ICmpInst::getSwappedCmpPredicate(LPred);
      std::swap(R0, R1);
      RPred = ICmpInst::getSwappedCmpPredicate(RPred);
    }
  }

  // See if we can infer anything if operand-0 matches and we have at least one
  // constant.
  const APInt *Unused;
  if (L0 == R0 && (match(L1, m_APInt(Unused)) || match(R1, m_APInt(Unused)))) {
    // Potential TODO: We could also further use the constant range of L0/R0 to
    // further constraint the constant ranges. At the moment this leads to
    // several regressions related to not transforming `multi_use(A + C0) eq/ne
    // C1` (see discussion: D58633).
    ConstantRange LCR = computeConstantRange(
        L1, ICmpInst::isSigned(LPred), /* UseInstrInfo=*/true, /*AC=*/nullptr,
        /*CxtI=*/nullptr, /*DT=*/nullptr, MaxAnalysisRecursionDepth - 1);
    ConstantRange RCR = computeConstantRange(
        R1, ICmpInst::isSigned(RPred), /* UseInstrInfo=*/true, /*AC=*/nullptr,
        /*CxtI=*/nullptr, /*DT=*/nullptr, MaxAnalysisRecursionDepth - 1);
    // Even if L1/R1 are not both constant, we can still sometimes deduce
    // relationship from a single constant. For example X u> Y implies X != 0.
    if (auto R = isImpliedCondCommonOperandWithCR(LPred, LCR, RPred, RCR))
      return R;
    // If both L1/R1 were exact constant ranges and we didn't get anything
    // here, we won't be able to deduce this.
    if (match(L1, m_APInt(Unused)) && match(R1, m_APInt(Unused)))
      return std::nullopt;
  }

  // Can we infer anything when the two compares have matching operands?
  if (L0 == R0 && L1 == R1)
    return ICmpInst::isImpliedByMatchingCmp(LPred, RPred);

  // It only really makes sense in the context of signed comparison for "X - Y
  // must be positive if X >= Y and no overflow".
  // Take SGT as an example:  L0:x > L1:y and C >= 0
  //                      ==> R0:(x -nsw y) < R1:(-C) is false
  CmpInst::Predicate SignedLPred = LPred.getPreferredSignedPredicate();
  if ((SignedLPred == ICmpInst::ICMP_SGT ||
       SignedLPred == ICmpInst::ICMP_SGE) &&
      match(R0, m_NSWSub(m_Specific(L0), m_Specific(L1)))) {
    if (match(R1, m_NonPositive()) &&
        ICmpInst::isImpliedByMatchingCmp(SignedLPred, RPred) == false)
      return false;
  }

  // Take SLT as an example:  L0:x < L1:y and C <= 0
  //                      ==> R0:(x -nsw y) < R1:(-C) is true
  if ((SignedLPred == ICmpInst::ICMP_SLT ||
       SignedLPred == ICmpInst::ICMP_SLE) &&
      match(R0, m_NSWSub(m_Specific(L0), m_Specific(L1)))) {
    if (match(R1, m_NonNegative()) &&
        ICmpInst::isImpliedByMatchingCmp(SignedLPred, RPred) == true)
      return true;
  }

  // L0 = R0 = L1 + R1, L0 >=u L1 implies R0 >=u R1, L0 <u L1 implies R0 <u R1
  if (L0 == R0 &&
      (LPred == ICmpInst::ICMP_ULT || LPred == ICmpInst::ICMP_UGE) &&
      (RPred == ICmpInst::ICMP_ULT || RPred == ICmpInst::ICMP_UGE) &&
      match(L0, m_c_Add(m_Specific(L1), m_Specific(R1))))
    return CmpPredicate::getMatching(LPred, RPred).has_value();

  if (auto P = CmpPredicate::getMatching(LPred, RPred))
    return isImpliedCondOperands(*P, L0, L1, R0, R1);

  return std::nullopt;
}
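
// Illustrative example (added; %x is a hypothetical value): if LHS is
// (icmp ugt i32 %x, 10) and is known true, then for RHS (icmp ne i32 %x, 0)
// the common-operand constant-range path above deduces true: the allowed
// region [11, UINT_MAX] for %x never intersects {0}.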
/// Return true if LHS implies RHS is true.  Return false if LHS implies RHS is
/// false.  Otherwise, return std::nullopt if we can't infer anything.  We
/// expect the RHS to be an icmp and the LHS to be an 'and', 'or', or a 'select'
/// instruction.
static std::optional<bool>
isImpliedCondAndOr(const Instruction *LHS, CmpPredicate RHSPred,
                   const Value *RHSOp0, const Value *RHSOp1,
                   const DataLayout &DL, bool LHSIsTrue, unsigned Depth) {
  // The LHS must be an 'or', 'and', or a 'select' instruction.
  assert((LHS->getOpcode() == Instruction::And ||
          LHS->getOpcode() == Instruction::Or ||
          LHS->getOpcode() == Instruction::Select) &&
         "Expected LHS to be 'and', 'or', or 'select'.");

  assert(Depth <= MaxAnalysisRecursionDepth && "Hit recursion limit");

  // If the result of an 'or' is false, then we know both legs of the 'or' are
  // false.  Similarly, if the result of an 'and' is true, then we know both
  // legs of the 'and' are true.
  const Value *ALHS, *ARHS;
  if ((!LHSIsTrue && match(LHS, m_LogicalOr(m_Value(ALHS), m_Value(ARHS)))) ||
      (LHSIsTrue && match(LHS, m_LogicalAnd(m_Value(ALHS), m_Value(ARHS))))) {
    // FIXME: Make this non-recursion.
    if (std::optional<bool> Implication = isImpliedCondition(
            ALHS, RHSPred, RHSOp0, RHSOp1, DL, LHSIsTrue, Depth + 1))
      return Implication;
    if (std::optional<bool> Implication = isImpliedCondition(
            ARHS, RHSPred, RHSOp0, RHSOp1, DL, LHSIsTrue, Depth + 1))
      return Implication;
    return std::nullopt;
  }
  return std::nullopt;
}
std::optional<bool>
llvm::isImpliedCondition(const Value *LHS, CmpPredicate RHSPred,
                         const Value *RHSOp0, const Value *RHSOp1,
                         const DataLayout &DL, bool LHSIsTrue, unsigned Depth) {
  // Bail out when we hit the limit.
  if (Depth == MaxAnalysisRecursionDepth)
    return std::nullopt;

  // A mismatch occurs when we compare a scalar cmp to a vector cmp, for
  // example.
  if (RHSOp0->getType()->isVectorTy() != LHS->getType()->isVectorTy())
    return std::nullopt;

  assert(LHS->getType()->isIntOrIntVectorTy(1) &&
         "Expected integer type only!");

  // Match not.
  if (match(LHS, m_Not(m_Value(LHS))))
    LHSIsTrue = !LHSIsTrue;

  // Both LHS and RHS are icmps.
  const ICmpInst *LHSCmp = dyn_cast<ICmpInst>(LHS);
  if (LHSCmp)
    return isImpliedCondICmps(LHSCmp, RHSPred, RHSOp0, RHSOp1, DL, LHSIsTrue);

  /// The LHS should be an 'or', 'and', or a 'select' instruction.  We expect
  /// the RHS to be an icmp.
  /// FIXME: Add support for and/or/select on the RHS.
  if (const Instruction *LHSI = dyn_cast<Instruction>(LHS)) {
    if ((LHSI->getOpcode() == Instruction::And ||
         LHSI->getOpcode() == Instruction::Or ||
         LHSI->getOpcode() == Instruction::Select))
      return isImpliedCondAndOr(LHSI, RHSPred, RHSOp0, RHSOp1, DL, LHSIsTrue,
                                Depth);
  }
  return std::nullopt;
}
std::optional<bool> llvm::isImpliedCondition(const Value *LHS, const Value *RHS,
                                             const DataLayout &DL,
                                             bool LHSIsTrue, unsigned Depth) {
  // LHS ==> RHS by definition
  if (LHS == RHS)
    return LHSIsTrue;

  // Match not.
  bool InvertRHS = false;
  if (match(RHS, m_Not(m_Value(RHS)))) {
    if (LHS == RHS)
      return !LHSIsTrue;
    InvertRHS = true;
  }

  if (const ICmpInst *RHSCmp = dyn_cast<ICmpInst>(RHS)) {
    if (auto Implied = isImpliedCondition(
            LHS, RHSCmp->getCmpPredicate(), RHSCmp->getOperand(0),
            RHSCmp->getOperand(1), DL, LHSIsTrue, Depth))
      return InvertRHS ? !*Implied : *Implied;
    return std::nullopt;
  }

  if (Depth == MaxAnalysisRecursionDepth)
    return std::nullopt;

  // LHS ==> (RHS1 || RHS2) if LHS ==> RHS1 or LHS ==> RHS2
  // LHS ==> !(RHS1 && RHS2) if LHS ==> !RHS1 or LHS ==> !RHS2
  const Value *RHS1, *RHS2;
  if (match(RHS, m_LogicalOr(m_Value(RHS1), m_Value(RHS2)))) {
    if (std::optional<bool> Imp =
            isImpliedCondition(LHS, RHS1, DL, LHSIsTrue, Depth + 1))
      if (*Imp == true)
        return !InvertRHS;
    if (std::optional<bool> Imp =
            isImpliedCondition(LHS, RHS2, DL, LHSIsTrue, Depth + 1))
      if (*Imp == true)
        return !InvertRHS;
  }
  if (match(RHS, m_LogicalAnd(m_Value(RHS1), m_Value(RHS2)))) {
    if (std::optional<bool> Imp =
            isImpliedCondition(LHS, RHS1, DL, LHSIsTrue, Depth + 1))
      if (*Imp == false)
        return InvertRHS;
    if (std::optional<bool> Imp =
            isImpliedCondition(LHS, RHS2, DL, LHSIsTrue, Depth + 1))
      if (*Imp == false)
        return InvertRHS;
  }

  return std::nullopt;
}
// Returns a pair (Condition, ConditionIsTrue), where Condition is a branch
// condition dominating ContextI or nullptr, if no condition is found.
static std::pair<Value *, bool>
getDomPredecessorCondition(const Instruction *ContextI) {
  if (!ContextI || !ContextI->getParent())
    return {nullptr, false};

  // TODO: This is a poor/cheap way to determine dominance. Should we use a
  // dominator tree (eg, from a SimplifyQuery) instead?
  const BasicBlock *ContextBB = ContextI->getParent();
  const BasicBlock *PredBB = ContextBB->getSinglePredecessor();
  if (!PredBB)
    return {nullptr, false};

  // We need a conditional branch in the predecessor.
  Value *PredCond;
  BasicBlock *TrueBB, *FalseBB;
  if (!match(PredBB->getTerminator(), m_Br(m_Value(PredCond), TrueBB, FalseBB)))
    return {nullptr, false};

  // The branch should get simplified. Don't bother simplifying this condition.
  if (TrueBB == FalseBB)
    return {nullptr, false};

  assert((TrueBB == ContextBB || FalseBB == ContextBB) &&
         "Predecessor block does not point to successor?");

  // Is this condition implied by the predecessor condition?
  return {PredCond, TrueBB == ContextBB};
}
std::optional<bool> llvm::isImpliedByDomCondition(const Value *Cond,
                                                  const Instruction *ContextI,
                                                  const DataLayout &DL) {
  assert(Cond->getType()->isIntOrIntVectorTy(1) && "Condition must be bool");
  auto PredCond = getDomPredecessorCondition(ContextI);
  if (PredCond.first)
    return isImpliedCondition(PredCond.first, Cond, DL, PredCond.second);
  return std::nullopt;
}

std::optional<bool> llvm::isImpliedByDomCondition(CmpPredicate Pred,
                                                  const Value *LHS,
                                                  const Value *RHS,
                                                  const Instruction *ContextI,
                                                  const DataLayout &DL) {
  auto PredCond = getDomPredecessorCondition(ContextI);
  if (PredCond.first)
    return isImpliedCondition(PredCond.first, Pred, LHS, RHS, DL,
                              PredCond.second);
  return std::nullopt;
}
static void setLimitsForBinOp(const BinaryOperator &BO, APInt &Lower,
                              APInt &Upper, const InstrInfoQuery &IIQ,
                              bool PreferSignedRange) {
  unsigned Width = Lower.getBitWidth();
  const APInt *C;
  switch (BO.getOpcode()) {
  case Instruction::Add:
    if (match(BO.getOperand(1), m_APInt(C)) && !C->isZero()) {
      bool HasNSW = IIQ.hasNoSignedWrap(&BO);
      bool HasNUW = IIQ.hasNoUnsignedWrap(&BO);

      // If the caller expects a signed compare, then try to use a signed range.
      // Otherwise if both no-wraps are set, use the unsigned range because it
      // is never larger than the signed range. Example:
      // "add nuw nsw i8 X, -2" is unsigned [254,255] vs. signed [-128, 125].
      if (PreferSignedRange && HasNSW && HasNUW)
        HasNUW = false;

      if (HasNUW) {
        // 'add nuw x, C' produces [C, UINT_MAX].
        Lower = *C;
      } else if (HasNSW) {
        if (C->isNegative()) {
          // 'add nsw x, -C' produces [SINT_MIN, SINT_MAX - C].
          Lower = APInt::getSignedMinValue(Width);
          Upper = APInt::getSignedMaxValue(Width) + *C + 1;
        } else {
          // 'add nsw x, +C' produces [SINT_MIN + C, SINT_MAX].
          Lower = APInt::getSignedMinValue(Width) + *C;
          Upper = APInt::getSignedMaxValue(Width) + 1;
        }
      }
    }
    break;

  case Instruction::And:
    if (match(BO.getOperand(1), m_APInt(C)))
      // 'and x, C' produces [0, C].
      Upper = *C + 1;
    // X & -X is a power of two or zero. So we can cap the value at max power of
    // two.
    if (match(BO.getOperand(0), m_Neg(m_Specific(BO.getOperand(1)))) ||
        match(BO.getOperand(1), m_Neg(m_Specific(BO.getOperand(0)))))
      Upper = APInt::getSignedMinValue(Width) + 1;
    break;

  case Instruction::Or:
    if (match(BO.getOperand(1), m_APInt(C)))
      // 'or x, C' produces [C, UINT_MAX].
      Lower = *C;
    break;

  case Instruction::AShr:
    if (match(BO.getOperand(1), m_APInt(C)) && C->ult(Width)) {
      // 'ashr x, C' produces [INT_MIN >> C, INT_MAX >> C].
      Lower = APInt::getSignedMinValue(Width).ashr(*C);
      Upper = APInt::getSignedMaxValue(Width).ashr(*C) + 1;
    } else if (match(BO.getOperand(0), m_APInt(C))) {
      unsigned ShiftAmount = Width - 1;
      if (!C->isZero() && IIQ.isExact(&BO))
        ShiftAmount = C->countr_zero();
      if (C->isNegative()) {
        // 'ashr C, x' produces [C, C >> (Width-1)]
        Lower = *C;
        Upper = C->ashr(ShiftAmount) + 1;
      } else {
        // 'ashr C, x' produces [C >> (Width-1), C]
        Lower = C->ashr(ShiftAmount);
        Upper = *C + 1;
      }
    }
    break;

  case Instruction::LShr:
    if (match(BO.getOperand(1), m_APInt(C)) && C->ult(Width)) {
      // 'lshr x, C' produces [0, UINT_MAX >> C].
      Upper = APInt::getAllOnes(Width).lshr(*C) + 1;
    } else if (match(BO.getOperand(0), m_APInt(C))) {
      // 'lshr C, x' produces [C >> (Width-1), C].
      unsigned ShiftAmount = Width - 1;
      if (!C->isZero() && IIQ.isExact(&BO))
        ShiftAmount = C->countr_zero();
      Lower = C->lshr(ShiftAmount);
      Upper = *C + 1;
    }
    break;

  case Instruction::Shl:
    if (match(BO.getOperand(0), m_APInt(C))) {
      if (IIQ.hasNoUnsignedWrap(&BO)) {
        // 'shl nuw C, x' produces [C, C << CLZ(C)]
        Lower = *C;
        Upper = Lower.shl(Lower.countl_zero()) + 1;
      } else if (BO.hasNoSignedWrap()) { // TODO: What if both nuw+nsw?
        if (C->isNegative()) {
          // 'shl nsw C, x' produces [C << CLO(C)-1, C]
          unsigned ShiftAmount = C->countl_one() - 1;
          Lower = C->shl(ShiftAmount);
          Upper = *C + 1;
        } else {
          // 'shl nsw C, x' produces [C, C << CLZ(C)-1]
          unsigned ShiftAmount = C->countl_zero() - 1;
          Lower = *C;
          Upper = C->shl(ShiftAmount) + 1;
        }
      } else {
        // If lowbit is set, value can never be zero.
        if ((*C)[0])
          Lower = APInt::getOneBitSet(Width, 0);
        // If we are shifting a constant the largest it can be is if the longest
        // sequence of consecutive ones is shifted to the highbits (breaking
        // ties for which sequence is higher). At the moment we take a liberal
        // upper bound on this by just popcounting the constant.
        // TODO: There may be a bitwise trick to find the longest/highest
        // consecutive sequence of ones (the naive method is an O(Width) loop).
        Upper = APInt::getHighBitsSet(Width, C->popcount()) + 1;
      }
    } else if (match(BO.getOperand(1), m_APInt(C)) && C->ult(Width)) {
      Upper = APInt::getBitsSetFrom(Width, C->getZExtValue()) + 1;
    }
    break;

  case Instruction::SDiv:
    if (match(BO.getOperand(1), m_APInt(C))) {
      APInt IntMin = APInt::getSignedMinValue(Width);
      APInt IntMax = APInt::getSignedMaxValue(Width);
      if (C->isAllOnes()) {
        // 'sdiv x, -1' produces [INT_MIN + 1, INT_MAX]
        //    where C != -1 and C != 0 and C != 1
        Lower = IntMin + 1;
        Upper = IntMax + 1;
      } else if (C->countl_zero() < Width - 1) {
        // 'sdiv x, C' produces [INT_MIN / C, INT_MAX / C]
        //    where C != -1 and C != 0 and C != 1
        Lower = IntMin.sdiv(*C);
        Upper = IntMax.sdiv(*C);
        if (Lower.sgt(Upper))
          std::swap(Lower, Upper);
        Upper = Upper + 1;
        assert(Upper != Lower && "Upper part of range has wrapped!");
      }
    } else if (match(BO.getOperand(0), m_APInt(C))) {
      if (C->isMinSignedValue()) {
        // 'sdiv INT_MIN, x' produces [INT_MIN, INT_MIN / -2].
        Lower = *C;
        Upper = Lower.lshr(1) + 1;
      } else {
        // 'sdiv C, x' produces [-|C|, |C|].
        Upper = C->abs() + 1;
        Lower = (-Upper) + 1;
      }
    }
    break;

  case Instruction::UDiv:
    if (match(BO.getOperand(1), m_APInt(C)) && !C->isZero()) {
      // 'udiv x, C' produces [0, UINT_MAX / C].
      Upper = APInt::getMaxValue(Width).udiv(*C) + 1;
    } else if (match(BO.getOperand(0), m_APInt(C))) {
      // 'udiv C, x' produces [0, C].
      Upper = *C + 1;
    }
    break;

  case Instruction::SRem:
    if (match(BO.getOperand(1), m_APInt(C))) {
      // 'srem x, C' produces (-|C|, |C|).
      Upper = C->abs();
      Lower = (-Upper) + 1;
    } else if (match(BO.getOperand(0), m_APInt(C))) {
      if (C->isNegative()) {
        // 'srem -|C|, x' produces [-|C|, 0].
        Lower = *C;
        Upper = 1;
      } else {
        // 'srem |C|, x' produces [0, |C|].
        Upper = *C + 1;
      }
    }
    break;

  case Instruction::URem:
    if (match(BO.getOperand(1), m_APInt(C)))
      // 'urem x, C' produces [0, C).
      Upper = *C;
    else if (match(BO.getOperand(0), m_APInt(C)))
      // 'urem C, x' produces [0, C].
      Upper = *C + 1;
    break;

  default:
    break;
  }
}
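
// Illustrative example (added): for "%r = lshr i8 %x, 3" the code above
// produces [Lower, Upper) = [0, 32), i.e. values 0..31, and for
// "%r = add nuw i8 %x, 10" it produces the wrapped range [10, 0), i.e.
// values 10..255. The caller converts the pair into a ConstantRange.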
static ConstantRange getRangeForIntrinsic(const IntrinsicInst &II,
                                          bool UseInstrInfo) {
  unsigned Width = II.getType()->getScalarSizeInBits();
  const APInt *C;
  switch (II.getIntrinsicID()) {
  case Intrinsic::ctlz:
  case Intrinsic::cttz: {
    APInt Upper(Width, Width);
    if (!UseInstrInfo || !match(II.getArgOperand(1), m_One()))
      Upper += 1;
    // Maximum of set/clear bits is the bit width.
    return ConstantRange::getNonEmpty(APInt::getZero(Width), Upper);
  }
  case Intrinsic::ctpop:
    // Maximum of set/clear bits is the bit width.
    return ConstantRange::getNonEmpty(APInt::getZero(Width),
                                      APInt(Width, Width) + 1);
  case Intrinsic::uadd_sat:
    // uadd.sat(x, C) produces [C, UINT_MAX].
    if (match(II.getOperand(0), m_APInt(C)) ||
        match(II.getOperand(1), m_APInt(C)))
      return ConstantRange::getNonEmpty(*C, APInt::getZero(Width));
    break;
  case Intrinsic::sadd_sat:
    if (match(II.getOperand(0), m_APInt(C)) ||
        match(II.getOperand(1), m_APInt(C))) {
      if (C->isNegative())
        // sadd.sat(x, -C) produces [SINT_MIN, SINT_MAX + (-C)].
        return ConstantRange::getNonEmpty(APInt::getSignedMinValue(Width),
                                          APInt::getSignedMaxValue(Width) + *C +
                                              1);

      // sadd.sat(x, +C) produces [SINT_MIN + C, SINT_MAX].
      return ConstantRange::getNonEmpty(APInt::getSignedMinValue(Width) + *C,
                                        APInt::getSignedMaxValue(Width) + 1);
    }
    break;
  case Intrinsic::usub_sat:
    // usub.sat(C, x) produces [0, C].
    if (match(II.getOperand(0), m_APInt(C)))
      return ConstantRange::getNonEmpty(APInt::getZero(Width), *C + 1);

    // usub.sat(x, C) produces [0, UINT_MAX - C].
    if (match(II.getOperand(1), m_APInt(C)))
      return ConstantRange::getNonEmpty(APInt::getZero(Width),
                                        APInt::getMaxValue(Width) - *C + 1);
    break;
  case Intrinsic::ssub_sat:
    if (match(II.getOperand(0), m_APInt(C))) {
      if (C->isNegative())
        // ssub.sat(-C, x) produces [SINT_MIN, -SINT_MIN + (-C)].
        return ConstantRange::getNonEmpty(APInt::getSignedMinValue(Width),
                                          *C - APInt::getSignedMinValue(Width) +
                                              1);

      // ssub.sat(+C, x) produces [-SINT_MAX + C, SINT_MAX].
      return ConstantRange::getNonEmpty(*C - APInt::getSignedMaxValue(Width),
                                        APInt::getSignedMaxValue(Width) + 1);
    } else if (match(II.getOperand(1), m_APInt(C))) {
      if (C->isNegative())
        // ssub.sat(x, -C) produces [SINT_MIN - (-C), SINT_MAX]:
        return ConstantRange::getNonEmpty(APInt::getSignedMinValue(Width) - *C,
                                          APInt::getSignedMaxValue(Width) + 1);

      // ssub.sat(x, +C) produces [SINT_MIN, SINT_MAX - C].
      return ConstantRange::getNonEmpty(APInt::getSignedMinValue(Width),
                                        APInt::getSignedMaxValue(Width) - *C +
                                            1);
    }
    break;
  case Intrinsic::umin:
  case Intrinsic::umax:
  case Intrinsic::smin:
  case Intrinsic::smax:
    if (!match(II.getOperand(0), m_APInt(C)) &&
        !match(II.getOperand(1), m_APInt(C)))
      break;

    switch (II.getIntrinsicID()) {
    case Intrinsic::umin:
      return ConstantRange::getNonEmpty(APInt::getZero(Width), *C + 1);
    case Intrinsic::umax:
      return ConstantRange::getNonEmpty(*C, APInt::getZero(Width));
    case Intrinsic::smin:
      return ConstantRange::getNonEmpty(APInt::getSignedMinValue(Width),
                                        *C + 1);
    case Intrinsic::smax:
      return ConstantRange::getNonEmpty(*C,
                                        APInt::getSignedMaxValue(Width) + 1);
    default:
      llvm_unreachable("Must be min/max intrinsic");
    }
    break;
  case Intrinsic::abs:
    // If abs of SIGNED_MIN is poison, then the result is [0..SIGNED_MAX],
    // otherwise it is [0..SIGNED_MIN], as -SIGNED_MIN == SIGNED_MIN.
    if (match(II.getOperand(1), m_One()))
      return ConstantRange::getNonEmpty(APInt::getZero(Width),
                                        APInt::getSignedMaxValue(Width) + 1);

    return ConstantRange::getNonEmpty(APInt::getZero(Width),
                                      APInt::getSignedMinValue(Width) + 1);
  case Intrinsic::vscale:
    if (!II.getParent() || !II.getFunction())
      break;
    return getVScaleRange(II.getFunction(), Width);
  case Intrinsic::scmp:
  case Intrinsic::ucmp:
    return ConstantRange::getNonEmpty(APInt::getAllOnes(Width),
                                      APInt(Width, 2));
  default:
    break;
  }

  return ConstantRange::getFull(Width);
}
10001 static ConstantRange
getRangeForSelectPattern(const SelectInst
&SI
,
10002 const InstrInfoQuery
&IIQ
) {
10003 unsigned BitWidth
= SI
.getType()->getScalarSizeInBits();
10004 const Value
*LHS
= nullptr, *RHS
= nullptr;
10005 SelectPatternResult R
= matchSelectPattern(&SI
, LHS
, RHS
);
10006 if (R
.Flavor
== SPF_UNKNOWN
)
10007 return ConstantRange::getFull(BitWidth
);
10009 if (R
.Flavor
== SelectPatternFlavor::SPF_ABS
) {
10010 // If the negation part of the abs (in RHS) has the NSW flag,
10011 // then the result of abs(X) is [0..SIGNED_MAX],
10012 // otherwise it is [0..SIGNED_MIN], as -SIGNED_MIN == SIGNED_MIN.
10013 if (match(RHS
, m_Neg(m_Specific(LHS
))) &&
10014 IIQ
.hasNoSignedWrap(cast
<Instruction
>(RHS
)))
10015 return ConstantRange::getNonEmpty(APInt::getZero(BitWidth
),
10016 APInt::getSignedMaxValue(BitWidth
) + 1);
10018 return ConstantRange::getNonEmpty(APInt::getZero(BitWidth
),
10019 APInt::getSignedMinValue(BitWidth
) + 1);
10022 if (R
.Flavor
== SelectPatternFlavor::SPF_NABS
) {
10023 // The result of -abs(X) is <= 0.
10024 return ConstantRange::getNonEmpty(APInt::getSignedMinValue(BitWidth
),
10025 APInt(BitWidth
, 1));
10029 if (!match(LHS
, m_APInt(C
)) && !match(RHS
, m_APInt(C
)))
10030 return ConstantRange::getFull(BitWidth
);
10032 switch (R
.Flavor
) {
10034 return ConstantRange::getNonEmpty(APInt::getZero(BitWidth
), *C
+ 1);
10036 return ConstantRange::getNonEmpty(*C
, APInt::getZero(BitWidth
));
10038 return ConstantRange::getNonEmpty(APInt::getSignedMinValue(BitWidth
),
10041 return ConstantRange::getNonEmpty(*C
,
10042 APInt::getSignedMaxValue(BitWidth
) + 1);
10044 return ConstantRange::getFull(BitWidth
);
10048 static void setLimitForFPToI(const Instruction
*I
, APInt
&Lower
, APInt
&Upper
) {
10049 // The maximum representable value of a half is 65504. For floats the maximum
10050 // value is 3.4e38 which requires roughly 129 bits.
10051 unsigned BitWidth
= I
->getType()->getScalarSizeInBits();
10052 if (!I
->getOperand(0)->getType()->getScalarType()->isHalfTy())
10054 if (isa
<FPToSIInst
>(I
) && BitWidth
>= 17) {
10055 Lower
= APInt(BitWidth
, -65504, true);
10056 Upper
= APInt(BitWidth
, 65505);
10059 if (isa
<FPToUIInst
>(I
) && BitWidth
>= 16) {
10060 // For a fptoui the lower limit is left as 0.
10061 Upper
= APInt(BitWidth
, 65505);
10065 ConstantRange
llvm::computeConstantRange(const Value
*V
, bool ForSigned
,
10066 bool UseInstrInfo
, AssumptionCache
*AC
,
10067 const Instruction
*CtxI
,
10068 const DominatorTree
*DT
,
10070 assert(V
->getType()->isIntOrIntVectorTy() && "Expected integer instruction");
10072 if (Depth
== MaxAnalysisRecursionDepth
)
10073 return ConstantRange::getFull(V
->getType()->getScalarSizeInBits());
10075 if (auto *C
= dyn_cast
<Constant
>(V
))
10076 return C
->toConstantRange();
10078 unsigned BitWidth
= V
->getType()->getScalarSizeInBits();
10079 InstrInfoQuery
IIQ(UseInstrInfo
);
10080 ConstantRange CR
= ConstantRange::getFull(BitWidth
);
10081 if (auto *BO
= dyn_cast
<BinaryOperator
>(V
)) {
10082 APInt Lower
= APInt(BitWidth
, 0);
10083 APInt Upper
= APInt(BitWidth
, 0);
10084 // TODO: Return ConstantRange.
10085 setLimitsForBinOp(*BO
, Lower
, Upper
, IIQ
, ForSigned
);
10086 CR
= ConstantRange::getNonEmpty(Lower
, Upper
);
10087 } else if (auto *II
= dyn_cast
<IntrinsicInst
>(V
))
10088 CR
= getRangeForIntrinsic(*II
, UseInstrInfo
);
10089 else if (auto *SI
= dyn_cast
<SelectInst
>(V
)) {
10090 ConstantRange CRTrue
= computeConstantRange(
10091 SI
->getTrueValue(), ForSigned
, UseInstrInfo
, AC
, CtxI
, DT
, Depth
+ 1);
10092 ConstantRange CRFalse
= computeConstantRange(
10093 SI
->getFalseValue(), ForSigned
, UseInstrInfo
, AC
, CtxI
, DT
, Depth
+ 1);
10094 CR
= CRTrue
.unionWith(CRFalse
);
10095 CR
= CR
.intersectWith(getRangeForSelectPattern(*SI
, IIQ
));
  } else if (isa<FPToUIInst>(V) || isa<FPToSIInst>(V)) {
    APInt Lower = APInt(BitWidth, 0);
    APInt Upper = APInt(BitWidth, 0);
    // TODO: Return ConstantRange.
    setLimitForFPToI(cast<Instruction>(V), Lower, Upper);
    CR = ConstantRange::getNonEmpty(Lower, Upper);
  } else if (const auto *A = dyn_cast<Argument>(V))
    if (std::optional<ConstantRange> Range = A->getRange())
      CR = *Range;

  if (auto *I = dyn_cast<Instruction>(V)) {
    if (auto *Range = IIQ.getMetadata(I, LLVMContext::MD_range))
      CR = CR.intersectWith(getConstantRangeFromMetadata(*Range));

    if (const auto *CB = dyn_cast<CallBase>(V))
      if (std::optional<ConstantRange> Range = CB->getRange())
        CR = CR.intersectWith(*Range);
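    // For example, !range metadata of [0, 10) combined with a `range`
    // return attribute of [2, 20) on a call narrows CR to [2, 10).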
  }

  if (CtxI && AC) {
    // Try to restrict the range based on information from assumptions.
    for (auto &AssumeVH : AC->assumptionsFor(V)) {
      if (!AssumeVH)
        continue;
      CallInst *I = cast<CallInst>(AssumeVH);
      assert(I->getParent()->getParent() == CtxI->getParent()->getParent() &&
             "Got assumption for the wrong function!");
      assert(I->getIntrinsicID() == Intrinsic::assume &&
             "must be an assume intrinsic");

      if (!isValidAssumeForContext(I, CtxI, DT))
        continue;
      Value *Arg = I->getArgOperand(0);
      ICmpInst *Cmp = dyn_cast<ICmpInst>(Arg);
      // Currently we just use information from comparisons.
      if (!Cmp || Cmp->getOperand(0) != V)
        continue;
      // TODO: Set "ForSigned" parameter via Cmp->isSigned()?
      ConstantRange RHS =
          computeConstantRange(Cmp->getOperand(1), /* ForSigned */ false,
                               UseInstrInfo, AC, I, DT, Depth + 1);
      CR = CR.intersectWith(
          ConstantRange::makeAllowedICmpRegion(Cmp->getPredicate(), RHS));
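      // E.g. an `assume(icmp ult %V, 100)` that is valid at CtxI intersects
      // CR with [0, 100).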
    }
  }

  return CR;
}

static void
addValueAffectedByCondition(Value *V,
                            function_ref<void(Value *)> InsertAffected) {
  assert(V != nullptr);
  if (isa<Argument>(V) || isa<GlobalValue>(V)) {
    InsertAffected(V);
  } else if (auto *I = dyn_cast<Instruction>(V)) {
    InsertAffected(V);

    // Peek through unary operators to find the source of the condition.
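    // E.g. if the condition compares `trunc i64 %x to i32`, both the trunc
    // and %x are reported, so facts can be attached to %x as well.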
    Value *Op;
    if (match(I, m_CombineOr(m_PtrToInt(m_Value(Op)), m_Trunc(m_Value(Op))))) {
      if (isa<Instruction>(Op) || isa<Argument>(Op))
        InsertAffected(Op);
    }
  }
}

void llvm::findValuesAffectedByCondition(
    Value *Cond, bool IsAssume, function_ref<void(Value *)> InsertAffected) {
  auto AddAffected = [&InsertAffected](Value *V) {
    addValueAffectedByCondition(V, InsertAffected);
  };

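  // For assumes, both operands of a compare are worth refining; for branch
  // conditions, only the LHS is tracked, and only when the RHS is a constant.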
  auto AddCmpOperands = [&AddAffected, IsAssume](Value *LHS, Value *RHS) {
    if (IsAssume) {
      AddAffected(LHS);
      AddAffected(RHS);
    } else if (match(RHS, m_Constant()))
      AddAffected(LHS);
  };

  SmallVector<Value *, 8> Worklist;
  SmallPtrSet<Value *, 8> Visited;
  Worklist.push_back(Cond);
  while (!Worklist.empty()) {
    Value *V = Worklist.pop_back_val();
    if (!Visited.insert(V).second)
      continue;

    CmpInst::Predicate Pred;
    Value *A, *B, *X;

    if (IsAssume) {
      AddAffected(V);
      if (match(V, m_Not(m_Value(X))))
        AddAffected(X);
    }

    if (match(V, m_LogicalOp(m_Value(A), m_Value(B)))) {
      // assume(A && B) is split to -> assume(A); assume(B);
      // assume(!(A || B)) is split to -> assume(!A); assume(!B);
      // Finally, assume(A || B) / assume(!(A && B)) generally don't provide
      // enough information to be worth handling (intersection of information
      // as opposed to union).
      if (!IsAssume) {
        Worklist.push_back(A);
        Worklist.push_back(B);
      }
    } else if (match(V, m_ICmp(Pred, m_Value(A), m_Value(B)))) {
      AddCmpOperands(A, B);

      bool HasRHSC = match(B, m_ConstantInt());
      if (ICmpInst::isEquality(Pred)) {
        if (HasRHSC) {
          Value *Y;
          // (X & C) or (X | C) or (X ^ C).
          // (X << C) or (X >>_s C) or (X >>_u C).
          if (match(A, m_BitwiseLogic(m_Value(X), m_ConstantInt())) ||
              match(A, m_Shift(m_Value(X), m_ConstantInt())))
            AddAffected(X);
          else if (match(A, m_And(m_Value(X), m_Value(Y))) ||
                   match(A, m_Or(m_Value(X), m_Value(Y)))) {
            AddAffected(X);
            AddAffected(Y);
          }
        }
      } else {
        if (HasRHSC) {
          // Handle (A + C1) u< C2, which is the canonical form of
          // A > C3 && A < C4.
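          // E.g. a check `X s>= 6 && X s< 10` is canonicalized to
          // `(X + -6) u< 4`, so X is recovered from the add-like LHS.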
          if (match(A, m_AddLike(m_Value(X), m_ConstantInt())))
            AddAffected(X);

          if (ICmpInst::isUnsigned(Pred)) {
            Value *Y;
            // X & Y u> C    -> X u> C && Y u> C
            // X | Y u< C    -> X u< C && Y u< C
            // X nuw+ Y u< C -> X u< C && Y u< C
            if (match(A, m_And(m_Value(X), m_Value(Y))) ||
                match(A, m_Or(m_Value(X), m_Value(Y))) ||
                match(A, m_NUWAdd(m_Value(X), m_Value(Y)))) {
              AddAffected(X);
              AddAffected(Y);
            }
            // X nuw- Y u> C -> X u> C
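            // (With nuw, X u>= X - Y, so X - Y u> C implies X u> C.)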
            if (match(A, m_NUWSub(m_Value(X), m_Value())))
              AddAffected(X);
          }
        }

        // Handle icmp slt/sgt (bitcast X to int), 0/-1, which is supported
        // by computeKnownFPClass().
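        // (A signed compare of the raw bits against 0 or -1 is a test of the
        // sign bit of X.)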
        if (match(A, m_ElementWiseBitCast(m_Value(X)))) {
          if (Pred == ICmpInst::ICMP_SLT && match(B, m_Zero()))
            InsertAffected(X);
          else if (Pred == ICmpInst::ICMP_SGT && match(B, m_AllOnes()))
            InsertAffected(X);
        }
      }

      if (HasRHSC && match(A, m_Intrinsic<Intrinsic::ctpop>(m_Value(X))))
        AddAffected(X);
    } else if (match(V, m_FCmp(Pred, m_Value(A), m_Value(B)))) {
      AddCmpOperands(A, B);

      // fcmp fneg(x), y
      // fcmp fabs(x), y
      // fcmp fneg(fabs(x)), y
      if (match(A, m_FNeg(m_Value(A))))
        AddAffected(A);
      if (match(A, m_FAbs(m_Value(A))))
        AddAffected(A);
    } else if (match(V, m_Intrinsic<Intrinsic::is_fpclass>(m_Value(A),
                                                           m_Value()))) {
      // Handle patterns that computeKnownFPClass() supports.