//===- ValueTracking.cpp - Walk computations to compute properties --------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains routines that help analyze properties that chains of
// computations have.
//
//===----------------------------------------------------------------------===//

#include "llvm/Analysis/ValueTracking.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumeBundleQueries.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/DomConditionCache.h"
#include "llvm/Analysis/GuardUtils.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/Analysis/WithCache.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/EHPersonalities.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/TargetParser/RISCVTargetParser.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <optional>
#include <utility>

using namespace llvm;
using namespace llvm::PatternMatch;

// Controls the number of uses of the value searched for possible
// dominating comparisons.
static cl::opt<unsigned> DomConditionsMaxUses("dom-conditions-max-uses",
                                              cl::Hidden, cl::init(20));

/// Returns the bitwidth of the given scalar or pointer type. For vector types,
/// returns the element type's bitwidth.
static unsigned getBitWidth(Type *Ty, const DataLayout &DL) {
  if (unsigned BitWidth = Ty->getScalarSizeInBits())
    return BitWidth;

  return DL.getPointerTypeSizeInBits(Ty);
}

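// Worked example (illustrative, not from the original source): for i32 this
// returns 32, and for <4 x i8> it returns the element width 8. Only a
// pointer type falls back to the DataLayout, e.g. 64 for a ptr in a typical
// 64-bit address space.
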
// Given the provided Value and, potentially, a context instruction, return
// the preferred context instruction (if any).
static const Instruction *safeCxtI(const Value *V, const Instruction *CxtI) {
  // If we've been provided with a context instruction, then use that (provided
  // it has been inserted).
  if (CxtI && CxtI->getParent())
    return CxtI;

  // If the value is really an already-inserted instruction, then use that.
  CxtI = dyn_cast<Instruction>(V);
  if (CxtI && CxtI->getParent())
    return CxtI;

  return nullptr;
}

static const Instruction *safeCxtI(const Value *V1, const Value *V2,
                                   const Instruction *CxtI) {
  // If we've been provided with a context instruction, then use that (provided
  // it has been inserted).
  if (CxtI && CxtI->getParent())
    return CxtI;

  // If the value is really an already-inserted instruction, then use that.
  CxtI = dyn_cast<Instruction>(V1);
  if (CxtI && CxtI->getParent())
    return CxtI;

  CxtI = dyn_cast<Instruction>(V2);
  if (CxtI && CxtI->getParent())
    return CxtI;

  return nullptr;
}

static bool getShuffleDemandedElts(const ShuffleVectorInst *Shuf,
                                   const APInt &DemandedElts,
                                   APInt &DemandedLHS, APInt &DemandedRHS) {
  if (isa<ScalableVectorType>(Shuf->getType())) {
    assert(DemandedElts == APInt(1, 1));
    DemandedLHS = DemandedRHS = DemandedElts;
    return true;
  }

  int NumElts =
      cast<FixedVectorType>(Shuf->getOperand(0)->getType())->getNumElements();
  return llvm::getShuffleDemandedElts(NumElts, Shuf->getShuffleMask(),
                                      DemandedElts, DemandedLHS, DemandedRHS);
}

static void computeKnownBits(const Value *V, const APInt &DemandedElts,
                             KnownBits &Known, unsigned Depth,
                             const SimplifyQuery &Q);

void llvm::computeKnownBits(const Value *V, KnownBits &Known, unsigned Depth,
                            const SimplifyQuery &Q) {
  // Since the number of lanes in a scalable vector is unknown at compile time,
  // we track one bit which is implicitly broadcast to all lanes. This means
  // that all lanes in a scalable vector are considered demanded.
  auto *FVTy = dyn_cast<FixedVectorType>(V->getType());
  APInt DemandedElts =
      FVTy ? APInt::getAllOnes(FVTy->getNumElements()) : APInt(1, 1);
  ::computeKnownBits(V, DemandedElts, Known, Depth, Q);
}

void llvm::computeKnownBits(const Value *V, KnownBits &Known,
                            const DataLayout &DL, unsigned Depth,
                            AssumptionCache *AC, const Instruction *CxtI,
                            const DominatorTree *DT, bool UseInstrInfo) {
  computeKnownBits(
      V, Known, Depth,
      SimplifyQuery(DL, DT, AC, safeCxtI(V, CxtI), UseInstrInfo));
}

KnownBits llvm::computeKnownBits(const Value *V, const DataLayout &DL,
                                 unsigned Depth, AssumptionCache *AC,
                                 const Instruction *CxtI,
                                 const DominatorTree *DT, bool UseInstrInfo) {
  return computeKnownBits(
      V, Depth, SimplifyQuery(DL, DT, AC, safeCxtI(V, CxtI), UseInstrInfo));
}

KnownBits llvm::computeKnownBits(const Value *V, const APInt &DemandedElts,
                                 const DataLayout &DL, unsigned Depth,
                                 AssumptionCache *AC, const Instruction *CxtI,
                                 const DominatorTree *DT, bool UseInstrInfo) {
  return computeKnownBits(
      V, DemandedElts, Depth,
      SimplifyQuery(DL, DT, AC, safeCxtI(V, CxtI), UseInstrInfo));
}

static bool haveNoCommonBitsSetSpecialCases(const Value *LHS, const Value *RHS,
                                            const SimplifyQuery &SQ) {
  // Look for an inverted mask: (X & ~M) op (Y & M).
  {
    Value *M;
    if (match(LHS, m_c_And(m_Not(m_Value(M)), m_Value())) &&
        match(RHS, m_c_And(m_Specific(M), m_Value())) &&
        isGuaranteedNotToBeUndef(M, SQ.AC, SQ.CxtI, SQ.DT))
      return true;
  }

  // X op (Y & ~X)
  if (match(RHS, m_c_And(m_Not(m_Specific(LHS)), m_Value())) &&
      isGuaranteedNotToBeUndef(LHS, SQ.AC, SQ.CxtI, SQ.DT))
    return true;

  // X op ((X & Y) ^ Y) -- this is the canonical form of the previous pattern
  // for constant Y.
  Value *Y;
  if (match(RHS,
            m_c_Xor(m_c_And(m_Specific(LHS), m_Value(Y)), m_Deferred(Y))) &&
      isGuaranteedNotToBeUndef(LHS, SQ.AC, SQ.CxtI, SQ.DT) &&
      isGuaranteedNotToBeUndef(Y, SQ.AC, SQ.CxtI, SQ.DT))
    return true;

  // Peek through extends to find a 'not' of the other side:
  // (ext Y) op ext(~Y)
  if (match(LHS, m_ZExtOrSExt(m_Value(Y))) &&
      match(RHS, m_ZExtOrSExt(m_Not(m_Specific(Y)))) &&
      isGuaranteedNotToBeUndef(Y, SQ.AC, SQ.CxtI, SQ.DT))
    return true;

  // Look for: (A & B) op ~(A | B)
  {
    Value *A, *B;
    if (match(LHS, m_And(m_Value(A), m_Value(B))) &&
        match(RHS, m_Not(m_c_Or(m_Specific(A), m_Specific(B)))) &&
        isGuaranteedNotToBeUndef(A, SQ.AC, SQ.CxtI, SQ.DT) &&
        isGuaranteedNotToBeUndef(B, SQ.AC, SQ.CxtI, SQ.DT))
      return true;
  }

  return false;
}

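// Worked example for the inverted-mask case (illustrative IR):
//   %notm = xor i8 %m, -1
//   %lhs  = and i8 %x, %notm
//   %rhs  = and i8 %y, %m
// Provided %m is not undef, every bit index is masked to zero on at least
// one side, so %lhs and %rhs can never both have the same bit set.
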
bool llvm::haveNoCommonBitsSet(const WithCache<const Value *> &LHSCache,
                               const WithCache<const Value *> &RHSCache,
                               const SimplifyQuery &SQ) {
  const Value *LHS = LHSCache.getValue();
  const Value *RHS = RHSCache.getValue();

  assert(LHS->getType() == RHS->getType() &&
         "LHS and RHS should have the same type");
  assert(LHS->getType()->isIntOrIntVectorTy() &&
         "LHS and RHS should be integers");

  if (haveNoCommonBitsSetSpecialCases(LHS, RHS, SQ) ||
      haveNoCommonBitsSetSpecialCases(RHS, LHS, SQ))
    return true;

  return KnownBits::haveNoCommonBitsSet(LHSCache.getKnownBits(SQ),
                                        RHSCache.getKnownBits(SQ));
}

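// Example use (illustrative): when this returns true for the operands of
//   %s = add i8 %a, %b
// the addition can never produce a carry, so a pass such as InstCombine may
// rewrite it to the equivalent `or disjoint i8 %a, %b`.
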
bool llvm::isOnlyUsedInZeroComparison(const Instruction *I) {
  return !I->user_empty() && all_of(I->users(), [](const User *U) {
    ICmpInst::Predicate P;
    return match(U, m_ICmp(P, m_Value(), m_Zero()));
  });
}

bool llvm::isOnlyUsedInZeroEqualityComparison(const Instruction *I) {
  return !I->user_empty() && all_of(I->users(), [](const User *U) {
    ICmpInst::Predicate P;
    return match(U, m_ICmp(P, m_Value(), m_Zero())) && ICmpInst::isEquality(P);
  });
}

static bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero, unsigned Depth,
                                   const SimplifyQuery &Q);

bool llvm::isKnownToBeAPowerOfTwo(const Value *V, const DataLayout &DL,
                                  bool OrZero, unsigned Depth,
                                  AssumptionCache *AC, const Instruction *CxtI,
                                  const DominatorTree *DT, bool UseInstrInfo) {
  return ::isKnownToBeAPowerOfTwo(
      V, OrZero, Depth,
      SimplifyQuery(DL, DT, AC, safeCxtI(V, CxtI), UseInstrInfo));
}

static bool isKnownNonZero(const Value *V, const APInt &DemandedElts,
                           const SimplifyQuery &Q, unsigned Depth);

bool llvm::isKnownNonNegative(const Value *V, const SimplifyQuery &SQ,
                              unsigned Depth) {
  return computeKnownBits(V, Depth, SQ).isNonNegative();
}

bool llvm::isKnownPositive(const Value *V, const SimplifyQuery &SQ,
                           unsigned Depth) {
  if (auto *CI = dyn_cast<ConstantInt>(V))
    return CI->getValue().isStrictlyPositive();

  // If `isKnownNonNegative` ever becomes more sophisticated, make sure to keep
  // this updated.
  KnownBits Known = computeKnownBits(V, Depth, SQ);
  return Known.isNonNegative() &&
         (Known.isNonZero() || isKnownNonZero(V, SQ, Depth));
}

bool llvm::isKnownNegative(const Value *V, const SimplifyQuery &SQ,
                           unsigned Depth) {
  return computeKnownBits(V, Depth, SQ).isNegative();
}

static bool isKnownNonEqual(const Value *V1, const Value *V2,
                            const APInt &DemandedElts, unsigned Depth,
                            const SimplifyQuery &Q);

bool llvm::isKnownNonEqual(const Value *V1, const Value *V2,
                           const DataLayout &DL, AssumptionCache *AC,
                           const Instruction *CxtI, const DominatorTree *DT,
                           bool UseInstrInfo) {
  assert(V1->getType() == V2->getType() &&
         "Testing equality of non-equal types!");
  auto *FVTy = dyn_cast<FixedVectorType>(V1->getType());
  APInt DemandedElts =
      FVTy ? APInt::getAllOnes(FVTy->getNumElements()) : APInt(1, 1);
  return ::isKnownNonEqual(
      V1, V2, DemandedElts, 0,
      SimplifyQuery(DL, DT, AC, safeCxtI(V2, V1, CxtI), UseInstrInfo));
}

bool llvm::MaskedValueIsZero(const Value *V, const APInt &Mask,
                             const SimplifyQuery &SQ, unsigned Depth) {
  KnownBits Known(Mask.getBitWidth());
  computeKnownBits(V, Known, Depth, SQ);
  return Mask.isSubsetOf(Known.Zero);
}

static unsigned ComputeNumSignBits(const Value *V, const APInt &DemandedElts,
                                   unsigned Depth, const SimplifyQuery &Q);

static unsigned ComputeNumSignBits(const Value *V, unsigned Depth,
                                   const SimplifyQuery &Q) {
  auto *FVTy = dyn_cast<FixedVectorType>(V->getType());
  APInt DemandedElts =
      FVTy ? APInt::getAllOnes(FVTy->getNumElements()) : APInt(1, 1);
  return ComputeNumSignBits(V, DemandedElts, Depth, Q);
}

unsigned llvm::ComputeNumSignBits(const Value *V, const DataLayout &DL,
                                  unsigned Depth, AssumptionCache *AC,
                                  const Instruction *CxtI,
                                  const DominatorTree *DT, bool UseInstrInfo) {
  return ::ComputeNumSignBits(
      V, Depth, SimplifyQuery(DL, DT, AC, safeCxtI(V, CxtI), UseInstrInfo));
}

unsigned llvm::ComputeMaxSignificantBits(const Value *V, const DataLayout &DL,
                                         unsigned Depth, AssumptionCache *AC,
                                         const Instruction *CxtI,
                                         const DominatorTree *DT) {
  unsigned SignBits = ComputeNumSignBits(V, DL, Depth, AC, CxtI, DT);
  return V->getType()->getScalarSizeInBits() - SignBits + 1;
}

static void computeKnownBitsAddSub(bool Add, const Value *Op0, const Value *Op1,
                                   bool NSW, bool NUW,
                                   const APInt &DemandedElts,
                                   KnownBits &KnownOut, KnownBits &Known2,
                                   unsigned Depth, const SimplifyQuery &Q) {
  computeKnownBits(Op1, DemandedElts, KnownOut, Depth + 1, Q);

  // If one operand is unknown and we have no nowrap information,
  // the result will be unknown independently of the second operand.
  if (KnownOut.isUnknown() && !NSW && !NUW)
    return;

  computeKnownBits(Op0, DemandedElts, Known2, Depth + 1, Q);
  KnownOut = KnownBits::computeForAddSub(Add, NSW, NUW, Known2, KnownOut);
}

static void computeKnownBitsMul(const Value *Op0, const Value *Op1, bool NSW,
                                const APInt &DemandedElts, KnownBits &Known,
                                KnownBits &Known2, unsigned Depth,
                                const SimplifyQuery &Q) {
  computeKnownBits(Op1, DemandedElts, Known, Depth + 1, Q);
  computeKnownBits(Op0, DemandedElts, Known2, Depth + 1, Q);

  bool isKnownNegative = false;
  bool isKnownNonNegative = false;
  // If the multiplication is known not to overflow, compute the sign bit.
  if (NSW) {
    if (Op0 == Op1) {
      // The product of a number with itself is non-negative.
      isKnownNonNegative = true;
    } else {
      bool isKnownNonNegativeOp1 = Known.isNonNegative();
      bool isKnownNonNegativeOp0 = Known2.isNonNegative();
      bool isKnownNegativeOp1 = Known.isNegative();
      bool isKnownNegativeOp0 = Known2.isNegative();
      // The product of two numbers with the same sign is non-negative.
      isKnownNonNegative = (isKnownNegativeOp1 && isKnownNegativeOp0) ||
                           (isKnownNonNegativeOp1 && isKnownNonNegativeOp0);
      // The product of a negative number and a non-negative number is either
      // negative or zero.
      if (!isKnownNonNegative)
        isKnownNegative =
            (isKnownNegativeOp1 && isKnownNonNegativeOp0 &&
             Known2.isNonZero()) ||
            (isKnownNegativeOp0 && isKnownNonNegativeOp1 && Known.isNonZero());
    }
  }

  bool SelfMultiply = Op0 == Op1;
  if (SelfMultiply)
    SelfMultiply &=
        isGuaranteedNotToBeUndef(Op0, Q.AC, Q.CxtI, Q.DT, Depth + 1);
  Known = KnownBits::mul(Known, Known2, SelfMultiply);

  // Only make use of no-wrap flags if we failed to compute the sign bit
  // directly. This matters if the multiplication always overflows, in
  // which case we prefer to follow the result of the direct computation,
  // though as the program is invoking undefined behaviour we can choose
  // whatever we like here.
  if (isKnownNonNegative && !Known.isNegative())
    Known.makeNonNegative();
  else if (isKnownNegative && !Known.isNonNegative())
    Known.makeNegative();
}

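// The sign reasoning above, illustrated (hypothetical values): for
// `mul nsw i8 %a, %b` with both operands known negative, the product is
// known non-negative; with %a known negative and %b known non-negative and
// known non-zero, the product is known negative.
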
void llvm::computeKnownBitsFromRangeMetadata(const MDNode &Ranges,
                                             KnownBits &Known) {
  unsigned BitWidth = Known.getBitWidth();
  unsigned NumRanges = Ranges.getNumOperands() / 2;
  assert(NumRanges >= 1);

  Known.Zero.setAllBits();
  Known.One.setAllBits();

  for (unsigned i = 0; i < NumRanges; ++i) {
    ConstantInt *Lower =
        mdconst::extract<ConstantInt>(Ranges.getOperand(2 * i + 0));
    ConstantInt *Upper =
        mdconst::extract<ConstantInt>(Ranges.getOperand(2 * i + 1));
    ConstantRange Range(Lower->getValue(), Upper->getValue());

    // The first CommonPrefixBits of all values in Range are equal.
    unsigned CommonPrefixBits =
        (Range.getUnsignedMax() ^ Range.getUnsignedMin()).countl_zero();
    APInt Mask = APInt::getHighBitsSet(BitWidth, CommonPrefixBits);
    APInt UnsignedMax = Range.getUnsignedMax().zextOrTrunc(BitWidth);
    Known.One &= UnsignedMax & Mask;
    Known.Zero &= ~UnsignedMax & Mask;
  }
}

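// Worked example (illustrative): an i8 load annotated with
//   !range !{i8 64, i8 68}
// can only produce 0b010000xx, so this sets Known.One = 0b01000000 and
// Known.Zero = 0b10111100, leaving just the low two bits unknown.
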
static bool isEphemeralValueOf(const Instruction *I, const Value *E) {
  SmallVector<const Value *, 16> WorkSet(1, I);
  SmallPtrSet<const Value *, 32> Visited;
  SmallPtrSet<const Value *, 16> EphValues;

  // The instruction defining an assumption's condition itself is always
  // considered ephemeral to that assumption (even if it has other
  // non-ephemeral users). See r246696's test case for an example.
  if (is_contained(I->operands(), E))
    return true;

  while (!WorkSet.empty()) {
    const Value *V = WorkSet.pop_back_val();
    if (!Visited.insert(V).second)
      continue;

    // If all uses of this value are ephemeral, then so is this value.
    if (llvm::all_of(V->users(), [&](const User *U) {
                       return EphValues.count(U);
                     })) {
      if (V == E)
        return true;

      if (V == I || (isa<Instruction>(V) &&
                     !cast<Instruction>(V)->mayHaveSideEffects() &&
                     !cast<Instruction>(V)->isTerminator())) {
        EphValues.insert(V);
        if (const User *U = dyn_cast<User>(V))
          append_range(WorkSet, U->operands());
      }
    }
  }

  return false;
}

// Is this an intrinsic that cannot be speculated but also cannot trap?
bool llvm::isAssumeLikeIntrinsic(const Instruction *I) {
  if (const IntrinsicInst *CI = dyn_cast<IntrinsicInst>(I))
    return CI->isAssumeLikeIntrinsic();

  return false;
}

bool llvm::isValidAssumeForContext(const Instruction *Inv,
                                   const Instruction *CxtI,
                                   const DominatorTree *DT,
                                   bool AllowEphemerals) {
  // There are two restrictions on the use of an assume:
  //  1. The assume must dominate the context (or the control flow must
  //     reach the assume whenever it reaches the context).
  //  2. The context must not be in the assume's set of ephemeral values
  //     (otherwise we will use the assume to prove that the condition
  //     feeding the assume is trivially true, thus causing the removal of
  //     the assume).

  if (Inv->getParent() == CxtI->getParent()) {
    // If Inv and CtxI are in the same block, check if the assume (Inv) is first
    // in the BB.
    if (Inv->comesBefore(CxtI))
      return true;

    // Don't let an assume affect itself - this would cause the problems
    // `isEphemeralValueOf` is trying to prevent, and it would also make
    // the loop below go out of bounds.
    if (!AllowEphemerals && Inv == CxtI)
      return false;

    // The context comes first, but they're both in the same block.
    // Make sure there is nothing in between that might interrupt
    // the control flow, not even CxtI itself.
    // We limit the scan distance between the assume and its context instruction
    // to avoid a compile-time explosion. This limit is chosen arbitrarily, so
    // it can be adjusted if needed (could be turned into a cl::opt).
    auto Range = make_range(CxtI->getIterator(), Inv->getIterator());
    if (!isGuaranteedToTransferExecutionToSuccessor(Range, 15))
      return false;

    return AllowEphemerals || !isEphemeralValueOf(Inv, CxtI);
  }

  // Inv and CxtI are in different blocks.
  if (DT) {
    if (DT->dominates(Inv, CxtI))
      return true;
  } else if (Inv->getParent() == CxtI->getParent()->getSinglePredecessor()) {
    // We don't have a DT, but this trivially dominates.
    return true;
  }

  return false;
}

// TODO: cmpExcludesZero misses many cases where `RHS` is non-constant but
// we still have enough information about `RHS` to conclude non-zero. For
// example Pred=EQ, RHS=isKnownNonZero. cmpExcludesZero is called in loops
// so the extra compile time may not be worth it, but possibly a second API
// should be created for use outside of loops.
static bool cmpExcludesZero(CmpInst::Predicate Pred, const Value *RHS) {
  // v u> y implies v != 0.
  if (Pred == ICmpInst::ICMP_UGT)
    return true;

  // Special-case v != 0 to also handle v != null.
  if (Pred == ICmpInst::ICMP_NE)
    return match(RHS, m_Zero());

  // All other predicates - rely on generic ConstantRange handling.
  const APInt *C;
  auto Zero = APInt::getZero(RHS->getType()->getScalarSizeInBits());
  if (match(RHS, m_APInt(C))) {
    ConstantRange TrueValues = ConstantRange::makeExactICmpRegion(Pred, *C);
    return !TrueValues.contains(Zero);
  }

  auto *VC = dyn_cast<ConstantDataVector>(RHS);
  if (VC == nullptr)
    return false;

  for (unsigned ElemIdx = 0, NElem = VC->getNumElements(); ElemIdx < NElem;
       ++ElemIdx) {
    ConstantRange TrueValues = ConstantRange::makeExactICmpRegion(
        Pred, VC->getElementAsAPInt(ElemIdx));
    if (TrueValues.contains(Zero))
      return false;
  }
  return true;
}

static bool isKnownNonZeroFromAssume(const Value *V, const SimplifyQuery &Q) {
  // Use of assumptions is context-sensitive. If we don't have a context, we
  // cannot use them!
  if (!Q.AC || !Q.CxtI)
    return false;

  for (AssumptionCache::ResultElem &Elem : Q.AC->assumptionsFor(V)) {
    if (!Elem.Assume)
      continue;

    AssumeInst *I = cast<AssumeInst>(Elem.Assume);
    assert(I->getFunction() == Q.CxtI->getFunction() &&
           "Got assumption for the wrong function!");

    if (Elem.Index != AssumptionCache::ExprResultIdx) {
      if (!V->getType()->isPointerTy())
        continue;
      if (RetainedKnowledge RK = getKnowledgeFromBundle(
              *I, I->bundle_op_info_begin()[Elem.Index])) {
        if (RK.WasOn == V &&
            (RK.AttrKind == Attribute::NonNull ||
             (RK.AttrKind == Attribute::Dereferenceable &&
              !NullPointerIsDefined(Q.CxtI->getFunction(),
                                    V->getType()->getPointerAddressSpace()))) &&
            isValidAssumeForContext(I, Q.CxtI, Q.DT))
          return true;
      }
      continue;
    }

    // Warning: This loop can end up being somewhat performance sensitive.
    // We're running this loop once for each value queried, resulting in a
    // runtime of ~O(#assumes * #values).

    Value *RHS;
    CmpInst::Predicate Pred;
    auto m_V = m_CombineOr(m_Specific(V), m_PtrToInt(m_Specific(V)));
    if (!match(I->getArgOperand(0), m_c_ICmp(Pred, m_V, m_Value(RHS))))
      continue;

    if (cmpExcludesZero(Pred, RHS) && isValidAssumeForContext(I, Q.CxtI, Q.DT))
      return true;
  }

  return false;
}

static void computeKnownBitsFromCmp(const Value *V, CmpInst::Predicate Pred,
                                    Value *LHS, Value *RHS, KnownBits &Known,
                                    const SimplifyQuery &Q) {
  if (RHS->getType()->isPointerTy()) {
    // Handle comparison of pointer to null explicitly, as it will not be
    // covered by the m_APInt() logic below.
    if (LHS == V && match(RHS, m_Zero())) {
      switch (Pred) {
      case ICmpInst::ICMP_EQ:
        Known.setAllZero();
        break;
      case ICmpInst::ICMP_SGE:
      case ICmpInst::ICMP_SGT:
        Known.makeNonNegative();
        break;
      case ICmpInst::ICMP_SLT:
        Known.makeNegative();
        break;
      default:
        break;
      }
    }
    return;
  }

  unsigned BitWidth = Known.getBitWidth();
  auto m_V =
      m_CombineOr(m_Specific(V), m_PtrToIntSameSize(Q.DL, m_Specific(V)));

  Value *Y;
  const APInt *Mask, *C;
  uint64_t ShAmt;
  switch (Pred) {
  case ICmpInst::ICMP_EQ:
    // assume(V = C)
    if (match(LHS, m_V) && match(RHS, m_APInt(C))) {
      Known = Known.unionWith(KnownBits::makeConstant(*C));
      // assume(V & Mask = C)
    } else if (match(LHS, m_c_And(m_V, m_Value(Y))) &&
               match(RHS, m_APInt(C))) {
      // For one bits in Mask, we can propagate bits from C to V.
      Known.One |= *C;
      if (match(Y, m_APInt(Mask)))
        Known.Zero |= ~*C & *Mask;
      // assume(V | Mask = C)
    } else if (match(LHS, m_c_Or(m_V, m_Value(Y))) && match(RHS, m_APInt(C))) {
      // For zero bits in Mask, we can propagate bits from C to V.
      Known.Zero |= ~*C;
      if (match(Y, m_APInt(Mask)))
        Known.One |= *C & ~*Mask;
      // assume(V ^ Mask = C)
    } else if (match(LHS, m_Xor(m_V, m_APInt(Mask))) &&
               match(RHS, m_APInt(C))) {
      // Equivalent to assume(V == Mask ^ C)
      Known = Known.unionWith(KnownBits::makeConstant(*C ^ *Mask));
      // assume(V << ShAmt = C)
    } else if (match(LHS, m_Shl(m_V, m_ConstantInt(ShAmt))) &&
               match(RHS, m_APInt(C)) && ShAmt < BitWidth) {
      // For those bits in C that are known, we can propagate them to known
      // bits in V shifted to the right by ShAmt.
      KnownBits RHSKnown = KnownBits::makeConstant(*C);
      RHSKnown.Zero.lshrInPlace(ShAmt);
      RHSKnown.One.lshrInPlace(ShAmt);
      Known = Known.unionWith(RHSKnown);
      // assume(V >> ShAmt = C)
    } else if (match(LHS, m_Shr(m_V, m_ConstantInt(ShAmt))) &&
               match(RHS, m_APInt(C)) && ShAmt < BitWidth) {
      KnownBits RHSKnown = KnownBits::makeConstant(*C);
      // For those bits in RHS that are known, we can propagate them to known
      // bits in V shifted to the right by C.
      Known.Zero |= RHSKnown.Zero << ShAmt;
      Known.One |= RHSKnown.One << ShAmt;
    }
    break;
  case ICmpInst::ICMP_NE: {
    // assume (V & B != 0) where B is a power of 2
    const APInt *BPow2;
    if (match(LHS, m_And(m_V, m_Power2(BPow2))) && match(RHS, m_Zero()))
      Known.One |= *BPow2;
    break;
  }
  default:
    if (match(RHS, m_APInt(C))) {
      const APInt *Offset = nullptr;
      if (match(LHS, m_CombineOr(m_V, m_AddLike(m_V, m_APInt(Offset))))) {
        ConstantRange LHSRange = ConstantRange::makeAllowedICmpRegion(Pred, *C);
        if (Offset)
          LHSRange = LHSRange.sub(*Offset);
        Known = Known.unionWith(LHSRange.toKnownBits());
      }
      if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE) {
        // X & Y u> C     -> X u> C && Y u> C
        // X nuw- Y u> C  -> X u> C
        if (match(LHS, m_c_And(m_V, m_Value())) ||
            match(LHS, m_NUWSub(m_V, m_Value())))
          Known.One.setHighBits(
              (*C + (Pred == ICmpInst::ICMP_UGT)).countLeadingOnes());
      }
      if (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_ULE) {
        // X | Y u< C    -> X u< C && Y u< C
        // X nuw+ Y u< C -> X u< C && Y u< C
        if (match(LHS, m_c_Or(m_V, m_Value())) ||
            match(LHS, m_c_NUWAdd(m_V, m_Value()))) {
          Known.Zero.setHighBits(
              (*C - (Pred == ICmpInst::ICMP_ULT)).countLeadingZeros());
        }
      }
    }
    break;
  }
}

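// Worked example for the shl case above (illustrative): from
//   assume(icmp eq i8 (shl i8 %v, 2), 12)
// C = 0b00001100 is shifted back right by 2, so %v gains
// Known.One = 0b00000011 and Known.Zero = 0b00111100; the top two bits of
// %v, which the shl discards, stay unknown.
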
static void computeKnownBitsFromICmpCond(const Value *V, ICmpInst *Cmp,
                                         KnownBits &Known,
                                         const SimplifyQuery &SQ, bool Invert) {
  ICmpInst::Predicate Pred =
      Invert ? Cmp->getInversePredicate() : Cmp->getPredicate();
  Value *LHS = Cmp->getOperand(0);
  Value *RHS = Cmp->getOperand(1);

  // Handle icmp pred (trunc V), C
  if (match(LHS, m_Trunc(m_Specific(V)))) {
    KnownBits DstKnown(LHS->getType()->getScalarSizeInBits());
    computeKnownBitsFromCmp(LHS, Pred, LHS, RHS, DstKnown, SQ);
    Known = Known.unionWith(DstKnown.anyext(Known.getBitWidth()));
    return;
  }

  computeKnownBitsFromCmp(V, Pred, LHS, RHS, Known, SQ);
}

static void computeKnownBitsFromCond(const Value *V, Value *Cond,
                                     KnownBits &Known, unsigned Depth,
                                     const SimplifyQuery &SQ, bool Invert) {
  Value *A, *B;
  if (Depth < MaxAnalysisRecursionDepth &&
      match(Cond, m_LogicalOp(m_Value(A), m_Value(B)))) {
    KnownBits Known2(Known.getBitWidth());
    KnownBits Known3(Known.getBitWidth());
    computeKnownBitsFromCond(V, A, Known2, Depth + 1, SQ, Invert);
    computeKnownBitsFromCond(V, B, Known3, Depth + 1, SQ, Invert);
    if (Invert ? match(Cond, m_LogicalOr(m_Value(), m_Value()))
               : match(Cond, m_LogicalAnd(m_Value(), m_Value())))
      Known2 = Known2.unionWith(Known3);
    else
      Known2 = Known2.intersectWith(Known3);
    Known = Known.unionWith(Known2);
  }

  if (auto *Cmp = dyn_cast<ICmpInst>(Cond))
    computeKnownBitsFromICmpCond(V, Cmp, Known, SQ, Invert);
}

void llvm::computeKnownBitsFromContext(const Value *V, KnownBits &Known,
                                       unsigned Depth, const SimplifyQuery &Q) {
  // Handle injected condition.
  if (Q.CC && Q.CC->AffectedValues.contains(V))
    computeKnownBitsFromCond(V, Q.CC->Cond, Known, Depth, Q, Q.CC->Invert);

  if (!Q.CxtI)
    return;

  if (Q.DC && Q.DT) {
    // Handle dominating conditions.
    for (BranchInst *BI : Q.DC->conditionsFor(V)) {
      BasicBlockEdge Edge0(BI->getParent(), BI->getSuccessor(0));
      if (Q.DT->dominates(Edge0, Q.CxtI->getParent()))
        computeKnownBitsFromCond(V, BI->getCondition(), Known, Depth, Q,
                                 /*Invert*/ false);

      BasicBlockEdge Edge1(BI->getParent(), BI->getSuccessor(1));
      if (Q.DT->dominates(Edge1, Q.CxtI->getParent()))
        computeKnownBitsFromCond(V, BI->getCondition(), Known, Depth, Q,
                                 /*Invert*/ true);
    }

    if (Known.hasConflict())
      Known.resetAll();
  }

  if (!Q.AC)
    return;

  unsigned BitWidth = Known.getBitWidth();

  // Note that the patterns below need to be kept in sync with the code
  // in AssumptionCache::updateAffectedValues.
  for (AssumptionCache::ResultElem &Elem : Q.AC->assumptionsFor(V)) {
    if (!Elem.Assume)
      continue;

    AssumeInst *I = cast<AssumeInst>(Elem.Assume);
    assert(I->getParent()->getParent() == Q.CxtI->getParent()->getParent() &&
           "Got assumption for the wrong function!");

    if (Elem.Index != AssumptionCache::ExprResultIdx) {
      if (!V->getType()->isPointerTy())
        continue;
      if (RetainedKnowledge RK = getKnowledgeFromBundle(
              *I, I->bundle_op_info_begin()[Elem.Index])) {
        if (RK.WasOn == V && RK.AttrKind == Attribute::Alignment &&
            isPowerOf2_64(RK.ArgValue) &&
            isValidAssumeForContext(I, Q.CxtI, Q.DT))
          Known.Zero.setLowBits(Log2_64(RK.ArgValue));
      }
      continue;
    }

    // Warning: This loop can end up being somewhat performance sensitive.
    // We're running this loop once for each value queried, resulting in a
    // runtime of ~O(#assumes * #values).

    Value *Arg = I->getArgOperand(0);

    if (Arg == V && isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
      assert(BitWidth == 1 && "assume operand is not i1?");
      Known.setAllOnes();
      return;
    }
    if (match(Arg, m_Not(m_Specific(V))) &&
        isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
      assert(BitWidth == 1 && "assume operand is not i1?");
      Known.setAllZero();
      return;
    }

    // The remaining tests are all recursive, so bail out if we hit the limit.
    if (Depth == MaxAnalysisRecursionDepth)
      continue;

    ICmpInst *Cmp = dyn_cast<ICmpInst>(Arg);
    if (!Cmp)
      continue;

    if (!isValidAssumeForContext(I, Q.CxtI, Q.DT))
      continue;

    computeKnownBitsFromICmpCond(V, Cmp, Known, Q, /*Invert=*/false);
  }

  // Conflicting assumption: Undefined behavior will occur on this execution
  // path.
  if (Known.hasConflict())
    Known.resetAll();
}

/// Compute known bits from a shift operator, including those with a
/// non-constant shift amount. Known is the output of this function. Known2 is a
/// pre-allocated temporary with the same bit width as Known and on return
/// contains the known bits of the shift value source. KF is an
/// operator-specific function that, given the known bits and a shift amount,
/// computes the implied known bits of the shift operator's result for that
/// shift amount. The results from calling KF are conservatively combined for
/// all permitted shift amounts.
static void computeKnownBitsFromShiftOperator(
    const Operator *I, const APInt &DemandedElts, KnownBits &Known,
    KnownBits &Known2, unsigned Depth, const SimplifyQuery &Q,
    function_ref<KnownBits(const KnownBits &, const KnownBits &, bool)> KF) {
  computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);
  computeKnownBits(I->getOperand(1), DemandedElts, Known, Depth + 1, Q);
  // To limit compile-time impact, only query isKnownNonZero() if we know at
  // least something about the shift amount.
  bool ShAmtNonZero =
      Known.isNonZero() ||
      (Known.getMaxValue().ult(Known.getBitWidth()) &&
       isKnownNonZero(I->getOperand(1), DemandedElts, Q, Depth + 1));
  Known = KF(Known2, Known, ShAmtNonZero);
}

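// Example (illustrative): for `shl i8 %x, %amt` with %amt known to be either
// 2 or 3, KF's results are conservatively combined over the feasible
// amounts, so only facts that hold for every amount survive -- e.g. the two
// low result bits are known zero, since %amt is provably at least 2.
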
static KnownBits
getKnownBitsFromAndXorOr(const Operator *I, const APInt &DemandedElts,
                         const KnownBits &KnownLHS, const KnownBits &KnownRHS,
                         unsigned Depth, const SimplifyQuery &Q) {
  unsigned BitWidth = KnownLHS.getBitWidth();
  KnownBits KnownOut(BitWidth);
  bool IsAnd = false;
  bool HasKnownOne = !KnownLHS.One.isZero() || !KnownRHS.One.isZero();
  Value *X = nullptr, *Y = nullptr;

  switch (I->getOpcode()) {
  case Instruction::And:
    KnownOut = KnownLHS & KnownRHS;
    IsAnd = true;
    // and(x, -x) is a common idiom that will clear all but the lowest set
    // bit. If we have a single known bit in x, we can clear all bits
    // above it.
    // TODO: instcombine often reassociates independent `and` which can hide
    // this pattern. Try to match and(x, and(-x, y)) / and(and(x, y), -x).
    if (HasKnownOne && match(I, m_c_And(m_Value(X), m_Neg(m_Deferred(X))))) {
      // -(-x) == x so using whichever (LHS/RHS) gets us a better result.
      if (KnownLHS.countMaxTrailingZeros() <= KnownRHS.countMaxTrailingZeros())
        KnownOut = KnownLHS.blsi();
      else
        KnownOut = KnownRHS.blsi();
    }
    break;
  case Instruction::Or:
    KnownOut = KnownLHS | KnownRHS;
    break;
  case Instruction::Xor:
    KnownOut = KnownLHS ^ KnownRHS;
    // xor(x, x-1) is a common idiom that will clear all but the lowest set
    // bit. If we have a single known bit in x, we can clear all bits
    // above it.
    // TODO: xor(x, x-1) is often rewritten as xor(x, x-C) where C != -1,
    // but for the purpose of demanded bits (xor(x, x-C) & Demanded) ==
    // (xor(x, x-1) & Demanded). Extend the xor pattern to use arbitrary C
    // when xor(x, x-C) behaves the same as xor(x, x-1).
    if (HasKnownOne &&
        match(I, m_c_Xor(m_Value(X), m_Add(m_Deferred(X), m_AllOnes())))) {
      const KnownBits &XBits = I->getOperand(0) == X ? KnownLHS : KnownRHS;
      KnownOut = XBits.blsmsk();
    }
    break;
  default:
    llvm_unreachable("Invalid Op used in 'analyzeKnownBitsFromAndXorOr'");
  }

  // and(x, add (x, -1)) is a common idiom that always clears the low bit;
  // xor/or(x, add (x, -1)) is an idiom that will always set the low bit.
  // Here we handle the more general case of adding any odd number by
  // matching the form and/xor/or(x, add(x, y)) where y is odd.
  // TODO: This could be generalized to clearing any bit set in y where the
  // following bit is known to be unset in y.
  if (!KnownOut.Zero[0] && !KnownOut.One[0] &&
      (match(I, m_c_BinOp(m_Value(X), m_c_Add(m_Deferred(X), m_Value(Y)))) ||
       match(I, m_c_BinOp(m_Value(X), m_Sub(m_Deferred(X), m_Value(Y)))) ||
       match(I, m_c_BinOp(m_Value(X), m_Sub(m_Value(Y), m_Deferred(X)))))) {
    KnownBits KnownY(BitWidth);
    computeKnownBits(Y, DemandedElts, KnownY, Depth + 1, Q);
    if (KnownY.countMinTrailingOnes() > 0) {
      if (IsAnd)
        KnownOut.Zero.setBit(0);
      else
        KnownOut.One.setBit(0);
    }
  }
  return KnownOut;
}

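// Worked example for the blsi case (illustrative): if %x is known to look
// like 0b????1000 (bit 3 known one, bits 2..0 known zero), then
// `and i8 %x, (sub i8 0, %x)` isolates the lowest set bit, so the result is
// the exact constant 0b00001000.
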
static KnownBits computeKnownBitsForHorizontalOperation(
    const Operator *I, const APInt &DemandedElts, unsigned Depth,
    const SimplifyQuery &Q,
    const function_ref<KnownBits(const KnownBits &, const KnownBits &)>
        KnownBitsFunc) {
  APInt DemandedEltsLHS, DemandedEltsRHS;
  getHorizDemandedEltsForFirstOperand(Q.DL.getTypeSizeInBits(I->getType()),
                                      DemandedElts, DemandedEltsLHS,
                                      DemandedEltsRHS);

  const auto ComputeForSingleOpFunc =
      [Depth, &Q, KnownBitsFunc](const Value *Op, APInt &DemandedEltsOp) {
        return KnownBitsFunc(
            computeKnownBits(Op, DemandedEltsOp, Depth + 1, Q),
            computeKnownBits(Op, DemandedEltsOp << 1, Depth + 1, Q));
      };

  if (DemandedEltsRHS.isZero())
    return ComputeForSingleOpFunc(I->getOperand(0), DemandedEltsLHS);
  if (DemandedEltsLHS.isZero())
    return ComputeForSingleOpFunc(I->getOperand(1), DemandedEltsRHS);

  return ComputeForSingleOpFunc(I->getOperand(0), DemandedEltsLHS)
      .intersectWith(ComputeForSingleOpFunc(I->getOperand(1), DemandedEltsRHS));
}

// Public so this can be used in `SimplifyDemandedUseBits`.
KnownBits llvm::analyzeKnownBitsFromAndXorOr(const Operator *I,
                                             const KnownBits &KnownLHS,
                                             const KnownBits &KnownRHS,
                                             unsigned Depth,
                                             const SimplifyQuery &SQ) {
  auto *FVTy = dyn_cast<FixedVectorType>(I->getType());
  APInt DemandedElts =
      FVTy ? APInt::getAllOnes(FVTy->getNumElements()) : APInt(1, 1);

  return getKnownBitsFromAndXorOr(I, DemandedElts, KnownLHS, KnownRHS, Depth,
                                  SQ);
}

llvm::getVScaleRange(const Function
*F
, unsigned BitWidth
) {
1009 Attribute Attr
= F
->getFnAttribute(Attribute::VScaleRange
);
1010 // Without vscale_range, we only know that vscale is non-zero.
1011 if (!Attr
.isValid())
1012 return ConstantRange(APInt(BitWidth
, 1), APInt::getZero(BitWidth
));
1014 unsigned AttrMin
= Attr
.getVScaleRangeMin();
1015 // Minimum is larger than vscale width, result is always poison.
1016 if ((unsigned)llvm::bit_width(AttrMin
) > BitWidth
)
1017 return ConstantRange::getEmpty(BitWidth
);
1019 APInt
Min(BitWidth
, AttrMin
);
1020 std::optional
<unsigned> AttrMax
= Attr
.getVScaleRangeMax();
1021 if (!AttrMax
|| (unsigned)llvm::bit_width(*AttrMax
) > BitWidth
)
1022 return ConstantRange(Min
, APInt::getZero(BitWidth
));
1024 return ConstantRange(Min
, APInt(BitWidth
, *AttrMax
) + 1);
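// Example (illustrative): for a function attributed vscale_range(2,4) and
// BitWidth 64, this returns the half-open range [2, 5), i.e. vscale is one
// of 2, 3 or 4; with no attribute it returns the wrapped range [1, 0),
// meaning only "vscale is non-zero" is known.
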
void llvm::adjustKnownBitsForSelectArm(KnownBits &Known, Value *Cond,
                                       Value *Arm, bool Invert, unsigned Depth,
                                       const SimplifyQuery &Q) {
  // If we have a constant arm, we are done.
  if (Known.isConstant())
    return;

  // See what condition implies about the bits of the select arm.
  KnownBits CondRes(Known.getBitWidth());
  computeKnownBitsFromCond(Arm, Cond, CondRes, Depth + 1, Q, Invert);
  // If we don't get any information from the condition, no reason to
  // proceed.
  if (CondRes.isUnknown())
    return;

  // We can have conflict if the condition is dead. I.e if we have
  // (x | 64) < 32 ? (x | 64) : y
  // we will have conflict at bit 6 from the condition/the `or`.
  // In that case just return. It's not particularly important
  // what we do, as this select is going to be simplified soon.
  CondRes = CondRes.unionWith(Known);
  if (CondRes.hasConflict())
    return;

  // Finally make sure the information we found is valid. This is relatively
  // expensive so it's left for the very end.
  if (!isGuaranteedNotToBeUndef(Arm, Q.AC, Q.CxtI, Q.DT, Depth + 1))
    return;

  // Finally, we know we get information from the condition and it's valid,
  // so return it.
  Known = CondRes;
}

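// Example (illustrative): in `select (icmp ult i8 %x, 16), i8 %x, i8 42`
// the true arm inherits the condition's implication %x u< 16, so its top
// four bits become known zero -- but only after the isGuaranteedNotToBeUndef
// check above confirms %x cannot be undef.
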
static void computeKnownBitsFromOperator(const Operator *I,
                                         const APInt &DemandedElts,
                                         KnownBits &Known, unsigned Depth,
                                         const SimplifyQuery &Q) {
  unsigned BitWidth = Known.getBitWidth();

  KnownBits Known2(BitWidth);
  switch (I->getOpcode()) {
  default: break;
  case Instruction::Load:
    if (MDNode *MD =
            Q.IIQ.getMetadata(cast<LoadInst>(I), LLVMContext::MD_range))
      computeKnownBitsFromRangeMetadata(*MD, Known);
    break;
  case Instruction::And:
    computeKnownBits(I->getOperand(1), DemandedElts, Known, Depth + 1, Q);
    computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);

    Known = getKnownBitsFromAndXorOr(I, DemandedElts, Known2, Known, Depth, Q);
    break;
  case Instruction::Or:
    computeKnownBits(I->getOperand(1), DemandedElts, Known, Depth + 1, Q);
    computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);

    Known = getKnownBitsFromAndXorOr(I, DemandedElts, Known2, Known, Depth, Q);
    break;
  case Instruction::Xor:
    computeKnownBits(I->getOperand(1), DemandedElts, Known, Depth + 1, Q);
    computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);

    Known = getKnownBitsFromAndXorOr(I, DemandedElts, Known2, Known, Depth, Q);
    break;
  case Instruction::Mul: {
    bool NSW = Q.IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(I));
    computeKnownBitsMul(I->getOperand(0), I->getOperand(1), NSW, DemandedElts,
                        Known, Known2, Depth, Q);
    break;
  }
  case Instruction::UDiv: {
    computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
    computeKnownBits(I->getOperand(1), DemandedElts, Known2, Depth + 1, Q);
    Known =
        KnownBits::udiv(Known, Known2, Q.IIQ.isExact(cast<BinaryOperator>(I)));
    break;
  }
  case Instruction::SDiv: {
    computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
    computeKnownBits(I->getOperand(1), DemandedElts, Known2, Depth + 1, Q);
    Known =
        KnownBits::sdiv(Known, Known2, Q.IIQ.isExact(cast<BinaryOperator>(I)));
    break;
  }
  case Instruction::Select: {
    auto ComputeForArm = [&](Value *Arm, bool Invert) {
      KnownBits Res(Known.getBitWidth());
      computeKnownBits(Arm, DemandedElts, Res, Depth + 1, Q);
      adjustKnownBitsForSelectArm(Res, I->getOperand(0), Arm, Invert, Depth, Q);
      return Res;
    };
    // Only known if known in both the LHS and RHS.
    Known =
        ComputeForArm(I->getOperand(1), /*Invert=*/false)
            .intersectWith(ComputeForArm(I->getOperand(2), /*Invert=*/true));
    break;
  }
  case Instruction::FPTrunc:
  case Instruction::FPExt:
  case Instruction::FPToUI:
  case Instruction::FPToSI:
  case Instruction::SIToFP:
  case Instruction::UIToFP:
    break; // Can't work with floating point.
  case Instruction::PtrToInt:
  case Instruction::IntToPtr:
    // Fall through and handle them the same as zext/trunc.
    [[fallthrough]];
  case Instruction::ZExt:
  case Instruction::Trunc: {
    Type *SrcTy = I->getOperand(0)->getType();

    unsigned SrcBitWidth;
    // Note that we handle pointer operands here because of inttoptr/ptrtoint
    // which fall through here.
    Type *ScalarTy = SrcTy->getScalarType();
    SrcBitWidth = ScalarTy->isPointerTy() ?
      Q.DL.getPointerTypeSizeInBits(ScalarTy) :
      Q.DL.getTypeSizeInBits(ScalarTy);

    assert(SrcBitWidth && "SrcBitWidth can't be zero");
    Known = Known.anyextOrTrunc(SrcBitWidth);
    computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
    if (auto *Inst = dyn_cast<PossiblyNonNegInst>(I);
        Inst && Inst->hasNonNeg() && !Known.isNegative())
      Known.makeNonNegative();
    Known = Known.zextOrTrunc(BitWidth);
    break;
  }
  case Instruction::BitCast: {
    Type *SrcTy = I->getOperand(0)->getType();
    if (SrcTy->isIntOrPtrTy() &&
        // TODO: For now, not handling conversions like:
        // (bitcast i64 %x to <2 x i32>)
        !I->getType()->isVectorTy()) {
      computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
      break;
    }

    const Value *V;
    // Handle bitcast from floating point to integer.
    if (match(I, m_ElementWiseBitCast(m_Value(V))) &&
        V->getType()->isFPOrFPVectorTy()) {
      Type *FPType = V->getType()->getScalarType();
      KnownFPClass Result =
          computeKnownFPClass(V, DemandedElts, fcAllFlags, Depth + 1, Q);
      FPClassTest FPClasses = Result.KnownFPClasses;

      // TODO: Treat it as zero/poison if the use of I is unreachable.
      if (FPClasses == fcNone)
        break;

      if (Result.isKnownNever(fcNormal | fcSubnormal | fcNan)) {
        Known.Zero.setAllBits();
        Known.One.setAllBits();

        if (FPClasses & fcInf)
          Known = Known.intersectWith(KnownBits::makeConstant(
              APFloat::getInf(FPType->getFltSemantics()).bitcastToAPInt()));

        if (FPClasses & fcZero)
          Known = Known.intersectWith(KnownBits::makeConstant(
              APInt::getZero(FPType->getScalarSizeInBits())));

        Known.Zero.clearSignBit();
        Known.One.clearSignBit();
      }

      if (Result.SignBit) {
        if (*Result.SignBit)
          Known.makeNegative();
        else
          Known.makeNonNegative();
      }

      break;
    }

    // Handle cast from vector integer type to scalar or vector integer.
    auto *SrcVecTy = dyn_cast<FixedVectorType>(SrcTy);
    if (!SrcVecTy || !SrcVecTy->getElementType()->isIntegerTy() ||
        !I->getType()->isIntOrIntVectorTy() ||
        isa<ScalableVectorType>(I->getType()))
      break;

    // Look through a cast from narrow vector elements to wider type.
    // Examples: v4i32 -> v2i64, v3i8 -> v24
    unsigned SubBitWidth = SrcVecTy->getScalarSizeInBits();
    if (BitWidth % SubBitWidth == 0) {
      // Known bits are automatically intersected across demanded elements of a
      // vector. So for example, if a bit is computed as known zero, it must be
      // zero across all demanded elements of the vector.
      //
      // For this bitcast, each demanded element of the output is sub-divided
      // across a set of smaller vector elements in the source vector. To get
      // the known bits for an entire element of the output, compute the known
      // bits for each sub-element sequentially. This is done by shifting the
      // one-set-bit demanded elements parameter across the sub-elements for
      // consecutive calls to computeKnownBits. We are using the demanded
      // elements parameter as a mask operator.
      //
      // The known bits of each sub-element are then inserted into place
      // (dependent on endian) to form the full result of known bits.
      unsigned NumElts = DemandedElts.getBitWidth();
      unsigned SubScale = BitWidth / SubBitWidth;
      APInt SubDemandedElts = APInt::getZero(NumElts * SubScale);
      for (unsigned i = 0; i != NumElts; ++i) {
        if (DemandedElts[i])
          SubDemandedElts.setBit(i * SubScale);
      }

      KnownBits KnownSrc(SubBitWidth);
      for (unsigned i = 0; i != SubScale; ++i) {
        computeKnownBits(I->getOperand(0), SubDemandedElts.shl(i), KnownSrc,
                         Depth + 1, Q);
        unsigned ShiftElt = Q.DL.isLittleEndian() ? i : SubScale - 1 - i;
        Known.insertBits(KnownSrc, ShiftElt * SubBitWidth);
      }
    }
    break;
  }
  case Instruction::SExt: {
    // Compute the bits in the result that are not present in the input.
    unsigned SrcBitWidth = I->getOperand(0)->getType()->getScalarSizeInBits();

    Known = Known.trunc(SrcBitWidth);
    computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
    // If the sign bit of the input is known set or clear, then we know the
    // top bits of the result.
    Known = Known.sext(BitWidth);
    break;
  }
  case Instruction::Shl: {
    bool NUW = Q.IIQ.hasNoUnsignedWrap(cast<OverflowingBinaryOperator>(I));
    bool NSW = Q.IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(I));
    auto KF = [NUW, NSW](const KnownBits &KnownVal, const KnownBits &KnownAmt,
                         bool ShAmtNonZero) {
      return KnownBits::shl(KnownVal, KnownAmt, NUW, NSW, ShAmtNonZero);
    };
    computeKnownBitsFromShiftOperator(I, DemandedElts, Known, Known2, Depth, Q,
                                      KF);
    // Trailing zeros of a left-shifted constant never decrease.
    const APInt *C;
    if (match(I->getOperand(0), m_APInt(C)))
      Known.Zero.setLowBits(C->countr_zero());
    break;
  }
  case Instruction::LShr: {
    bool Exact = Q.IIQ.isExact(cast<BinaryOperator>(I));
    auto KF = [Exact](const KnownBits &KnownVal, const KnownBits &KnownAmt,
                      bool ShAmtNonZero) {
      return KnownBits::lshr(KnownVal, KnownAmt, ShAmtNonZero, Exact);
    };
    computeKnownBitsFromShiftOperator(I, DemandedElts, Known, Known2, Depth, Q,
                                      KF);
    // Leading zeros of a right-shifted constant never decrease.
    const APInt *C;
    if (match(I->getOperand(0), m_APInt(C)))
      Known.Zero.setHighBits(C->countl_zero());
    break;
  }
  case Instruction::AShr: {
    bool Exact = Q.IIQ.isExact(cast<BinaryOperator>(I));
    auto KF = [Exact](const KnownBits &KnownVal, const KnownBits &KnownAmt,
                      bool ShAmtNonZero) {
      return KnownBits::ashr(KnownVal, KnownAmt, ShAmtNonZero, Exact);
    };
    computeKnownBitsFromShiftOperator(I, DemandedElts, Known, Known2, Depth, Q,
                                      KF);
    break;
  }
  case Instruction::Sub: {
    bool NSW = Q.IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(I));
    bool NUW = Q.IIQ.hasNoUnsignedWrap(cast<OverflowingBinaryOperator>(I));
    computeKnownBitsAddSub(false, I->getOperand(0), I->getOperand(1), NSW, NUW,
                           DemandedElts, Known, Known2, Depth, Q);
    break;
  }
  case Instruction::Add: {
    bool NSW = Q.IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(I));
    bool NUW = Q.IIQ.hasNoUnsignedWrap(cast<OverflowingBinaryOperator>(I));
    computeKnownBitsAddSub(true, I->getOperand(0), I->getOperand(1), NSW, NUW,
                           DemandedElts, Known, Known2, Depth, Q);
    break;
  }
  case Instruction::SRem:
    computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
    computeKnownBits(I->getOperand(1), DemandedElts, Known2, Depth + 1, Q);
    Known = KnownBits::srem(Known, Known2);
    break;

  case Instruction::URem:
    computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
    computeKnownBits(I->getOperand(1), DemandedElts, Known2, Depth + 1, Q);
    Known = KnownBits::urem(Known, Known2);
    break;
  case Instruction::Alloca:
    Known.Zero.setLowBits(Log2(cast<AllocaInst>(I)->getAlign()));
    break;
  case Instruction::GetElementPtr: {
    // Analyze all of the subscripts of this getelementptr instruction
    // to determine if we can prove known low zero bits.
    computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
    // Accumulate the constant indices in a separate variable
    // to minimize the number of calls to computeForAddSub.
    APInt AccConstIndices(BitWidth, 0, /*IsSigned*/ true);

    gep_type_iterator GTI = gep_type_begin(I);
    for (unsigned i = 1, e = I->getNumOperands(); i != e; ++i, ++GTI) {
      // TrailZ can only become smaller, short-circuit if we hit zero.
      if (Known.isUnknown())
        break;

      Value *Index = I->getOperand(i);

      // Handle case when index is zero.
      Constant *CIndex = dyn_cast<Constant>(Index);
      if (CIndex && CIndex->isZeroValue())
        continue;

      if (StructType *STy = GTI.getStructTypeOrNull()) {
        // Handle struct member offset arithmetic.

        assert(CIndex &&
               "Access to structure field must be known at compile time");

        if (CIndex->getType()->isVectorTy())
          Index = CIndex->getSplatValue();

        unsigned Idx = cast<ConstantInt>(Index)->getZExtValue();
        const StructLayout *SL = Q.DL.getStructLayout(STy);
        uint64_t Offset = SL->getElementOffset(Idx);
        AccConstIndices += Offset;
        continue;
      }

      // Handle array index arithmetic.
      Type *IndexedTy = GTI.getIndexedType();
      if (!IndexedTy->isSized()) {
        Known.resetAll();
        break;
      }

      unsigned IndexBitWidth = Index->getType()->getScalarSizeInBits();
      KnownBits IndexBits(IndexBitWidth);
      computeKnownBits(Index, IndexBits, Depth + 1, Q);
      TypeSize IndexTypeSize = GTI.getSequentialElementStride(Q.DL);
      uint64_t TypeSizeInBytes = IndexTypeSize.getKnownMinValue();
      KnownBits ScalingFactor(IndexBitWidth);
      // Multiply by current sizeof type.
      // &A[i] == A + i * sizeof(*A[i]).
      if (IndexTypeSize.isScalable()) {
        // For scalable types the only thing we know about sizeof is
        // that this is a multiple of the minimum size.
        ScalingFactor.Zero.setLowBits(llvm::countr_zero(TypeSizeInBytes));
      } else if (IndexBits.isConstant()) {
        APInt IndexConst = IndexBits.getConstant();
        APInt ScalingFactor(IndexBitWidth, TypeSizeInBytes);
        IndexConst *= ScalingFactor;
        AccConstIndices += IndexConst.sextOrTrunc(BitWidth);
        continue;
      } else {
        ScalingFactor =
            KnownBits::makeConstant(APInt(IndexBitWidth, TypeSizeInBytes));
      }
      IndexBits = KnownBits::mul(IndexBits, ScalingFactor);

      // If the offsets have a different width from the pointer, according
      // to the language reference we need to sign-extend or truncate them
      // to the width of the pointer.
      IndexBits = IndexBits.sextOrTrunc(BitWidth);

      // Note that inbounds does *not* guarantee nsw for the addition, as only
      // the offset is signed, while the base address is unsigned.
      Known = KnownBits::computeForAddSub(
          /*Add=*/true, /*NSW=*/false, /* NUW=*/false, Known, IndexBits);
    }
    if (!Known.isUnknown() && !AccConstIndices.isZero()) {
      KnownBits Index = KnownBits::makeConstant(AccConstIndices);
      Known = KnownBits::computeForAddSub(
          /*Add=*/true, /*NSW=*/false, /* NUW=*/false, Known, Index);
    }
    break;
  }
  case Instruction::PHI: {
    const PHINode *P = cast<PHINode>(I);
    BinaryOperator *BO = nullptr;
    Value *R = nullptr, *L = nullptr;
    if (matchSimpleRecurrence(P, BO, R, L)) {
      // Handle the case of a simple two-predecessor recurrence PHI.
      // There's a lot more that could theoretically be done here, but
      // this is sufficient to catch some interesting cases.
      unsigned Opcode = BO->getOpcode();

      // If this is a shift recurrence, we know the bits being shifted in.
      // We can combine that with information about the start value of the
      // recurrence to conclude facts about the result.
      if ((Opcode == Instruction::LShr || Opcode == Instruction::AShr ||
           Opcode == Instruction::Shl) &&
          BO->getOperand(0) == I) {

        // We have matched a recurrence of the form:
        // %iv = [R, %entry], [%iv.next, %backedge]
        // %iv.next = shift_op %iv, L

        // Recurse with the phi context to avoid concern about whether facts
        // inferred hold at original context instruction. TODO: It may be
        // correct to use the original context. If warranted, explore and
        // add sufficient tests to cover.
        SimplifyQuery RecQ = Q.getWithoutCondContext();
        RecQ.CxtI = P;
        computeKnownBits(R, DemandedElts, Known2, Depth + 1, RecQ);
        switch (Opcode) {
        case Instruction::Shl:
          // A shl recurrence will only increase the trailing zeros.
          Known.Zero.setLowBits(Known2.countMinTrailingZeros());
          break;
        case Instruction::LShr:
          // A lshr recurrence will preserve the leading zeros of the
          // start value.
          Known.Zero.setHighBits(Known2.countMinLeadingZeros());
          break;
        case Instruction::AShr:
          // An ashr recurrence will extend the initial sign bit.
          Known.Zero.setHighBits(Known2.countMinLeadingZeros());
          Known.One.setHighBits(Known2.countMinLeadingOnes());
          break;
        }
      }

      // Check for operations that have the property that if
      // both their operands have low zero bits, the result
      // will have low zero bits.
      if (Opcode == Instruction::Add ||
          Opcode == Instruction::Sub ||
          Opcode == Instruction::And ||
          Opcode == Instruction::Or ||
          Opcode == Instruction::Mul) {
        // Change the context instruction to the "edge" that flows into the
        // phi. This is important because that is where the value is actually
        // "evaluated" even though it is used later somewhere else. (see also
        // D69571).
        SimplifyQuery RecQ = Q.getWithoutCondContext();

        unsigned OpNum = P->getOperand(0) == R ? 0 : 1;
        Instruction *RInst = P->getIncomingBlock(OpNum)->getTerminator();
        Instruction *LInst = P->getIncomingBlock(1 - OpNum)->getTerminator();

        // Ok, we have a PHI of the form L op= R. Check for low
        // zero bits.
        RecQ.CxtI = RInst;
        computeKnownBits(R, DemandedElts, Known2, Depth + 1, RecQ);

        // We need to take the minimum number of known bits.
        KnownBits Known3(BitWidth);
        RecQ.CxtI = LInst;
        computeKnownBits(L, DemandedElts, Known3, Depth + 1, RecQ);

        Known.Zero.setLowBits(std::min(Known2.countMinTrailingZeros(),
                                       Known3.countMinTrailingZeros()));

        auto *OverflowOp = dyn_cast<OverflowingBinaryOperator>(BO);
        if (OverflowOp && Q.IIQ.hasNoSignedWrap(OverflowOp)) {
          // If the initial value of the recurrence is nonnegative, and we are
          // adding a nonnegative number with nsw, the result can only be
          // nonnegative or a poison value regardless of the number of times we
          // execute the add in the phi recurrence. If the initial value is
          // negative and we are adding a negative number with nsw, the result
          // can only be negative or a poison value. Similar arguments apply to
          // sub and mul.
          //
          // (add non-negative, non-negative) --> non-negative
          // (add negative, negative) --> negative
          if (Opcode == Instruction::Add) {
            if (Known2.isNonNegative() && Known3.isNonNegative())
              Known.makeNonNegative();
            else if (Known2.isNegative() && Known3.isNegative())
              Known.makeNegative();
          }

          // (sub nsw non-negative, negative) --> non-negative
          // (sub nsw negative, non-negative) --> negative
          else if (Opcode == Instruction::Sub && BO->getOperand(0) == I) {
            if (Known2.isNonNegative() && Known3.isNegative())
              Known.makeNonNegative();
            else if (Known2.isNegative() && Known3.isNonNegative())
              Known.makeNegative();
          }

          // (mul nsw non-negative, non-negative) --> non-negative
          else if (Opcode == Instruction::Mul && Known2.isNonNegative() &&
                   Known3.isNonNegative())
            Known.makeNonNegative();
        }

        break;
      }
    }

    // Unreachable blocks may have zero-operand PHI nodes.
    if (P->getNumIncomingValues() == 0)
      break;

    // Otherwise take the unions of the known bit sets of the operands,
    // taking conservative care to avoid excessive recursion.
    if (Depth < MaxAnalysisRecursionDepth - 1 && Known.isUnknown()) {
      // Skip if every incoming value references the PHI itself.
      if (isa_and_nonnull<UndefValue>(P->hasConstantValue()))
        break;

      Known.Zero.setAllBits();
      Known.One.setAllBits();
      for (unsigned u = 0, e = P->getNumIncomingValues(); u < e; ++u) {
        Value *IncValue = P->getIncomingValue(u);
        // Skip direct self references.
        if (IncValue == P)
          continue;

        // Change the context instruction to the "edge" that flows into the
        // phi. This is important because that is where the value is actually
        // "evaluated" even though it is used later somewhere else. (see also
        // D69571).
        SimplifyQuery RecQ = Q.getWithoutCondContext();
        RecQ.CxtI = P->getIncomingBlock(u)->getTerminator();

        Known2 = KnownBits(BitWidth);

        // Recurse, but cap the recursion to one level, because we don't
        // want to waste time spinning around in loops.
        // TODO: See if we can base recursion limiter on number of incoming phi
        // edges so we don't overly clamp analysis.
        computeKnownBits(IncValue, DemandedElts, Known2,
                         MaxAnalysisRecursionDepth - 1, RecQ);

        // See if we can further use a conditional branch into the phi
        // to help us determine the range of the value.
        if (!Known2.isConstant()) {
          ICmpInst::Predicate Pred;
          const APInt *RHSC;
          BasicBlock *TrueSucc, *FalseSucc;
          // TODO: Use RHS Value and compute range from its known bits.
          if (match(RecQ.CxtI,
                    m_Br(m_c_ICmp(Pred, m_Specific(IncValue), m_APInt(RHSC)),
                         m_BasicBlock(TrueSucc), m_BasicBlock(FalseSucc)))) {
            // Check for cases of duplicate successors.
            if ((TrueSucc == P->getParent()) != (FalseSucc == P->getParent())) {
              // If we're using the false successor, invert the predicate.
              if (FalseSucc == P->getParent())
                Pred = CmpInst::getInversePredicate(Pred);
              // Get the known bits implied by the incoming phi condition.
              auto CR = ConstantRange::makeExactICmpRegion(Pred, *RHSC);
              KnownBits KnownUnion = Known2.unionWith(CR.toKnownBits());
              // We can have conflicts here if we are analyzing dead code
              // (it's impossible to reach this BB given the icmp).
              if (KnownUnion.hasConflict()) {
                // No reason to continue analyzing in a known dead region, so
                // just resetAll and break. This will cause us to also exit the
                // outer loop.
                Known.resetAll();
                break;
              }
              Known2 = KnownUnion;
            }
          }
        }

        Known = Known.intersectWith(Known2);
        // If all bits have been ruled out, there's no need to check
        // more operands.
        if (Known.isUnknown())
          break;
      }
    }
    break;
  }
  case Instruction::Call:
  case Instruction::Invoke: {
    // If range metadata is attached to this call, set known bits from that,
    // and then intersect with known bits based on other properties of the
    // function.
    if (MDNode *MD =
            Q.IIQ.getMetadata(cast<Instruction>(I), LLVMContext::MD_range))
      computeKnownBitsFromRangeMetadata(*MD, Known);

    const auto *CB = cast<CallBase>(I);

    if (std::optional<ConstantRange> Range = CB->getRange())
      Known = Known.unionWith(Range->toKnownBits());

    if (const Value *RV = CB->getReturnedArgOperand()) {
      if (RV->getType() == I->getType()) {
        computeKnownBits(RV, Known2, Depth + 1, Q);
        Known = Known.unionWith(Known2);
        // If the function doesn't return properly for all input values
        // (e.g. unreachable exits) then there might be conflicts between the
        // argument value and the range metadata. Simply discard the known bits
        // in case of conflicts.
        if (Known.hasConflict())
          Known.resetAll();
      }
    }
    if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
      switch (II->getIntrinsicID()) {
      default:
        break;
      case Intrinsic::abs: {
        computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);
        bool IntMinIsPoison = match(II->getArgOperand(1), m_One());
        Known = Known2.abs(IntMinIsPoison);
        break;
      }
      case Intrinsic::bitreverse:
        computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);
        Known.Zero |= Known2.Zero.reverseBits();
        Known.One |= Known2.One.reverseBits();
        break;
      case Intrinsic::bswap:
        computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);
        Known.Zero |= Known2.Zero.byteSwap();
        Known.One |= Known2.One.byteSwap();
        break;
      case Intrinsic::ctlz: {
        computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);
        // If we have a known 1, its position is our upper bound.
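        // e.g. if at most 27 leading zeros are possible for an i32 operand,
        // the result is at most 27, which fits in bit_width(27) = 5 bits, so
        // bits 5 and above of the result are known zero.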
        unsigned PossibleLZ = Known2.countMaxLeadingZeros();
        // If this call is poison for 0 input, the result will be less than 2^n.
        if (II->getArgOperand(1) == ConstantInt::getTrue(II->getContext()))
          PossibleLZ = std::min(PossibleLZ, BitWidth - 1);
        unsigned LowBits = llvm::bit_width(PossibleLZ);
        Known.Zero.setBitsFrom(LowBits);
        break;
      }
      case Intrinsic::cttz: {
        computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);
        // If we have a known 1, its position is our upper bound.
        unsigned PossibleTZ = Known2.countMaxTrailingZeros();
        // If this call is poison for 0 input, the result will be less than 2^n.
        if (II->getArgOperand(1) == ConstantInt::getTrue(II->getContext()))
          PossibleTZ = std::min(PossibleTZ, BitWidth - 1);
        unsigned LowBits = llvm::bit_width(PossibleTZ);
        Known.Zero.setBitsFrom(LowBits);
        break;
      }
      case Intrinsic::ctpop: {
        computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);
        // We can bound the space the count needs. Also, bits known to be zero
        // can't contribute to the population.
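        // e.g. if at most 7 bits of an i32 operand can be set, the population
        // count is at most 7 and fits in bit_width(7) = 3 bits, so bits 3 and
        // above of the result are known zero.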
        unsigned BitsPossiblySet = Known2.countMaxPopulation();
        unsigned LowBits = llvm::bit_width(BitsPossiblySet);
        Known.Zero.setBitsFrom(LowBits);
        // TODO: we could bound KnownOne using the lower bound on the number
        // of bits which might be set provided by popcnt KnownOne2.
        break;
      }
      case Intrinsic::fshr:
      case Intrinsic::fshl: {
        const APInt *SA;
        if (!match(I->getOperand(2), m_APInt(SA)))
          break;

        // Normalize to funnel shift left.
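        // e.g. for i8, fshr(%x, %y, 3) selects the same bits as
        // fshl(%x, %y, 8 - 3 = 5), so only the left-shift form needs to be
        // handled below.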
        uint64_t ShiftAmt = SA->urem(BitWidth);
        if (II->getIntrinsicID() == Intrinsic::fshr)
          ShiftAmt = BitWidth - ShiftAmt;

        KnownBits Known3(BitWidth);
        computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);
        computeKnownBits(I->getOperand(1), DemandedElts, Known3, Depth + 1, Q);

        Known.Zero =
            Known2.Zero.shl(ShiftAmt) | Known3.Zero.lshr(BitWidth - ShiftAmt);
        Known.One =
            Known2.One.shl(ShiftAmt) | Known3.One.lshr(BitWidth - ShiftAmt);
        break;
      }
      case Intrinsic::uadd_sat:
        computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
        computeKnownBits(I->getOperand(1), DemandedElts, Known2, Depth + 1, Q);
        Known = KnownBits::uadd_sat(Known, Known2);
        break;
      case Intrinsic::usub_sat:
        computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
        computeKnownBits(I->getOperand(1), DemandedElts, Known2, Depth + 1, Q);
        Known = KnownBits::usub_sat(Known, Known2);
        break;
      case Intrinsic::sadd_sat:
        computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
        computeKnownBits(I->getOperand(1), DemandedElts, Known2, Depth + 1, Q);
        Known = KnownBits::sadd_sat(Known, Known2);
        break;
      case Intrinsic::ssub_sat:
        computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
        computeKnownBits(I->getOperand(1), DemandedElts, Known2, Depth + 1, Q);
        Known = KnownBits::ssub_sat(Known, Known2);
        break;
      // Vec reverse preserves bits from input vec.
      case Intrinsic::vector_reverse:
        computeKnownBits(I->getOperand(0), DemandedElts.reverseBits(), Known,
                         Depth + 1, Q);
        break;
      // For min/max/and/or reduce, any bit common to each element in the
      // input vec is set in the output.
      case Intrinsic::vector_reduce_and:
      case Intrinsic::vector_reduce_or:
      case Intrinsic::vector_reduce_umax:
      case Intrinsic::vector_reduce_umin:
      case Intrinsic::vector_reduce_smax:
      case Intrinsic::vector_reduce_smin:
        computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
        break;
      case Intrinsic::vector_reduce_xor: {
        computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
        // The zeros common to all vecs are zero in the output.
        // If the number of elements is odd, then the common ones remain. If
        // the number of elements is even, then the common ones become zeros.
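        // e.g. a bit that is one in every element of a <4 x i32> vector XORs
        // to zero, while the same common one over a <3 x i32> vector
        // survives in the result.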
        auto *VecTy = cast<VectorType>(I->getOperand(0)->getType());
        // Even, so the ones become zeros.
        bool EvenCnt = VecTy->getElementCount().isKnownEven();
        if (EvenCnt)
          Known.Zero |= Known.One;
        // Maybe even element count so need to clear ones.
        if (VecTy->isScalableTy() || EvenCnt)
          Known.One.clearAllBits();
        break;
      }
      case Intrinsic::umin:
        computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
        computeKnownBits(I->getOperand(1), DemandedElts, Known2, Depth + 1, Q);
        Known = KnownBits::umin(Known, Known2);
        break;
      case Intrinsic::umax:
        computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
        computeKnownBits(I->getOperand(1), DemandedElts, Known2, Depth + 1, Q);
        Known = KnownBits::umax(Known, Known2);
        break;
      case Intrinsic::smin:
        computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
        computeKnownBits(I->getOperand(1), DemandedElts, Known2, Depth + 1, Q);
        Known = KnownBits::smin(Known, Known2);
        break;
      case Intrinsic::smax:
        computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
        computeKnownBits(I->getOperand(1), DemandedElts, Known2, Depth + 1, Q);
        Known = KnownBits::smax(Known, Known2);
        break;
      case Intrinsic::ptrmask: {
        computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);

        const Value *Mask = I->getOperand(1);
        Known2 = KnownBits(Mask->getType()->getScalarSizeInBits());
        computeKnownBits(Mask, DemandedElts, Known2, Depth + 1, Q);
        // TODO: 1-extend would be more precise.
        Known &= Known2.anyextOrTrunc(BitWidth);
        break;
      }
      case Intrinsic::x86_sse2_pmulh_w:
      case Intrinsic::x86_avx2_pmulh_w:
      case Intrinsic::x86_avx512_pmulh_w_512:
        computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
        computeKnownBits(I->getOperand(1), DemandedElts, Known2, Depth + 1, Q);
        Known = KnownBits::mulhs(Known, Known2);
        break;
      case Intrinsic::x86_sse2_pmulhu_w:
      case Intrinsic::x86_avx2_pmulhu_w:
      case Intrinsic::x86_avx512_pmulhu_w_512:
        computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
        computeKnownBits(I->getOperand(1), DemandedElts, Known2, Depth + 1, Q);
        Known = KnownBits::mulhu(Known, Known2);
        break;
      case Intrinsic::x86_sse42_crc32_64_64:
        Known.Zero.setBitsFrom(32);
        break;
      case Intrinsic::x86_ssse3_phadd_d_128:
      case Intrinsic::x86_ssse3_phadd_w_128:
      case Intrinsic::x86_avx2_phadd_d:
      case Intrinsic::x86_avx2_phadd_w: {
        Known = computeKnownBitsForHorizontalOperation(
            I, DemandedElts, Depth, Q,
            [](const KnownBits &KnownLHS, const KnownBits &KnownRHS) {
              return KnownBits::computeForAddSub(/*Add=*/true, /*NSW=*/false,
                                                 /*NUW=*/false, KnownLHS,
                                                 KnownRHS);
            });
        break;
      }
      case Intrinsic::x86_ssse3_phadd_sw_128:
      case Intrinsic::x86_avx2_phadd_sw: {
        Known = computeKnownBitsForHorizontalOperation(I, DemandedElts, Depth,
                                                       Q, KnownBits::sadd_sat);
        break;
      }
      case Intrinsic::x86_ssse3_phsub_d_128:
      case Intrinsic::x86_ssse3_phsub_w_128:
      case Intrinsic::x86_avx2_phsub_d:
      case Intrinsic::x86_avx2_phsub_w: {
        Known = computeKnownBitsForHorizontalOperation(
            I, DemandedElts, Depth, Q,
            [](const KnownBits &KnownLHS, const KnownBits &KnownRHS) {
              return KnownBits::computeForAddSub(/*Add=*/false, /*NSW=*/false,
                                                 /*NUW=*/false, KnownLHS,
                                                 KnownRHS);
            });
        break;
      }
      case Intrinsic::x86_ssse3_phsub_sw_128:
      case Intrinsic::x86_avx2_phsub_sw: {
        Known = computeKnownBitsForHorizontalOperation(I, DemandedElts, Depth,
                                                       Q, KnownBits::ssub_sat);
        break;
      }
      case Intrinsic::riscv_vsetvli:
      case Intrinsic::riscv_vsetvlimax: {
        bool HasAVL = II->getIntrinsicID() == Intrinsic::riscv_vsetvli;
        const ConstantRange Range = getVScaleRange(II->getFunction(), BitWidth);
        uint64_t SEW = RISCVVType::decodeVSEW(
            cast<ConstantInt>(II->getArgOperand(HasAVL))->getZExtValue());
        RISCVII::VLMUL VLMUL = static_cast<RISCVII::VLMUL>(
            cast<ConstantInt>(II->getArgOperand(1 + HasAVL))->getZExtValue());
        uint64_t MaxVLEN =
            Range.getUnsignedMax().getZExtValue() * RISCV::RVVBitsPerBlock;
        uint64_t MaxVL = MaxVLEN / RISCVVType::getSEWLMULRatio(SEW, VLMUL);

        // Result of vsetvli must be not larger than AVL.
        if (HasAVL)
          if (auto *CI = dyn_cast<ConstantInt>(II->getArgOperand(0)))
            MaxVL = std::min(MaxVL, CI->getZExtValue());

        unsigned KnownZeroFirstBit = Log2_32(MaxVL) + 1;
        if (BitWidth > KnownZeroFirstBit)
          Known.Zero.setBitsFrom(KnownZeroFirstBit);
        break;
      }
      case Intrinsic::vscale: {
        if (!II->getParent() || !II->getFunction())
          break;

        Known = getVScaleRange(II->getFunction(), BitWidth).toKnownBits();
        break;
      }
      }
    }
    break;
  }
  case Instruction::ShuffleVector: {
    auto *Shuf = dyn_cast<ShuffleVectorInst>(I);
    // FIXME: Do we need to handle ConstantExpr involving shufflevectors?
    if (!Shuf) {
      Known.resetAll();
      break;
    }
    // For undef elements, we don't know anything about the common state of
    // the shuffle result.
    APInt DemandedLHS, DemandedRHS;
    if (!getShuffleDemandedElts(Shuf, DemandedElts, DemandedLHS, DemandedRHS)) {
      Known.resetAll();
      break;
    }
    Known.One.setAllBits();
    Known.Zero.setAllBits();
    if (!!DemandedLHS) {
      const Value *LHS = Shuf->getOperand(0);
      computeKnownBits(LHS, DemandedLHS, Known, Depth + 1, Q);
      // If we don't know any bits, early out.
      if (Known.isUnknown())
        break;
    }
    if (!!DemandedRHS) {
      const Value *RHS = Shuf->getOperand(1);
      computeKnownBits(RHS, DemandedRHS, Known2, Depth + 1, Q);
      Known = Known.intersectWith(Known2);
    }
    break;
  }
  case Instruction::InsertElement: {
    if (isa<ScalableVectorType>(I->getType())) {
      Known.resetAll();
      break;
    }
    const Value *Vec = I->getOperand(0);
    const Value *Elt = I->getOperand(1);
    auto *CIdx = dyn_cast<ConstantInt>(I->getOperand(2));
    unsigned NumElts = DemandedElts.getBitWidth();
    APInt DemandedVecElts = DemandedElts;
    bool NeedsElt = true;
    // If we know the index we are inserting to, clear it from the Vec check.
    if (CIdx && CIdx->getValue().ult(NumElts)) {
      DemandedVecElts.clearBit(CIdx->getZExtValue());
      NeedsElt = DemandedElts[CIdx->getZExtValue()];
    }

    Known.One.setAllBits();
    Known.Zero.setAllBits();
    if (NeedsElt) {
      computeKnownBits(Elt, Known, Depth + 1, Q);
      // If we don't know any bits, early out.
      if (Known.isUnknown())
        break;
    }

    if (!DemandedVecElts.isZero()) {
      computeKnownBits(Vec, DemandedVecElts, Known2, Depth + 1, Q);
      Known = Known.intersectWith(Known2);
    }
    break;
  }
  case Instruction::ExtractElement: {
    // Look through extract element. If the index is non-constant or
    // out-of-range demand all elements, otherwise just the extracted element.
    const Value *Vec = I->getOperand(0);
    const Value *Idx = I->getOperand(1);
    auto *CIdx = dyn_cast<ConstantInt>(Idx);
    if (isa<ScalableVectorType>(Vec->getType())) {
      // FIXME: there's probably *something* we can do with scalable vectors
      Known.resetAll();
      break;
    }
    unsigned NumElts = cast<FixedVectorType>(Vec->getType())->getNumElements();
    APInt DemandedVecElts = APInt::getAllOnes(NumElts);
    if (CIdx && CIdx->getValue().ult(NumElts))
      DemandedVecElts = APInt::getOneBitSet(NumElts, CIdx->getZExtValue());
    computeKnownBits(Vec, DemandedVecElts, Known, Depth + 1, Q);
    break;
  }
  case Instruction::ExtractValue:
    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I->getOperand(0))) {
      const ExtractValueInst *EVI = cast<ExtractValueInst>(I);
      if (EVI->getNumIndices() != 1) break;
      if (EVI->getIndices()[0] == 0) {
        switch (II->getIntrinsicID()) {
        default:
          break;
        case Intrinsic::uadd_with_overflow:
        case Intrinsic::sadd_with_overflow:
          computeKnownBitsAddSub(
              true, II->getArgOperand(0), II->getArgOperand(1), /*NSW=*/false,
              /* NUW=*/false, DemandedElts, Known, Known2, Depth, Q);
          break;
        case Intrinsic::usub_with_overflow:
        case Intrinsic::ssub_with_overflow:
          computeKnownBitsAddSub(
              false, II->getArgOperand(0), II->getArgOperand(1), /*NSW=*/false,
              /* NUW=*/false, DemandedElts, Known, Known2, Depth, Q);
          break;
        case Intrinsic::umul_with_overflow:
        case Intrinsic::smul_with_overflow:
          computeKnownBitsMul(II->getArgOperand(0), II->getArgOperand(1), false,
                              DemandedElts, Known, Known2, Depth, Q);
          break;
        }
      }
    }
    break;
  case Instruction::Freeze:
    if (isGuaranteedNotToBePoison(I->getOperand(0), Q.AC, Q.CxtI, Q.DT,
                                  Depth + 1))
      computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
    break;
  }
}
/// Determine which bits of V are known to be either zero or one and return
/// them.
KnownBits llvm::computeKnownBits(const Value *V, const APInt &DemandedElts,
                                 unsigned Depth, const SimplifyQuery &Q) {
  KnownBits Known(getBitWidth(V->getType(), Q.DL));
  ::computeKnownBits(V, DemandedElts, Known, Depth, Q);
  return Known;
}

/// Determine which bits of V are known to be either zero or one and return
/// them.
KnownBits llvm::computeKnownBits(const Value *V, unsigned Depth,
                                 const SimplifyQuery &Q) {
  KnownBits Known(getBitWidth(V->getType(), Q.DL));
  computeKnownBits(V, Known, Depth, Q);
  return Known;
}
/// Determine which bits of V are known to be either zero or one and return
/// them in the Known bit set.
///
/// NOTE: we cannot consider 'undef' to be "IsZero" here. The problem is that
/// we cannot optimize based on the assumption that it is zero without changing
/// it to be an explicit zero. If we don't change it to zero, other code could
/// be optimized based on the contradictory assumption that it is non-zero.
/// Because instcombine aggressively folds operations with undef args anyway,
/// this won't lose us code quality.
///
/// This function is defined on values with integer type, values with pointer
/// type, and vectors of integers. In the case where V is a vector, the known
/// zero and known one values are the same width as the vector element, and a
/// bit is set only if it is true for all of the demanded elements in the
/// vector specified by DemandedElts.
void computeKnownBits(const Value *V, const APInt &DemandedElts,
                      KnownBits &Known, unsigned Depth,
                      const SimplifyQuery &Q) {
  if (!DemandedElts) {
    // No demanded elts, better to assume we don't know anything.
    Known.resetAll();
    return;
  }

  assert(V && "No Value?");
  assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");

#ifndef NDEBUG
  Type *Ty = V->getType();
  unsigned BitWidth = Known.getBitWidth();

  assert((Ty->isIntOrIntVectorTy(BitWidth) || Ty->isPtrOrPtrVectorTy()) &&
         "Not integer or pointer type!");

  if (auto *FVTy = dyn_cast<FixedVectorType>(Ty)) {
    assert(
        FVTy->getNumElements() == DemandedElts.getBitWidth() &&
        "DemandedElt width should equal the fixed vector number of elements");
  } else {
    assert(DemandedElts == APInt(1, 1) &&
           "DemandedElt width should be 1 for scalars or scalable vectors");
  }

  Type *ScalarTy = Ty->getScalarType();
  if (ScalarTy->isPointerTy()) {
    assert(BitWidth == Q.DL.getPointerTypeSizeInBits(ScalarTy) &&
           "V and Known should have same BitWidth");
  } else {
    assert(BitWidth == Q.DL.getTypeSizeInBits(ScalarTy) &&
           "V and Known should have same BitWidth");
  }
#endif

  const APInt *C;
  if (match(V, m_APInt(C))) {
    // We know all of the bits for a scalar constant or a splat vector constant!
    Known = KnownBits::makeConstant(*C);
    return;
  }
  // Null and aggregate-zero are all-zeros.
  if (isa<ConstantPointerNull>(V) || isa<ConstantAggregateZero>(V)) {
    Known.setAllZero();
    return;
  }
  // Handle a constant vector by taking the intersection of the known bits of
  // each element.
  if (const ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(V)) {
    assert(!isa<ScalableVectorType>(V->getType()));
    // We know that CDV must be a vector of integers. Take the intersection of
    // each element.
    Known.Zero.setAllBits(); Known.One.setAllBits();
    for (unsigned i = 0, e = CDV->getNumElements(); i != e; ++i) {
      if (!DemandedElts[i])
        continue;
      APInt Elt = CDV->getElementAsAPInt(i);
      Known.Zero &= ~Elt;
      Known.One &= Elt;
    }
    if (Known.hasConflict())
      Known.resetAll();
    return;
  }

  if (const auto *CV = dyn_cast<ConstantVector>(V)) {
    assert(!isa<ScalableVectorType>(V->getType()));
    // We know that CV must be a vector of integers. Take the intersection of
    // each element.
    Known.Zero.setAllBits(); Known.One.setAllBits();
    for (unsigned i = 0, e = CV->getNumOperands(); i != e; ++i) {
      if (!DemandedElts[i])
        continue;
      Constant *Element = CV->getAggregateElement(i);
      if (isa<PoisonValue>(Element))
        continue;
      auto *ElementCI = dyn_cast_or_null<ConstantInt>(Element);
      if (!ElementCI) {
        Known.resetAll();
        return;
      }
      const APInt &Elt = ElementCI->getValue();
      Known.Zero &= ~Elt;
      Known.One &= Elt;
    }
    if (Known.hasConflict())
      Known.resetAll();
    return;
  }
  // Start out not knowing anything.
  Known.resetAll();

  // We can't imply anything about undefs.
  if (isa<UndefValue>(V))
    return;

  // There's no point in looking through other users of ConstantData for
  // assumptions. Confirm that we've handled them all.
  assert(!isa<ConstantData>(V) && "Unhandled constant data!");

  if (const auto *A = dyn_cast<Argument>(V))
    if (std::optional<ConstantRange> Range = A->getRange())
      Known = Range->toKnownBits();

  // All recursive calls that increase depth must come after this.
  if (Depth == MaxAnalysisRecursionDepth)
    return;

  // A weak GlobalAlias is totally unknown. A non-weak GlobalAlias has
  // the bits of its aliasee.
  if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
    if (!GA->isInterposable())
      computeKnownBits(GA->getAliasee(), Known, Depth + 1, Q);
    return;
  }

  if (const Operator *I = dyn_cast<Operator>(V))
    computeKnownBitsFromOperator(I, DemandedElts, Known, Depth, Q);
  else if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
    if (std::optional<ConstantRange> CR = GV->getAbsoluteSymbolRange())
      Known = CR->toKnownBits();
  }

  // Aligned pointers have trailing zeros - refine the Known.Zero set.
  if (isa<PointerType>(V->getType())) {
    Align Alignment = V->getPointerAlignment(Q.DL);
    Known.Zero.setLowBits(Log2(Alignment));
  }

  // computeKnownBitsFromContext strictly refines Known.
  // Therefore, we run it after computeKnownBitsFromOperator.

  // Check whether we can determine known bits from context such as assumes.
  computeKnownBitsFromContext(V, Known, Depth, Q);
}
/// Try to detect a recurrence in which the value of the induction variable
/// is always a power of two (or zero).
static bool isPowerOfTwoRecurrence(const PHINode *PN, bool OrZero,
                                   unsigned Depth, SimplifyQuery &Q) {
  BinaryOperator *BO = nullptr;
  Value *Start = nullptr, *Step = nullptr;
  if (!matchSimpleRecurrence(PN, BO, Start, Step))
    return false;

  // Initial value must be a power of two.
  for (const Use &U : PN->operands()) {
    if (U.get() == Start) {
      // Initial value comes from a different BB, need to adjust context
      // instruction for analysis.
      Q.CxtI = PN->getIncomingBlock(U)->getTerminator();
      if (!isKnownToBeAPowerOfTwo(Start, OrZero, Depth, Q))
        return false;
    }
  }

  // Except for Mul, the induction variable must be on the left side of the
  // increment expression, otherwise its value can be arbitrary.
  if (BO->getOpcode() != Instruction::Mul && BO->getOperand(1) != Step)
    return false;

  Q.CxtI = BO->getParent()->getTerminator();
  switch (BO->getOpcode()) {
  case Instruction::Mul:
    // Power of two is closed under multiplication.
    return (OrZero || Q.IIQ.hasNoUnsignedWrap(BO) ||
            Q.IIQ.hasNoSignedWrap(BO)) &&
           isKnownToBeAPowerOfTwo(Step, OrZero, Depth, Q);
  case Instruction::SDiv:
    // Start value must not be signmask for signed division, so simply being a
    // power of two is not sufficient, and it has to be a constant.
    if (!match(Start, m_Power2()) || match(Start, m_SignMask()))
      return false;
    [[fallthrough]];
  case Instruction::UDiv:
    // Divisor must be a power of two.
    // If OrZero is false, cannot guarantee induction variable is non-zero after
    // division, same for Shr, unless it is exact division.
    return (OrZero || Q.IIQ.isExact(BO)) &&
           isKnownToBeAPowerOfTwo(Step, false, Depth, Q);
  case Instruction::Shl:
    return OrZero || Q.IIQ.hasNoUnsignedWrap(BO) || Q.IIQ.hasNoSignedWrap(BO);
  case Instruction::AShr:
    if (!match(Start, m_Power2()) || match(Start, m_SignMask()))
      return false;
    [[fallthrough]];
  case Instruction::LShr:
    return OrZero || Q.IIQ.isExact(BO);
  default:
    return false;
  }
}
/// Return true if the given value is known to have exactly one
/// bit set when defined. For vectors return true if every element is known to
/// be a power of two when defined. Supports values with integer or pointer
/// types and vectors of integers.
bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero, unsigned Depth,
                            const SimplifyQuery &Q) {
  assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");

  if (isa<Constant>(V))
    return OrZero ? match(V, m_Power2OrZero()) : match(V, m_Power2());

  // i1 is by definition a power of 2 or zero.
  if (OrZero && V->getType()->getScalarSizeInBits() == 1)
    return true;

  auto *I = dyn_cast<Instruction>(V);
  if (!I)
    return false;

  if (Q.CxtI && match(V, m_VScale())) {
    const Function *F = Q.CxtI->getFunction();
    // The vscale_range indicates vscale is a power-of-two.
    return F->hasFnAttribute(Attribute::VScaleRange);
  }

  // 1 << X is clearly a power of two if the one is not shifted off the end. If
  // it is shifted off the end then the result is undefined.
  if (match(I, m_Shl(m_One(), m_Value())))
    return true;

  // (signmask) >>l X is clearly a power of two if the one is not shifted off
  // the bottom. If it is shifted off the bottom then the result is undefined.
  if (match(I, m_LShr(m_SignMask(), m_Value())))
    return true;

  // The remaining tests are all recursive, so bail out if we hit the limit.
  if (Depth++ == MaxAnalysisRecursionDepth)
    return false;

  switch (I->getOpcode()) {
  case Instruction::ZExt:
    return isKnownToBeAPowerOfTwo(I->getOperand(0), OrZero, Depth, Q);
  case Instruction::Trunc:
    return OrZero && isKnownToBeAPowerOfTwo(I->getOperand(0), OrZero, Depth, Q);
  case Instruction::Shl:
    if (OrZero || Q.IIQ.hasNoUnsignedWrap(I) || Q.IIQ.hasNoSignedWrap(I))
      return isKnownToBeAPowerOfTwo(I->getOperand(0), OrZero, Depth, Q);
    return false;
  case Instruction::LShr:
    if (OrZero || Q.IIQ.isExact(cast<BinaryOperator>(I)))
      return isKnownToBeAPowerOfTwo(I->getOperand(0), OrZero, Depth, Q);
    return false;
  case Instruction::UDiv:
    if (Q.IIQ.isExact(cast<BinaryOperator>(I)))
      return isKnownToBeAPowerOfTwo(I->getOperand(0), OrZero, Depth, Q);
    return false;
  case Instruction::Mul:
    return isKnownToBeAPowerOfTwo(I->getOperand(1), OrZero, Depth, Q) &&
           isKnownToBeAPowerOfTwo(I->getOperand(0), OrZero, Depth, Q) &&
           (OrZero || isKnownNonZero(I, Q, Depth));
  case Instruction::And:
    // A power of two and'd with anything is a power of two or zero.
    if (OrZero &&
        (isKnownToBeAPowerOfTwo(I->getOperand(1), /*OrZero*/ true, Depth, Q) ||
         isKnownToBeAPowerOfTwo(I->getOperand(0), /*OrZero*/ true, Depth, Q)))
      return true;
    // X & (-X) is always a power of two or zero.
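    // e.g. X = 0b0110 (6): -X = 0b1010 (mod 16) and X & -X = 0b0010, the
    // lowest set bit of X; the result is 0 only when X == 0.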
    if (match(I->getOperand(0), m_Neg(m_Specific(I->getOperand(1)))) ||
        match(I->getOperand(1), m_Neg(m_Specific(I->getOperand(0)))))
      return OrZero || isKnownNonZero(I->getOperand(0), Q, Depth);
    return false;
  case Instruction::Add: {
    // Adding a power-of-two or zero to the same power-of-two or zero yields
    // either the original power-of-two, a larger power-of-two or zero.
    const OverflowingBinaryOperator *VOBO = cast<OverflowingBinaryOperator>(V);
    if (OrZero || Q.IIQ.hasNoUnsignedWrap(VOBO) ||
        Q.IIQ.hasNoSignedWrap(VOBO)) {
      if (match(I->getOperand(0),
                m_c_And(m_Specific(I->getOperand(1)), m_Value())) &&
          isKnownToBeAPowerOfTwo(I->getOperand(1), OrZero, Depth, Q))
        return true;
      if (match(I->getOperand(1),
                m_c_And(m_Specific(I->getOperand(0)), m_Value())) &&
          isKnownToBeAPowerOfTwo(I->getOperand(0), OrZero, Depth, Q))
        return true;

      unsigned BitWidth = V->getType()->getScalarSizeInBits();
      KnownBits LHSBits(BitWidth);
      computeKnownBits(I->getOperand(0), LHSBits, Depth, Q);

      KnownBits RHSBits(BitWidth);
      computeKnownBits(I->getOperand(1), RHSBits, Depth, Q);
      // If i8 V is a power of two or zero:
      //  ZeroBits: 1 1 1 0 1 1 1 1
      // ~ZeroBits: 0 0 0 1 0 0 0 0
      if ((~(LHSBits.Zero & RHSBits.Zero)).isPowerOf2())
        // If OrZero isn't set, we cannot give back a zero result.
        // Make sure either the LHS or RHS has a bit set.
        if (OrZero || RHSBits.One.getBoolValue() || LHSBits.One.getBoolValue())
          return true;
    }

    // LShr(UINT_MAX, Y) + 1 is a power of two (if add is nuw) or zero.
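    // e.g. for i8, lshr(0xFF, 3) + 1 == 0x1F + 1 == 0x20 == 2^(8 - 3); in
    // general lshr(-1, Y) == 2^(BitWidth - Y) - 1, so adding 1 yields a
    // power of two.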
    if (OrZero || Q.IIQ.hasNoUnsignedWrap(VOBO))
      if (match(I, m_Add(m_LShr(m_AllOnes(), m_Value()), m_One())))
        return true;
    return false;
  }
  case Instruction::Select:
    return isKnownToBeAPowerOfTwo(I->getOperand(1), OrZero, Depth, Q) &&
           isKnownToBeAPowerOfTwo(I->getOperand(2), OrZero, Depth, Q);
  case Instruction::PHI: {
    // A PHI node is power of two if all incoming values are power of two, or
    // if it is an induction variable where in each step its value is a power
    // of two.
    auto *PN = cast<PHINode>(I);
    SimplifyQuery RecQ = Q.getWithoutCondContext();

    // Check if it is an induction variable and always power of two.
    if (isPowerOfTwoRecurrence(PN, OrZero, Depth, RecQ))
      return true;

    // Recursively check all incoming values. Limit recursion to 2 levels, so
    // that search complexity is limited to number of operands^2.
    unsigned NewDepth = std::max(Depth, MaxAnalysisRecursionDepth - 1);
    return llvm::all_of(PN->operands(), [&](const Use &U) {
      // Value is power of 2 if it is coming from PHI node itself by induction.
      if (U.get() == PN)
        return true;

      // Change the context instruction to the incoming block where it is
      // evaluated.
      RecQ.CxtI = PN->getIncomingBlock(U)->getTerminator();
      return isKnownToBeAPowerOfTwo(U.get(), OrZero, NewDepth, RecQ);
    });
  }
  case Instruction::Invoke:
  case Instruction::Call: {
    if (auto *II = dyn_cast<IntrinsicInst>(I)) {
      switch (II->getIntrinsicID()) {
      case Intrinsic::umax:
      case Intrinsic::smax:
      case Intrinsic::umin:
      case Intrinsic::smin:
        return isKnownToBeAPowerOfTwo(II->getArgOperand(1), OrZero, Depth, Q) &&
               isKnownToBeAPowerOfTwo(II->getArgOperand(0), OrZero, Depth, Q);
      // bswap/bitreverse just move around bits, but don't change any 1s/0s,
      // thus don't change pow2/non-pow2 status.
      case Intrinsic::bitreverse:
      case Intrinsic::bswap:
        return isKnownToBeAPowerOfTwo(II->getArgOperand(0), OrZero, Depth, Q);
      case Intrinsic::fshr:
      case Intrinsic::fshl:
        // If Op0 == Op1, this is a rotate. is_pow2(rotate(x, y)) == is_pow2(x)
        if (II->getArgOperand(0) == II->getArgOperand(1))
          return isKnownToBeAPowerOfTwo(II->getArgOperand(0), OrZero, Depth, Q);
        break;
      default:
        break;
      }
    }
    return false;
  }
  default:
    return false;
  }
}
/// Test whether a GEP's result is known to be non-null.
///
/// Uses properties inherent in a GEP to try to determine whether it is known
/// to be non-null.
///
/// Currently this routine does not support vector GEPs.
static bool isGEPKnownNonNull(const GEPOperator *GEP, unsigned Depth,
                              const SimplifyQuery &Q) {
  const Function *F = nullptr;
  if (const Instruction *I = dyn_cast<Instruction>(GEP))
    F = I->getFunction();

  // If the gep is nuw, or is inbounds in an address space where the null
  // pointer is not a valid address, then the GEP can only be null if the
  // base pointer is null and the offset is zero.
  if (!GEP->hasNoUnsignedWrap() &&
      !(GEP->isInBounds() &&
        !NullPointerIsDefined(F, GEP->getPointerAddressSpace())))
    return false;

  // FIXME: Support vector-GEPs.
  assert(GEP->getType()->isPointerTy() && "We only support plain pointer GEP");

  // If the base pointer is non-null, we cannot walk to a null address with an
  // inbounds GEP in address space zero.
  if (isKnownNonZero(GEP->getPointerOperand(), Q, Depth))
    return true;

  // Walk the GEP operands and see if any operand introduces a non-zero offset.
  // If so, then the GEP cannot produce a null pointer, as doing so would
  // inherently violate the inbounds contract within address space zero.
  for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP);
       GTI != GTE; ++GTI) {
    // Struct types are easy -- they must always be indexed by a constant.
    if (StructType *STy = GTI.getStructTypeOrNull()) {
      ConstantInt *OpC = cast<ConstantInt>(GTI.getOperand());
      unsigned ElementIdx = OpC->getZExtValue();
      const StructLayout *SL = Q.DL.getStructLayout(STy);
      uint64_t ElementOffset = SL->getElementOffset(ElementIdx);
      if (ElementOffset > 0)
        return true;
      continue;
    }

    // If we have a zero-sized type, the index doesn't matter. Keep looping.
    if (GTI.getSequentialElementStride(Q.DL).isZero())
      continue;

    // Fast path the constant operand case both for efficiency and so we don't
    // increment Depth when just zipping down an all-constant GEP.
    if (ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand())) {
      if (!OpC->isZero())
        return true;
      continue;
    }

    // We post-increment Depth here because while isKnownNonZero increments it
    // as well, when we pop back up that increment won't persist. We don't want
    // to recurse 10k times just because we have 10k GEP operands. We don't
    // bail completely out because we want to handle constant GEPs regardless
    // of depth.
    if (Depth++ >= MaxAnalysisRecursionDepth)
      continue;

    if (isKnownNonZero(GTI.getOperand(), Q, Depth))
      return true;
  }

  return false;
}
static bool isKnownNonNullFromDominatingCondition(const Value *V,
                                                  const Instruction *CtxI,
                                                  const DominatorTree *DT) {
  assert(!isa<Constant>(V) && "Called for constant?");

  if (!CtxI || !DT)
    return false;

  unsigned NumUsesExplored = 0;
  for (const auto *U : V->users()) {
    // Avoid massive lists
    if (NumUsesExplored >= DomConditionsMaxUses)
      break;
    NumUsesExplored++;

    // If the value is used as an argument to a call or invoke, then argument
    // attributes may provide an answer about null-ness.
    if (const auto *CB = dyn_cast<CallBase>(U))
      if (auto *CalledFunc = CB->getCalledFunction())
        for (const Argument &Arg : CalledFunc->args())
          if (CB->getArgOperand(Arg.getArgNo()) == V &&
              Arg.hasNonNullAttr(/* AllowUndefOrPoison */ false) &&
              DT->dominates(CB, CtxI))
            return true;

    // If the value is used as a load/store, then the pointer must be non null.
    if (V == getLoadStorePointerOperand(U)) {
      const Instruction *I = cast<Instruction>(U);
      if (!NullPointerIsDefined(I->getFunction(),
                                V->getType()->getPointerAddressSpace()) &&
          DT->dominates(I, CtxI))
        return true;
    }

    if ((match(U, m_IDiv(m_Value(), m_Specific(V))) ||
         match(U, m_IRem(m_Value(), m_Specific(V)))) &&
        isValidAssumeForContext(cast<Instruction>(U), CtxI, DT))
      return true;

    // Consider only compare instructions uniquely controlling a branch
    Value *RHS;
    CmpInst::Predicate Pred;
    if (!match(U, m_c_ICmp(Pred, m_Specific(V), m_Value(RHS))))
      continue;

    bool NonNullIfTrue;
    if (cmpExcludesZero(Pred, RHS))
      NonNullIfTrue = true;
    else if (cmpExcludesZero(CmpInst::getInversePredicate(Pred), RHS))
      NonNullIfTrue = false;
    else
      continue;

    SmallVector<const User *, 4> WorkList;
    SmallPtrSet<const User *, 4> Visited;
    for (const auto *CmpU : U->users()) {
      assert(WorkList.empty() && "Should be!");
      if (Visited.insert(CmpU).second)
        WorkList.push_back(CmpU);

      while (!WorkList.empty()) {
        auto *Curr = WorkList.pop_back_val();

        // If a user is an AND, add all its users to the work list. We only
        // propagate "pred != null" condition through AND because it is only
        // correct to assume that all conditions of AND are met in true branch.
        // TODO: Support similar logic of OR and EQ predicate?
        if (NonNullIfTrue)
          if (match(Curr, m_LogicalAnd(m_Value(), m_Value()))) {
            for (const auto *CurrU : Curr->users())
              if (Visited.insert(CurrU).second)
                WorkList.push_back(CurrU);
            continue;
          }

        if (const BranchInst *BI = dyn_cast<BranchInst>(Curr)) {
          assert(BI->isConditional() && "uses a comparison!");

          BasicBlock *NonNullSuccessor =
              BI->getSuccessor(NonNullIfTrue ? 0 : 1);
          BasicBlockEdge Edge(BI->getParent(), NonNullSuccessor);
          if (Edge.isSingleEdge() && DT->dominates(Edge, CtxI->getParent()))
            return true;
        } else if (NonNullIfTrue && isGuard(Curr) &&
                   DT->dominates(cast<Instruction>(Curr), CtxI)) {
          return true;
        }
      }
    }
  }

  return false;
}
/// Does the 'Range' metadata (which must be a valid MD_range operand list)
/// ensure that the value it's attached to is never Value? 'RangeType' is
/// the type of the value described by the range.
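/// The operand list is a sequence of half-open [Lower, Upper) pairs; e.g.
/// !{i32 1, i32 256} excludes 0, while the wrapping range !{i32 -1, i32 1}
/// contains it.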
static bool rangeMetadataExcludesValue(const MDNode *Ranges,
                                       const APInt &Value) {
  const unsigned NumRanges = Ranges->getNumOperands() / 2;
  assert(NumRanges >= 1);
  for (unsigned i = 0; i < NumRanges; ++i) {
    ConstantInt *Lower =
        mdconst::extract<ConstantInt>(Ranges->getOperand(2 * i + 0));
    ConstantInt *Upper =
        mdconst::extract<ConstantInt>(Ranges->getOperand(2 * i + 1));
    ConstantRange Range(Lower->getValue(), Upper->getValue());
    if (Range.contains(Value))
      return false;
  }
  return true;
}
/// Try to detect a recurrence that monotonically increases/decreases from a
/// non-zero starting value. These are common as induction variables.
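/// e.g. %iv = phi i64 [ 1, %entry ], [ %next, %loop ] with
/// %next = add nuw i64 %iv, 2 starts at 1 and, thanks to nuw, can only step
/// away from zero, so %iv is never zero.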
static bool isNonZeroRecurrence(const PHINode *PN) {
  BinaryOperator *BO = nullptr;
  Value *Start = nullptr, *Step = nullptr;
  const APInt *StartC, *StepC;
  if (!matchSimpleRecurrence(PN, BO, Start, Step) ||
      !match(Start, m_APInt(StartC)) || StartC->isZero())
    return false;

  switch (BO->getOpcode()) {
  case Instruction::Add:
    // Starting from non-zero and stepping away from zero can never wrap back
    // to zero.
    return BO->hasNoUnsignedWrap() ||
           (BO->hasNoSignedWrap() && match(Step, m_APInt(StepC)) &&
            StartC->isNegative() == StepC->isNegative());
  case Instruction::Mul:
    return (BO->hasNoUnsignedWrap() || BO->hasNoSignedWrap()) &&
           match(Step, m_APInt(StepC)) && !StepC->isZero();
  case Instruction::Shl:
    return BO->hasNoUnsignedWrap() || BO->hasNoSignedWrap();
  case Instruction::AShr:
  case Instruction::LShr:
    return BO->isExact();
  default:
    return false;
  }
}
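
/// Return true if one of \p Op0 and \p Op1 is a zext or sext of an
/// "icmp eq <other operand>, 0", i.e. one operand is the boolean
/// "other == 0" widened to the integer type.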
static bool matchOpWithOpEqZero(Value *Op0, Value *Op1) {
  ICmpInst::Predicate Pred;
  return (match(Op0, m_ZExtOrSExt(m_ICmp(Pred, m_Specific(Op1), m_Zero()))) ||
          match(Op1, m_ZExtOrSExt(m_ICmp(Pred, m_Specific(Op0), m_Zero())))) &&
         Pred == ICmpInst::ICMP_EQ;
}
static bool isNonZeroAdd(const APInt &DemandedElts, unsigned Depth,
                         const SimplifyQuery &Q, unsigned BitWidth, Value *X,
                         Value *Y, bool NSW, bool NUW) {
  // (X + (X == 0)) is non zero
  if (matchOpWithOpEqZero(X, Y))
    return true;

  if (NUW)
    return isKnownNonZero(Y, DemandedElts, Q, Depth) ||
           isKnownNonZero(X, DemandedElts, Q, Depth);

  KnownBits XKnown = computeKnownBits(X, DemandedElts, Depth, Q);
  KnownBits YKnown = computeKnownBits(Y, DemandedElts, Depth, Q);

  // If X and Y are both non-negative (as signed values) then their sum is not
  // zero unless both X and Y are zero.
  if (XKnown.isNonNegative() && YKnown.isNonNegative())
    if (isKnownNonZero(Y, DemandedElts, Q, Depth) ||
        isKnownNonZero(X, DemandedElts, Q, Depth))
      return true;

  // If X and Y are both negative (as signed values) then their sum is not
  // zero unless both X and Y equal INT_MIN.
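  // e.g. for i8, the only pair of negative values whose sum wraps to zero
  // is -128 + -128, so ruling out INT_MIN for either operand proves the sum
  // non-zero.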
  if (XKnown.isNegative() && YKnown.isNegative()) {
    APInt Mask = APInt::getSignedMaxValue(BitWidth);
    // The sign bit of X is set. If some other bit is set then X is not equal
    // to INT_MIN.
    if (XKnown.One.intersects(Mask))
      return true;
    // The sign bit of Y is set. If some other bit is set then Y is not equal
    // to INT_MIN.
    if (YKnown.One.intersects(Mask))
      return true;
  }

  // The sum of a non-negative number and a power of two is not zero.
  if (XKnown.isNonNegative() &&
      isKnownToBeAPowerOfTwo(Y, /*OrZero*/ false, Depth, Q))
    return true;
  if (YKnown.isNonNegative() &&
      isKnownToBeAPowerOfTwo(X, /*OrZero*/ false, Depth, Q))
    return true;

  return KnownBits::computeForAddSub(/*Add=*/true, NSW, NUW, XKnown, YKnown)
      .isNonZero();
}
static bool isNonZeroSub(const APInt &DemandedElts, unsigned Depth,
                         const SimplifyQuery &Q, unsigned BitWidth, Value *X,
                         Value *Y) {
  // (X - (X == 0)) is non zero
  // ((X == 0) - X) is non zero
  if (matchOpWithOpEqZero(X, Y))
    return true;

  // TODO: Move this case into isKnownNonEqual().
  if (auto *C = dyn_cast<Constant>(X))
    if (C->isNullValue() && isKnownNonZero(Y, DemandedElts, Q, Depth))
      return true;

  return ::isKnownNonEqual(X, Y, DemandedElts, Depth, Q);
}
static bool isNonZeroMul(const APInt &DemandedElts, unsigned Depth,
                         const SimplifyQuery &Q, unsigned BitWidth, Value *X,
                         Value *Y, bool NSW, bool NUW) {
  // If X and Y are non-zero then so is X * Y as long as the multiplication
  // does not overflow.
  if (NSW || NUW)
    return isKnownNonZero(X, DemandedElts, Q, Depth) &&
           isKnownNonZero(Y, DemandedElts, Q, Depth);

  // If either X or Y is odd, then if the other is non-zero the result can't
  // be zero.
  KnownBits XKnown = computeKnownBits(X, DemandedElts, Depth, Q);
  if (XKnown.One[0])
    return isKnownNonZero(Y, DemandedElts, Q, Depth);

  KnownBits YKnown = computeKnownBits(Y, DemandedElts, Depth, Q);
  if (YKnown.One[0])
    return XKnown.isNonZero() || isKnownNonZero(X, DemandedElts, Q, Depth);

  // If there exists any subset of X (sX) and subset of Y (sY) s.t. sX * sY is
  // non-zero, then X * Y is non-zero. We can find sX and sY by just taking
  // the lowest known One of X and Y. If they are non-zero, the result
  // must be non-zero. We can check if LSB(X) * LSB(Y) != 0 by checking that
  // X.countMaxTrailingZeros() + Y.countMaxTrailingZeros() < BitWidth.
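  // e.g. for i8, if X has at most 2 possible trailing zeros and Y at most 3,
  // the product's lowest set bit is at position 2 + 3 = 5 at most, which
  // still fits in 8 bits, so the product cannot be zero modulo 2^8.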
  return (XKnown.countMaxTrailingZeros() + YKnown.countMaxTrailingZeros()) <
         BitWidth;
}
static bool isNonZeroShift(const Operator *I, const APInt &DemandedElts,
                           unsigned Depth, const SimplifyQuery &Q,
                           const KnownBits &KnownVal) {
  auto ShiftOp = [&](const APInt &Lhs, const APInt &Rhs) {
    switch (I->getOpcode()) {
    case Instruction::Shl:
      return Lhs.shl(Rhs);
    case Instruction::LShr:
      return Lhs.lshr(Rhs);
    case Instruction::AShr:
      return Lhs.ashr(Rhs);
    default:
      llvm_unreachable("Unknown Shift Opcode");
    }
  };

  auto InvShiftOp = [&](const APInt &Lhs, const APInt &Rhs) {
    switch (I->getOpcode()) {
    case Instruction::Shl:
      return Lhs.lshr(Rhs);
    case Instruction::LShr:
    case Instruction::AShr:
      return Lhs.shl(Rhs);
    default:
      llvm_unreachable("Unknown Shift Opcode");
    }
  };

  if (KnownVal.isUnknown())
    return false;

  KnownBits KnownCnt =
      computeKnownBits(I->getOperand(1), DemandedElts, Depth, Q);
  APInt MaxShift = KnownCnt.getMaxValue();
  unsigned NumBits = KnownVal.getBitWidth();
  if (MaxShift.uge(NumBits))
    return false;

  if (!ShiftOp(KnownVal.One, MaxShift).isZero())
    return true;

  // If all of the bits shifted out are known to be zero, and Val is known
  // non-zero then at least one non-zero bit must remain.
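  // e.g. for shl i8 %v, %s with %s known <= 3: if bits 5-7 of %v are known
  // zero, no set bit can be shifted out, so a non-zero %v yields a non-zero
  // result.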
  if (InvShiftOp(KnownVal.Zero, NumBits - MaxShift)
          .eq(InvShiftOp(APInt::getAllOnes(NumBits), NumBits - MaxShift)) &&
      isKnownNonZero(I->getOperand(0), DemandedElts, Q, Depth))
    return true;

  return false;
}
static bool isKnownNonZeroFromOperator(const Operator *I,
                                       const APInt &DemandedElts,
                                       unsigned Depth, const SimplifyQuery &Q) {
  unsigned BitWidth = getBitWidth(I->getType()->getScalarType(), Q.DL);
  switch (I->getOpcode()) {
  case Instruction::Alloca:
    // Alloca never returns null, malloc might.
    return I->getType()->getPointerAddressSpace() == 0;
  case Instruction::GetElementPtr:
    if (I->getType()->isPointerTy())
      return isGEPKnownNonNull(cast<GEPOperator>(I), Depth, Q);
    break;
  case Instruction::BitCast: {
    // We need to be a bit careful here. We can only peek through the bitcast
    // if the scalar size of elements in the operand is smaller than, and a
    // divisor of, the scalar size they are casting to. Take three cases:
    //
    // 1) Unsafe:
    //      bitcast <2 x i16> %NonZero to <4 x i8>
    //
    //    %NonZero can have 2 non-zero i16 elements, but isKnownNonZero on a
    //    <4 x i8> requires that all 4 i8 elements be non-zero which isn't
    //    guaranteed (imagine just the sign bit set in the 2 i16 elements).
    //
    // 2) Unsafe:
    //      bitcast <4 x i3> %NonZero to <3 x i4>
    //
    //    Even though the scalar size of the src (`i3`) is smaller than the
    //    scalar size of the dst `i4`, because `i4` is not a multiple of `i3`
    //    it's possible for the `3 x i4` elements to be zero because there are
    //    some elements in the destination that don't contain any full src
    //    element.
    //
    // 3) Safe:
    //      bitcast <4 x i8> %NonZero to <2 x i16>
    //
    //    This is always safe as non-zero in the 4 i8 elements implies
    //    non-zero in the combination of any two adjacent ones. Since i16 is a
    //    multiple of i8, each i16 is guaranteed to contain 2 full i8
    //    elements. This all implies the 2 i16 elements are non-zero.
    Type *FromTy = I->getOperand(0)->getType();
    if ((FromTy->isIntOrIntVectorTy() || FromTy->isPtrOrPtrVectorTy()) &&
        (BitWidth % getBitWidth(FromTy->getScalarType(), Q.DL)) == 0)
      return isKnownNonZero(I->getOperand(0), Q, Depth);
  } break;
  case Instruction::IntToPtr:
    // Note that we have to take special care to avoid looking through
    // truncating casts, e.g., int2ptr/ptr2int with appropriate sizes, as well
    // as casts that can alter the value, e.g., AddrSpaceCasts.
    if (!isa<ScalableVectorType>(I->getType()) &&
        Q.DL.getTypeSizeInBits(I->getOperand(0)->getType()).getFixedValue() <=
            Q.DL.getTypeSizeInBits(I->getType()).getFixedValue())
      return isKnownNonZero(I->getOperand(0), DemandedElts, Q, Depth);
    break;
  case Instruction::PtrToInt:
    // Similar to int2ptr above, we can look through ptr2int here if the cast
    // is a no-op or an extend and not a truncate.
    if (!isa<ScalableVectorType>(I->getType()) &&
        Q.DL.getTypeSizeInBits(I->getOperand(0)->getType()).getFixedValue() <=
            Q.DL.getTypeSizeInBits(I->getType()).getFixedValue())
      return isKnownNonZero(I->getOperand(0), DemandedElts, Q, Depth);
    break;
  case Instruction::Trunc:
    // nuw/nsw trunc preserves zero/non-zero status of input.
    if (auto *TI = dyn_cast<TruncInst>(I))
      if (TI->hasNoSignedWrap() || TI->hasNoUnsignedWrap())
        return isKnownNonZero(TI->getOperand(0), DemandedElts, Q, Depth);
    break;
  case Instruction::Sub:
    return isNonZeroSub(DemandedElts, Depth, Q, BitWidth, I->getOperand(0),
                        I->getOperand(1));
  case Instruction::Xor:
    // (X ^ (X == 0)) is non zero
    if (matchOpWithOpEqZero(I->getOperand(0), I->getOperand(1)))
      return true;
    break;
  case Instruction::Or:
    // (X | (X == 0)) is non zero
    if (matchOpWithOpEqZero(I->getOperand(0), I->getOperand(1)))
      return true;
    // X | Y != 0 if X != 0 or Y != 0.
    return isKnownNonZero(I->getOperand(1), DemandedElts, Q, Depth) ||
           isKnownNonZero(I->getOperand(0), DemandedElts, Q, Depth);
  case Instruction::SExt:
  case Instruction::ZExt:
    // ext X != 0 if X != 0.
    return isKnownNonZero(I->getOperand(0), DemandedElts, Q, Depth);

  case Instruction::Shl: {
    // shl nsw/nuw can't remove any non-zero bits.
    const OverflowingBinaryOperator *BO = cast<OverflowingBinaryOperator>(I);
    if (Q.IIQ.hasNoUnsignedWrap(BO) || Q.IIQ.hasNoSignedWrap(BO))
      return isKnownNonZero(I->getOperand(0), DemandedElts, Q, Depth);

    // shl X, Y != 0 if X is odd. Note that the value of the shift is undefined
    // if the lowest bit is shifted off the end.
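    // e.g. for i8, an odd %x has bit 0 set, and %x << %s (with %s < 8 for a
    // defined result) moves that one to bit %s, so the result is non-zero.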
    KnownBits Known(BitWidth);
    computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth, Q);
    if (Known.One[0])
      return true;

    return isNonZeroShift(I, DemandedElts, Depth, Q, Known);
  }
  case Instruction::LShr:
  case Instruction::AShr: {
    // shr exact can only shift out zero bits.
    const PossiblyExactOperator *BO = cast<PossiblyExactOperator>(I);
    if (BO->isExact())
      return isKnownNonZero(I->getOperand(0), DemandedElts, Q, Depth);

    // shr X, Y != 0 if X is negative. Note that the value of the shift is not
    // defined if the sign bit is shifted off the end.
    KnownBits Known =
        computeKnownBits(I->getOperand(0), DemandedElts, Depth, Q);
    if (Known.isNegative())
      return true;

    return isNonZeroShift(I, DemandedElts, Depth, Q, Known);
  }
  case Instruction::UDiv:
  case Instruction::SDiv: {
    // div exact can only produce a zero if the dividend is zero.
    if (cast<PossiblyExactOperator>(I)->isExact())
      return isKnownNonZero(I->getOperand(0), DemandedElts, Q, Depth);

    KnownBits XKnown =
        computeKnownBits(I->getOperand(0), DemandedElts, Depth, Q);
    // If X is fully unknown we won't be able to figure anything out so don't
    // bother computing known bits for Y.
    if (XKnown.isUnknown())
      return false;

    KnownBits YKnown =
        computeKnownBits(I->getOperand(1), DemandedElts, Depth, Q);
    if (I->getOpcode() == Instruction::SDiv) {
      // For signed division need to compare abs value of the operands.
      XKnown = XKnown.abs(/*IntMinIsPoison*/ false);
      YKnown = YKnown.abs(/*IntMinIsPoison*/ false);
    }

    // If X u>= Y then div is non zero (0/0 is UB).
    std::optional<bool> XUgeY = KnownBits::uge(XKnown, YKnown);
    // If X is totally unknown or X u< Y we won't be able to prove non-zero
    // with compute known bits so just return early.
    return XUgeY && *XUgeY;
  }
  case Instruction::Add: {
    // If Add has nuw wrap flag, then if either X or Y is non-zero the result is
    // non-zero.
    auto *BO = cast<OverflowingBinaryOperator>(I);
    return isNonZeroAdd(DemandedElts, Depth, Q, BitWidth, I->getOperand(0),
                        I->getOperand(1), Q.IIQ.hasNoSignedWrap(BO),
                        Q.IIQ.hasNoUnsignedWrap(BO));
  }
  case Instruction::Mul: {
    const OverflowingBinaryOperator *BO = cast<OverflowingBinaryOperator>(I);
    return isNonZeroMul(DemandedElts, Depth, Q, BitWidth, I->getOperand(0),
                        I->getOperand(1), Q.IIQ.hasNoSignedWrap(BO),
                        Q.IIQ.hasNoUnsignedWrap(BO));
  }
  case Instruction::Select: {
    // (C ? X : Y) != 0 if X != 0 and Y != 0.

    // First check if the arm is non-zero using `isKnownNonZero`. If that fails,
    // then see if the select condition implies the arm is non-zero. For example
    // (X != 0 ? X : Y), we know the true arm is non-zero as the `X` "return" is
    // dominated by `X != 0`.
    auto SelectArmIsNonZero = [&](bool IsTrueArm) {
      Value *Op;
      Op = IsTrueArm ? I->getOperand(1) : I->getOperand(2);
      // Op is trivially non-zero.
      if (isKnownNonZero(Op, DemandedElts, Q, Depth))
        return true;

      // The condition of the select dominates the true/false arm. Check if the
      // condition implies that a given arm is non-zero.
      Value *X;
      CmpInst::Predicate Pred;
      if (!match(I->getOperand(0), m_c_ICmp(Pred, m_Specific(Op), m_Value(X))))
        return false;

      if (!IsTrueArm)
        Pred = ICmpInst::getInversePredicate(Pred);

      return cmpExcludesZero(Pred, X);
    };

    if (SelectArmIsNonZero(/* IsTrueArm */ true) &&
        SelectArmIsNonZero(/* IsTrueArm */ false))
      return true;
    break;
  }
: {
2941 auto *PN
= cast
<PHINode
>(I
);
2942 if (Q
.IIQ
.UseInstrInfo
&& isNonZeroRecurrence(PN
))
2945 // Check if all incoming values are non-zero using recursion.
2946 SimplifyQuery RecQ
= Q
.getWithoutCondContext();
2947 unsigned NewDepth
= std::max(Depth
, MaxAnalysisRecursionDepth
- 1);
2948 return llvm::all_of(PN
->operands(), [&](const Use
&U
) {
2951 RecQ
.CxtI
= PN
->getIncomingBlock(U
)->getTerminator();
2952 // Check if the branch on the phi excludes zero.
2953 ICmpInst::Predicate Pred
;
2955 BasicBlock
*TrueSucc
, *FalseSucc
;
2956 if (match(RecQ
.CxtI
,
2957 m_Br(m_c_ICmp(Pred
, m_Specific(U
.get()), m_Value(X
)),
2958 m_BasicBlock(TrueSucc
), m_BasicBlock(FalseSucc
)))) {
2959 // Check for cases of duplicate successors.
2960 if ((TrueSucc
== PN
->getParent()) != (FalseSucc
== PN
->getParent())) {
2961 // If we're using the false successor, invert the predicate.
2962 if (FalseSucc
== PN
->getParent())
2963 Pred
= CmpInst::getInversePredicate(Pred
);
2964 if (cmpExcludesZero(Pred
, X
))
2968 // Finally recurse on the edge and check it directly.
2969 return isKnownNonZero(U
.get(), DemandedElts
, RecQ
, NewDepth
);
  case Instruction::InsertElement: {
    if (isa<ScalableVectorType>(I->getType()))
      break;

    const Value *Vec = I->getOperand(0);
    const Value *Elt = I->getOperand(1);
    auto *CIdx = dyn_cast<ConstantInt>(I->getOperand(2));

    unsigned NumElts = DemandedElts.getBitWidth();
    APInt DemandedVecElts = DemandedElts;
    bool SkipElt = false;
    // If we know the index we are inserting to, clear it from the Vec check.
    if (CIdx && CIdx->getValue().ult(NumElts)) {
      DemandedVecElts.clearBit(CIdx->getZExtValue());
      SkipElt = !DemandedElts[CIdx->getZExtValue()];
    }

    // Result is non-zero if Elt is non-zero and the rest of the demanded elts
    // in Vec are non-zero.
    return (SkipElt || isKnownNonZero(Elt, Q, Depth)) &&
           (DemandedVecElts.isZero() ||
            isKnownNonZero(Vec, DemandedVecElts, Q, Depth));
  }
  case Instruction::ExtractElement:
    if (const auto *EEI = dyn_cast<ExtractElementInst>(I)) {
      const Value *Vec = EEI->getVectorOperand();
      const Value *Idx = EEI->getIndexOperand();
      auto *CIdx = dyn_cast<ConstantInt>(Idx);
      if (auto *VecTy = dyn_cast<FixedVectorType>(Vec->getType())) {
        unsigned NumElts = VecTy->getNumElements();
        APInt DemandedVecElts = APInt::getAllOnes(NumElts);
        if (CIdx && CIdx->getValue().ult(NumElts))
          DemandedVecElts = APInt::getOneBitSet(NumElts, CIdx->getZExtValue());
        return isKnownNonZero(Vec, DemandedVecElts, Q, Depth);
      }
    }
    break;
  case Instruction::ShuffleVector: {
    auto *Shuf = dyn_cast<ShuffleVectorInst>(I);
    if (!Shuf)
      break;
    APInt DemandedLHS, DemandedRHS;
    // For undef elements, we don't know anything about the common state of
    // the shuffle result.
    if (!getShuffleDemandedElts(Shuf, DemandedElts, DemandedLHS, DemandedRHS))
      break;
    // If demanded elements for both vecs are non-zero, the shuffle is non-zero.
    return (DemandedRHS.isZero() ||
            isKnownNonZero(Shuf->getOperand(1), DemandedRHS, Q, Depth)) &&
           (DemandedLHS.isZero() ||
            isKnownNonZero(Shuf->getOperand(0), DemandedLHS, Q, Depth));
  }
  case Instruction::Freeze:
    return isKnownNonZero(I->getOperand(0), Q, Depth) &&
           isGuaranteedNotToBePoison(I->getOperand(0), Q.AC, Q.CxtI, Q.DT,
                                     Depth);
  case Instruction::Load: {
    auto *LI = cast<LoadInst>(I);
    // A Load tagged with nonnull or dereferenceable with null pointer undefined
    // metadata is never null.
    if (auto *PtrT = dyn_cast<PointerType>(I->getType())) {
      if (Q.IIQ.getMetadata(LI, LLVMContext::MD_nonnull) ||
          (Q.IIQ.getMetadata(LI, LLVMContext::MD_dereferenceable) &&
           !NullPointerIsDefined(LI->getFunction(), PtrT->getAddressSpace())))
        return true;
    } else if (MDNode *Ranges = Q.IIQ.getMetadata(LI, LLVMContext::MD_range)) {
      return rangeMetadataExcludesValue(Ranges, APInt::getZero(BitWidth));
    }

    // No need to fall through to computeKnownBits as range metadata is already
    // handled in isKnownNonZero.
    return false;
  }
  case Instruction::ExtractValue: {
    const WithOverflowInst *WO;
    if (match(I, m_ExtractValue<0>(m_WithOverflowInst(WO)))) {
      switch (WO->getBinaryOp()) {
      default:
        break;
      case Instruction::Add:
        return isNonZeroAdd(DemandedElts, Depth, Q, BitWidth,
                            WO->getArgOperand(0), WO->getArgOperand(1),
                            /*NSW=*/false, /*NUW=*/false);
      case Instruction::Sub:
        return isNonZeroSub(DemandedElts, Depth, Q, BitWidth,
                            WO->getArgOperand(0), WO->getArgOperand(1));
      case Instruction::Mul:
        return isNonZeroMul(DemandedElts, Depth, Q, BitWidth,
                            WO->getArgOperand(0), WO->getArgOperand(1),
                            /*NSW=*/false, /*NUW=*/false);
      }
    }
    break;
  }
  case Instruction::Call:
  case Instruction::Invoke: {
    const auto *Call = cast<CallBase>(I);
    if (I->getType()->isPointerTy()) {
      if (Call->isReturnNonNull())
        return true;
      if (const auto *RP = getArgumentAliasingToReturnedPointer(Call, true))
        return isKnownNonZero(RP, Q, Depth);
    } else {
      if (MDNode *Ranges = Q.IIQ.getMetadata(Call, LLVMContext::MD_range))
        return rangeMetadataExcludesValue(Ranges, APInt::getZero(BitWidth));
      if (std::optional<ConstantRange> Range = Call->getRange()) {
        const APInt ZeroValue(Range->getBitWidth(), 0);
        if (!Range->contains(ZeroValue))
          return true;
      }
      if (const Value *RV = Call->getReturnedArgOperand())
        if (RV->getType() == I->getType() && isKnownNonZero(RV, Q, Depth))
          return true;
    }
    if (auto *II = dyn_cast<IntrinsicInst>(I)) {
      switch (II->getIntrinsicID()) {
      case Intrinsic::sshl_sat:
      case Intrinsic::ushl_sat:
      case Intrinsic::abs:
      case Intrinsic::bitreverse:
      case Intrinsic::bswap:
      case Intrinsic::ctpop:
        return isKnownNonZero(II->getArgOperand(0), DemandedElts, Q, Depth);
        // NB: We don't do usub_sat here as, whenever we can prove it
        // non-zero, we will fold it to `sub nuw` in InstCombine.
      case Intrinsic::ssub_sat:
        return isNonZeroSub(DemandedElts, Depth, Q, BitWidth,
                            II->getArgOperand(0), II->getArgOperand(1));
      case Intrinsic::sadd_sat:
        return isNonZeroAdd(DemandedElts, Depth, Q, BitWidth,
                            II->getArgOperand(0), II->getArgOperand(1),
                            /*NSW=*/true, /* NUW=*/false);
        // Vec reverse preserves zero/non-zero status from input vec.
      case Intrinsic::vector_reverse:
        return isKnownNonZero(II->getArgOperand(0), DemandedElts.reverseBits(),
                              Q, Depth);
        // umax/umin/smax/smin/or of all non-zero elements is always non-zero.
      case Intrinsic::vector_reduce_or:
      case Intrinsic::vector_reduce_umax:
      case Intrinsic::vector_reduce_umin:
      case Intrinsic::vector_reduce_smax:
      case Intrinsic::vector_reduce_smin:
        return isKnownNonZero(II->getArgOperand(0), Q, Depth);
      case Intrinsic::umax:
      case Intrinsic::uadd_sat:
        // umax(X, (X == 0)) is non zero
        // X +usat (X == 0) is non zero
        if (matchOpWithOpEqZero(II->getArgOperand(0), II->getArgOperand(1)))
          return true;

        return isKnownNonZero(II->getArgOperand(1), DemandedElts, Q, Depth) ||
               isKnownNonZero(II->getArgOperand(0), DemandedElts, Q, Depth);
      case Intrinsic::smax: {
        // If either arg is strictly positive the result is non-zero. Otherwise
        // the result is non-zero if both ops are non-zero.
        auto IsNonZero = [&](Value *Op, std::optional<bool> &OpNonZero,
                             const KnownBits &OpKnown) {
          if (!OpNonZero.has_value())
            OpNonZero = OpKnown.isNonZero() ||
                        isKnownNonZero(Op, DemandedElts, Q, Depth);
          return *OpNonZero;
        };
        // Avoid re-computing isKnownNonZero.
        std::optional<bool> Op0NonZero, Op1NonZero;
        KnownBits Op1Known =
            computeKnownBits(II->getArgOperand(1), DemandedElts, Depth, Q);
        if (Op1Known.isNonNegative() &&
            IsNonZero(II->getArgOperand(1), Op1NonZero, Op1Known))
          return true;
        KnownBits Op0Known =
            computeKnownBits(II->getArgOperand(0), DemandedElts, Depth, Q);
        if (Op0Known.isNonNegative() &&
            IsNonZero(II->getArgOperand(0), Op0NonZero, Op0Known))
          return true;
        return IsNonZero(II->getArgOperand(1), Op1NonZero, Op1Known) &&
               IsNonZero(II->getArgOperand(0), Op0NonZero, Op0Known);
      }
3152 case Intrinsic::smin
: {
3153 // If either arg is negative the result is non-zero. Otherwise
3154 // the result is non-zero if both ops are non-zero.
3155 KnownBits Op1Known
=
3156 computeKnownBits(II
->getArgOperand(1), DemandedElts
, Depth
, Q
);
3157 if (Op1Known
.isNegative())
3159 KnownBits Op0Known
=
3160 computeKnownBits(II
->getArgOperand(0), DemandedElts
, Depth
, Q
);
3161 if (Op0Known
.isNegative())
3164 if (Op1Known
.isNonZero() && Op0Known
.isNonZero())
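      // smin/umin return one of their operands, so the result can only be
      // zero if one of the operands is zero; the umin handling below, which
      // proves both operands non-zero, therefore also covers smin.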
      case Intrinsic::umin:
        return isKnownNonZero(II->getArgOperand(0), DemandedElts, Q, Depth) &&
               isKnownNonZero(II->getArgOperand(1), DemandedElts, Q, Depth);
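      // cttz(x) != 0 iff bit 0 of x is 0, and ctlz(x) != 0 iff the sign bit
      // of x is 0, so both reduce to a single known bit of the operand.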
      case Intrinsic::cttz:
        return computeKnownBits(II->getArgOperand(0), DemandedElts, Depth, Q)
            .Zero[0];
      case Intrinsic::ctlz:
        return computeKnownBits(II->getArgOperand(0), DemandedElts, Depth, Q)
            .isNonNegative();
      case Intrinsic::fshr:
      case Intrinsic::fshl:
        // If Op0 == Op1, this is a rotate. rotate(x, y) != 0 iff x != 0.
        if (II->getArgOperand(0) == II->getArgOperand(1))
          return isKnownNonZero(II->getArgOperand(0), DemandedElts, Q, Depth);
        break;
      case Intrinsic::vscale:
        return true;
      case Intrinsic::experimental_get_vector_length:
        return isKnownNonZero(I->getOperand(0), Q, Depth);
      default:
        break;
      }
      break;
    }
    break;
  }
  }

  KnownBits Known(BitWidth);
  computeKnownBits(I, DemandedElts, Known, Depth, Q);
  return Known.One != 0;
}
/// Return true if the given value is known to be non-zero when defined. For
/// vectors, return true if every demanded element is known to be non-zero when
/// defined. For pointers, if the context instruction and dominator tree are
/// specified, perform context-sensitive analysis and return true if the
/// pointer couldn't possibly be null at the specified instruction.
/// Supports values with integer or pointer type and vectors of integers.
bool isKnownNonZero(const Value *V, const APInt &DemandedElts,
                    const SimplifyQuery &Q, unsigned Depth) {
  Type *Ty = V->getType();

#ifndef NDEBUG
  assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");

  if (auto *FVTy = dyn_cast<FixedVectorType>(Ty)) {
    assert(
        FVTy->getNumElements() == DemandedElts.getBitWidth() &&
        "DemandedElt width should equal the fixed vector number of elements");
  } else {
    assert(DemandedElts == APInt(1, 1) &&
           "DemandedElt width should be 1 for scalars");
  }
#endif

  if (auto *C = dyn_cast<Constant>(V)) {
    if (C->isNullValue())
      return false;
    if (isa<ConstantInt>(C))
      // Must be non-zero due to null test above.
      return true;

    // For constant vectors, check that all elements are poison or known
    // non-zero to determine that the whole vector is known non-zero.
    if (auto *VecTy = dyn_cast<FixedVectorType>(Ty)) {
      for (unsigned i = 0, e = VecTy->getNumElements(); i != e; ++i) {
        if (!DemandedElts[i])
          continue;
        Constant *Elt = C->getAggregateElement(i);
        if (!Elt || Elt->isNullValue())
          return false;
        if (!isa<PoisonValue>(Elt) && !isa<ConstantInt>(Elt))
          return false;
      }
      return true;
    }

    // Constant ptrauth can be null, iff the base pointer can be.
    if (auto *CPA = dyn_cast<ConstantPtrAuth>(V))
      return isKnownNonZero(CPA->getPointer(), DemandedElts, Q, Depth);

    // A global variable in address space 0 is non null unless extern weak
    // or an absolute symbol reference. Other address spaces may have null as a
    // valid address for a global, so we can't assume anything.
    if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
      if (!GV->isAbsoluteSymbolRef() && !GV->hasExternalWeakLinkage() &&
          GV->getType()->getAddressSpace() == 0)
        return true;
    }

    // For constant expressions, fall through to the Operator code below.
    if (!isa<ConstantExpr>(V))
      return false;
  }

  if (const auto *A = dyn_cast<Argument>(V))
    if (std::optional<ConstantRange> Range = A->getRange()) {
      const APInt ZeroValue(Range->getBitWidth(), 0);
      if (!Range->contains(ZeroValue))
        return true;
    }

  if (!isa<Constant>(V) && isKnownNonZeroFromAssume(V, Q))
    return true;

  // Some of the tests below are recursive, so bail out if we hit the limit.
  if (Depth++ >= MaxAnalysisRecursionDepth)
    return false;

  // Check for pointer simplifications.

  if (PointerType *PtrTy = dyn_cast<PointerType>(Ty)) {
    // A byval, inalloca may not be null in a non-default address space. A
    // nonnull argument is assumed never 0.
    if (const Argument *A = dyn_cast<Argument>(V)) {
      if (((A->hasPassPointeeByValueCopyAttr() &&
            !NullPointerIsDefined(A->getParent(), PtrTy->getAddressSpace())) ||
           A->hasNonNullAttr()))
        return true;
    }
  }

  if (const auto *I = dyn_cast<Operator>(V))
    if (isKnownNonZeroFromOperator(I, DemandedElts, Depth, Q))
      return true;

  if (!isa<Constant>(V) &&
      isKnownNonNullFromDominatingCondition(V, Q.CxtI, Q.DT))
    return true;

  return false;
}
bool llvm::isKnownNonZero(const Value *V, const SimplifyQuery &Q,
                          unsigned Depth) {
  auto *FVTy = dyn_cast<FixedVectorType>(V->getType());
  APInt DemandedElts =
      FVTy ? APInt::getAllOnes(FVTy->getNumElements()) : APInt(1, 1);
  return ::isKnownNonZero(V, DemandedElts, Q, Depth);
}
/// If the pair of operators are the same invertible function, return the
/// operands of the function corresponding to each input. Otherwise,
/// return std::nullopt. An invertible function is one that is 1-to-1 and maps
/// every input value to exactly one output value. This is equivalent to
/// saying that Op1 and Op2 are equal exactly when the specified pair of
/// operands are equal, (except that Op1 and Op2 may be poison more often.)
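/// For example, (xor X, C) and (xor Y, C) are equal exactly when X and Y are
/// equal, so the recursion below may compare X and Y directly.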
static std::optional<std::pair<Value *, Value *>>
getInvertibleOperands(const Operator *Op1,
                      const Operator *Op2) {
  if (Op1->getOpcode() != Op2->getOpcode())
    return std::nullopt;

  auto getOperands = [&](unsigned OpNum) -> auto {
    return std::make_pair(Op1->getOperand(OpNum), Op2->getOperand(OpNum));
  };

  switch (Op1->getOpcode()) {
  default:
    break;
  case Instruction::Or:
    if (!cast<PossiblyDisjointInst>(Op1)->isDisjoint() ||
        !cast<PossiblyDisjointInst>(Op2)->isDisjoint())
      break;
    [[fallthrough]];
  case Instruction::Xor:
  case Instruction::Add: {
    Value *Other;
    if (match(Op2, m_c_BinOp(m_Specific(Op1->getOperand(0)), m_Value(Other))))
      return std::make_pair(Op1->getOperand(1), Other);
    if (match(Op2, m_c_BinOp(m_Specific(Op1->getOperand(1)), m_Value(Other))))
      return std::make_pair(Op1->getOperand(0), Other);
    break;
  }
  case Instruction::Sub:
    if (Op1->getOperand(0) == Op2->getOperand(0))
      return getOperands(1);
    if (Op1->getOperand(1) == Op2->getOperand(1))
      return getOperands(0);
    break;
  case Instruction::Mul: {
    // invertible if A * B == (A * B) mod 2^N where A, and B are integers
    // and N is the bitwidth. The nsw case is non-obvious, but proven by
    // alive2: https://alive2.llvm.org/ce/z/Z6D5qK
    auto *OBO1 = cast<OverflowingBinaryOperator>(Op1);
    auto *OBO2 = cast<OverflowingBinaryOperator>(Op2);
    if ((!OBO1->hasNoUnsignedWrap() || !OBO2->hasNoUnsignedWrap()) &&
        (!OBO1->hasNoSignedWrap() || !OBO2->hasNoSignedWrap()))
      break;

    // Assume operand order has been canonicalized
    if (Op1->getOperand(1) == Op2->getOperand(1) &&
        isa<ConstantInt>(Op1->getOperand(1)) &&
        !cast<ConstantInt>(Op1->getOperand(1))->isZero())
      return getOperands(0);
    break;
  }
  case Instruction::Shl: {
    // Same as multiplies, with the difference that we don't need to check
    // for a non-zero multiply. Shifts always multiply by non-zero.
    auto *OBO1 = cast<OverflowingBinaryOperator>(Op1);
    auto *OBO2 = cast<OverflowingBinaryOperator>(Op2);
    if ((!OBO1->hasNoUnsignedWrap() || !OBO2->hasNoUnsignedWrap()) &&
        (!OBO1->hasNoSignedWrap() || !OBO2->hasNoSignedWrap()))
      break;

    if (Op1->getOperand(1) == Op2->getOperand(1))
      return getOperands(0);
    break;
  }
  case Instruction::AShr:
  case Instruction::LShr: {
    auto *PEO1 = cast<PossiblyExactOperator>(Op1);
    auto *PEO2 = cast<PossiblyExactOperator>(Op2);
    if (!PEO1->isExact() || !PEO2->isExact())
      break;

    if (Op1->getOperand(1) == Op2->getOperand(1))
      return getOperands(0);
    break;
  }
  case Instruction::SExt:
  case Instruction::ZExt:
    if (Op1->getOperand(0)->getType() == Op2->getOperand(0)->getType())
      return getOperands(0);
    break;
  case Instruction::PHI: {
    const PHINode *PN1 = cast<PHINode>(Op1);
    const PHINode *PN2 = cast<PHINode>(Op2);

    // If PN1 and PN2 are both recurrences, can we prove the entire recurrences
    // are a single invertible function of the start values? Note that repeated
    // application of an invertible function is also invertible
    BinaryOperator *BO1 = nullptr;
    Value *Start1 = nullptr, *Step1 = nullptr;
    BinaryOperator *BO2 = nullptr;
    Value *Start2 = nullptr, *Step2 = nullptr;
    if (PN1->getParent() != PN2->getParent() ||
        !matchSimpleRecurrence(PN1, BO1, Start1, Step1) ||
        !matchSimpleRecurrence(PN2, BO2, Start2, Step2))
      break;

    auto Values = getInvertibleOperands(cast<Operator>(BO1),
                                        cast<Operator>(BO2));
    if (!Values)
      break;

    // We have to be careful of mutually defined recurrences here. Ex:
    // * X_i = X_(i-1) OP Y_(i-1), and Y_i = X_(i-1) OP V
    // * X_i = Y_i = X_(i-1) OP Y_(i-1)
    // The invertibility of these is complicated, and not worth reasoning
    // about (yet?).
    if (Values->first != PN1 || Values->second != PN2)
      break;

    return std::make_pair(Start1, Start2);
  }
  }
  return std::nullopt;
}
/// Return true if V1 == (binop V2, X), where X is known non-zero.
/// Only handle a small subset of binops where (binop V2, X) with non-zero X
/// implies V2 != V1.
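/// For instance, V1 == (xor V2, X) with X known non-zero implies V1 != V2,
/// since xoring with a non-zero value always flips at least one bit.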
static bool isModifyingBinopOfNonZero(const Value *V1, const Value *V2,
                                      const APInt &DemandedElts, unsigned Depth,
                                      const SimplifyQuery &Q) {
  const BinaryOperator *BO = dyn_cast<BinaryOperator>(V1);
  if (!BO)
    return false;
  switch (BO->getOpcode()) {
  default:
    break;
  case Instruction::Or:
    if (!cast<PossiblyDisjointInst>(V1)->isDisjoint())
      break;
    [[fallthrough]];
  case Instruction::Xor:
  case Instruction::Add:
    Value *Op = nullptr;
    if (V2 == BO->getOperand(0))
      Op = BO->getOperand(1);
    else if (V2 == BO->getOperand(1))
      Op = BO->getOperand(0);
    else
      return false;
    return isKnownNonZero(Op, DemandedElts, Q, Depth + 1);
  }
  return false;
}
/// Return true if V2 == V1 * C, where V1 is known non-zero, C is not 0/1 and
/// the multiplication is nuw or nsw.
static bool isNonEqualMul(const Value *V1, const Value *V2,
                          const APInt &DemandedElts, unsigned Depth,
                          const SimplifyQuery &Q) {
  if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(V2)) {
    const APInt *C;
    return match(OBO, m_Mul(m_Specific(V1), m_APInt(C))) &&
           (OBO->hasNoUnsignedWrap() || OBO->hasNoSignedWrap()) &&
           !C->isZero() && !C->isOne() &&
           isKnownNonZero(V1, DemandedElts, Q, Depth + 1);
  }
  return false;
}
/// Return true if V2 == V1 << C, where V1 is known non-zero, C is not 0 and
/// the shift is nuw or nsw.
static bool isNonEqualShl(const Value *V1, const Value *V2,
                          const APInt &DemandedElts, unsigned Depth,
                          const SimplifyQuery &Q) {
  if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(V2)) {
    const APInt *C;
    return match(OBO, m_Shl(m_Specific(V1), m_APInt(C))) &&
           (OBO->hasNoUnsignedWrap() || OBO->hasNoSignedWrap()) &&
           !C->isZero() && isKnownNonZero(V1, DemandedElts, Q, Depth + 1);
  }
  return false;
}
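/// Return true if the PHI nodes \p PN1 and \p PN2, which live in the same
/// block, are known to take different values on every incoming edge.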
static bool isNonEqualPHIs(const PHINode *PN1, const PHINode *PN2,
                           const APInt &DemandedElts, unsigned Depth,
                           const SimplifyQuery &Q) {
  // Check two PHIs are in same block.
  if (PN1->getParent() != PN2->getParent())
    return false;

  SmallPtrSet<const BasicBlock *, 8> VisitedBBs;
  bool UsedFullRecursion = false;
  for (const BasicBlock *IncomBB : PN1->blocks()) {
    if (!VisitedBBs.insert(IncomBB).second)
      continue; // Don't reprocess blocks that we have dealt with already.
    const Value *IV1 = PN1->getIncomingValueForBlock(IncomBB);
    const Value *IV2 = PN2->getIncomingValueForBlock(IncomBB);
    const APInt *C1, *C2;
    if (match(IV1, m_APInt(C1)) && match(IV2, m_APInt(C2)) && *C1 != *C2)
      continue;

    // Only one pair of phi operands is allowed for full recursion.
    if (UsedFullRecursion)
      return false;

    SimplifyQuery RecQ = Q.getWithoutCondContext();
    RecQ.CxtI = IncomBB->getTerminator();
    if (!isKnownNonEqual(IV1, IV2, DemandedElts, Depth + 1, RecQ))
      return false;
    UsedFullRecursion = true;
  }
  return true;
}
static bool isNonEqualSelect(const Value *V1, const Value *V2,
                             const APInt &DemandedElts, unsigned Depth,
                             const SimplifyQuery &Q) {
  const SelectInst *SI1 = dyn_cast<SelectInst>(V1);
  if (!SI1)
    return false;

  if (const SelectInst *SI2 = dyn_cast<SelectInst>(V2)) {
    const Value *Cond1 = SI1->getCondition();
    const Value *Cond2 = SI2->getCondition();
    if (Cond1 == Cond2)
      return isKnownNonEqual(SI1->getTrueValue(), SI2->getTrueValue(),
                             DemandedElts, Depth + 1, Q) &&
             isKnownNonEqual(SI1->getFalseValue(), SI2->getFalseValue(),
                             DemandedElts, Depth + 1, Q);
  }
  return isKnownNonEqual(SI1->getTrueValue(), V2, DemandedElts, Depth + 1, Q) &&
         isKnownNonEqual(SI1->getFalseValue(), V2, DemandedElts, Depth + 1, Q);
}
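// For example, with
//   %phi = phi ptr [ %start, %entry ], [ %gep, %loop ]
//   %gep = getelementptr inbounds i8, ptr %phi, i64 4
// the recursive GEP %gep steps strictly forward on every iteration, so it can
// never equal a pointer at the same base whose offset is at or below %start's.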
// Check to see if A is both a GEP and is the incoming value for a PHI in the
// loop, and B is either a ptr or another GEP. If the PHI has 2 incoming
// values, one of them being the recursive GEP A and the other a ptr at the
// same base and at the same/higher offset than B, then we are only ever
// incrementing the pointer further in the loop when the offset of the
// recursive GEP is greater than 0.
static bool isNonEqualPointersWithRecursiveGEP(const Value *A, const Value *B,
                                               const SimplifyQuery &Q) {
  if (!A->getType()->isPointerTy() || !B->getType()->isPointerTy())
    return false;

  auto *GEPA = dyn_cast<GEPOperator>(A);
  if (!GEPA || GEPA->getNumIndices() != 1 || !isa<Constant>(GEPA->idx_begin()))
    return false;

  // Handle 2 incoming PHI values with one being a recursive GEP.
  auto *PN = dyn_cast<PHINode>(GEPA->getPointerOperand());
  if (!PN || PN->getNumIncomingValues() != 2)
    return false;

  // Search for the recursive GEP as an incoming operand, and record that as
  // Step.
  Value *Start = nullptr;
  Value *Step = const_cast<Value *>(A);
  if (PN->getIncomingValue(0) == Step)
    Start = PN->getIncomingValue(1);
  else if (PN->getIncomingValue(1) == Step)
    Start = PN->getIncomingValue(0);
  else
    return false;

  // Other incoming node base should match the B base.
  // StartOffset >= OffsetB && StepOffset > 0?
  // StartOffset <= OffsetB && StepOffset < 0?
  // Is non-equal if above are true.
  // We use stripAndAccumulateInBoundsConstantOffsets to restrict the
  // optimisation to inbounds GEPs only.
  unsigned IndexWidth = Q.DL.getIndexTypeSizeInBits(Start->getType());
  APInt StartOffset(IndexWidth, 0);
  Start = Start->stripAndAccumulateInBoundsConstantOffsets(Q.DL, StartOffset);
  APInt StepOffset(IndexWidth, 0);
  Step = Step->stripAndAccumulateInBoundsConstantOffsets(Q.DL, StepOffset);

  // Check if Base Pointer of Step matches the PHI.
  if (Step != PN)
    return false;
  APInt OffsetB(IndexWidth, 0);
  B = B->stripAndAccumulateInBoundsConstantOffsets(Q.DL, OffsetB);
  return Start == B &&
         ((StartOffset.sge(OffsetB) && StepOffset.isStrictlyPositive()) ||
          (StartOffset.sle(OffsetB) && StepOffset.isNegative()));
}
/// Return true if it is known that V1 != V2.
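/// For example, X and (add X, C) are known non-equal whenever C is known
/// non-zero, since adding a non-zero constant modulo 2^N has no fixed point.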
static bool isKnownNonEqual(const Value *V1, const Value *V2,
                            const APInt &DemandedElts, unsigned Depth,
                            const SimplifyQuery &Q) {
  if (V1 == V2)
    return false;
  if (V1->getType() != V2->getType())
    // We can't look through casts yet.
    return false;

  if (Depth >= MaxAnalysisRecursionDepth)
    return false;

  // See if we can recurse through (exactly one of) our operands. This
  // requires our operation be 1-to-1 and map every input value to exactly
  // one output value. Such an operation is invertible.
  auto *O1 = dyn_cast<Operator>(V1);
  auto *O2 = dyn_cast<Operator>(V2);
  if (O1 && O2 && O1->getOpcode() == O2->getOpcode()) {
    if (auto Values = getInvertibleOperands(O1, O2))
      return isKnownNonEqual(Values->first, Values->second, DemandedElts,
                             Depth + 1, Q);

    if (const PHINode *PN1 = dyn_cast<PHINode>(V1)) {
      const PHINode *PN2 = cast<PHINode>(V2);
      // FIXME: This is missing a generalization to handle the case where one is
      // a PHI and another one isn't.
      if (isNonEqualPHIs(PN1, PN2, DemandedElts, Depth, Q))
        return true;
    }
  }

  if (isModifyingBinopOfNonZero(V1, V2, DemandedElts, Depth, Q) ||
      isModifyingBinopOfNonZero(V2, V1, DemandedElts, Depth, Q))
    return true;

  if (isNonEqualMul(V1, V2, DemandedElts, Depth, Q) ||
      isNonEqualMul(V2, V1, DemandedElts, Depth, Q))
    return true;

  if (isNonEqualShl(V1, V2, DemandedElts, Depth, Q) ||
      isNonEqualShl(V2, V1, DemandedElts, Depth, Q))
    return true;

  if (V1->getType()->isIntOrIntVectorTy()) {
    // Are any known bits in V1 contradictory to known bits in V2? If V1
    // has a known zero where V2 has a known one, they must not be equal.
    KnownBits Known1 = computeKnownBits(V1, DemandedElts, Depth, Q);
    if (!Known1.isUnknown()) {
      KnownBits Known2 = computeKnownBits(V2, DemandedElts, Depth, Q);
      if (Known1.Zero.intersects(Known2.One) ||
          Known2.Zero.intersects(Known1.One))
        return true;
    }
  }

  if (isNonEqualSelect(V1, V2, DemandedElts, Depth, Q) ||
      isNonEqualSelect(V2, V1, DemandedElts, Depth, Q))
    return true;

  if (isNonEqualPointersWithRecursiveGEP(V1, V2, Q) ||
      isNonEqualPointersWithRecursiveGEP(V2, V1, Q))
    return true;

  Value *A, *B;
  // PtrToInts are NonEqual if their Ptrs are NonEqual.
  // Check PtrToInt type matches the pointer size.
  if (match(V1, m_PtrToIntSameSize(Q.DL, m_Value(A))) &&
      match(V2, m_PtrToIntSameSize(Q.DL, m_Value(B))))
    return isKnownNonEqual(A, B, DemandedElts, Depth + 1, Q);

  return false;
}
// Match a signed min+max clamp pattern like smax(smin(In, CHigh), CLow).
// Returns the input and lower/upper bounds.
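// For example, smax(smin(In, 127), -128) clamps In to [-128, 127].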
static bool isSignedMinMaxClamp(const Value *Select, const Value *&In,
                                const APInt *&CLow, const APInt *&CHigh) {
  assert(isa<Operator>(Select) &&
         cast<Operator>(Select)->getOpcode() == Instruction::Select &&
         "Input should be a Select!");

  const Value *LHS = nullptr, *RHS = nullptr;
  SelectPatternFlavor SPF = matchSelectPattern(Select, LHS, RHS).Flavor;
  if (SPF != SPF_SMAX && SPF != SPF_SMIN)
    return false;

  if (!match(RHS, m_APInt(CLow)))
    return false;

  const Value *LHS2 = nullptr, *RHS2 = nullptr;
  SelectPatternFlavor SPF2 = matchSelectPattern(LHS, LHS2, RHS2).Flavor;
  if (getInverseMinMaxFlavor(SPF) != SPF2)
    return false;

  if (!match(RHS2, m_APInt(CHigh)))
    return false;

  if (SPF == SPF_SMIN)
    std::swap(CLow, CHigh);

  In = LHS2;
  return CLow->sle(*CHigh);
}

static bool isSignedMinMaxIntrinsicClamp(const IntrinsicInst *II,
                                         const APInt *&CLow,
                                         const APInt *&CHigh) {
  assert((II->getIntrinsicID() == Intrinsic::smin ||
          II->getIntrinsicID() == Intrinsic::smax) && "Must be smin/smax");

  Intrinsic::ID InverseID = getInverseMinMaxIntrinsic(II->getIntrinsicID());
  auto *InnerII = dyn_cast<IntrinsicInst>(II->getArgOperand(0));
  if (!InnerII || InnerII->getIntrinsicID() != InverseID ||
      !match(II->getArgOperand(1), m_APInt(CLow)) ||
      !match(InnerII->getArgOperand(1), m_APInt(CHigh)))
    return false;

  if (II->getIntrinsicID() == Intrinsic::smin)
    std::swap(CLow, CHigh);
  return CLow->sle(*CHigh);
}
/// For vector constants, loop over the elements and find the constant with the
/// minimum number of sign bits. Return 0 if the value is not a vector constant
/// or if any element was not analyzed; otherwise, return the count for the
/// element with the minimum number of sign bits.
static unsigned computeNumSignBitsVectorConstant(const Value *V,
                                                 const APInt &DemandedElts,
                                                 unsigned TyBits) {
  const auto *CV = dyn_cast<Constant>(V);
  if (!CV || !isa<FixedVectorType>(CV->getType()))
    return 0;

  unsigned MinSignBits = TyBits;
  unsigned NumElts = cast<FixedVectorType>(CV->getType())->getNumElements();
  for (unsigned i = 0; i != NumElts; ++i) {
    if (!DemandedElts[i])
      continue;
    // If we find a non-ConstantInt, bail out.
    auto *Elt = dyn_cast_or_null<ConstantInt>(CV->getAggregateElement(i));
    if (!Elt)
      return 0;

    MinSignBits = std::min(MinSignBits, Elt->getValue().getNumSignBits());
  }

  return MinSignBits;
}

static unsigned ComputeNumSignBitsImpl(const Value *V,
                                       const APInt &DemandedElts,
                                       unsigned Depth, const SimplifyQuery &Q);

static unsigned ComputeNumSignBits(const Value *V, const APInt &DemandedElts,
                                   unsigned Depth, const SimplifyQuery &Q) {
  unsigned Result = ComputeNumSignBitsImpl(V, DemandedElts, Depth, Q);
  assert(Result > 0 && "At least one sign bit needs to be present!");
  return Result;
}
/// Return the number of times the sign bit of the register is replicated into
/// the other bits. We know that at least 1 bit is always equal to the sign bit
/// (itself), but other cases can give us information. For example, immediately
/// after an "ashr X, 2", we know that the top 3 bits are all equal to each
/// other, so we return 3. For vectors, return the number of sign bits for the
/// vector element with the minimum number of known sign bits of the demanded
/// elements in the vector specified by DemandedElts.
static unsigned ComputeNumSignBitsImpl(const Value *V,
                                       const APInt &DemandedElts,
                                       unsigned Depth, const SimplifyQuery &Q) {
  Type *Ty = V->getType();

#ifndef NDEBUG
  assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");

  if (auto *FVTy = dyn_cast<FixedVectorType>(Ty)) {
    assert(
        FVTy->getNumElements() == DemandedElts.getBitWidth() &&
        "DemandedElt width should equal the fixed vector number of elements");
  } else {
    assert(DemandedElts == APInt(1, 1) &&
           "DemandedElt width should be 1 for scalars");
  }
#endif

  // We return the minimum number of sign bits that are guaranteed to be present
  // in V, so for undef we have to conservatively return 1. We don't have the
  // same behavior for poison though -- that's a FIXME today.

  Type *ScalarTy = Ty->getScalarType();
  unsigned TyBits = ScalarTy->isPointerTy() ?
    Q.DL.getPointerTypeSizeInBits(ScalarTy) :
    Q.DL.getTypeSizeInBits(ScalarTy);

  unsigned Tmp, Tmp2;
  unsigned FirstAnswer = 1;

  // Note that ConstantInt is handled by the general computeKnownBits case
  // below.

  if (Depth == MaxAnalysisRecursionDepth)
    return 1;

  if (auto *U = dyn_cast<Operator>(V)) {
    switch (Operator::getOpcode(V)) {
    default:
      break;
    case Instruction::SExt:
      Tmp = TyBits - U->getOperand(0)->getType()->getScalarSizeInBits();
      return ComputeNumSignBits(U->getOperand(0), DemandedElts, Depth + 1, Q) +
             Tmp;

    case Instruction::SDiv: {
      const APInt *Denominator;
      // sdiv X, C -> adds log(C) sign bits.
      if (match(U->getOperand(1), m_APInt(Denominator))) {

        // Ignore non-positive denominator.
        if (!Denominator->isStrictlyPositive())
          break;

        // Calculate the incoming numerator bits.
        unsigned NumBits =
            ComputeNumSignBits(U->getOperand(0), DemandedElts, Depth + 1, Q);

        // Add floor(log(C)) bits to the numerator bits.
        return std::min(TyBits, NumBits + Denominator->logBase2());
      }
      break;
    }

    case Instruction::SRem: {
      Tmp = ComputeNumSignBits(U->getOperand(0), DemandedElts, Depth + 1, Q);

      const APInt *Denominator;
      // srem X, C -> we know that the result is within [-C+1,C) when C is a
      // positive constant. This lets us put a lower bound on the number of sign
      // bits.
      if (match(U->getOperand(1), m_APInt(Denominator))) {

        // Ignore non-positive denominator.
        if (Denominator->isStrictlyPositive()) {
          // Calculate the leading sign bit constraints by examining the
          // denominator. Given that the denominator is positive, there are two
          // cases:
          //
          //  1. The numerator is positive. The result range is [0,C) and
          //     [0,C) u< (1 << ceilLogBase2(C)).
          //
          //  2. The numerator is negative. Then the result range is (-C,0] and
          //     integers in (-C,0] are either 0 or >u (-1 << ceilLogBase2(C)).
          //
          // Thus a lower bound on the number of sign bits is `TyBits -
          // ceilLogBase2(C)`.

          unsigned ResBits = TyBits - Denominator->ceilLogBase2();
          Tmp = std::max(Tmp, ResBits);
        }
      }
      return Tmp;
    }

    case Instruction::AShr: {
      Tmp = ComputeNumSignBits(U->getOperand(0), DemandedElts, Depth + 1, Q);
      // ashr X, C -> adds C sign bits. Vectors too.
      const APInt *ShAmt;
      if (match(U->getOperand(1), m_APInt(ShAmt))) {
        if (ShAmt->uge(TyBits))
          break; // Bad shift.
        unsigned ShAmtLimited = ShAmt->getZExtValue();
        Tmp += ShAmtLimited;
        if (Tmp > TyBits) Tmp = TyBits;
      }
      return Tmp;
    }
    case Instruction::Shl: {
      const APInt *ShAmt;
      Value *X = nullptr;
      if (match(U->getOperand(1), m_APInt(ShAmt))) {
        // shl destroys sign bits.
        if (ShAmt->uge(TyBits))
          break; // Bad shift.
        // We can look through a zext (more or less treating it as a sext) if
        // all extended bits are shifted out.
        if (match(U->getOperand(0), m_ZExt(m_Value(X))) &&
            ShAmt->uge(TyBits - X->getType()->getScalarSizeInBits())) {
          Tmp = ComputeNumSignBits(X, DemandedElts, Depth + 1, Q);
          Tmp += TyBits - X->getType()->getScalarSizeInBits();
        } else
          Tmp =
              ComputeNumSignBits(U->getOperand(0), DemandedElts, Depth + 1, Q);
        if (ShAmt->uge(Tmp))
          break; // Shifted all sign bits out.
        Tmp2 = ShAmt->getZExtValue();
        return Tmp - Tmp2;
      }
      break;
    }
    case Instruction::And:
    case Instruction::Or:
    case Instruction::Xor: // NOT is handled here.
      // Logical binary ops preserve the number of sign bits at the worst.
      Tmp = ComputeNumSignBits(U->getOperand(0), DemandedElts, Depth + 1, Q);
      if (Tmp != 1) {
        Tmp2 = ComputeNumSignBits(U->getOperand(1), DemandedElts, Depth + 1, Q);
        FirstAnswer = std::min(Tmp, Tmp2);
        // We computed what we know about the sign bits as our first
        // answer. Now proceed to the generic code that uses
        // computeKnownBits, and pick whichever answer is better.
      }
      break;

    case Instruction::Select: {
      // If we have a clamp pattern, we know that the number of sign bits will
      // be the minimum of the clamp min/max range.
      const Value *X;
      const APInt *CLow, *CHigh;
      if (isSignedMinMaxClamp(U, X, CLow, CHigh))
        return std::min(CLow->getNumSignBits(), CHigh->getNumSignBits());

      Tmp = ComputeNumSignBits(U->getOperand(1), DemandedElts, Depth + 1, Q);
      if (Tmp == 1)
        break;
      Tmp2 = ComputeNumSignBits(U->getOperand(2), DemandedElts, Depth + 1, Q);
      return std::min(Tmp, Tmp2);
    }

    case Instruction::Add:
      // Add can have at most one carry bit. Thus we know that the output
      // is, at worst, one more bit than the inputs.
      Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
      if (Tmp == 1) break;

      // Special case decrementing a value (ADD X, -1):
      if (const auto *CRHS = dyn_cast<Constant>(U->getOperand(1)))
        if (CRHS->isAllOnesValue()) {
          KnownBits Known(TyBits);
          computeKnownBits(U->getOperand(0), DemandedElts, Known, Depth + 1, Q);

          // If the input is known to be 0 or 1, the output is 0/-1, which is
          // all sign bits set.
          if ((Known.Zero | 1).isAllOnes())
            return TyBits;

          // If we are subtracting one from a positive number, there is no carry
          // out of the result.
          if (Known.isNonNegative())
            return Tmp;
        }

      Tmp2 = ComputeNumSignBits(U->getOperand(1), DemandedElts, Depth + 1, Q);
      if (Tmp2 == 1)
        break;
      return std::min(Tmp, Tmp2) - 1;

    case Instruction::Sub:
      Tmp2 = ComputeNumSignBits(U->getOperand(1), DemandedElts, Depth + 1, Q);
      if (Tmp2 == 1)
        break;

      // Handle NEG.
      if (const auto *CLHS = dyn_cast<Constant>(U->getOperand(0)))
        if (CLHS->isNullValue()) {
          KnownBits Known(TyBits);
          computeKnownBits(U->getOperand(1), DemandedElts, Known, Depth + 1, Q);
          // If the input is known to be 0 or 1, the output is 0/-1, which is
          // all sign bits set.
          if ((Known.Zero | 1).isAllOnes())
            return TyBits;

          // If the input is known to be positive (the sign bit is known clear),
          // the output of the NEG has the same number of sign bits as the
          // input.
          if (Known.isNonNegative())
            return Tmp2;

          // Otherwise, we treat this like a SUB.
        }

      // Sub can have at most one carry bit. Thus we know that the output
      // is, at worst, one more bit than the inputs.
      Tmp = ComputeNumSignBits(U->getOperand(0), DemandedElts, Depth + 1, Q);
      if (Tmp == 1)
        break;
      return std::min(Tmp, Tmp2) - 1;

    case Instruction::Mul: {
      // The output of the Mul can be at most twice the valid bits in the
      // inputs.
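      // For example, on i32 two operands with 20 sign bits each have 13 valid
      // bits each, so the product needs at most 26 valid bits and keeps at
      // least 32 - 26 + 1 = 7 sign bits.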
      unsigned SignBitsOp0 =
          ComputeNumSignBits(U->getOperand(0), DemandedElts, Depth + 1, Q);
      if (SignBitsOp0 == 1)
        break;
      unsigned SignBitsOp1 =
          ComputeNumSignBits(U->getOperand(1), DemandedElts, Depth + 1, Q);
      if (SignBitsOp1 == 1)
        break;
      unsigned OutValidBits =
          (TyBits - SignBitsOp0 + 1) + (TyBits - SignBitsOp1 + 1);
      return OutValidBits > TyBits ? 1 : TyBits - OutValidBits + 1;
    }

    case Instruction::PHI: {
      const PHINode *PN = cast<PHINode>(U);
      unsigned NumIncomingValues = PN->getNumIncomingValues();
      // Don't analyze large in-degree PHIs.
      if (NumIncomingValues > 4) break;
      // Unreachable blocks may have zero-operand PHI nodes.
      if (NumIncomingValues == 0) break;

      // Take the minimum of all incoming values. This can't infinitely loop
      // because of our depth threshold.
      SimplifyQuery RecQ = Q.getWithoutCondContext();
      Tmp = TyBits;
      for (unsigned i = 0, e = NumIncomingValues; i != e; ++i) {
        if (Tmp == 1) return Tmp;
        RecQ.CxtI = PN->getIncomingBlock(i)->getTerminator();
        Tmp = std::min(Tmp, ComputeNumSignBits(PN->getIncomingValue(i),
                                               DemandedElts, Depth + 1, RecQ));
      }
      return Tmp;
    }

    case Instruction::Trunc: {
      // If the input contained enough sign bits that some remain after the
      // truncation, then we can make use of that. Otherwise we don't know
      // anything.
      Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
      unsigned OperandTyBits = U->getOperand(0)->getType()->getScalarSizeInBits();
      if (Tmp > (OperandTyBits - TyBits))
        return Tmp - (OperandTyBits - TyBits);

      return 1;
    }

    case Instruction::ExtractElement:
      // Look through extract element. At the moment we keep this simple and
      // skip tracking the specific element. But at least we might find
      // information valid for all elements of the vector (for example if vector
      // is sign extended, shifted, etc).
      return ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);

    case Instruction::ShuffleVector: {
      // Collect the minimum number of sign bits that are shared by every vector
      // element referenced by the shuffle.
      auto *Shuf = dyn_cast<ShuffleVectorInst>(U);
      if (!Shuf) {
        // FIXME: Add support for shufflevector constant expressions.
        return 1;
      }
      APInt DemandedLHS, DemandedRHS;
      // For undef elements, we don't know anything about the common state of
      // the shuffle result.
      if (!getShuffleDemandedElts(Shuf, DemandedElts, DemandedLHS, DemandedRHS))
        return 1;
      Tmp = std::numeric_limits<unsigned>::max();
      if (!!DemandedLHS) {
        const Value *LHS = Shuf->getOperand(0);
        Tmp = ComputeNumSignBits(LHS, DemandedLHS, Depth + 1, Q);
      }
      // If we don't know anything, early out and try computeKnownBits
      // fall-back.
      if (Tmp == 1)
        break;
      if (!!DemandedRHS) {
        const Value *RHS = Shuf->getOperand(1);
        Tmp2 = ComputeNumSignBits(RHS, DemandedRHS, Depth + 1, Q);
        Tmp = std::min(Tmp, Tmp2);
      }
      // If we don't know anything, early out and try computeKnownBits
      // fall-back.
      if (Tmp == 1)
        break;
      assert(Tmp <= TyBits && "Failed to determine minimum sign bits");
      return Tmp;
    }
    case Instruction::Call: {
      if (const auto *II = dyn_cast<IntrinsicInst>(U)) {
        switch (II->getIntrinsicID()) {
        default:
          break;
        case Intrinsic::abs:
          Tmp =
              ComputeNumSignBits(U->getOperand(0), DemandedElts, Depth + 1, Q);
          if (Tmp == 1)
            break;

          // Absolute value reduces number of sign bits by at most 1.
          return Tmp - 1;
        case Intrinsic::smin:
        case Intrinsic::smax: {
          const APInt *CLow, *CHigh;
          if (isSignedMinMaxIntrinsicClamp(II, CLow, CHigh))
            return std::min(CLow->getNumSignBits(), CHigh->getNumSignBits());
          break;
        }
        }
      }
      break;
    }
    }
  }

  // Finally, if we can prove that the top bits of the result are 0's or 1's,
  // use this information.

  // If we can examine all elements of a vector constant successfully, we're
  // done (we can't do any better than that). If not, keep trying.
  if (unsigned VecSignBits =
          computeNumSignBitsVectorConstant(V, DemandedElts, TyBits))
    return VecSignBits;

  KnownBits Known(TyBits);
  computeKnownBits(V, DemandedElts, Known, Depth, Q);

  // If we know that the sign bit is either zero or one, determine the number of
  // identical bits in the top of the input value.
  return std::max(FirstAnswer, Known.countMinSignBits());
}
Intrinsic::ID llvm::getIntrinsicForCallSite(const CallBase &CB,
                                            const TargetLibraryInfo *TLI) {
  const Function *F = CB.getCalledFunction();
  if (!F)
    return Intrinsic::not_intrinsic;

  if (F->isIntrinsic())
    return F->getIntrinsicID();

  // We are going to infer semantics of a library function based on mapping it
  // to an LLVM intrinsic. Check that the library function is available from
  // this callbase and in this environment.
  LibFunc Func;
  if (F->hasLocalLinkage() || !TLI || !TLI->getLibFunc(CB, Func) ||
      !CB.onlyReadsMemory())
    return Intrinsic::not_intrinsic;
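  // Map each recognized libm entry point (and its float/long-double variants)
  // to the matching LLVM intrinsic.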
  switch (Func) {
  default:
    break;
  case LibFunc_sin:
  case LibFunc_sinf:
  case LibFunc_sinl:
    return Intrinsic::sin;
  case LibFunc_cos:
  case LibFunc_cosf:
  case LibFunc_cosl:
    return Intrinsic::cos;
  case LibFunc_tan:
  case LibFunc_tanf:
  case LibFunc_tanl:
    return Intrinsic::tan;
  case LibFunc_exp:
  case LibFunc_expf:
  case LibFunc_expl:
    return Intrinsic::exp;
  case LibFunc_exp2:
  case LibFunc_exp2f:
  case LibFunc_exp2l:
    return Intrinsic::exp2;
  case LibFunc_log:
  case LibFunc_logf:
  case LibFunc_logl:
    return Intrinsic::log;
  case LibFunc_log10:
  case LibFunc_log10f:
  case LibFunc_log10l:
    return Intrinsic::log10;
  case LibFunc_log2:
  case LibFunc_log2f:
  case LibFunc_log2l:
    return Intrinsic::log2;
  case LibFunc_fabs:
  case LibFunc_fabsf:
  case LibFunc_fabsl:
    return Intrinsic::fabs;
  case LibFunc_fmin:
  case LibFunc_fminf:
  case LibFunc_fminl:
    return Intrinsic::minnum;
  case LibFunc_fmax:
  case LibFunc_fmaxf:
  case LibFunc_fmaxl:
    return Intrinsic::maxnum;
  case LibFunc_copysign:
  case LibFunc_copysignf:
  case LibFunc_copysignl:
    return Intrinsic::copysign;
  case LibFunc_floor:
  case LibFunc_floorf:
  case LibFunc_floorl:
    return Intrinsic::floor;
  case LibFunc_ceil:
  case LibFunc_ceilf:
  case LibFunc_ceill:
    return Intrinsic::ceil;
  case LibFunc_trunc:
  case LibFunc_truncf:
  case LibFunc_truncl:
    return Intrinsic::trunc;
  case LibFunc_rint:
  case LibFunc_rintf:
  case LibFunc_rintl:
    return Intrinsic::rint;
  case LibFunc_nearbyint:
  case LibFunc_nearbyintf:
  case LibFunc_nearbyintl:
    return Intrinsic::nearbyint;
  case LibFunc_round:
  case LibFunc_roundf:
  case LibFunc_roundl:
    return Intrinsic::round;
  case LibFunc_roundeven:
  case LibFunc_roundevenf:
  case LibFunc_roundevenl:
    return Intrinsic::roundeven;
  case LibFunc_pow:
  case LibFunc_powf:
  case LibFunc_powl:
    return Intrinsic::pow;
  case LibFunc_sqrt:
  case LibFunc_sqrtf:
  case LibFunc_sqrtl:
    return Intrinsic::sqrt;
  }

  return Intrinsic::not_intrinsic;
}
/// Return true if it's possible to assume IEEE treatment of input denormals in
/// \p F for \p Val.
static bool inputDenormalIsIEEE(const Function &F, const Type *Ty) {
  Ty = Ty->getScalarType();
  return F.getDenormalMode(Ty->getFltSemantics()).Input == DenormalMode::IEEE;
}

static bool inputDenormalIsIEEEOrPosZero(const Function &F, const Type *Ty) {
  Ty = Ty->getScalarType();
  DenormalMode Mode = F.getDenormalMode(Ty->getFltSemantics());
  return Mode.Input == DenormalMode::IEEE ||
         Mode.Input == DenormalMode::PositiveZero;
}

static bool outputDenormalIsIEEEOrPosZero(const Function &F, const Type *Ty) {
  Ty = Ty->getScalarType();
  DenormalMode Mode = F.getDenormalMode(Ty->getFltSemantics());
  return Mode.Output == DenormalMode::IEEE ||
         Mode.Output == DenormalMode::PositiveZero;
}

bool KnownFPClass::isKnownNeverLogicalZero(const Function &F, Type *Ty) const {
  return isKnownNeverZero() &&
         (isKnownNeverSubnormal() || inputDenormalIsIEEE(F, Ty));
}

bool KnownFPClass::isKnownNeverLogicalNegZero(const Function &F,
                                              Type *Ty) const {
  return isKnownNeverNegZero() &&
         (isKnownNeverNegSubnormal() || inputDenormalIsIEEEOrPosZero(F, Ty));
}

bool KnownFPClass::isKnownNeverLogicalPosZero(const Function &F,
                                              Type *Ty) const {
  if (!isKnownNeverPosZero())
    return false;

  // If we know there are no denormals, nothing can be flushed to zero.
  if (isKnownNeverSubnormal())
    return true;

  DenormalMode Mode = F.getDenormalMode(Ty->getScalarType()->getFltSemantics());
  switch (Mode.Input) {
  case DenormalMode::IEEE:
    return true;
  case DenormalMode::PreserveSign:
    // Negative subnormal won't flush to +0
    return isKnownNeverPosSubnormal();
  case DenormalMode::PositiveZero:
  default:
    // Both positive and negative subnormal could flush to +0
    return false;
  }

  llvm_unreachable("covered switch over denormal mode");
}

void KnownFPClass::propagateDenormal(const KnownFPClass &Src, const Function &F,
                                     Type *Ty) {
  KnownFPClasses = Src.KnownFPClasses;
  // If we aren't assuming the source can't be a zero, we don't have to check if
  // a denormal input could be flushed.
  if (!Src.isKnownNeverPosZero() && !Src.isKnownNeverNegZero())
    return;

  // If we know the input can't be a denormal, it can't be flushed to 0.
  if (Src.isKnownNeverSubnormal())
    return;

  DenormalMode Mode = F.getDenormalMode(Ty->getScalarType()->getFltSemantics());

  if (!Src.isKnownNeverPosSubnormal() && Mode != DenormalMode::getIEEE())
    KnownFPClasses |= fcPosZero;

  if (!Src.isKnownNeverNegSubnormal() && Mode != DenormalMode::getIEEE()) {
    if (Mode != DenormalMode::getPositiveZero())
      KnownFPClasses |= fcNegZero;

    if (Mode.Input == DenormalMode::PositiveZero ||
        Mode.Output == DenormalMode::PositiveZero ||
        Mode.Input == DenormalMode::Dynamic ||
        Mode.Output == DenormalMode::Dynamic)
      KnownFPClasses |= fcPosZero;
  }
}

void KnownFPClass::propagateCanonicalizingSrc(const KnownFPClass &Src,
                                              const Function &F, Type *Ty) {
  propagateDenormal(Src, F, Ty);
  propagateNaN(Src, /*PreserveSign=*/true);
}
/// Given an exploded icmp instruction, return true if the comparison only
/// checks the sign bit. If it only checks the sign bit, set TrueIfSigned if
/// the result of the comparison is true when the input value is signed.
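/// For example, (icmp ugt X, 0x7fffffff) is true exactly when the sign bit of
/// X is set, so it is a sign bit check with TrueIfSigned == true.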
bool llvm::isSignBitCheck(ICmpInst::Predicate Pred, const APInt &RHS,
                          bool &TrueIfSigned) {
  switch (Pred) {
  case ICmpInst::ICMP_SLT: // True if LHS s< 0
    TrueIfSigned = true;
    return RHS.isZero();
  case ICmpInst::ICMP_SLE: // True if LHS s<= -1
    TrueIfSigned = true;
    return RHS.isAllOnes();
  case ICmpInst::ICMP_SGT: // True if LHS s> -1
    TrueIfSigned = false;
    return RHS.isAllOnes();
  case ICmpInst::ICMP_SGE: // True if LHS s>= 0
    TrueIfSigned = false;
    return RHS.isZero();
  case ICmpInst::ICMP_UGT:
    // True if LHS u> RHS and RHS == sign-bit-mask - 1
    TrueIfSigned = true;
    return RHS.isMaxSignedValue();
  case ICmpInst::ICMP_UGE:
    // True if LHS u>= RHS and RHS == sign-bit-mask (2^7, 2^15, 2^31, etc)
    TrueIfSigned = true;
    return RHS.isMinSignedValue();
  case ICmpInst::ICMP_ULT:
    // True if LHS u< RHS and RHS == sign-bit-mask (2^7, 2^15, 2^31, etc)
    TrueIfSigned = false;
    return RHS.isMinSignedValue();
  case ICmpInst::ICMP_ULE:
    // True if LHS u<= RHS and RHS == sign-bit-mask - 1
    TrueIfSigned = false;
    return RHS.isMaxSignedValue();
  default:
    return false;
  }
}

/// Returns a pair of values, which if passed to llvm.is.fpclass, returns the
/// same result as an fcmp with the given operands.
std::pair<Value *, FPClassTest> llvm::fcmpToClassTest(FCmpInst::Predicate Pred,
                                                      const Function &F,
                                                      Value *LHS, Value *RHS,
                                                      bool LookThroughSrc) {
  const APFloat *ConstRHS;
  if (!match(RHS, m_APFloatAllowPoison(ConstRHS)))
    return {nullptr, fcAllFlags};

  return fcmpToClassTest(Pred, F, LHS, ConstRHS, LookThroughSrc);
}

std::pair<Value *, FPClassTest>
llvm::fcmpToClassTest(FCmpInst::Predicate Pred, const Function &F, Value *LHS,
                      const APFloat *ConstRHS, bool LookThroughSrc) {

  auto [Src, ClassIfTrue, ClassIfFalse] =
      fcmpImpliesClass(Pred, F, LHS, *ConstRHS, LookThroughSrc);
  if (Src && ClassIfTrue == ~ClassIfFalse)
    return {Src, ClassIfTrue};

  return {nullptr, fcAllFlags};
}
/// Return the return value for fcmpImpliesClass for a compare that produces an
/// exact class test.
static std::tuple<Value *, FPClassTest, FPClassTest> exactClass(Value *V,
                                                                FPClassTest M) {
  return {V, M, ~M};
}
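/// Determine which floating-point classes of the tested value are implied
/// when the compare is true and when it is false. Returns the tested value
/// plus the two class masks.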
std::tuple<Value *, FPClassTest, FPClassTest>
llvm::fcmpImpliesClass(CmpInst::Predicate Pred, const Function &F, Value *LHS,
                       FPClassTest RHSClass, bool LookThroughSrc) {
  assert(RHSClass != fcNone);
  Value *Src = LHS;

  if (Pred == FCmpInst::FCMP_TRUE)
    return exactClass(Src, fcAllFlags);

  if (Pred == FCmpInst::FCMP_FALSE)
    return exactClass(Src, fcNone);

  const FPClassTest OrigClass = RHSClass;

  const bool IsNegativeRHS = (RHSClass & fcNegative) == RHSClass;
  const bool IsPositiveRHS = (RHSClass & fcPositive) == RHSClass;
  const bool IsNaN = (RHSClass & ~fcNan) == fcNone;

  if (IsNaN) {
    // fcmp o__ x, nan -> false
    // fcmp u__ x, nan -> true
    return exactClass(Src, CmpInst::isOrdered(Pred) ? fcNone : fcAllFlags);
  }

  // fcmp ord x, zero|normal|subnormal|inf -> ~fcNan
  if (Pred == FCmpInst::FCMP_ORD)
    return exactClass(Src, ~fcNan);

  // fcmp uno x, zero|normal|subnormal|inf -> fcNan
  if (Pred == FCmpInst::FCMP_UNO)
    return exactClass(Src, fcNan);

  const bool IsFabs = LookThroughSrc && match(LHS, m_FAbs(m_Value(Src)));
  if (IsFabs)
    RHSClass = llvm::inverse_fabs(RHSClass);

  const bool IsZero = (OrigClass & fcZero) == OrigClass;
  if (IsZero) {
    assert(Pred != FCmpInst::FCMP_ORD && Pred != FCmpInst::FCMP_UNO);
    // Compares with fcNone are only exactly equal to fcZero if input denormals
    // are not flushed.
    // TODO: Handle DAZ by expanding masks to cover subnormal cases.
    if (!inputDenormalIsIEEE(F, LHS->getType()))
      return {nullptr, fcAllFlags, fcAllFlags};

    switch (Pred) {
    case FCmpInst::FCMP_OEQ: // Match x == 0.0
      return exactClass(Src, fcZero);
    case FCmpInst::FCMP_UEQ: // Match isnan(x) || (x == 0.0)
      return exactClass(Src, fcZero | fcNan);
    case FCmpInst::FCMP_UNE: // Match (x != 0.0)
      return exactClass(Src, ~fcZero);
    case FCmpInst::FCMP_ONE: // Match !isnan(x) && x != 0.0
      return exactClass(Src, ~fcNan & ~fcZero);
    case FCmpInst::FCMP_ORD:
      // Canonical form of ord/uno is with a zero. We could also handle
      // non-canonical other non-NaN constants or LHS == RHS.
      return exactClass(Src, ~fcNan);
    case FCmpInst::FCMP_UNO:
      return exactClass(Src, fcNan);
    case FCmpInst::FCMP_OGT: // x > 0
      return exactClass(Src, fcPosSubnormal | fcPosNormal | fcPosInf);
    case FCmpInst::FCMP_UGT: // isnan(x) || x > 0
      return exactClass(Src, fcPosSubnormal | fcPosNormal | fcPosInf | fcNan);
    case FCmpInst::FCMP_OGE: // x >= 0
      return exactClass(Src, fcPositive | fcNegZero);
    case FCmpInst::FCMP_UGE: // isnan(x) || x >= 0
      return exactClass(Src, fcPositive | fcNegZero | fcNan);
    case FCmpInst::FCMP_OLT: // x < 0
      return exactClass(Src, fcNegSubnormal | fcNegNormal | fcNegInf);
    case FCmpInst::FCMP_ULT: // isnan(x) || x < 0
      return exactClass(Src, fcNegSubnormal | fcNegNormal | fcNegInf | fcNan);
    case FCmpInst::FCMP_OLE: // x <= 0
      return exactClass(Src, fcNegative | fcPosZero);
    case FCmpInst::FCMP_ULE: // isnan(x) || x <= 0
      return exactClass(Src, fcNegative | fcPosZero | fcNan);
    default:
      llvm_unreachable("all compare types are handled");
    }

    return {nullptr, fcAllFlags, fcAllFlags};
  }

  const bool IsDenormalRHS = (OrigClass & fcSubnormal) == OrigClass;

  const bool IsInf = (OrigClass & fcInf) == OrigClass;
  if (IsInf) {
    FPClassTest Mask = fcAllFlags;

    switch (Pred) {
    case FCmpInst::FCMP_OEQ:
    case FCmpInst::FCMP_UNE: {
      // Match __builtin_isinf patterns
      //
      //   fcmp oeq x, +inf -> is_fpclass x, fcPosInf
      //   fcmp oeq fabs(x), +inf -> is_fpclass x, fcInf
      //   fcmp oeq x, -inf -> is_fpclass x, fcNegInf
      //   fcmp oeq fabs(x), -inf -> is_fpclass x, 0 -> false
      //
      //   fcmp une x, +inf -> is_fpclass x, ~fcPosInf
      //   fcmp une fabs(x), +inf -> is_fpclass x, ~fcInf
      //   fcmp une x, -inf -> is_fpclass x, ~fcNegInf
      //   fcmp une fabs(x), -inf -> is_fpclass x, fcAllFlags -> true
      if (IsNegativeRHS) {
        Mask = fcNegInf;
        if (IsFabs)
          Mask = fcNone;
      } else {
        Mask = fcPosInf;
        if (IsFabs)
          Mask |= fcNegInf;
      }
      break;
    }
    case FCmpInst::FCMP_ONE:
    case FCmpInst::FCMP_UEQ: {
      // Match __builtin_isinf patterns
      //   fcmp one x, -inf -> is_fpclass x, ~fcNegInf & ~fcNan
      //   fcmp one fabs(x), -inf -> is_fpclass x, ~fcNan
      //   fcmp one x, +inf -> is_fpclass x, ~fcPosInf & ~fcNan
      //   fcmp one fabs(x), +inf -> is_fpclass x, ~fcInf & ~fcNan
      //
      //   fcmp ueq x, +inf -> is_fpclass x, fcPosInf|fcNan
      //   fcmp ueq (fabs x), +inf -> is_fpclass x, fcInf|fcNan
      //   fcmp ueq x, -inf -> is_fpclass x, fcNegInf|fcNan
      //   fcmp ueq fabs(x), -inf -> is_fpclass x, fcNan
      if (IsNegativeRHS) {
        Mask = ~fcNegInf & ~fcNan;
        if (IsFabs)
          Mask = ~fcNan;
      } else {
        Mask = ~fcPosInf & ~fcNan;
        if (IsFabs)
          Mask &= ~fcNegInf;
      }
      break;
    }
    case FCmpInst::FCMP_OLT:
    case FCmpInst::FCMP_UGE: {
      if (IsNegativeRHS) {
        // No value is ordered and less than negative infinity.
        // All values are unordered with or at least negative infinity.
        // fcmp olt x, -inf -> false
        // fcmp uge x, -inf -> true
        Mask = fcNone;
        break;
      }

      // fcmp olt fabs(x), +inf -> fcFinite
      // fcmp uge fabs(x), +inf -> ~fcFinite
      // fcmp olt x, +inf -> fcFinite|fcNegInf
      // fcmp uge x, +inf -> ~(fcFinite|fcNegInf)
      Mask = fcFinite;
      if (!IsFabs)
        Mask |= fcNegInf;
      break;
    }
    case FCmpInst::FCMP_OGE:
    case FCmpInst::FCMP_ULT: {
      if (IsNegativeRHS) {
        // fcmp oge x, -inf -> ~fcNan
        // fcmp oge fabs(x), -inf -> ~fcNan
        // fcmp ult x, -inf -> fcNan
        // fcmp ult fabs(x), -inf -> fcNan
        Mask = ~fcNan;
        break;
      }

      // fcmp oge fabs(x), +inf -> fcInf
      // fcmp oge x, +inf -> fcPosInf
      // fcmp ult fabs(x), +inf -> ~fcInf
      // fcmp ult x, +inf -> ~fcPosInf
      Mask = fcPosInf;
      if (IsFabs)
        Mask |= fcNegInf;
      break;
    }
    case FCmpInst::FCMP_OGT:
    case FCmpInst::FCMP_ULE: {
      if (IsNegativeRHS) {
        // fcmp ogt x, -inf -> fcmp one x, -inf
        // fcmp ogt fabs(x), -inf -> fcmp ord x, x
        // fcmp ule x, -inf -> fcmp ueq x, -inf
        // fcmp ule fabs(x), -inf -> fcmp uno x, x
        Mask = IsFabs ? ~fcNan : ~(fcNegInf | fcNan);
        break;
      }

      // No value is ordered and greater than infinity.
      Mask = fcNone;
      break;
    }
    case FCmpInst::FCMP_OLE:
    case FCmpInst::FCMP_UGT: {
      if (IsNegativeRHS) {
        Mask = IsFabs ? fcNone : fcNegInf;
        break;
      }

      // fcmp ole x, +inf -> fcmp ord x, x
      // fcmp ole fabs(x), +inf -> fcmp ord x, x
      // fcmp ole x, -inf -> fcmp oeq x, -inf
      // fcmp ole fabs(x), -inf -> false
      Mask = ~fcNan;
      break;
    }
    default:
      llvm_unreachable("all compare types are handled");
    }

    // Invert the comparison for the unordered cases.
    if (FCmpInst::isUnordered(Pred))
      Mask = ~Mask;

    return exactClass(Src, Mask);
  }

  if (Pred == FCmpInst::FCMP_OEQ)
    return {Src, RHSClass, fcAllFlags};

  if (Pred == FCmpInst::FCMP_UEQ) {
    FPClassTest Class = RHSClass | fcNan;
    return {Src, Class, ~fcNan};
  }

  if (Pred == FCmpInst::FCMP_ONE)
    return {Src, ~fcNan, RHSClass | fcNan};

  if (Pred == FCmpInst::FCMP_UNE)
    return {Src, fcAllFlags, RHSClass};

  assert((RHSClass == fcNone || RHSClass == fcPosNormal ||
          RHSClass == fcNegNormal || RHSClass == fcNormal ||
          RHSClass == fcPosSubnormal || RHSClass == fcNegSubnormal ||
          RHSClass == fcSubnormal) &&
         "should have been recognized as an exact class test");

  if (IsNegativeRHS) {
    // TODO: Handle fneg(fabs)
    if (IsFabs) {
      // fabs(x) o> -k -> fcmp ord x, x
      // fabs(x) u> -k -> true
      // fabs(x) o< -k -> false
      // fabs(x) u< -k -> fcmp uno x, x
      switch (Pred) {
      case FCmpInst::FCMP_OGT:
      case FCmpInst::FCMP_OGE:
        return {Src, ~fcNan, fcNan};
      case FCmpInst::FCMP_UGT:
      case FCmpInst::FCMP_UGE:
        return {Src, fcAllFlags, fcNone};
      case FCmpInst::FCMP_OLT:
      case FCmpInst::FCMP_OLE:
        return {Src, fcNone, fcAllFlags};
      case FCmpInst::FCMP_ULT:
      case FCmpInst::FCMP_ULE:
        return {Src, fcNan, ~fcNan};
      default:
        break;
      }

      return {nullptr, fcAllFlags, fcAllFlags};
    }

    FPClassTest ClassesLE = fcNegInf | fcNegNormal;
    FPClassTest ClassesGE = fcPositive | fcNegZero | fcNegSubnormal;

    if (IsDenormalRHS)
      ClassesLE |= fcNegSubnormal;
    else
      ClassesGE |= fcNegNormal;

    switch (Pred) {
    case FCmpInst::FCMP_OGT:
    case FCmpInst::FCMP_OGE:
      return {Src, ClassesGE, ~ClassesGE | RHSClass};
    case FCmpInst::FCMP_UGT:
    case FCmpInst::FCMP_UGE:
      return {Src, ClassesGE | fcNan, ~(ClassesGE | fcNan) | RHSClass};
    case FCmpInst::FCMP_OLT:
    case FCmpInst::FCMP_OLE:
      return {Src, ClassesLE, ~ClassesLE | RHSClass};
    case FCmpInst::FCMP_ULT:
    case FCmpInst::FCMP_ULE:
      return {Src, ClassesLE | fcNan, ~(ClassesLE | fcNan) | RHSClass};
    default:
      break;
    }
  } else if (IsPositiveRHS) {
    FPClassTest ClassesGE = fcPosNormal | fcPosInf;
    FPClassTest ClassesLE = fcNegative | fcPosZero | fcPosSubnormal;
    if (IsDenormalRHS)
      ClassesGE |= fcPosSubnormal;
    else
      ClassesLE |= fcPosNormal;

    if (IsFabs) {
      ClassesGE = llvm::inverse_fabs(ClassesGE);
      ClassesLE = llvm::inverse_fabs(ClassesLE);
    }

    switch (Pred) {
    case FCmpInst::FCMP_OGT:
    case FCmpInst::FCMP_OGE:
      return {Src, ClassesGE, ~ClassesGE | RHSClass};
    case FCmpInst::FCMP_UGT:
    case FCmpInst::FCMP_UGE:
      return {Src, ClassesGE | fcNan, ~(ClassesGE | fcNan) | RHSClass};
    case FCmpInst::FCMP_OLT:
    case FCmpInst::FCMP_OLE:
      return {Src, ClassesLE, ~ClassesLE | RHSClass};
    case FCmpInst::FCMP_ULT:
    case FCmpInst::FCMP_ULE:
      return {Src, ClassesLE | fcNan, ~(ClassesLE | fcNan) | RHSClass};
    default:
      break;
    }
  }

  return {nullptr, fcAllFlags, fcAllFlags};
}
std::tuple<Value *, FPClassTest, FPClassTest>
llvm::fcmpImpliesClass(CmpInst::Predicate Pred, const Function &F, Value *LHS,
                       const APFloat &ConstRHS, bool LookThroughSrc) {
  // We can refine checks against smallest normal / largest denormal to an
  // exact class test.
  if (!ConstRHS.isNegative() && ConstRHS.isSmallestNormalized()) {
    Value *Src = LHS;
    const bool IsFabs = LookThroughSrc && match(LHS, m_FAbs(m_Value(Src)));

    FPClassTest Mask;
    // Match pattern that's used in __builtin_isnormal.
    switch (Pred) {
    case FCmpInst::FCMP_OLT:
    case FCmpInst::FCMP_UGE: {
      // fcmp olt x, smallest_normal -> fcNegInf|fcNegNormal|fcSubnormal|fcZero
      // fcmp olt fabs(x), smallest_normal -> fcSubnormal|fcZero
      // fcmp uge x, smallest_normal -> fcNan|fcPosNormal|fcPosInf
      // fcmp uge fabs(x), smallest_normal -> ~(fcSubnormal|fcZero)
      Mask = fcZero | fcSubnormal;
      if (!IsFabs)
        Mask |= fcNegNormal | fcNegInf;

      break;
    }
    case FCmpInst::FCMP_OGE:
    case FCmpInst::FCMP_ULT: {
      // fcmp oge x, smallest_normal -> fcPosNormal | fcPosInf
      // fcmp oge fabs(x), smallest_normal -> fcInf | fcNormal
      // fcmp ult x, smallest_normal -> ~(fcPosNormal | fcPosInf)
      // fcmp ult fabs(x), smallest_normal -> ~(fcInf | fcNormal)
      Mask = fcPosInf | fcPosNormal;
      if (IsFabs)
        Mask |= fcNegInf | fcNegNormal;
      break;
    }
    default:
      return fcmpImpliesClass(Pred, F, LHS, ConstRHS.classify(),
                              LookThroughSrc);
    }

    // Invert the comparison for the unordered cases.
    if (FCmpInst::isUnordered(Pred))
      Mask = ~Mask;

    return exactClass(Src, Mask);
  }

  return fcmpImpliesClass(Pred, F, LHS, ConstRHS.classify(), LookThroughSrc);
}
std::tuple<Value *, FPClassTest, FPClassTest>
llvm::fcmpImpliesClass(CmpInst::Predicate Pred, const Function &F, Value *LHS,
                       Value *RHS, bool LookThroughSrc) {
  const APFloat *ConstRHS;
  if (!match(RHS, m_APFloatAllowPoison(ConstRHS)))
    return {nullptr, fcAllFlags, fcAllFlags};

  // TODO: Just call computeKnownFPClass for RHS to handle non-constants.
  return fcmpImpliesClass(Pred, F, LHS, *ConstRHS, LookThroughSrc);
}
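// Infer known floating-point classes for V from a branch or assume condition.
// Three condition forms are recognized below: an fcmp against a constant, an
// llvm.is.fpclass test, and an integer sign-bit check of the value's bits.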
static void computeKnownFPClassFromCond(const Value *V, Value *Cond,
                                        bool CondIsTrue,
                                        const Instruction *CxtI,
                                        KnownFPClass &KnownFromContext) {
  CmpInst::Predicate Pred;
  Value *LHS;
  uint64_t ClassVal = 0;
  const APFloat *CRHS;
  const APInt *RHS;
  if (match(Cond, m_FCmp(Pred, m_Value(LHS), m_APFloat(CRHS)))) {
    auto [CmpVal, MaskIfTrue, MaskIfFalse] = fcmpImpliesClass(
        Pred, *CxtI->getParent()->getParent(), LHS, *CRHS, LHS != V);
    if (CmpVal == V)
      KnownFromContext.knownNot(~(CondIsTrue ? MaskIfTrue : MaskIfFalse));
  } else if (match(Cond, m_Intrinsic<Intrinsic::is_fpclass>(
                             m_Value(LHS), m_ConstantInt(ClassVal)))) {
    FPClassTest Mask = static_cast<FPClassTest>(ClassVal);
    KnownFromContext.knownNot(CondIsTrue ? ~Mask : Mask);
  } else if (match(Cond, m_ICmp(Pred, m_ElementWiseBitCast(m_Value(LHS)),
                                m_APInt(RHS)))) {
    bool TrueIfSigned;
    if (!isSignBitCheck(Pred, *RHS, TrueIfSigned))
      return;
    if (TrueIfSigned == CondIsTrue)
      KnownFromContext.signBitMustBeOne();
    else
      KnownFromContext.signBitMustBeZero();
  }
}

static KnownFPClass computeKnownFPClassFromContext(const Value *V,
                                                   const SimplifyQuery &Q) {
  KnownFPClass KnownFromContext;

  if (!Q.CxtI)
    return KnownFromContext;

  if (Q.DC && Q.DT) {
    // Handle dominating conditions.
    for (BranchInst *BI : Q.DC->conditionsFor(V)) {
      Value *Cond = BI->getCondition();

      BasicBlockEdge Edge0(BI->getParent(), BI->getSuccessor(0));
      if (Q.DT->dominates(Edge0, Q.CxtI->getParent()))
        computeKnownFPClassFromCond(V, Cond, /*CondIsTrue=*/true, Q.CxtI,
                                    KnownFromContext);

      BasicBlockEdge Edge1(BI->getParent(), BI->getSuccessor(1));
      if (Q.DT->dominates(Edge1, Q.CxtI->getParent()))
        computeKnownFPClassFromCond(V, Cond, /*CondIsTrue=*/false, Q.CxtI,
                                    KnownFromContext);
    }
  }

  if (!Q.AC)
    return KnownFromContext;

  // Try to restrict the floating-point classes based on information from
  // assumptions.
  for (auto &AssumeVH : Q.AC->assumptionsFor(V)) {
    if (!AssumeVH)
      continue;
    CallInst *I = cast<CallInst>(AssumeVH);

    assert(I->getFunction() == Q.CxtI->getParent()->getParent() &&
           "Got assumption for the wrong function!");
    assert(I->getIntrinsicID() == Intrinsic::assume &&
           "must be an assume intrinsic");

    if (!isValidAssumeForContext(I, Q.CxtI, Q.DT))
      continue;

    computeKnownFPClassFromCond(V, I->getArgOperand(0), /*CondIsTrue=*/true,
                                Q.CxtI, KnownFromContext);
  }

  return KnownFromContext;
}

void computeKnownFPClass(const Value *V, const APInt &DemandedElts,
                         FPClassTest InterestedClasses, KnownFPClass &Known,
                         unsigned Depth, const SimplifyQuery &Q);

static void computeKnownFPClass(const Value *V, KnownFPClass &Known,
                                FPClassTest InterestedClasses, unsigned Depth,
                                const SimplifyQuery &Q) {
  auto *FVTy = dyn_cast<FixedVectorType>(V->getType());
  APInt DemandedElts =
      FVTy ? APInt::getAllOnes(FVTy->getNumElements()) : APInt(1, 1);
  computeKnownFPClass(V, DemandedElts, InterestedClasses, Known, Depth, Q);
}

static void computeKnownFPClassForFPTrunc(const Operator *Op,
                                          const APInt &DemandedElts,
                                          FPClassTest InterestedClasses,
                                          KnownFPClass &Known, unsigned Depth,
                                          const SimplifyQuery &Q) {
  if ((InterestedClasses &
       (KnownFPClass::OrderedLessThanZeroMask | fcNan)) == fcNone)
    return;

  KnownFPClass KnownSrc;
  computeKnownFPClass(Op->getOperand(0), DemandedElts, InterestedClasses,
                      KnownSrc, Depth + 1, Q);

  // Sign should be preserved
  // TODO: Handle cannot be ordered greater than zero
  if (KnownSrc.cannotBeOrderedLessThanZero())
    Known.knownNot(KnownFPClass::OrderedLessThanZeroMask);

  Known.propagateNaN(KnownSrc, true);

  // Infinity needs a range check.
}
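/// Core implementation: compute the possible floating-point classes of \p V,
/// restricted to the demanded vector elements and to the classes the caller
/// is interested in.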

void computeKnownFPClass(const Value *V, const APInt &DemandedElts,
                         FPClassTest InterestedClasses, KnownFPClass &Known,
                         unsigned Depth, const SimplifyQuery &Q) {
  assert(Known.isUnknown() && "should not be called with known information");

  if (!DemandedElts) {
    // No demanded elts, better to assume we don't know anything.
    Known.resetAll();
    return;
  }

  assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");

  if (auto *CFP = dyn_cast<ConstantFP>(V)) {
    Known.KnownFPClasses = CFP->getValueAPF().classify();
    Known.SignBit = CFP->isNegative();
    return;
  }

  if (isa<ConstantAggregateZero>(V)) {
    Known.KnownFPClasses = fcPosZero;
    Known.SignBit = false;
    return;
  }

  if (isa<PoisonValue>(V)) {
    Known.KnownFPClasses = fcNone;
    Known.SignBit = false;
    return;
  }

  // Try to handle fixed width vector constants
  auto *VFVTy = dyn_cast<FixedVectorType>(V->getType());
  const Constant *CV = dyn_cast<Constant>(V);
  if (VFVTy && CV) {
    Known.KnownFPClasses = fcNone;
    bool SignBitAllZero = true;
    bool SignBitAllOne = true;

    // For vectors, verify that each element is not NaN.
    unsigned NumElts = VFVTy->getNumElements();
    for (unsigned i = 0; i != NumElts; ++i) {
      if (!DemandedElts[i])
        continue;

      Constant *Elt = CV->getAggregateElement(i);
      if (!Elt) {
        Known = KnownFPClass();
        return;
      }
      if (isa<PoisonValue>(Elt))
        continue;
      auto *CElt = dyn_cast<ConstantFP>(Elt);
      if (!CElt) {
        Known = KnownFPClass();
        return;
      }

      const APFloat &C = CElt->getValueAPF();
      Known.KnownFPClasses |= C.classify();
      if (C.isNegative())
        SignBitAllZero = false;
      else
        SignBitAllOne = false;
    }
    if (SignBitAllOne != SignBitAllZero)
      Known.SignBit = SignBitAllOne;
    return;
  }

  FPClassTest KnownNotFromFlags = fcNone;
  if (const auto *CB = dyn_cast<CallBase>(V))
    KnownNotFromFlags |= CB->getRetNoFPClass();
  else if (const auto *Arg = dyn_cast<Argument>(V))
    KnownNotFromFlags |= Arg->getNoFPClass();

  const Operator *Op = dyn_cast<Operator>(V);
  if (const FPMathOperator *FPOp = dyn_cast_or_null<FPMathOperator>(Op)) {
    if (FPOp->hasNoNaNs())
      KnownNotFromFlags |= fcNan;
    if (FPOp->hasNoInfs())
      KnownNotFromFlags |= fcInf;
  }

  KnownFPClass AssumedClasses = computeKnownFPClassFromContext(V, Q);
  KnownNotFromFlags |= ~AssumedClasses.KnownFPClasses;

  // We no longer need to find out about these bits from inputs if we can
  // assume this from flags/attributes.
  InterestedClasses &= ~KnownNotFromFlags;

  auto ClearClassesFromFlags = make_scope_exit([=, &Known] {
    Known.knownNot(KnownNotFromFlags);
    if (!Known.SignBit && AssumedClasses.SignBit) {
      if (*AssumedClasses.SignBit)
        Known.signBitMustBeOne();
      else
        Known.signBitMustBeZero();
    }
  });

  if (!Op)
    return;

  // All recursive calls that increase depth must come after this.
  if (Depth == MaxAnalysisRecursionDepth)
    return;

  const unsigned Opc = Op->getOpcode();
  switch (Opc) {
  case Instruction::FNeg: {
    computeKnownFPClass(Op->getOperand(0), DemandedElts, InterestedClasses,
                        Known, Depth + 1, Q);
    Known.fneg();
    break;
  }
  case Instruction::Select: {
    Value *Cond = Op->getOperand(0);
    Value *LHS = Op->getOperand(1);
    Value *RHS = Op->getOperand(2);

    FPClassTest FilterLHS = fcAllFlags;
    FPClassTest FilterRHS = fcAllFlags;

    Value *TestedValue = nullptr;
    FPClassTest MaskIfTrue = fcAllFlags;
    FPClassTest MaskIfFalse = fcAllFlags;
    uint64_t ClassVal = 0;
    const Function *F = cast<Instruction>(Op)->getFunction();
    CmpInst::Predicate Pred;
    Value *CmpLHS, *CmpRHS;
    if (F && match(Cond, m_FCmp(Pred, m_Value(CmpLHS), m_Value(CmpRHS)))) {
      // If the select filters out a value based on the class, it no longer
      // participates in the class of the result.

      // TODO: In some degenerate cases we can infer something if we try again
      // without looking through sign operations.
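      //
      // Illustrative sketch: for
      //   %c = fcmp uno double %x, 0.0
      //   %s = select i1 %c, double 0.0, double %x
      // fcmpImpliesClass reports TestedValue == %x with MaskIfTrue == fcNan,
      // so the false arm (%x) contributes only non-NaN classes and %s is
      // known never to be a NaN.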
      bool LookThroughFAbsFNeg = CmpLHS != LHS && CmpLHS != RHS;
      std::tie(TestedValue, MaskIfTrue, MaskIfFalse) =
          fcmpImpliesClass(Pred, *F, CmpLHS, CmpRHS, LookThroughFAbsFNeg);
    } else if (match(Cond,
                     m_Intrinsic<Intrinsic::is_fpclass>(
                         m_Value(TestedValue), m_ConstantInt(ClassVal)))) {
      FPClassTest TestedMask = static_cast<FPClassTest>(ClassVal);
      MaskIfTrue = TestedMask;
      MaskIfFalse = ~TestedMask;
    }

    if (TestedValue == LHS) {
      // match !isnan(x) ? x : y
      FilterLHS = MaskIfTrue;
    } else if (TestedValue == RHS) { // && IsExactClass
      // match !isnan(x) ? y : x
      FilterRHS = MaskIfFalse;
    }

    KnownFPClass Known2;
    computeKnownFPClass(LHS, DemandedElts, InterestedClasses & FilterLHS, Known,
                        Depth + 1, Q);
    Known.KnownFPClasses &= FilterLHS;

    computeKnownFPClass(RHS, DemandedElts, InterestedClasses & FilterRHS,
                        Known2, Depth + 1, Q);
    Known2.KnownFPClasses &= FilterRHS;

    Known |= Known2;
    break;
  }
  case Instruction::Call: {
    const CallInst *II = cast<CallInst>(Op);
    const Intrinsic::ID IID = II->getIntrinsicID();
    switch (IID) {
    case Intrinsic::fabs: {
      if ((InterestedClasses & (fcNan | fcPositive)) != fcNone) {
        // If we only care about the sign bit we don't need to inspect the
        // operand.
        computeKnownFPClass(II->getArgOperand(0), DemandedElts,
                            InterestedClasses, Known, Depth + 1, Q);
      }

      Known.fabs();
      break;
    }
    case Intrinsic::copysign: {
      KnownFPClass KnownSign;

      computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedClasses,
                          Known, Depth + 1, Q);
      computeKnownFPClass(II->getArgOperand(1), DemandedElts, InterestedClasses,
                          KnownSign, Depth + 1, Q);
      Known.copysign(KnownSign);
      break;
    }
    case Intrinsic::fma:
    case Intrinsic::fmuladd: {
      if ((InterestedClasses & fcNegative) == fcNone)
        break;

      if (II->getArgOperand(0) != II->getArgOperand(1))
        break;

      // The multiply cannot be -0 and therefore the add can't be -0
      Known.knownNot(fcNegZero);

      // x * x + y is non-negative if y is non-negative.
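      // (Illustrative: in "fma(%x, %x, %y)" with %y known non-negative, the
      // product %x * %x is +0, positive, or NaN, so the sum can only escape
      // the non-negative classes by being NaN, which fcNegative excludes.)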
      KnownFPClass KnownAddend;
      computeKnownFPClass(II->getArgOperand(2), DemandedElts, InterestedClasses,
                          KnownAddend, Depth + 1, Q);

      if (KnownAddend.cannotBeOrderedLessThanZero())
        Known.knownNot(fcNegative);
      break;
    }
    case Intrinsic::sqrt:
    case Intrinsic::experimental_constrained_sqrt: {
      KnownFPClass KnownSrc;
      FPClassTest InterestedSrcs = InterestedClasses;
      if (InterestedClasses & fcNan)
        InterestedSrcs |= KnownFPClass::OrderedLessThanZeroMask;

      computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedSrcs,
                          KnownSrc, Depth + 1, Q);

      if (KnownSrc.isKnownNeverPosInfinity())
        Known.knownNot(fcPosInf);
      if (KnownSrc.isKnownNever(fcSNan))
        Known.knownNot(fcSNan);

      // Any negative value besides -0 returns a nan.
      if (KnownSrc.isKnownNeverNaN() && KnownSrc.cannotBeOrderedLessThanZero())
        Known.knownNot(fcNan);

      // The only negative value that can be returned is -0 for -0 inputs.
      Known.knownNot(fcNegInf | fcNegSubnormal | fcNegNormal);

      // If the input denormal mode could be PreserveSign, a negative
      // subnormal input could produce a negative zero output.
      const Function *F = II->getFunction();
      if (Q.IIQ.hasNoSignedZeros(II) ||
          (F && KnownSrc.isKnownNeverLogicalNegZero(*F, II->getType())))
        Known.knownNot(fcNegZero);

      break;
    }
    case Intrinsic::sin:
    case Intrinsic::cos: {
      // Return NaN on infinite inputs.
      KnownFPClass KnownSrc;
      computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedClasses,
                          KnownSrc, Depth + 1, Q);
      Known.knownNot(fcInf);
      if (KnownSrc.isKnownNeverNaN() && KnownSrc.isKnownNeverInfinity())
        Known.knownNot(fcNan);
      break;
    }
    case Intrinsic::maxnum:
    case Intrinsic::minnum:
    case Intrinsic::minimum:
    case Intrinsic::maximum: {
      KnownFPClass KnownLHS, KnownRHS;
      computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedClasses,
                          KnownLHS, Depth + 1, Q);
      computeKnownFPClass(II->getArgOperand(1), DemandedElts, InterestedClasses,
                          KnownRHS, Depth + 1, Q);

      bool NeverNaN = KnownLHS.isKnownNeverNaN() || KnownRHS.isKnownNeverNaN();
      Known = KnownLHS | KnownRHS;

      // If either operand is not NaN, the result is not NaN.
      if (NeverNaN && (IID == Intrinsic::minnum || IID == Intrinsic::maxnum))
        Known.knownNot(fcNan);

      if (IID == Intrinsic::maxnum) {
        // If at least one operand is known to be positive, the result must be
        // positive.
        if ((KnownLHS.cannotBeOrderedLessThanZero() &&
             KnownLHS.isKnownNeverNaN()) ||
            (KnownRHS.cannotBeOrderedLessThanZero() &&
             KnownRHS.isKnownNeverNaN()))
          Known.knownNot(KnownFPClass::OrderedLessThanZeroMask);
      } else if (IID == Intrinsic::maximum) {
        // If at least one operand is known to be positive, the result must be
        // positive.
        if (KnownLHS.cannotBeOrderedLessThanZero() ||
            KnownRHS.cannotBeOrderedLessThanZero())
          Known.knownNot(KnownFPClass::OrderedLessThanZeroMask);
      } else if (IID == Intrinsic::minnum) {
        // If at least one operand is known to be negative, the result must be
        // negative.
        if ((KnownLHS.cannotBeOrderedGreaterThanZero() &&
             KnownLHS.isKnownNeverNaN()) ||
            (KnownRHS.cannotBeOrderedGreaterThanZero() &&
             KnownRHS.isKnownNeverNaN()))
          Known.knownNot(KnownFPClass::OrderedGreaterThanZeroMask);
      } else {
        // If at least one operand is known to be negative, the result must be
        // negative.
        if (KnownLHS.cannotBeOrderedGreaterThanZero() ||
            KnownRHS.cannotBeOrderedGreaterThanZero())
          Known.knownNot(KnownFPClass::OrderedGreaterThanZeroMask);
      }

      // Fixup zero handling if denormals could be returned as a zero.
      //
      // As there's no spec for denormal flushing, be conservative with the
      // treatment of denormals that could be flushed to zero. For older
      // subtargets on AMDGPU the min/max instructions would not flush the
      // output and return the original value.
      if ((Known.KnownFPClasses & fcZero) != fcNone &&
          !Known.isKnownNeverSubnormal()) {
        const Function *Parent = II->getFunction();
        if (!Parent)
          break;

        DenormalMode Mode = Parent->getDenormalMode(
            II->getType()->getScalarType()->getFltSemantics());
        if (Mode != DenormalMode::getIEEE())
          Known.KnownFPClasses |= fcZero;
      }

      if (Known.isKnownNeverNaN()) {
        if (KnownLHS.SignBit && KnownRHS.SignBit &&
            *KnownLHS.SignBit == *KnownRHS.SignBit) {
          if (*KnownLHS.SignBit)
            Known.signBitMustBeOne();
          else
            Known.signBitMustBeZero();
        } else if ((IID == Intrinsic::maximum || IID == Intrinsic::minimum) ||
                   ((KnownLHS.isKnownNeverNegZero() ||
                     KnownRHS.isKnownNeverPosZero()) &&
                    (KnownLHS.isKnownNeverPosZero() ||
                     KnownRHS.isKnownNeverNegZero()))) {
          if ((IID == Intrinsic::maximum || IID == Intrinsic::maxnum) &&
              (KnownLHS.SignBit == false || KnownRHS.SignBit == false))
            Known.signBitMustBeZero();
          else if ((IID == Intrinsic::minimum || IID == Intrinsic::minnum) &&
                   (KnownLHS.SignBit == true || KnownRHS.SignBit == true))
            Known.signBitMustBeOne();
        }
      }
      break;
    }
    case Intrinsic::canonicalize: {
      KnownFPClass KnownSrc;
      computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedClasses,
                          KnownSrc, Depth + 1, Q);

      // This is essentially a stronger form of
      // propagateCanonicalizingSrc. Other "canonicalizing" operations don't
      // actually have an IR canonicalization guarantee.

      // Canonicalize may flush denormals to zero, so we have to consider the
      // denormal mode to preserve known-not-0 knowledge.
      Known.KnownFPClasses = KnownSrc.KnownFPClasses | fcZero | fcQNan;

      // Stronger version of propagateNaN
      // Canonicalize is guaranteed to quiet signaling nans.
      if (KnownSrc.isKnownNeverNaN())
        Known.knownNot(fcNan);
      else
        Known.knownNot(fcSNan);

      const Function *F = II->getFunction();
      if (!F)
        break;

      // If the parent function flushes denormals, the canonical output cannot
      // be a denormal.
      const fltSemantics &FPType =
          II->getType()->getScalarType()->getFltSemantics();
      DenormalMode DenormMode = F->getDenormalMode(FPType);
      if (DenormMode == DenormalMode::getIEEE()) {
        if (KnownSrc.isKnownNever(fcPosZero))
          Known.knownNot(fcPosZero);
        if (KnownSrc.isKnownNever(fcNegZero))
          Known.knownNot(fcNegZero);
        break;
      }

      if (DenormMode.inputsAreZero() || DenormMode.outputsAreZero())
        Known.knownNot(fcSubnormal);

      if (DenormMode.Input == DenormalMode::PositiveZero ||
          (DenormMode.Output == DenormalMode::PositiveZero &&
           DenormMode.Input == DenormalMode::IEEE))
        Known.knownNot(fcNegZero);

      break;
    }
    case Intrinsic::vector_reduce_fmax:
    case Intrinsic::vector_reduce_fmin:
    case Intrinsic::vector_reduce_fmaximum:
    case Intrinsic::vector_reduce_fminimum: {
      // reduce min/max will choose an element from one of the vector elements,
      // so we can infer any class information that is common to all elements.
      Known = computeKnownFPClass(II->getArgOperand(0), II->getFastMathFlags(),
                                  InterestedClasses, Depth + 1, Q);

      // Can only propagate sign if output is never NaN.
      if (!Known.isKnownNeverNaN())
        Known.SignBit.reset();
      break;
    }
    // reverse preserves all characteristics of the input vec's element.
    case Intrinsic::vector_reverse:
      Known = computeKnownFPClass(
          II->getArgOperand(0), DemandedElts.reverseBits(),
          II->getFastMathFlags(), InterestedClasses, Depth + 1, Q);
      break;
    case Intrinsic::trunc:
    case Intrinsic::floor:
    case Intrinsic::ceil:
    case Intrinsic::rint:
    case Intrinsic::nearbyint:
    case Intrinsic::round:
    case Intrinsic::roundeven: {
      KnownFPClass KnownSrc;
      FPClassTest InterestedSrcs = InterestedClasses;
      if (InterestedSrcs & fcPosFinite)
        InterestedSrcs |= fcPosFinite;
      if (InterestedSrcs & fcNegFinite)
        InterestedSrcs |= fcNegFinite;
      computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedSrcs,
                          KnownSrc, Depth + 1, Q);

      // Integer results cannot be subnormal.
      Known.knownNot(fcSubnormal);

      Known.propagateNaN(KnownSrc, true);

      // Pass through infinities, except PPC_FP128 is a special case for
      // intrinsics other than trunc.
      if (IID == Intrinsic::trunc || !V->getType()->isMultiUnitFPType()) {
        if (KnownSrc.isKnownNeverPosInfinity())
          Known.knownNot(fcPosInf);
        if (KnownSrc.isKnownNeverNegInfinity())
          Known.knownNot(fcNegInf);
      }

      // Negative values that round up to 0 produce -0.
      if (KnownSrc.isKnownNever(fcPosFinite))
        Known.knownNot(fcPosFinite);
      if (KnownSrc.isKnownNever(fcNegFinite))
        Known.knownNot(fcNegFinite);

      break;
    }
    case Intrinsic::exp:
    case Intrinsic::exp2:
    case Intrinsic::exp10: {
      Known.knownNot(fcNegative);
      if ((InterestedClasses & fcNan) == fcNone)
        break;

      KnownFPClass KnownSrc;
      computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedClasses,
                          KnownSrc, Depth + 1, Q);
      if (KnownSrc.isKnownNeverNaN()) {
        Known.knownNot(fcNan);
        Known.signBitMustBeZero();
      }

      break;
    }
    case Intrinsic::fptrunc_round: {
      computeKnownFPClassForFPTrunc(Op, DemandedElts, InterestedClasses, Known,
                                    Depth, Q);
      break;
    }
    case Intrinsic::log:
    case Intrinsic::log10:
    case Intrinsic::log2:
    case Intrinsic::experimental_constrained_log:
    case Intrinsic::experimental_constrained_log10:
    case Intrinsic::experimental_constrained_log2: {
      // log(+inf) -> +inf
      // log([+-]0.0) -> -inf
      // log(-inf) -> nan
      // log(-x) -> nan
      if ((InterestedClasses & (fcNan | fcInf)) == fcNone)
        break;

      FPClassTest InterestedSrcs = InterestedClasses;
      if ((InterestedClasses & fcNegInf) != fcNone)
        InterestedSrcs |= fcZero | fcSubnormal;
      if ((InterestedClasses & fcNan) != fcNone)
        InterestedSrcs |= fcNan | (fcNegative & ~fcNan);

      KnownFPClass KnownSrc;
      computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedSrcs,
                          KnownSrc, Depth + 1, Q);

      if (KnownSrc.isKnownNeverPosInfinity())
        Known.knownNot(fcPosInf);

      if (KnownSrc.isKnownNeverNaN() && KnownSrc.cannotBeOrderedLessThanZero())
        Known.knownNot(fcNan);

      const Function *F = II->getFunction();
      if (F && KnownSrc.isKnownNeverLogicalZero(*F, II->getType()))
        Known.knownNot(fcNegInf);

      break;
    }
    case Intrinsic::powi: {
      if ((InterestedClasses & fcNegative) == fcNone)
        break;

      const Value *Exp = II->getArgOperand(1);
      Type *ExpTy = Exp->getType();
      unsigned BitWidth = ExpTy->getScalarType()->getIntegerBitWidth();
      KnownBits ExponentKnownBits(BitWidth);
      computeKnownBits(Exp, isa<VectorType>(ExpTy) ? DemandedElts : APInt(1, 1),
                       ExponentKnownBits, Depth + 1, Q);

      if (ExponentKnownBits.Zero[0]) { // Is even
        Known.knownNot(fcNegative);
        break;
      }
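
      // (Illustrative: in "call double @llvm.powi.f64.i32(double %x, i32 6)"
      // bit 0 of the exponent is known zero, so the branch above already
      // proves the result is never in a negative class, whatever %x is.)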
      // Given that exp is an integer, here are the
      // ways that pow can return a negative value:
      //
      // pow(-x, exp)   --> negative if exp is odd and x is negative.
      // pow(-0, exp)   --> -inf if exp is negative odd.
      // pow(-0, exp)   --> -0 if exp is positive odd.
      // pow(-inf, exp) --> -0 if exp is negative odd.
      // pow(-inf, exp) --> -inf if exp is positive odd.
      KnownFPClass KnownSrc;
      computeKnownFPClass(II->getArgOperand(0), DemandedElts, fcNegative,
                          KnownSrc, Depth + 1, Q);
      if (KnownSrc.isKnownNever(fcNegative))
        Known.knownNot(fcNegative);
      break;
    }
    case Intrinsic::ldexp: {
      KnownFPClass KnownSrc;
      computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedClasses,
                          KnownSrc, Depth + 1, Q);
      Known.propagateNaN(KnownSrc, /*PropagateSign=*/true);

      // Sign is preserved, but underflows may produce zeroes.
      if (KnownSrc.isKnownNever(fcNegative))
        Known.knownNot(fcNegative);
      else if (KnownSrc.cannotBeOrderedLessThanZero())
        Known.knownNot(KnownFPClass::OrderedLessThanZeroMask);

      if (KnownSrc.isKnownNever(fcPositive))
        Known.knownNot(fcPositive);
      else if (KnownSrc.cannotBeOrderedGreaterThanZero())
        Known.knownNot(KnownFPClass::OrderedGreaterThanZeroMask);

      // Can refine inf/zero handling based on the exponent operand.
      const FPClassTest ExpInfoMask = fcZero | fcSubnormal | fcInf;
      if ((InterestedClasses & ExpInfoMask) == fcNone)
        break;
      if ((KnownSrc.KnownFPClasses & ExpInfoMask) == fcNone)
        break;

      const fltSemantics &Flt =
          II->getType()->getScalarType()->getFltSemantics();
      unsigned Precision = APFloat::semanticsPrecision(Flt);
      const Value *ExpArg = II->getArgOperand(1);
      ConstantRange ExpRange = computeConstantRange(
          ExpArg, true, Q.IIQ.UseInstrInfo, Q.AC, Q.CxtI, Q.DT, Depth + 1);

      const int MantissaBits = Precision - 1;
      if (ExpRange.getSignedMin().sge(static_cast<int64_t>(MantissaBits)))
        Known.knownNot(fcSubnormal);
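
      // (Illustrative: for f64 the precision is 53, so once the exponent
      // operand is known to be >= 52, any finite input is scaled well out of
      // the subnormal range.)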

      const Function *F = II->getFunction();
      const APInt *ConstVal = ExpRange.getSingleElement();
      if (ConstVal && ConstVal->isZero()) {
        // ldexp(x, 0) -> x, so propagate everything.
        Known.propagateCanonicalizingSrc(KnownSrc, *F, II->getType());
      } else if (ExpRange.isAllNegative()) {
        // If we know the power is <= 0, can't introduce inf
        if (KnownSrc.isKnownNeverPosInfinity())
          Known.knownNot(fcPosInf);
        if (KnownSrc.isKnownNeverNegInfinity())
          Known.knownNot(fcNegInf);
      } else if (ExpRange.isAllNonNegative()) {
        // If we know the power is >= 0, can't introduce subnormal or zero
        if (KnownSrc.isKnownNeverPosSubnormal())
          Known.knownNot(fcPosSubnormal);
        if (KnownSrc.isKnownNeverNegSubnormal())
          Known.knownNot(fcNegSubnormal);
        if (F && KnownSrc.isKnownNeverLogicalPosZero(*F, II->getType()))
          Known.knownNot(fcPosZero);
        if (F && KnownSrc.isKnownNeverLogicalNegZero(*F, II->getType()))
          Known.knownNot(fcNegZero);
      }

      break;
    }
    case Intrinsic::arithmetic_fence: {
      computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedClasses,
                          Known, Depth + 1, Q);
      break;
    }
    case Intrinsic::experimental_constrained_sitofp:
    case Intrinsic::experimental_constrained_uitofp:
      // Cannot produce nan
      Known.knownNot(fcNan);

      // sitofp and uitofp turn into +0.0 for zero.
      Known.knownNot(fcNegZero);

      // Integers cannot be subnormal
      Known.knownNot(fcSubnormal);

      if (IID == Intrinsic::experimental_constrained_uitofp)
        Known.signBitMustBeZero();

      // TODO: Copy inf handling from instructions
      break;
    default:
      break;
    }

    break;
  }
  case Instruction::FAdd:
  case Instruction::FSub: {
    KnownFPClass KnownLHS, KnownRHS;
    bool WantNegative =
        Op->getOpcode() == Instruction::FAdd &&
        (InterestedClasses & KnownFPClass::OrderedLessThanZeroMask) != fcNone;
    bool WantNaN = (InterestedClasses & fcNan) != fcNone;
    bool WantNegZero = (InterestedClasses & fcNegZero) != fcNone;

    if (!WantNaN && !WantNegative && !WantNegZero)
      break;

    FPClassTest InterestedSrcs = InterestedClasses;
    if (WantNegative)
      InterestedSrcs |= KnownFPClass::OrderedLessThanZeroMask;
    if (InterestedClasses & fcNan)
      InterestedSrcs |= fcInf;
    computeKnownFPClass(Op->getOperand(1), DemandedElts, InterestedSrcs,
                        KnownRHS, Depth + 1, Q);

    if ((WantNaN && KnownRHS.isKnownNeverNaN()) ||
        (WantNegative && KnownRHS.cannotBeOrderedLessThanZero()) ||
        WantNegZero || Opc == Instruction::FSub) {

      // RHS is canonically cheaper to compute. Skip inspecting the LHS if
      // there's no point.
      computeKnownFPClass(Op->getOperand(0), DemandedElts, InterestedSrcs,
                          KnownLHS, Depth + 1, Q);
      // Adding positive and negative infinity produces NaN.
      // TODO: Check sign of infinities.
      if (KnownLHS.isKnownNeverNaN() && KnownRHS.isKnownNeverNaN() &&
          (KnownLHS.isKnownNeverInfinity() || KnownRHS.isKnownNeverInfinity()))
        Known.knownNot(fcNan);

      // FIXME: Context function should always be passed in separately
      const Function *F = cast<Instruction>(Op)->getFunction();

      if (Op->getOpcode() == Instruction::FAdd) {
        if (KnownLHS.cannotBeOrderedLessThanZero() &&
            KnownRHS.cannotBeOrderedLessThanZero())
          Known.knownNot(KnownFPClass::OrderedLessThanZeroMask);
        if (!F)
          break;

        // (fadd x, 0.0) is guaranteed to return +0.0, not -0.0.
        if ((KnownLHS.isKnownNeverLogicalNegZero(*F, Op->getType()) ||
             KnownRHS.isKnownNeverLogicalNegZero(*F, Op->getType())) &&
            // Make sure output negative denormal can't flush to -0
            outputDenormalIsIEEEOrPosZero(*F, Op->getType()))
          Known.knownNot(fcNegZero);
      } else {
        if (!F)
          break;

        // Only fsub -0, +0 can return -0
        if ((KnownLHS.isKnownNeverLogicalNegZero(*F, Op->getType()) ||
             KnownRHS.isKnownNeverLogicalPosZero(*F, Op->getType())) &&
            // Make sure output negative denormal can't flush to -0
            outputDenormalIsIEEEOrPosZero(*F, Op->getType()))
          Known.knownNot(fcNegZero);
      }
    }

    break;
  }
  case Instruction::FMul: {
    // X * X is always non-negative or a NaN.
    if (Op->getOperand(0) == Op->getOperand(1))
      Known.knownNot(fcNegative);

    if ((InterestedClasses & fcNan) != fcNan)
      break;

    // fcSubnormal is only needed in case of DAZ.
    const FPClassTest NeedForNan = fcNan | fcInf | fcZero | fcSubnormal;

    KnownFPClass KnownLHS, KnownRHS;
    computeKnownFPClass(Op->getOperand(1), DemandedElts, NeedForNan, KnownRHS,
                        Depth + 1, Q);
    if (!KnownRHS.isKnownNeverNaN())
      break;

    computeKnownFPClass(Op->getOperand(0), DemandedElts, NeedForNan, KnownLHS,
                        Depth + 1, Q);
    if (!KnownLHS.isKnownNeverNaN())
      break;

    if (KnownLHS.SignBit && KnownRHS.SignBit) {
      if (*KnownLHS.SignBit == *KnownRHS.SignBit)
        Known.signBitMustBeZero();
      else
        Known.signBitMustBeOne();
    }

    // Only 0 * +/-inf produces NaN.
    if (KnownLHS.isKnownNeverInfinity() && KnownRHS.isKnownNeverInfinity()) {
      Known.knownNot(fcNan);
      break;
    }

    const Function *F = cast<Instruction>(Op)->getFunction();
    if (!F)
      break;

    if ((KnownRHS.isKnownNeverInfinity() ||
         KnownLHS.isKnownNeverLogicalZero(*F, Op->getType())) &&
        (KnownLHS.isKnownNeverInfinity() ||
         KnownRHS.isKnownNeverLogicalZero(*F, Op->getType())))
      Known.knownNot(fcNan);

    break;
  }
  case Instruction::FDiv:
  case Instruction::FRem: {
    if (Op->getOperand(0) == Op->getOperand(1)) {
      // TODO: Could filter out snan if we inspect the operand
      if (Op->getOpcode() == Instruction::FDiv) {
        // X / X is always exactly 1.0 or a NaN.
        Known.KnownFPClasses = fcNan | fcPosNormal;
      } else {
        // X % X is always exactly [+-]0.0 or a NaN.
        Known.KnownFPClasses = fcNan | fcZero;
      }

      break;
    }

    const bool WantNan = (InterestedClasses & fcNan) != fcNone;
    const bool WantNegative = (InterestedClasses & fcNegative) != fcNone;
    const bool WantPositive =
        Opc == Instruction::FRem && (InterestedClasses & fcPositive) != fcNone;
    if (!WantNan && !WantNegative && !WantPositive)
      break;

    KnownFPClass KnownLHS, KnownRHS;

    computeKnownFPClass(Op->getOperand(1), DemandedElts,
                        fcNan | fcInf | fcZero | fcNegative, KnownRHS,
                        Depth + 1, Q);

    bool KnowSomethingUseful =
        KnownRHS.isKnownNeverNaN() || KnownRHS.isKnownNever(fcNegative);

    if (KnowSomethingUseful || WantPositive) {
      const FPClassTest InterestedLHS =
          WantPositive ? fcAllFlags
                       : fcNan | fcInf | fcZero | fcSubnormal | fcNegative;

      computeKnownFPClass(Op->getOperand(0), DemandedElts,
                          InterestedClasses & InterestedLHS, KnownLHS,
                          Depth + 1, Q);
    }

    const Function *F = cast<Instruction>(Op)->getFunction();

    if (Op->getOpcode() == Instruction::FDiv) {
      // Only 0/0, Inf/Inf produce NaN.
      if (KnownLHS.isKnownNeverNaN() && KnownRHS.isKnownNeverNaN() &&
          (KnownLHS.isKnownNeverInfinity() ||
           KnownRHS.isKnownNeverInfinity()) &&
          ((F && KnownLHS.isKnownNeverLogicalZero(*F, Op->getType())) ||
           (F && KnownRHS.isKnownNeverLogicalZero(*F, Op->getType())))) {
        Known.knownNot(fcNan);
      }

      // X / -0.0 is -Inf (or NaN).
      // +X / +X is +X
      if (KnownLHS.isKnownNever(fcNegative) && KnownRHS.isKnownNever(fcNegative))
        Known.knownNot(fcNegative);
    } else {
      // Inf REM x and x REM 0 produce NaN.
      if (KnownLHS.isKnownNeverNaN() && KnownRHS.isKnownNeverNaN() &&
          KnownLHS.isKnownNeverInfinity() && F &&
          KnownRHS.isKnownNeverLogicalZero(*F, Op->getType())) {
        Known.knownNot(fcNan);
      }

      // The sign for frem is the same as the first operand.
      if (KnownLHS.cannotBeOrderedLessThanZero())
        Known.knownNot(KnownFPClass::OrderedLessThanZeroMask);
      if (KnownLHS.cannotBeOrderedGreaterThanZero())
        Known.knownNot(KnownFPClass::OrderedGreaterThanZeroMask);

      // See if we can be more aggressive about the sign of 0.
      if (KnownLHS.isKnownNever(fcNegative))
        Known.knownNot(fcNegative);
      if (KnownLHS.isKnownNever(fcPositive))
        Known.knownNot(fcPositive);
    }

    break;
  }
  case Instruction::FPExt: {
    // Infinity, nan and zero propagate from source.
    computeKnownFPClass(Op->getOperand(0), DemandedElts, InterestedClasses,
                        Known, Depth + 1, Q);

    const fltSemantics &DstTy =
        Op->getType()->getScalarType()->getFltSemantics();
    const fltSemantics &SrcTy =
        Op->getOperand(0)->getType()->getScalarType()->getFltSemantics();

    // All subnormal inputs should be in the normal range in the result type.
    if (APFloat::isRepresentableAsNormalIn(SrcTy, DstTy)) {
      if (Known.KnownFPClasses & fcPosSubnormal)
        Known.KnownFPClasses |= fcPosNormal;
      if (Known.KnownFPClasses & fcNegSubnormal)
        Known.KnownFPClasses |= fcNegNormal;
      Known.knownNot(fcSubnormal);
    }

    // Sign bit of a nan isn't guaranteed.
    if (!Known.isKnownNeverNaN())
      Known.SignBit = std::nullopt;
    break;
  }
  case Instruction::FPTrunc: {
    computeKnownFPClassForFPTrunc(Op, DemandedElts, InterestedClasses, Known,
                                  Depth, Q);
    break;
  }
  case Instruction::SIToFP:
  case Instruction::UIToFP: {
    // Cannot produce nan
    Known.knownNot(fcNan);

    // Integers cannot be subnormal
    Known.knownNot(fcSubnormal);

    // sitofp and uitofp turn into +0.0 for zero.
    Known.knownNot(fcNegZero);
    if (Op->getOpcode() == Instruction::UIToFP)
      Known.signBitMustBeZero();

    if (InterestedClasses & fcInf) {
      // Get width of largest magnitude integer (remove a bit if signed).
      // This still works for a signed minimum value because the largest FP
      // value is scaled by some fraction close to 2.0 (1.0 + 0.xxxx).
      int IntSize = Op->getOperand(0)->getType()->getScalarSizeInBits();
      if (Op->getOpcode() == Instruction::SIToFP)
        --IntSize;

      // If the exponent of the largest finite FP value can hold the largest
      // integer, the result of the cast must be finite.
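      // (Illustrative: for "sitofp i32 %n to double", IntSize is 31 and
      // ilogb of the largest f64 is 1023, so the result can never be an
      // infinity.)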
      Type *FPTy = Op->getType()->getScalarType();
      if (ilogb(APFloat::getLargest(FPTy->getFltSemantics())) >= IntSize)
        Known.knownNot(fcInf);
    }

    break;
  }
  case Instruction::ExtractElement: {
    // Look through extract element. If the index is non-constant or
    // out-of-range demand all elements, otherwise just the extracted element.
    const Value *Vec = Op->getOperand(0);
    const Value *Idx = Op->getOperand(1);
    auto *CIdx = dyn_cast<ConstantInt>(Idx);

    if (auto *VecTy = dyn_cast<FixedVectorType>(Vec->getType())) {
      unsigned NumElts = VecTy->getNumElements();
      APInt DemandedVecElts = APInt::getAllOnes(NumElts);
      if (CIdx && CIdx->getValue().ult(NumElts))
        DemandedVecElts = APInt::getOneBitSet(NumElts, CIdx->getZExtValue());
      return computeKnownFPClass(Vec, DemandedVecElts, InterestedClasses, Known,
                                 Depth + 1, Q);
    }

    break;
  }
  case Instruction::InsertElement: {
    if (isa<ScalableVectorType>(Op->getType()))
      return;

    const Value *Vec = Op->getOperand(0);
    const Value *Elt = Op->getOperand(1);
    auto *CIdx = dyn_cast<ConstantInt>(Op->getOperand(2));
    unsigned NumElts = DemandedElts.getBitWidth();
    APInt DemandedVecElts = DemandedElts;
    bool NeedsElt = true;
    // If we know the index we are inserting to, clear it from Vec check.
    if (CIdx && CIdx->getValue().ult(NumElts)) {
      DemandedVecElts.clearBit(CIdx->getZExtValue());
      NeedsElt = DemandedElts[CIdx->getZExtValue()];
    }

    // Do we demand the inserted element?
    if (NeedsElt) {
      computeKnownFPClass(Elt, Known, InterestedClasses, Depth + 1, Q);
      // If we don't know any bits, early out.
      if (Known.isUnknown())
        break;
    } else {
      Known.KnownFPClasses = fcNone;
    }

    // Do we need any more elements from Vec?
    if (!DemandedVecElts.isZero()) {
      KnownFPClass Known2;
      computeKnownFPClass(Vec, DemandedVecElts, InterestedClasses, Known2,
                          Depth + 1, Q);
      Known |= Known2;
    }

    break;
  }
  case Instruction::ShuffleVector: {
    // For undef elements, we don't know anything about the common state of
    // the shuffle result.
    APInt DemandedLHS, DemandedRHS;
    auto *Shuf = dyn_cast<ShuffleVectorInst>(Op);
    if (!Shuf || !getShuffleDemandedElts(Shuf, DemandedElts, DemandedLHS,
                                         DemandedRHS))
      return;

    if (!!DemandedLHS) {
      const Value *LHS = Shuf->getOperand(0);
      computeKnownFPClass(LHS, DemandedLHS, InterestedClasses, Known,
                          Depth + 1, Q);

      // If we don't know any bits, early out.
      if (Known.isUnknown())
        break;
    } else {
      Known.KnownFPClasses = fcNone;
    }

    if (!!DemandedRHS) {
      KnownFPClass Known2;
      const Value *RHS = Shuf->getOperand(1);
      computeKnownFPClass(RHS, DemandedRHS, InterestedClasses, Known2,
                          Depth + 1, Q);
      Known |= Known2;
    }

    break;
  }
  case Instruction::ExtractValue: {
    const ExtractValueInst *Extract = cast<ExtractValueInst>(Op);
    ArrayRef<unsigned> Indices = Extract->getIndices();
    const Value *Src = Extract->getAggregateOperand();
    if (isa<StructType>(Src->getType()) && Indices.size() == 1 &&
        Indices[0] == 0) {
      if (const auto *II = dyn_cast<IntrinsicInst>(Src)) {
        switch (II->getIntrinsicID()) {
        case Intrinsic::frexp: {
          Known.knownNot(fcSubnormal);

          KnownFPClass KnownSrc;
          computeKnownFPClass(II->getArgOperand(0), DemandedElts,
                              InterestedClasses, KnownSrc, Depth + 1, Q);

          const Function *F = cast<Instruction>(Op)->getFunction();

          if (KnownSrc.isKnownNever(fcNegative))
            Known.knownNot(fcNegative);
          else {
            if (F && KnownSrc.isKnownNeverLogicalNegZero(*F, Op->getType()))
              Known.knownNot(fcNegZero);
            if (KnownSrc.isKnownNever(fcNegInf))
              Known.knownNot(fcNegInf);
          }

          if (KnownSrc.isKnownNever(fcPositive))
            Known.knownNot(fcPositive);
          else {
            if (F && KnownSrc.isKnownNeverLogicalPosZero(*F, Op->getType()))
              Known.knownNot(fcPosZero);
            if (KnownSrc.isKnownNever(fcPosInf))
              Known.knownNot(fcPosInf);
          }

          Known.propagateNaN(KnownSrc);
          return;
        }
        default:
          break;
        }
      }
    }

    computeKnownFPClass(Src, DemandedElts, InterestedClasses, Known, Depth + 1,
                        Q);
    break;
  }
  case Instruction::PHI: {
    const PHINode *P = cast<PHINode>(Op);
    // Unreachable blocks may have zero-operand PHI nodes.
    if (P->getNumIncomingValues() == 0)
      break;

    // Otherwise take the unions of the known bit sets of the operands,
    // taking conservative care to avoid excessive recursion.
    const unsigned PhiRecursionLimit = MaxAnalysisRecursionDepth - 2;

    if (Depth < PhiRecursionLimit) {
      // Skip if every incoming value references to ourself.
      if (isa_and_nonnull<UndefValue>(P->hasConstantValue()))
        break;

      bool First = true;

      for (const Use &U : P->operands()) {
        Value *IncValue = U.get();
        // Skip direct self references.
        if (IncValue == P)
          continue;

        KnownFPClass KnownSrc;
        // Recurse, but cap the recursion to two levels, because we don't want
        // to waste time spinning around in loops. We need at least depth 2 to
        // detect known sign bits.
        computeKnownFPClass(IncValue, DemandedElts, InterestedClasses, KnownSrc,
                            PhiRecursionLimit,
                            Q.getWithoutCondContext().getWithInstruction(
                                P->getIncomingBlock(U)->getTerminator()));

        if (First) {
          Known = KnownSrc;
          First = false;
        } else {
          Known |= KnownSrc;
        }

        if (Known.KnownFPClasses == fcAllFlags)
          break;
      }
    }

    break;
  }
  default:
    break;
  }
}

KnownFPClass llvm::computeKnownFPClass(const Value *V,
                                       const APInt &DemandedElts,
                                       FPClassTest InterestedClasses,
                                       unsigned Depth,
                                       const SimplifyQuery &SQ) {
  KnownFPClass KnownClasses;
  ::computeKnownFPClass(V, DemandedElts, InterestedClasses, KnownClasses, Depth,
                        SQ);
  return KnownClasses;
}

KnownFPClass llvm::computeKnownFPClass(const Value *V,
                                       FPClassTest InterestedClasses,
                                       unsigned Depth,
                                       const SimplifyQuery &SQ) {
  KnownFPClass Known;
  ::computeKnownFPClass(V, Known, InterestedClasses, Depth, SQ);
  return Known;
}
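
// Example use of the wrappers above (a minimal sketch; SQ stands in for a
// SimplifyQuery built from the caller's analyses):
//   KnownFPClass Known = computeKnownFPClass(V, fcNan, /*Depth=*/0, SQ);
//   if (Known.isKnownNeverNaN())
//     ... an fcmp ord/uno against V can be folded away ...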

Value *llvm::isBytewiseValue(Value *V, const DataLayout &DL) {

  // All byte-wide stores are splatable, even of arbitrary variables.
  if (V->getType()->isIntegerTy(8))
    return V;

  LLVMContext &Ctx = V->getContext();

  // Undef don't care.
  auto *UndefInt8 = UndefValue::get(Type::getInt8Ty(Ctx));
  if (isa<UndefValue>(V))
    return UndefInt8;

  // Return Undef for zero-sized type.
  if (DL.getTypeStoreSize(V->getType()).isZero())
    return UndefInt8;

  Constant *C = dyn_cast<Constant>(V);
  if (!C) {
    // Conceptually, we could handle things like:
    //   %a = zext i8 %X to i16
    //   %b = shl i16 %a, 8
    //   %c = or i16 %a, %b
    // but until there is an example that actually needs this, it doesn't seem
    // worth worrying about.
    return nullptr;
  }

  // Handle 'null' ConstantArrayZero etc.
  if (C->isNullValue())
    return Constant::getNullValue(Type::getInt8Ty(Ctx));

  // Constant floating-point values can be handled as integer values if the
  // corresponding integer value is "byteable". An important case is 0.0.
  if (ConstantFP *CFP = dyn_cast<ConstantFP>(C)) {
    Type *Ty = nullptr;
    if (CFP->getType()->isHalfTy())
      Ty = Type::getInt16Ty(Ctx);
    else if (CFP->getType()->isFloatTy())
      Ty = Type::getInt32Ty(Ctx);
    else if (CFP->getType()->isDoubleTy())
      Ty = Type::getInt64Ty(Ctx);
    // Don't handle long double formats, which have strange constraints.
    return Ty ? isBytewiseValue(ConstantExpr::getBitCast(CFP, Ty), DL)
              : nullptr;
  }

  // We can handle constant integers that are multiple of 8 bits.
  if (ConstantInt *CI = dyn_cast<ConstantInt>(C)) {
    if (CI->getBitWidth() % 8 == 0) {
      assert(CI->getBitWidth() > 8 && "8 bits should be handled above!");
      if (!CI->getValue().isSplat(8))
        return nullptr;
      return ConstantInt::get(Ctx, CI->getValue().trunc(8));
    }
  }
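
  // (Illustrative: i32 0xABABABAB splats to the byte 0xAB, while
  // i32 0xABABAB00 does not; memset-style transforms use this to pick a
  // fill byte.)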

  if (auto *CE = dyn_cast<ConstantExpr>(C)) {
    if (CE->getOpcode() == Instruction::IntToPtr) {
      if (auto *PtrTy = dyn_cast<PointerType>(CE->getType())) {
        unsigned BitWidth = DL.getPointerSizeInBits(PtrTy->getAddressSpace());
        if (Constant *Op = ConstantFoldIntegerCast(
                CE->getOperand(0), Type::getIntNTy(Ctx, BitWidth), false, DL))
          return isBytewiseValue(Op, DL);
      }
    }
    return nullptr;
  }

  auto Merge = [&](Value *LHS, Value *RHS) -> Value * {
    if (LHS == RHS)
      return LHS;
    if (!LHS || !RHS)
      return nullptr;
    if (LHS == UndefInt8)
      return RHS;
    if (RHS == UndefInt8)
      return LHS;
    return nullptr;
  };

  if (ConstantDataSequential *CA = dyn_cast<ConstantDataSequential>(C)) {
    Value *Val = UndefInt8;
    for (unsigned I = 0, E = CA->getNumElements(); I != E; ++I)
      if (!(Val = Merge(Val, isBytewiseValue(CA->getElementAsConstant(I), DL))))
        return nullptr;
    return Val;
  }

  if (isa<ConstantAggregate>(C)) {
    Value *Val = UndefInt8;
    for (unsigned I = 0, E = C->getNumOperands(); I != E; ++I)
      if (!(Val = Merge(Val, isBytewiseValue(C->getOperand(I), DL))))
        return nullptr;
    return Val;
  }

  // Don't try to handle the handful of other constants.
  return nullptr;
}

// This is the recursive version of BuildSubAggregate. It takes a few different
// arguments. Idxs is the index within the nested struct From that we are
// looking at now (which is of type IndexedType). IdxSkip is the number of
// indices from Idxs that should be left out when inserting into the resulting
// struct. To is the result struct built so far, new insertvalue instructions
// are appended to it.
static Value *BuildSubAggregate(Value *From, Value *To, Type *IndexedType,
                                SmallVectorImpl<unsigned> &Idxs,
                                unsigned IdxSkip,
                                BasicBlock::iterator InsertBefore) {
  StructType *STy = dyn_cast<StructType>(IndexedType);
  if (STy) {
    // Save the original To argument so we can modify it
    Value *OrigTo = To;
    // General case, the type indexed by Idxs is a struct
    for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
      // Process each struct element recursively
      Idxs.push_back(i);
      Value *PrevTo = To;
      To = BuildSubAggregate(From, To, STy->getElementType(i), Idxs, IdxSkip,
                             InsertBefore);
      Idxs.pop_back();
      if (!To) {
        // Couldn't find any inserted value for this index? Cleanup
        while (PrevTo != OrigTo) {
          InsertValueInst *Del = cast<InsertValueInst>(PrevTo);
          PrevTo = Del->getAggregateOperand();
          Del->eraseFromParent();
        }
        // Stop processing elements
        break;
      }
    }
    // If we successfully found a value for each of our subaggregates
    if (To)
      return To;
  }

  // Base case, the type indexed by SourceIdxs is not a struct, or not all of
  // the struct's elements had a value that was inserted directly. In the latter
  // case, perhaps we can't determine each of the subelements individually, but
  // we might be able to find the complete struct somewhere.

  // Find the value that is at that particular spot
  Value *V = FindInsertedValue(From, Idxs);

  if (!V)
    return nullptr;

  // Insert the value in the new (sub) aggregate
  return InsertValueInst::Create(To, V, ArrayRef(Idxs).slice(IdxSkip), "tmp",
                                 InsertBefore);
}

// This helper takes a nested struct and extracts a part of it (which is again a
// struct) into a new value. For example, given the struct:
// { a, { b, { c, d }, e } }
// and the indices "1, 1" this returns
// { c, d }.
//
// It does this by inserting an insertvalue for each element in the resulting
// struct, as opposed to just inserting a single struct. This will only work if
// each of the elements of the substruct are known (ie, inserted into From by an
// insertvalue instruction somewhere).
//
// All inserted insertvalue instructions are inserted before InsertBefore
static Value *BuildSubAggregate(Value *From, ArrayRef<unsigned> idx_range,
                                BasicBlock::iterator InsertBefore) {
  Type *IndexedType = ExtractValueInst::getIndexedType(From->getType(),
                                                       idx_range);
  Value *To = PoisonValue::get(IndexedType);
  SmallVector<unsigned, 10> Idxs(idx_range.begin(), idx_range.end());
  unsigned IdxSkip = Idxs.size();

  return BuildSubAggregate(From, To, IndexedType, Idxs, IdxSkip, InsertBefore);
}

/// Given an aggregate and a sequence of indices, see if the scalar value
/// indexed is already around as a register, for example if it was inserted
/// directly into the aggregate.
///
/// If InsertBefore is not null, this function will duplicate (modified)
/// insertvalues when a part of a nested struct is extracted.
Value *
llvm::FindInsertedValue(Value *V, ArrayRef<unsigned> idx_range,
                        std::optional<BasicBlock::iterator> InsertBefore) {
  // Nothing to index? Just return V then (this is useful at the end of our
  // recursion).
  if (idx_range.empty())
    return V;
  // We have indices, so V should have an indexable type.
  assert((V->getType()->isStructTy() || V->getType()->isArrayTy()) &&
         "Not looking at a struct or array?");
  assert(ExtractValueInst::getIndexedType(V->getType(), idx_range) &&
         "Invalid indices for type?");

  if (Constant *C = dyn_cast<Constant>(V)) {
    C = C->getAggregateElement(idx_range[0]);
    if (!C) return nullptr;
    return FindInsertedValue(C, idx_range.slice(1), InsertBefore);
  }

  if (InsertValueInst *I = dyn_cast<InsertValueInst>(V)) {
    // Loop the indices for the insertvalue instruction in parallel with the
    // requested indices
    const unsigned *req_idx = idx_range.begin();
    for (const unsigned *i = I->idx_begin(), *e = I->idx_end();
         i != e; ++i, ++req_idx) {
      if (req_idx == idx_range.end()) {
        // We can't handle this without inserting insertvalues
        if (!InsertBefore)
          return nullptr;

        // The requested index identifies a part of a nested aggregate. Handle
        // this specially. For example,
        // %A = insertvalue { i32, {i32, i32 } } undef, i32 10, 1, 0
        // %B = insertvalue { i32, {i32, i32 } } %A, i32 11, 1, 1
        // %C = extractvalue {i32, { i32, i32 } } %B, 1
        // This can be changed into
        // %A = insertvalue {i32, i32 } undef, i32 10, 0
        // %C = insertvalue {i32, i32 } %A, i32 11, 1
        // which allows the unused 0,0 element from the nested struct to be
        // removed.
        return BuildSubAggregate(V, ArrayRef(idx_range.begin(), req_idx),
                                 *InsertBefore);
      }

      // This insert value inserts something else than what we are looking for.
      // See if the (aggregate) value inserted into has the value we are
      // looking for, then.
      if (*req_idx != *i)
        return FindInsertedValue(I->getAggregateOperand(), idx_range,
                                 InsertBefore);
    }
    // If we end up here, the indices of the insertvalue match with those
    // requested (though possibly only partially). Now we recursively look at
    // the inserted value, passing any remaining indices.
    return FindInsertedValue(I->getInsertedValueOperand(),
                             ArrayRef(req_idx, idx_range.end()), InsertBefore);
  }

  if (ExtractValueInst *I = dyn_cast<ExtractValueInst>(V)) {
    // If we're extracting a value from an aggregate that was extracted from
    // something else, we can extract from that something else directly instead.
    // However, we will need to chain I's indices with the requested indices.

    // Calculate the number of indices required
    unsigned size = I->getNumIndices() + idx_range.size();
    // Allocate some space to put the new indices in
    SmallVector<unsigned, 5> Idxs;
    Idxs.reserve(size);
    // Add indices from the extract value instruction
    Idxs.append(I->idx_begin(), I->idx_end());

    // Add requested indices
    Idxs.append(idx_range.begin(), idx_range.end());

    assert(Idxs.size() == size
           && "Number of indices added not correct?");

    return FindInsertedValue(I->getAggregateOperand(), Idxs, InsertBefore);
  }
  // Otherwise, we don't know (such as, extracting from a function return value
  // or load instruction)
  return nullptr;
}

bool llvm::isGEPBasedOnPointerToString(const GEPOperator *GEP,
                                       unsigned CharSize) {
  // Make sure the GEP has exactly three arguments.
  if (GEP->getNumOperands() != 3)
    return false;

  // Make sure the index-ee is a pointer to array of \p CharSize integers.
  ArrayType *AT = dyn_cast<ArrayType>(GEP->getSourceElementType());
  if (!AT || !AT->getElementType()->isIntegerTy(CharSize))
    return false;

  // Check to make sure that the first operand of the GEP is an integer and
  // has value 0 so that we are sure we're indexing into the initializer.
  const ConstantInt *FirstIdx = dyn_cast<ConstantInt>(GEP->getOperand(1));
  if (!FirstIdx || !FirstIdx->isZero())
    return false;

  return true;
}

// If V refers to an initialized global constant, set Slice either to
// its initializer if the size of its elements equals ElementSize, or,
// for ElementSize == 8, to its representation as an array of unsigned
// char. Return true on success.
// Offset is in the unit "nr of ElementSize sized elements".
bool llvm::getConstantDataArrayInfo(const Value *V,
                                    ConstantDataArraySlice &Slice,
                                    unsigned ElementSize, uint64_t Offset) {
  assert(V && "V should not be null.");
  assert((ElementSize % 8) == 0 &&
         "ElementSize expected to be a multiple of the size of a byte.");
  unsigned ElementSizeInBytes = ElementSize / 8;

  // Drill down into the pointer expression V, ignoring any intervening
  // casts, and determine the identity of the object it references along
  // with the cumulative byte offset into it.
  const GlobalVariable *GV =
      dyn_cast<GlobalVariable>(getUnderlyingObject(V));
  if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer())
    // Fail if V is not based on constant global object.
    return false;

  const DataLayout &DL = GV->getDataLayout();
  APInt Off(DL.getIndexTypeSizeInBits(V->getType()), 0);

  if (GV != V->stripAndAccumulateConstantOffsets(DL, Off,
                                                 /*AllowNonInbounds*/ true))
    // Fail if a constant offset could not be determined.
    return false;

  uint64_t StartIdx = Off.getLimitedValue();
  if (StartIdx == UINT64_MAX)
    // Fail if the constant offset is excessive.
    return false;

  // Off/StartIdx is in the unit of bytes. So we need to convert to number of
  // elements. Simply bail out if that isn't possible.
  if ((StartIdx % ElementSizeInBytes) != 0)
    return false;

  Offset += StartIdx / ElementSizeInBytes;
  ConstantDataArray *Array = nullptr;
  ArrayType *ArrayTy = nullptr;

  if (GV->getInitializer()->isNullValue()) {
    Type *GVTy = GV->getValueType();
    uint64_t SizeInBytes = DL.getTypeStoreSize(GVTy).getFixedValue();
    uint64_t Length = SizeInBytes / ElementSizeInBytes;

    Slice.Array = nullptr;
    Slice.Offset = 0;
    // Return an empty Slice for undersized constants to let callers
    // transform even undefined library calls into simpler, well-defined
    // expressions. This is preferable to making the calls although it
    // prevents sanitizers from detecting such calls.
    Slice.Length = Length < Offset ? 0 : Length - Offset;
    return true;
  }

  auto *Init = const_cast<Constant *>(GV->getInitializer());
  if (auto *ArrayInit = dyn_cast<ConstantDataArray>(Init)) {
    Type *InitElTy = ArrayInit->getElementType();
    if (InitElTy->isIntegerTy(ElementSize)) {
      // If Init is an initializer for an array of the expected type
      // and size, use it as is.
      Array = ArrayInit;
      ArrayTy = ArrayInit->getType();
    }
  }

  if (!Array) {
    if (ElementSize != 8)
      // TODO: Handle conversions to larger integral types.
      return false;

    // Otherwise extract the portion of the initializer starting
    // at Offset as an array of bytes, and reset Offset.
    Init = ReadByteArrayFromGlobal(GV, Offset);
    if (!Init)
      return false;

    Offset = 0;
    Array = dyn_cast<ConstantDataArray>(Init);
    ArrayTy = dyn_cast<ArrayType>(Init->getType());
  }

  uint64_t NumElts = ArrayTy->getArrayNumElements();
  if (Offset > NumElts)
    return false;

  Slice.Array = Array;
  Slice.Offset = Offset;
  Slice.Length = NumElts - Offset;
  return true;
}

/// Extract bytes from the initializer of the constant array V, which need
/// not be a nul-terminated string. On success, store the bytes in Str and
/// return true. When TrimAtNul is set, Str will contain only the bytes up
/// to but not including the first nul. Return false on failure.
bool llvm::getConstantStringInfo(const Value *V, StringRef &Str,
                                 bool TrimAtNul) {
  ConstantDataArraySlice Slice;
  if (!getConstantDataArrayInfo(V, Slice, 8))
    return false;

  if (Slice.Array == nullptr) {
    if (TrimAtNul) {
      // Return a nul-terminated string even for an empty Slice. This is
      // safe because all existing SimplifyLibcalls callers require string
      // arguments and the behavior of the functions they fold is undefined
      // otherwise. Folding the calls this way is preferable to making
      // the undefined library calls, even though it prevents sanitizers
      // from reporting such calls.
      Str = StringRef();
      return true;
    }
    if (Slice.Length == 1) {
      Str = StringRef("", 1);
      return true;
    }
    // We cannot instantiate a StringRef as we do not have an appropriate string
    // of 0s at hand.
    return false;
  }

  // Start out with the entire array in the StringRef.
  Str = Slice.Array->getAsString();
  // Skip over 'offset' bytes.
  Str = Str.substr(Slice.Offset);

  if (TrimAtNul) {
    // Trim off the \0 and anything after it. If the array is not nul
    // terminated, we just return the whole end of string. The client may know
    // some other way that the string is length-bound.
    Str = Str.substr(0, Str.find('\0'));
  }
  return true;
}
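
// For example (an illustrative global, not from the source): given
//   @s = private constant [6 x i8] c"hello\00"
// this returns Str == "hello" when TrimAtNul is true, and all six bytes
// (including the nul) when it is false.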

// These next two are very similar to the above, but also look through PHI
// nodes.
// TODO: See if we can integrate these two together.

/// If we can compute the length of the string pointed to by
/// the specified pointer, return 'len+1'. If we can't, return 0.
static uint64_t GetStringLengthH(const Value *V,
                                 SmallPtrSetImpl<const PHINode *> &PHIs,
                                 unsigned CharSize) {
  // Look through noop bitcast instructions.
  V = V->stripPointerCasts();

  // If this is a PHI node, there are two cases: either we have already seen it
  // or we haven't.
  if (const PHINode *PN = dyn_cast<PHINode>(V)) {
    if (!PHIs.insert(PN).second)
      return ~0ULL; // already in the set.

    // If it was new, see if all the input strings are the same length.
    uint64_t LenSoFar = ~0ULL;
    for (Value *IncValue : PN->incoming_values()) {
      uint64_t Len = GetStringLengthH(IncValue, PHIs, CharSize);
      if (Len == 0) return 0; // Unknown length -> unknown.

      if (Len == ~0ULL) continue;

      if (Len != LenSoFar && LenSoFar != ~0ULL)
        return 0; // Disagree -> unknown.
      LenSoFar = Len;
    }

    // Success, all agree.
    return LenSoFar;
  }

  // strlen(select(c,x,y)) -> strlen(x) ^ strlen(y)
  if (const SelectInst *SI = dyn_cast<SelectInst>(V)) {
    uint64_t Len1 = GetStringLengthH(SI->getTrueValue(), PHIs, CharSize);
    if (Len1 == 0) return 0;
    uint64_t Len2 = GetStringLengthH(SI->getFalseValue(), PHIs, CharSize);
    if (Len2 == 0) return 0;
    if (Len1 == ~0ULL) return Len2;
    if (Len2 == ~0ULL) return Len1;
    if (Len1 != Len2) return 0;
    return Len1;
  }

  // Otherwise, see if we can read the string.
  ConstantDataArraySlice Slice;
  if (!getConstantDataArrayInfo(V, Slice, CharSize))
    return 0;

  if (Slice.Array == nullptr)
    // Zeroinitializer (including an empty one).
    return 1;

  // Search for the first nul character. Return a conservative result even
  // when there is no nul. This is safe since otherwise the string function
  // being folded such as strlen is undefined, and can be preferable to
  // making the undefined library call.
  unsigned NullIndex = 0;
  for (unsigned E = Slice.Length; NullIndex < E; ++NullIndex) {
    if (Slice.Array->getElementAsInteger(Slice.Offset + NullIndex) == 0)
      break;
  }

  return NullIndex + 1;
}

/// If we can compute the length of the string pointed to by
/// the specified pointer, return 'len+1'. If we can't, return 0.
uint64_t llvm::GetStringLength(const Value *V, unsigned CharSize) {
  if (!V->getType()->isPointerTy())
    return 0;

  SmallPtrSet<const PHINode *, 32> PHIs;
  uint64_t Len = GetStringLengthH(V, PHIs, CharSize);
  // If Len is ~0ULL, we had an infinite phi cycle: this is dead code, so return
  // an empty string as a length.
  return Len == ~0ULL ? 1 : Len;
}

const Value *
llvm::getArgumentAliasingToReturnedPointer(const CallBase *Call,
                                           bool MustPreserveNullness) {
  assert(Call &&
         "getArgumentAliasingToReturnedPointer only works on nonnull calls");
  if (const Value *RV = Call->getReturnedArgOperand())
    return RV;
  // This can be used only as an aliasing property.
  if (isIntrinsicReturningPointerAliasingArgumentWithoutCapturing(
          Call, MustPreserveNullness))
    return Call->getArgOperand(0);
  return nullptr;
}

bool llvm::isIntrinsicReturningPointerAliasingArgumentWithoutCapturing(
    const CallBase *Call, bool MustPreserveNullness) {
  switch (Call->getIntrinsicID()) {
  case Intrinsic::launder_invariant_group:
  case Intrinsic::strip_invariant_group:
  case Intrinsic::aarch64_irg:
  case Intrinsic::aarch64_tagp:
  // The amdgcn_make_buffer_rsrc function does not alter the address of the
  // input pointer (and thus preserves null-ness for the purposes of escape
  // analysis, which is where the MustPreserveNullness flag comes in to play).
  // However, it will not necessarily map ptr addrspace(N) null to ptr
  // addrspace(8) null, aka the "null descriptor", which has "all loads return
  // 0, all stores are dropped" semantics. Given the context of this intrinsic
  // list, no one should be relying on such a strict interpretation of
  // MustPreserveNullness (and, at time of writing, they are not), but we
  // document this fact out of an abundance of caution.
  case Intrinsic::amdgcn_make_buffer_rsrc:
    return true;
  case Intrinsic::ptrmask:
    return !MustPreserveNullness;
  case Intrinsic::threadlocal_address:
    // The underlying variable changes with thread ID. The Thread ID may change
    // at coroutine suspend points.
    return !Call->getParent()->getParent()->isPresplitCoroutine();
  default:
    return false;
  }
}

/// \p PN defines a loop-variant pointer to an object. Check if the
/// previous iteration of the loop was referring to the same object as \p PN.
static bool isSameUnderlyingObjectInLoop(const PHINode *PN,
                                         const LoopInfo *LI) {
  // Find the loop-defined value.
  Loop *L = LI->getLoopFor(PN->getParent());
  if (PN->getNumIncomingValues() != 2)
    return true;

  // Find the value from previous iteration.
  auto *PrevValue = dyn_cast<Instruction>(PN->getIncomingValue(0));
  if (!PrevValue || LI->getLoopFor(PrevValue->getParent()) != L)
    PrevValue = dyn_cast<Instruction>(PN->getIncomingValue(1));
  if (!PrevValue || LI->getLoopFor(PrevValue->getParent()) != L)
    return true;

  // If a new pointer is loaded in the loop, the pointer references a different
  // object in every iteration. E.g.:
  //    for (i)
  //       int *p = a[i];
  //       ...
  if (auto *Load = dyn_cast<LoadInst>(PrevValue))
    if (!L->isLoopInvariant(Load->getPointerOperand()))
      return false;
  return true;
}

const Value *llvm::getUnderlyingObject(const Value *V, unsigned MaxLookup) {
  if (!V->getType()->isPointerTy())
    return V;
  for (unsigned Count = 0; MaxLookup == 0 || Count < MaxLookup; ++Count) {
    if (auto *GEP = dyn_cast<GEPOperator>(V)) {
      V = GEP->getPointerOperand();
    } else if (Operator::getOpcode(V) == Instruction::BitCast ||
               Operator::getOpcode(V) == Instruction::AddrSpaceCast) {
      Value *NewV = cast<Operator>(V)->getOperand(0);
      if (!NewV->getType()->isPointerTy())
        return V;
      V = NewV;
    } else if (auto *GA = dyn_cast<GlobalAlias>(V)) {
      if (GA->isInterposable())
        return V;
      V = GA->getAliasee();
    } else {
      if (auto *PHI = dyn_cast<PHINode>(V)) {
        // Look through single-arg phi nodes created by LCSSA.
        if (PHI->getNumIncomingValues() == 1) {
          V = PHI->getIncomingValue(0);
          continue;
        }
      } else if (auto *Call = dyn_cast<CallBase>(V)) {
        // CaptureTracking can know about special capturing properties of some
        // intrinsics like launder.invariant.group, that can't be expressed with
        // the attributes, but have properties like returning aliasing pointer.
        // Because some analysis may assume that nocaptured pointer is not
        // returned from some special intrinsic (because function would have to
        // be marked with returns attribute), it is crucial to use this function
        // because it should be in sync with CaptureTracking. Not using it may
        // cause weird miscompilations where 2 aliasing pointers are assumed to
        // noalias.
        if (auto *RP = getArgumentAliasingToReturnedPointer(Call, false)) {
          V = RP;
          continue;
        }
      }

      return V;
    }
    assert(V->getType()->isPointerTy() && "Unexpected operand type!");
  }
  return V;
}
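
// Minimal usage sketch (hypothetical IR): for
//   %a = alloca [16 x i8]
//   %p = getelementptr inbounds [16 x i8], ptr %a, i64 0, i64 4
// getUnderlyingObject(%p) strips the GEP and returns %a.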

void llvm::getUnderlyingObjects(const Value *V,
                                SmallVectorImpl<const Value *> &Objects,
                                LoopInfo *LI, unsigned MaxLookup) {
  SmallPtrSet<const Value *, 4> Visited;
  SmallVector<const Value *, 4> Worklist;
  Worklist.push_back(V);
  do {
    const Value *P = Worklist.pop_back_val();
    P = getUnderlyingObject(P, MaxLookup);

    if (!Visited.insert(P).second)
      continue;

    if (auto *SI = dyn_cast<SelectInst>(P)) {
      Worklist.push_back(SI->getTrueValue());
      Worklist.push_back(SI->getFalseValue());
      continue;
    }

    if (auto *PN = dyn_cast<PHINode>(P)) {
      // If this PHI changes the underlying object in every iteration of the
      // loop, don't look through it. Consider:
      //   int **A;
      //   for (i) {
      //     Prev = Curr;     // Prev = PHI (Prev_0, Curr)
      //     Curr = A[i];
      //     *Prev, *Curr;
      //   }
      //
      // Prev is tracking Curr one iteration behind so they refer to different
      // underlying objects.
      if (!LI || !LI->isLoopHeader(PN->getParent()) ||
          isSameUnderlyingObjectInLoop(PN, LI))
        append_range(Worklist, PN->incoming_values());
      else
        Objects.push_back(P);
      continue;
    }

    Objects.push_back(P);
  } while (!Worklist.empty());
}
*llvm::getUnderlyingObjectAggressive(const Value
*V
) {
6624 const unsigned MaxVisited
= 8;
6626 SmallPtrSet
<const Value
*, 8> Visited
;
6627 SmallVector
<const Value
*, 8> Worklist
;
6628 Worklist
.push_back(V
);
6629 const Value
*Object
= nullptr;
6630 // Used as fallback if we can't find a common underlying object through
6633 const Value
*FirstObject
= getUnderlyingObject(V
);
6635 const Value
*P
= Worklist
.pop_back_val();
6636 P
= First
? FirstObject
: getUnderlyingObject(P
);
6639 if (!Visited
.insert(P
).second
)
6642 if (Visited
.size() == MaxVisited
)
6645 if (auto *SI
= dyn_cast
<SelectInst
>(P
)) {
6646 Worklist
.push_back(SI
->getTrueValue());
6647 Worklist
.push_back(SI
->getFalseValue());
6651 if (auto *PN
= dyn_cast
<PHINode
>(P
)) {
6652 append_range(Worklist
, PN
->incoming_values());
6658 else if (Object
!= P
)
6660 } while (!Worklist
.empty());
/// This is the function that does the work of looking through basic
/// ptrtoint+arithmetic+inttoptr sequences.
static const Value *getUnderlyingObjectFromInt(const Value *V) {
  do {
    if (const Operator *U = dyn_cast<Operator>(V)) {
      // If we find a ptrtoint, we can transfer control back to the
      // regular getUnderlyingObjectFromInt.
      if (U->getOpcode() == Instruction::PtrToInt)
        return U->getOperand(0);
      // If we find an add of a constant, a multiplied value, or a phi, it's
      // likely that the other operand will lead us to the base
      // object. We don't have to worry about the case where the
      // object address is somehow being computed by the multiply,
      // because our callers only care when the result is an
      // identifiable object.
      if (U->getOpcode() != Instruction::Add ||
          (!isa<ConstantInt>(U->getOperand(1)) &&
           Operator::getOpcode(U->getOperand(1)) != Instruction::Mul &&
           !isa<PHINode>(U->getOperand(1))))
        return V;
      V = U->getOperand(0);
    } else {
      return V;
    }
    assert(V->getType()->isIntegerTy() && "Unexpected operand type!");
  } while (true);
}
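
// Illustrative example (hypothetical IR) of a sequence this helper looks
// through:
//   %i = ptrtoint ptr %obj to i64
//   %j = add i64 %i, 16
//   %p = inttoptr i64 %j to ptr
// Starting from %j, the walk steps over the add's first operand and stops at
// the ptrtoint, handing %obj back to the pointer-based analysis.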
/// This is a wrapper around getUnderlyingObjects and adds support for basic
/// ptrtoint+arithmetic+inttoptr sequences.
/// It returns false if an unidentified object is found in
/// getUnderlyingObjects.
bool llvm::getUnderlyingObjectsForCodeGen(const Value *V,
                                          SmallVectorImpl<Value *> &Objects) {
  SmallPtrSet<const Value *, 16> Visited;
  SmallVector<const Value *, 4> Working(1, V);
  do {
    V = Working.pop_back_val();

    SmallVector<const Value *, 4> Objs;
    getUnderlyingObjects(V, Objs);

    for (const Value *V : Objs) {
      if (!Visited.insert(V).second)
        continue;
      if (Operator::getOpcode(V) == Instruction::IntToPtr) {
        const Value *O =
            getUnderlyingObjectFromInt(cast<User>(V)->getOperand(0));
        if (O->getType()->isPointerTy()) {
          Working.push_back(O);
          continue;
        }
      }
      // If getUnderlyingObjects fails to find an identifiable object,
      // getUnderlyingObjectsForCodeGen also fails for safety.
      if (!isIdentifiedObject(V)) {
        Objects.clear();
        return false;
      }
      Objects.push_back(const_cast<Value *>(V));
    }
  } while (!Working.empty());
  return true;
}
AllocaInst *llvm::findAllocaForValue(Value *V, bool OffsetZero) {
  AllocaInst *Result = nullptr;
  SmallPtrSet<Value *, 4> Visited;
  SmallVector<Value *, 4> Worklist;

  auto AddWork = [&](Value *V) {
    if (Visited.insert(V).second)
      Worklist.push_back(V);
  };

  AddWork(V);
  do {
    V = Worklist.pop_back_val();
    assert(Visited.count(V));

    if (AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
      if (Result && Result != AI)
        return nullptr;
      Result = AI;
    } else if (CastInst *CI = dyn_cast<CastInst>(V)) {
      AddWork(CI->getOperand(0));
    } else if (PHINode *PN = dyn_cast<PHINode>(V)) {
      for (Value *IncValue : PN->incoming_values())
        AddWork(IncValue);
    } else if (auto *SI = dyn_cast<SelectInst>(V)) {
      AddWork(SI->getTrueValue());
      AddWork(SI->getFalseValue());
    } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(V)) {
      if (OffsetZero && !GEP->hasAllZeroIndices())
        return nullptr;
      AddWork(GEP->getPointerOperand());
    } else if (CallBase *CB = dyn_cast<CallBase>(V)) {
      Value *Returned = CB->getReturnedArgOperand();
      if (Returned)
        AddWork(Returned);
      else
        return nullptr;
    } else {
      return nullptr;
    }
  } while (!Worklist.empty());

  return Result;
}
static bool onlyUsedByLifetimeMarkersOrDroppableInstsHelper(
    const Value *V, bool AllowLifetime, bool AllowDroppable) {
  for (const User *U : V->users()) {
    const IntrinsicInst *II = dyn_cast<IntrinsicInst>(U);
    if (!II)
      return false;

    if (AllowLifetime && II->isLifetimeStartOrEnd())
      continue;

    if (AllowDroppable && II->isDroppable())
      continue;

    return false;
  }
  return true;
}

bool llvm::onlyUsedByLifetimeMarkers(const Value *V) {
  return onlyUsedByLifetimeMarkersOrDroppableInstsHelper(
      V, /* AllowLifetime */ true, /* AllowDroppable */ false);
}

bool llvm::onlyUsedByLifetimeMarkersOrDroppableInsts(const Value *V) {
  return onlyUsedByLifetimeMarkersOrDroppableInstsHelper(
      V, /* AllowLifetime */ true, /* AllowDroppable */ true);
}
bool llvm::mustSuppressSpeculation(const LoadInst &LI) {
  if (!LI.isUnordered())
    return true;
  const Function &F = *LI.getFunction();
  // Speculative load may create a race that did not exist in the source.
  return F.hasFnAttribute(Attribute::SanitizeThread) ||
         // Speculative load may load data from dirty regions.
         F.hasFnAttribute(Attribute::SanitizeAddress) ||
         F.hasFnAttribute(Attribute::SanitizeHWAddress);
}
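
// Illustrative example (not from the original source): speculating the load in
//   if (c) v = *p;
// into an unconditional load can trip an ASan redzone check or produce a TSan
// race report when c is false, which is why sanitized functions must suppress
// load speculation here.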
bool llvm::isSafeToSpeculativelyExecute(const Instruction *Inst,
                                        const Instruction *CtxI,
                                        AssumptionCache *AC,
                                        const DominatorTree *DT,
                                        const TargetLibraryInfo *TLI,
                                        bool UseVariableInfo) {
  return isSafeToSpeculativelyExecuteWithOpcode(Inst->getOpcode(), Inst, CtxI,
                                                AC, DT, TLI, UseVariableInfo);
}

bool llvm::isSafeToSpeculativelyExecuteWithOpcode(
    unsigned Opcode, const Instruction *Inst, const Instruction *CtxI,
    AssumptionCache *AC, const DominatorTree *DT, const TargetLibraryInfo *TLI,
    bool UseVariableInfo) {
#ifndef NDEBUG
  if (Inst->getOpcode() != Opcode) {
    // Check that the operands are actually compatible with the Opcode
    // override.
    auto hasEqualReturnAndLeadingOperandTypes =
        [](const Instruction *Inst, unsigned NumLeadingOperands) {
          if (Inst->getNumOperands() < NumLeadingOperands)
            return false;
          const Type *ExpectedType = Inst->getType();
          for (unsigned ItOp = 0; ItOp < NumLeadingOperands; ++ItOp)
            if (Inst->getOperand(ItOp)->getType() != ExpectedType)
              return false;
          return true;
        };
    assert(!Instruction::isBinaryOp(Opcode) ||
           hasEqualReturnAndLeadingOperandTypes(Inst, 2));
    assert(!Instruction::isUnaryOp(Opcode) ||
           hasEqualReturnAndLeadingOperandTypes(Inst, 1));
  }
#endif
  switch (Opcode) {
  default:
    return true;
  case Instruction::UDiv:
  case Instruction::URem: {
    // x / y is undefined if y == 0.
    const APInt *V;
    if (match(Inst->getOperand(1), m_APInt(V)))
      return *V != 0;
    return false;
  }
  case Instruction::SDiv:
  case Instruction::SRem: {
    // x / y is undefined if y == 0 or x == INT_MIN and y == -1
    const APInt *Numerator, *Denominator;
    if (!match(Inst->getOperand(1), m_APInt(Denominator)))
      return false;
    // We cannot hoist this division if the denominator is 0.
    if (*Denominator == 0)
      return false;
    // It's safe to hoist if the denominator is not 0 or -1.
    if (!Denominator->isAllOnes())
      return true;
    // At this point we know that the denominator is -1. It is safe to hoist as
    // long we know that the numerator is not INT_MIN.
    if (match(Inst->getOperand(0), m_APInt(Numerator)))
      return !Numerator->isMinSignedValue();
    // The numerator *might* be MinSignedValue.
    return false;
  }
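  // Worked example (illustrative): for i8, INT_MIN / -1 is (-128) / (-1) =
  // +128, which does not fit in i8, so `sdiv i8 -128, -1` has undefined
  // behavior and the division cannot be hoisted unless the numerator is
  // known not to be INT_MIN.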
  case Instruction::Load: {
    if (!UseVariableInfo)
      return false;

    const LoadInst *LI = dyn_cast<LoadInst>(Inst);
    if (!LI)
      return false;
    if (mustSuppressSpeculation(*LI))
      return false;
    const DataLayout &DL = LI->getDataLayout();
    return isDereferenceableAndAlignedPointer(LI->getPointerOperand(),
                                              LI->getType(), LI->getAlign(),
                                              DL, CtxI, AC, DT, TLI);
  }
  case Instruction::Call: {
    auto *CI = dyn_cast<const CallInst>(Inst);
    if (!CI)
      return false;
    const Function *Callee = CI->getCalledFunction();

    // The called function could have undefined behavior or side-effects, even
    // if marked readnone nounwind.
    return Callee && Callee->isSpeculatable();
  }
  case Instruction::VAArg:
  case Instruction::Alloca:
  case Instruction::Invoke:
  case Instruction::CallBr:
  case Instruction::PHI:
  case Instruction::Store:
  case Instruction::Ret:
  case Instruction::Br:
  case Instruction::IndirectBr:
  case Instruction::Switch:
  case Instruction::Unreachable:
  case Instruction::Fence:
  case Instruction::AtomicRMW:
  case Instruction::AtomicCmpXchg:
  case Instruction::LandingPad:
  case Instruction::Resume:
  case Instruction::CatchSwitch:
  case Instruction::CatchPad:
  case Instruction::CatchRet:
  case Instruction::CleanupPad:
  case Instruction::CleanupRet:
    return false; // Misc instructions which have effects
  }
}
bool llvm::mayHaveNonDefUseDependency(const Instruction &I) {
  if (I.mayReadOrWriteMemory())
    // Memory dependency possible
    return true;
  if (!isSafeToSpeculativelyExecute(&I))
    // Can't move above a maythrow call or infinite loop.  Or if an
    // inalloca alloca, above a stacksave call.
    return true;
  if (!isGuaranteedToTransferExecutionToSuccessor(&I))
    // 1) Can't reorder two inf-loop calls, even if readonly
    // 2) Also can't reorder an inf-loop call below an instruction which isn't
    //    safe to speculatively execute.  (Inverse of above)
    return true;
  return false;
}
/// Convert ConstantRange OverflowResult into ValueTracking OverflowResult.
static OverflowResult mapOverflowResult(ConstantRange::OverflowResult OR) {
  switch (OR) {
  case ConstantRange::OverflowResult::MayOverflow:
    return OverflowResult::MayOverflow;
  case ConstantRange::OverflowResult::AlwaysOverflowsLow:
    return OverflowResult::AlwaysOverflowsLow;
  case ConstantRange::OverflowResult::AlwaysOverflowsHigh:
    return OverflowResult::AlwaysOverflowsHigh;
  case ConstantRange::OverflowResult::NeverOverflows:
    return OverflowResult::NeverOverflows;
  }
  llvm_unreachable("Unknown OverflowResult");
}
/// Combine constant ranges from computeConstantRange() and computeKnownBits().
ConstantRange
llvm::computeConstantRangeIncludingKnownBits(const WithCache<const Value *> &V,
                                             bool ForSigned,
                                             const SimplifyQuery &SQ) {
  ConstantRange CR1 =
      ConstantRange::fromKnownBits(V.getKnownBits(SQ), ForSigned);
  ConstantRange CR2 = computeConstantRange(V, ForSigned, SQ.IIQ.UseInstrInfo);
  ConstantRange::PreferredRangeType RangeType =
      ForSigned ? ConstantRange::Signed : ConstantRange::Unsigned;
  return CR1.intersectWith(CR2, RangeType);
}

OverflowResult llvm::computeOverflowForUnsignedMul(const Value *LHS,
                                                   const Value *RHS,
                                                   const SimplifyQuery &SQ,
                                                   bool IsNSW) {
  KnownBits LHSKnown = computeKnownBits(LHS, /*Depth=*/0, SQ);
  KnownBits RHSKnown = computeKnownBits(RHS, /*Depth=*/0, SQ);

  // mul nsw of two non-negative numbers is also nuw.
  if (IsNSW && LHSKnown.isNonNegative() && RHSKnown.isNonNegative())
    return OverflowResult::NeverOverflows;

  ConstantRange LHSRange = ConstantRange::fromKnownBits(LHSKnown, false);
  ConstantRange RHSRange = ConstantRange::fromKnownBits(RHSKnown, false);
  return mapOverflowResult(LHSRange.unsignedMulMayOverflow(RHSRange));
}
OverflowResult llvm::computeOverflowForSignedMul(const Value *LHS,
                                                 const Value *RHS,
                                                 const SimplifyQuery &SQ) {
  // Multiplying n * m significant bits yields a result of n + m significant
  // bits. If the total number of significant bits does not exceed the
  // result bit width (minus 1), there is no overflow.
  // This means if we have enough leading sign bits in the operands
  // we can guarantee that the result does not overflow.
  // Ref: "Hacker's Delight" by Henry Warren
  unsigned BitWidth = LHS->getType()->getScalarSizeInBits();

  // Note that underestimating the number of sign bits gives a more
  // conservative answer.
  unsigned SignBits =
      ::ComputeNumSignBits(LHS, 0, SQ) + ::ComputeNumSignBits(RHS, 0, SQ);

  // First handle the easy case: if we have enough sign bits there's
  // definitely no overflow.
  if (SignBits > BitWidth + 1)
    return OverflowResult::NeverOverflows;

  // There are two ambiguous cases where there can be no overflow:
  //   SignBits == BitWidth + 1    and
  //   SignBits == BitWidth
  // The second case is difficult to check, therefore we only handle the
  // first case.
  if (SignBits == BitWidth + 1) {
    // It overflows only when both arguments are negative and the true
    // product is exactly the minimum negative number.
    // E.g. mul i16 with 17 sign bits: 0xff00 * 0xff80 = 0x8000
    // For simplicity we just check if at least one side is not negative.
    KnownBits LHSKnown = computeKnownBits(LHS, /*Depth=*/0, SQ);
    KnownBits RHSKnown = computeKnownBits(RHS, /*Depth=*/0, SQ);
    if (LHSKnown.isNonNegative() || RHSKnown.isNonNegative())
      return OverflowResult::NeverOverflows;
  }
  return OverflowResult::MayOverflow;
}
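
// Worked example (illustrative): two i16 operands that each have 9 sign bits
// fit in 8 significant bits, so their product needs at most 16 significant
// bits; since SignBits = 9 + 9 = 18 > BitWidth + 1 = 17, the multiply is
// reported as NeverOverflows above.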
OverflowResult
llvm::computeOverflowForUnsignedAdd(const WithCache<const Value *> &LHS,
                                    const WithCache<const Value *> &RHS,
                                    const SimplifyQuery &SQ) {
  ConstantRange LHSRange =
      computeConstantRangeIncludingKnownBits(LHS, /*ForSigned=*/false, SQ);
  ConstantRange RHSRange =
      computeConstantRangeIncludingKnownBits(RHS, /*ForSigned=*/false, SQ);
  return mapOverflowResult(LHSRange.unsignedAddMayOverflow(RHSRange));
}
static OverflowResult
computeOverflowForSignedAdd(const WithCache<const Value *> &LHS,
                            const WithCache<const Value *> &RHS,
                            const AddOperator *Add, const SimplifyQuery &SQ) {
  if (Add && Add->hasNoSignedWrap()) {
    return OverflowResult::NeverOverflows;
  }

  // If LHS and RHS each have at least two sign bits, the addition will look
  // like
  //
  //   XX..... +
  //   YY.....
  //
  // If the carry into the most significant position is 0, X and Y can't both
  // be 1 and therefore the carry out of the addition is also 0.
  //
  // If the carry into the most significant position is 1, X and Y can't both
  // be 0 and therefore the carry out of the addition is also 1.
  //
  // Since the carry into the most significant position is always equal to
  // the carry out of the addition, there is no signed overflow.
  if (::ComputeNumSignBits(LHS, 0, SQ) > 1 &&
      ::ComputeNumSignBits(RHS, 0, SQ) > 1)
    return OverflowResult::NeverOverflows;
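
  // Worked example (illustrative): i8 operands with at least two sign bits
  // lie in [-64, 63], so their sum lies in [-128, 126], which is within the
  // i8 range [-128, 127]; such an addition can never signed-overflow.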
  ConstantRange LHSRange =
      computeConstantRangeIncludingKnownBits(LHS, /*ForSigned=*/true, SQ);
  ConstantRange RHSRange =
      computeConstantRangeIncludingKnownBits(RHS, /*ForSigned=*/true, SQ);
  OverflowResult OR =
      mapOverflowResult(LHSRange.signedAddMayOverflow(RHSRange));
  if (OR != OverflowResult::MayOverflow)
    return OR;

  // The remaining code needs Add to be available. Return early if it is not.
  if (!Add)
    return OverflowResult::MayOverflow;

  // If the sign of Add is the same as at least one of the operands, this add
  // CANNOT overflow. If this can be determined from the known bits of the
  // operands the above signedAddMayOverflow() check will have already done so.
  // The only other way to improve on the known bits is from an assumption, so
  // call computeKnownBitsFromContext() directly.
  bool LHSOrRHSKnownNonNegative =
      (LHSRange.isAllNonNegative() || RHSRange.isAllNonNegative());
  bool LHSOrRHSKnownNegative =
      (LHSRange.isAllNegative() || RHSRange.isAllNegative());
  if (LHSOrRHSKnownNonNegative || LHSOrRHSKnownNegative) {
    KnownBits AddKnown(LHSRange.getBitWidth());
    computeKnownBitsFromContext(Add, AddKnown, /*Depth=*/0, SQ);
    if ((AddKnown.isNonNegative() && LHSOrRHSKnownNonNegative) ||
        (AddKnown.isNegative() && LHSOrRHSKnownNegative))
      return OverflowResult::NeverOverflows;
  }

  return OverflowResult::MayOverflow;
}
OverflowResult llvm::computeOverflowForUnsignedSub(const Value *LHS,
                                                   const Value *RHS,
                                                   const SimplifyQuery &SQ) {
  // X - (X % ?)
  // The remainder of a value can't have greater magnitude than itself,
  // so the subtraction can't overflow.

  // X - (X -nuw ?)
  // In the minimal case, this would simplify to "?", so there's no subtract
  // at all. But if this analysis is used to peek through casts, for example,
  // then determining no-overflow may allow other transforms.

  // TODO: There are other patterns like this.
  //       See simplifyICmpWithBinOpOnLHS() for candidates.
  if (match(RHS, m_URem(m_Specific(LHS), m_Value())) ||
      match(RHS, m_NUWSub(m_Specific(LHS), m_Value())))
    if (isGuaranteedNotToBeUndef(LHS, SQ.AC, SQ.CxtI, SQ.DT))
      return OverflowResult::NeverOverflows;
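
  // Worked example (illustrative): with RHS = LHS urem 8, the remainder is
  // always u<= LHS (it equals LHS when LHS < 8 and is < 8 <= LHS otherwise),
  // so the unsigned subtraction LHS - RHS cannot wrap below zero.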
  // Checking for conditions implied by dominating conditions may be expensive.
  // Limit it to usub_with_overflow calls for now.
  if (match(SQ.CxtI,
            m_Intrinsic<Intrinsic::usub_with_overflow>(m_Value(), m_Value())))
    if (auto C = isImpliedByDomCondition(CmpInst::ICMP_UGE, LHS, RHS, SQ.CxtI,
                                         SQ.DL)) {
      if (*C)
        return OverflowResult::NeverOverflows;
      return OverflowResult::AlwaysOverflowsLow;
    }
  ConstantRange LHSRange =
      computeConstantRangeIncludingKnownBits(LHS, /*ForSigned=*/false, SQ);
  ConstantRange RHSRange =
      computeConstantRangeIncludingKnownBits(RHS, /*ForSigned=*/false, SQ);
  return mapOverflowResult(LHSRange.unsignedSubMayOverflow(RHSRange));
}
OverflowResult llvm::computeOverflowForSignedSub(const Value *LHS,
                                                 const Value *RHS,
                                                 const SimplifyQuery &SQ) {
  // X - (X % ?)
  // The remainder of a value can't have greater magnitude than itself,
  // so the subtraction can't overflow.

  // X - (X -nsw ?)
  // In the minimal case, this would simplify to "?", so there's no subtract
  // at all. But if this analysis is used to peek through casts, for example,
  // then determining no-overflow may allow other transforms.
  if (match(RHS, m_SRem(m_Specific(LHS), m_Value())) ||
      match(RHS, m_NSWSub(m_Specific(LHS), m_Value())))
    if (isGuaranteedNotToBeUndef(LHS, SQ.AC, SQ.CxtI, SQ.DT))
      return OverflowResult::NeverOverflows;

  // If LHS and RHS each have at least two sign bits, the subtraction
  // cannot overflow.
  if (::ComputeNumSignBits(LHS, 0, SQ) > 1 &&
      ::ComputeNumSignBits(RHS, 0, SQ) > 1)
    return OverflowResult::NeverOverflows;

  ConstantRange LHSRange =
      computeConstantRangeIncludingKnownBits(LHS, /*ForSigned=*/true, SQ);
  ConstantRange RHSRange =
      computeConstantRangeIncludingKnownBits(RHS, /*ForSigned=*/true, SQ);
  return mapOverflowResult(LHSRange.signedSubMayOverflow(RHSRange));
}
bool llvm::isOverflowIntrinsicNoWrap(const WithOverflowInst *WO,
                                     const DominatorTree &DT) {
  SmallVector<const BranchInst *, 2> GuardingBranches;
  SmallVector<const ExtractValueInst *, 2> Results;

  for (const User *U : WO->users()) {
    if (const auto *EVI = dyn_cast<ExtractValueInst>(U)) {
      assert(EVI->getNumIndices() == 1 && "Obvious from CI's type");

      if (EVI->getIndices()[0] == 0)
        Results.push_back(EVI);
      else {
        assert(EVI->getIndices()[0] == 1 && "Obvious from CI's type");

        for (const auto *U : EVI->users())
          if (const auto *B = dyn_cast<BranchInst>(U)) {
            assert(B->isConditional() && "How else is it using an i1?");
            GuardingBranches.push_back(B);
          }
      }
    } else {
      // We are using the aggregate directly in a way we don't want to analyze
      // here (storing it to a global, say).
      return false;
    }
  }

  auto AllUsesGuardedByBranch = [&](const BranchInst *BI) {
    BasicBlockEdge NoWrapEdge(BI->getParent(), BI->getSuccessor(1));
    if (!NoWrapEdge.isSingleEdge())
      return false;

    // Check if all users of the add are provably no-wrap.
    for (const auto *Result : Results) {
      // If the extractvalue itself is not executed on overflow, then we don't
      // need to check each use separately, since domination is transitive.
      if (DT.dominates(NoWrapEdge, Result->getParent()))
        continue;

      for (const auto &RU : Result->uses())
        if (!DT.dominates(NoWrapEdge, RU))
          return false;
    }

    return true;
  };

  return llvm::any_of(GuardingBranches, AllUsesGuardedByBranch);
}
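
// Illustrative IR (hypothetical) of the guarded pattern this recognizes:
//   %wo = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
//   %ov = extractvalue { i32, i1 } %wo, 1
//   br i1 %ov, label %trap, label %cont
// If every use of the sum (index 0) is dominated by the edge to %cont, the
// addition is known not to wrap there.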
/// Shifts return poison if shiftwidth is larger than the bitwidth.
static bool shiftAmountKnownInRange(const Value *ShiftAmount) {
  auto *C = dyn_cast<Constant>(ShiftAmount);
  if (!C)
    return false;

  // Shifts return poison if shiftwidth is larger than the bitwidth.
  SmallVector<const Constant *, 4> ShiftAmounts;
  if (auto *FVTy = dyn_cast<FixedVectorType>(C->getType())) {
    unsigned NumElts = FVTy->getNumElements();
    for (unsigned i = 0; i < NumElts; ++i)
      ShiftAmounts.push_back(C->getAggregateElement(i));
  } else if (isa<ScalableVectorType>(C->getType()))
    return false; // Can't tell, just return false to be safe
  else
    ShiftAmounts.push_back(C);

  bool Safe = llvm::all_of(ShiftAmounts, [](const Constant *C) {
    auto *CI = dyn_cast_or_null<ConstantInt>(C);
    return CI && CI->getValue().ult(C->getType()->getIntegerBitWidth());
  });

  return Safe;
}
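
// Illustrative example: `shl i8 %x, 8` shifts by the full bit width and so
// yields poison, while `shl i8 %x, 7` is in range; for fixed vectors, every
// lane's constant shift amount must be less than the element bit width.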
enum class UndefPoisonKind {
  PoisonOnly = (1 << 0),
  UndefOnly = (1 << 1),
  UndefOrPoison = PoisonOnly | UndefOnly,
};

static bool includesPoison(UndefPoisonKind Kind) {
  return (unsigned(Kind) & unsigned(UndefPoisonKind::PoisonOnly)) != 0;
}

static bool includesUndef(UndefPoisonKind Kind) {
  return (unsigned(Kind) & unsigned(UndefPoisonKind::UndefOnly)) != 0;
}
static bool canCreateUndefOrPoison(const Operator *Op, UndefPoisonKind Kind,
                                   bool ConsiderFlagsAndMetadata) {
  if (ConsiderFlagsAndMetadata && includesPoison(Kind) &&
      Op->hasPoisonGeneratingAnnotations())
    return true;

  unsigned Opcode = Op->getOpcode();

  // Check whether opcode is a poison/undef-generating operation
  switch (Opcode) {
  case Instruction::Shl:
  case Instruction::AShr:
  case Instruction::LShr:
    return includesPoison(Kind) && !shiftAmountKnownInRange(Op->getOperand(1));
  case Instruction::FPToSI:
  case Instruction::FPToUI:
    // fptosi/ui yields poison if the resulting value does not fit in the
    // destination type.
    return true;
  case Instruction::Call:
    if (auto *II = dyn_cast<IntrinsicInst>(Op)) {
      switch (II->getIntrinsicID()) {
      // TODO: Add more intrinsics.
      case Intrinsic::ctlz:
      case Intrinsic::cttz:
      case Intrinsic::abs:
        if (cast<ConstantInt>(II->getArgOperand(1))->isNullValue())
          return false;
        break;
      case Intrinsic::ctpop:
      case Intrinsic::bswap:
      case Intrinsic::bitreverse:
      case Intrinsic::fshl:
      case Intrinsic::fshr:
      case Intrinsic::smax:
      case Intrinsic::smin:
      case Intrinsic::umax:
      case Intrinsic::umin:
      case Intrinsic::ptrmask:
      case Intrinsic::fptoui_sat:
      case Intrinsic::fptosi_sat:
      case Intrinsic::sadd_with_overflow:
      case Intrinsic::ssub_with_overflow:
      case Intrinsic::smul_with_overflow:
      case Intrinsic::uadd_with_overflow:
      case Intrinsic::usub_with_overflow:
      case Intrinsic::umul_with_overflow:
      case Intrinsic::sadd_sat:
      case Intrinsic::uadd_sat:
      case Intrinsic::ssub_sat:
      case Intrinsic::usub_sat:
        return false;
      case Intrinsic::sshl_sat:
      case Intrinsic::ushl_sat:
        return includesPoison(Kind) &&
               !shiftAmountKnownInRange(II->getArgOperand(1));
      case Intrinsic::fma:
      case Intrinsic::fmuladd:
      case Intrinsic::sqrt:
      case Intrinsic::powi:
      case Intrinsic::sin:
      case Intrinsic::cos:
      case Intrinsic::pow:
      case Intrinsic::log:
      case Intrinsic::log10:
      case Intrinsic::log2:
      case Intrinsic::exp:
      case Intrinsic::exp2:
      case Intrinsic::exp10:
      case Intrinsic::fabs:
      case Intrinsic::copysign:
      case Intrinsic::floor:
      case Intrinsic::ceil:
      case Intrinsic::trunc:
      case Intrinsic::rint:
      case Intrinsic::nearbyint:
      case Intrinsic::round:
      case Intrinsic::roundeven:
      case Intrinsic::fptrunc_round:
      case Intrinsic::canonicalize:
      case Intrinsic::arithmetic_fence:
      case Intrinsic::minnum:
      case Intrinsic::maxnum:
      case Intrinsic::minimum:
      case Intrinsic::maximum:
      case Intrinsic::is_fpclass:
      case Intrinsic::ldexp:
      case Intrinsic::frexp:
        return false;
      case Intrinsic::lround:
      case Intrinsic::llround:
      case Intrinsic::lrint:
      case Intrinsic::llrint:
        // If the value doesn't fit an unspecified value is returned (but this
        // is not poison).
        return false;
      }
    }
    [[fallthrough]];
  case Instruction::CallBr:
  case Instruction::Invoke: {
    const auto *CB = cast<CallBase>(Op);
    return !CB->hasRetAttr(Attribute::NoUndef);
  }
  case Instruction::InsertElement:
  case Instruction::ExtractElement: {
    // If index exceeds the length of the vector, it returns poison
    auto *VTy = cast<VectorType>(Op->getOperand(0)->getType());
    unsigned IdxOp = Op->getOpcode() == Instruction::InsertElement ? 2 : 1;
    auto *Idx = dyn_cast<ConstantInt>(Op->getOperand(IdxOp));
    if (includesPoison(Kind))
      return !Idx ||
             Idx->getValue().uge(VTy->getElementCount().getKnownMinValue());
    return false;
  }
  case Instruction::ShuffleVector: {
    ArrayRef<int> Mask = isa<ConstantExpr>(Op)
                             ? cast<ConstantExpr>(Op)->getShuffleMask()
                             : cast<ShuffleVectorInst>(Op)->getShuffleMask();
    return includesPoison(Kind) && is_contained(Mask, PoisonMaskElem);
  }
  case Instruction::FNeg:
  case Instruction::PHI:
  case Instruction::Select:
  case Instruction::URem:
  case Instruction::SRem:
  case Instruction::ExtractValue:
  case Instruction::InsertValue:
  case Instruction::Freeze:
  case Instruction::ICmp:
  case Instruction::FCmp:
  case Instruction::FAdd:
  case Instruction::FSub:
  case Instruction::FMul:
  case Instruction::FDiv:
  case Instruction::FRem:
    return false;
  case Instruction::GetElementPtr:
    // inbounds is handled above
    // TODO: what about inrange on constexpr?
    return false;
  default: {
    const auto *CE = dyn_cast<ConstantExpr>(Op);
    if (isa<CastInst>(Op) || (CE && CE->isCast()))
      return false;
    else if (Instruction::isBinaryOp(Opcode))
      return false;
    // Be conservative and return true.
    return true;
  }
  }
}
bool llvm::canCreateUndefOrPoison(const Operator *Op,
                                  bool ConsiderFlagsAndMetadata) {
  return ::canCreateUndefOrPoison(Op, UndefPoisonKind::UndefOrPoison,
                                  ConsiderFlagsAndMetadata);
}

bool llvm::canCreatePoison(const Operator *Op, bool ConsiderFlagsAndMetadata) {
  return ::canCreateUndefOrPoison(Op, UndefPoisonKind::PoisonOnly,
                                  ConsiderFlagsAndMetadata);
}
static bool directlyImpliesPoison(const Value *ValAssumedPoison, const Value *V,
                                  unsigned Depth) {
  if (ValAssumedPoison == V)
    return true;

  const unsigned MaxDepth = 2;
  if (Depth >= MaxDepth)
    return false;

  if (const auto *I = dyn_cast<Instruction>(V)) {
    if (any_of(I->operands(), [=](const Use &Op) {
          return propagatesPoison(Op) &&
                 directlyImpliesPoison(ValAssumedPoison, Op, Depth + 1);
        }))
      return true;

    // V  = extractvalue V0, idx
    // V2 = extractvalue V0, idx2
    // V0's elements are all poison or not. (e.g., add_with_overflow)
    const WithOverflowInst *II;
    if (match(I, m_ExtractValue(m_WithOverflowInst(II))) &&
        (match(ValAssumedPoison, m_ExtractValue(m_Specific(II))) ||
         llvm::is_contained(II->args(), ValAssumedPoison)))
      return true;
  }
  return false;
}

static bool impliesPoison(const Value *ValAssumedPoison, const Value *V,
                          unsigned Depth) {
  if (isGuaranteedNotToBePoison(ValAssumedPoison))
    return true;

  if (directlyImpliesPoison(ValAssumedPoison, V, /* Depth */ 0))
    return true;

  const unsigned MaxDepth = 2;
  if (Depth >= MaxDepth)
    return false;

  const auto *I = dyn_cast<Instruction>(ValAssumedPoison);
  if (I && !canCreatePoison(cast<Operator>(I))) {
    return all_of(I->operands(), [=](const Value *Op) {
      return impliesPoison(Op, V, Depth + 1);
    });
  }
  return false;
}

bool llvm::impliesPoison(const Value *ValAssumedPoison, const Value *V) {
  return ::impliesPoison(ValAssumedPoison, V, /* Depth */ 0);
}
static bool programUndefinedIfUndefOrPoison(const Value *V, bool PoisonOnly);

static bool isGuaranteedNotToBeUndefOrPoison(
    const Value *V, AssumptionCache *AC, const Instruction *CtxI,
    const DominatorTree *DT, unsigned Depth, UndefPoisonKind Kind) {
  if (Depth >= MaxAnalysisRecursionDepth)
    return false;

  if (isa<MetadataAsValue>(V))
    return false;

  if (const auto *A = dyn_cast<Argument>(V)) {
    if (A->hasAttribute(Attribute::NoUndef) ||
        A->hasAttribute(Attribute::Dereferenceable) ||
        A->hasAttribute(Attribute::DereferenceableOrNull))
      return true;
  }

  if (auto *C = dyn_cast<Constant>(V)) {
    if (isa<PoisonValue>(C))
      return !includesPoison(Kind);

    if (isa<UndefValue>(C))
      return !includesUndef(Kind);

    if (isa<ConstantInt>(C) || isa<GlobalVariable>(C) || isa<ConstantFP>(V) ||
        isa<ConstantPointerNull>(C) || isa<Function>(C))
      return true;

    if (C->getType()->isVectorTy() && !isa<ConstantExpr>(C)) {
      if (includesUndef(Kind) && C->containsUndefElement())
        return false;
      if (includesPoison(Kind) && C->containsPoisonElement())
        return false;
      return !C->containsConstantExpression();
    }
  }

  // Strip cast operations from a pointer value.
  // Note that stripPointerCastsSameRepresentation can strip off getelementptr
  // inbounds with zero offset. To guarantee that the result isn't poison, the
  // stripped pointer is checked as it has to be pointing into an allocated
  // object or be null. This ensures `inbounds` getelementptrs with a zero
  // offset cannot produce poison.
  // It can strip off addrspacecasts that do not change the bit representation
  // as well. We believe that such an addrspacecast is equivalent to a no-op.
  auto *StrippedV = V->stripPointerCastsSameRepresentation();
  if (isa<AllocaInst>(StrippedV) || isa<GlobalVariable>(StrippedV) ||
      isa<Function>(StrippedV) || isa<ConstantPointerNull>(StrippedV))
    return true;

  auto OpCheck = [&](const Value *V) {
    return isGuaranteedNotToBeUndefOrPoison(V, AC, CtxI, DT, Depth + 1, Kind);
  };

  if (auto *Opr = dyn_cast<Operator>(V)) {
    // If the value is a freeze instruction, then it can never
    // be undef or poison.
    if (isa<FreezeInst>(V))
      return true;

    if (const auto *CB = dyn_cast<CallBase>(V)) {
      if (CB->hasRetAttr(Attribute::NoUndef) ||
          CB->hasRetAttr(Attribute::Dereferenceable) ||
          CB->hasRetAttr(Attribute::DereferenceableOrNull))
        return true;
    }

    if (const auto *PN = dyn_cast<PHINode>(V)) {
      unsigned Num = PN->getNumIncomingValues();
      bool IsWellDefined = true;
      for (unsigned i = 0; i < Num; ++i) {
        auto *TI = PN->getIncomingBlock(i)->getTerminator();
        if (!isGuaranteedNotToBeUndefOrPoison(PN->getIncomingValue(i), AC, TI,
                                              DT, Depth + 1, Kind)) {
          IsWellDefined = false;
          break;
        }
      }
      if (IsWellDefined)
        return true;
    } else if (!::canCreateUndefOrPoison(Opr, Kind,
                                         /*ConsiderFlagsAndMetadata*/ true) &&
               all_of(Opr->operands(), OpCheck))
      return true;
  }

  if (auto *I = dyn_cast<LoadInst>(V))
    if (I->hasMetadata(LLVMContext::MD_noundef) ||
        I->hasMetadata(LLVMContext::MD_dereferenceable) ||
        I->hasMetadata(LLVMContext::MD_dereferenceable_or_null))
      return true;

  if (programUndefinedIfUndefOrPoison(V, !includesUndef(Kind)))
    return true;

  // CxtI may be null or a cloned instruction.
  if (!CtxI || !CtxI->getParent() || !DT)
    return false;

  auto *DNode = DT->getNode(CtxI->getParent());
  if (!DNode)
    // Unreachable block
    return false;

  // If V is used as a branch condition before reaching CtxI, V cannot be
  // undef or poison.
  //   br V, BB1, BB2
  // BB1:
  //   CtxI ; V cannot be undef or poison here
  auto *Dominator = DNode->getIDom();
  // This check is purely for compile time reasons: we can skip the IDom walk
  // if what we are checking for includes undef and the value is not an integer.
  if (!includesUndef(Kind) || V->getType()->isIntegerTy())
    while (Dominator) {
      auto *TI = Dominator->getBlock()->getTerminator();

      Value *Cond = nullptr;
      if (auto BI = dyn_cast_or_null<BranchInst>(TI)) {
        if (BI->isConditional())
          Cond = BI->getCondition();
      } else if (auto SI = dyn_cast_or_null<SwitchInst>(TI)) {
        Cond = SI->getCondition();
      }

      if (Cond) {
        if (Cond == V)
          return true;
        else if (!includesUndef(Kind) && isa<Operator>(Cond)) {
          // For poison, we can analyze further
          auto *Opr = cast<Operator>(Cond);
          if (any_of(Opr->operands(), [V](const Use &U) {
                return V == U && propagatesPoison(U);
              }))
            return true;
        }
      }

      Dominator = Dominator->getIDom();
    }

  if (getKnowledgeValidInContext(V, {Attribute::NoUndef}, CtxI, DT, AC))
    return true;

  return false;
}
bool llvm::isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC,
                                            const Instruction *CtxI,
                                            const DominatorTree *DT,
                                            unsigned Depth) {
  return ::isGuaranteedNotToBeUndefOrPoison(V, AC, CtxI, DT, Depth,
                                            UndefPoisonKind::UndefOrPoison);
}

bool llvm::isGuaranteedNotToBePoison(const Value *V, AssumptionCache *AC,
                                     const Instruction *CtxI,
                                     const DominatorTree *DT, unsigned Depth) {
  return ::isGuaranteedNotToBeUndefOrPoison(V, AC, CtxI, DT, Depth,
                                            UndefPoisonKind::PoisonOnly);
}

bool llvm::isGuaranteedNotToBeUndef(const Value *V, AssumptionCache *AC,
                                    const Instruction *CtxI,
                                    const DominatorTree *DT, unsigned Depth) {
  return ::isGuaranteedNotToBeUndefOrPoison(V, AC, CtxI, DT, Depth,
                                            UndefPoisonKind::UndefOnly);
}
/// Return true if undefined behavior would provably be executed on the path to
/// OnPathTo if Root produced a poison result. Note that this doesn't say
/// anything about whether OnPathTo is actually executed or whether Root is
/// actually poison. This can be used to assess whether a new use of Root can
/// be added at a location which is control equivalent with OnPathTo (such as
/// immediately before it) without introducing UB which didn't previously
/// exist. Note that a false result conveys no information.
bool llvm::mustExecuteUBIfPoisonOnPathTo(Instruction *Root,
                                         Instruction *OnPathTo,
                                         DominatorTree *DT) {
  // Basic approach is to assume Root is poison, propagate poison forward
  // through all users we can easily track, and then check whether any of those
  // users are provable UB and must execute before our exiting block might
  // execute.

  // The set of all recursive users we've visited (which are assumed to all be
  // poison because of said visit)
  SmallSet<const Value *, 16> KnownPoison;
  SmallVector<const Instruction *, 16> Worklist;
  Worklist.push_back(Root);
  while (!Worklist.empty()) {
    const Instruction *I = Worklist.pop_back_val();

    // If we know this must trigger UB on a path leading our target.
    if (mustTriggerUB(I, KnownPoison) && DT->dominates(I, OnPathTo))
      return true;

    // If we can't analyze propagation through this instruction, just skip it
    // and transitive users.  Safe as false is a conservative result.
    if (I != Root && !any_of(I->operands(), [&KnownPoison](const Use &U) {
          return KnownPoison.contains(U) && propagatesPoison(U);
        }))
      continue;

    if (KnownPoison.insert(I).second)
      for (const User *User : I->users())
        Worklist.push_back(cast<Instruction>(User));
  }

  // Might be non-UB, or might have a path we couldn't prove must execute on
  // way to exiting bb.
  return false;
}
llvm::computeOverflowForSignedAdd(const AddOperator
*Add
,
7680 const SimplifyQuery
&SQ
) {
7681 return ::computeOverflowForSignedAdd(Add
->getOperand(0), Add
->getOperand(1),
7686 llvm::computeOverflowForSignedAdd(const WithCache
<const Value
*> &LHS
,
7687 const WithCache
<const Value
*> &RHS
,
7688 const SimplifyQuery
&SQ
) {
7689 return ::computeOverflowForSignedAdd(LHS
, RHS
, nullptr, SQ
);
bool llvm::isGuaranteedToTransferExecutionToSuccessor(const Instruction *I) {
  // Note: An atomic operation isn't guaranteed to return in a reasonable amount
  // of time because it's possible for another thread to interfere with it for an
  // arbitrary length of time, but programs aren't allowed to rely on that.

  // If there is no successor, then execution can't transfer to it.
  if (isa<ReturnInst>(I))
    return false;
  if (isa<UnreachableInst>(I))
    return false;

  // Note: Do not add new checks here; instead, change Instruction::mayThrow or
  // Instruction::willReturn.

  // FIXME: Move this check into Instruction::willReturn.
  if (isa<CatchPadInst>(I)) {
    switch (classifyEHPersonality(I->getFunction()->getPersonalityFn())) {
    default:
      // A catchpad may invoke exception object constructors and such, which
      // in some languages can be arbitrary code, so be conservative by default.
      return false;
    case EHPersonality::CoreCLR:
      // For CoreCLR, it just involves a type test.
      return true;
    }
  }

  // An instruction that returns without throwing must transfer control flow
  // to a successor.
  return !I->mayThrow() && I->willReturn();
}
bool llvm::isGuaranteedToTransferExecutionToSuccessor(const BasicBlock *BB) {
  // TODO: This is slightly conservative for invoke instruction since exiting
  // via an exception *is* normal control for them.
  for (const Instruction &I : *BB)
    if (!isGuaranteedToTransferExecutionToSuccessor(&I))
      return false;
  return true;
}

bool llvm::isGuaranteedToTransferExecutionToSuccessor(
    BasicBlock::const_iterator Begin, BasicBlock::const_iterator End,
    unsigned ScanLimit) {
  return isGuaranteedToTransferExecutionToSuccessor(make_range(Begin, End),
                                                    ScanLimit);
}

bool llvm::isGuaranteedToTransferExecutionToSuccessor(
    iterator_range<BasicBlock::const_iterator> Range, unsigned ScanLimit) {
  assert(ScanLimit && "scan limit must be non-zero");
  for (const Instruction &I : Range) {
    if (isa<DbgInfoIntrinsic>(I))
      continue;
    if (--ScanLimit == 0)
      return false;
    if (!isGuaranteedToTransferExecutionToSuccessor(&I))
      return false;
  }
  return true;
}
bool llvm::isGuaranteedToExecuteForEveryIteration(const Instruction *I,
                                                  const Loop *L) {
  // The loop header is guaranteed to be executed for every iteration.
  //
  // FIXME: Relax this constraint to cover all basic blocks that are
  // guaranteed to be executed at every iteration.
  if (I->getParent() != L->getHeader()) return false;

  for (const Instruction &LI : *L->getHeader()) {
    if (&LI == I) return true;
    if (!isGuaranteedToTransferExecutionToSuccessor(&LI)) return false;
  }
  llvm_unreachable("Instruction not contained in its own parent basic block.");
}
bool llvm::propagatesPoison(const Use &PoisonOp) {
  const Operator *I = cast<Operator>(PoisonOp.getUser());
  switch (I->getOpcode()) {
  case Instruction::Freeze:
  case Instruction::PHI:
  case Instruction::Invoke:
    return false;
  case Instruction::Select:
    return PoisonOp.getOperandNo() == 0;
  case Instruction::Call:
    if (auto *II = dyn_cast<IntrinsicInst>(I)) {
      switch (II->getIntrinsicID()) {
      // TODO: Add more intrinsics.
      case Intrinsic::sadd_with_overflow:
      case Intrinsic::ssub_with_overflow:
      case Intrinsic::smul_with_overflow:
      case Intrinsic::uadd_with_overflow:
      case Intrinsic::usub_with_overflow:
      case Intrinsic::umul_with_overflow:
        // If an input is a vector containing a poison element, the
        // two output vectors (calculated results, overflow bits)'
        // corresponding lanes are poison.
        return true;
      case Intrinsic::ctpop:
      case Intrinsic::ctlz:
      case Intrinsic::cttz:
      case Intrinsic::abs:
      case Intrinsic::smax:
      case Intrinsic::smin:
      case Intrinsic::umax:
      case Intrinsic::umin:
      case Intrinsic::bitreverse:
      case Intrinsic::bswap:
      case Intrinsic::sadd_sat:
      case Intrinsic::ssub_sat:
      case Intrinsic::sshl_sat:
      case Intrinsic::uadd_sat:
      case Intrinsic::usub_sat:
      case Intrinsic::ushl_sat:
        return true;
      }
    }
    return false;
  case Instruction::ICmp:
  case Instruction::FCmp:
  case Instruction::GetElementPtr:
    return true;
  default:
    if (isa<BinaryOperator>(I) || isa<UnaryOperator>(I) || isa<CastInst>(I))
      return true;

    // Be conservative and return false.
    return false;
  }
}
/// Enumerates all operands of \p I that are guaranteed to not be undef or
/// poison. If the callback \p Handle returns true, stop processing and return
/// true. Otherwise, return false.
template <typename CallableT>
static bool handleGuaranteedWellDefinedOps(const Instruction *I,
                                           const CallableT &Handle) {
  switch (I->getOpcode()) {
  case Instruction::Store:
    if (Handle(cast<StoreInst>(I)->getPointerOperand()))
      return true;
    break;

  case Instruction::Load:
    if (Handle(cast<LoadInst>(I)->getPointerOperand()))
      return true;
    break;

  // Since the dereferenceable attribute implies noundef, atomic operations
  // also implicitly have noundef pointers too
  case Instruction::AtomicCmpXchg:
    if (Handle(cast<AtomicCmpXchgInst>(I)->getPointerOperand()))
      return true;
    break;

  case Instruction::AtomicRMW:
    if (Handle(cast<AtomicRMWInst>(I)->getPointerOperand()))
      return true;
    break;

  case Instruction::Call:
  case Instruction::Invoke: {
    const CallBase *CB = cast<CallBase>(I);
    if (CB->isIndirectCall() && Handle(CB->getCalledOperand()))
      return true;
    for (unsigned i = 0; i < CB->arg_size(); ++i)
      if ((CB->paramHasAttr(i, Attribute::NoUndef) ||
           CB->paramHasAttr(i, Attribute::Dereferenceable) ||
           CB->paramHasAttr(i, Attribute::DereferenceableOrNull)) &&
          Handle(CB->getArgOperand(i)))
        return true;
    break;
  }
  case Instruction::Ret:
    if (I->getFunction()->hasRetAttribute(Attribute::NoUndef) &&
        Handle(I->getOperand(0)))
      return true;
    break;
  case Instruction::Switch:
    if (Handle(cast<SwitchInst>(I)->getCondition()))
      return true;
    break;
  case Instruction::Br: {
    auto *BR = cast<BranchInst>(I);
    if (BR->isConditional() && Handle(BR->getCondition()))
      return true;
    break;
  }
  default:
    break;
  }

  return false;
}

void llvm::getGuaranteedWellDefinedOps(
    const Instruction *I, SmallVectorImpl<const Value *> &Operands) {
  handleGuaranteedWellDefinedOps(I, [&](const Value *V) {
    Operands.push_back(V);
    return false;
  });
}
/// Enumerates all operands of \p I that are guaranteed to not be poison.
template <typename CallableT>
static bool handleGuaranteedNonPoisonOps(const Instruction *I,
                                         const CallableT &Handle) {
  if (handleGuaranteedWellDefinedOps(I, Handle))
    return true;
  switch (I->getOpcode()) {
  // Divisors of these operations are allowed to be partially undef.
  case Instruction::UDiv:
  case Instruction::SDiv:
  case Instruction::URem:
  case Instruction::SRem:
    return Handle(I->getOperand(1));
  default:
    return false;
  }
}

void llvm::getGuaranteedNonPoisonOps(const Instruction *I,
                                     SmallVectorImpl<const Value *> &Operands) {
  handleGuaranteedNonPoisonOps(I, [&](const Value *V) {
    Operands.push_back(V);
    return false;
  });
}

bool llvm::mustTriggerUB(const Instruction *I,
                         const SmallPtrSetImpl<const Value *> &KnownPoison) {
  return handleGuaranteedNonPoisonOps(
      I, [&](const Value *V) { return KnownPoison.count(V); });
}
static bool programUndefinedIfUndefOrPoison(const Value *V,
                                            bool PoisonOnly) {
  // We currently only look for uses of values within the same basic
  // block, as that makes it easier to guarantee that the uses will be
  // executed given that Inst is executed.
  //
  // FIXME: Expand this to consider uses beyond the same basic block. To do
  // this, look out for the distinction between post-dominance and strong
  // post-dominance.
  const BasicBlock *BB = nullptr;
  BasicBlock::const_iterator Begin;
  if (const auto *Inst = dyn_cast<Instruction>(V)) {
    BB = Inst->getParent();
    Begin = Inst->getIterator();
    Begin++;
  } else if (const auto *Arg = dyn_cast<Argument>(V)) {
    if (Arg->getParent()->isDeclaration())
      return false;
    BB = &Arg->getParent()->getEntryBlock();
    Begin = BB->begin();
  } else {
    return false;
  }

  // Limit number of instructions we look at, to avoid scanning through large
  // blocks. The current limit is chosen arbitrarily.
  unsigned ScanLimit = 32;
  BasicBlock::const_iterator End = BB->end();

  if (!PoisonOnly) {
    // Since undef does not propagate eagerly, be conservative & just check
    // whether a value is directly passed to an instruction that must take
    // well-defined operands.

    for (const auto &I : make_range(Begin, End)) {
      if (isa<DbgInfoIntrinsic>(I))
        continue;
      if (--ScanLimit == 0)
        break;

      if (handleGuaranteedWellDefinedOps(&I, [V](const Value *WellDefinedOp) {
            return WellDefinedOp == V;
          }))
        return true;

      if (!isGuaranteedToTransferExecutionToSuccessor(&I))
        break;
    }
    return false;
  }

  // Set of instructions that we have proved will yield poison if Inst
  // does.
  SmallSet<const Value *, 16> YieldsPoison;
  SmallSet<const BasicBlock *, 4> Visited;

  YieldsPoison.insert(V);
  Visited.insert(BB);

  while (true) {
    for (const auto &I : make_range(Begin, End)) {
      if (isa<DbgInfoIntrinsic>(I))
        continue;
      if (--ScanLimit == 0)
        return false;
      if (mustTriggerUB(&I, YieldsPoison))
        return true;
      if (!isGuaranteedToTransferExecutionToSuccessor(&I))
        return false;

      // If an operand is poison and propagates it, mark I as yielding poison.
      for (const Use &Op : I.operands()) {
        if (YieldsPoison.count(Op) && propagatesPoison(Op)) {
          YieldsPoison.insert(&I);
          break;
        }
      }

      // Special handling for select, which returns poison if its operand 0 is
      // poison (handled in the loop above) *or* if both its true/false
      // operands are poison (handled here).
      if (I.getOpcode() == Instruction::Select &&
          YieldsPoison.count(I.getOperand(1)) &&
          YieldsPoison.count(I.getOperand(2))) {
        YieldsPoison.insert(&I);
      }
    }

    BB = BB->getSingleSuccessor();
    if (!BB || !Visited.insert(BB).second)
      break;

    Begin = BB->getFirstNonPHI()->getIterator();
    End = BB->end();
  }
  return false;
}

bool llvm::programUndefinedIfUndefOrPoison(const Instruction *Inst) {
  return ::programUndefinedIfUndefOrPoison(Inst, false);
}

bool llvm::programUndefinedIfPoison(const Instruction *Inst) {
  return ::programUndefinedIfUndefOrPoison(Inst, true);
}
static bool isKnownNonNaN(const Value *V, FastMathFlags FMF) {
  if (FMF.noNaNs())
    return true;

  if (auto *C = dyn_cast<ConstantFP>(V))
    return !C->isNaN();

  if (auto *C = dyn_cast<ConstantDataVector>(V)) {
    if (!C->getElementType()->isFloatingPointTy())
      return false;
    for (unsigned I = 0, E = C->getNumElements(); I < E; ++I) {
      if (C->getElementAsAPFloat(I).isNaN())
        return false;
    }
    return true;
  }

  if (isa<ConstantAggregateZero>(V))
    return true;

  return false;
}

static bool isKnownNonZero(const Value *V) {
  if (auto *C = dyn_cast<ConstantFP>(V))
    return !C->isZero();

  if (auto *C = dyn_cast<ConstantDataVector>(V)) {
    if (!C->getElementType()->isFloatingPointTy())
      return false;
    for (unsigned I = 0, E = C->getNumElements(); I < E; ++I) {
      if (C->getElementAsAPFloat(I).isZero())
        return false;
    }
    return true;
  }

  return false;
}
/// Match clamp pattern for float types without caring about NaNs or signed
/// zeros. Given a non-min/max outer cmp/select from the clamp pattern, this
/// function recognizes if it can be substituted by a "canonical" min/max
/// pattern.
static SelectPatternResult matchFastFloatClamp(CmpInst::Predicate Pred,
                                               Value *CmpLHS, Value *CmpRHS,
                                               Value *TrueVal, Value *FalseVal,
                                               Value *&LHS, Value *&RHS) {
  // Try to match
  //   X < C1 ? C1 : Min(X, C2) --> Max(C1, Min(X, C2))
  //   X > C1 ? C1 : Max(X, C2) --> Min(C1, Max(X, C2))
  // and return description of the outer Max/Min.

  // First, check if select has inverse order:
  if (CmpRHS == FalseVal) {
    std::swap(TrueVal, FalseVal);
    Pred = CmpInst::getInversePredicate(Pred);
  }

  // Assume success now. If there's no match, callers should not use these
  // anyway.
  LHS = TrueVal;
  RHS = FalseVal;

  const APFloat *FC1;
  if (CmpRHS != TrueVal || !match(CmpRHS, m_APFloat(FC1)) || !FC1->isFinite())
    return {SPF_UNKNOWN, SPNB_NA, false};

  const APFloat *FC2;
  switch (Pred) {
  case CmpInst::FCMP_OLT:
  case CmpInst::FCMP_OLE:
  case CmpInst::FCMP_ULT:
  case CmpInst::FCMP_ULE:
    if (match(FalseVal,
              m_CombineOr(m_OrdFMin(m_Specific(CmpLHS), m_APFloat(FC2)),
                          m_UnordFMin(m_Specific(CmpLHS), m_APFloat(FC2)))) &&
        *FC1 < *FC2)
      return {SPF_FMAXNUM, SPNB_RETURNS_ANY, false};
    break;
  case CmpInst::FCMP_OGT:
  case CmpInst::FCMP_OGE:
  case CmpInst::FCMP_UGT:
  case CmpInst::FCMP_UGE:
    if (match(FalseVal,
              m_CombineOr(m_OrdFMax(m_Specific(CmpLHS), m_APFloat(FC2)),
                          m_UnordFMax(m_Specific(CmpLHS), m_APFloat(FC2)))) &&
        *FC1 > *FC2)
      return {SPF_FMINNUM, SPNB_RETURNS_ANY, false};
    break;
  default:
    break;
  }

  return {SPF_UNKNOWN, SPNB_NA, false};
}
/// Recognize variations of:
///   CLAMP(v,l,h) ==> ((v) < (l) ? (l) : ((v) > (h) ? (h) : (v)))
static SelectPatternResult matchClamp(CmpInst::Predicate Pred,
                                      Value *CmpLHS, Value *CmpRHS,
                                      Value *TrueVal, Value *FalseVal) {
  // Swap the select operands and predicate to match the patterns below.
  if (CmpRHS != TrueVal) {
    Pred = ICmpInst::getSwappedPredicate(Pred);
    std::swap(TrueVal, FalseVal);
  }
  const APInt *C1;
  if (CmpRHS == TrueVal && match(CmpRHS, m_APInt(C1))) {
    const APInt *C2;
    // (X <s C1) ? C1 : SMIN(X, C2) ==> SMAX(SMIN(X, C2), C1)
    if (match(FalseVal, m_SMin(m_Specific(CmpLHS), m_APInt(C2))) &&
        C1->slt(*C2) && Pred == CmpInst::ICMP_SLT)
      return {SPF_SMAX, SPNB_NA, false};

    // (X >s C1) ? C1 : SMAX(X, C2) ==> SMIN(SMAX(X, C2), C1)
    if (match(FalseVal, m_SMax(m_Specific(CmpLHS), m_APInt(C2))) &&
        C1->sgt(*C2) && Pred == CmpInst::ICMP_SGT)
      return {SPF_SMIN, SPNB_NA, false};

    // (X <u C1) ? C1 : UMIN(X, C2) ==> UMAX(UMIN(X, C2), C1)
    if (match(FalseVal, m_UMin(m_Specific(CmpLHS), m_APInt(C2))) &&
        C1->ult(*C2) && Pred == CmpInst::ICMP_ULT)
      return {SPF_UMAX, SPNB_NA, false};

    // (X >u C1) ? C1 : UMAX(X, C2) ==> UMIN(UMAX(X, C2), C1)
    if (match(FalseVal, m_UMax(m_Specific(CmpLHS), m_APInt(C2))) &&
        C1->ugt(*C2) && Pred == CmpInst::ICMP_UGT)
      return {SPF_UMIN, SPNB_NA, false};
  }
  return {SPF_UNKNOWN, SPNB_NA, false};
}
/// Recognize variations of:
///   a < c ? min(a,b) : min(b,c) ==> min(min(a,b),min(b,c))
static SelectPatternResult matchMinMaxOfMinMax(CmpInst::Predicate Pred,
                                               Value *CmpLHS, Value *CmpRHS,
                                               Value *TVal, Value *FVal,
                                               unsigned Depth) {
  // TODO: Allow FP min/max with nnan/nsz.
  assert(CmpInst::isIntPredicate(Pred) && "Expected integer comparison");

  Value *A = nullptr, *B = nullptr;
  SelectPatternResult L = matchSelectPattern(TVal, A, B, nullptr, Depth + 1);
  if (!SelectPatternResult::isMinOrMax(L.Flavor))
    return {SPF_UNKNOWN, SPNB_NA, false};

  Value *C = nullptr, *D = nullptr;
  SelectPatternResult R = matchSelectPattern(FVal, C, D, nullptr, Depth + 1);
  if (L.Flavor != R.Flavor)
    return {SPF_UNKNOWN, SPNB_NA, false};

  // We have something like: x Pred y ? min(a, b) : min(c, d).
  // Try to match the compare to the min/max operations of the select operands.
  // First, make sure we have the right compare predicate.
  switch (L.Flavor) {
  case SPF_SMIN:
    if (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE) {
      Pred = ICmpInst::getSwappedPredicate(Pred);
      std::swap(CmpLHS, CmpRHS);
    }
    if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE)
      break;
    return {SPF_UNKNOWN, SPNB_NA, false};
  case SPF_SMAX:
    if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE) {
      Pred = ICmpInst::getSwappedPredicate(Pred);
      std::swap(CmpLHS, CmpRHS);
    }
    if (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE)
      break;
    return {SPF_UNKNOWN, SPNB_NA, false};
  case SPF_UMIN:
    if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE) {
      Pred = ICmpInst::getSwappedPredicate(Pred);
      std::swap(CmpLHS, CmpRHS);
    }
    if (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_ULE)
      break;
    return {SPF_UNKNOWN, SPNB_NA, false};
  case SPF_UMAX:
    if (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_ULE) {
      Pred = ICmpInst::getSwappedPredicate(Pred);
      std::swap(CmpLHS, CmpRHS);
    }
    if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE)
      break;
    return {SPF_UNKNOWN, SPNB_NA, false};
  default:
    return {SPF_UNKNOWN, SPNB_NA, false};
  }

  // If there is a common operand in the already matched min/max and the other
  // min/max operands match the compare operands (either directly or inverted),
  // then this is min/max of the same flavor.

  // a pred c ? m(a, b) : m(c, b) --> m(m(a, b), m(c, b))
  // ~c pred ~a ? m(a, b) : m(c, b) --> m(m(a, b), m(c, b))
  if (D == B &&
      ((CmpLHS == A && CmpRHS == C) || (match(C, m_Not(m_Specific(CmpLHS))) &&
                                        match(A, m_Not(m_Specific(CmpRHS))))))
    return {L.Flavor, SPNB_NA, false};
  // a pred d ? m(a, b) : m(b, d) --> m(m(a, b), m(b, d))
  // ~d pred ~a ? m(a, b) : m(b, d) --> m(m(a, b), m(b, d))
  if (C == B &&
      ((CmpLHS == A && CmpRHS == D) || (match(D, m_Not(m_Specific(CmpLHS))) &&
                                        match(A, m_Not(m_Specific(CmpRHS))))))
    return {L.Flavor, SPNB_NA, false};
  // b pred c ? m(a, b) : m(c, a) --> m(m(a, b), m(c, a))
  // ~c pred ~b ? m(a, b) : m(c, a) --> m(m(a, b), m(c, a))
  if (D == A &&
      ((CmpLHS == B && CmpRHS == C) || (match(C, m_Not(m_Specific(CmpLHS))) &&
                                        match(B, m_Not(m_Specific(CmpRHS))))))
    return {L.Flavor, SPNB_NA, false};
  // b pred d ? m(a, b) : m(a, d) --> m(m(a, b), m(a, d))
  // ~d pred ~b ? m(a, b) : m(a, d) --> m(m(a, b), m(a, d))
  if (C == A &&
      ((CmpLHS == B && CmpRHS == D) || (match(D, m_Not(m_Specific(CmpLHS))) &&
                                        match(B, m_Not(m_Specific(CmpRHS))))))
    return {L.Flavor, SPNB_NA, false};

  return {SPF_UNKNOWN, SPNB_NA, false};
}
/// If the input value is the result of a 'not' op, constant integer, or vector
/// splat of a constant integer, return the bitwise-not source value.
/// TODO: This could be extended to handle non-splat vector integer constants.
static Value *getNotValue(Value *V) {
  Value *NotV;
  if (match(V, m_Not(m_Value(NotV))))
    return NotV;

  const APInt *C;
  if (match(V, m_APInt(C)))
    return ConstantInt::get(V->getType(), ~(*C));

  return nullptr;
}
8278 static SelectPatternResult
matchMinMax(CmpInst::Predicate Pred
,
8279 Value
*CmpLHS
, Value
*CmpRHS
,
8280 Value
*TrueVal
, Value
*FalseVal
,
8281 Value
*&LHS
, Value
*&RHS
,
8283 // Assume success. If there's no match, callers should not use these anyway.
8287 SelectPatternResult SPR
= matchClamp(Pred
, CmpLHS
, CmpRHS
, TrueVal
, FalseVal
);
8288 if (SPR
.Flavor
!= SelectPatternFlavor::SPF_UNKNOWN
)
8291 SPR
= matchMinMaxOfMinMax(Pred
, CmpLHS
, CmpRHS
, TrueVal
, FalseVal
, Depth
);
8292 if (SPR
.Flavor
!= SelectPatternFlavor::SPF_UNKNOWN
)
8295 // Look through 'not' ops to find disguised min/max.
8296 // (X > Y) ? ~X : ~Y ==> (~X < ~Y) ? ~X : ~Y ==> MIN(~X, ~Y)
8297 // (X < Y) ? ~X : ~Y ==> (~X > ~Y) ? ~X : ~Y ==> MAX(~X, ~Y)
8298 if (CmpLHS
== getNotValue(TrueVal
) && CmpRHS
== getNotValue(FalseVal
)) {
8300 case CmpInst::ICMP_SGT
: return {SPF_SMIN
, SPNB_NA
, false};
8301 case CmpInst::ICMP_SLT
: return {SPF_SMAX
, SPNB_NA
, false};
8302 case CmpInst::ICMP_UGT
: return {SPF_UMIN
, SPNB_NA
, false};
8303 case CmpInst::ICMP_ULT
: return {SPF_UMAX
, SPNB_NA
, false};
8308 // (X > Y) ? ~Y : ~X ==> (~X < ~Y) ? ~Y : ~X ==> MAX(~Y, ~X)
8309 // (X < Y) ? ~Y : ~X ==> (~X > ~Y) ? ~Y : ~X ==> MIN(~Y, ~X)
8310 if (CmpLHS
== getNotValue(FalseVal
) && CmpRHS
== getNotValue(TrueVal
)) {
8312 case CmpInst::ICMP_SGT
: return {SPF_SMAX
, SPNB_NA
, false};
8313 case CmpInst::ICMP_SLT
: return {SPF_SMIN
, SPNB_NA
, false};
8314 case CmpInst::ICMP_UGT
: return {SPF_UMAX
, SPNB_NA
, false};
8315 case CmpInst::ICMP_ULT
: return {SPF_UMIN
, SPNB_NA
, false};
8320 if (Pred
!= CmpInst::ICMP_SGT
&& Pred
!= CmpInst::ICMP_SLT
)
8321 return {SPF_UNKNOWN
, SPNB_NA
, false};
8324 if (!match(CmpRHS
, m_APInt(C1
)))
8325 return {SPF_UNKNOWN
, SPNB_NA
, false};
8327 // An unsigned min/max can be written with a signed compare.
8329 if ((CmpLHS
== TrueVal
&& match(FalseVal
, m_APInt(C2
))) ||
8330 (CmpLHS
== FalseVal
&& match(TrueVal
, m_APInt(C2
)))) {
8331 // Is the sign bit set?
8332 // (X <s 0) ? X : MAXVAL ==> (X >u MAXVAL) ? X : MAXVAL ==> UMAX
8333 // (X <s 0) ? MAXVAL : X ==> (X >u MAXVAL) ? MAXVAL : X ==> UMIN
8334 if (Pred
== CmpInst::ICMP_SLT
&& C1
->isZero() && C2
->isMaxSignedValue())
8335 return {CmpLHS
== TrueVal
? SPF_UMAX
: SPF_UMIN
, SPNB_NA
, false};
8337 // Is the sign bit clear?
8338 // (X >s -1) ? MINVAL : X ==> (X <u MINVAL) ? MINVAL : X ==> UMAX
8339 // (X >s -1) ? X : MINVAL ==> (X <u MINVAL) ? X : MINVAL ==> UMIN
8340 if (Pred
== CmpInst::ICMP_SGT
&& C1
->isAllOnes() && C2
->isMinSignedValue())
8341 return {CmpLHS
== FalseVal
? SPF_UMAX
: SPF_UMIN
, SPNB_NA
, false};
8344 return {SPF_UNKNOWN
, SPNB_NA
, false};
bool llvm::isKnownNegation(const Value *X, const Value *Y, bool NeedNSW,
                           bool AllowPoison) {
  assert(X && Y && "Invalid operand");

  auto IsNegationOf = [&](const Value *X, const Value *Y) {
    if (!match(X, m_Neg(m_Specific(Y))))
      return false;

    auto *BO = cast<BinaryOperator>(X);
    if (NeedNSW && !BO->hasNoSignedWrap())
      return false;

    auto *Zero = cast<Constant>(BO->getOperand(0));
    if (!AllowPoison && !Zero->isNullValue())
      return false;

    return true;
  };

  // X = -Y or Y = -X
  if (IsNegationOf(X, Y) || IsNegationOf(Y, X))
    return true;

  // X = sub (A, B), Y = sub (B, A) || X = sub nsw (A, B), Y = sub nsw (B, A)
  Value *A, *B;
  return (!NeedNSW && (match(X, m_Sub(m_Value(A), m_Value(B))) &&
                       match(Y, m_Sub(m_Specific(B), m_Specific(A))))) ||
         (NeedNSW && (match(X, m_NSWSub(m_Value(A), m_Value(B))) &&
                      match(Y, m_NSWSub(m_Specific(B), m_Specific(A)))));
}
bool llvm::isKnownInversion(const Value *X, const Value *Y) {
  // Handle X = icmp pred A, B, Y = icmp pred A, C.
  Value *A, *B, *C;
  ICmpInst::Predicate Pred1, Pred2;
  if (!match(X, m_ICmp(Pred1, m_Value(A), m_Value(B))) ||
      !match(Y, m_c_ICmp(Pred2, m_Specific(A), m_Value(C))))
    return false;

  if (B == C)
    return Pred1 == ICmpInst::getInversePredicate(Pred2);

  // Try to infer the relationship from constant ranges.
  const APInt *RHSC1, *RHSC2;
  if (!match(B, m_APInt(RHSC1)) || !match(C, m_APInt(RHSC2)))
    return false;

  const auto CR1 = ConstantRange::makeExactICmpRegion(Pred1, *RHSC1);
  const auto CR2 = ConstantRange::makeExactICmpRegion(Pred2, *RHSC2);

  return CR1.inverse() == CR2;
}
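
// Example of the constant-range path above (illustrative): for
//   X: icmp ult i8 %a, 8     -> exact region [0, 8)
//   Y: icmp ugt i8 %a, 7     -> exact region [8, 0) (wrapping)
// the inverse of the first region equals the second, so X and Y are known
// inversions even though their RHS constants differ.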
static SelectPatternResult matchSelectPattern(CmpInst::Predicate Pred,
                                              FastMathFlags FMF,
                                              Value *CmpLHS, Value *CmpRHS,
                                              Value *TrueVal, Value *FalseVal,
                                              Value *&LHS, Value *&RHS,
                                              unsigned Depth) {
  bool HasMismatchedZeros = false;
  if (CmpInst::isFPPredicate(Pred)) {
    // IEEE-754 ignores the sign of 0.0 in comparisons. So if the select has one
    // 0.0 operand, set the compare's 0.0 operands to that same value for the
    // purpose of identifying min/max. Disregard vector constants with undefined
    // elements because those can not be back-propagated for analysis.
    Value *OutputZeroVal = nullptr;
    if (match(TrueVal, m_AnyZeroFP()) && !match(FalseVal, m_AnyZeroFP()) &&
        !cast<Constant>(TrueVal)->containsUndefOrPoisonElement())
      OutputZeroVal = TrueVal;
    else if (match(FalseVal, m_AnyZeroFP()) && !match(TrueVal, m_AnyZeroFP()) &&
             !cast<Constant>(FalseVal)->containsUndefOrPoisonElement())
      OutputZeroVal = FalseVal;

    if (OutputZeroVal) {
      if (match(CmpLHS, m_AnyZeroFP()) && CmpLHS != OutputZeroVal) {
        HasMismatchedZeros = true;
        CmpLHS = OutputZeroVal;
      }
      if (match(CmpRHS, m_AnyZeroFP()) && CmpRHS != OutputZeroVal) {
        HasMismatchedZeros = true;
        CmpRHS = OutputZeroVal;
      }
    }
  }

  LHS = CmpLHS;
  RHS = CmpRHS;

  // Signed zero may return inconsistent results between implementations.
  //  (0.0 <= -0.0) ? 0.0 : -0.0 // Returns 0.0
  //  minNum(0.0, -0.0)          // May return -0.0 or 0.0 (IEEE 754-2008 5.3.1)
  // Therefore, we behave conservatively and only proceed if at least one of the
  // operands is known to not be zero or if we don't care about signed zero.
  switch (Pred) {
  default:
    break;
  case CmpInst::FCMP_OGT: case CmpInst::FCMP_OLT:
  case CmpInst::FCMP_UGT: case CmpInst::FCMP_ULT:
    if (!HasMismatchedZeros)
      break;
    [[fallthrough]];
  case CmpInst::FCMP_OGE: case CmpInst::FCMP_OLE:
  case CmpInst::FCMP_UGE: case CmpInst::FCMP_ULE:
    if (!FMF.noSignedZeros() && !isKnownNonZero(CmpLHS) &&
        !isKnownNonZero(CmpRHS))
      return {SPF_UNKNOWN, SPNB_NA, false};
  }

  SelectPatternNaNBehavior NaNBehavior = SPNB_NA;
  bool Ordered = false;

  // When given one NaN and one non-NaN input:
  //   - maxnum/minnum (C99 fmaxf()/fminf()) return the non-NaN input.
  //   - A simple C99 (a < b ? a : b) construction will return 'b' (as the
  //     ordered comparison fails), which could be NaN or non-NaN.
  // so here we discover exactly what NaN behavior is required/accepted.
  if (CmpInst::isFPPredicate(Pred)) {
    bool LHSSafe = isKnownNonNaN(CmpLHS, FMF);
    bool RHSSafe = isKnownNonNaN(CmpRHS, FMF);

    if (LHSSafe && RHSSafe) {
      // Both operands are known non-NaN.
      NaNBehavior = SPNB_RETURNS_ANY;
    } else if (CmpInst::isOrdered(Pred)) {
      // An ordered comparison will return false when given a NaN, so it
      // returns the RHS.
      Ordered = true;
      if (LHSSafe)
        // LHS is non-NaN, so if RHS is NaN then NaN will be returned.
        NaNBehavior = SPNB_RETURNS_NAN;
      else if (RHSSafe)
        NaNBehavior = SPNB_RETURNS_OTHER;
      else
        // Completely unsafe.
        return {SPF_UNKNOWN, SPNB_NA, false};
    } else {
      Ordered = false;
      // An unordered comparison will return true when given a NaN, so it
      // returns the LHS.
      if (LHSSafe)
        // LHS is non-NaN, so if RHS is NaN then non-NaN will be returned.
        NaNBehavior = SPNB_RETURNS_OTHER;
      else if (RHSSafe)
        NaNBehavior = SPNB_RETURNS_NAN;
      else
        // Completely unsafe.
        return {SPF_UNKNOWN, SPNB_NA, false};
    }
  }

  if (TrueVal == CmpRHS && FalseVal == CmpLHS) {
    std::swap(CmpLHS, CmpRHS);
    Pred = CmpInst::getSwappedPredicate(Pred);
    if (NaNBehavior == SPNB_RETURNS_NAN)
      NaNBehavior = SPNB_RETURNS_OTHER;
    else if (NaNBehavior == SPNB_RETURNS_OTHER)
      NaNBehavior = SPNB_RETURNS_NAN;
    Ordered = !Ordered;
  }

  // ([if]cmp X, Y) ? X : Y
  if (TrueVal == CmpLHS && FalseVal == CmpRHS) {
    switch (Pred) {
    default: return {SPF_UNKNOWN, SPNB_NA, false}; // Equality.
    case ICmpInst::ICMP_UGT:
    case ICmpInst::ICMP_UGE: return {SPF_UMAX, SPNB_NA, false};
    case ICmpInst::ICMP_SGT:
    case ICmpInst::ICMP_SGE: return {SPF_SMAX, SPNB_NA, false};
    case ICmpInst::ICMP_ULT:
    case ICmpInst::ICMP_ULE: return {SPF_UMIN, SPNB_NA, false};
    case ICmpInst::ICMP_SLT:
    case ICmpInst::ICMP_SLE: return {SPF_SMIN, SPNB_NA, false};
    case FCmpInst::FCMP_UGT:
    case FCmpInst::FCMP_UGE:
    case FCmpInst::FCMP_OGT:
    case FCmpInst::FCMP_OGE: return {SPF_FMAXNUM, NaNBehavior, Ordered};
    case FCmpInst::FCMP_ULT:
    case FCmpInst::FCMP_ULE:
    case FCmpInst::FCMP_OLT:
    case FCmpInst::FCMP_OLE: return {SPF_FMINNUM, NaNBehavior, Ordered};
    }
  }

  if (isKnownNegation(TrueVal, FalseVal)) {
    // Sign-extending LHS does not change its sign, so TrueVal/FalseVal can
    // match against either LHS or sext(LHS).
    auto MaybeSExtCmpLHS =
        m_CombineOr(m_Specific(CmpLHS), m_SExt(m_Specific(CmpLHS)));
    auto ZeroOrAllOnes = m_CombineOr(m_ZeroInt(), m_AllOnes());
    auto ZeroOrOne = m_CombineOr(m_ZeroInt(), m_One());
    if (match(TrueVal, MaybeSExtCmpLHS)) {
      // Set the return values. If the compare uses the negated value (-X >s 0),
      // swap the return values because the negated value is always 'RHS'.
      LHS = TrueVal;
      RHS = FalseVal;
      if (match(CmpLHS, m_Neg(m_Specific(FalseVal))))
        std::swap(LHS, RHS);

      // (X >s 0) ? X : -X or (X >s -1) ? X : -X --> ABS(X)
      // (-X >s 0) ? -X : X or (-X >s -1) ? -X : X --> ABS(X)
      if (Pred == ICmpInst::ICMP_SGT && match(CmpRHS, ZeroOrAllOnes))
        return {SPF_ABS, SPNB_NA, false};

      // (X >=s 0) ? X : -X or (X >=s 1) ? X : -X --> ABS(X)
      if (Pred == ICmpInst::ICMP_SGE && match(CmpRHS, ZeroOrOne))
        return {SPF_ABS, SPNB_NA, false};

      // (X <s 0) ? X : -X or (X <s 1) ? X : -X --> NABS(X)
      // (-X <s 0) ? -X : X or (-X <s 1) ? -X : X --> NABS(X)
      if (Pred == ICmpInst::ICMP_SLT && match(CmpRHS, ZeroOrOne))
        return {SPF_NABS, SPNB_NA, false};
    }
    else if (match(FalseVal, MaybeSExtCmpLHS)) {
      // Set the return values. If the compare uses the negated value (-X >s 0),
      // swap the return values because the negated value is always 'RHS'.
      LHS = FalseVal;
      RHS = TrueVal;
      if (match(CmpLHS, m_Neg(m_Specific(TrueVal))))
        std::swap(LHS, RHS);

      // (X >s 0) ? -X : X or (X >s -1) ? -X : X --> NABS(X)
      // (-X >s 0) ? X : -X or (-X >s -1) ? X : -X --> NABS(X)
      if (Pred == ICmpInst::ICMP_SGT && match(CmpRHS, ZeroOrAllOnes))
        return {SPF_NABS, SPNB_NA, false};

      // (X <s 0) ? -X : X or (X <s 1) ? -X : X --> ABS(X)
      // (-X <s 0) ? X : -X or (-X <s 1) ? X : -X --> ABS(X)
      if (Pred == ICmpInst::ICMP_SLT && match(CmpRHS, ZeroOrOne))
        return {SPF_ABS, SPNB_NA, false};
    }
  }

  if (CmpInst::isIntPredicate(Pred))
    return matchMinMax(Pred, CmpLHS, CmpRHS, TrueVal, FalseVal, LHS, RHS, Depth);

  // According to (IEEE 754-2008 5.3.1), minNum(0.0, -0.0) and similar
  // may return either -0.0 or 0.0, so fcmp/select pair has stricter
  // semantics than minNum. Be conservative in such case.
  if (NaNBehavior != SPNB_RETURNS_ANY ||
      (!FMF.noSignedZeros() && !isKnownNonZero(CmpLHS) &&
       !isKnownNonZero(CmpRHS)))
    return {SPF_UNKNOWN, SPNB_NA, false};

  return matchFastFloatClamp(Pred, CmpLHS, CmpRHS, TrueVal, FalseVal, LHS, RHS);
}
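
// End-to-end example for the FP path above (illustrative): without nnan,
//   %c = fcmp olt float %x, %y
//   %s = select i1 %c, float %x, float %y
// is reported as SPF_FMINNUM with Ordered=true and a NaN behavior that
// depends on which operand is known non-NaN; callers must honor the reported
// behavior before rewriting the pair to a minnum-style operation.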
/// Helps to match a select pattern in case of a type mismatch.
///
/// The function processes the case when type of true and false values of a
/// select instruction differs from type of the cmp instruction operands because
/// of a cast instruction. The function checks if it is legal to move the cast
/// operation after "select". If yes, it returns the new second value of
/// "select" (with the assumption that cast is moved):
/// 1. As operand of cast instruction when both values of "select" are same cast
/// instructions.
/// 2. As restored constant (by applying reverse cast operation) when the first
/// value of the "select" is a cast operation and the second value is a
/// constant.
///
/// NOTE: We return only the new second value because the first value could be
/// accessed as operand of cast instruction.
static Value *lookThroughCast(CmpInst *CmpI, Value *V1, Value *V2,
                              Instruction::CastOps *CastOp) {
  auto *Cast1 = dyn_cast<CastInst>(V1);
  if (!Cast1)
    return nullptr;

  *CastOp = Cast1->getOpcode();
  Type *SrcTy = Cast1->getSrcTy();
  if (auto *Cast2 = dyn_cast<CastInst>(V2)) {
    // If V1 and V2 are both the same cast from the same type, look through V1.
    if (*CastOp == Cast2->getOpcode() && SrcTy == Cast2->getSrcTy())
      return Cast2->getOperand(0);
    return nullptr;
  }

  auto *C = dyn_cast<Constant>(V2);
  if (!C)
    return nullptr;

  const DataLayout &DL = CmpI->getDataLayout();
  Constant *CastedTo = nullptr;
  switch (*CastOp) {
  case Instruction::ZExt:
    if (CmpI->isUnsigned())
      CastedTo = ConstantExpr::getTrunc(C, SrcTy);
    break;
  case Instruction::SExt:
    if (CmpI->isSigned())
      CastedTo = ConstantExpr::getTrunc(C, SrcTy, true);
    break;
  case Instruction::Trunc:
    Constant *CmpConst;
    if (match(CmpI->getOperand(1), m_Constant(CmpConst)) &&
        CmpConst->getType() == SrcTy) {
      // Here we have the following case:
      //
      //   %cond = cmp iN %x, CmpConst
      //   %tr = trunc iN %x to iK
      //   %narrowsel = select i1 %cond, iK %tr, iK C
      //
      // We can always move trunc after select operation:
      //
      //   %cond = cmp iN %x, CmpConst
      //   %widesel = select i1 %cond, iN %x, iN CmpConst
      //   %tr = trunc iN %widesel to iK
      //
      // Note that C could be extended in any way because we don't care about
      // upper bits after truncation. It can't be abs pattern, because it would
      // look like:
      //
      //   select i1 %cond, x, -x.
      //
      // So only min/max pattern could be matched. Such match requires widened C
      // == CmpConst. That is why set widened C = CmpConst, condition trunc
      // CmpConst == C is checked below.
      CastedTo = CmpConst;
    } else {
      unsigned ExtOp = CmpI->isSigned() ? Instruction::SExt : Instruction::ZExt;
      CastedTo = ConstantFoldCastOperand(ExtOp, C, SrcTy, DL);
    }
    break;
  case Instruction::FPTrunc:
    CastedTo = ConstantFoldCastOperand(Instruction::FPExt, C, SrcTy, DL);
    break;
  case Instruction::FPExt:
    CastedTo = ConstantFoldCastOperand(Instruction::FPTrunc, C, SrcTy, DL);
    break;
  case Instruction::FPToUI:
    CastedTo = ConstantFoldCastOperand(Instruction::UIToFP, C, SrcTy, DL);
    break;
  case Instruction::FPToSI:
    CastedTo = ConstantFoldCastOperand(Instruction::SIToFP, C, SrcTy, DL);
    break;
  case Instruction::UIToFP:
    CastedTo = ConstantFoldCastOperand(Instruction::FPToUI, C, SrcTy, DL);
    break;
  case Instruction::SIToFP:
    CastedTo = ConstantFoldCastOperand(Instruction::FPToSI, C, SrcTy, DL);
    break;
  default:
    break;
  }

  if (!CastedTo)
    return nullptr;

  // Make sure the cast doesn't lose any information.
  Constant *CastedBack =
      ConstantFoldCastOperand(*CastOp, CastedTo, C->getType(), DL);
  if (CastedBack && CastedBack != C)
    return nullptr;

  return CastedTo;
}
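
// Example of the Trunc case above (illustrative):
//   %cond      = icmp sgt i32 %x, 100
//   %tr        = trunc i32 %x to i8
//   %narrowsel = select i1 %cond, i8 %tr, i8 100
// Here CmpConst (i32 100) is taken as the widened constant; the round-trip
// check confirms trunc(i32 100) equals the original i8 100, so the caller
// may reason about the widened "select i1 %cond, i32 %x, i32 100" instead.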
SelectPatternResult llvm::matchSelectPattern(Value *V, Value *&LHS, Value *&RHS,
                                             Instruction::CastOps *CastOp,
                                             unsigned Depth) {
  if (Depth >= MaxAnalysisRecursionDepth)
    return {SPF_UNKNOWN, SPNB_NA, false};

  SelectInst *SI = dyn_cast<SelectInst>(V);
  if (!SI) return {SPF_UNKNOWN, SPNB_NA, false};

  CmpInst *CmpI = dyn_cast<CmpInst>(SI->getCondition());
  if (!CmpI) return {SPF_UNKNOWN, SPNB_NA, false};

  Value *TrueVal = SI->getTrueValue();
  Value *FalseVal = SI->getFalseValue();

  return llvm::matchDecomposedSelectPattern(CmpI, TrueVal, FalseVal, LHS, RHS,
                                            CastOp, Depth);
}
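
// Typical use of the public entry point (illustrative sketch):
//   Value *LHS, *RHS;
//   SelectPatternResult Res = matchSelectPattern(Sel, LHS, RHS);
//   if (SelectPatternResult::isMinOrMax(Res.Flavor))
//     ... treat Sel as a min/max of LHS and RHS ...
// where Sel is some select instruction chosen by the caller.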
SelectPatternResult llvm::matchDecomposedSelectPattern(
    CmpInst *CmpI, Value *TrueVal, Value *FalseVal, Value *&LHS, Value *&RHS,
    Instruction::CastOps *CastOp, unsigned Depth) {
  CmpInst::Predicate Pred = CmpI->getPredicate();
  Value *CmpLHS = CmpI->getOperand(0);
  Value *CmpRHS = CmpI->getOperand(1);
  FastMathFlags FMF;
  if (isa<FPMathOperator>(CmpI))
    FMF = CmpI->getFastMathFlags();

  // Bail out early.
  if (CmpI->isEquality())
    return {SPF_UNKNOWN, SPNB_NA, false};

  // Deal with type mismatches.
  if (CastOp && CmpLHS->getType() != TrueVal->getType()) {
    if (Value *C = lookThroughCast(CmpI, TrueVal, FalseVal, CastOp)) {
      // If this is a potential fmin/fmax with a cast to integer, then ignore
      // -0.0 because there is no corresponding integer value.
      if (*CastOp == Instruction::FPToSI || *CastOp == Instruction::FPToUI)
        FMF.setNoSignedZeros();
      return ::matchSelectPattern(Pred, FMF, CmpLHS, CmpRHS,
                                  cast<CastInst>(TrueVal)->getOperand(0), C,
                                  LHS, RHS, Depth);
    }
    if (Value *C = lookThroughCast(CmpI, FalseVal, TrueVal, CastOp)) {
      // If this is a potential fmin/fmax with a cast to integer, then ignore
      // -0.0 because there is no corresponding integer value.
      if (*CastOp == Instruction::FPToSI || *CastOp == Instruction::FPToUI)
        FMF.setNoSignedZeros();
      return ::matchSelectPattern(Pred, FMF, CmpLHS, CmpRHS,
                                  C, cast<CastInst>(FalseVal)->getOperand(0),
                                  LHS, RHS, Depth);
    }
  }
  return ::matchSelectPattern(Pred, FMF, CmpLHS, CmpRHS, TrueVal, FalseVal,
                              LHS, RHS, Depth);
}
CmpInst::Predicate llvm::getMinMaxPred(SelectPatternFlavor SPF, bool Ordered) {
  if (SPF == SPF_SMIN) return ICmpInst::ICMP_SLT;
  if (SPF == SPF_UMIN) return ICmpInst::ICMP_ULT;
  if (SPF == SPF_SMAX) return ICmpInst::ICMP_SGT;
  if (SPF == SPF_UMAX) return ICmpInst::ICMP_UGT;
  if (SPF == SPF_FMINNUM)
    return Ordered ? FCmpInst::FCMP_OLT : FCmpInst::FCMP_ULT;
  if (SPF == SPF_FMAXNUM)
    return Ordered ? FCmpInst::FCMP_OGT : FCmpInst::FCMP_UGT;
  llvm_unreachable("unhandled!");
}

SelectPatternFlavor llvm::getInverseMinMaxFlavor(SelectPatternFlavor SPF) {
  if (SPF == SPF_SMIN) return SPF_SMAX;
  if (SPF == SPF_UMIN) return SPF_UMAX;
  if (SPF == SPF_SMAX) return SPF_SMIN;
  if (SPF == SPF_UMAX) return SPF_UMIN;
  llvm_unreachable("unhandled!");
}

Intrinsic::ID llvm::getInverseMinMaxIntrinsic(Intrinsic::ID MinMaxID) {
  switch (MinMaxID) {
  case Intrinsic::smax: return Intrinsic::smin;
  case Intrinsic::smin: return Intrinsic::smax;
  case Intrinsic::umax: return Intrinsic::umin;
  case Intrinsic::umin: return Intrinsic::umax;
  // Please note that the next four intrinsics may produce the same result for
  // the original and inverted case even if X != Y, because NaN is handled
  // specially.
  case Intrinsic::maximum: return Intrinsic::minimum;
  case Intrinsic::minimum: return Intrinsic::maximum;
  case Intrinsic::maxnum: return Intrinsic::minnum;
  case Intrinsic::minnum: return Intrinsic::maxnum;
  default: llvm_unreachable("Unexpected intrinsic");
  }
}
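
// Concrete instance of the NaN note above (illustrative): for X = 1.0 and
// Y = NaN, both maximum(X, Y) and minimum(X, Y) evaluate to NaN, while
// maxnum(X, Y) and minnum(X, Y) both evaluate to 1.0 -- so inverting the
// intrinsic ID alone need not change the result on such inputs.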
APInt llvm::getMinMaxLimit(SelectPatternFlavor SPF, unsigned BitWidth) {
  switch (SPF) {
  case SPF_SMAX: return APInt::getSignedMaxValue(BitWidth);
  case SPF_SMIN: return APInt::getSignedMinValue(BitWidth);
  case SPF_UMAX: return APInt::getMaxValue(BitWidth);
  case SPF_UMIN: return APInt::getMinValue(BitWidth);
  default: llvm_unreachable("Unexpected flavor");
  }
}
std::pair<Intrinsic::ID, bool>
llvm::canConvertToMinOrMaxIntrinsic(ArrayRef<Value *> VL) {
  // Check if VL contains select instructions that can be folded into a min/max
  // vector intrinsic and return the intrinsic if it is possible.
  // TODO: Support floating point min/max.
  bool AllCmpSingleUse = true;
  SelectPatternResult SelectPattern;
  SelectPattern.Flavor = SPF_UNKNOWN;
  if (all_of(VL, [&SelectPattern, &AllCmpSingleUse](Value *I) {
        Value *LHS, *RHS;
        auto CurrentPattern = matchSelectPattern(I, LHS, RHS);
        if (!SelectPatternResult::isMinOrMax(CurrentPattern.Flavor))
          return false;
        if (SelectPattern.Flavor != SPF_UNKNOWN &&
            SelectPattern.Flavor != CurrentPattern.Flavor)
          return false;
        SelectPattern = CurrentPattern;
        AllCmpSingleUse &=
            match(I, m_Select(m_OneUse(m_Value()), m_Value(), m_Value()));
        return true;
      })) {
    switch (SelectPattern.Flavor) {
    case SPF_SMIN:
      return {Intrinsic::smin, AllCmpSingleUse};
    case SPF_UMIN:
      return {Intrinsic::umin, AllCmpSingleUse};
    case SPF_SMAX:
      return {Intrinsic::smax, AllCmpSingleUse};
    case SPF_UMAX:
      return {Intrinsic::umax, AllCmpSingleUse};
    case SPF_FMAXNUM:
      return {Intrinsic::maxnum, AllCmpSingleUse};
    case SPF_FMINNUM:
      return {Intrinsic::minnum, AllCmpSingleUse};
    default:
      llvm_unreachable("unexpected select pattern flavor");
    }
  }
  return {Intrinsic::not_intrinsic, false};
}
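
// Usage sketch (illustrative): given a vector VL of selects that all reduce
// the same way, e.g. each of the form (icmp slt a, b) ? a : b,
//   auto [ID, AllCmpSingleUse] = canConvertToMinOrMaxIntrinsic(VL);
// yields {Intrinsic::smin, true} when every feeding compare has a single
// use, so the caller can emit one smin-based reduction.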
bool llvm::matchSimpleRecurrence(const PHINode *P, BinaryOperator *&BO,
                                 Value *&Start, Value *&Step) {
  // Handle the case of a simple two-predecessor recurrence PHI.
  // There's a lot more that could theoretically be done here, but
  // this is sufficient to catch some interesting cases.
  if (P->getNumIncomingValues() != 2)
    return false;

  for (unsigned i = 0; i != 2; ++i) {
    Value *L = P->getIncomingValue(i);
    Value *R = P->getIncomingValue(!i);
    auto *LU = dyn_cast<BinaryOperator>(L);
    if (!LU)
      continue;
    unsigned Opcode = LU->getOpcode();

    switch (Opcode) {
    default:
      continue;
    // TODO: Expand list -- xor, div, gep, uaddo, etc..
    case Instruction::LShr:
    case Instruction::AShr:
    case Instruction::Shl:
    case Instruction::Add:
    case Instruction::Sub:
    case Instruction::And:
    case Instruction::Or:
    case Instruction::Mul:
    case Instruction::FMul: {
      Value *LL = LU->getOperand(0);
      Value *LR = LU->getOperand(1);
      // Find a recurrence.
      if (LL == P)
        L = LR;
      else if (LR == P)
        L = LL;
      else
        continue; // Check for recurrence with L and R flipped.

      break; // Match!
    }
    };

    // We have matched a recurrence of the form:
    //   %iv = [R, %entry], [%iv.next, %backedge]
    //   %iv.next = binop %iv, L
    // OR
    //   %iv = [R, %entry], [%iv.next, %backedge]
    //   %iv.next = binop L, %iv
    BO = LU;
    Start = R;
    Step = L;
    return true;
  }
  return false;
}
bool llvm::matchSimpleRecurrence(const BinaryOperator *I, PHINode *&P,
                                 Value *&Start, Value *&Step) {
  BinaryOperator *BO = nullptr;
  P = dyn_cast<PHINode>(I->getOperand(0));
  if (!P)
    P = dyn_cast<PHINode>(I->getOperand(1));
  return P && matchSimpleRecurrence(P, BO, Start, Step) && BO == I;
}
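
// Illustrative IR matched by the two overloads above:
//   loop:
//     %iv      = phi i32 [ %start, %entry ], [ %iv.next, %loop ]
//     %iv.next = add i32 %iv, %step
// Both directions agree on the decomposition: P = %iv, BO = %iv.next,
// Start = %start, and Step = %step.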
/// Return true if "icmp Pred LHS RHS" is always true.
static bool isTruePredicate(CmpInst::Predicate Pred, const Value *LHS,
                            const Value *RHS) {
  if (ICmpInst::isTrueWhenEqual(Pred) && LHS == RHS)
    return true;

  switch (Pred) {
  default:
    return false;

  case CmpInst::ICMP_SLE: {
    const APInt *C;

    // LHS s<= LHS +_{nsw} C   if C >= 0
    // LHS s<= LHS | C         if C >= 0
    if (match(RHS, m_NSWAdd(m_Specific(LHS), m_APInt(C))) ||
        match(RHS, m_Or(m_Specific(LHS), m_APInt(C))))
      return !C->isNegative();

    // LHS s<= smax(LHS, V) for any V
    if (match(RHS, m_c_SMax(m_Specific(LHS), m_Value())))
      return true;

    // smin(RHS, V) s<= RHS for any V
    if (match(LHS, m_c_SMin(m_Specific(RHS), m_Value())))
      return true;

    // Match A to (X +_{nsw} CA) and B to (X +_{nsw} CB)
    const Value *X;
    const APInt *CLHS, *CRHS;
    if (match(LHS, m_NSWAddLike(m_Value(X), m_APInt(CLHS))) &&
        match(RHS, m_NSWAddLike(m_Specific(X), m_APInt(CRHS))))
      return CLHS->sle(*CRHS);

    return false;
  }

  case CmpInst::ICMP_ULE: {
    // LHS u<= LHS +_{nuw} V for any V
    if (match(RHS, m_c_Add(m_Specific(LHS), m_Value())) &&
        cast<OverflowingBinaryOperator>(RHS)->hasNoUnsignedWrap())
      return true;

    // LHS u<= LHS | V for any V
    if (match(RHS, m_c_Or(m_Specific(LHS), m_Value())))
      return true;

    // LHS u<= umax(LHS, V) for any V
    if (match(RHS, m_c_UMax(m_Specific(LHS), m_Value())))
      return true;

    // RHS >> V u<= RHS for any V
    if (match(LHS, m_LShr(m_Specific(RHS), m_Value())))
      return true;

    // RHS u/ C_ugt_1 u<= RHS
    const APInt *C;
    if (match(LHS, m_UDiv(m_Specific(RHS), m_APInt(C))) && C->ugt(1))
      return true;

    // RHS & V u<= RHS for any V
    if (match(LHS, m_c_And(m_Specific(RHS), m_Value())))
      return true;

    // umin(RHS, V) u<= RHS for any V
    if (match(LHS, m_c_UMin(m_Specific(RHS), m_Value())))
      return true;

    // Match A to (X +_{nuw} CA) and B to (X +_{nuw} CB)
    const Value *X;
    const APInt *CLHS, *CRHS;
    if (match(LHS, m_NUWAddLike(m_Value(X), m_APInt(CLHS))) &&
        match(RHS, m_NUWAddLike(m_Specific(X), m_APInt(CRHS))))
      return CLHS->ule(*CRHS);

    return false;
  }
  }
}
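
// Example for the ICMP_ULE clause above (illustrative): with
//   LHS: %q = udiv i32 %x, 8
//   RHS: %x
// the "RHS u/ C_ugt_1 u<= RHS" rule applies (8 u> 1), so
// isTruePredicate(ICMP_ULE, %q, %x) returns true.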
/// Return true if "icmp Pred BLHS BRHS" is true whenever "icmp Pred
/// ALHS ARHS" is true.  Otherwise, return std::nullopt.
static std::optional<bool>
isImpliedCondOperands(CmpInst::Predicate Pred, const Value *ALHS,
                      const Value *ARHS, const Value *BLHS, const Value *BRHS) {
  switch (Pred) {
  default:
    return std::nullopt;

  case CmpInst::ICMP_SLT:
  case CmpInst::ICMP_SLE:
    if (isTruePredicate(CmpInst::ICMP_SLE, BLHS, ALHS) &&
        isTruePredicate(CmpInst::ICMP_SLE, ARHS, BRHS))
      return true;
    return std::nullopt;

  case CmpInst::ICMP_SGT:
  case CmpInst::ICMP_SGE:
    if (isTruePredicate(CmpInst::ICMP_SLE, ALHS, BLHS) &&
        isTruePredicate(CmpInst::ICMP_SLE, BRHS, ARHS))
      return true;
    return std::nullopt;

  case CmpInst::ICMP_ULT:
  case CmpInst::ICMP_ULE:
    if (isTruePredicate(CmpInst::ICMP_ULE, BLHS, ALHS) &&
        isTruePredicate(CmpInst::ICMP_ULE, ARHS, BRHS))
      return true;
    return std::nullopt;

  case CmpInst::ICMP_UGT:
  case CmpInst::ICMP_UGE:
    if (isTruePredicate(CmpInst::ICMP_ULE, ALHS, BLHS) &&
        isTruePredicate(CmpInst::ICMP_ULE, BRHS, ARHS))
      return true;
    return std::nullopt;
  }
}
/// Return true if "icmp1 LPred X, Y" implies "icmp2 RPred X, Y" is true.
/// Return false if "icmp1 LPred X, Y" implies "icmp2 RPred X, Y" is false.
/// Otherwise, return std::nullopt if we can't infer anything.
static std::optional<bool>
isImpliedCondMatchingOperands(CmpInst::Predicate LPred,
                              CmpInst::Predicate RPred) {
  if (CmpInst::isImpliedTrueByMatchingCmp(LPred, RPred))
    return true;
  if (CmpInst::isImpliedFalseByMatchingCmp(LPred, RPred))
    return false;
  return std::nullopt;
}
/// Return true if "icmp LPred X, LCR" implies "icmp RPred X, RCR" is true.
/// Return false if "icmp LPred X, LCR" implies "icmp RPred X, RCR" is false.
/// Otherwise, return std::nullopt if we can't infer anything.
static std::optional<bool> isImpliedCondCommonOperandWithCR(
    CmpInst::Predicate LPred, const ConstantRange &LCR,
    CmpInst::Predicate RPred, const ConstantRange &RCR) {
  ConstantRange DomCR = ConstantRange::makeAllowedICmpRegion(LPred, LCR);
  // If all true values for lhs and true for rhs, lhs implies rhs
  if (DomCR.icmp(RPred, RCR))
    return true;
  // If there is no overlap, lhs implies not rhs
  if (DomCR.icmp(CmpInst::getInversePredicate(RPred), RCR))
    return false;
  return std::nullopt;
}
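
// Worked example (illustrative): LPred = ult with LCR = {8} gives
// DomCR = [0, 8). For RPred = ne with RCR = {10}, every value in DomCR
// compares not-equal to 10, so "X u< 8" implies "X != 10" is true; for
// RPred = ugt with RCR = {7}, every value in DomCR satisfies the inverse
// predicate (ule 7), so the implication is known false.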
/// Return true if LHS implies RHS (expanded to its components as "R0 RPred R1")
/// is true.  Return false if LHS implies RHS is false.  Otherwise, return
/// std::nullopt if we can't infer anything.
static std::optional<bool> isImpliedCondICmps(const ICmpInst *LHS,
                                              CmpInst::Predicate RPred,
                                              const Value *R0, const Value *R1,
                                              const DataLayout &DL,
                                              bool LHSIsTrue) {
  Value *L0 = LHS->getOperand(0);
  Value *L1 = LHS->getOperand(1);

  // The rest of the logic assumes the LHS condition is true. If that's not the
  // case, invert the predicate to make it so.
  CmpInst::Predicate LPred =
      LHSIsTrue ? LHS->getPredicate() : LHS->getInversePredicate();

  // We can have non-canonical operands, so try to normalize any common operand
  // to L0/R0.
  if (L0 == R1) {
    std::swap(R0, R1);
    RPred = ICmpInst::getSwappedPredicate(RPred);
  }
  if (R0 == L1) {
    std::swap(L0, L1);
    LPred = ICmpInst::getSwappedPredicate(LPred);
  }
  if (L1 == R1) {
    // If we have L0 == R0 and L1 == R1, then make L1/R1 the constants.
    if (L0 != R0 || match(L0, m_ImmConstant())) {
      std::swap(L0, L1);
      LPred = ICmpInst::getSwappedPredicate(LPred);
      std::swap(R0, R1);
      RPred = ICmpInst::getSwappedPredicate(RPred);
    }
  }

  // See if we can infer anything if operand-0 matches and we have at least one
  // constant.
  const APInt *Unused;
  if (L0 == R0 && (match(L1, m_APInt(Unused)) || match(R1, m_APInt(Unused)))) {
    // Potential TODO: We could also further use the constant range of L0/R0 to
    // further constraint the constant ranges. At the moment this leads to
    // several regressions related to not transforming `multi_use(A + C0) eq/ne
    // C1` (see discussion: D58633).
    ConstantRange LCR = computeConstantRange(
        L1, ICmpInst::isSigned(LPred), /* UseInstrInfo=*/true, /*AC=*/nullptr,
        /*CxtI=*/nullptr, /*DT=*/nullptr, MaxAnalysisRecursionDepth - 1);
    ConstantRange RCR = computeConstantRange(
        R1, ICmpInst::isSigned(RPred), /* UseInstrInfo=*/true, /*AC=*/nullptr,
        /*CxtI=*/nullptr, /*DT=*/nullptr, MaxAnalysisRecursionDepth - 1);
    // Even if L1/R1 are not both constant, we can still sometimes deduce
    // relationship from a single constant. For example X u> Y implies X != 0.
    if (auto R = isImpliedCondCommonOperandWithCR(LPred, LCR, RPred, RCR))
      return R;
    // If both L1/R1 were exact constant ranges and we didn't get anything
    // here, we won't be able to deduce this.
    if (match(L1, m_APInt(Unused)) && match(R1, m_APInt(Unused)))
      return std::nullopt;
  }

  // Can we infer anything when the two compares have matching operands?
  if (L0 == R0 && L1 == R1)
    return isImpliedCondMatchingOperands(LPred, RPred);

  // L0 = R0 = L1 + R1, L0 >=u L1 implies R0 >=u R1, L0 <u L1 implies R0 <u R1
  if (L0 == R0 &&
      (LPred == ICmpInst::ICMP_ULT || LPred == ICmpInst::ICMP_UGE) &&
      (RPred == ICmpInst::ICMP_ULT || RPred == ICmpInst::ICMP_UGE) &&
      match(L0, m_c_Add(m_Specific(L1), m_Specific(R1))))
    return LPred == RPred;

  if (LPred == RPred)
    return isImpliedCondOperands(LPred, L0, L1, R0, R1);

  return std::nullopt;
}
/// Return true if LHS implies RHS is true.  Return false if LHS implies RHS is
/// false.  Otherwise, return std::nullopt if we can't infer anything.  We
/// expect the RHS to be an icmp and the LHS to be an 'and', 'or', or a 'select'
/// instruction.
static std::optional<bool>
isImpliedCondAndOr(const Instruction *LHS, CmpInst::Predicate RHSPred,
                   const Value *RHSOp0, const Value *RHSOp1,
                   const DataLayout &DL, bool LHSIsTrue, unsigned Depth) {
  // The LHS must be an 'or', 'and', or a 'select' instruction.
  assert((LHS->getOpcode() == Instruction::And ||
          LHS->getOpcode() == Instruction::Or ||
          LHS->getOpcode() == Instruction::Select) &&
         "Expected LHS to be 'and', 'or', or 'select'.");

  assert(Depth <= MaxAnalysisRecursionDepth && "Hit recursion limit");

  // If the result of an 'or' is false, then we know both legs of the 'or' are
  // false.  Similarly, if the result of an 'and' is true, then we know both
  // legs of the 'and' are true.
  const Value *ALHS, *ARHS;
  if ((!LHSIsTrue && match(LHS, m_LogicalOr(m_Value(ALHS), m_Value(ARHS)))) ||
      (LHSIsTrue && match(LHS, m_LogicalAnd(m_Value(ALHS), m_Value(ARHS))))) {
    // FIXME: Make this non-recursion.
    if (std::optional<bool> Implication = isImpliedCondition(
            ALHS, RHSPred, RHSOp0, RHSOp1, DL, LHSIsTrue, Depth + 1))
      return Implication;
    if (std::optional<bool> Implication = isImpliedCondition(
            ARHS, RHSPred, RHSOp0, RHSOp1, DL, LHSIsTrue, Depth + 1))
      return Implication;
    return std::nullopt;
  }
  return std::nullopt;
}
std::optional<bool>
llvm::isImpliedCondition(const Value *LHS, CmpInst::Predicate RHSPred,
                         const Value *RHSOp0, const Value *RHSOp1,
                         const DataLayout &DL, bool LHSIsTrue, unsigned Depth) {
  // Bail out when we hit the limit.
  if (Depth == MaxAnalysisRecursionDepth)
    return std::nullopt;

  // A mismatch occurs when we compare a scalar cmp to a vector cmp, for
  // example.
  if (RHSOp0->getType()->isVectorTy() != LHS->getType()->isVectorTy())
    return std::nullopt;

  assert(LHS->getType()->isIntOrIntVectorTy(1) &&
         "Expected integer type only!");

  // Match not
  if (match(LHS, m_Not(m_Value(LHS))))
    LHSIsTrue = !LHSIsTrue;

  // Both LHS and RHS are icmps.
  const ICmpInst *LHSCmp = dyn_cast<ICmpInst>(LHS);
  if (LHSCmp)
    return isImpliedCondICmps(LHSCmp, RHSPred, RHSOp0, RHSOp1, DL, LHSIsTrue);

  /// The LHS should be an 'or', 'and', or a 'select' instruction.  We expect
  /// the RHS to be an icmp.
  /// FIXME: Add support for and/or/select on the RHS.
  if (const Instruction *LHSI = dyn_cast<Instruction>(LHS)) {
    if ((LHSI->getOpcode() == Instruction::And ||
         LHSI->getOpcode() == Instruction::Or ||
         LHSI->getOpcode() == Instruction::Select))
      return isImpliedCondAndOr(LHSI, RHSPred, RHSOp0, RHSOp1, DL, LHSIsTrue,
                                Depth);
  }
  return std::nullopt;
}
std::optional<bool> llvm::isImpliedCondition(const Value *LHS, const Value *RHS,
                                             const DataLayout &DL,
                                             bool LHSIsTrue, unsigned Depth) {
  // LHS ==> RHS by definition
  if (LHS == RHS)
    return LHSIsTrue;

  // Match not
  bool InvertRHS = false;
  if (match(RHS, m_Not(m_Value(RHS)))) {
    if (LHS == RHS)
      return !LHSIsTrue;
    InvertRHS = true;
  }

  if (const ICmpInst *RHSCmp = dyn_cast<ICmpInst>(RHS)) {
    if (auto Implied = isImpliedCondition(
            LHS, RHSCmp->getPredicate(), RHSCmp->getOperand(0),
            RHSCmp->getOperand(1), DL, LHSIsTrue, Depth))
      return InvertRHS ? !*Implied : *Implied;
    return std::nullopt;
  }

  if (Depth == MaxAnalysisRecursionDepth)
    return std::nullopt;

  // LHS ==> (RHS1 || RHS2) if LHS ==> RHS1 or LHS ==> RHS2
  // LHS ==> !(RHS1 && RHS2) if LHS ==> !RHS1 or LHS ==> !RHS2
  const Value *RHS1, *RHS2;
  if (match(RHS, m_LogicalOr(m_Value(RHS1), m_Value(RHS2)))) {
    if (std::optional<bool> Imp =
            isImpliedCondition(LHS, RHS1, DL, LHSIsTrue, Depth + 1))
      if (*Imp == true)
        return !InvertRHS;
    if (std::optional<bool> Imp =
            isImpliedCondition(LHS, RHS2, DL, LHSIsTrue, Depth + 1))
      if (*Imp == true)
        return !InvertRHS;
  }
  if (match(RHS, m_LogicalAnd(m_Value(RHS1), m_Value(RHS2)))) {
    if (std::optional<bool> Imp =
            isImpliedCondition(LHS, RHS1, DL, LHSIsTrue, Depth + 1))
      if (*Imp == false)
        return InvertRHS;
    if (std::optional<bool> Imp =
            isImpliedCondition(LHS, RHS2, DL, LHSIsTrue, Depth + 1))
      if (*Imp == false)
        return InvertRHS;
  }

  return std::nullopt;
}
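
// Usage sketch for the isImpliedCondition() overloads (illustrative): given
//   %c1 = icmp ult i32 %x, 4
//   %c2 = icmp ult i32 %x, 8
// the call isImpliedCondition(C1, C2, DL, /*LHSIsTrue=*/true) returns true
// because [0, 4) is contained in [0, 8); an unrelated pair yields
// std::nullopt rather than a guess.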
// Returns a pair (Condition, ConditionIsTrue), where Condition is a branch
// condition dominating ContextI or nullptr, if no condition is found.
static std::pair<Value *, bool>
getDomPredecessorCondition(const Instruction *ContextI) {
  if (!ContextI || !ContextI->getParent())
    return {nullptr, false};

  // TODO: This is a poor/cheap way to determine dominance. Should we use a
  // dominator tree (eg, from a SimplifyQuery) instead?
  const BasicBlock *ContextBB = ContextI->getParent();
  const BasicBlock *PredBB = ContextBB->getSinglePredecessor();
  if (!PredBB)
    return {nullptr, false};

  // We need a conditional branch in the predecessor.
  Value *PredCond;
  BasicBlock *TrueBB, *FalseBB;
  if (!match(PredBB->getTerminator(), m_Br(m_Value(PredCond), TrueBB, FalseBB)))
    return {nullptr, false};

  // The branch should get simplified. Don't bother simplifying this condition.
  if (TrueBB == FalseBB)
    return {nullptr, false};

  assert((TrueBB == ContextBB || FalseBB == ContextBB) &&
         "Predecessor block does not point to successor?");

  // Is this condition implied by the predecessor condition?
  return {PredCond, TrueBB == ContextBB};
}
std::optional<bool> llvm::isImpliedByDomCondition(const Value *Cond,
                                                  const Instruction *ContextI,
                                                  const DataLayout &DL) {
  assert(Cond->getType()->isIntOrIntVectorTy(1) && "Condition must be bool");
  auto PredCond = getDomPredecessorCondition(ContextI);
  if (PredCond.first)
    return isImpliedCondition(PredCond.first, Cond, DL, PredCond.second);
  return std::nullopt;
}
std::optional<bool> llvm::isImpliedByDomCondition(CmpInst::Predicate Pred,
                                                  const Value *LHS,
                                                  const Value *RHS,
                                                  const Instruction *ContextI,
                                                  const DataLayout &DL) {
  auto PredCond = getDomPredecessorCondition(ContextI);
  if (PredCond.first)
    return isImpliedCondition(PredCond.first, Pred, LHS, RHS, DL,
                              PredCond.second);
  return std::nullopt;
}
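
// Illustrative CFG for the dominating-condition queries above:
//   entry:
//     %c = icmp ult i32 %x, 10
//     br i1 %c, label %then, label %else
// Inside %then (single predecessor: entry),
// isImpliedByDomCondition(ICMP_ULT, %x, 20, CtxI, DL) returns true; inside
// %else the same query sees %c as false (%x u>= 10) and returns
// std::nullopt, since that says nothing about %x u< 20.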
static void setLimitsForBinOp(const BinaryOperator &BO, APInt &Lower,
                              APInt &Upper, const InstrInfoQuery &IIQ,
                              bool PreferSignedRange) {
  unsigned Width = Lower.getBitWidth();
  const APInt *C;
  switch (BO.getOpcode()) {
  case Instruction::Add:
    if (match(BO.getOperand(1), m_APInt(C)) && !C->isZero()) {
      bool HasNSW = IIQ.hasNoSignedWrap(&BO);
      bool HasNUW = IIQ.hasNoUnsignedWrap(&BO);

      // If the caller expects a signed compare, then try to use a signed range.
      // Otherwise if both no-wraps are set, use the unsigned range because it
      // is never larger than the signed range. Example:
      // "add nuw nsw i8 X, -2" is unsigned [254,255] vs. signed [-128, 125].
      if (PreferSignedRange && HasNSW && HasNUW)
        HasNUW = false;

      if (HasNUW) {
        // 'add nuw x, C' produces [C, UINT_MAX].
        Lower = *C;
      } else if (HasNSW) {
        if (C->isNegative()) {
          // 'add nsw x, -C' produces [SINT_MIN, SINT_MAX - C].
          Lower = APInt::getSignedMinValue(Width);
          Upper = APInt::getSignedMaxValue(Width) + *C + 1;
        } else {
          // 'add nsw x, +C' produces [SINT_MIN + C, SINT_MAX].
          Lower = APInt::getSignedMinValue(Width) + *C;
          Upper = APInt::getSignedMaxValue(Width) + 1;
        }
      }
    }
    break;

  case Instruction::And:
    if (match(BO.getOperand(1), m_APInt(C)))
      // 'and x, C' produces [0, C].
      Upper = *C + 1;
    // X & -X is a power of two or zero. So we can cap the value at max power of
    // two.
    if (match(BO.getOperand(0), m_Neg(m_Specific(BO.getOperand(1)))) ||
        match(BO.getOperand(1), m_Neg(m_Specific(BO.getOperand(0)))))
      Upper = APInt::getSignedMinValue(Width) + 1;
    break;

  case Instruction::Or:
    if (match(BO.getOperand(1), m_APInt(C)))
      // 'or x, C' produces [C, UINT_MAX].
      Lower = *C;
    break;

  case Instruction::AShr:
    if (match(BO.getOperand(1), m_APInt(C)) && C->ult(Width)) {
      // 'ashr x, C' produces [INT_MIN >> C, INT_MAX >> C].
      Lower = APInt::getSignedMinValue(Width).ashr(*C);
      Upper = APInt::getSignedMaxValue(Width).ashr(*C) + 1;
    } else if (match(BO.getOperand(0), m_APInt(C))) {
      unsigned ShiftAmount = Width - 1;
      if (!C->isZero() && IIQ.isExact(&BO))
        ShiftAmount = C->countr_zero();
      if (C->isNegative()) {
        // 'ashr C, x' produces [C, C >> (Width-1)]
        Lower = *C;
        Upper = C->ashr(ShiftAmount) + 1;
      } else {
        // 'ashr C, x' produces [C >> (Width-1), C]
        Lower = C->ashr(ShiftAmount);
        Upper = *C + 1;
      }
    }
    break;

  case Instruction::LShr:
    if (match(BO.getOperand(1), m_APInt(C)) && C->ult(Width)) {
      // 'lshr x, C' produces [0, UINT_MAX >> C].
      Upper = APInt::getAllOnes(Width).lshr(*C) + 1;
    } else if (match(BO.getOperand(0), m_APInt(C))) {
      // 'lshr C, x' produces [C >> (Width-1), C].
      unsigned ShiftAmount = Width - 1;
      if (!C->isZero() && IIQ.isExact(&BO))
        ShiftAmount = C->countr_zero();
      Lower = C->lshr(ShiftAmount);
      Upper = *C + 1;
    }
    break;

  case Instruction::Shl:
    if (match(BO.getOperand(0), m_APInt(C))) {
      if (IIQ.hasNoUnsignedWrap(&BO)) {
        // 'shl nuw C, x' produces [C, C << CLZ(C)]
        Lower = *C;
        Upper = Lower.shl(Lower.countl_zero()) + 1;
      } else if (BO.hasNoSignedWrap()) { // TODO: What if both nuw+nsw?
        if (C->isNegative()) {
          // 'shl nsw C, x' produces [C << CLO(C)-1, C]
          unsigned ShiftAmount = C->countl_one() - 1;
          Lower = C->shl(ShiftAmount);
          Upper = *C + 1;
        } else {
          // 'shl nsw C, x' produces [C, C << CLZ(C)-1]
          unsigned ShiftAmount = C->countl_zero() - 1;
          Lower = *C;
          Upper = C->shl(ShiftAmount) + 1;
        }
      } else {
        // If lowbit is set, value can never be zero.
        if ((*C)[0])
          Lower = APInt::getOneBitSet(Width, 0);
        // If we are shifting a constant the largest it can be is if the longest
        // sequence of consecutive ones is shifted to the highbits (breaking
        // ties for which sequence is higher). At the moment we take a liberal
        // upper bound on this by just popcounting the constant.
        // TODO: There may be a bitwise trick for the longest/highest
        // consecutive sequence of ones (naive method is O(Width) loop).
        Upper = APInt::getHighBitsSet(Width, C->popcount()) + 1;
      }
    } else if (match(BO.getOperand(1), m_APInt(C)) && C->ult(Width)) {
      Upper = APInt::getBitsSetFrom(Width, C->getZExtValue()) + 1;
    }
    break;

  case Instruction::SDiv:
    if (match(BO.getOperand(1), m_APInt(C))) {
      APInt IntMin = APInt::getSignedMinValue(Width);
      APInt IntMax = APInt::getSignedMaxValue(Width);
      if (C->isAllOnes()) {
        // 'sdiv x, -1' produces [INT_MIN + 1, INT_MAX]
        //    where C != -1 and C != 0 and C != 1
        Lower = IntMin + 1;
        Upper = IntMax + 1;
      } else if (C->countl_zero() < Width - 1) {
        // 'sdiv x, C' produces [INT_MIN / C, INT_MAX / C]
        //    where C != -1 and C != 0 and C != 1
        Lower = IntMin.sdiv(*C);
        Upper = IntMax.sdiv(*C);
        if (Lower.sgt(Upper))
          std::swap(Lower, Upper);
        Upper = Upper + 1;
        assert(Upper != Lower && "Upper part of range has wrapped!");
      }
    } else if (match(BO.getOperand(0), m_APInt(C))) {
      if (C->isMinSignedValue()) {
        // 'sdiv INT_MIN, x' produces [INT_MIN, INT_MIN / -2].
        Lower = *C;
        Upper = Lower.lshr(1) + 1;
      } else {
        // 'sdiv C, x' produces [-|C|, |C|].
        Upper = C->abs() + 1;
        Lower = (-Upper) + 1;
      }
    }
    break;

  case Instruction::UDiv:
    if (match(BO.getOperand(1), m_APInt(C)) && !C->isZero()) {
      // 'udiv x, C' produces [0, UINT_MAX / C].
      Upper = APInt::getMaxValue(Width).udiv(*C) + 1;
    } else if (match(BO.getOperand(0), m_APInt(C))) {
      // 'udiv C, x' produces [0, C].
      Upper = *C + 1;
    }
    break;

  case Instruction::SRem:
    if (match(BO.getOperand(1), m_APInt(C))) {
      // 'srem x, C' produces (-|C|, |C|).
      Upper = C->abs();
      Lower = (-Upper) + 1;
    } else if (match(BO.getOperand(0), m_APInt(C))) {
      if (C->isNegative()) {
        // 'srem -|C|, x' produces [-|C|, 0].
        Lower = *C;
        Upper = 1;
      } else {
        // 'srem |C|, x' produces [0, |C|].
        Upper = *C + 1;
      }
    }
    break;

  case Instruction::URem:
    if (match(BO.getOperand(1), m_APInt(C)))
      // 'urem x, C' produces [0, C).
      Upper = *C;
    else if (match(BO.getOperand(0), m_APInt(C)))
      // 'urem C, x' produces [0, C].
      Upper = *C + 1;
    break;

  default:
    break;
  }
}
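
// Worked example (illustrative): for "add nsw i8 %x, 100" the Add clause
// above takes the positive-C branch and computes Lower = -128 + 100 = -28
// and Upper = 127 + 1, i.e. the range [-28, 127] -- no nsw result can lie
// below -28 because the addition would have overflowed first.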
static ConstantRange getRangeForIntrinsic(const IntrinsicInst &II) {
  unsigned Width = II.getType()->getScalarSizeInBits();
  const APInt *C;
  switch (II.getIntrinsicID()) {
  case Intrinsic::ctpop:
  case Intrinsic::ctlz:
  case Intrinsic::cttz:
    // Maximum of set/clear bits is the bit width.
    return ConstantRange::getNonEmpty(APInt::getZero(Width),
                                      APInt(Width, Width + 1));
  case Intrinsic::uadd_sat:
    // uadd.sat(x, C) produces [C, UINT_MAX].
    if (match(II.getOperand(0), m_APInt(C)) ||
        match(II.getOperand(1), m_APInt(C)))
      return ConstantRange::getNonEmpty(*C, APInt::getZero(Width));
    break;
  case Intrinsic::sadd_sat:
    if (match(II.getOperand(0), m_APInt(C)) ||
        match(II.getOperand(1), m_APInt(C))) {
      if (C->isNegative())
        // sadd.sat(x, -C) produces [SINT_MIN, SINT_MAX + (-C)].
        return ConstantRange::getNonEmpty(APInt::getSignedMinValue(Width),
                                          APInt::getSignedMaxValue(Width) + *C +
                                              1);
      // sadd.sat(x, +C) produces [SINT_MIN + C, SINT_MAX].
      return ConstantRange::getNonEmpty(APInt::getSignedMinValue(Width) + *C,
                                        APInt::getSignedMaxValue(Width) + 1);
    }
    break;
  case Intrinsic::usub_sat:
    // usub.sat(C, x) produces [0, C].
    if (match(II.getOperand(0), m_APInt(C)))
      return ConstantRange::getNonEmpty(APInt::getZero(Width), *C + 1);

    // usub.sat(x, C) produces [0, UINT_MAX - C].
    if (match(II.getOperand(1), m_APInt(C)))
      return ConstantRange::getNonEmpty(APInt::getZero(Width),
                                        APInt::getMaxValue(Width) - *C + 1);
    break;
  case Intrinsic::ssub_sat:
    if (match(II.getOperand(0), m_APInt(C))) {
      if (C->isNegative())
        // ssub.sat(-C, x) produces [SINT_MIN, -SINT_MIN + (-C)].
        return ConstantRange::getNonEmpty(APInt::getSignedMinValue(Width),
                                          *C - APInt::getSignedMinValue(Width) +
                                              1);
      // ssub.sat(+C, x) produces [-SINT_MAX + C, SINT_MAX].
      return ConstantRange::getNonEmpty(*C - APInt::getSignedMaxValue(Width),
                                        APInt::getSignedMaxValue(Width) + 1);
    } else if (match(II.getOperand(1), m_APInt(C))) {
      if (C->isNegative())
        // ssub.sat(x, -C) produces [SINT_MIN - (-C), SINT_MAX]:
        return ConstantRange::getNonEmpty(APInt::getSignedMinValue(Width) - *C,
                                          APInt::getSignedMaxValue(Width) + 1);

      // ssub.sat(x, +C) produces [SINT_MIN, SINT_MAX - C].
      return ConstantRange::getNonEmpty(APInt::getSignedMinValue(Width),
                                        APInt::getSignedMaxValue(Width) - *C +
                                            1);
    }
    break;
  case Intrinsic::umin:
  case Intrinsic::umax:
  case Intrinsic::smin:
  case Intrinsic::smax:
    if (!match(II.getOperand(0), m_APInt(C)) &&
        !match(II.getOperand(1), m_APInt(C)))
      break;

    switch (II.getIntrinsicID()) {
    case Intrinsic::umin:
      return ConstantRange::getNonEmpty(APInt::getZero(Width), *C + 1);
    case Intrinsic::umax:
      return ConstantRange::getNonEmpty(*C, APInt::getZero(Width));
    case Intrinsic::smin:
      return ConstantRange::getNonEmpty(APInt::getSignedMinValue(Width),
                                        *C + 1);
    case Intrinsic::smax:
      return ConstantRange::getNonEmpty(*C,
                                        APInt::getSignedMaxValue(Width) + 1);
    default:
      llvm_unreachable("Must be min/max intrinsic");
    }
    break;
  case Intrinsic::abs:
    // If abs of SIGNED_MIN is poison, then the result is [0..SIGNED_MAX],
    // otherwise it is [0..SIGNED_MIN], as -SIGNED_MIN == SIGNED_MIN.
    if (match(II.getOperand(1), m_One()))
      return ConstantRange::getNonEmpty(APInt::getZero(Width),
                                        APInt::getSignedMaxValue(Width) + 1);

    return ConstantRange::getNonEmpty(APInt::getZero(Width),
                                      APInt::getSignedMinValue(Width) + 1);
  case Intrinsic::vscale:
    if (!II.getParent() || !II.getFunction())
      break;
    return getVScaleRange(II.getFunction(), Width);
  case Intrinsic::scmp:
  case Intrinsic::ucmp:
    return ConstantRange::getNonEmpty(APInt::getAllOnes(Width),
                                      APInt(Width, 2));
  default:
    break;
  }

  return ConstantRange::getFull(Width);
}
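
// Example (illustrative): for an i32 result, scmp/ucmp only ever produce
// -1, 0, or 1, which is exactly the non-empty range [-1, 2) returned above;
// ctpop on i32 similarly lands in [0, 33).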
static ConstantRange getRangeForSelectPattern(const SelectInst &SI,
                                              const InstrInfoQuery &IIQ) {
  unsigned BitWidth = SI.getType()->getScalarSizeInBits();
  const Value *LHS = nullptr, *RHS = nullptr;
  SelectPatternResult R = matchSelectPattern(&SI, LHS, RHS);
  if (R.Flavor == SPF_UNKNOWN)
    return ConstantRange::getFull(BitWidth);

  if (R.Flavor == SelectPatternFlavor::SPF_ABS) {
    // If the negation part of the abs (in RHS) has the NSW flag,
    // then the result of abs(X) is [0..SIGNED_MAX],
    // otherwise it is [0..SIGNED_MIN], as -SIGNED_MIN == SIGNED_MIN.
    if (match(RHS, m_Neg(m_Specific(LHS))) &&
        IIQ.hasNoSignedWrap(cast<Instruction>(RHS)))
      return ConstantRange::getNonEmpty(APInt::getZero(BitWidth),
                                        APInt::getSignedMaxValue(BitWidth) + 1);

    return ConstantRange::getNonEmpty(APInt::getZero(BitWidth),
                                      APInt::getSignedMinValue(BitWidth) + 1);
  }

  if (R.Flavor == SelectPatternFlavor::SPF_NABS) {
    // The result of -abs(X) is <= 0.
    return ConstantRange::getNonEmpty(APInt::getSignedMinValue(BitWidth),
                                      APInt(BitWidth, 1));
  }

  const APInt *C;
  if (!match(LHS, m_APInt(C)) && !match(RHS, m_APInt(C)))
    return ConstantRange::getFull(BitWidth);

  switch (R.Flavor) {
  case SPF_UMIN:
    return ConstantRange::getNonEmpty(APInt::getZero(BitWidth), *C + 1);
  case SPF_UMAX:
    return ConstantRange::getNonEmpty(*C, APInt::getZero(BitWidth));
  case SPF_SMIN:
    return ConstantRange::getNonEmpty(APInt::getSignedMinValue(BitWidth),
                                      *C + 1);
  case SPF_SMAX:
    return ConstantRange::getNonEmpty(*C,
                                      APInt::getSignedMaxValue(BitWidth) + 1);
  default:
    return ConstantRange::getFull(BitWidth);
  }
}
static void setLimitForFPToI(const Instruction *I, APInt &Lower, APInt &Upper) {
  // The maximum representable value of a half is 65504. For floats the maximum
  // value is 3.4e38 which requires roughly 129 bits.
  unsigned BitWidth = I->getType()->getScalarSizeInBits();
  if (!I->getOperand(0)->getType()->getScalarType()->isHalfTy())
    return;
  if (isa<FPToSIInst>(I) && BitWidth >= 17) {
    Lower = APInt(BitWidth, -65504);
    Upper = APInt(BitWidth, 65505);
  }

  if (isa<FPToUIInst>(I) && BitWidth >= 16) {
    // For a fptoui the lower limit is left as 0.
    Upper = APInt(BitWidth, 65505);
  }
}
llvm::computeConstantRange(const Value
*V
, bool ForSigned
,
9687 bool UseInstrInfo
, AssumptionCache
*AC
,
9688 const Instruction
*CtxI
,
9689 const DominatorTree
*DT
,
9691 assert(V
->getType()->isIntOrIntVectorTy() && "Expected integer instruction");
9693 if (Depth
== MaxAnalysisRecursionDepth
)
9694 return ConstantRange::getFull(V
->getType()->getScalarSizeInBits());
9696 if (auto *C
= dyn_cast
<Constant
>(V
))
9697 return C
->toConstantRange();
9699 unsigned BitWidth
= V
->getType()->getScalarSizeInBits();
9700 InstrInfoQuery
IIQ(UseInstrInfo
);
9701 ConstantRange CR
= ConstantRange::getFull(BitWidth
);
9702 if (auto *BO
= dyn_cast
<BinaryOperator
>(V
)) {
9703 APInt Lower
= APInt(BitWidth
, 0);
9704 APInt Upper
= APInt(BitWidth
, 0);
9705 // TODO: Return ConstantRange.
9706 setLimitsForBinOp(*BO
, Lower
, Upper
, IIQ
, ForSigned
);
9707 CR
= ConstantRange::getNonEmpty(Lower
, Upper
);
9708 } else if (auto *II
= dyn_cast
<IntrinsicInst
>(V
))
9709 CR
= getRangeForIntrinsic(*II
);
9710 else if (auto *SI
= dyn_cast
<SelectInst
>(V
)) {
9711 ConstantRange CRTrue
= computeConstantRange(
9712 SI
->getTrueValue(), ForSigned
, UseInstrInfo
, AC
, CtxI
, DT
, Depth
+ 1);
9713 ConstantRange CRFalse
= computeConstantRange(
9714 SI
->getFalseValue(), ForSigned
, UseInstrInfo
, AC
, CtxI
, DT
, Depth
+ 1);
9715 CR
= CRTrue
.unionWith(CRFalse
);
9716 CR
= CR
.intersectWith(getRangeForSelectPattern(*SI
, IIQ
));
9717 } else if (isa
<FPToUIInst
>(V
) || isa
<FPToSIInst
>(V
)) {
9718 APInt Lower
= APInt(BitWidth
, 0);
9719 APInt Upper
= APInt(BitWidth
, 0);
9720 // TODO: Return ConstantRange.
9721 setLimitForFPToI(cast
<Instruction
>(V
), Lower
, Upper
);
9722 CR
= ConstantRange::getNonEmpty(Lower
, Upper
);
9723 } else if (const auto *A
= dyn_cast
<Argument
>(V
))
9724 if (std::optional
<ConstantRange
> Range
= A
->getRange())
9727 if (auto *I
= dyn_cast
<Instruction
>(V
)) {
9728 if (auto *Range
= IIQ
.getMetadata(I
, LLVMContext::MD_range
))
9729 CR
= CR
.intersectWith(getConstantRangeFromMetadata(*Range
));
9731 if (const auto *CB
= dyn_cast
<CallBase
>(V
))
9732 if (std::optional
<ConstantRange
> Range
= CB
->getRange())
9733 CR
= CR
.intersectWith(*Range
);
9737 // Try to restrict the range based on information from assumptions.
9738 for (auto &AssumeVH
: AC
->assumptionsFor(V
)) {
9741 CallInst
*I
= cast
<CallInst
>(AssumeVH
);
9742 assert(I
->getParent()->getParent() == CtxI
->getParent()->getParent() &&
9743 "Got assumption for the wrong function!");
9744 assert(I
->getIntrinsicID() == Intrinsic::assume
&&
9745 "must be an assume intrinsic");
9747 if (!isValidAssumeForContext(I
, CtxI
, DT
))
9749 Value
*Arg
= I
->getArgOperand(0);
9750 ICmpInst
*Cmp
= dyn_cast
<ICmpInst
>(Arg
);
9751 // Currently we just use information from comparisons.
9752 if (!Cmp
|| Cmp
->getOperand(0) != V
)
9754 // TODO: Set "ForSigned" parameter via Cmp->isSigned()?
9756 computeConstantRange(Cmp
->getOperand(1), /* ForSigned */ false,
9757 UseInstrInfo
, AC
, I
, DT
, Depth
+ 1);
9758 CR
= CR
.intersectWith(
9759 ConstantRange::makeAllowedICmpRegion(Cmp
->getPredicate(), RHS
));
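
// Usage sketch (illustrative): a caller that wants to prove a value fits in
// a byte might write
//   ConstantRange CR = computeConstantRange(V, /*ForSigned=*/false,
//                                           /*UseInstrInfo=*/true);
//   bool FitsInByte = CR.getUnsignedMax().ule(255);
// passing AC/CtxI/DT as well when assumption-based refinement is wanted.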
static void
addValueAffectedByCondition(Value *V,
                            function_ref<void(Value *)> InsertAffected) {
  assert(V != nullptr);
  if (isa<Argument>(V) || isa<GlobalValue>(V)) {
    InsertAffected(V);
  } else if (auto *I = dyn_cast<Instruction>(V)) {
    InsertAffected(V);

    // Peek through unary operators to find the source of the condition.
    Value *Op;
    if (match(I, m_CombineOr(m_PtrToInt(m_Value(Op)), m_Trunc(m_Value(Op))))) {
      if (isa<Instruction>(Op) || isa<Argument>(Op))
        InsertAffected(Op);
    }
  }
}
void llvm::findValuesAffectedByCondition(
    Value *Cond, bool IsAssume, function_ref<void(Value *)> InsertAffected) {
  auto AddAffected = [&InsertAffected](Value *V) {
    addValueAffectedByCondition(V, InsertAffected);
  };

  auto AddCmpOperands = [&AddAffected, IsAssume](Value *LHS, Value *RHS) {
    if (IsAssume) {
      AddAffected(LHS);
      AddAffected(RHS);
    } else if (match(RHS, m_Constant()))
      AddAffected(LHS);
  };

  SmallVector<Value *, 8> Worklist;
  SmallPtrSet<Value *, 8> Visited;
  Worklist.push_back(Cond);
  while (!Worklist.empty()) {
    Value *V = Worklist.pop_back_val();
    if (!Visited.insert(V).second)
      continue;

    CmpInst::Predicate Pred;
    Value *A, *B, *X;

    if (IsAssume) {
      AddAffected(V);
      if (match(V, m_Not(m_Value(X))))
        AddAffected(X);
    }

    if (match(V, m_LogicalOp(m_Value(A), m_Value(B)))) {
      // assume(A && B) is split to -> assume(A); assume(B);
      // assume(!(A || B)) is split to -> assume(!A); assume(!B);
      // Finally, assume(A || B) / assume(!(A && B)) generally don't provide
      // enough information to be worth handling (intersection of information
      // as opposed to union).
      if (!IsAssume) {
        Worklist.push_back(A);
        Worklist.push_back(B);
      }
    } else if (match(V, m_ICmp(Pred, m_Value(A), m_Value(B)))) {
      AddCmpOperands(A, B);

      if (ICmpInst::isEquality(Pred)) {
        if (match(B, m_ConstantInt())) {
          Value *Y;
          // (X & C) or (X | C) or (X ^ C).
          // (X << C) or (X >>_s C) or (X >>_u C).
          if (match(A, m_BitwiseLogic(m_Value(X), m_ConstantInt())) ||
              match(A, m_Shift(m_Value(X), m_ConstantInt())))
            AddAffected(X);
          else if (match(A, m_And(m_Value(X), m_Value(Y))) ||
                   match(A, m_Or(m_Value(X), m_Value(Y)))) {
            AddAffected(X);
            AddAffected(Y);
          }
        }
      } else {
        if (match(B, m_ConstantInt())) {
          // Handle (A + C1) u< C2, which is the canonical form of
          // A > C3 && A < C4.
          if (match(A, m_AddLike(m_Value(X), m_ConstantInt())))
            AddAffected(X);

          if (ICmpInst::isUnsigned(Pred)) {
            Value *Y;
            // X & Y u> C    -> X >u C && Y >u C
            // X | Y u< C    -> X u< C && Y u< C
            // X nuw+ Y u< C -> X u< C && Y u< C
            if (match(A, m_And(m_Value(X), m_Value(Y))) ||
                match(A, m_Or(m_Value(X), m_Value(Y))) ||
                match(A, m_NUWAdd(m_Value(X), m_Value(Y)))) {
              AddAffected(X);
              AddAffected(Y);
            }
            // X nuw- Y u> C -> X u> C
            if (match(A, m_NUWSub(m_Value(X), m_Value())))
              AddAffected(X);
          }
        }

        // Handle icmp slt/sgt (bitcast X to int), 0/-1, which is supported
        // by computeKnownFPClass().
        if (match(A, m_ElementWiseBitCast(m_Value(X)))) {
          if (Pred == ICmpInst::ICMP_SLT && match(B, m_Zero()))
            InsertAffected(X);
          else if (Pred == ICmpInst::ICMP_SGT && match(B, m_AllOnes()))
            InsertAffected(X);
        }
      }
    } else if (match(Cond, m_FCmp(Pred, m_Value(A), m_Value(B)))) {
      AddCmpOperands(A, B);

      // fcmp fneg(x), y
      // fcmp fabs(x), y
      // fcmp fneg(fabs(x)), y
      if (match(A, m_FNeg(m_Value(A))))
        AddAffected(A);
      if (match(A, m_FAbs(m_Value(A))))
        AddAffected(A);

    } else if (match(V, m_Intrinsic<Intrinsic::is_fpclass>(m_Value(A),
                                                           m_Value()))) {
      // Handle patterns that computeKnownFPClass() support.