1 //===- ValueTracking.cpp - Walk computations to compute properties --------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains routines that help analyze properties that chains of
10 // computations have.
12 //===----------------------------------------------------------------------===//
14 #include "llvm/Analysis/ValueTracking.h"
15 #include "llvm/ADT/APFloat.h"
16 #include "llvm/ADT/APInt.h"
17 #include "llvm/ADT/ArrayRef.h"
18 #include "llvm/ADT/STLExtras.h"
19 #include "llvm/ADT/ScopeExit.h"
20 #include "llvm/ADT/SmallPtrSet.h"
21 #include "llvm/ADT/SmallSet.h"
22 #include "llvm/ADT/SmallVector.h"
23 #include "llvm/ADT/StringRef.h"
24 #include "llvm/ADT/iterator_range.h"
25 #include "llvm/Analysis/AliasAnalysis.h"
26 #include "llvm/Analysis/AssumeBundleQueries.h"
27 #include "llvm/Analysis/AssumptionCache.h"
28 #include "llvm/Analysis/ConstantFolding.h"
29 #include "llvm/Analysis/DomConditionCache.h"
30 #include "llvm/Analysis/GuardUtils.h"
31 #include "llvm/Analysis/InstructionSimplify.h"
32 #include "llvm/Analysis/Loads.h"
33 #include "llvm/Analysis/LoopInfo.h"
34 #include "llvm/Analysis/TargetLibraryInfo.h"
35 #include "llvm/Analysis/VectorUtils.h"
36 #include "llvm/Analysis/WithCache.h"
37 #include "llvm/IR/Argument.h"
38 #include "llvm/IR/Attributes.h"
39 #include "llvm/IR/BasicBlock.h"
40 #include "llvm/IR/Constant.h"
41 #include "llvm/IR/ConstantRange.h"
42 #include "llvm/IR/Constants.h"
43 #include "llvm/IR/DerivedTypes.h"
44 #include "llvm/IR/DiagnosticInfo.h"
45 #include "llvm/IR/Dominators.h"
46 #include "llvm/IR/EHPersonalities.h"
47 #include "llvm/IR/Function.h"
48 #include "llvm/IR/GetElementPtrTypeIterator.h"
49 #include "llvm/IR/GlobalAlias.h"
50 #include "llvm/IR/GlobalValue.h"
51 #include "llvm/IR/GlobalVariable.h"
52 #include "llvm/IR/InstrTypes.h"
53 #include "llvm/IR/Instruction.h"
54 #include "llvm/IR/Instructions.h"
55 #include "llvm/IR/IntrinsicInst.h"
56 #include "llvm/IR/Intrinsics.h"
57 #include "llvm/IR/IntrinsicsAArch64.h"
58 #include "llvm/IR/IntrinsicsAMDGPU.h"
59 #include "llvm/IR/IntrinsicsRISCV.h"
60 #include "llvm/IR/IntrinsicsX86.h"
61 #include "llvm/IR/LLVMContext.h"
62 #include "llvm/IR/Metadata.h"
63 #include "llvm/IR/Module.h"
64 #include "llvm/IR/Operator.h"
65 #include "llvm/IR/PatternMatch.h"
66 #include "llvm/IR/Type.h"
67 #include "llvm/IR/User.h"
68 #include "llvm/IR/Value.h"
69 #include "llvm/Support/Casting.h"
70 #include "llvm/Support/CommandLine.h"
71 #include "llvm/Support/Compiler.h"
72 #include "llvm/Support/ErrorHandling.h"
73 #include "llvm/Support/KnownBits.h"
74 #include "llvm/Support/MathExtras.h"
75 #include "llvm/TargetParser/RISCVTargetParser.h"
76 #include <algorithm>
77 #include <cassert>
78 #include <cstdint>
79 #include <optional>
80 #include <utility>
82 using namespace llvm;
83 using namespace llvm::PatternMatch;
85 // Controls the number of uses of the value searched for possible
86 // dominating comparisons.
87 static cl::opt<unsigned> DomConditionsMaxUses("dom-conditions-max-uses",
88 cl::Hidden, cl::init(20));
91 /// Returns the bitwidth of the given scalar or pointer type. For vector types,
92 /// returns the element type's bitwidth.
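/// For illustration (a sketch of the behavior below): an i32 or <4 x i32>
/// value yields 32 via getScalarSizeInBits(); a pointer type reports 0 there,
/// so the DataLayout pointer size (e.g. 64 on typical 64-bit targets) is
/// returned instead.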
93 static unsigned getBitWidth(Type *Ty, const DataLayout &DL) {
94 if (unsigned BitWidth = Ty->getScalarSizeInBits())
95 return BitWidth;
97 return DL.getPointerTypeSizeInBits(Ty);
100 // Given the provided Value and, potentially, a context instruction, return
101 // the preferred context instruction (if any).
102 static const Instruction *safeCxtI(const Value *V, const Instruction *CxtI) {
103 // If we've been provided with a context instruction, then use that (provided
104 // it has been inserted).
105 if (CxtI && CxtI->getParent())
106 return CxtI;
108 // If the value is really an already-inserted instruction, then use that.
109 CxtI = dyn_cast<Instruction>(V);
110 if (CxtI && CxtI->getParent())
111 return CxtI;
113 return nullptr;
116 static bool getShuffleDemandedElts(const ShuffleVectorInst *Shuf,
117 const APInt &DemandedElts,
118 APInt &DemandedLHS, APInt &DemandedRHS) {
119 if (isa<ScalableVectorType>(Shuf->getType())) {
120 assert(DemandedElts == APInt(1,1));
121 DemandedLHS = DemandedRHS = DemandedElts;
122 return true;
125 int NumElts =
126 cast<FixedVectorType>(Shuf->getOperand(0)->getType())->getNumElements();
127 return llvm::getShuffleDemandedElts(NumElts, Shuf->getShuffleMask(),
128 DemandedElts, DemandedLHS, DemandedRHS);
131 static void computeKnownBits(const Value *V, const APInt &DemandedElts,
132 KnownBits &Known, unsigned Depth,
133 const SimplifyQuery &Q);
135 void llvm::computeKnownBits(const Value *V, KnownBits &Known, unsigned Depth,
136 const SimplifyQuery &Q) {
137 // Since the number of lanes in a scalable vector is unknown at compile time,
138 // we track one bit which is implicitly broadcast to all lanes. This means
139 // that all lanes in a scalable vector are considered demanded.
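// For illustration: an <8 x i16> value gets an 8-bit all-ones DemandedElts,
// while a scalar or <vscale x 4 x i32> value uses the single-bit APInt(1, 1).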
140 auto *FVTy = dyn_cast<FixedVectorType>(V->getType());
141 APInt DemandedElts =
142 FVTy ? APInt::getAllOnes(FVTy->getNumElements()) : APInt(1, 1);
143 ::computeKnownBits(V, DemandedElts, Known, Depth, Q);
146 void llvm::computeKnownBits(const Value *V, KnownBits &Known,
147 const DataLayout &DL, unsigned Depth,
148 AssumptionCache *AC, const Instruction *CxtI,
149 const DominatorTree *DT, bool UseInstrInfo) {
150 computeKnownBits(
151 V, Known, Depth,
152 SimplifyQuery(DL, DT, AC, safeCxtI(V, CxtI), UseInstrInfo));
155 KnownBits llvm::computeKnownBits(const Value *V, const DataLayout &DL,
156 unsigned Depth, AssumptionCache *AC,
157 const Instruction *CxtI,
158 const DominatorTree *DT, bool UseInstrInfo) {
159 return computeKnownBits(
160 V, Depth, SimplifyQuery(DL, DT, AC, safeCxtI(V, CxtI), UseInstrInfo));
163 KnownBits llvm::computeKnownBits(const Value *V, const APInt &DemandedElts,
164 const DataLayout &DL, unsigned Depth,
165 AssumptionCache *AC, const Instruction *CxtI,
166 const DominatorTree *DT, bool UseInstrInfo) {
167 return computeKnownBits(
168 V, DemandedElts, Depth,
169 SimplifyQuery(DL, DT, AC, safeCxtI(V, CxtI), UseInstrInfo));
172 static bool haveNoCommonBitsSetSpecialCases(const Value *LHS, const Value *RHS,
173 const SimplifyQuery &SQ) {
174 // Look for an inverted mask: (X & ~M) op (Y & M).
176 Value *M;
177 if (match(LHS, m_c_And(m_Not(m_Value(M)), m_Value())) &&
178 match(RHS, m_c_And(m_Specific(M), m_Value())) &&
179 isGuaranteedNotToBeUndef(M, SQ.AC, SQ.CxtI, SQ.DT))
180 return true;
183 // X op (Y & ~X)
184 if (match(RHS, m_c_And(m_Not(m_Specific(LHS)), m_Value())) &&
185 isGuaranteedNotToBeUndef(LHS, SQ.AC, SQ.CxtI, SQ.DT))
186 return true;
188 // X op ((X & Y) ^ Y) -- this is the canonical form of the previous pattern
189 // for constant Y.
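// For illustration of why this is safe: ((X & Y) ^ Y) simplifies to (~X & Y),
// which can never share a set bit with X.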
190 Value *Y;
191 if (match(RHS,
192 m_c_Xor(m_c_And(m_Specific(LHS), m_Value(Y)), m_Deferred(Y))) &&
193 isGuaranteedNotToBeUndef(LHS, SQ.AC, SQ.CxtI, SQ.DT) &&
194 isGuaranteedNotToBeUndef(Y, SQ.AC, SQ.CxtI, SQ.DT))
195 return true;
197 // Peek through extends to find a 'not' of the other side:
198 // (ext Y) op ext(~Y)
199 if (match(LHS, m_ZExtOrSExt(m_Value(Y))) &&
200 match(RHS, m_ZExtOrSExt(m_Not(m_Specific(Y)))) &&
201 isGuaranteedNotToBeUndef(Y, SQ.AC, SQ.CxtI, SQ.DT))
202 return true;
204 // Look for: (A & B) op ~(A | B)
206 Value *A, *B;
207 if (match(LHS, m_And(m_Value(A), m_Value(B))) &&
208 match(RHS, m_Not(m_c_Or(m_Specific(A), m_Specific(B)))) &&
209 isGuaranteedNotToBeUndef(A, SQ.AC, SQ.CxtI, SQ.DT) &&
210 isGuaranteedNotToBeUndef(B, SQ.AC, SQ.CxtI, SQ.DT))
211 return true;
214 // Look for: (X << V) op (Y >> (BitWidth - V))
215 // or (X >> V) op (Y << (BitWidth - V))
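// For illustration: with BitWidth == 8 and V == 3, (X >> 3) can only have
// bits 0..4 set while (Y << 5) can only have bits 5..7 set, so the two sides
// cannot share a set bit (assuming in-range shift amounts).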
217 const Value *V;
218 const APInt *R;
219 if (((match(RHS, m_Shl(m_Value(), m_Sub(m_APInt(R), m_Value(V)))) &&
220 match(LHS, m_LShr(m_Value(), m_Specific(V)))) ||
221 (match(RHS, m_LShr(m_Value(), m_Sub(m_APInt(R), m_Value(V)))) &&
222 match(LHS, m_Shl(m_Value(), m_Specific(V))))) &&
223 R->uge(LHS->getType()->getScalarSizeInBits()))
224 return true;
227 return false;
230 bool llvm::haveNoCommonBitsSet(const WithCache<const Value *> &LHSCache,
231 const WithCache<const Value *> &RHSCache,
232 const SimplifyQuery &SQ) {
233 const Value *LHS = LHSCache.getValue();
234 const Value *RHS = RHSCache.getValue();
236 assert(LHS->getType() == RHS->getType() &&
237 "LHS and RHS should have the same type");
238 assert(LHS->getType()->isIntOrIntVectorTy() &&
239 "LHS and RHS should be integers");
241 if (haveNoCommonBitsSetSpecialCases(LHS, RHS, SQ) ||
242 haveNoCommonBitsSetSpecialCases(RHS, LHS, SQ))
243 return true;
245 return KnownBits::haveNoCommonBitsSet(LHSCache.getKnownBits(SQ),
246 RHSCache.getKnownBits(SQ));
249 bool llvm::isOnlyUsedInZeroComparison(const Instruction *I) {
250 return !I->user_empty() && all_of(I->users(), [](const User *U) {
251 return match(U, m_ICmp(m_Value(), m_Zero()));
255 bool llvm::isOnlyUsedInZeroEqualityComparison(const Instruction *I) {
256 return !I->user_empty() && all_of(I->users(), [](const User *U) {
257 CmpPredicate P;
258 return match(U, m_ICmp(P, m_Value(), m_Zero())) && ICmpInst::isEquality(P);
262 bool llvm::isKnownToBeAPowerOfTwo(const Value *V, const DataLayout &DL,
263 bool OrZero, unsigned Depth,
264 AssumptionCache *AC, const Instruction *CxtI,
265 const DominatorTree *DT, bool UseInstrInfo) {
266 return ::isKnownToBeAPowerOfTwo(
267 V, OrZero, Depth,
268 SimplifyQuery(DL, DT, AC, safeCxtI(V, CxtI), UseInstrInfo));
271 static bool isKnownNonZero(const Value *V, const APInt &DemandedElts,
272 const SimplifyQuery &Q, unsigned Depth);
274 bool llvm::isKnownNonNegative(const Value *V, const SimplifyQuery &SQ,
275 unsigned Depth) {
276 return computeKnownBits(V, Depth, SQ).isNonNegative();
279 bool llvm::isKnownPositive(const Value *V, const SimplifyQuery &SQ,
280 unsigned Depth) {
281 if (auto *CI = dyn_cast<ConstantInt>(V))
282 return CI->getValue().isStrictlyPositive();
284 // If `isKnownNonNegative` ever becomes more sophisticated, make sure to keep
285 // this updated.
286 KnownBits Known = computeKnownBits(V, Depth, SQ);
287 return Known.isNonNegative() &&
288 (Known.isNonZero() || isKnownNonZero(V, SQ, Depth));
291 bool llvm::isKnownNegative(const Value *V, const SimplifyQuery &SQ,
292 unsigned Depth) {
293 return computeKnownBits(V, Depth, SQ).isNegative();
296 static bool isKnownNonEqual(const Value *V1, const Value *V2,
297 const APInt &DemandedElts, unsigned Depth,
298 const SimplifyQuery &Q);
300 bool llvm::isKnownNonEqual(const Value *V1, const Value *V2,
301 const SimplifyQuery &Q, unsigned Depth) {
302 // We don't support looking through casts.
303 if (V1 == V2 || V1->getType() != V2->getType())
304 return false;
305 auto *FVTy = dyn_cast<FixedVectorType>(V1->getType());
306 APInt DemandedElts =
307 FVTy ? APInt::getAllOnes(FVTy->getNumElements()) : APInt(1, 1);
308 return ::isKnownNonEqual(V1, V2, DemandedElts, Depth, Q);
311 bool llvm::MaskedValueIsZero(const Value *V, const APInt &Mask,
312 const SimplifyQuery &SQ, unsigned Depth) {
313 KnownBits Known(Mask.getBitWidth());
314 computeKnownBits(V, Known, Depth, SQ);
315 return Mask.isSubsetOf(Known.Zero);
318 static unsigned ComputeNumSignBits(const Value *V, const APInt &DemandedElts,
319 unsigned Depth, const SimplifyQuery &Q);
321 static unsigned ComputeNumSignBits(const Value *V, unsigned Depth,
322 const SimplifyQuery &Q) {
323 auto *FVTy = dyn_cast<FixedVectorType>(V->getType());
324 APInt DemandedElts =
325 FVTy ? APInt::getAllOnes(FVTy->getNumElements()) : APInt(1, 1);
326 return ComputeNumSignBits(V, DemandedElts, Depth, Q);
329 unsigned llvm::ComputeNumSignBits(const Value *V, const DataLayout &DL,
330 unsigned Depth, AssumptionCache *AC,
331 const Instruction *CxtI,
332 const DominatorTree *DT, bool UseInstrInfo) {
333 return ::ComputeNumSignBits(
334 V, Depth, SimplifyQuery(DL, DT, AC, safeCxtI(V, CxtI), UseInstrInfo));
337 unsigned llvm::ComputeMaxSignificantBits(const Value *V, const DataLayout &DL,
338 unsigned Depth, AssumptionCache *AC,
339 const Instruction *CxtI,
340 const DominatorTree *DT) {
341 unsigned SignBits = ComputeNumSignBits(V, DL, Depth, AC, CxtI, DT);
342 return V->getType()->getScalarSizeInBits() - SignBits + 1;
345 static void computeKnownBitsAddSub(bool Add, const Value *Op0, const Value *Op1,
346 bool NSW, bool NUW,
347 const APInt &DemandedElts,
348 KnownBits &KnownOut, KnownBits &Known2,
349 unsigned Depth, const SimplifyQuery &Q) {
350 computeKnownBits(Op1, DemandedElts, KnownOut, Depth + 1, Q);
352 // If one operand is unknown and we have no nowrap information,
353 // the result will be unknown independently of the second operand.
354 if (KnownOut.isUnknown() && !NSW && !NUW)
355 return;
357 computeKnownBits(Op0, DemandedElts, Known2, Depth + 1, Q);
358 KnownOut = KnownBits::computeForAddSub(Add, NSW, NUW, Known2, KnownOut);
361 static void computeKnownBitsMul(const Value *Op0, const Value *Op1, bool NSW,
362 bool NUW, const APInt &DemandedElts,
363 KnownBits &Known, KnownBits &Known2,
364 unsigned Depth, const SimplifyQuery &Q) {
365 computeKnownBits(Op1, DemandedElts, Known, Depth + 1, Q);
366 computeKnownBits(Op0, DemandedElts, Known2, Depth + 1, Q);
368 bool isKnownNegative = false;
369 bool isKnownNonNegative = false;
370 // If the multiplication is known not to overflow, compute the sign bit.
371 if (NSW) {
372 if (Op0 == Op1) {
373 // The product of a number with itself is non-negative.
374 isKnownNonNegative = true;
375 } else {
376 bool isKnownNonNegativeOp1 = Known.isNonNegative();
377 bool isKnownNonNegativeOp0 = Known2.isNonNegative();
378 bool isKnownNegativeOp1 = Known.isNegative();
379 bool isKnownNegativeOp0 = Known2.isNegative();
380 // The product of two numbers with the same sign is non-negative.
381 isKnownNonNegative = (isKnownNegativeOp1 && isKnownNegativeOp0) ||
382 (isKnownNonNegativeOp1 && isKnownNonNegativeOp0);
383 if (!isKnownNonNegative && NUW) {
384 // mul nuw nsw with a factor > 1 is non-negative.
385 KnownBits One = KnownBits::makeConstant(APInt(Known.getBitWidth(), 1));
386 isKnownNonNegative = KnownBits::sgt(Known, One).value_or(false) ||
387 KnownBits::sgt(Known2, One).value_or(false);
390 // The product of a negative number and a non-negative number is either
391 // negative or zero.
392 if (!isKnownNonNegative)
393 isKnownNegative =
394 (isKnownNegativeOp1 && isKnownNonNegativeOp0 &&
395 Known2.isNonZero()) ||
396 (isKnownNegativeOp0 && isKnownNonNegativeOp1 && Known.isNonZero());
400 bool SelfMultiply = Op0 == Op1;
401 if (SelfMultiply)
402 SelfMultiply &=
403 isGuaranteedNotToBeUndef(Op0, Q.AC, Q.CxtI, Q.DT, Depth + 1);
404 Known = KnownBits::mul(Known, Known2, SelfMultiply);
406 // Only make use of no-wrap flags if we failed to compute the sign bit
407 // directly. This matters if the multiplication always overflows, in
408 // which case we prefer to follow the result of the direct computation,
409 // though as the program is invoking undefined behaviour we can choose
410 // whatever we like here.
411 if (isKnownNonNegative && !Known.isNegative())
412 Known.makeNonNegative();
413 else if (isKnownNegative && !Known.isNonNegative())
414 Known.makeNegative();
417 void llvm::computeKnownBitsFromRangeMetadata(const MDNode &Ranges,
418 KnownBits &Known) {
419 unsigned BitWidth = Known.getBitWidth();
420 unsigned NumRanges = Ranges.getNumOperands() / 2;
421 assert(NumRanges >= 1);
423 Known.Zero.setAllBits();
424 Known.One.setAllBits();
426 for (unsigned i = 0; i < NumRanges; ++i) {
427 ConstantInt *Lower =
428 mdconst::extract<ConstantInt>(Ranges.getOperand(2 * i + 0));
429 ConstantInt *Upper =
430 mdconst::extract<ConstantInt>(Ranges.getOperand(2 * i + 1));
431 ConstantRange Range(Lower->getValue(), Upper->getValue());
433 // The first CommonPrefixBits of all values in Range are equal.
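// For illustration: for the half-open range [0x60, 0x70) on an i8, the
// unsigned min and max are 0x60 and 0x6F, their xor is 0x0F, so the top four
// bits (0110) become known below.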
434 unsigned CommonPrefixBits =
435 (Range.getUnsignedMax() ^ Range.getUnsignedMin()).countl_zero();
436 APInt Mask = APInt::getHighBitsSet(BitWidth, CommonPrefixBits);
437 APInt UnsignedMax = Range.getUnsignedMax().zextOrTrunc(BitWidth);
438 Known.One &= UnsignedMax & Mask;
439 Known.Zero &= ~UnsignedMax & Mask;
443 static bool isEphemeralValueOf(const Instruction *I, const Value *E) {
444 SmallVector<const Value *, 16> WorkSet(1, I);
445 SmallPtrSet<const Value *, 32> Visited;
446 SmallPtrSet<const Value *, 16> EphValues;
448 // The instruction defining an assumption's condition itself is always
449 // considered ephemeral to that assumption (even if it has other
450 // non-ephemeral users). See r246696's test case for an example.
451 if (is_contained(I->operands(), E))
452 return true;
454 while (!WorkSet.empty()) {
455 const Value *V = WorkSet.pop_back_val();
456 if (!Visited.insert(V).second)
457 continue;
459 // If all uses of this value are ephemeral, then so is this value.
460 if (llvm::all_of(V->users(), [&](const User *U) {
461 return EphValues.count(U);
462 })) {
463 if (V == E)
464 return true;
466 if (V == I || (isa<Instruction>(V) &&
467 !cast<Instruction>(V)->mayHaveSideEffects() &&
468 !cast<Instruction>(V)->isTerminator())) {
469 EphValues.insert(V);
470 if (const User *U = dyn_cast<User>(V))
471 append_range(WorkSet, U->operands());
476 return false;
479 // Is this an intrinsic that cannot be speculated but also cannot trap?
480 bool llvm::isAssumeLikeIntrinsic(const Instruction *I) {
481 if (const IntrinsicInst *CI = dyn_cast<IntrinsicInst>(I))
482 return CI->isAssumeLikeIntrinsic();
484 return false;
487 bool llvm::isValidAssumeForContext(const Instruction *Inv,
488 const Instruction *CxtI,
489 const DominatorTree *DT,
490 bool AllowEphemerals) {
491 // There are two restrictions on the use of an assume:
492 // 1. The assume must dominate the context (or the control flow must
493 // reach the assume whenever it reaches the context).
494 // 2. The context must not be in the assume's set of ephemeral values
495 // (otherwise we will use the assume to prove that the condition
496 // feeding the assume is trivially true, thus causing the removal of
497 // the assume).
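// A hypothetical IR sketch of restriction 1 (names are illustrative only):
//   call void @llvm.assume(i1 %cond)   ; Inv
//   %use = add i32 %x, 1               ; CxtI -- the assume comes first in
//                                      ; the block, so it may be used here.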
499 if (Inv->getParent() == CxtI->getParent()) {
500 // If Inv and CxtI are in the same block, check if the assume (Inv) is first
501 // in the BB.
502 if (Inv->comesBefore(CxtI))
503 return true;
505 // Don't let an assume affect itself - this would cause the problems
506 // `isEphemeralValueOf` is trying to prevent, and it would also make
507 // the loop below go out of bounds.
508 if (!AllowEphemerals && Inv == CxtI)
509 return false;
511 // The context comes first, but they're both in the same block.
512 // Make sure there is nothing in between that might interrupt
513 // the control flow, not even CxtI itself.
514 // We limit the scan distance between the assume and its context instruction
515 // to avoid a compile-time explosion. This limit is chosen arbitrarily, so
516 // it can be adjusted if needed (could be turned into a cl::opt).
517 auto Range = make_range(CxtI->getIterator(), Inv->getIterator());
518 if (!isGuaranteedToTransferExecutionToSuccessor(Range, 15))
519 return false;
521 return AllowEphemerals || !isEphemeralValueOf(Inv, CxtI);
524 // Inv and CxtI are in different blocks.
525 if (DT) {
526 if (DT->dominates(Inv, CxtI))
527 return true;
528 } else if (Inv->getParent() == CxtI->getParent()->getSinglePredecessor() ||
529 Inv->getParent()->isEntryBlock()) {
530 // We don't have a DT, but this trivially dominates.
531 return true;
534 return false;
537 // TODO: cmpExcludesZero misses many cases where `RHS` is non-constant but
538 // we still have enough information about `RHS` to conclude non-zero. For
539 // example Pred=EQ, RHS=isKnownNonZero. cmpExcludesZero is called in loops
540 // so the extra compile time may not be worth it, but possibly a second API
541 // should be created for use outside of loops.
542 static bool cmpExcludesZero(CmpInst::Predicate Pred, const Value *RHS) {
543 // v u> y implies v != 0.
544 if (Pred == ICmpInst::ICMP_UGT)
545 return true;
547 // Special-case v != 0 to also handle v != null.
548 if (Pred == ICmpInst::ICMP_NE)
549 return match(RHS, m_Zero());
551 // All other predicates - rely on generic ConstantRange handling.
552 const APInt *C;
553 auto Zero = APInt::getZero(RHS->getType()->getScalarSizeInBits());
554 if (match(RHS, m_APInt(C))) {
555 ConstantRange TrueValues = ConstantRange::makeExactICmpRegion(Pred, *C);
556 return !TrueValues.contains(Zero);
559 auto *VC = dyn_cast<ConstantDataVector>(RHS);
560 if (VC == nullptr)
561 return false;
563 for (unsigned ElemIdx = 0, NElem = VC->getNumElements(); ElemIdx < NElem;
564 ++ElemIdx) {
565 ConstantRange TrueValues = ConstantRange::makeExactICmpRegion(
566 Pred, VC->getElementAsAPInt(ElemIdx));
567 if (TrueValues.contains(Zero))
568 return false;
570 return true;
573 static void breakSelfRecursivePHI(const Use *U, const PHINode *PHI,
574 Value *&ValOut, Instruction *&CtxIOut,
575 const PHINode **PhiOut = nullptr) {
576 ValOut = U->get();
577 if (ValOut == PHI)
578 return;
579 CtxIOut = PHI->getIncomingBlock(*U)->getTerminator();
580 if (PhiOut)
581 *PhiOut = PHI;
582 Value *V;
583 // If the Use is a select of this phi, compute the analysis on the other arm
584 // to break the recursion.
585 // TODO: Min/Max
586 if (match(ValOut, m_Select(m_Value(), m_Specific(PHI), m_Value(V))) ||
587 match(ValOut, m_Select(m_Value(), m_Value(V), m_Specific(PHI))))
588 ValOut = V;
590 // As with the select case above, if this phi is a 2-operand phi, compute the
591 // analysis on the other incoming value to break the recursion.
592 // TODO: We could handle any number of incoming edges as long as we only have
593 // two unique values.
594 if (auto *IncPhi = dyn_cast<PHINode>(ValOut);
595 IncPhi && IncPhi->getNumIncomingValues() == 2) {
596 for (int Idx = 0; Idx < 2; ++Idx) {
597 if (IncPhi->getIncomingValue(Idx) == PHI) {
598 ValOut = IncPhi->getIncomingValue(1 - Idx);
599 if (PhiOut)
600 *PhiOut = IncPhi;
601 CtxIOut = IncPhi->getIncomingBlock(1 - Idx)->getTerminator();
602 break;
608 static bool isKnownNonZeroFromAssume(const Value *V, const SimplifyQuery &Q) {
609 // Use of assumptions is context-sensitive. If we don't have a context, we
610 // cannot use them!
611 if (!Q.AC || !Q.CxtI)
612 return false;
614 for (AssumptionCache::ResultElem &Elem : Q.AC->assumptionsFor(V)) {
615 if (!Elem.Assume)
616 continue;
618 AssumeInst *I = cast<AssumeInst>(Elem.Assume);
619 assert(I->getFunction() == Q.CxtI->getFunction() &&
620 "Got assumption for the wrong function!");
622 if (Elem.Index != AssumptionCache::ExprResultIdx) {
623 if (!V->getType()->isPointerTy())
624 continue;
625 if (RetainedKnowledge RK = getKnowledgeFromBundle(
626 *I, I->bundle_op_info_begin()[Elem.Index])) {
627 if (RK.WasOn == V &&
628 (RK.AttrKind == Attribute::NonNull ||
629 (RK.AttrKind == Attribute::Dereferenceable &&
630 !NullPointerIsDefined(Q.CxtI->getFunction(),
631 V->getType()->getPointerAddressSpace()))) &&
632 isValidAssumeForContext(I, Q.CxtI, Q.DT))
633 return true;
635 continue;
638 // Warning: This loop can end up being somewhat performance sensitive.
639 // We're running this loop once for each value queried, resulting in a
640 // runtime of ~O(#assumes * #values).
642 Value *RHS;
643 CmpPredicate Pred;
644 auto m_V = m_CombineOr(m_Specific(V), m_PtrToInt(m_Specific(V)));
645 if (!match(I->getArgOperand(0), m_c_ICmp(Pred, m_V, m_Value(RHS))))
646 continue;
648 if (cmpExcludesZero(Pred, RHS) && isValidAssumeForContext(I, Q.CxtI, Q.DT))
649 return true;
652 return false;
655 static void computeKnownBitsFromCmp(const Value *V, CmpInst::Predicate Pred,
656 Value *LHS, Value *RHS, KnownBits &Known,
657 const SimplifyQuery &Q) {
658 if (RHS->getType()->isPointerTy()) {
659 // Handle comparison of pointer to null explicitly, as it will not be
660 // covered by the m_APInt() logic below.
661 if (LHS == V && match(RHS, m_Zero())) {
662 switch (Pred) {
663 case ICmpInst::ICMP_EQ:
664 Known.setAllZero();
665 break;
666 case ICmpInst::ICMP_SGE:
667 case ICmpInst::ICMP_SGT:
668 Known.makeNonNegative();
669 break;
670 case ICmpInst::ICMP_SLT:
671 Known.makeNegative();
672 break;
673 default:
674 break;
677 return;
680 unsigned BitWidth = Known.getBitWidth();
681 auto m_V =
682 m_CombineOr(m_Specific(V), m_PtrToIntSameSize(Q.DL, m_Specific(V)));
684 Value *Y;
685 const APInt *Mask, *C;
686 uint64_t ShAmt;
687 switch (Pred) {
688 case ICmpInst::ICMP_EQ:
689 // assume(V = C)
690 if (match(LHS, m_V) && match(RHS, m_APInt(C))) {
691 Known = Known.unionWith(KnownBits::makeConstant(*C));
692 // assume(V & Mask = C)
693 } else if (match(LHS, m_c_And(m_V, m_Value(Y))) &&
694 match(RHS, m_APInt(C))) {
695 // For one bits in Mask, we can propagate bits from C to V.
696 Known.One |= *C;
697 if (match(Y, m_APInt(Mask)))
698 Known.Zero |= ~*C & *Mask;
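    // For illustration: assume((V & 0b1100) == 0b0100) makes bit 2 known one
    // and bit 3 known zero; bits outside the mask stay unknown.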
699 // assume(V | Mask = C)
700 } else if (match(LHS, m_c_Or(m_V, m_Value(Y))) && match(RHS, m_APInt(C))) {
701 // For zero bits in Mask, we can propagate bits from C to V.
702 Known.Zero |= ~*C;
703 if (match(Y, m_APInt(Mask)))
704 Known.One |= *C & ~*Mask;
705 // assume(V ^ Mask = C)
706 } else if (match(LHS, m_Xor(m_V, m_APInt(Mask))) &&
707 match(RHS, m_APInt(C))) {
708 // Equivalent to assume(V == Mask ^ C)
709 Known = Known.unionWith(KnownBits::makeConstant(*C ^ *Mask));
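    // For illustration: assume((V ^ 0b0101) == 0b0011) pins V to exactly
    // 0b0110.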
710 // assume(V << ShAmt = C)
711 } else if (match(LHS, m_Shl(m_V, m_ConstantInt(ShAmt))) &&
712 match(RHS, m_APInt(C)) && ShAmt < BitWidth) {
713 // For those bits in C that are known, we can propagate them to known
714 // bits in V shifted to the right by ShAmt.
715 KnownBits RHSKnown = KnownBits::makeConstant(*C);
716 RHSKnown.Zero.lshrInPlace(ShAmt);
717 RHSKnown.One.lshrInPlace(ShAmt);
718 Known = Known.unionWith(RHSKnown);
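    // For illustration (i8): assume((V << 4) == 0xF0) makes V's low four bits
    // known ones; the bits shifted out of V remain unknown.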
719 // assume(V >> ShAmt = C)
720 } else if (match(LHS, m_Shr(m_V, m_ConstantInt(ShAmt))) &&
721 match(RHS, m_APInt(C)) && ShAmt < BitWidth) {
722 KnownBits RHSKnown = KnownBits::makeConstant(*C);
723 // For those bits in RHS that are known, we can propagate them to the
724 // corresponding bits of V, shifted left by ShAmt.
725 Known.Zero |= RHSKnown.Zero << ShAmt;
726 Known.One |= RHSKnown.One << ShAmt;
728 break;
729 case ICmpInst::ICMP_NE: {
730 // assume (V & B != 0) where B is a power of 2
731 const APInt *BPow2;
732 if (match(LHS, m_And(m_V, m_Power2(BPow2))) && match(RHS, m_Zero()))
733 Known.One |= *BPow2;
734 break;
736 default:
737 if (match(RHS, m_APInt(C))) {
738 const APInt *Offset = nullptr;
739 if (match(LHS, m_CombineOr(m_V, m_AddLike(m_V, m_APInt(Offset))))) {
740 ConstantRange LHSRange = ConstantRange::makeAllowedICmpRegion(Pred, *C);
741 if (Offset)
742 LHSRange = LHSRange.sub(*Offset);
743 Known = Known.unionWith(LHSRange.toKnownBits());
745 if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE) {
746 // X & Y u> C -> X u> C && Y u> C
747 // X nuw- Y u> C -> X u> C
748 if (match(LHS, m_c_And(m_V, m_Value())) ||
749 match(LHS, m_NUWSub(m_V, m_Value())))
750 Known.One.setHighBits(
751 (*C + (Pred == ICmpInst::ICMP_UGT)).countLeadingOnes());
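      // For illustration (i8): assume((V & Y) u> 0xF0) implies V u>= 0xF1, so
      // the top four bits of V become known ones.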
753 if (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_ULE) {
754 // X | Y u< C -> X u< C && Y u< C
755 // X nuw+ Y u< C -> X u< C && Y u< C
756 if (match(LHS, m_c_Or(m_V, m_Value())) ||
757 match(LHS, m_c_NUWAdd(m_V, m_Value()))) {
758 Known.Zero.setHighBits(
759 (*C - (Pred == ICmpInst::ICMP_ULT)).countLeadingZeros());
763 break;
767 static void computeKnownBitsFromICmpCond(const Value *V, ICmpInst *Cmp,
768 KnownBits &Known,
769 const SimplifyQuery &SQ, bool Invert) {
770 ICmpInst::Predicate Pred =
771 Invert ? Cmp->getInversePredicate() : Cmp->getPredicate();
772 Value *LHS = Cmp->getOperand(0);
773 Value *RHS = Cmp->getOperand(1);
775 // Handle icmp pred (trunc V), C
776 if (match(LHS, m_Trunc(m_Specific(V)))) {
777 KnownBits DstKnown(LHS->getType()->getScalarSizeInBits());
778 computeKnownBitsFromCmp(LHS, Pred, LHS, RHS, DstKnown, SQ);
779 Known = Known.unionWith(DstKnown.anyext(Known.getBitWidth()));
780 return;
783 computeKnownBitsFromCmp(V, Pred, LHS, RHS, Known, SQ);
786 static void computeKnownBitsFromCond(const Value *V, Value *Cond,
787 KnownBits &Known, unsigned Depth,
788 const SimplifyQuery &SQ, bool Invert) {
789 Value *A, *B;
790 if (Depth < MaxAnalysisRecursionDepth &&
791 match(Cond, m_LogicalOp(m_Value(A), m_Value(B)))) {
792 KnownBits Known2(Known.getBitWidth());
793 KnownBits Known3(Known.getBitWidth());
794 computeKnownBitsFromCond(V, A, Known2, Depth + 1, SQ, Invert);
795 computeKnownBitsFromCond(V, B, Known3, Depth + 1, SQ, Invert);
796 if (Invert ? match(Cond, m_LogicalOr(m_Value(), m_Value()))
797 : match(Cond, m_LogicalAnd(m_Value(), m_Value())))
798 Known2 = Known2.unionWith(Known3);
799 else
800 Known2 = Known2.intersectWith(Known3);
801 Known = Known.unionWith(Known2);
804 if (auto *Cmp = dyn_cast<ICmpInst>(Cond))
805 computeKnownBitsFromICmpCond(V, Cmp, Known, SQ, Invert);
808 void llvm::computeKnownBitsFromContext(const Value *V, KnownBits &Known,
809 unsigned Depth, const SimplifyQuery &Q) {
810 // Handle injected condition.
811 if (Q.CC && Q.CC->AffectedValues.contains(V))
812 computeKnownBitsFromCond(V, Q.CC->Cond, Known, Depth, Q, Q.CC->Invert);
814 if (!Q.CxtI)
815 return;
817 if (Q.DC && Q.DT) {
818 // Handle dominating conditions.
819 for (BranchInst *BI : Q.DC->conditionsFor(V)) {
820 BasicBlockEdge Edge0(BI->getParent(), BI->getSuccessor(0));
821 if (Q.DT->dominates(Edge0, Q.CxtI->getParent()))
822 computeKnownBitsFromCond(V, BI->getCondition(), Known, Depth, Q,
823 /*Invert*/ false);
825 BasicBlockEdge Edge1(BI->getParent(), BI->getSuccessor(1));
826 if (Q.DT->dominates(Edge1, Q.CxtI->getParent()))
827 computeKnownBitsFromCond(V, BI->getCondition(), Known, Depth, Q,
828 /*Invert*/ true);
831 if (Known.hasConflict())
832 Known.resetAll();
835 if (!Q.AC)
836 return;
838 unsigned BitWidth = Known.getBitWidth();
840 // Note that the patterns below need to be kept in sync with the code
841 // in AssumptionCache::updateAffectedValues.
843 for (AssumptionCache::ResultElem &Elem : Q.AC->assumptionsFor(V)) {
844 if (!Elem.Assume)
845 continue;
847 AssumeInst *I = cast<AssumeInst>(Elem.Assume);
848 assert(I->getParent()->getParent() == Q.CxtI->getParent()->getParent() &&
849 "Got assumption for the wrong function!");
851 if (Elem.Index != AssumptionCache::ExprResultIdx) {
852 if (!V->getType()->isPointerTy())
853 continue;
854 if (RetainedKnowledge RK = getKnowledgeFromBundle(
855 *I, I->bundle_op_info_begin()[Elem.Index])) {
856 // Allow AllowEphemerals in isValidAssumeForContext, as the CxtI might
857 // be the producer of the pointer in the bundle. At the moment, align
858 // assumptions aren't optimized away.
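      // For illustration: an "align"(ptr %p, i64 16) bundle on a hypothetical
      // pointer %p lets us mark its low Log2_64(16) == 4 bits as known zero.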
859 if (RK.WasOn == V && RK.AttrKind == Attribute::Alignment &&
860 isPowerOf2_64(RK.ArgValue) &&
861 isValidAssumeForContext(I, Q.CxtI, Q.DT, /*AllowEphemerals*/ true))
862 Known.Zero.setLowBits(Log2_64(RK.ArgValue));
864 continue;
867 // Warning: This loop can end up being somewhat performance sensitive.
868 // We're running this loop once for each value queried, resulting in a
869 // runtime of ~O(#assumes * #values).
871 Value *Arg = I->getArgOperand(0);
873 if (Arg == V && isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
874 assert(BitWidth == 1 && "assume operand is not i1?");
875 (void)BitWidth;
876 Known.setAllOnes();
877 return;
879 if (match(Arg, m_Not(m_Specific(V))) &&
880 isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
881 assert(BitWidth == 1 && "assume operand is not i1?");
882 (void)BitWidth;
883 Known.setAllZero();
884 return;
887 // The remaining tests are all recursive, so bail out if we hit the limit.
888 if (Depth == MaxAnalysisRecursionDepth)
889 continue;
891 ICmpInst *Cmp = dyn_cast<ICmpInst>(Arg);
892 if (!Cmp)
893 continue;
895 if (!isValidAssumeForContext(I, Q.CxtI, Q.DT))
896 continue;
898 computeKnownBitsFromICmpCond(V, Cmp, Known, Q, /*Invert=*/false);
901 // Conflicting assumption: Undefined behavior will occur on this execution
902 // path.
903 if (Known.hasConflict())
904 Known.resetAll();
907 /// Compute known bits from a shift operator, including those with a
908 /// non-constant shift amount. Known is the output of this function. Known2 is a
909 /// pre-allocated temporary with the same bit width as Known and on return
910 /// contains the known bits of the shifted value's source. KF is an
911 /// operator-specific function that, given the known bits and a shift amount,
912 /// computes the implied known bits of the shift operator's result for that
913 /// shift amount. The results from calling KF are conservatively
914 /// combined for all permitted shift amounts.
915 static void computeKnownBitsFromShiftOperator(
916 const Operator *I, const APInt &DemandedElts, KnownBits &Known,
917 KnownBits &Known2, unsigned Depth, const SimplifyQuery &Q,
918 function_ref<KnownBits(const KnownBits &, const KnownBits &, bool)> KF) {
919 computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);
920 computeKnownBits(I->getOperand(1), DemandedElts, Known, Depth + 1, Q);
921 // To limit compile-time impact, only query isKnownNonZero() if we know at
922 // least something about the shift amount.
923 bool ShAmtNonZero =
924 Known.isNonZero() ||
925 (Known.getMaxValue().ult(Known.getBitWidth()) &&
926 isKnownNonZero(I->getOperand(1), DemandedElts, Q, Depth + 1));
927 Known = KF(Known2, Known, ShAmtNonZero);
930 static KnownBits
931 getKnownBitsFromAndXorOr(const Operator *I, const APInt &DemandedElts,
932 const KnownBits &KnownLHS, const KnownBits &KnownRHS,
933 unsigned Depth, const SimplifyQuery &Q) {
934 unsigned BitWidth = KnownLHS.getBitWidth();
935 KnownBits KnownOut(BitWidth);
936 bool IsAnd = false;
937 bool HasKnownOne = !KnownLHS.One.isZero() || !KnownRHS.One.isZero();
938 Value *X = nullptr, *Y = nullptr;
940 switch (I->getOpcode()) {
941 case Instruction::And:
942 KnownOut = KnownLHS & KnownRHS;
943 IsAnd = true;
944 // and(x, -x) is a common idiom that clears all but the lowest set
945 // bit. If we have a single known bit in x, we can clear all bits
946 // above it.
947 // TODO: instcombine often reassociates independent `and` which can hide
948 // this pattern. Try to match and(x, and(-x, y)) / and(and(x, y), -x).
949 if (HasKnownOne && match(I, m_c_And(m_Value(X), m_Neg(m_Deferred(X))))) {
950 // -(-x) == x, so use whichever of LHS/RHS gets us the better result.
951 if (KnownLHS.countMaxTrailingZeros() <= KnownRHS.countMaxTrailingZeros())
952 KnownOut = KnownLHS.blsi();
953 else
954 KnownOut = KnownRHS.blsi();
956 break;
957 case Instruction::Or:
958 KnownOut = KnownLHS | KnownRHS;
959 break;
960 case Instruction::Xor:
961 KnownOut = KnownLHS ^ KnownRHS;
962 // xor(x, x-1) is a common idiom that clears all bits above the lowest set
963 // bit. If we have a single known bit in x, we can clear all bits
964 // above it.
965 // TODO: xor(x, x-1) is often rewritten as xor(x, x-C) where C !=
966 // -1, but for the purpose of demanded bits (xor(x, x-C) &
967 // Demanded) == (xor(x, x-1) & Demanded). Extend the xor pattern
968 // to use an arbitrary C when xor(x, x-C) behaves the same as xor(x, x-1).
969 if (HasKnownOne &&
970 match(I, m_c_Xor(m_Value(X), m_Add(m_Deferred(X), m_AllOnes())))) {
971 const KnownBits &XBits = I->getOperand(0) == X ? KnownLHS : KnownRHS;
972 KnownOut = XBits.blsmsk();
974 break;
975 default:
976 llvm_unreachable("Invalid Op used in 'analyzeKnownBitsFromAndXorOr'");
979 // and(x, add (x, -1)) is a common idiom that always clears the low bit;
980 // xor/or(x, add (x, -1)) is an idiom that will always set the low bit.
981 // Here we handle the more general case of adding any odd number by
982 // matching the form and/xor/or(x, add(x, y)) where y is odd.
983 // TODO: This could be generalized to clearing any bit set in y where the
984 // following bit is known to be unset in y.
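// For illustration: in and(x, x + 1) we have y == 1 (odd), so bit 0 of the
// result is known zero; in or(x, x - 1) the same reasoning sets bit 0.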
985 if (!KnownOut.Zero[0] && !KnownOut.One[0] &&
986 (match(I, m_c_BinOp(m_Value(X), m_c_Add(m_Deferred(X), m_Value(Y)))) ||
987 match(I, m_c_BinOp(m_Value(X), m_Sub(m_Deferred(X), m_Value(Y)))) ||
988 match(I, m_c_BinOp(m_Value(X), m_Sub(m_Value(Y), m_Deferred(X)))))) {
989 KnownBits KnownY(BitWidth);
990 computeKnownBits(Y, DemandedElts, KnownY, Depth + 1, Q);
991 if (KnownY.countMinTrailingOnes() > 0) {
992 if (IsAnd)
993 KnownOut.Zero.setBit(0);
994 else
995 KnownOut.One.setBit(0);
998 return KnownOut;
1001 static KnownBits computeKnownBitsForHorizontalOperation(
1002 const Operator *I, const APInt &DemandedElts, unsigned Depth,
1003 const SimplifyQuery &Q,
1004 const function_ref<KnownBits(const KnownBits &, const KnownBits &)>
1005 KnownBitsFunc) {
1006 APInt DemandedEltsLHS, DemandedEltsRHS;
1007 getHorizDemandedEltsForFirstOperand(Q.DL.getTypeSizeInBits(I->getType()),
1008 DemandedElts, DemandedEltsLHS,
1009 DemandedEltsRHS);
1011 const auto ComputeForSingleOpFunc =
1012 [Depth, &Q, KnownBitsFunc](const Value *Op, APInt &DemandedEltsOp) {
1013 return KnownBitsFunc(
1014 computeKnownBits(Op, DemandedEltsOp, Depth + 1, Q),
1015 computeKnownBits(Op, DemandedEltsOp << 1, Depth + 1, Q));
1018 if (DemandedEltsRHS.isZero())
1019 return ComputeForSingleOpFunc(I->getOperand(0), DemandedEltsLHS);
1020 if (DemandedEltsLHS.isZero())
1021 return ComputeForSingleOpFunc(I->getOperand(1), DemandedEltsRHS);
1023 return ComputeForSingleOpFunc(I->getOperand(0), DemandedEltsLHS)
1024 .intersectWith(ComputeForSingleOpFunc(I->getOperand(1), DemandedEltsRHS));
1027 // Public so this can be used in `SimplifyDemandedUseBits`.
1028 KnownBits llvm::analyzeKnownBitsFromAndXorOr(const Operator *I,
1029 const KnownBits &KnownLHS,
1030 const KnownBits &KnownRHS,
1031 unsigned Depth,
1032 const SimplifyQuery &SQ) {
1033 auto *FVTy = dyn_cast<FixedVectorType>(I->getType());
1034 APInt DemandedElts =
1035 FVTy ? APInt::getAllOnes(FVTy->getNumElements()) : APInt(1, 1);
1037 return getKnownBitsFromAndXorOr(I, DemandedElts, KnownLHS, KnownRHS, Depth,
1038 SQ);
1041 ConstantRange llvm::getVScaleRange(const Function *F, unsigned BitWidth) {
1042 Attribute Attr = F->getFnAttribute(Attribute::VScaleRange);
1043 // Without vscale_range, we only know that vscale is non-zero.
1044 if (!Attr.isValid())
1045 return ConstantRange(APInt(BitWidth, 1), APInt::getZero(BitWidth));
1047 unsigned AttrMin = Attr.getVScaleRangeMin();
1048 // Minimum is larger than vscale width, result is always poison.
1049 if ((unsigned)llvm::bit_width(AttrMin) > BitWidth)
1050 return ConstantRange::getEmpty(BitWidth);
1052 APInt Min(BitWidth, AttrMin);
1053 std::optional<unsigned> AttrMax = Attr.getVScaleRangeMax();
1054 if (!AttrMax || (unsigned)llvm::bit_width(*AttrMax) > BitWidth)
1055 return ConstantRange(Min, APInt::getZero(BitWidth));
1057 return ConstantRange(Min, APInt(BitWidth, *AttrMax) + 1);
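// For illustration (assuming a wide enough BitWidth): vscale_range(2,16)
// yields the range [2, 17), while a missing attribute yields the wrapped
// range [1, 0), i.e. any non-zero vscale.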
1060 void llvm::adjustKnownBitsForSelectArm(KnownBits &Known, Value *Cond,
1061 Value *Arm, bool Invert, unsigned Depth,
1062 const SimplifyQuery &Q) {
1063 // If we have a constant arm, we are done.
1064 if (Known.isConstant())
1065 return;
1067 // See what condition implies about the bits of the select arm.
1068 KnownBits CondRes(Known.getBitWidth());
1069 computeKnownBitsFromCond(Arm, Cond, CondRes, Depth + 1, Q, Invert);
1070 // If we don't get any information from the condition, no reason to
1071 // proceed.
1072 if (CondRes.isUnknown())
1073 return;
1075 // We can have a conflict if the condition is dead. I.e. if we have
1076 // (x | 64) < 32 ? (x | 64) : y
1077 // we will have a conflict at bit 6 from the condition/the `or`.
1078 // In that case just return. It's not particularly important
1079 // what we do, as this select is going to be simplified soon.
1080 CondRes = CondRes.unionWith(Known);
1081 if (CondRes.hasConflict())
1082 return;
1084 // Finally make sure the information we found is valid. This is relatively
1085 // expensive so it's left for the very end.
1086 if (!isGuaranteedNotToBeUndef(Arm, Q.AC, Q.CxtI, Q.DT, Depth + 1))
1087 return;
1089 // Finally, we know we get information from the condition and it's valid,
1090 // so return it.
1091 Known = CondRes;
1094 // Match a signed min+max clamp pattern like smax(smin(In, CHigh), CLow).
1095 // Returns the input and lower/upper bounds.
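// For illustration (with a hypothetical value %x): smax(smin(%x, 255), 0)
// clamps %x to [0, 255]; the match produces In = %x, CLow = 0, CHigh = 255.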
1096 static bool isSignedMinMaxClamp(const Value *Select, const Value *&In,
1097 const APInt *&CLow, const APInt *&CHigh) {
1098 assert(isa<Operator>(Select) &&
1099 cast<Operator>(Select)->getOpcode() == Instruction::Select &&
1100 "Input should be a Select!");
1102 const Value *LHS = nullptr, *RHS = nullptr;
1103 SelectPatternFlavor SPF = matchSelectPattern(Select, LHS, RHS).Flavor;
1104 if (SPF != SPF_SMAX && SPF != SPF_SMIN)
1105 return false;
1107 if (!match(RHS, m_APInt(CLow)))
1108 return false;
1110 const Value *LHS2 = nullptr, *RHS2 = nullptr;
1111 SelectPatternFlavor SPF2 = matchSelectPattern(LHS, LHS2, RHS2).Flavor;
1112 if (getInverseMinMaxFlavor(SPF) != SPF2)
1113 return false;
1115 if (!match(RHS2, m_APInt(CHigh)))
1116 return false;
1118 if (SPF == SPF_SMIN)
1119 std::swap(CLow, CHigh);
1121 In = LHS2;
1122 return CLow->sle(*CHigh);
1125 static bool isSignedMinMaxIntrinsicClamp(const IntrinsicInst *II,
1126 const APInt *&CLow,
1127 const APInt *&CHigh) {
1128 assert((II->getIntrinsicID() == Intrinsic::smin ||
1129 II->getIntrinsicID() == Intrinsic::smax) &&
1130 "Must be smin/smax");
1132 Intrinsic::ID InverseID = getInverseMinMaxIntrinsic(II->getIntrinsicID());
1133 auto *InnerII = dyn_cast<IntrinsicInst>(II->getArgOperand(0));
1134 if (!InnerII || InnerII->getIntrinsicID() != InverseID ||
1135 !match(II->getArgOperand(1), m_APInt(CLow)) ||
1136 !match(InnerII->getArgOperand(1), m_APInt(CHigh)))
1137 return false;
1139 if (II->getIntrinsicID() == Intrinsic::smin)
1140 std::swap(CLow, CHigh);
1141 return CLow->sle(*CHigh);
1144 static void unionWithMinMaxIntrinsicClamp(const IntrinsicInst *II,
1145 KnownBits &Known) {
1146 const APInt *CLow, *CHigh;
1147 if (isSignedMinMaxIntrinsicClamp(II, CLow, CHigh))
1148 Known = Known.unionWith(
1149 ConstantRange::getNonEmpty(*CLow, *CHigh + 1).toKnownBits());
1152 static void computeKnownBitsFromOperator(const Operator *I,
1153 const APInt &DemandedElts,
1154 KnownBits &Known, unsigned Depth,
1155 const SimplifyQuery &Q) {
1156 unsigned BitWidth = Known.getBitWidth();
1158 KnownBits Known2(BitWidth);
1159 switch (I->getOpcode()) {
1160 default: break;
1161 case Instruction::Load:
1162 if (MDNode *MD =
1163 Q.IIQ.getMetadata(cast<LoadInst>(I), LLVMContext::MD_range))
1164 computeKnownBitsFromRangeMetadata(*MD, Known);
1165 break;
1166 case Instruction::And:
1167 computeKnownBits(I->getOperand(1), DemandedElts, Known, Depth + 1, Q);
1168 computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);
1170 Known = getKnownBitsFromAndXorOr(I, DemandedElts, Known2, Known, Depth, Q);
1171 break;
1172 case Instruction::Or:
1173 computeKnownBits(I->getOperand(1), DemandedElts, Known, Depth + 1, Q);
1174 computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);
1176 Known = getKnownBitsFromAndXorOr(I, DemandedElts, Known2, Known, Depth, Q);
1177 break;
1178 case Instruction::Xor:
1179 computeKnownBits(I->getOperand(1), DemandedElts, Known, Depth + 1, Q);
1180 computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);
1182 Known = getKnownBitsFromAndXorOr(I, DemandedElts, Known2, Known, Depth, Q);
1183 break;
1184 case Instruction::Mul: {
1185 bool NSW = Q.IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(I));
1186 bool NUW = Q.IIQ.hasNoUnsignedWrap(cast<OverflowingBinaryOperator>(I));
1187 computeKnownBitsMul(I->getOperand(0), I->getOperand(1), NSW, NUW,
1188 DemandedElts, Known, Known2, Depth, Q);
1189 break;
1191 case Instruction::UDiv: {
1192 computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
1193 computeKnownBits(I->getOperand(1), DemandedElts, Known2, Depth + 1, Q);
1194 Known =
1195 KnownBits::udiv(Known, Known2, Q.IIQ.isExact(cast<BinaryOperator>(I)));
1196 break;
1198 case Instruction::SDiv: {
1199 computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
1200 computeKnownBits(I->getOperand(1), DemandedElts, Known2, Depth + 1, Q);
1201 Known =
1202 KnownBits::sdiv(Known, Known2, Q.IIQ.isExact(cast<BinaryOperator>(I)));
1203 break;
1205 case Instruction::Select: {
1206 auto ComputeForArm = [&](Value *Arm, bool Invert) {
1207 KnownBits Res(Known.getBitWidth());
1208 computeKnownBits(Arm, DemandedElts, Res, Depth + 1, Q);
1209 adjustKnownBitsForSelectArm(Res, I->getOperand(0), Arm, Invert, Depth, Q);
1210 return Res;
1212 // Only known if known in both the LHS and RHS.
1213 Known =
1214 ComputeForArm(I->getOperand(1), /*Invert=*/false)
1215 .intersectWith(ComputeForArm(I->getOperand(2), /*Invert=*/true));
1216 break;
1218 case Instruction::FPTrunc:
1219 case Instruction::FPExt:
1220 case Instruction::FPToUI:
1221 case Instruction::FPToSI:
1222 case Instruction::SIToFP:
1223 case Instruction::UIToFP:
1224 break; // Can't work with floating point.
1225 case Instruction::PtrToInt:
1226 case Instruction::IntToPtr:
1227 // Fall through and handle them the same as zext/trunc.
1228 [[fallthrough]];
1229 case Instruction::ZExt:
1230 case Instruction::Trunc: {
1231 Type *SrcTy = I->getOperand(0)->getType();
1233 unsigned SrcBitWidth;
1234 // Note that we handle pointer operands here because of inttoptr/ptrtoint
1235 // which fall through here.
1236 Type *ScalarTy = SrcTy->getScalarType();
1237 SrcBitWidth = ScalarTy->isPointerTy() ?
1238 Q.DL.getPointerTypeSizeInBits(ScalarTy) :
1239 Q.DL.getTypeSizeInBits(ScalarTy);
1241 assert(SrcBitWidth && "SrcBitWidth can't be zero");
1242 Known = Known.anyextOrTrunc(SrcBitWidth);
1243 computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
1244 if (auto *Inst = dyn_cast<PossiblyNonNegInst>(I);
1245 Inst && Inst->hasNonNeg() && !Known.isNegative())
1246 Known.makeNonNegative();
1247 Known = Known.zextOrTrunc(BitWidth);
1248 break;
1250 case Instruction::BitCast: {
1251 Type *SrcTy = I->getOperand(0)->getType();
1252 if (SrcTy->isIntOrPtrTy() &&
1253 // TODO: For now, not handling conversions like:
1254 // (bitcast i64 %x to <2 x i32>)
1255 !I->getType()->isVectorTy()) {
1256 computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
1257 break;
1260 const Value *V;
1261 // Handle bitcast from floating point to integer.
1262 if (match(I, m_ElementWiseBitCast(m_Value(V))) &&
1263 V->getType()->isFPOrFPVectorTy()) {
1264 Type *FPType = V->getType()->getScalarType();
1265 KnownFPClass Result =
1266 computeKnownFPClass(V, DemandedElts, fcAllFlags, Depth + 1, Q);
1267 FPClassTest FPClasses = Result.KnownFPClasses;
1269 // TODO: Treat it as zero/poison if the use of I is unreachable.
1270 if (FPClasses == fcNone)
1271 break;
1273 if (Result.isKnownNever(fcNormal | fcSubnormal | fcNan)) {
1274 Known.Zero.setAllBits();
1275 Known.One.setAllBits();
1277 if (FPClasses & fcInf)
1278 Known = Known.intersectWith(KnownBits::makeConstant(
1279 APFloat::getInf(FPType->getFltSemantics()).bitcastToAPInt()));
1281 if (FPClasses & fcZero)
1282 Known = Known.intersectWith(KnownBits::makeConstant(
1283 APInt::getZero(FPType->getScalarSizeInBits())));
1285 Known.Zero.clearSignBit();
1286 Known.One.clearSignBit();
1289 if (Result.SignBit) {
1290 if (*Result.SignBit)
1291 Known.makeNegative();
1292 else
1293 Known.makeNonNegative();
1296 break;
1299 // Handle cast from vector integer type to scalar or vector integer.
1300 auto *SrcVecTy = dyn_cast<FixedVectorType>(SrcTy);
1301 if (!SrcVecTy || !SrcVecTy->getElementType()->isIntegerTy() ||
1302 !I->getType()->isIntOrIntVectorTy() ||
1303 isa<ScalableVectorType>(I->getType()))
1304 break;
1306 // Look through a cast from narrow vector elements to wider type.
1307 // Examples: v4i32 -> v2i64, v3i8 -> v24
1308 unsigned SubBitWidth = SrcVecTy->getScalarSizeInBits();
1309 if (BitWidth % SubBitWidth == 0) {
1310 // Known bits are automatically intersected across demanded elements of a
1311 // vector. So for example, if a bit is computed as known zero, it must be
1312 // zero across all demanded elements of the vector.
1314 // For this bitcast, each demanded element of the output is sub-divided
1315 // across a set of smaller vector elements in the source vector. To get
1316 // the known bits for an entire element of the output, compute the known
1317 // bits for each sub-element sequentially. This is done by shifting the
1318 // one-set-bit demanded elements parameter across the sub-elements for
1319 // consecutive calls to computeKnownBits. We are using the demanded
1320 // elements parameter as a mask operator.
1322 // The known bits of each sub-element are then inserted into place
1323 // (dependent on endian) to form the full result of known bits.
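// For illustration: when bitcasting <4 x i16> to <2 x i32> on a little-endian
// target, element 0 of the result takes source element 0 as its low half and
// source element 1 as its high half.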
1324 unsigned NumElts = DemandedElts.getBitWidth();
1325 unsigned SubScale = BitWidth / SubBitWidth;
1326 APInt SubDemandedElts = APInt::getZero(NumElts * SubScale);
1327 for (unsigned i = 0; i != NumElts; ++i) {
1328 if (DemandedElts[i])
1329 SubDemandedElts.setBit(i * SubScale);
1332 KnownBits KnownSrc(SubBitWidth);
1333 for (unsigned i = 0; i != SubScale; ++i) {
1334 computeKnownBits(I->getOperand(0), SubDemandedElts.shl(i), KnownSrc,
1335 Depth + 1, Q);
1336 unsigned ShiftElt = Q.DL.isLittleEndian() ? i : SubScale - 1 - i;
1337 Known.insertBits(KnownSrc, ShiftElt * SubBitWidth);
1340 break;
1342 case Instruction::SExt: {
1343 // Compute the bits in the result that are not present in the input.
1344 unsigned SrcBitWidth = I->getOperand(0)->getType()->getScalarSizeInBits();
1346 Known = Known.trunc(SrcBitWidth);
1347 computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
1348 // If the sign bit of the input is known set or clear, then we know the
1349 // top bits of the result.
1350 Known = Known.sext(BitWidth);
1351 break;
1353 case Instruction::Shl: {
1354 bool NUW = Q.IIQ.hasNoUnsignedWrap(cast<OverflowingBinaryOperator>(I));
1355 bool NSW = Q.IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(I));
1356 auto KF = [NUW, NSW](const KnownBits &KnownVal, const KnownBits &KnownAmt,
1357 bool ShAmtNonZero) {
1358 return KnownBits::shl(KnownVal, KnownAmt, NUW, NSW, ShAmtNonZero);
1360 computeKnownBitsFromShiftOperator(I, DemandedElts, Known, Known2, Depth, Q,
1361 KF);
1362 // Trailing zeros of a left-shifted constant never decrease.
1363 const APInt *C;
1364 if (match(I->getOperand(0), m_APInt(C)))
1365 Known.Zero.setLowBits(C->countr_zero());
1366 break;
1368 case Instruction::LShr: {
1369 bool Exact = Q.IIQ.isExact(cast<BinaryOperator>(I));
1370 auto KF = [Exact](const KnownBits &KnownVal, const KnownBits &KnownAmt,
1371 bool ShAmtNonZero) {
1372 return KnownBits::lshr(KnownVal, KnownAmt, ShAmtNonZero, Exact);
1374 computeKnownBitsFromShiftOperator(I, DemandedElts, Known, Known2, Depth, Q,
1375 KF);
1376 // Leading zeros of a right-shifted constant never decrease.
1377 const APInt *C;
1378 if (match(I->getOperand(0), m_APInt(C)))
1379 Known.Zero.setHighBits(C->countl_zero());
1380 break;
1382 case Instruction::AShr: {
1383 bool Exact = Q.IIQ.isExact(cast<BinaryOperator>(I));
1384 auto KF = [Exact](const KnownBits &KnownVal, const KnownBits &KnownAmt,
1385 bool ShAmtNonZero) {
1386 return KnownBits::ashr(KnownVal, KnownAmt, ShAmtNonZero, Exact);
1388 computeKnownBitsFromShiftOperator(I, DemandedElts, Known, Known2, Depth, Q,
1389 KF);
1390 break;
1392 case Instruction::Sub: {
1393 bool NSW = Q.IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(I));
1394 bool NUW = Q.IIQ.hasNoUnsignedWrap(cast<OverflowingBinaryOperator>(I));
1395 computeKnownBitsAddSub(false, I->getOperand(0), I->getOperand(1), NSW, NUW,
1396 DemandedElts, Known, Known2, Depth, Q);
1397 break;
1399 case Instruction::Add: {
1400 bool NSW = Q.IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(I));
1401 bool NUW = Q.IIQ.hasNoUnsignedWrap(cast<OverflowingBinaryOperator>(I));
1402 computeKnownBitsAddSub(true, I->getOperand(0), I->getOperand(1), NSW, NUW,
1403 DemandedElts, Known, Known2, Depth, Q);
1404 break;
1406 case Instruction::SRem:
1407 computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
1408 computeKnownBits(I->getOperand(1), DemandedElts, Known2, Depth + 1, Q);
1409 Known = KnownBits::srem(Known, Known2);
1410 break;
1412 case Instruction::URem:
1413 computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
1414 computeKnownBits(I->getOperand(1), DemandedElts, Known2, Depth + 1, Q);
1415 Known = KnownBits::urem(Known, Known2);
1416 break;
1417 case Instruction::Alloca:
1418 Known.Zero.setLowBits(Log2(cast<AllocaInst>(I)->getAlign()));
1419 break;
1420 case Instruction::GetElementPtr: {
1421 // Analyze all of the subscripts of this getelementptr instruction
1422 // to determine if we can prove known low zero bits.
1423 computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
1424 // Accumulate the constant indices in a separate variable
1425 // to minimize the number of calls to computeForAddSub.
1426 APInt AccConstIndices(BitWidth, 0, /*IsSigned*/ true);
1428 gep_type_iterator GTI = gep_type_begin(I);
1429 for (unsigned i = 1, e = I->getNumOperands(); i != e; ++i, ++GTI) {
1430 // TrailZ can only become smaller, short-circuit if we hit zero.
1431 if (Known.isUnknown())
1432 break;
1434 Value *Index = I->getOperand(i);
1436 // Handle case when index is zero.
1437 Constant *CIndex = dyn_cast<Constant>(Index);
1438 if (CIndex && CIndex->isZeroValue())
1439 continue;
1441 if (StructType *STy = GTI.getStructTypeOrNull()) {
1442 // Handle struct member offset arithmetic.
1444 assert(CIndex &&
1445 "Access to structure field must be known at compile time");
1447 if (CIndex->getType()->isVectorTy())
1448 Index = CIndex->getSplatValue();
1450 unsigned Idx = cast<ConstantInt>(Index)->getZExtValue();
1451 const StructLayout *SL = Q.DL.getStructLayout(STy);
1452 uint64_t Offset = SL->getElementOffset(Idx);
1453 AccConstIndices += Offset;
1454 continue;
1457 // Handle array index arithmetic.
1458 Type *IndexedTy = GTI.getIndexedType();
1459 if (!IndexedTy->isSized()) {
1460 Known.resetAll();
1461 break;
1464 unsigned IndexBitWidth = Index->getType()->getScalarSizeInBits();
1465 KnownBits IndexBits(IndexBitWidth);
1466 computeKnownBits(Index, IndexBits, Depth + 1, Q);
1467 TypeSize IndexTypeSize = GTI.getSequentialElementStride(Q.DL);
1468 uint64_t TypeSizeInBytes = IndexTypeSize.getKnownMinValue();
1469 KnownBits ScalingFactor(IndexBitWidth);
1470 // Multiply by current sizeof type.
1471 // &A[i] == A + i * sizeof(*A[i]).
1472 if (IndexTypeSize.isScalable()) {
1473 // For scalable types the only thing we know about sizeof is
1474 // that this is a multiple of the minimum size.
1475 ScalingFactor.Zero.setLowBits(llvm::countr_zero(TypeSizeInBytes));
1476 } else if (IndexBits.isConstant()) {
1477 APInt IndexConst = IndexBits.getConstant();
1478 APInt ScalingFactor(IndexBitWidth, TypeSizeInBytes);
1479 IndexConst *= ScalingFactor;
1480 AccConstIndices += IndexConst.sextOrTrunc(BitWidth);
1481 continue;
1482 } else {
1483 ScalingFactor =
1484 KnownBits::makeConstant(APInt(IndexBitWidth, TypeSizeInBytes));
1486 IndexBits = KnownBits::mul(IndexBits, ScalingFactor);
1488 // If the offsets have a different width from the pointer, according
1489 // to the language reference we need to sign-extend or truncate them
1490 // to the width of the pointer.
1491 IndexBits = IndexBits.sextOrTrunc(BitWidth);
1493 // Note that inbounds does *not* guarantee nsw for the addition, as only
1494 // the offset is signed, while the base address is unsigned.
1495 Known = KnownBits::add(Known, IndexBits);
1497 if (!Known.isUnknown() && !AccConstIndices.isZero()) {
1498 KnownBits Index = KnownBits::makeConstant(AccConstIndices);
1499 Known = KnownBits::add(Known, Index);
1501 break;
1503 case Instruction::PHI: {
1504 const PHINode *P = cast<PHINode>(I);
1505 BinaryOperator *BO = nullptr;
1506 Value *R = nullptr, *L = nullptr;
1507 if (matchSimpleRecurrence(P, BO, R, L)) {
1508 // Handle the case of a simple two-predecessor recurrence PHI.
1509 // There's a lot more that could theoretically be done here, but
1510 // this is sufficient to catch some interesting cases.
1511 unsigned Opcode = BO->getOpcode();
1513 switch (Opcode) {
1514 // If this is a shift recurrence, we know the bits being shifted in. We
1515 // can combine that with information about the start value of the
1516 // recurrence to conclude facts about the result. If this is a udiv
1517 // recurrence, we know that the result can never exceed either the
1518 // numerator or the start value, whichever is greater.
1519 case Instruction::LShr:
1520 case Instruction::AShr:
1521 case Instruction::Shl:
1522 case Instruction::UDiv:
1523 if (BO->getOperand(0) != I)
1524 break;
1525 [[fallthrough]];
1527 // For a urem recurrence, the result can never exceed the start value. The
1528 // phi could either be the numerator or the denominator.
1529 case Instruction::URem: {
1530 // We have matched a recurrence of the form:
1531 // %iv = [R, %entry], [%iv.next, %backedge]
1532 // %iv.next = shift_op %iv, L
1534 // Recurse with the phi context to avoid concern about whether facts
1535 // inferred hold at the original context instruction. TODO: It may be
1536 // correct to use the original context. If warranted, explore and
1537 // add sufficient tests to cover.
1538 SimplifyQuery RecQ = Q.getWithoutCondContext();
1539 RecQ.CxtI = P;
1540 computeKnownBits(R, DemandedElts, Known2, Depth + 1, RecQ);
1541 switch (Opcode) {
1542 case Instruction::Shl:
1543 // A shl recurrence can only increase the number of trailing zeros.
1544 Known.Zero.setLowBits(Known2.countMinTrailingZeros());
1545 break;
1546 case Instruction::LShr:
1547 case Instruction::UDiv:
1548 case Instruction::URem:
1549 // lshr, udiv, and urem recurrences will preserve the leading zeros of
1550 // the start value.
1551 Known.Zero.setHighBits(Known2.countMinLeadingZeros());
1552 break;
1553 case Instruction::AShr:
1554 // An ashr recurrence will extend the initial sign bit
1555 Known.Zero.setHighBits(Known2.countMinLeadingZeros());
1556 Known.One.setHighBits(Known2.countMinLeadingOnes());
1557 break;
1559 break;
1562 // Check for operations that have the property that if
1563 // both their operands have low zero bits, the result
1564 // will have low zero bits.
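// For example, if both the start value and the step are multiples of 4, every
// value of the recurrence is also a multiple of 4, so two trailing zero bits
// are known in the result.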
1565 case Instruction::Add:
1566 case Instruction::Sub:
1567 case Instruction::And:
1568 case Instruction::Or:
1569 case Instruction::Mul: {
1570 // Change the context instruction to the "edge" that flows into the
1571 // phi. This is important because that is where the value is actually
1572 // "evaluated" even though it is used later somewhere else. (see also
1573 // D69571).
1574 SimplifyQuery RecQ = Q.getWithoutCondContext();
1576 unsigned OpNum = P->getOperand(0) == R ? 0 : 1;
1577 Instruction *RInst = P->getIncomingBlock(OpNum)->getTerminator();
1578 Instruction *LInst = P->getIncomingBlock(1 - OpNum)->getTerminator();
1580 // Ok, we have a PHI of the form L op= R. Check for low
1581 // zero bits.
1582 RecQ.CxtI = RInst;
1583 computeKnownBits(R, DemandedElts, Known2, Depth + 1, RecQ);
1585 // We need to take the minimum number of known bits
1586 KnownBits Known3(BitWidth);
1587 RecQ.CxtI = LInst;
1588 computeKnownBits(L, DemandedElts, Known3, Depth + 1, RecQ);
1590 Known.Zero.setLowBits(std::min(Known2.countMinTrailingZeros(),
1591 Known3.countMinTrailingZeros()));
1593 auto *OverflowOp = dyn_cast<OverflowingBinaryOperator>(BO);
1594 if (!OverflowOp || !Q.IIQ.hasNoSignedWrap(OverflowOp))
1595 break;
1597 switch (Opcode) {
1598 // If initial value of recurrence is nonnegative, and we are adding
1599 // a nonnegative number with nsw, the result can only be nonnegative
1600 // or poison value regardless of the number of times we execute the
1601 // add in phi recurrence. If initial value is negative and we are
1602 // adding a negative number with nsw, the result can only be
1603 // negative or poison value. Similar arguments apply to sub and mul.
1605 // (add non-negative, non-negative) --> non-negative
1606 // (add negative, negative) --> negative
1607 case Instruction::Add: {
1608 if (Known2.isNonNegative() && Known3.isNonNegative())
1609 Known.makeNonNegative();
1610 else if (Known2.isNegative() && Known3.isNegative())
1611 Known.makeNegative();
1612 break;
1615 // (sub nsw non-negative, negative) --> non-negative
1616 // (sub nsw negative, non-negative) --> negative
1617 case Instruction::Sub: {
1618 if (BO->getOperand(0) != I)
1619 break;
1620 if (Known2.isNonNegative() && Known3.isNegative())
1621 Known.makeNonNegative();
1622 else if (Known2.isNegative() && Known3.isNonNegative())
1623 Known.makeNegative();
1624 break;
1627 // (mul nsw non-negative, non-negative) --> non-negative
1628 case Instruction::Mul:
1629 if (Known2.isNonNegative() && Known3.isNonNegative())
1630 Known.makeNonNegative();
1631 break;
1633 default:
1634 break;
1636 break;
1639 default:
1640 break;
1644 // Unreachable blocks may have zero-operand PHI nodes.
1645 if (P->getNumIncomingValues() == 0)
1646 break;
1648 // Otherwise take the unions of the known bit sets of the operands,
1649 // taking conservative care to avoid excessive recursion.
1650 if (Depth < MaxAnalysisRecursionDepth - 1 && Known.isUnknown()) {
1651 // Skip if every incoming value refers back to the PHI itself.
1652 if (isa_and_nonnull<UndefValue>(P->hasConstantValue()))
1653 break;
1655 Known.Zero.setAllBits();
1656 Known.One.setAllBits();
1657 for (const Use &U : P->operands()) {
1658 Value *IncValue;
1659 const PHINode *CxtPhi;
1660 Instruction *CxtI;
1661 breakSelfRecursivePHI(&U, P, IncValue, CxtI, &CxtPhi);
1662 // Skip direct self references.
1663 if (IncValue == P)
1664 continue;
1666 // Change the context instruction to the "edge" that flows into the
1667 // phi. This is important because that is where the value is actually
1668 // "evaluated" even though it is used later somewhere else. (see also
1669 // D69571).
1670 SimplifyQuery RecQ = Q.getWithoutCondContext().getWithInstruction(CxtI);
1672 Known2 = KnownBits(BitWidth);
1674 // Recurse, but cap the recursion to one level, because we don't
1675 // want to waste time spinning around in loops.
1676 // TODO: See if we can base recursion limiter on number of incoming phi
1677 // edges so we don't overly clamp analysis.
1678 computeKnownBits(IncValue, DemandedElts, Known2,
1679 MaxAnalysisRecursionDepth - 1, RecQ);
1681 // See if we can further use a conditional branch into the phi
1682 // to help us determine the range of the value.
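// Illustrative example: if the predecessor ends in
//   br i1 (icmp ult i32 %inc, 16), label %phi.block, label %other
// then on the edge into the phi the incoming value %inc is known to be u< 16,
// so its high bits are known zero.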
1683 if (!Known2.isConstant()) {
1684 CmpPredicate Pred;
1685 const APInt *RHSC;
1686 BasicBlock *TrueSucc, *FalseSucc;
1687 // TODO: Use RHS Value and compute range from its known bits.
1688 if (match(RecQ.CxtI,
1689 m_Br(m_c_ICmp(Pred, m_Specific(IncValue), m_APInt(RHSC)),
1690 m_BasicBlock(TrueSucc), m_BasicBlock(FalseSucc)))) {
1691 // Check for cases of duplicate successors.
1692 if ((TrueSucc == CxtPhi->getParent()) !=
1693 (FalseSucc == CxtPhi->getParent())) {
1694 // If we're using the false successor, invert the predicate.
1695 if (FalseSucc == CxtPhi->getParent())
1696 Pred = CmpInst::getInversePredicate(Pred);
1697 // Get the knownbits implied by the incoming phi condition.
1698 auto CR = ConstantRange::makeExactICmpRegion(Pred, *RHSC);
1699 KnownBits KnownUnion = Known2.unionWith(CR.toKnownBits());
1700 // We can have conflicts here if we are analyzing dead code (it is
1701 // impossible for us to reach this BB based on the icmp).
1702 if (KnownUnion.hasConflict()) {
1703 // No reason to continue analyzing in a known dead region, so
1704 // just resetAll and break. This will cause us to also exit the
1705 // outer loop.
1706 Known.resetAll();
1707 break;
1709 Known2 = KnownUnion;
1714 Known = Known.intersectWith(Known2);
1715 // If all bits have been ruled out, there's no need to check
1716 // more operands.
1717 if (Known.isUnknown())
1718 break;
1721 break;
1723 case Instruction::Call:
1724 case Instruction::Invoke: {
1725 // If range metadata is attached to this call, set known bits from that,
1726 // and then intersect with known bits based on other properties of the
1727 // function.
1728 if (MDNode *MD =
1729 Q.IIQ.getMetadata(cast<Instruction>(I), LLVMContext::MD_range))
1730 computeKnownBitsFromRangeMetadata(*MD, Known);
1732 const auto *CB = cast<CallBase>(I);
1734 if (std::optional<ConstantRange> Range = CB->getRange())
1735 Known = Known.unionWith(Range->toKnownBits());
1737 if (const Value *RV = CB->getReturnedArgOperand()) {
1738 if (RV->getType() == I->getType()) {
1739 computeKnownBits(RV, Known2, Depth + 1, Q);
1740 Known = Known.unionWith(Known2);
1741 // If the function doesn't return properly for all input values
1742 // (e.g. unreachable exits) then there might be conflicts between the
1743 // argument value and the range metadata. Simply discard the known bits
1744 // in case of conflicts.
1745 if (Known.hasConflict())
1746 Known.resetAll();
1749 if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
1750 switch (II->getIntrinsicID()) {
1751 default:
1752 break;
1753 case Intrinsic::abs: {
1754 computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);
1755 bool IntMinIsPoison = match(II->getArgOperand(1), m_One());
1756 Known = Known2.abs(IntMinIsPoison);
1757 break;
1759 case Intrinsic::bitreverse:
1760 computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);
1761 Known.Zero |= Known2.Zero.reverseBits();
1762 Known.One |= Known2.One.reverseBits();
1763 break;
1764 case Intrinsic::bswap:
1765 computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);
1766 Known.Zero |= Known2.Zero.byteSwap();
1767 Known.One |= Known2.One.byteSwap();
1768 break;
1769 case Intrinsic::ctlz: {
1770 computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);
1771 // If we have a known 1, its position is our upper bound.
1772 unsigned PossibleLZ = Known2.countMaxLeadingZeros();
1773 // If this call is poison for 0 input, the result will be less than 2^n.
1774 if (II->getArgOperand(1) == ConstantInt::getTrue(II->getContext()))
1775 PossibleLZ = std::min(PossibleLZ, BitWidth - 1);
1776 unsigned LowBits = llvm::bit_width(PossibleLZ);
1777 Known.Zero.setBitsFrom(LowBits);
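// E.g. for i32, if a zero input is poison the count is at most 31, so only
// the low 5 bits of the result can possibly be set.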
1778 break;
1780 case Intrinsic::cttz: {
1781 computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);
1782 // If we have a known 1, its position is our upper bound.
1783 unsigned PossibleTZ = Known2.countMaxTrailingZeros();
1784 // If this call is poison for 0 input, the result will be less than 2^n.
1785 if (II->getArgOperand(1) == ConstantInt::getTrue(II->getContext()))
1786 PossibleTZ = std::min(PossibleTZ, BitWidth - 1);
1787 unsigned LowBits = llvm::bit_width(PossibleTZ);
1788 Known.Zero.setBitsFrom(LowBits);
1789 break;
1791 case Intrinsic::ctpop: {
1792 computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);
1793 // We can bound the space the count needs. Also, bits known to be zero
1794 // can't contribute to the population.
1795 unsigned BitsPossiblySet = Known2.countMaxPopulation();
1796 unsigned LowBits = llvm::bit_width(BitsPossiblySet);
1797 Known.Zero.setBitsFrom(LowBits);
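// E.g. if at most 7 bits of the input can possibly be set, the population
// count fits in 3 bits, so every result bit above bit 2 is known zero.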
1798 // TODO: we could bound Known.One using the lower bound on the number
1799 // of bits which might be set, as provided by Known2's minimum popcount.
1800 break;
1802 case Intrinsic::fshr:
1803 case Intrinsic::fshl: {
1804 const APInt *SA;
1805 if (!match(I->getOperand(2), m_APInt(SA)))
1806 break;
1808 // Normalize to funnel shift left.
1809 uint64_t ShiftAmt = SA->urem(BitWidth);
1810 if (II->getIntrinsicID() == Intrinsic::fshr)
1811 ShiftAmt = BitWidth - ShiftAmt;
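// Conceptually, fshl(X, Y, s) returns the top BitWidth bits of the
// concatenation X:Y shifted left by s, i.e. (X << s) | (Y u>> (BitWidth - s))
// for s != 0, which is what the known-bits computation below mirrors.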
1813 KnownBits Known3(BitWidth);
1814 computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);
1815 computeKnownBits(I->getOperand(1), DemandedElts, Known3, Depth + 1, Q);
1817 Known.Zero =
1818 Known2.Zero.shl(ShiftAmt) | Known3.Zero.lshr(BitWidth - ShiftAmt);
1819 Known.One =
1820 Known2.One.shl(ShiftAmt) | Known3.One.lshr(BitWidth - ShiftAmt);
1821 break;
1823 case Intrinsic::uadd_sat:
1824 computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
1825 computeKnownBits(I->getOperand(1), DemandedElts, Known2, Depth + 1, Q);
1826 Known = KnownBits::uadd_sat(Known, Known2);
1827 break;
1828 case Intrinsic::usub_sat:
1829 computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
1830 computeKnownBits(I->getOperand(1), DemandedElts, Known2, Depth + 1, Q);
1831 Known = KnownBits::usub_sat(Known, Known2);
1832 break;
1833 case Intrinsic::sadd_sat:
1834 computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
1835 computeKnownBits(I->getOperand(1), DemandedElts, Known2, Depth + 1, Q);
1836 Known = KnownBits::sadd_sat(Known, Known2);
1837 break;
1838 case Intrinsic::ssub_sat:
1839 computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
1840 computeKnownBits(I->getOperand(1), DemandedElts, Known2, Depth + 1, Q);
1841 Known = KnownBits::ssub_sat(Known, Known2);
1842 break;
1843 // Vec reverse preserves bits from input vec.
1844 case Intrinsic::vector_reverse:
1845 computeKnownBits(I->getOperand(0), DemandedElts.reverseBits(), Known,
1846 Depth + 1, Q);
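// (Element i of the result is element NumElts-1-i of the source, so the
// demanded-element mask is reversed to match the source layout.)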
1847 break;
1848 // For min/max/and/or reductions, any bit common to every element in the
1849 // input vector is set in the output.
1850 case Intrinsic::vector_reduce_and:
1851 case Intrinsic::vector_reduce_or:
1852 case Intrinsic::vector_reduce_umax:
1853 case Intrinsic::vector_reduce_umin:
1854 case Intrinsic::vector_reduce_smax:
1855 case Intrinsic::vector_reduce_smin:
1856 computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
1857 break;
1858 case Intrinsic::vector_reduce_xor: {
1859 computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
1860 // The zeros common to all vecs are zero in the output.
1861 // If the number of elements is odd, then the common ones remain. If the
1862 // number of elements is even, then the common ones become zeros.
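// E.g. xor-reducing an even number of identical lanes cancels every common
// one bit, while an odd number of lanes leaves the common ones intact.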
1863 auto *VecTy = cast<VectorType>(I->getOperand(0)->getType());
1864 // Even, so the ones become zeros.
1865 bool EvenCnt = VecTy->getElementCount().isKnownEven();
1866 if (EvenCnt)
1867 Known.Zero |= Known.One;
1868 // The element count may be even (unknown for scalable vectors), so clear the ones.
1869 if (VecTy->isScalableTy() || EvenCnt)
1870 Known.One.clearAllBits();
1871 break;
1873 case Intrinsic::umin:
1874 computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
1875 computeKnownBits(I->getOperand(1), DemandedElts, Known2, Depth + 1, Q);
1876 Known = KnownBits::umin(Known, Known2);
1877 break;
1878 case Intrinsic::umax:
1879 computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
1880 computeKnownBits(I->getOperand(1), DemandedElts, Known2, Depth + 1, Q);
1881 Known = KnownBits::umax(Known, Known2);
1882 break;
1883 case Intrinsic::smin:
1884 computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
1885 computeKnownBits(I->getOperand(1), DemandedElts, Known2, Depth + 1, Q);
1886 Known = KnownBits::smin(Known, Known2);
1887 unionWithMinMaxIntrinsicClamp(II, Known);
1888 break;
1889 case Intrinsic::smax:
1890 computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
1891 computeKnownBits(I->getOperand(1), DemandedElts, Known2, Depth + 1, Q);
1892 Known = KnownBits::smax(Known, Known2);
1893 unionWithMinMaxIntrinsicClamp(II, Known);
1894 break;
1895 case Intrinsic::ptrmask: {
1896 computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
1898 const Value *Mask = I->getOperand(1);
1899 Known2 = KnownBits(Mask->getType()->getScalarSizeInBits());
1900 computeKnownBits(Mask, DemandedElts, Known2, Depth + 1, Q);
1901 // TODO: 1-extend would be more precise.
1902 Known &= Known2.anyextOrTrunc(BitWidth);
1903 break;
1905 case Intrinsic::x86_sse2_pmulh_w:
1906 case Intrinsic::x86_avx2_pmulh_w:
1907 case Intrinsic::x86_avx512_pmulh_w_512:
1908 computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
1909 computeKnownBits(I->getOperand(1), DemandedElts, Known2, Depth + 1, Q);
1910 Known = KnownBits::mulhs(Known, Known2);
1911 break;
1912 case Intrinsic::x86_sse2_pmulhu_w:
1913 case Intrinsic::x86_avx2_pmulhu_w:
1914 case Intrinsic::x86_avx512_pmulhu_w_512:
1915 computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
1916 computeKnownBits(I->getOperand(1), DemandedElts, Known2, Depth + 1, Q);
1917 Known = KnownBits::mulhu(Known, Known2);
1918 break;
1919 case Intrinsic::x86_sse42_crc32_64_64:
1920 Known.Zero.setBitsFrom(32);
1921 break;
1922 case Intrinsic::x86_ssse3_phadd_d_128:
1923 case Intrinsic::x86_ssse3_phadd_w_128:
1924 case Intrinsic::x86_avx2_phadd_d:
1925 case Intrinsic::x86_avx2_phadd_w: {
1926 Known = computeKnownBitsForHorizontalOperation(
1927 I, DemandedElts, Depth, Q,
1928 [](const KnownBits &KnownLHS, const KnownBits &KnownRHS) {
1929 return KnownBits::add(KnownLHS, KnownRHS);
1931 break;
1933 case Intrinsic::x86_ssse3_phadd_sw_128:
1934 case Intrinsic::x86_avx2_phadd_sw: {
1935 Known = computeKnownBitsForHorizontalOperation(I, DemandedElts, Depth,
1936 Q, KnownBits::sadd_sat);
1937 break;
1939 case Intrinsic::x86_ssse3_phsub_d_128:
1940 case Intrinsic::x86_ssse3_phsub_w_128:
1941 case Intrinsic::x86_avx2_phsub_d:
1942 case Intrinsic::x86_avx2_phsub_w: {
1943 Known = computeKnownBitsForHorizontalOperation(
1944 I, DemandedElts, Depth, Q,
1945 [](const KnownBits &KnownLHS, const KnownBits &KnownRHS) {
1946 return KnownBits::sub(KnownLHS, KnownRHS);
1948 break;
1950 case Intrinsic::x86_ssse3_phsub_sw_128:
1951 case Intrinsic::x86_avx2_phsub_sw: {
1952 Known = computeKnownBitsForHorizontalOperation(I, DemandedElts, Depth,
1953 Q, KnownBits::ssub_sat);
1954 break;
1956 case Intrinsic::riscv_vsetvli:
1957 case Intrinsic::riscv_vsetvlimax: {
1958 bool HasAVL = II->getIntrinsicID() == Intrinsic::riscv_vsetvli;
1959 const ConstantRange Range = getVScaleRange(II->getFunction(), BitWidth);
1960 uint64_t SEW = RISCVVType::decodeVSEW(
1961 cast<ConstantInt>(II->getArgOperand(HasAVL))->getZExtValue());
1962 RISCVII::VLMUL VLMUL = static_cast<RISCVII::VLMUL>(
1963 cast<ConstantInt>(II->getArgOperand(1 + HasAVL))->getZExtValue());
1964 uint64_t MaxVLEN =
1965 Range.getUnsignedMax().getZExtValue() * RISCV::RVVBitsPerBlock;
1966 uint64_t MaxVL = MaxVLEN / RISCVVType::getSEWLMULRatio(SEW, VLMUL);
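// Illustrative example: with vscale_range(2,2), SEW=32 and LMUL=1,
// MaxVLEN = 2 * 64 = 128 and MaxVL = 128 / 32 = 4, so the result needs at
// most 3 bits and the bits above that are known zero.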
1968 // The result of vsetvli must not be larger than AVL.
1969 if (HasAVL)
1970 if (auto *CI = dyn_cast<ConstantInt>(II->getArgOperand(0)))
1971 MaxVL = std::min(MaxVL, CI->getZExtValue());
1973 unsigned KnownZeroFirstBit = Log2_32(MaxVL) + 1;
1974 if (BitWidth > KnownZeroFirstBit)
1975 Known.Zero.setBitsFrom(KnownZeroFirstBit);
1976 break;
1978 case Intrinsic::vscale: {
1979 if (!II->getParent() || !II->getFunction())
1980 break;
1982 Known = getVScaleRange(II->getFunction(), BitWidth).toKnownBits();
1983 break;
1987 break;
1989 case Instruction::ShuffleVector: {
1990 auto *Shuf = dyn_cast<ShuffleVectorInst>(I);
1991 // FIXME: Do we need to handle ConstantExpr involving shufflevectors?
1992 if (!Shuf) {
1993 Known.resetAll();
1994 return;
1996 // For undef elements, we don't know anything about the common state of
1997 // the shuffle result.
1998 APInt DemandedLHS, DemandedRHS;
1999 if (!getShuffleDemandedElts(Shuf, DemandedElts, DemandedLHS, DemandedRHS)) {
2000 Known.resetAll();
2001 return;
2003 Known.One.setAllBits();
2004 Known.Zero.setAllBits();
2005 if (!!DemandedLHS) {
2006 const Value *LHS = Shuf->getOperand(0);
2007 computeKnownBits(LHS, DemandedLHS, Known, Depth + 1, Q);
2008 // If we don't know any bits, early out.
2009 if (Known.isUnknown())
2010 break;
2012 if (!!DemandedRHS) {
2013 const Value *RHS = Shuf->getOperand(1);
2014 computeKnownBits(RHS, DemandedRHS, Known2, Depth + 1, Q);
2015 Known = Known.intersectWith(Known2);
2017 break;
2019 case Instruction::InsertElement: {
2020 if (isa<ScalableVectorType>(I->getType())) {
2021 Known.resetAll();
2022 return;
2024 const Value *Vec = I->getOperand(0);
2025 const Value *Elt = I->getOperand(1);
2026 auto *CIdx = dyn_cast<ConstantInt>(I->getOperand(2));
2027 unsigned NumElts = DemandedElts.getBitWidth();
2028 APInt DemandedVecElts = DemandedElts;
2029 bool NeedsElt = true;
2030 // If we know the index we are inserting to, clear it from the Vec check.
2031 if (CIdx && CIdx->getValue().ult(NumElts)) {
2032 DemandedVecElts.clearBit(CIdx->getZExtValue());
2033 NeedsElt = DemandedElts[CIdx->getZExtValue()];
2036 Known.One.setAllBits();
2037 Known.Zero.setAllBits();
2038 if (NeedsElt) {
2039 computeKnownBits(Elt, Known, Depth + 1, Q);
2040 // If we don't know any bits, early out.
2041 if (Known.isUnknown())
2042 break;
2045 if (!DemandedVecElts.isZero()) {
2046 computeKnownBits(Vec, DemandedVecElts, Known2, Depth + 1, Q);
2047 Known = Known.intersectWith(Known2);
2049 break;
2051 case Instruction::ExtractElement: {
2052 // Look through extract element. If the index is non-constant or
2053 // out-of-range demand all elements, otherwise just the extracted element.
2054 const Value *Vec = I->getOperand(0);
2055 const Value *Idx = I->getOperand(1);
2056 auto *CIdx = dyn_cast<ConstantInt>(Idx);
2057 if (isa<ScalableVectorType>(Vec->getType())) {
2058 // FIXME: there's probably *something* we can do with scalable vectors
2059 Known.resetAll();
2060 break;
2062 unsigned NumElts = cast<FixedVectorType>(Vec->getType())->getNumElements();
2063 APInt DemandedVecElts = APInt::getAllOnes(NumElts);
2064 if (CIdx && CIdx->getValue().ult(NumElts))
2065 DemandedVecElts = APInt::getOneBitSet(NumElts, CIdx->getZExtValue());
2066 computeKnownBits(Vec, DemandedVecElts, Known, Depth + 1, Q);
2067 break;
2069 case Instruction::ExtractValue:
2070 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I->getOperand(0))) {
2071 const ExtractValueInst *EVI = cast<ExtractValueInst>(I);
2072 if (EVI->getNumIndices() != 1) break;
2073 if (EVI->getIndices()[0] == 0) {
2074 switch (II->getIntrinsicID()) {
2075 default: break;
2076 case Intrinsic::uadd_with_overflow:
2077 case Intrinsic::sadd_with_overflow:
2078 computeKnownBitsAddSub(
2079 true, II->getArgOperand(0), II->getArgOperand(1), /*NSW=*/false,
2080 /* NUW=*/false, DemandedElts, Known, Known2, Depth, Q);
2081 break;
2082 case Intrinsic::usub_with_overflow:
2083 case Intrinsic::ssub_with_overflow:
2084 computeKnownBitsAddSub(
2085 false, II->getArgOperand(0), II->getArgOperand(1), /*NSW=*/false,
2086 /* NUW=*/false, DemandedElts, Known, Known2, Depth, Q);
2087 break;
2088 case Intrinsic::umul_with_overflow:
2089 case Intrinsic::smul_with_overflow:
2090 computeKnownBitsMul(II->getArgOperand(0), II->getArgOperand(1), false,
2091 false, DemandedElts, Known, Known2, Depth, Q);
2092 break;
2096 break;
2097 case Instruction::Freeze:
2098 if (isGuaranteedNotToBePoison(I->getOperand(0), Q.AC, Q.CxtI, Q.DT,
2099 Depth + 1))
2100 computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
2101 break;
2105 /// Determine which bits of V are known to be either zero or one and return
2106 /// them.
2107 KnownBits llvm::computeKnownBits(const Value *V, const APInt &DemandedElts,
2108 unsigned Depth, const SimplifyQuery &Q) {
2109 KnownBits Known(getBitWidth(V->getType(), Q.DL));
2110 ::computeKnownBits(V, DemandedElts, Known, Depth, Q);
2111 return Known;
2114 /// Determine which bits of V are known to be either zero or one and return
2115 /// them.
2116 KnownBits llvm::computeKnownBits(const Value *V, unsigned Depth,
2117 const SimplifyQuery &Q) {
2118 KnownBits Known(getBitWidth(V->getType(), Q.DL));
2119 computeKnownBits(V, Known, Depth, Q);
2120 return Known;
2123 /// Determine which bits of V are known to be either zero or one and return
2124 /// them in the Known bit set.
2126 /// NOTE: we cannot consider 'undef' to be "IsZero" here. The problem is that
2127 /// we cannot optimize based on the assumption that it is zero without changing
2128 /// it to be an explicit zero. If we don't change it to zero, other code could
2129 /// be optimized based on the contradictory assumption that it is non-zero.
2130 /// Because instcombine aggressively folds operations with undef args anyway,
2131 /// this won't lose us code quality.
2133 /// This function is defined on values with integer type, values with pointer
2134 /// type, and vectors of integers. In the case
2135 /// where V is a vector, the known zero and known one values are the
2136 /// same width as the vector element, and the bit is set only if it is true
2137 /// for all of the demanded elements in the vector specified by DemandedElts.
2138 void computeKnownBits(const Value *V, const APInt &DemandedElts,
2139 KnownBits &Known, unsigned Depth,
2140 const SimplifyQuery &Q) {
2141 if (!DemandedElts) {
2142 // No demanded elts, better to assume we don't know anything.
2143 Known.resetAll();
2144 return;
2147 assert(V && "No Value?");
2148 assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");
2150 #ifndef NDEBUG
2151 Type *Ty = V->getType();
2152 unsigned BitWidth = Known.getBitWidth();
2154 assert((Ty->isIntOrIntVectorTy(BitWidth) || Ty->isPtrOrPtrVectorTy()) &&
2155 "Not integer or pointer type!");
2157 if (auto *FVTy = dyn_cast<FixedVectorType>(Ty)) {
2158 assert(
2159 FVTy->getNumElements() == DemandedElts.getBitWidth() &&
2160 "DemandedElt width should equal the fixed vector number of elements");
2161 } else {
2162 assert(DemandedElts == APInt(1, 1) &&
2163 "DemandedElt width should be 1 for scalars or scalable vectors");
2166 Type *ScalarTy = Ty->getScalarType();
2167 if (ScalarTy->isPointerTy()) {
2168 assert(BitWidth == Q.DL.getPointerTypeSizeInBits(ScalarTy) &&
2169 "V and Known should have same BitWidth");
2170 } else {
2171 assert(BitWidth == Q.DL.getTypeSizeInBits(ScalarTy) &&
2172 "V and Known should have same BitWidth");
2174 #endif
2176 const APInt *C;
2177 if (match(V, m_APInt(C))) {
2178 // We know all of the bits for a scalar constant or a splat vector constant!
2179 Known = KnownBits::makeConstant(*C);
2180 return;
2182 // Null and aggregate-zero are all-zeros.
2183 if (isa<ConstantPointerNull>(V) || isa<ConstantAggregateZero>(V)) {
2184 Known.setAllZero();
2185 return;
2187 // Handle a constant vector by taking the intersection of the known bits of
2188 // each element.
2189 if (const ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(V)) {
2190 assert(!isa<ScalableVectorType>(V->getType()));
2191 // We know that CDV must be a vector of integers. Take the intersection of
2192 // each element.
2193 Known.Zero.setAllBits(); Known.One.setAllBits();
2194 for (unsigned i = 0, e = CDV->getNumElements(); i != e; ++i) {
2195 if (!DemandedElts[i])
2196 continue;
2197 APInt Elt = CDV->getElementAsAPInt(i);
2198 Known.Zero &= ~Elt;
2199 Known.One &= Elt;
2201 if (Known.hasConflict())
2202 Known.resetAll();
2203 return;
2206 if (const auto *CV = dyn_cast<ConstantVector>(V)) {
2207 assert(!isa<ScalableVectorType>(V->getType()));
2208 // We know that CV must be a vector of integers. Take the intersection of
2209 // each element.
2210 Known.Zero.setAllBits(); Known.One.setAllBits();
2211 for (unsigned i = 0, e = CV->getNumOperands(); i != e; ++i) {
2212 if (!DemandedElts[i])
2213 continue;
2214 Constant *Element = CV->getAggregateElement(i);
2215 if (isa<PoisonValue>(Element))
2216 continue;
2217 auto *ElementCI = dyn_cast_or_null<ConstantInt>(Element);
2218 if (!ElementCI) {
2219 Known.resetAll();
2220 return;
2222 const APInt &Elt = ElementCI->getValue();
2223 Known.Zero &= ~Elt;
2224 Known.One &= Elt;
2226 if (Known.hasConflict())
2227 Known.resetAll();
2228 return;
2231 // Start out not knowing anything.
2232 Known.resetAll();
2234 // We can't imply anything about undefs.
2235 if (isa<UndefValue>(V))
2236 return;
2238 // There's no point in looking through other users of ConstantData for
2239 // assumptions. Confirm that we've handled them all.
2240 assert(!isa<ConstantData>(V) && "Unhandled constant data!");
2242 if (const auto *A = dyn_cast<Argument>(V))
2243 if (std::optional<ConstantRange> Range = A->getRange())
2244 Known = Range->toKnownBits();
2246 // All recursive calls that increase depth must come after this.
2247 if (Depth == MaxAnalysisRecursionDepth)
2248 return;
2250 // A weak GlobalAlias is totally unknown. A non-weak GlobalAlias has
2251 // the bits of its aliasee.
2252 if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
2253 if (!GA->isInterposable())
2254 computeKnownBits(GA->getAliasee(), Known, Depth + 1, Q);
2255 return;
2258 if (const Operator *I = dyn_cast<Operator>(V))
2259 computeKnownBitsFromOperator(I, DemandedElts, Known, Depth, Q);
2260 else if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
2261 if (std::optional<ConstantRange> CR = GV->getAbsoluteSymbolRange())
2262 Known = CR->toKnownBits();
2265 // Aligned pointers have trailing zeros - refine Known.Zero set
2266 if (isa<PointerType>(V->getType())) {
2267 Align Alignment = V->getPointerAlignment(Q.DL);
2268 Known.Zero.setLowBits(Log2(Alignment));
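// E.g. a pointer with 16-byte alignment has its low 4 bits known to be zero.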
2271 // computeKnownBitsFromContext strictly refines Known.
2272 // Therefore, we run them after computeKnownBitsFromOperator.
2274 // Check whether we can determine known bits from context such as assumes.
2275 computeKnownBitsFromContext(V, Known, Depth, Q);
2278 /// Try to detect a recurrence in which the value of the induction variable is
2279 /// always a power of two (or zero).
2280 static bool isPowerOfTwoRecurrence(const PHINode *PN, bool OrZero,
2281 unsigned Depth, SimplifyQuery &Q) {
2282 BinaryOperator *BO = nullptr;
2283 Value *Start = nullptr, *Step = nullptr;
2284 if (!matchSimpleRecurrence(PN, BO, Start, Step))
2285 return false;
2287 // Initial value must be a power of two.
2288 for (const Use &U : PN->operands()) {
2289 if (U.get() == Start) {
2290 // Initial value comes from a different BB, need to adjust context
2291 // instruction for analysis.
2292 Q.CxtI = PN->getIncomingBlock(U)->getTerminator();
2293 if (!isKnownToBeAPowerOfTwo(Start, OrZero, Depth, Q))
2294 return false;
2298 // Except for Mul, the induction variable must be on the left side of the
2299 // increment expression, otherwise its value can be arbitrary.
2300 if (BO->getOpcode() != Instruction::Mul && BO->getOperand(1) != Step)
2301 return false;
2303 Q.CxtI = BO->getParent()->getTerminator();
2304 switch (BO->getOpcode()) {
2305 case Instruction::Mul:
2306 // Power of two is closed under multiplication.
2307 return (OrZero || Q.IIQ.hasNoUnsignedWrap(BO) ||
2308 Q.IIQ.hasNoSignedWrap(BO)) &&
2309 isKnownToBeAPowerOfTwo(Step, OrZero, Depth, Q);
2310 case Instruction::SDiv:
2311 // Start value must not be signmask for signed division, so simply being a
2312 // power of two is not sufficient, and it has to be a constant.
2313 if (!match(Start, m_Power2()) || match(Start, m_SignMask()))
2314 return false;
2315 [[fallthrough]];
2316 case Instruction::UDiv:
2317 // Divisor must be a power of two.
2318 // If OrZero is false, we cannot guarantee the induction variable is non-zero
2319 // after division (same for Shr) unless the division is exact.
2320 return (OrZero || Q.IIQ.isExact(BO)) &&
2321 isKnownToBeAPowerOfTwo(Step, false, Depth, Q);
2322 case Instruction::Shl:
2323 return OrZero || Q.IIQ.hasNoUnsignedWrap(BO) || Q.IIQ.hasNoSignedWrap(BO);
2324 case Instruction::AShr:
2325 if (!match(Start, m_Power2()) || match(Start, m_SignMask()))
2326 return false;
2327 [[fallthrough]];
2328 case Instruction::LShr:
2329 return OrZero || Q.IIQ.isExact(BO);
2330 default:
2331 return false;
2335 /// Return true if we can infer that \p V is known to be a power of 2 from
2336 /// dominating condition \p Cond (e.g., ctpop(V) == 1).
2337 static bool isImpliedToBeAPowerOfTwoFromCond(const Value *V, bool OrZero,
2338 const Value *Cond,
2339 bool CondIsTrue) {
2340 CmpPredicate Pred;
2341 const APInt *RHSC;
2342 if (!match(Cond, m_ICmp(Pred, m_Intrinsic<Intrinsic::ctpop>(m_Specific(V)),
2343 m_APInt(RHSC))))
2344 return false;
2345 if (!CondIsTrue)
2346 Pred = ICmpInst::getInversePredicate(Pred);
2347 // ctpop(V) u< 2
2348 if (OrZero && Pred == ICmpInst::ICMP_ULT && *RHSC == 2)
2349 return true;
2350 // ctpop(V) == 1
2351 return Pred == ICmpInst::ICMP_EQ && *RHSC == 1;
2354 /// Return true if the given value is known to have exactly one
2355 /// bit set when defined. For vectors return true if every element is known to
2356 /// be a power of two when defined. Supports values with integer or pointer
2357 /// types and vectors of integers.
2358 bool llvm::isKnownToBeAPowerOfTwo(const Value *V, bool OrZero, unsigned Depth,
2359 const SimplifyQuery &Q) {
2360 assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");
2362 if (isa<Constant>(V))
2363 return OrZero ? match(V, m_Power2OrZero()) : match(V, m_Power2());
2365 // i1 is by definition a power of 2 or zero.
2366 if (OrZero && V->getType()->getScalarSizeInBits() == 1)
2367 return true;
2369 // Try to infer from assumptions.
2370 if (Q.AC && Q.CxtI) {
2371 for (auto &AssumeVH : Q.AC->assumptionsFor(V)) {
2372 if (!AssumeVH)
2373 continue;
2374 CallInst *I = cast<CallInst>(AssumeVH);
2375 if (isImpliedToBeAPowerOfTwoFromCond(V, OrZero, I->getArgOperand(0),
2376 /*CondIsTrue=*/true) &&
2377 isValidAssumeForContext(I, Q.CxtI, Q.DT))
2378 return true;
2382 // Handle dominating conditions.
2383 if (Q.DC && Q.CxtI && Q.DT) {
2384 for (BranchInst *BI : Q.DC->conditionsFor(V)) {
2385 Value *Cond = BI->getCondition();
2387 BasicBlockEdge Edge0(BI->getParent(), BI->getSuccessor(0));
2388 if (isImpliedToBeAPowerOfTwoFromCond(V, OrZero, Cond,
2389 /*CondIsTrue=*/true) &&
2390 Q.DT->dominates(Edge0, Q.CxtI->getParent()))
2391 return true;
2393 BasicBlockEdge Edge1(BI->getParent(), BI->getSuccessor(1));
2394 if (isImpliedToBeAPowerOfTwoFromCond(V, OrZero, Cond,
2395 /*CondIsTrue=*/false) &&
2396 Q.DT->dominates(Edge1, Q.CxtI->getParent()))
2397 return true;
2401 auto *I = dyn_cast<Instruction>(V);
2402 if (!I)
2403 return false;
2405 if (Q.CxtI && match(V, m_VScale())) {
2406 const Function *F = Q.CxtI->getFunction();
2407 // The vscale_range indicates vscale is a power-of-two.
2408 return F->hasFnAttribute(Attribute::VScaleRange);
2411 // 1 << X is clearly a power of two if the one is not shifted off the end. If
2412 // it is shifted off the end then the result is undefined.
2413 if (match(I, m_Shl(m_One(), m_Value())))
2414 return true;
2416 // (signmask) >>l X is clearly a power of two if the one is not shifted off
2417 // the bottom. If it is shifted off the bottom then the result is undefined.
2418 if (match(I, m_LShr(m_SignMask(), m_Value())))
2419 return true;
2421 // The remaining tests are all recursive, so bail out if we hit the limit.
2422 if (Depth++ == MaxAnalysisRecursionDepth)
2423 return false;
2425 switch (I->getOpcode()) {
2426 case Instruction::ZExt:
2427 return isKnownToBeAPowerOfTwo(I->getOperand(0), OrZero, Depth, Q);
2428 case Instruction::Trunc:
2429 return OrZero && isKnownToBeAPowerOfTwo(I->getOperand(0), OrZero, Depth, Q);
2430 case Instruction::Shl:
2431 if (OrZero || Q.IIQ.hasNoUnsignedWrap(I) || Q.IIQ.hasNoSignedWrap(I))
2432 return isKnownToBeAPowerOfTwo(I->getOperand(0), OrZero, Depth, Q);
2433 return false;
2434 case Instruction::LShr:
2435 if (OrZero || Q.IIQ.isExact(cast<BinaryOperator>(I)))
2436 return isKnownToBeAPowerOfTwo(I->getOperand(0), OrZero, Depth, Q);
2437 return false;
2438 case Instruction::UDiv:
2439 if (Q.IIQ.isExact(cast<BinaryOperator>(I)))
2440 return isKnownToBeAPowerOfTwo(I->getOperand(0), OrZero, Depth, Q);
2441 return false;
2442 case Instruction::Mul:
2443 return isKnownToBeAPowerOfTwo(I->getOperand(1), OrZero, Depth, Q) &&
2444 isKnownToBeAPowerOfTwo(I->getOperand(0), OrZero, Depth, Q) &&
2445 (OrZero || isKnownNonZero(I, Q, Depth));
2446 case Instruction::And:
2447 // A power of two and'd with anything is a power of two or zero.
2448 if (OrZero &&
2449 (isKnownToBeAPowerOfTwo(I->getOperand(1), /*OrZero*/ true, Depth, Q) ||
2450 isKnownToBeAPowerOfTwo(I->getOperand(0), /*OrZero*/ true, Depth, Q)))
2451 return true;
2452 // X & (-X) is always a power of two or zero.
2453 if (match(I->getOperand(0), m_Neg(m_Specific(I->getOperand(1)))) ||
2454 match(I->getOperand(1), m_Neg(m_Specific(I->getOperand(0)))))
2455 return OrZero || isKnownNonZero(I->getOperand(0), Q, Depth);
2456 return false;
2457 case Instruction::Add: {
2458 // Adding a power-of-two or zero to the same power-of-two or zero yields
2459 // either the original power-of-two, a larger power-of-two, or zero.
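// E.g. if P is a power of two, P + (P & M) is either P (when P & M == 0) or
// 2*P (when P & M == P), i.e. a power of two unless the doubling wraps to zero.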
2460 const OverflowingBinaryOperator *VOBO = cast<OverflowingBinaryOperator>(V);
2461 if (OrZero || Q.IIQ.hasNoUnsignedWrap(VOBO) ||
2462 Q.IIQ.hasNoSignedWrap(VOBO)) {
2463 if (match(I->getOperand(0),
2464 m_c_And(m_Specific(I->getOperand(1)), m_Value())) &&
2465 isKnownToBeAPowerOfTwo(I->getOperand(1), OrZero, Depth, Q))
2466 return true;
2467 if (match(I->getOperand(1),
2468 m_c_And(m_Specific(I->getOperand(0)), m_Value())) &&
2469 isKnownToBeAPowerOfTwo(I->getOperand(0), OrZero, Depth, Q))
2470 return true;
2472 unsigned BitWidth = V->getType()->getScalarSizeInBits();
2473 KnownBits LHSBits(BitWidth);
2474 computeKnownBits(I->getOperand(0), LHSBits, Depth, Q);
2476 KnownBits RHSBits(BitWidth);
2477 computeKnownBits(I->getOperand(1), RHSBits, Depth, Q);
2478 // If i8 V is a power of two or zero:
2479 // ZeroBits: 1 1 1 0 1 1 1 1
2480 // ~ZeroBits: 0 0 0 1 0 0 0 0
2481 if ((~(LHSBits.Zero & RHSBits.Zero)).isPowerOf2())
2482 // If OrZero isn't set, we cannot give back a zero result.
2483 // Make sure either the LHS or RHS has a bit set.
2484 if (OrZero || RHSBits.One.getBoolValue() || LHSBits.One.getBoolValue())
2485 return true;
2488 // LShr(UINT_MAX, Y) + 1 is a power of two (if add is nuw) or zero.
2489 if (OrZero || Q.IIQ.hasNoUnsignedWrap(VOBO))
2490 if (match(I, m_Add(m_LShr(m_AllOnes(), m_Value()), m_One())))
2491 return true;
2492 return false;
2494 case Instruction::Select:
2495 return isKnownToBeAPowerOfTwo(I->getOperand(1), OrZero, Depth, Q) &&
2496 isKnownToBeAPowerOfTwo(I->getOperand(2), OrZero, Depth, Q);
2497 case Instruction::PHI: {
2498 // A PHI node is power of two if all incoming values are power of two, or if
2499 // it is an induction variable where in each step its value is a power of
2500 // two.
2501 auto *PN = cast<PHINode>(I);
2502 SimplifyQuery RecQ = Q.getWithoutCondContext();
2504 // Check if it is an induction variable and always power of two.
2505 if (isPowerOfTwoRecurrence(PN, OrZero, Depth, RecQ))
2506 return true;
2508 // Recursively check all incoming values. Limit recursion to 2 levels, so
2509 // that search complexity is limited to number of operands^2.
2510 unsigned NewDepth = std::max(Depth, MaxAnalysisRecursionDepth - 1);
2511 return llvm::all_of(PN->operands(), [&](const Use &U) {
2512 // The value is a power of 2 if it comes from the PHI node itself by induction.
2513 if (U.get() == PN)
2514 return true;
2516 // Change the context instruction to the incoming block where it is
2517 // evaluated.
2518 RecQ.CxtI = PN->getIncomingBlock(U)->getTerminator();
2519 return isKnownToBeAPowerOfTwo(U.get(), OrZero, NewDepth, RecQ);
2522 case Instruction::Invoke:
2523 case Instruction::Call: {
2524 if (auto *II = dyn_cast<IntrinsicInst>(I)) {
2525 switch (II->getIntrinsicID()) {
2526 case Intrinsic::umax:
2527 case Intrinsic::smax:
2528 case Intrinsic::umin:
2529 case Intrinsic::smin:
2530 return isKnownToBeAPowerOfTwo(II->getArgOperand(1), OrZero, Depth, Q) &&
2531 isKnownToBeAPowerOfTwo(II->getArgOperand(0), OrZero, Depth, Q);
2532 // bswap/bitreverse just move bits around but don't change any 1s/0s,
2533 // and thus don't change pow2/non-pow2 status.
2534 case Intrinsic::bitreverse:
2535 case Intrinsic::bswap:
2536 return isKnownToBeAPowerOfTwo(II->getArgOperand(0), OrZero, Depth, Q);
2537 case Intrinsic::fshr:
2538 case Intrinsic::fshl:
2539 // If Op0 == Op1, this is a rotate. is_pow2(rotate(x, y)) == is_pow2(x)
2540 if (II->getArgOperand(0) == II->getArgOperand(1))
2541 return isKnownToBeAPowerOfTwo(II->getArgOperand(0), OrZero, Depth, Q);
2542 break;
2543 default:
2544 break;
2547 return false;
2549 default:
2550 return false;
2554 /// Test whether a GEP's result is known to be non-null.
2556 /// Uses properties inherent in a GEP to try to determine whether it is known
2557 /// to be non-null.
2559 /// Currently this routine does not support vector GEPs.
2560 static bool isGEPKnownNonNull(const GEPOperator *GEP, unsigned Depth,
2561 const SimplifyQuery &Q) {
2562 const Function *F = nullptr;
2563 if (const Instruction *I = dyn_cast<Instruction>(GEP))
2564 F = I->getFunction();
2566 // If the gep has nuw, or is inbounds in an address space where the null
2567 // pointer is invalid, the GEP may be null only if the base pointer is null and the offset is zero.
2568 if (!GEP->hasNoUnsignedWrap() &&
2569 !(GEP->isInBounds() &&
2570 !NullPointerIsDefined(F, GEP->getPointerAddressSpace())))
2571 return false;
2573 // FIXME: Support vector-GEPs.
2574 assert(GEP->getType()->isPointerTy() && "We only support plain pointer GEP");
2576 // If the base pointer is non-null, we cannot walk to a null address with an
2577 // inbounds GEP in address space zero.
2578 if (isKnownNonZero(GEP->getPointerOperand(), Q, Depth))
2579 return true;
2581 // Walk the GEP operands and see if any operand introduces a non-zero offset.
2582 // If so, then the GEP cannot produce a null pointer, as doing so would
2583 // inherently violate the inbounds contract within address space zero.
2584 for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP);
2585 GTI != GTE; ++GTI) {
2586 // Struct types are easy -- they must always be indexed by a constant.
2587 if (StructType *STy = GTI.getStructTypeOrNull()) {
2588 ConstantInt *OpC = cast<ConstantInt>(GTI.getOperand());
2589 unsigned ElementIdx = OpC->getZExtValue();
2590 const StructLayout *SL = Q.DL.getStructLayout(STy);
2591 uint64_t ElementOffset = SL->getElementOffset(ElementIdx);
2592 if (ElementOffset > 0)
2593 return true;
2594 continue;
2597 // If we have a zero-sized type, the index doesn't matter. Keep looping.
2598 if (GTI.getSequentialElementStride(Q.DL).isZero())
2599 continue;
2601 // Fast path the constant operand case both for efficiency and so we don't
2602 // increment Depth when just zipping down an all-constant GEP.
2603 if (ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand())) {
2604 if (!OpC->isZero())
2605 return true;
2606 continue;
2609 // We post-increment Depth here because while isKnownNonZero increments it
2610 // as well, when we pop back up that increment won't persist. We don't want
2611 // to recurse 10k times just because we have 10k GEP operands. We don't
2612 // bail completely out because we want to handle constant GEPs regardless
2613 // of depth.
2614 if (Depth++ >= MaxAnalysisRecursionDepth)
2615 continue;
2617 if (isKnownNonZero(GTI.getOperand(), Q, Depth))
2618 return true;
2621 return false;
2624 static bool isKnownNonNullFromDominatingCondition(const Value *V,
2625 const Instruction *CtxI,
2626 const DominatorTree *DT) {
2627 assert(!isa<Constant>(V) && "Called for constant?");
2629 if (!CtxI || !DT)
2630 return false;
2632 unsigned NumUsesExplored = 0;
2633 for (auto &U : V->uses()) {
2634 // Avoid massive lists
2635 if (NumUsesExplored >= DomConditionsMaxUses)
2636 break;
2637 NumUsesExplored++;
2639 const Instruction *UI = cast<Instruction>(U.getUser());
2640 // If the value is used as an argument to a call or invoke, then argument
2641 // attributes may provide an answer about null-ness.
2642 if (V->getType()->isPointerTy()) {
2643 if (const auto *CB = dyn_cast<CallBase>(UI)) {
2644 if (CB->isArgOperand(&U) &&
2645 CB->paramHasNonNullAttr(CB->getArgOperandNo(&U),
2646 /*AllowUndefOrPoison=*/false) &&
2647 DT->dominates(CB, CtxI))
2648 return true;
2652 // If the value is used as the pointer of a load/store, then it must be non-null.
2653 if (V == getLoadStorePointerOperand(UI)) {
2654 if (!NullPointerIsDefined(UI->getFunction(),
2655 V->getType()->getPointerAddressSpace()) &&
2656 DT->dominates(UI, CtxI))
2657 return true;
2660 if ((match(UI, m_IDiv(m_Value(), m_Specific(V))) ||
2661 match(UI, m_IRem(m_Value(), m_Specific(V)))) &&
2662 isValidAssumeForContext(UI, CtxI, DT))
2663 return true;
2665 // Consider only compare instructions uniquely controlling a branch
2666 Value *RHS;
2667 CmpPredicate Pred;
2668 if (!match(UI, m_c_ICmp(Pred, m_Specific(V), m_Value(RHS))))
2669 continue;
2671 bool NonNullIfTrue;
2672 if (cmpExcludesZero(Pred, RHS))
2673 NonNullIfTrue = true;
2674 else if (cmpExcludesZero(CmpInst::getInversePredicate(Pred), RHS))
2675 NonNullIfTrue = false;
2676 else
2677 continue;
2679 SmallVector<const User *, 4> WorkList;
2680 SmallPtrSet<const User *, 4> Visited;
2681 for (const auto *CmpU : UI->users()) {
2682 assert(WorkList.empty() && "Should be!");
2683 if (Visited.insert(CmpU).second)
2684 WorkList.push_back(CmpU);
2686 while (!WorkList.empty()) {
2687 auto *Curr = WorkList.pop_back_val();
2689 // If a user is an AND, add all its users to the work list. We only
2690 // propagate the "pred != null" condition through AND because it is only
2691 // correct to assume that all conditions of an AND are met in the true branch.
2692 // TODO: Support similar logic for OR and the EQ predicate?
2693 if (NonNullIfTrue)
2694 if (match(Curr, m_LogicalAnd(m_Value(), m_Value()))) {
2695 for (const auto *CurrU : Curr->users())
2696 if (Visited.insert(CurrU).second)
2697 WorkList.push_back(CurrU);
2698 continue;
2701 if (const BranchInst *BI = dyn_cast<BranchInst>(Curr)) {
2702 assert(BI->isConditional() && "uses a comparison!");
2704 BasicBlock *NonNullSuccessor =
2705 BI->getSuccessor(NonNullIfTrue ? 0 : 1);
2706 BasicBlockEdge Edge(BI->getParent(), NonNullSuccessor);
2707 if (Edge.isSingleEdge() && DT->dominates(Edge, CtxI->getParent()))
2708 return true;
2709 } else if (NonNullIfTrue && isGuard(Curr) &&
2710 DT->dominates(cast<Instruction>(Curr), CtxI)) {
2711 return true;
2717 return false;
2720 /// Does the 'Range' metadata (which must be a valid MD_range operand list)
2721 /// ensure that the value it's attached to is never Value? 'RangeType' is
2722 /// the type of the value described by the range.
2723 static bool rangeMetadataExcludesValue(const MDNode* Ranges, const APInt& Value) {
2724 const unsigned NumRanges = Ranges->getNumOperands() / 2;
2725 assert(NumRanges >= 1);
2726 for (unsigned i = 0; i < NumRanges; ++i) {
2727 ConstantInt *Lower =
2728 mdconst::extract<ConstantInt>(Ranges->getOperand(2 * i + 0));
2729 ConstantInt *Upper =
2730 mdconst::extract<ConstantInt>(Ranges->getOperand(2 * i + 1));
2731 ConstantRange Range(Lower->getValue(), Upper->getValue());
2732 if (Range.contains(Value))
2733 return false;
2735 return true;
2738 /// Try to detect a recurrence that monotonically increases/decreases from a
2739 /// non-zero starting value. These are common as induction variables.
2740 static bool isNonZeroRecurrence(const PHINode *PN) {
2741 BinaryOperator *BO = nullptr;
2742 Value *Start = nullptr, *Step = nullptr;
2743 const APInt *StartC, *StepC;
2744 if (!matchSimpleRecurrence(PN, BO, Start, Step) ||
2745 !match(Start, m_APInt(StartC)) || StartC->isZero())
2746 return false;
2748 switch (BO->getOpcode()) {
2749 case Instruction::Add:
2750 // Starting from non-zero and stepping away from zero can never wrap back
2751 // to zero.
2752 return BO->hasNoUnsignedWrap() ||
2753 (BO->hasNoSignedWrap() && match(Step, m_APInt(StepC)) &&
2754 StartC->isNegative() == StepC->isNegative());
2755 case Instruction::Mul:
2756 return (BO->hasNoUnsignedWrap() || BO->hasNoSignedWrap()) &&
2757 match(Step, m_APInt(StepC)) && !StepC->isZero();
2758 case Instruction::Shl:
2759 return BO->hasNoUnsignedWrap() || BO->hasNoSignedWrap();
2760 case Instruction::AShr:
2761 case Instruction::LShr:
2762 return BO->isExact();
2763 default:
2764 return false;
2768 static bool matchOpWithOpEqZero(Value *Op0, Value *Op1) {
2769 return match(Op0, m_ZExtOrSExt(m_SpecificICmp(ICmpInst::ICMP_EQ,
2770 m_Specific(Op1), m_Zero()))) ||
2771 match(Op1, m_ZExtOrSExt(m_SpecificICmp(ICmpInst::ICMP_EQ,
2772 m_Specific(Op0), m_Zero())));
2775 static bool isNonZeroAdd(const APInt &DemandedElts, unsigned Depth,
2776 const SimplifyQuery &Q, unsigned BitWidth, Value *X,
2777 Value *Y, bool NSW, bool NUW) {
2778 // (X + (X != 0)) is non zero
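// (If X == 0 the extended compare contributes a non-zero 1 or -1; if X != 0
// the compare is 0 and X itself makes the sum non-zero.)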
2779 if (matchOpWithOpEqZero(X, Y))
2780 return true;
2782 if (NUW)
2783 return isKnownNonZero(Y, DemandedElts, Q, Depth) ||
2784 isKnownNonZero(X, DemandedElts, Q, Depth);
2786 KnownBits XKnown = computeKnownBits(X, DemandedElts, Depth, Q);
2787 KnownBits YKnown = computeKnownBits(Y, DemandedElts, Depth, Q);
2789 // If X and Y are both non-negative (as signed values) then their sum is not
2790 // zero unless both X and Y are zero.
2791 if (XKnown.isNonNegative() && YKnown.isNonNegative())
2792 if (isKnownNonZero(Y, DemandedElts, Q, Depth) ||
2793 isKnownNonZero(X, DemandedElts, Q, Depth))
2794 return true;
2796 // If X and Y are both negative (as signed values) then their sum is not
2797 // zero unless both X and Y equal INT_MIN.
2798 if (XKnown.isNegative() && YKnown.isNegative()) {
2799 APInt Mask = APInt::getSignedMaxValue(BitWidth);
2800 // The sign bit of X is set. If some other bit is set then X is not equal
2801 // to INT_MIN.
2802 if (XKnown.One.intersects(Mask))
2803 return true;
2804 // The sign bit of Y is set. If some other bit is set then Y is not equal
2805 // to INT_MIN.
2806 if (YKnown.One.intersects(Mask))
2807 return true;
2810 // The sum of a non-negative number and a power of two is not zero.
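// (If X + P wrapped to zero, X would have to equal 2^BitWidth - P, which has
// its sign bit set for every power of two P, contradicting X being non-negative.)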
2811 if (XKnown.isNonNegative() &&
2812 isKnownToBeAPowerOfTwo(Y, /*OrZero*/ false, Depth, Q))
2813 return true;
2814 if (YKnown.isNonNegative() &&
2815 isKnownToBeAPowerOfTwo(X, /*OrZero*/ false, Depth, Q))
2816 return true;
2818 return KnownBits::add(XKnown, YKnown, NSW, NUW).isNonZero();
2821 static bool isNonZeroSub(const APInt &DemandedElts, unsigned Depth,
2822 const SimplifyQuery &Q, unsigned BitWidth, Value *X,
2823 Value *Y) {
2824 // (X - (X != 0)) is non zero
2825 // ((X != 0) - X) is non zero
2826 if (matchOpWithOpEqZero(X, Y))
2827 return true;
2829 // TODO: Move this case into isKnownNonEqual().
2830 if (auto *C = dyn_cast<Constant>(X))
2831 if (C->isNullValue() && isKnownNonZero(Y, DemandedElts, Q, Depth))
2832 return true;
2834 return ::isKnownNonEqual(X, Y, DemandedElts, Depth, Q);
2837 static bool isNonZeroMul(const APInt &DemandedElts, unsigned Depth,
2838 const SimplifyQuery &Q, unsigned BitWidth, Value *X,
2839 Value *Y, bool NSW, bool NUW) {
2840 // If X and Y are non-zero then so is X * Y as long as the multiplication
2841 // does not overflow.
2842 if (NSW || NUW)
2843 return isKnownNonZero(X, DemandedElts, Q, Depth) &&
2844 isKnownNonZero(Y, DemandedElts, Q, Depth);
2846 // If either X or Y is odd, then if the other is non-zero the result can't
2847 // be zero.
2848 KnownBits XKnown = computeKnownBits(X, DemandedElts, Depth, Q);
2849 if (XKnown.One[0])
2850 return isKnownNonZero(Y, DemandedElts, Q, Depth);
2852 KnownBits YKnown = computeKnownBits(Y, DemandedElts, Depth, Q);
2853 if (YKnown.One[0])
2854 return XKnown.isNonZero() || isKnownNonZero(X, DemandedElts, Q, Depth);
2856 // If there exists any subset of X (sX) and subset of Y (sY) s.t sX * sY is
2857 // non-zero, then X * Y is non-zero. We can find sX and sY by just taking
2858 // the lowest known One of X and Y. If they are non-zero, the result
2859 // must be non-zero. We can check if LSB(X) * LSB(Y) != 0 by doing
2860 // X.countMaxTrailingZeros() + Y.countMaxTrailingZeros() < BitWidth.
2861 return (XKnown.countMaxTrailingZeros() + YKnown.countMaxTrailingZeros()) <
2862 BitWidth;
2865 static bool isNonZeroShift(const Operator *I, const APInt &DemandedElts,
2866 unsigned Depth, const SimplifyQuery &Q,
2867 const KnownBits &KnownVal) {
2868 auto ShiftOp = [&](const APInt &Lhs, const APInt &Rhs) {
2869 switch (I->getOpcode()) {
2870 case Instruction::Shl:
2871 return Lhs.shl(Rhs);
2872 case Instruction::LShr:
2873 return Lhs.lshr(Rhs);
2874 case Instruction::AShr:
2875 return Lhs.ashr(Rhs);
2876 default:
2877 llvm_unreachable("Unknown Shift Opcode");
2881 auto InvShiftOp = [&](const APInt &Lhs, const APInt &Rhs) {
2882 switch (I->getOpcode()) {
2883 case Instruction::Shl:
2884 return Lhs.lshr(Rhs);
2885 case Instruction::LShr:
2886 case Instruction::AShr:
2887 return Lhs.shl(Rhs);
2888 default:
2889 llvm_unreachable("Unknown Shift Opcode");
2893 if (KnownVal.isUnknown())
2894 return false;
2896 KnownBits KnownCnt =
2897 computeKnownBits(I->getOperand(1), DemandedElts, Depth, Q);
2898 APInt MaxShift = KnownCnt.getMaxValue();
2899 unsigned NumBits = KnownVal.getBitWidth();
2900 if (MaxShift.uge(NumBits))
2901 return false;
2903 if (!ShiftOp(KnownVal.One, MaxShift).isZero())
2904 return true;
2906 // If all of the bits shifted out are known to be zero, and Val is known
2907 // non-zero then at least one non-zero bit must remain.
2908 if (InvShiftOp(KnownVal.Zero, NumBits - MaxShift)
2909 .eq(InvShiftOp(APInt::getAllOnes(NumBits), NumBits - MaxShift)) &&
2910 isKnownNonZero(I->getOperand(0), DemandedElts, Q, Depth))
2911 return true;
2913 return false;
2916 static bool isKnownNonZeroFromOperator(const Operator *I,
2917 const APInt &DemandedElts,
2918 unsigned Depth, const SimplifyQuery &Q) {
2919 unsigned BitWidth = getBitWidth(I->getType()->getScalarType(), Q.DL);
2920 switch (I->getOpcode()) {
2921 case Instruction::Alloca:
2922 // Alloca never returns null, malloc might.
2923 return I->getType()->getPointerAddressSpace() == 0;
2924 case Instruction::GetElementPtr:
2925 if (I->getType()->isPointerTy())
2926 return isGEPKnownNonNull(cast<GEPOperator>(I), Depth, Q);
2927 break;
2928 case Instruction::BitCast: {
2929 // We need to be a bit careful here. We can only peek through the bitcast
2930 // if the scalar size of elements in the operand is smaller than, and evenly
2931 // divides, the scalar size they are being cast to. Take three cases:
2933 // 1) Unsafe:
2934 // bitcast <2 x i16> %NonZero to <4 x i8>
2936 // %NonZero can have 2 non-zero i16 elements, but isKnownNonZero on a
2937 // <4 x i8> requires that all 4 i8 elements be non-zero which isn't
2938 // guaranteed (imagine just the sign bit set in the 2 i16 elements).
2940 // 2) Unsafe:
2941 // bitcast <4 x i3> %NonZero to <3 x i4>
2943 // Even though the scalar size of the src (`i3`) is smaller than the
2944 // scalar size of the dst `i4`, because `i4` is not a multiple of `i3`,
2945 // it's possible for the `3 x i4` elements to be zero because there are
2946 // some elements in the destination that don't contain any full src
2947 // element.
2949 // 3) Safe:
2950 // bitcast <4 x i8> %NonZero to <2 x i16>
2952 // This is always safe as non-zero in the 4 i8 elements implies
2953 // non-zero in the combination of any two adjacent ones. Since i16 is a
2954 // multiple of i8, each i16 is guaranteed to cover 2 full i8 elements.
2955 // This all implies the 2 i16 elements are non-zero.
2956 Type *FromTy = I->getOperand(0)->getType();
2957 if ((FromTy->isIntOrIntVectorTy() || FromTy->isPtrOrPtrVectorTy()) &&
2958 (BitWidth % getBitWidth(FromTy->getScalarType(), Q.DL)) == 0)
2959 return isKnownNonZero(I->getOperand(0), Q, Depth);
2960 } break;
2961 case Instruction::IntToPtr:
2962 // Note that we have to take special care to avoid looking through
2963 // truncating casts, e.g., int2ptr/ptr2int with appropriate sizes, as well
2964 // as casts that can alter the value, e.g., AddrSpaceCasts.
2965 if (!isa<ScalableVectorType>(I->getType()) &&
2966 Q.DL.getTypeSizeInBits(I->getOperand(0)->getType()).getFixedValue() <=
2967 Q.DL.getTypeSizeInBits(I->getType()).getFixedValue())
2968 return isKnownNonZero(I->getOperand(0), DemandedElts, Q, Depth);
2969 break;
2970 case Instruction::PtrToInt:
2971 // Similar to int2ptr above, we can look through ptr2int here if the cast
2972 // is a no-op or an extend and not a truncate.
2973 if (!isa<ScalableVectorType>(I->getType()) &&
2974 Q.DL.getTypeSizeInBits(I->getOperand(0)->getType()).getFixedValue() <=
2975 Q.DL.getTypeSizeInBits(I->getType()).getFixedValue())
2976 return isKnownNonZero(I->getOperand(0), DemandedElts, Q, Depth);
2977 break;
2978 case Instruction::Trunc:
2979 // nuw/nsw trunc preserves zero/non-zero status of input.
2980 if (auto *TI = dyn_cast<TruncInst>(I))
2981 if (TI->hasNoSignedWrap() || TI->hasNoUnsignedWrap())
2982 return isKnownNonZero(TI->getOperand(0), DemandedElts, Q, Depth);
2983 break;
2985 case Instruction::Sub:
2986 return isNonZeroSub(DemandedElts, Depth, Q, BitWidth, I->getOperand(0),
2987 I->getOperand(1));
2988 case Instruction::Xor:
2989 // (X ^ (X != 0)) is non zero
2990 if (matchOpWithOpEqZero(I->getOperand(0), I->getOperand(1)))
2991 return true;
2992 break;
2993 case Instruction::Or:
2994 // (X | (X != 0)) is non zero
2995 if (matchOpWithOpEqZero(I->getOperand(0), I->getOperand(1)))
2996 return true;
2997 // X | Y != 0 if X != 0 or Y != 0.
2998 return isKnownNonZero(I->getOperand(1), DemandedElts, Q, Depth) ||
2999 isKnownNonZero(I->getOperand(0), DemandedElts, Q, Depth);
3000 case Instruction::SExt:
3001 case Instruction::ZExt:
3002 // ext X != 0 if X != 0.
3003 return isKnownNonZero(I->getOperand(0), DemandedElts, Q, Depth);
3005 case Instruction::Shl: {
3006 // shl nsw/nuw can't remove any non-zero bits.
3007 const OverflowingBinaryOperator *BO = cast<OverflowingBinaryOperator>(I);
3008 if (Q.IIQ.hasNoUnsignedWrap(BO) || Q.IIQ.hasNoSignedWrap(BO))
3009 return isKnownNonZero(I->getOperand(0), DemandedElts, Q, Depth);
3011 // shl X, Y != 0 if X is odd. Note that the value of the shift is undefined
3012 // if the lowest bit is shifted off the end.
3013 KnownBits Known(BitWidth);
3014 computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth, Q);
3015 if (Known.One[0])
3016 return true;
3018 return isNonZeroShift(I, DemandedElts, Depth, Q, Known);
3020 case Instruction::LShr:
3021 case Instruction::AShr: {
3022 // shr exact can only shift out zero bits.
3023 const PossiblyExactOperator *BO = cast<PossiblyExactOperator>(I);
3024 if (BO->isExact())
3025 return isKnownNonZero(I->getOperand(0), DemandedElts, Q, Depth);
3027 // shr X, Y != 0 if X is negative. Note that the value of the shift is not
3028 // defined if the sign bit is shifted off the end.
3029 KnownBits Known =
3030 computeKnownBits(I->getOperand(0), DemandedElts, Depth, Q);
3031 if (Known.isNegative())
3032 return true;
3034 return isNonZeroShift(I, DemandedElts, Depth, Q, Known);
3036 case Instruction::UDiv:
3037 case Instruction::SDiv: {
3038 // X / Y
3039 // div exact can only produce a zero if the dividend is zero.
3040 if (cast<PossiblyExactOperator>(I)->isExact())
3041 return isKnownNonZero(I->getOperand(0), DemandedElts, Q, Depth);
3043 KnownBits XKnown =
3044 computeKnownBits(I->getOperand(0), DemandedElts, Depth, Q);
3045 // If X is fully unknown we won't be able to figure anything out so don't
3046 // bother computing known bits for Y.
3047 if (XKnown.isUnknown())
3048 return false;
3050 KnownBits YKnown =
3051 computeKnownBits(I->getOperand(1), DemandedElts, Depth, Q);
3052 if (I->getOpcode() == Instruction::SDiv) {
3053 // For signed division we need to compare the absolute values of the operands.
3054 XKnown = XKnown.abs(/*IntMinIsPoison*/ false);
3055 YKnown = YKnown.abs(/*IntMinIsPoison*/ false);
3057 // If X u>= Y then div is non zero (0/0 is UB).
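// For example, udiv i8 %x, %y where %x is known to have bit 7 set (so
// %x u>= 128) and %y is known to have bit 7 clear (so %y u<= 127): then
// %x u>= %y and the quotient is at least 1.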
3058 std::optional<bool> XUgeY = KnownBits::uge(XKnown, YKnown);
3059 // If X is totally unknown or X u< Y we won't be able to prove non-zero
3060 // with compute known bits so just return early.
3061 return XUgeY && *XUgeY;
3063 case Instruction::Add: {
3064 // X + Y.
3066 // If the add has the nuw flag, then the result is non-zero if either X or Y
3067 // is non-zero.
3068 auto *BO = cast<OverflowingBinaryOperator>(I);
3069 return isNonZeroAdd(DemandedElts, Depth, Q, BitWidth, I->getOperand(0),
3070 I->getOperand(1), Q.IIQ.hasNoSignedWrap(BO),
3071 Q.IIQ.hasNoUnsignedWrap(BO));
3073 case Instruction::Mul: {
3074 const OverflowingBinaryOperator *BO = cast<OverflowingBinaryOperator>(I);
3075 return isNonZeroMul(DemandedElts, Depth, Q, BitWidth, I->getOperand(0),
3076 I->getOperand(1), Q.IIQ.hasNoSignedWrap(BO),
3077 Q.IIQ.hasNoUnsignedWrap(BO));
3079 case Instruction::Select: {
3080 // (C ? X : Y) != 0 if X != 0 and Y != 0.
3082 // First check if the arm is non-zero using `isKnownNonZero`. If that fails,
3083 // then see if the select condition implies the arm is non-zero. For example
3084 // (X != 0 ? X : Y), we know the true arm is non-zero as the `X` "return" is
3085 // dominated by `X != 0`.
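// Symmetrically, in (X == 0 ? Y : X) the false arm X is only selected when
// X != 0, which is handled below by inverting the predicate for the false arm.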
3086 auto SelectArmIsNonZero = [&](bool IsTrueArm) {
3087 Value *Op;
3088 Op = IsTrueArm ? I->getOperand(1) : I->getOperand(2);
3089 // Op is trivially non-zero.
3090 if (isKnownNonZero(Op, DemandedElts, Q, Depth))
3091 return true;
3093 // The condition of the select dominates the true/false arm. Check if the
3094 // condition implies that a given arm is non-zero.
3095 Value *X;
3096 CmpPredicate Pred;
3097 if (!match(I->getOperand(0), m_c_ICmp(Pred, m_Specific(Op), m_Value(X))))
3098 return false;
3100 if (!IsTrueArm)
3101 Pred = ICmpInst::getInversePredicate(Pred);
3103 return cmpExcludesZero(Pred, X);
3106 if (SelectArmIsNonZero(/* IsTrueArm */ true) &&
3107 SelectArmIsNonZero(/* IsTrueArm */ false))
3108 return true;
3109 break;
3111 case Instruction::PHI: {
3112 auto *PN = cast<PHINode>(I);
3113 if (Q.IIQ.UseInstrInfo && isNonZeroRecurrence(PN))
3114 return true;
3116 // Check if all incoming values are non-zero using recursion.
3117 SimplifyQuery RecQ = Q.getWithoutCondContext();
3118 unsigned NewDepth = std::max(Depth, MaxAnalysisRecursionDepth - 1);
3119 return llvm::all_of(PN->operands(), [&](const Use &U) {
3120 if (U.get() == PN)
3121 return true;
3122 RecQ.CxtI = PN->getIncomingBlock(U)->getTerminator();
3123 // Check if the branch on the phi excludes zero.
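// For example, if the incoming block ends in
//   %c = icmp ne i32 %v, 0
//   br i1 %c, label %phi.bb, label %other
// then on the edge into %phi.bb the incoming value %v is known non-zero.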
3124 CmpPredicate Pred;
3125 Value *X;
3126 BasicBlock *TrueSucc, *FalseSucc;
3127 if (match(RecQ.CxtI,
3128 m_Br(m_c_ICmp(Pred, m_Specific(U.get()), m_Value(X)),
3129 m_BasicBlock(TrueSucc), m_BasicBlock(FalseSucc)))) {
3130 // Check for cases of duplicate successors.
3131 if ((TrueSucc == PN->getParent()) != (FalseSucc == PN->getParent())) {
3132 // If we're using the false successor, invert the predicate.
3133 if (FalseSucc == PN->getParent())
3134 Pred = CmpInst::getInversePredicate(Pred);
3135 if (cmpExcludesZero(Pred, X))
3136 return true;
3139 // Finally recurse on the edge and check it directly.
3140 return isKnownNonZero(U.get(), DemandedElts, RecQ, NewDepth);
3143 case Instruction::InsertElement: {
3144 if (isa<ScalableVectorType>(I->getType()))
3145 break;
3147 const Value *Vec = I->getOperand(0);
3148 const Value *Elt = I->getOperand(1);
3149 auto *CIdx = dyn_cast<ConstantInt>(I->getOperand(2));
3151 unsigned NumElts = DemandedElts.getBitWidth();
3152 APInt DemandedVecElts = DemandedElts;
3153 bool SkipElt = false;
3154 // If we know the index we are inserting to, clear it from the Vec check.
3155 if (CIdx && CIdx->getValue().ult(NumElts)) {
3156 DemandedVecElts.clearBit(CIdx->getZExtValue());
3157 SkipElt = !DemandedElts[CIdx->getZExtValue()];
3160 // Result is non-zero if Elt is non-zero (or not demanded) and the rest of
3161 // the demanded elts in Vec are non-zero.
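// For example, insertelement <2 x i8> %v, i8 %x, i32 0 is known non-zero for
// both lanes if %x is non-zero and lane 1 of %v is non-zero.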
3162 return (SkipElt || isKnownNonZero(Elt, Q, Depth)) &&
3163 (DemandedVecElts.isZero() ||
3164 isKnownNonZero(Vec, DemandedVecElts, Q, Depth));
3166 case Instruction::ExtractElement:
3167 if (const auto *EEI = dyn_cast<ExtractElementInst>(I)) {
3168 const Value *Vec = EEI->getVectorOperand();
3169 const Value *Idx = EEI->getIndexOperand();
3170 auto *CIdx = dyn_cast<ConstantInt>(Idx);
3171 if (auto *VecTy = dyn_cast<FixedVectorType>(Vec->getType())) {
3172 unsigned NumElts = VecTy->getNumElements();
3173 APInt DemandedVecElts = APInt::getAllOnes(NumElts);
3174 if (CIdx && CIdx->getValue().ult(NumElts))
3175 DemandedVecElts = APInt::getOneBitSet(NumElts, CIdx->getZExtValue());
3176 return isKnownNonZero(Vec, DemandedVecElts, Q, Depth);
3179 break;
3180 case Instruction::ShuffleVector: {
3181 auto *Shuf = dyn_cast<ShuffleVectorInst>(I);
3182 if (!Shuf)
3183 break;
3184 APInt DemandedLHS, DemandedRHS;
3185 // For undef elements, we don't know anything about the common state of
3186 // the shuffle result.
3187 if (!getShuffleDemandedElts(Shuf, DemandedElts, DemandedLHS, DemandedRHS))
3188 break;
3189 // If demanded elements for both vecs are non-zero, the shuffle is non-zero.
3190 return (DemandedRHS.isZero() ||
3191 isKnownNonZero(Shuf->getOperand(1), DemandedRHS, Q, Depth)) &&
3192 (DemandedLHS.isZero() ||
3193 isKnownNonZero(Shuf->getOperand(0), DemandedLHS, Q, Depth));
3195 case Instruction::Freeze:
3196 return isKnownNonZero(I->getOperand(0), Q, Depth) &&
3197 isGuaranteedNotToBePoison(I->getOperand(0), Q.AC, Q.CxtI, Q.DT,
3198 Depth);
3199 case Instruction::Load: {
3200 auto *LI = cast<LoadInst>(I);
3201 // A load tagged with nonnull metadata, or with dereferenceable metadata when
3202 // null is not a defined address in that address space, is never null.
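// For example, a pointer load carrying !nonnull metadata, or an integer load
// annotated with !range !{i32 1, i32 100} (which excludes 0), is known
// non-zero.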
3203 if (auto *PtrT = dyn_cast<PointerType>(I->getType())) {
3204 if (Q.IIQ.getMetadata(LI, LLVMContext::MD_nonnull) ||
3205 (Q.IIQ.getMetadata(LI, LLVMContext::MD_dereferenceable) &&
3206 !NullPointerIsDefined(LI->getFunction(), PtrT->getAddressSpace())))
3207 return true;
3208 } else if (MDNode *Ranges = Q.IIQ.getMetadata(LI, LLVMContext::MD_range)) {
3209 return rangeMetadataExcludesValue(Ranges, APInt::getZero(BitWidth));
3212 // No need to fall through to computeKnownBits as range metadata is already
3213 // handled in isKnownNonZero.
3214 return false;
3216 case Instruction::ExtractValue: {
3217 const WithOverflowInst *WO;
3218 if (match(I, m_ExtractValue<0>(m_WithOverflowInst(WO)))) {
3219 switch (WO->getBinaryOp()) {
3220 default:
3221 break;
3222 case Instruction::Add:
3223 return isNonZeroAdd(DemandedElts, Depth, Q, BitWidth,
3224 WO->getArgOperand(0), WO->getArgOperand(1),
3225 /*NSW=*/false,
3226 /*NUW=*/false);
3227 case Instruction::Sub:
3228 return isNonZeroSub(DemandedElts, Depth, Q, BitWidth,
3229 WO->getArgOperand(0), WO->getArgOperand(1));
3230 case Instruction::Mul:
3231 return isNonZeroMul(DemandedElts, Depth, Q, BitWidth,
3232 WO->getArgOperand(0), WO->getArgOperand(1),
3233 /*NSW=*/false, /*NUW=*/false);
3234 break;
3237 break;
3239 case Instruction::Call:
3240 case Instruction::Invoke: {
3241 const auto *Call = cast<CallBase>(I);
3242 if (I->getType()->isPointerTy()) {
3243 if (Call->isReturnNonNull())
3244 return true;
3245 if (const auto *RP = getArgumentAliasingToReturnedPointer(Call, true))
3246 return isKnownNonZero(RP, Q, Depth);
3247 } else {
3248 if (MDNode *Ranges = Q.IIQ.getMetadata(Call, LLVMContext::MD_range))
3249 return rangeMetadataExcludesValue(Ranges, APInt::getZero(BitWidth));
3250 if (std::optional<ConstantRange> Range = Call->getRange()) {
3251 const APInt ZeroValue(Range->getBitWidth(), 0);
3252 if (!Range->contains(ZeroValue))
3253 return true;
3255 if (const Value *RV = Call->getReturnedArgOperand())
3256 if (RV->getType() == I->getType() && isKnownNonZero(RV, Q, Depth))
3257 return true;
3260 if (auto *II = dyn_cast<IntrinsicInst>(I)) {
3261 switch (II->getIntrinsicID()) {
3262 case Intrinsic::sshl_sat:
3263 case Intrinsic::ushl_sat:
3264 case Intrinsic::abs:
3265 case Intrinsic::bitreverse:
3266 case Intrinsic::bswap:
3267 case Intrinsic::ctpop:
3268 return isKnownNonZero(II->getArgOperand(0), DemandedElts, Q, Depth);
3269 // NB: We don't handle usub_sat here, as in any case where we can prove it is
3270 // non-zero, we will fold it to `sub nuw` in InstCombine.
3271 case Intrinsic::ssub_sat:
3272 return isNonZeroSub(DemandedElts, Depth, Q, BitWidth,
3273 II->getArgOperand(0), II->getArgOperand(1));
3274 case Intrinsic::sadd_sat:
3275 return isNonZeroAdd(DemandedElts, Depth, Q, BitWidth,
3276 II->getArgOperand(0), II->getArgOperand(1),
3277 /*NSW=*/true, /* NUW=*/false);
3278 // Vec reverse preserves zero/non-zero status from input vec.
3279 case Intrinsic::vector_reverse:
3280 return isKnownNonZero(II->getArgOperand(0), DemandedElts.reverseBits(),
3281 Q, Depth);
3282 // umax/umin/smax/smin/or of all non-zero elements is always non-zero.
3283 case Intrinsic::vector_reduce_or:
3284 case Intrinsic::vector_reduce_umax:
3285 case Intrinsic::vector_reduce_umin:
3286 case Intrinsic::vector_reduce_smax:
3287 case Intrinsic::vector_reduce_smin:
3288 return isKnownNonZero(II->getArgOperand(0), Q, Depth);
3289 case Intrinsic::umax:
3290 case Intrinsic::uadd_sat:
3291 // umax(X, (X != 0)) is non zero
3292 // X +usat (X != 0) is non zero
3293 if (matchOpWithOpEqZero(II->getArgOperand(0), II->getArgOperand(1)))
3294 return true;
3296 return isKnownNonZero(II->getArgOperand(1), DemandedElts, Q, Depth) ||
3297 isKnownNonZero(II->getArgOperand(0), DemandedElts, Q, Depth);
3298 case Intrinsic::smax: {
3299 // If either arg is strictly positive the result is non-zero. Otherwise
3300 // the result is non-zero if both ops are non-zero.
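// For example, smax(i8 %x, i8 1) is always at least 1; and if both operands
// are non-zero, smax returns one of them and is therefore non-zero as well.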
3301 auto IsNonZero = [&](Value *Op, std::optional<bool> &OpNonZero,
3302 const KnownBits &OpKnown) {
3303 if (!OpNonZero.has_value())
3304 OpNonZero = OpKnown.isNonZero() ||
3305 isKnownNonZero(Op, DemandedElts, Q, Depth);
3306 return *OpNonZero;
3308 // Avoid re-computing isKnownNonZero.
3309 std::optional<bool> Op0NonZero, Op1NonZero;
3310 KnownBits Op1Known =
3311 computeKnownBits(II->getArgOperand(1), DemandedElts, Depth, Q);
3312 if (Op1Known.isNonNegative() &&
3313 IsNonZero(II->getArgOperand(1), Op1NonZero, Op1Known))
3314 return true;
3315 KnownBits Op0Known =
3316 computeKnownBits(II->getArgOperand(0), DemandedElts, Depth, Q);
3317 if (Op0Known.isNonNegative() &&
3318 IsNonZero(II->getArgOperand(0), Op0NonZero, Op0Known))
3319 return true;
3320 return IsNonZero(II->getArgOperand(1), Op1NonZero, Op1Known) &&
3321 IsNonZero(II->getArgOperand(0), Op0NonZero, Op0Known);
3323 case Intrinsic::smin: {
3324 // If either arg is negative the result is non-zero. Otherwise
3325 // the result is non-zero if both ops are non-zero.
3326 KnownBits Op1Known =
3327 computeKnownBits(II->getArgOperand(1), DemandedElts, Depth, Q);
3328 if (Op1Known.isNegative())
3329 return true;
3330 KnownBits Op0Known =
3331 computeKnownBits(II->getArgOperand(0), DemandedElts, Depth, Q);
3332 if (Op0Known.isNegative())
3333 return true;
3335 if (Op1Known.isNonZero() && Op0Known.isNonZero())
3336 return true;
3338 [[fallthrough]];
3339 case Intrinsic::umin:
3340 return isKnownNonZero(II->getArgOperand(0), DemandedElts, Q, Depth) &&
3341 isKnownNonZero(II->getArgOperand(1), DemandedElts, Q, Depth);
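// cttz(X) is non-zero whenever the low bit of X is known zero: either X == 0
// (cttz returns the bit width, which is non-zero) or X is a non-zero even
// value (cttz >= 1).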
3342 case Intrinsic::cttz:
3343 return computeKnownBits(II->getArgOperand(0), DemandedElts, Depth, Q)
3344 .Zero[0];
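// Similarly, ctlz(X) is non-zero whenever X is known non-negative: either
// X == 0 (ctlz returns the bit width) or the sign bit is clear, so there is
// at least one leading zero.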
3345 case Intrinsic::ctlz:
3346 return computeKnownBits(II->getArgOperand(0), DemandedElts, Depth, Q)
3347 .isNonNegative();
3348 case Intrinsic::fshr:
3349 case Intrinsic::fshl:
3350 // If Op0 == Op1, this is a rotate. rotate(x, y) != 0 iff x != 0.
3351 if (II->getArgOperand(0) == II->getArgOperand(1))
3352 return isKnownNonZero(II->getArgOperand(0), DemandedElts, Q, Depth);
3353 break;
3354 case Intrinsic::vscale:
3355 return true;
3356 case Intrinsic::experimental_get_vector_length:
3357 return isKnownNonZero(I->getOperand(0), Q, Depth);
3358 default:
3359 break;
3361 break;
3364 return false;
3368 KnownBits Known(BitWidth);
3369 computeKnownBits(I, DemandedElts, Known, Depth, Q);
3370 return Known.One != 0;
3373 /// Return true if the given value is known to be non-zero when defined. For
3374 /// vectors, return true if every demanded element is known to be non-zero when
3375 /// defined. For pointers, if the context instruction and dominator tree are
3376 /// specified, perform context-sensitive analysis and return true if the
3377 /// pointer couldn't possibly be null at the specified instruction.
3378 /// Supports values with integer or pointer type and vectors of integers.
3379 bool isKnownNonZero(const Value *V, const APInt &DemandedElts,
3380 const SimplifyQuery &Q, unsigned Depth) {
3381 Type *Ty = V->getType();
3383 #ifndef NDEBUG
3384 assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");
3386 if (auto *FVTy = dyn_cast<FixedVectorType>(Ty)) {
3387 assert(
3388 FVTy->getNumElements() == DemandedElts.getBitWidth() &&
3389 "DemandedElt width should equal the fixed vector number of elements");
3390 } else {
3391 assert(DemandedElts == APInt(1, 1) &&
3392 "DemandedElt width should be 1 for scalars");
3394 #endif
3396 if (auto *C = dyn_cast<Constant>(V)) {
3397 if (C->isNullValue())
3398 return false;
3399 if (isa<ConstantInt>(C))
3400 // Must be non-zero due to null test above.
3401 return true;
3403 // For constant vectors, check that all elements are poison or known
3404 // non-zero to determine that the whole vector is known non-zero.
3405 if (auto *VecTy = dyn_cast<FixedVectorType>(Ty)) {
3406 for (unsigned i = 0, e = VecTy->getNumElements(); i != e; ++i) {
3407 if (!DemandedElts[i])
3408 continue;
3409 Constant *Elt = C->getAggregateElement(i);
3410 if (!Elt || Elt->isNullValue())
3411 return false;
3412 if (!isa<PoisonValue>(Elt) && !isa<ConstantInt>(Elt))
3413 return false;
3415 return true;
3418 // A constant ptrauth can be null iff its base pointer can be.
3419 if (auto *CPA = dyn_cast<ConstantPtrAuth>(V))
3420 return isKnownNonZero(CPA->getPointer(), DemandedElts, Q, Depth);
3422 // A global variable in address space 0 is non-null unless it is extern weak
3423 // or an absolute symbol reference. Other address spaces may have null as a
3424 // valid address for a global, so we can't assume anything.
3425 if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
3426 if (!GV->isAbsoluteSymbolRef() && !GV->hasExternalWeakLinkage() &&
3427 GV->getType()->getAddressSpace() == 0)
3428 return true;
3431 // For constant expressions, fall through to the Operator code below.
3432 if (!isa<ConstantExpr>(V))
3433 return false;
3436 if (const auto *A = dyn_cast<Argument>(V))
3437 if (std::optional<ConstantRange> Range = A->getRange()) {
3438 const APInt ZeroValue(Range->getBitWidth(), 0);
3439 if (!Range->contains(ZeroValue))
3440 return true;
3443 if (!isa<Constant>(V) && isKnownNonZeroFromAssume(V, Q))
3444 return true;
3446 // Some of the tests below are recursive, so bail out if we hit the limit.
3447 if (Depth++ >= MaxAnalysisRecursionDepth)
3448 return false;
3450 // Check for pointer simplifications.
3452 if (PointerType *PtrTy = dyn_cast<PointerType>(Ty)) {
3453 // A byval or inalloca argument is non-null as long as the null pointer is not
3454 // a valid address in its address space. A nonnull argument is assumed never 0.
3455 if (const Argument *A = dyn_cast<Argument>(V)) {
3456 if (((A->hasPassPointeeByValueCopyAttr() &&
3457 !NullPointerIsDefined(A->getParent(), PtrTy->getAddressSpace())) ||
3458 A->hasNonNullAttr()))
3459 return true;
3463 if (const auto *I = dyn_cast<Operator>(V))
3464 if (isKnownNonZeroFromOperator(I, DemandedElts, Depth, Q))
3465 return true;
3467 if (!isa<Constant>(V) &&
3468 isKnownNonNullFromDominatingCondition(V, Q.CxtI, Q.DT))
3469 return true;
3471 return false;
3474 bool llvm::isKnownNonZero(const Value *V, const SimplifyQuery &Q,
3475 unsigned Depth) {
3476 auto *FVTy = dyn_cast<FixedVectorType>(V->getType());
3477 APInt DemandedElts =
3478 FVTy ? APInt::getAllOnes(FVTy->getNumElements()) : APInt(1, 1);
3479 return ::isKnownNonZero(V, DemandedElts, Q, Depth);
3482 /// If the pair of operators are the same invertible function, return the
3483 /// operands of the function corresponding to each input. Otherwise,
3484 /// return std::nullopt. An invertible function is one that is 1-to-1 and maps
3485 /// every input value to exactly one output value. This is equivalent to
3486 /// saying that Op1 and Op2 are equal exactly when the specified pair of
3487 /// operands are equal, (except that Op1 and Op2 may be poison more often.)
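/// For example, for Op1 = (add %x, %c) and Op2 = (add %y, %c) this returns the
/// pair (%x, %y), since the two adds are equal exactly when %x == %y.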
3488 static std::optional<std::pair<Value*, Value*>>
3489 getInvertibleOperands(const Operator *Op1,
3490 const Operator *Op2) {
3491 if (Op1->getOpcode() != Op2->getOpcode())
3492 return std::nullopt;
3494 auto getOperands = [&](unsigned OpNum) -> auto {
3495 return std::make_pair(Op1->getOperand(OpNum), Op2->getOperand(OpNum));
3498 switch (Op1->getOpcode()) {
3499 default:
3500 break;
3501 case Instruction::Or:
3502 if (!cast<PossiblyDisjointInst>(Op1)->isDisjoint() ||
3503 !cast<PossiblyDisjointInst>(Op2)->isDisjoint())
3504 break;
3505 [[fallthrough]];
3506 case Instruction::Xor:
3507 case Instruction::Add: {
3508 Value *Other;
3509 if (match(Op2, m_c_BinOp(m_Specific(Op1->getOperand(0)), m_Value(Other))))
3510 return std::make_pair(Op1->getOperand(1), Other);
3511 if (match(Op2, m_c_BinOp(m_Specific(Op1->getOperand(1)), m_Value(Other))))
3512 return std::make_pair(Op1->getOperand(0), Other);
3513 break;
3515 case Instruction::Sub:
3516 if (Op1->getOperand(0) == Op2->getOperand(0))
3517 return getOperands(1);
3518 if (Op1->getOperand(1) == Op2->getOperand(1))
3519 return getOperands(0);
3520 break;
3521 case Instruction::Mul: {
3522 // invertible if A * B == (A * B) mod 2^N where A and B are integers and N is
3523 // the bitwidth. The nsw case is non-obvious, but proven by
3524 // alive2: https://alive2.llvm.org/ce/z/Z6D5qK
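// For example, mul nuw i8 %x, 3 == mul nuw i8 %y, 3 implies %x == %y: with no
// unsigned wrap the products are equal as integers, and 3 is non-zero.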
3525 auto *OBO1 = cast<OverflowingBinaryOperator>(Op1);
3526 auto *OBO2 = cast<OverflowingBinaryOperator>(Op2);
3527 if ((!OBO1->hasNoUnsignedWrap() || !OBO2->hasNoUnsignedWrap()) &&
3528 (!OBO1->hasNoSignedWrap() || !OBO2->hasNoSignedWrap()))
3529 break;
3531 // Assume operand order has been canonicalized
3532 if (Op1->getOperand(1) == Op2->getOperand(1) &&
3533 isa<ConstantInt>(Op1->getOperand(1)) &&
3534 !cast<ConstantInt>(Op1->getOperand(1))->isZero())
3535 return getOperands(0);
3536 break;
3538 case Instruction::Shl: {
3539 // Same as multiplies, with the difference that we don't need to check
3540 // for a non-zero multiply. Shifts always multiply by non-zero.
3541 auto *OBO1 = cast<OverflowingBinaryOperator>(Op1);
3542 auto *OBO2 = cast<OverflowingBinaryOperator>(Op2);
3543 if ((!OBO1->hasNoUnsignedWrap() || !OBO2->hasNoUnsignedWrap()) &&
3544 (!OBO1->hasNoSignedWrap() || !OBO2->hasNoSignedWrap()))
3545 break;
3547 if (Op1->getOperand(1) == Op2->getOperand(1))
3548 return getOperands(0);
3549 break;
3551 case Instruction::AShr:
3552 case Instruction::LShr: {
3553 auto *PEO1 = cast<PossiblyExactOperator>(Op1);
3554 auto *PEO2 = cast<PossiblyExactOperator>(Op2);
3555 if (!PEO1->isExact() || !PEO2->isExact())
3556 break;
3558 if (Op1->getOperand(1) == Op2->getOperand(1))
3559 return getOperands(0);
3560 break;
3562 case Instruction::SExt:
3563 case Instruction::ZExt:
3564 if (Op1->getOperand(0)->getType() == Op2->getOperand(0)->getType())
3565 return getOperands(0);
3566 break;
3567 case Instruction::PHI: {
3568 const PHINode *PN1 = cast<PHINode>(Op1);
3569 const PHINode *PN2 = cast<PHINode>(Op2);
3571 // If PN1 and PN2 are both recurrences, can we prove the entire recurrences
3572 // are a single invertible function of the start values? Note that repeated
3573 // application of an invertible function is also invertible
3574 BinaryOperator *BO1 = nullptr;
3575 Value *Start1 = nullptr, *Step1 = nullptr;
3576 BinaryOperator *BO2 = nullptr;
3577 Value *Start2 = nullptr, *Step2 = nullptr;
3578 if (PN1->getParent() != PN2->getParent() ||
3579 !matchSimpleRecurrence(PN1, BO1, Start1, Step1) ||
3580 !matchSimpleRecurrence(PN2, BO2, Start2, Step2))
3581 break;
3583 auto Values = getInvertibleOperands(cast<Operator>(BO1),
3584 cast<Operator>(BO2));
3585 if (!Values)
3586 break;
3588 // We have to be careful of mutually defined recurrences here. Ex:
3589 // * X_i = X_(i-1) OP Y_(i-1), and Y_i = X_(i-1) OP V
3590 // * X_i = Y_i = X_(i-1) OP Y_(i-1)
3591 // The invertibility of these is complicated, and not worth reasoning
3592 // about (yet?).
3593 if (Values->first != PN1 || Values->second != PN2)
3594 break;
3596 return std::make_pair(Start1, Start2);
3599 return std::nullopt;
3602 /// Return true if V1 == (binop V2, X), where X is known non-zero.
3603 /// Only handle a small subset of binops where (binop V2, X) with non-zero X
3604 /// implies V2 != V1.
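/// For example, if V1 == (add V2, %x) and %x is known non-zero, then V1 != V2;
/// the same reasoning applies to xor and to 'or disjoint'.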
3605 static bool isModifyingBinopOfNonZero(const Value *V1, const Value *V2,
3606 const APInt &DemandedElts, unsigned Depth,
3607 const SimplifyQuery &Q) {
3608 const BinaryOperator *BO = dyn_cast<BinaryOperator>(V1);
3609 if (!BO)
3610 return false;
3611 switch (BO->getOpcode()) {
3612 default:
3613 break;
3614 case Instruction::Or:
3615 if (!cast<PossiblyDisjointInst>(V1)->isDisjoint())
3616 break;
3617 [[fallthrough]];
3618 case Instruction::Xor:
3619 case Instruction::Add:
3620 Value *Op = nullptr;
3621 if (V2 == BO->getOperand(0))
3622 Op = BO->getOperand(1);
3623 else if (V2 == BO->getOperand(1))
3624 Op = BO->getOperand(0);
3625 else
3626 return false;
3627 return isKnownNonZero(Op, DemandedElts, Q, Depth + 1);
3629 return false;
3632 /// Return true if V2 == V1 * C, where V1 is known non-zero, C is not 0/1 and
3633 /// the multiplication is nuw or nsw.
3634 static bool isNonEqualMul(const Value *V1, const Value *V2,
3635 const APInt &DemandedElts, unsigned Depth,
3636 const SimplifyQuery &Q) {
3637 if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(V2)) {
3638 const APInt *C;
3639 return match(OBO, m_Mul(m_Specific(V1), m_APInt(C))) &&
3640 (OBO->hasNoUnsignedWrap() || OBO->hasNoSignedWrap()) &&
3641 !C->isZero() && !C->isOne() &&
3642 isKnownNonZero(V1, DemandedElts, Q, Depth + 1);
3644 return false;
3647 /// Return true if V2 == V1 << C, where V1 is known non-zero, C is not 0 and
3648 /// the shift is nuw or nsw.
3649 static bool isNonEqualShl(const Value *V1, const Value *V2,
3650 const APInt &DemandedElts, unsigned Depth,
3651 const SimplifyQuery &Q) {
3652 if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(V2)) {
3653 const APInt *C;
3654 return match(OBO, m_Shl(m_Specific(V1), m_APInt(C))) &&
3655 (OBO->hasNoUnsignedWrap() || OBO->hasNoSignedWrap()) &&
3656 !C->isZero() && isKnownNonZero(V1, DemandedElts, Q, Depth + 1);
3658 return false;
3661 static bool isNonEqualPHIs(const PHINode *PN1, const PHINode *PN2,
3662 const APInt &DemandedElts, unsigned Depth,
3663 const SimplifyQuery &Q) {
3664 // Check that the two PHIs are in the same block.
3665 if (PN1->getParent() != PN2->getParent())
3666 return false;
3668 SmallPtrSet<const BasicBlock *, 8> VisitedBBs;
3669 bool UsedFullRecursion = false;
3670 for (const BasicBlock *IncomBB : PN1->blocks()) {
3671 if (!VisitedBBs.insert(IncomBB).second)
3672 continue; // Don't reprocess blocks that we have dealt with already.
3673 const Value *IV1 = PN1->getIncomingValueForBlock(IncomBB);
3674 const Value *IV2 = PN2->getIncomingValueForBlock(IncomBB);
3675 const APInt *C1, *C2;
3676 if (match(IV1, m_APInt(C1)) && match(IV2, m_APInt(C2)) && *C1 != *C2)
3677 continue;
3679 // Only one pair of phi operands is allowed for full recursion.
3680 if (UsedFullRecursion)
3681 return false;
3683 SimplifyQuery RecQ = Q.getWithoutCondContext();
3684 RecQ.CxtI = IncomBB->getTerminator();
3685 if (!isKnownNonEqual(IV1, IV2, DemandedElts, Depth + 1, RecQ))
3686 return false;
3687 UsedFullRecursion = true;
3689 return true;
3692 static bool isNonEqualSelect(const Value *V1, const Value *V2,
3693 const APInt &DemandedElts, unsigned Depth,
3694 const SimplifyQuery &Q) {
3695 const SelectInst *SI1 = dyn_cast<SelectInst>(V1);
3696 if (!SI1)
3697 return false;
3699 if (const SelectInst *SI2 = dyn_cast<SelectInst>(V2)) {
3700 const Value *Cond1 = SI1->getCondition();
3701 const Value *Cond2 = SI2->getCondition();
3702 if (Cond1 == Cond2)
3703 return isKnownNonEqual(SI1->getTrueValue(), SI2->getTrueValue(),
3704 DemandedElts, Depth + 1, Q) &&
3705 isKnownNonEqual(SI1->getFalseValue(), SI2->getFalseValue(),
3706 DemandedElts, Depth + 1, Q);
3708 return isKnownNonEqual(SI1->getTrueValue(), V2, DemandedElts, Depth + 1, Q) &&
3709 isKnownNonEqual(SI1->getFalseValue(), V2, DemandedElts, Depth + 1, Q);
3712 // Check to see if A is both a GEP and the incoming value for a PHI in a loop,
3713 // and B is either a ptr or another GEP. If the PHI has 2 incoming values, one
3714 // of them being the recursive GEP A and the other a ptr with the same base as
3715 // B and at the same or a higher offset, then A != B, because each iteration
3716 // only moves the pointer further past B (the recursive GEP's offset is > 0).
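// For instance (illustrative IR): A = getelementptr inbounds i8, ptr %a.phi,
// i64 4, where %a.phi = phi ptr [ %start, %entry ], [ A, %loop ]. If %start
// shares B's base at an equal or higher offset, every iteration moves A
// further past B, so A cannot equal B.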
3717 static bool isNonEqualPointersWithRecursiveGEP(const Value *A, const Value *B,
3718 const SimplifyQuery &Q) {
3719 if (!A->getType()->isPointerTy() || !B->getType()->isPointerTy())
3720 return false;
3722 auto *GEPA = dyn_cast<GEPOperator>(A);
3723 if (!GEPA || GEPA->getNumIndices() != 1 || !isa<Constant>(GEPA->idx_begin()))
3724 return false;
3726 // Handle 2 incoming PHI values with one being a recursive GEP.
3727 auto *PN = dyn_cast<PHINode>(GEPA->getPointerOperand());
3728 if (!PN || PN->getNumIncomingValues() != 2)
3729 return false;
3731 // Search for the recursive GEP as an incoming operand, and record that as
3732 // Step.
3733 Value *Start = nullptr;
3734 Value *Step = const_cast<Value *>(A);
3735 if (PN->getIncomingValue(0) == Step)
3736 Start = PN->getIncomingValue(1);
3737 else if (PN->getIncomingValue(1) == Step)
3738 Start = PN->getIncomingValue(0);
3739 else
3740 return false;
3742 // The other incoming value's base should match B's base. Then:
3743 // StartOffset >= OffsetB && StepOffset > 0?
3744 // StartOffset <= OffsetB && StepOffset < 0?
3745 // A and B are non-equal if either of the above holds.
3746 // We use stripAndAccumulateInBoundsConstantOffsets to restrict the
3747 // optimization to inbounds GEPs only.
3748 unsigned IndexWidth = Q.DL.getIndexTypeSizeInBits(Start->getType());
3749 APInt StartOffset(IndexWidth, 0);
3750 Start = Start->stripAndAccumulateInBoundsConstantOffsets(Q.DL, StartOffset);
3751 APInt StepOffset(IndexWidth, 0);
3752 Step = Step->stripAndAccumulateInBoundsConstantOffsets(Q.DL, StepOffset);
3754 // Check if Base Pointer of Step matches the PHI.
3755 if (Step != PN)
3756 return false;
3757 APInt OffsetB(IndexWidth, 0);
3758 B = B->stripAndAccumulateInBoundsConstantOffsets(Q.DL, OffsetB);
3759 return Start == B &&
3760 ((StartOffset.sge(OffsetB) && StepOffset.isStrictlyPositive()) ||
3761 (StartOffset.sle(OffsetB) && StepOffset.isNegative()));
3764 /// Return true if it is known that V1 != V2.
3765 static bool isKnownNonEqual(const Value *V1, const Value *V2,
3766 const APInt &DemandedElts, unsigned Depth,
3767 const SimplifyQuery &Q) {
3768 if (V1 == V2)
3769 return false;
3770 if (V1->getType() != V2->getType())
3771 // We can't look through casts yet.
3772 return false;
3774 if (Depth >= MaxAnalysisRecursionDepth)
3775 return false;
3777 // See if we can recurse through (exactly one of) our operands. This
3778 // requires our operation be 1-to-1 and map every input value to exactly
3779 // one output value. Such an operation is invertible.
3780 auto *O1 = dyn_cast<Operator>(V1);
3781 auto *O2 = dyn_cast<Operator>(V2);
3782 if (O1 && O2 && O1->getOpcode() == O2->getOpcode()) {
3783 if (auto Values = getInvertibleOperands(O1, O2))
3784 return isKnownNonEqual(Values->first, Values->second, DemandedElts,
3785 Depth + 1, Q);
3787 if (const PHINode *PN1 = dyn_cast<PHINode>(V1)) {
3788 const PHINode *PN2 = cast<PHINode>(V2);
3789 // FIXME: This is missing a generalization to handle the case where one is
3790 // a PHI and another one isn't.
3791 if (isNonEqualPHIs(PN1, PN2, DemandedElts, Depth, Q))
3792 return true;
3796 if (isModifyingBinopOfNonZero(V1, V2, DemandedElts, Depth, Q) ||
3797 isModifyingBinopOfNonZero(V2, V1, DemandedElts, Depth, Q))
3798 return true;
3800 if (isNonEqualMul(V1, V2, DemandedElts, Depth, Q) ||
3801 isNonEqualMul(V2, V1, DemandedElts, Depth, Q))
3802 return true;
3804 if (isNonEqualShl(V1, V2, DemandedElts, Depth, Q) ||
3805 isNonEqualShl(V2, V1, DemandedElts, Depth, Q))
3806 return true;
3808 if (V1->getType()->isIntOrIntVectorTy()) {
3809 // Are any known bits in V1 contradictory to known bits in V2? If V1
3810 // has a known zero where V2 has a known one, they must not be equal.
3811 KnownBits Known1 = computeKnownBits(V1, DemandedElts, Depth, Q);
3812 if (!Known1.isUnknown()) {
3813 KnownBits Known2 = computeKnownBits(V2, DemandedElts, Depth, Q);
3814 if (Known1.Zero.intersects(Known2.One) ||
3815 Known2.Zero.intersects(Known1.One))
3816 return true;
3820 if (isNonEqualSelect(V1, V2, DemandedElts, Depth, Q) ||
3821 isNonEqualSelect(V2, V1, DemandedElts, Depth, Q))
3822 return true;
3824 if (isNonEqualPointersWithRecursiveGEP(V1, V2, Q) ||
3825 isNonEqualPointersWithRecursiveGEP(V2, V1, Q))
3826 return true;
3828 Value *A, *B;
3829 // PtrToInts are NonEqual if their Ptrs are NonEqual.
3830 // Check PtrToInt type matches the pointer size.
3831 if (match(V1, m_PtrToIntSameSize(Q.DL, m_Value(A))) &&
3832 match(V2, m_PtrToIntSameSize(Q.DL, m_Value(B))))
3833 return isKnownNonEqual(A, B, DemandedElts, Depth + 1, Q);
3835 return false;
3838 /// For vector constants, loop over the elements and find the constant with the
3839 /// minimum number of sign bits. Return 0 if the value is not a vector constant
3840 /// or if any element was not analyzed; otherwise, return the count for the
3841 /// element with the minimum number of sign bits.
3842 static unsigned computeNumSignBitsVectorConstant(const Value *V,
3843 const APInt &DemandedElts,
3844 unsigned TyBits) {
3845 const auto *CV = dyn_cast<Constant>(V);
3846 if (!CV || !isa<FixedVectorType>(CV->getType()))
3847 return 0;
3849 unsigned MinSignBits = TyBits;
3850 unsigned NumElts = cast<FixedVectorType>(CV->getType())->getNumElements();
3851 for (unsigned i = 0; i != NumElts; ++i) {
3852 if (!DemandedElts[i])
3853 continue;
3854 // If we find a non-ConstantInt, bail out.
3855 auto *Elt = dyn_cast_or_null<ConstantInt>(CV->getAggregateElement(i));
3856 if (!Elt)
3857 return 0;
3859 MinSignBits = std::min(MinSignBits, Elt->getValue().getNumSignBits());
3862 return MinSignBits;
3865 static unsigned ComputeNumSignBitsImpl(const Value *V,
3866 const APInt &DemandedElts,
3867 unsigned Depth, const SimplifyQuery &Q);
3869 static unsigned ComputeNumSignBits(const Value *V, const APInt &DemandedElts,
3870 unsigned Depth, const SimplifyQuery &Q) {
3871 unsigned Result = ComputeNumSignBitsImpl(V, DemandedElts, Depth, Q);
3872 assert(Result > 0 && "At least one sign bit needs to be present!");
3873 return Result;
3876 /// Return the number of times the sign bit of the register is replicated into
3877 /// the other bits. We know that at least 1 bit is always equal to the sign bit
3878 /// (itself), but other cases can give us information. For example, immediately
3879 /// after an "ashr X, 2", we know that the top 3 bits are all equal to each
3880 /// other, so we return 3. For vectors, return the number of sign bits for the
3881 /// vector element with the minimum number of known sign bits of the demanded
3882 /// elements in the vector specified by DemandedElts.
3883 static unsigned ComputeNumSignBitsImpl(const Value *V,
3884 const APInt &DemandedElts,
3885 unsigned Depth, const SimplifyQuery &Q) {
3886 Type *Ty = V->getType();
3887 #ifndef NDEBUG
3888 assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");
3890 if (auto *FVTy = dyn_cast<FixedVectorType>(Ty)) {
3891 assert(
3892 FVTy->getNumElements() == DemandedElts.getBitWidth() &&
3893 "DemandedElt width should equal the fixed vector number of elements");
3894 } else {
3895 assert(DemandedElts == APInt(1, 1) &&
3896 "DemandedElt width should be 1 for scalars");
3898 #endif
3900 // We return the minimum number of sign bits that are guaranteed to be present
3901 // in V, so for undef we have to conservatively return 1. We don't have the
3902 // same behavior for poison though -- that's a FIXME today.
3904 Type *ScalarTy = Ty->getScalarType();
3905 unsigned TyBits = ScalarTy->isPointerTy() ?
3906 Q.DL.getPointerTypeSizeInBits(ScalarTy) :
3907 Q.DL.getTypeSizeInBits(ScalarTy);
3909 unsigned Tmp, Tmp2;
3910 unsigned FirstAnswer = 1;
3912 // Note that ConstantInt is handled by the general computeKnownBits case
3913 // below.
3915 if (Depth == MaxAnalysisRecursionDepth)
3916 return 1;
3918 if (auto *U = dyn_cast<Operator>(V)) {
3919 switch (Operator::getOpcode(V)) {
3920 default: break;
3921 case Instruction::SExt:
3922 Tmp = TyBits - U->getOperand(0)->getType()->getScalarSizeInBits();
3923 return ComputeNumSignBits(U->getOperand(0), DemandedElts, Depth + 1, Q) +
3924 Tmp;
3926 case Instruction::SDiv: {
3927 const APInt *Denominator;
3928 // sdiv X, C -> adds log(C) sign bits.
3929 if (match(U->getOperand(1), m_APInt(Denominator))) {
3931 // Ignore non-positive denominator.
3932 if (!Denominator->isStrictlyPositive())
3933 break;
3935 // Calculate the incoming numerator bits.
3936 unsigned NumBits =
3937 ComputeNumSignBits(U->getOperand(0), DemandedElts, Depth + 1, Q);
3939 // Add floor(log(C)) bits to the numerator bits.
3940 return std::min(TyBits, NumBits + Denominator->logBase2());
3942 break;
3945 case Instruction::SRem: {
3946 Tmp = ComputeNumSignBits(U->getOperand(0), DemandedElts, Depth + 1, Q);
3948 const APInt *Denominator;
3949 // srem X, C -> we know that the result is within [-C+1,C) when C is a
3950 // positive constant. This lets us put a lower bound on the number of sign
3951 // bits.
3952 if (match(U->getOperand(1), m_APInt(Denominator))) {
3954 // Ignore non-positive denominator.
3955 if (Denominator->isStrictlyPositive()) {
3956 // Calculate the leading sign bit constraints by examining the
3957 // denominator. Given that the denominator is positive, there are two
3958 // cases:
3960 // 1. The numerator is positive. The result range is [0,C) and
3961 // [0,C) u< (1 << ceilLogBase2(C)).
3963 // 2. The numerator is negative. Then the result range is (-C,0] and
3964 // integers in (-C,0] are either 0 or >u (-1 << ceilLogBase2(C)).
3966 // Thus a lower bound on the number of sign bits is `TyBits -
3967 // ceilLogBase2(C)`.
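// For example, srem i32 %x, 16: the result lies in (-16, 16), and since
// ceilLogBase2(16) == 4, at least 32 - 4 == 28 sign bits are known.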
3969 unsigned ResBits = TyBits - Denominator->ceilLogBase2();
3970 Tmp = std::max(Tmp, ResBits);
3973 return Tmp;
3976 case Instruction::AShr: {
3977 Tmp = ComputeNumSignBits(U->getOperand(0), DemandedElts, Depth + 1, Q);
3978 // ashr X, C -> adds C sign bits. Vectors too.
3979 const APInt *ShAmt;
3980 if (match(U->getOperand(1), m_APInt(ShAmt))) {
3981 if (ShAmt->uge(TyBits))
3982 break; // Bad shift.
3983 unsigned ShAmtLimited = ShAmt->getZExtValue();
3984 Tmp += ShAmtLimited;
3985 if (Tmp > TyBits) Tmp = TyBits;
3987 return Tmp;
3989 case Instruction::Shl: {
3990 const APInt *ShAmt;
3991 Value *X = nullptr;
3992 if (match(U->getOperand(1), m_APInt(ShAmt))) {
3993 // shl destroys sign bits.
3994 if (ShAmt->uge(TyBits))
3995 break; // Bad shift.
3996 // We can look through a zext (more or less treating it as a sext) if
3997 // all extended bits are shifted out.
3998 if (match(U->getOperand(0), m_ZExt(m_Value(X))) &&
3999 ShAmt->uge(TyBits - X->getType()->getScalarSizeInBits())) {
4000 Tmp = ComputeNumSignBits(X, DemandedElts, Depth + 1, Q);
4001 Tmp += TyBits - X->getType()->getScalarSizeInBits();
4002 } else
4003 Tmp =
4004 ComputeNumSignBits(U->getOperand(0), DemandedElts, Depth + 1, Q);
4005 if (ShAmt->uge(Tmp))
4006 break; // Shifted all sign bits out.
4007 Tmp2 = ShAmt->getZExtValue();
4008 return Tmp - Tmp2;
4010 break;
4012 case Instruction::And:
4013 case Instruction::Or:
4014 case Instruction::Xor: // NOT is handled here.
4015 // Logical binary ops preserve the number of sign bits at the worst.
4016 Tmp = ComputeNumSignBits(U->getOperand(0), DemandedElts, Depth + 1, Q);
4017 if (Tmp != 1) {
4018 Tmp2 = ComputeNumSignBits(U->getOperand(1), DemandedElts, Depth + 1, Q);
4019 FirstAnswer = std::min(Tmp, Tmp2);
4020 // We computed what we know about the sign bits as our first
4021 // answer. Now proceed to the generic code that uses
4022 // computeKnownBits, and pick whichever answer is better.
4024 break;
4026 case Instruction::Select: {
4027 // If we have a clamp pattern, we know that the number of sign bits will
4028 // be the minimum of the clamp min/max range.
4029 const Value *X;
4030 const APInt *CLow, *CHigh;
4031 if (isSignedMinMaxClamp(U, X, CLow, CHigh))
4032 return std::min(CLow->getNumSignBits(), CHigh->getNumSignBits());
4034 Tmp = ComputeNumSignBits(U->getOperand(1), DemandedElts, Depth + 1, Q);
4035 if (Tmp == 1)
4036 break;
4037 Tmp2 = ComputeNumSignBits(U->getOperand(2), DemandedElts, Depth + 1, Q);
4038 return std::min(Tmp, Tmp2);
4041 case Instruction::Add:
4042 // Add can have at most one carry bit. Thus we know that the output
4043 // is, at worst, one more bit than the inputs.
4044 Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
4045 if (Tmp == 1) break;
4047 // Special case decrementing a value (ADD X, -1):
4048 if (const auto *CRHS = dyn_cast<Constant>(U->getOperand(1)))
4049 if (CRHS->isAllOnesValue()) {
4050 KnownBits Known(TyBits);
4051 computeKnownBits(U->getOperand(0), DemandedElts, Known, Depth + 1, Q);
4053 // If the input is known to be 0 or 1, the output is 0/-1, which is
4054 // all sign bits set.
4055 if ((Known.Zero | 1).isAllOnes())
4056 return TyBits;
4058 // If we are subtracting one from a non-negative number, there is no carry
4059 // out of the result.
4060 if (Known.isNonNegative())
4061 return Tmp;
4064 Tmp2 = ComputeNumSignBits(U->getOperand(1), DemandedElts, Depth + 1, Q);
4065 if (Tmp2 == 1)
4066 break;
4067 return std::min(Tmp, Tmp2) - 1;
4069 case Instruction::Sub:
4070 Tmp2 = ComputeNumSignBits(U->getOperand(1), DemandedElts, Depth + 1, Q);
4071 if (Tmp2 == 1)
4072 break;
4074 // Handle NEG.
4075 if (const auto *CLHS = dyn_cast<Constant>(U->getOperand(0)))
4076 if (CLHS->isNullValue()) {
4077 KnownBits Known(TyBits);
4078 computeKnownBits(U->getOperand(1), DemandedElts, Known, Depth + 1, Q);
4079 // If the input is known to be 0 or 1, the output is 0/-1, which is
4080 // all sign bits set.
4081 if ((Known.Zero | 1).isAllOnes())
4082 return TyBits;
4084 // If the input is known to be non-negative (the sign bit is known clear),
4085 // the output of the NEG has the same number of sign bits as the
4086 // input.
4087 if (Known.isNonNegative())
4088 return Tmp2;
4090 // Otherwise, we treat this like a SUB.
4093 // Sub can have at most one carry bit. Thus we know that the output
4094 // is, at worst, one more bit than the inputs.
4095 Tmp = ComputeNumSignBits(U->getOperand(0), DemandedElts, Depth + 1, Q);
4096 if (Tmp == 1)
4097 break;
4098 return std::min(Tmp, Tmp2) - 1;
4100 case Instruction::Mul: {
4101 // The output of the Mul can be at most twice the valid bits in the
4102 // inputs.
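// For example, if both i32 operands have at least 20 sign bits, each has at
// most 13 valid bits (32 - 20 + 1), the product needs at most 26 valid bits,
// and the result therefore has at least 32 - 26 + 1 == 7 sign bits.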
4103 unsigned SignBitsOp0 =
4104 ComputeNumSignBits(U->getOperand(0), DemandedElts, Depth + 1, Q);
4105 if (SignBitsOp0 == 1)
4106 break;
4107 unsigned SignBitsOp1 =
4108 ComputeNumSignBits(U->getOperand(1), DemandedElts, Depth + 1, Q);
4109 if (SignBitsOp1 == 1)
4110 break;
4111 unsigned OutValidBits =
4112 (TyBits - SignBitsOp0 + 1) + (TyBits - SignBitsOp1 + 1);
4113 return OutValidBits > TyBits ? 1 : TyBits - OutValidBits + 1;
4116 case Instruction::PHI: {
4117 const PHINode *PN = cast<PHINode>(U);
4118 unsigned NumIncomingValues = PN->getNumIncomingValues();
4119 // Don't analyze large in-degree PHIs.
4120 if (NumIncomingValues > 4) break;
4121 // Unreachable blocks may have zero-operand PHI nodes.
4122 if (NumIncomingValues == 0) break;
4124 // Take the minimum of all incoming values. This can't infinitely loop
4125 // because of our depth threshold.
4126 SimplifyQuery RecQ = Q.getWithoutCondContext();
4127 Tmp = TyBits;
4128 for (unsigned i = 0, e = NumIncomingValues; i != e; ++i) {
4129 if (Tmp == 1) return Tmp;
4130 RecQ.CxtI = PN->getIncomingBlock(i)->getTerminator();
4131 Tmp = std::min(Tmp, ComputeNumSignBits(PN->getIncomingValue(i),
4132 DemandedElts, Depth + 1, RecQ));
4134 return Tmp;
4137 case Instruction::Trunc: {
4138 // If the input contained enough sign bits that some remain after the
4139 // truncation, then we can make use of that. Otherwise we don't know
4140 // anything.
4141 Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
4142 unsigned OperandTyBits = U->getOperand(0)->getType()->getScalarSizeInBits();
4143 if (Tmp > (OperandTyBits - TyBits))
4144 return Tmp - (OperandTyBits - TyBits);
4146 return 1;
4149 case Instruction::ExtractElement:
4150 // Look through extract element. At the moment we keep this simple and
4151 // skip tracking the specific element. But at least we might find
4152 // information valid for all elements of the vector (for example if vector
4153 // is sign extended, shifted, etc).
4154 return ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
4156 case Instruction::ShuffleVector: {
4157 // Collect the minimum number of sign bits that are shared by every vector
4158 // element referenced by the shuffle.
4159 auto *Shuf = dyn_cast<ShuffleVectorInst>(U);
4160 if (!Shuf) {
4161 // FIXME: Add support for shufflevector constant expressions.
4162 return 1;
4164 APInt DemandedLHS, DemandedRHS;
4165 // For undef elements, we don't know anything about the common state of
4166 // the shuffle result.
4167 if (!getShuffleDemandedElts(Shuf, DemandedElts, DemandedLHS, DemandedRHS))
4168 return 1;
4169 Tmp = std::numeric_limits<unsigned>::max();
4170 if (!!DemandedLHS) {
4171 const Value *LHS = Shuf->getOperand(0);
4172 Tmp = ComputeNumSignBits(LHS, DemandedLHS, Depth + 1, Q);
4174 // If we don't know anything, early out and try computeKnownBits
4175 // fall-back.
4176 if (Tmp == 1)
4177 break;
4178 if (!!DemandedRHS) {
4179 const Value *RHS = Shuf->getOperand(1);
4180 Tmp2 = ComputeNumSignBits(RHS, DemandedRHS, Depth + 1, Q);
4181 Tmp = std::min(Tmp, Tmp2);
4183 // If we don't know anything, early out and try computeKnownBits
4184 // fall-back.
4185 if (Tmp == 1)
4186 break;
4187 assert(Tmp <= TyBits && "Failed to determine minimum sign bits");
4188 return Tmp;
4190 case Instruction::Call: {
4191 if (const auto *II = dyn_cast<IntrinsicInst>(U)) {
4192 switch (II->getIntrinsicID()) {
4193 default:
4194 break;
4195 case Intrinsic::abs:
4196 Tmp =
4197 ComputeNumSignBits(U->getOperand(0), DemandedElts, Depth + 1, Q);
4198 if (Tmp == 1)
4199 break;
4201 // Absolute value reduces number of sign bits by at most 1.
4202 return Tmp - 1;
4203 case Intrinsic::smin:
4204 case Intrinsic::smax: {
4205 const APInt *CLow, *CHigh;
4206 if (isSignedMinMaxIntrinsicClamp(II, CLow, CHigh))
4207 return std::min(CLow->getNumSignBits(), CHigh->getNumSignBits());
4215 // Finally, if we can prove that the top bits of the result are 0's or 1's,
4216 // use this information.
4218 // If we can examine all elements of a vector constant successfully, we're
4219 // done (we can't do any better than that). If not, keep trying.
4220 if (unsigned VecSignBits =
4221 computeNumSignBitsVectorConstant(V, DemandedElts, TyBits))
4222 return VecSignBits;
4224 KnownBits Known(TyBits);
4225 computeKnownBits(V, DemandedElts, Known, Depth, Q);
4227 // If we know that the sign bit is either zero or one, determine the number of
4228 // identical bits in the top of the input value.
4229 return std::max(FirstAnswer, Known.countMinSignBits());
4232 Intrinsic::ID llvm::getIntrinsicForCallSite(const CallBase &CB,
4233 const TargetLibraryInfo *TLI) {
4234 const Function *F = CB.getCalledFunction();
4235 if (!F)
4236 return Intrinsic::not_intrinsic;
4238 if (F->isIntrinsic())
4239 return F->getIntrinsicID();
4241 // We are going to infer semantics of a library function based on mapping it
4242 // to an LLVM intrinsic. Check that the library function is available from
4243 // this callbase and in this environment.
4244 LibFunc Func;
4245 if (F->hasLocalLinkage() || !TLI || !TLI->getLibFunc(CB, Func) ||
4246 !CB.onlyReadsMemory())
4247 return Intrinsic::not_intrinsic;
4249 switch (Func) {
4250 default:
4251 break;
4252 case LibFunc_sin:
4253 case LibFunc_sinf:
4254 case LibFunc_sinl:
4255 return Intrinsic::sin;
4256 case LibFunc_cos:
4257 case LibFunc_cosf:
4258 case LibFunc_cosl:
4259 return Intrinsic::cos;
4260 case LibFunc_tan:
4261 case LibFunc_tanf:
4262 case LibFunc_tanl:
4263 return Intrinsic::tan;
4264 case LibFunc_asin:
4265 case LibFunc_asinf:
4266 case LibFunc_asinl:
4267 return Intrinsic::asin;
4268 case LibFunc_acos:
4269 case LibFunc_acosf:
4270 case LibFunc_acosl:
4271 return Intrinsic::acos;
4272 case LibFunc_atan:
4273 case LibFunc_atanf:
4274 case LibFunc_atanl:
4275 return Intrinsic::atan;
4276 case LibFunc_atan2:
4277 case LibFunc_atan2f:
4278 case LibFunc_atan2l:
4279 return Intrinsic::atan2;
4280 case LibFunc_sinh:
4281 case LibFunc_sinhf:
4282 case LibFunc_sinhl:
4283 return Intrinsic::sinh;
4284 case LibFunc_cosh:
4285 case LibFunc_coshf:
4286 case LibFunc_coshl:
4287 return Intrinsic::cosh;
4288 case LibFunc_tanh:
4289 case LibFunc_tanhf:
4290 case LibFunc_tanhl:
4291 return Intrinsic::tanh;
4292 case LibFunc_exp:
4293 case LibFunc_expf:
4294 case LibFunc_expl:
4295 return Intrinsic::exp;
4296 case LibFunc_exp2:
4297 case LibFunc_exp2f:
4298 case LibFunc_exp2l:
4299 return Intrinsic::exp2;
4300 case LibFunc_exp10:
4301 case LibFunc_exp10f:
4302 case LibFunc_exp10l:
4303 return Intrinsic::exp10;
4304 case LibFunc_log:
4305 case LibFunc_logf:
4306 case LibFunc_logl:
4307 return Intrinsic::log;
4308 case LibFunc_log10:
4309 case LibFunc_log10f:
4310 case LibFunc_log10l:
4311 return Intrinsic::log10;
4312 case LibFunc_log2:
4313 case LibFunc_log2f:
4314 case LibFunc_log2l:
4315 return Intrinsic::log2;
4316 case LibFunc_fabs:
4317 case LibFunc_fabsf:
4318 case LibFunc_fabsl:
4319 return Intrinsic::fabs;
4320 case LibFunc_fmin:
4321 case LibFunc_fminf:
4322 case LibFunc_fminl:
4323 return Intrinsic::minnum;
4324 case LibFunc_fmax:
4325 case LibFunc_fmaxf:
4326 case LibFunc_fmaxl:
4327 return Intrinsic::maxnum;
4328 case LibFunc_copysign:
4329 case LibFunc_copysignf:
4330 case LibFunc_copysignl:
4331 return Intrinsic::copysign;
4332 case LibFunc_floor:
4333 case LibFunc_floorf:
4334 case LibFunc_floorl:
4335 return Intrinsic::floor;
4336 case LibFunc_ceil:
4337 case LibFunc_ceilf:
4338 case LibFunc_ceill:
4339 return Intrinsic::ceil;
4340 case LibFunc_trunc:
4341 case LibFunc_truncf:
4342 case LibFunc_truncl:
4343 return Intrinsic::trunc;
4344 case LibFunc_rint:
4345 case LibFunc_rintf:
4346 case LibFunc_rintl:
4347 return Intrinsic::rint;
4348 case LibFunc_nearbyint:
4349 case LibFunc_nearbyintf:
4350 case LibFunc_nearbyintl:
4351 return Intrinsic::nearbyint;
4352 case LibFunc_round:
4353 case LibFunc_roundf:
4354 case LibFunc_roundl:
4355 return Intrinsic::round;
4356 case LibFunc_roundeven:
4357 case LibFunc_roundevenf:
4358 case LibFunc_roundevenl:
4359 return Intrinsic::roundeven;
4360 case LibFunc_pow:
4361 case LibFunc_powf:
4362 case LibFunc_powl:
4363 return Intrinsic::pow;
4364 case LibFunc_sqrt:
4365 case LibFunc_sqrtf:
4366 case LibFunc_sqrtl:
4367 return Intrinsic::sqrt;
4370 return Intrinsic::not_intrinsic;
4373 /// Return true if it's possible to assume IEEE treatment of input denormals in
4374 /// \p F for a value of type \p Ty.
4375 static bool inputDenormalIsIEEE(const Function &F, const Type *Ty) {
4376 Ty = Ty->getScalarType();
4377 return F.getDenormalMode(Ty->getFltSemantics()).Input == DenormalMode::IEEE;
4380 static bool inputDenormalIsIEEEOrPosZero(const Function &F, const Type *Ty) {
4381 Ty = Ty->getScalarType();
4382 DenormalMode Mode = F.getDenormalMode(Ty->getFltSemantics());
4383 return Mode.Input == DenormalMode::IEEE ||
4384 Mode.Input == DenormalMode::PositiveZero;
4387 static bool outputDenormalIsIEEEOrPosZero(const Function &F, const Type *Ty) {
4388 Ty = Ty->getScalarType();
4389 DenormalMode Mode = F.getDenormalMode(Ty->getFltSemantics());
4390 return Mode.Output == DenormalMode::IEEE ||
4391 Mode.Output == DenormalMode::PositiveZero;
4394 bool KnownFPClass::isKnownNeverLogicalZero(const Function &F, Type *Ty) const {
4395 return isKnownNeverZero() &&
4396 (isKnownNeverSubnormal() || inputDenormalIsIEEE(F, Ty));
4399 bool KnownFPClass::isKnownNeverLogicalNegZero(const Function &F,
4400 Type *Ty) const {
4401 return isKnownNeverNegZero() &&
4402 (isKnownNeverNegSubnormal() || inputDenormalIsIEEEOrPosZero(F, Ty));
4405 bool KnownFPClass::isKnownNeverLogicalPosZero(const Function &F,
4406 Type *Ty) const {
4407 if (!isKnownNeverPosZero())
4408 return false;
4410 // If we know there are no denormals, nothing can be flushed to zero.
4411 if (isKnownNeverSubnormal())
4412 return true;
4414 DenormalMode Mode = F.getDenormalMode(Ty->getScalarType()->getFltSemantics());
4415 switch (Mode.Input) {
4416 case DenormalMode::IEEE:
4417 return true;
4418 case DenormalMode::PreserveSign:
4419 // Negative subnormal won't flush to +0
4420 return isKnownNeverPosSubnormal();
4421 case DenormalMode::PositiveZero:
4422 default:
4423 // Both positive and negative subnormal could flush to +0
4424 return false;
4427 llvm_unreachable("covered switch over denormal mode");
4430 void KnownFPClass::propagateDenormal(const KnownFPClass &Src, const Function &F,
4431 Type *Ty) {
4432 KnownFPClasses = Src.KnownFPClasses;
4433 // If both +0 and -0 are already possible for the source, a flushed denormal
4434 // input cannot add any new classes, so there is nothing to check.
4435 if (!Src.isKnownNeverPosZero() && !Src.isKnownNeverNegZero())
4436 return;
4438 // If we know the input can't be a denormal, it can't be flushed to 0.
4439 if (Src.isKnownNeverSubnormal())
4440 return;
4442 DenormalMode Mode = F.getDenormalMode(Ty->getScalarType()->getFltSemantics());
4444 if (!Src.isKnownNeverPosSubnormal() && Mode != DenormalMode::getIEEE())
4445 KnownFPClasses |= fcPosZero;
4447 if (!Src.isKnownNeverNegSubnormal() && Mode != DenormalMode::getIEEE()) {
4448 if (Mode != DenormalMode::getPositiveZero())
4449 KnownFPClasses |= fcNegZero;
4451 if (Mode.Input == DenormalMode::PositiveZero ||
4452 Mode.Output == DenormalMode::PositiveZero ||
4453 Mode.Input == DenormalMode::Dynamic ||
4454 Mode.Output == DenormalMode::Dynamic)
4455 KnownFPClasses |= fcPosZero;
4459 void KnownFPClass::propagateCanonicalizingSrc(const KnownFPClass &Src,
4460 const Function &F, Type *Ty) {
4461 propagateDenormal(Src, F, Ty);
4462 propagateNaN(Src, /*PreserveSign=*/true);
4465 /// Given an exploded icmp instruction, return true if the comparison only
4466 /// checks the sign bit. If it only checks the sign bit, set TrueIfSigned to
4467 /// indicate whether the comparison is true when the input's sign bit is set.
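/// For example, (icmp slt i32 %x, 0) and (icmp ugt i32 %x, 2147483647) both
/// check only the sign bit of %x, with TrueIfSigned == true.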
4468 bool llvm::isSignBitCheck(ICmpInst::Predicate Pred, const APInt &RHS,
4469 bool &TrueIfSigned) {
4470 switch (Pred) {
4471 case ICmpInst::ICMP_SLT: // True if LHS s< 0
4472 TrueIfSigned = true;
4473 return RHS.isZero();
4474 case ICmpInst::ICMP_SLE: // True if LHS s<= -1
4475 TrueIfSigned = true;
4476 return RHS.isAllOnes();
4477 case ICmpInst::ICMP_SGT: // True if LHS s> -1
4478 TrueIfSigned = false;
4479 return RHS.isAllOnes();
4480 case ICmpInst::ICMP_SGE: // True if LHS s>= 0
4481 TrueIfSigned = false;
4482 return RHS.isZero();
4483 case ICmpInst::ICMP_UGT:
4484 // True if LHS u> RHS and RHS == sign-bit-mask - 1
4485 TrueIfSigned = true;
4486 return RHS.isMaxSignedValue();
4487 case ICmpInst::ICMP_UGE:
4488 // True if LHS u>= RHS and RHS == sign-bit-mask (2^7, 2^15, 2^31, etc)
4489 TrueIfSigned = true;
4490 return RHS.isMinSignedValue();
4491 case ICmpInst::ICMP_ULT:
4492 // True if LHS u< RHS and RHS == sign-bit-mask (2^7, 2^15, 2^31, etc)
4493 TrueIfSigned = false;
4494 return RHS.isMinSignedValue();
4495 case ICmpInst::ICMP_ULE:
4496 // True if LHS u<= RHS and RHS == sign-bit-mask - 1
4497 TrueIfSigned = false;
4498 return RHS.isMaxSignedValue();
4499 default:
4500 return false;
4504 /// Returns a pair of values, which if passed to llvm.is.fpclass, returns the
4505 /// same result as an fcmp with the given operands.
4506 std::pair<Value *, FPClassTest> llvm::fcmpToClassTest(FCmpInst::Predicate Pred,
4507 const Function &F,
4508 Value *LHS, Value *RHS,
4509 bool LookThroughSrc) {
4510 const APFloat *ConstRHS;
4511 if (!match(RHS, m_APFloatAllowPoison(ConstRHS)))
4512 return {nullptr, fcAllFlags};
4514 return fcmpToClassTest(Pred, F, LHS, ConstRHS, LookThroughSrc);
4517 std::pair<Value *, FPClassTest>
4518 llvm::fcmpToClassTest(FCmpInst::Predicate Pred, const Function &F, Value *LHS,
4519 const APFloat *ConstRHS, bool LookThroughSrc) {
4521 auto [Src, ClassIfTrue, ClassIfFalse] =
4522 fcmpImpliesClass(Pred, F, LHS, *ConstRHS, LookThroughSrc);
4523 if (Src && ClassIfTrue == ~ClassIfFalse)
4524 return {Src, ClassIfTrue};
4525 return {nullptr, fcAllFlags};
4528 /// Return the return value for fcmpImpliesClass for a compare that produces an
4529 /// exact class test.
4530 static std::tuple<Value *, FPClassTest, FPClassTest> exactClass(Value *V,
4531 FPClassTest M) {
4532 return {V, M, ~M};
4535 std::tuple<Value *, FPClassTest, FPClassTest>
4536 llvm::fcmpImpliesClass(CmpInst::Predicate Pred, const Function &F, Value *LHS,
4537 FPClassTest RHSClass, bool LookThroughSrc) {
4538 assert(RHSClass != fcNone);
4539 Value *Src = LHS;
4541 if (Pred == FCmpInst::FCMP_TRUE)
4542 return exactClass(Src, fcAllFlags);
4544 if (Pred == FCmpInst::FCMP_FALSE)
4545 return exactClass(Src, fcNone);
4547 const FPClassTest OrigClass = RHSClass;
4549 const bool IsNegativeRHS = (RHSClass & fcNegative) == RHSClass;
4550 const bool IsPositiveRHS = (RHSClass & fcPositive) == RHSClass;
4551 const bool IsNaN = (RHSClass & ~fcNan) == fcNone;
4553 if (IsNaN) {
4554 // fcmp o__ x, nan -> false
4555 // fcmp u__ x, nan -> true
4556 return exactClass(Src, CmpInst::isOrdered(Pred) ? fcNone : fcAllFlags);
4559 // fcmp ord x, zero|normal|subnormal|inf -> ~fcNan
4560 if (Pred == FCmpInst::FCMP_ORD)
4561 return exactClass(Src, ~fcNan);
4563 // fcmp uno x, zero|normal|subnormal|inf -> fcNan
4564 if (Pred == FCmpInst::FCMP_UNO)
4565 return exactClass(Src, fcNan);
4567 const bool IsFabs = LookThroughSrc && match(LHS, m_FAbs(m_Value(Src)));
4568 if (IsFabs)
4569 RHSClass = llvm::inverse_fabs(RHSClass);
4571 const bool IsZero = (OrigClass & fcZero) == OrigClass;
4572 if (IsZero) {
4573 assert(Pred != FCmpInst::FCMP_ORD && Pred != FCmpInst::FCMP_UNO);
4574 // A compare against zero only corresponds exactly to an fcZero class test if
4575 // input denormals are not flushed.
4576 // TODO: Handle DAZ by expanding masks to cover subnormal cases.
4577 if (!inputDenormalIsIEEE(F, LHS->getType()))
4578 return {nullptr, fcAllFlags, fcAllFlags};
4580 switch (Pred) {
4581 case FCmpInst::FCMP_OEQ: // Match x == 0.0
4582 return exactClass(Src, fcZero);
4583 case FCmpInst::FCMP_UEQ: // Match isnan(x) || (x == 0.0)
4584 return exactClass(Src, fcZero | fcNan);
4585 case FCmpInst::FCMP_UNE: // Match (x != 0.0)
4586 return exactClass(Src, ~fcZero);
4587 case FCmpInst::FCMP_ONE: // Match !isnan(x) && x != 0.0
4588 return exactClass(Src, ~fcNan & ~fcZero);
4589 case FCmpInst::FCMP_ORD:
4590 // Canonical form of ord/uno is with a zero. We could also handle
4591 // non-canonical other non-NaN constants or LHS == RHS.
4592 return exactClass(Src, ~fcNan);
4593 case FCmpInst::FCMP_UNO:
4594 return exactClass(Src, fcNan);
4595 case FCmpInst::FCMP_OGT: // x > 0
4596 return exactClass(Src, fcPosSubnormal | fcPosNormal | fcPosInf);
4597 case FCmpInst::FCMP_UGT: // isnan(x) || x > 0
4598 return exactClass(Src, fcPosSubnormal | fcPosNormal | fcPosInf | fcNan);
4599 case FCmpInst::FCMP_OGE: // x >= 0
4600 return exactClass(Src, fcPositive | fcNegZero);
4601 case FCmpInst::FCMP_UGE: // isnan(x) || x >= 0
4602 return exactClass(Src, fcPositive | fcNegZero | fcNan);
4603 case FCmpInst::FCMP_OLT: // x < 0
4604 return exactClass(Src, fcNegSubnormal | fcNegNormal | fcNegInf);
4605 case FCmpInst::FCMP_ULT: // isnan(x) || x < 0
4606 return exactClass(Src, fcNegSubnormal | fcNegNormal | fcNegInf | fcNan);
4607 case FCmpInst::FCMP_OLE: // x <= 0
4608 return exactClass(Src, fcNegative | fcPosZero);
4609 case FCmpInst::FCMP_ULE: // isnan(x) || x <= 0
4610 return exactClass(Src, fcNegative | fcPosZero | fcNan);
4611 default:
4612 llvm_unreachable("all compare types are handled");
4615 return {nullptr, fcAllFlags, fcAllFlags};
4618 const bool IsDenormalRHS = (OrigClass & fcSubnormal) == OrigClass;
4620 const bool IsInf = (OrigClass & fcInf) == OrigClass;
4621 if (IsInf) {
4622 FPClassTest Mask = fcAllFlags;
4624 switch (Pred) {
4625 case FCmpInst::FCMP_OEQ:
4626 case FCmpInst::FCMP_UNE: {
4627 // Match __builtin_isinf patterns
4629 // fcmp oeq x, +inf -> is_fpclass x, fcPosInf
4630 // fcmp oeq fabs(x), +inf -> is_fpclass x, fcInf
4631 // fcmp oeq x, -inf -> is_fpclass x, fcNegInf
4632 // fcmp oeq fabs(x), -inf -> is_fpclass x, 0 -> false
4634 // fcmp une x, +inf -> is_fpclass x, ~fcPosInf
4635 // fcmp une fabs(x), +inf -> is_fpclass x, ~fcInf
4636 // fcmp une x, -inf -> is_fpclass x, ~fcNegInf
4637 // fcmp une fabs(x), -inf -> is_fpclass x, fcAllFlags -> true
4638 if (IsNegativeRHS) {
4639 Mask = fcNegInf;
4640 if (IsFabs)
4641 Mask = fcNone;
4642 } else {
4643 Mask = fcPosInf;
4644 if (IsFabs)
4645 Mask |= fcNegInf;
4647 break;
4649 case FCmpInst::FCMP_ONE:
4650 case FCmpInst::FCMP_UEQ: {
4651 // Match __builtin_isinf patterns
4652 // fcmp one x, -inf -> is_fpclass x, ~fcNegInf & ~fcNan
4653 // fcmp one fabs(x), -inf -> is_fpclass x, ~fcNan
4654 // fcmp one x, +inf -> is_fpclass x, ~fcPosInf & ~fcNan
4655 // fcmp one fabs(x), +inf -> is_fpclass x, ~fcInf & ~fcNan
4657 // fcmp ueq x, +inf -> is_fpclass x, fcPosInf|fcNan
4658 // fcmp ueq (fabs x), +inf -> is_fpclass x, fcInf|fcNan
4659 // fcmp ueq x, -inf -> is_fpclass x, fcNegInf|fcNan
4660 // fcmp ueq fabs(x), -inf -> is_fpclass x, fcNan
4661 if (IsNegativeRHS) {
4662 Mask = ~fcNegInf & ~fcNan;
4663 if (IsFabs)
4664 Mask = ~fcNan;
4665 } else {
4666 Mask = ~fcPosInf & ~fcNan;
4667 if (IsFabs)
4668 Mask &= ~fcNegInf;
4671 break;
4673 case FCmpInst::FCMP_OLT:
4674 case FCmpInst::FCMP_UGE: {
4675 if (IsNegativeRHS) {
4676 // No value is ordered and less than negative infinity.
4677 // All values are either unordered with, or at least, negative infinity.
4678 // fcmp olt x, -inf -> false
4679 // fcmp uge x, -inf -> true
4680 Mask = fcNone;
4681 break;
4684 // fcmp olt fabs(x), +inf -> fcFinite
4685 // fcmp uge fabs(x), +inf -> ~fcFinite
4686 // fcmp olt x, +inf -> fcFinite|fcNegInf
4687 // fcmp uge x, +inf -> ~(fcFinite|fcNegInf)
4688 Mask = fcFinite;
4689 if (!IsFabs)
4690 Mask |= fcNegInf;
4691 break;
4693 case FCmpInst::FCMP_OGE:
4694 case FCmpInst::FCMP_ULT: {
4695 if (IsNegativeRHS) {
4696 // fcmp oge x, -inf -> ~fcNan
4697 // fcmp oge fabs(x), -inf -> ~fcNan
4698 // fcmp ult x, -inf -> fcNan
4699 // fcmp ult fabs(x), -inf -> fcNan
4700 Mask = ~fcNan;
4701 break;
4704 // fcmp oge fabs(x), +inf -> fcInf
4705 // fcmp oge x, +inf -> fcPosInf
4706 // fcmp ult fabs(x), +inf -> ~fcInf
4707 // fcmp ult x, +inf -> ~fcPosInf
4708 Mask = fcPosInf;
4709 if (IsFabs)
4710 Mask |= fcNegInf;
4711 break;
4713 case FCmpInst::FCMP_OGT:
4714 case FCmpInst::FCMP_ULE: {
4715 if (IsNegativeRHS) {
4716 // fcmp ogt x, -inf -> fcmp one x, -inf
4717 // fcmp ogt fabs(x), -inf -> fcmp ord x, x
4718 // fcmp ule x, -inf -> fcmp ueq x, -inf
4719 // fcmp ule fabs(x), -inf -> fcmp uno x, x
4720 Mask = IsFabs ? ~fcNan : ~(fcNegInf | fcNan);
4721 break;
4724 // No value is ordered and greater than infinity.
4725 Mask = fcNone;
4726 break;
4728 case FCmpInst::FCMP_OLE:
4729 case FCmpInst::FCMP_UGT: {
4730 if (IsNegativeRHS) {
4731 Mask = IsFabs ? fcNone : fcNegInf;
4732 break;
4735 // fcmp ole x, +inf -> fcmp ord x, x
4736 // fcmp ole fabs(x), +inf -> fcmp ord x, x
4737 // fcmp ole x, -inf -> fcmp oeq x, -inf
4738 // fcmp ole fabs(x), -inf -> false
4739 Mask = ~fcNan;
4740 break;
4742 default:
4743 llvm_unreachable("all compare types are handled");
4746 // Invert the comparison for the unordered cases.
4747 if (FCmpInst::isUnordered(Pred))
4748 Mask = ~Mask;
4750 return exactClass(Src, Mask);
4753 if (Pred == FCmpInst::FCMP_OEQ)
4754 return {Src, RHSClass, fcAllFlags};
4756 if (Pred == FCmpInst::FCMP_UEQ) {
4757 FPClassTest Class = RHSClass | fcNan;
4758 return {Src, Class, ~fcNan};
4761 if (Pred == FCmpInst::FCMP_ONE)
4762 return {Src, ~fcNan, RHSClass | fcNan};
4764 if (Pred == FCmpInst::FCMP_UNE)
4765 return {Src, fcAllFlags, RHSClass};
4767 assert((RHSClass == fcNone || RHSClass == fcPosNormal ||
4768 RHSClass == fcNegNormal || RHSClass == fcNormal ||
4769 RHSClass == fcPosSubnormal || RHSClass == fcNegSubnormal ||
4770 RHSClass == fcSubnormal) &&
4771 "should have been recognized as an exact class test");
4773 if (IsNegativeRHS) {
4774 // TODO: Handle fneg(fabs)
4775 if (IsFabs) {
4776 // fabs(x) o> -k -> fcmp ord x, x
4777 // fabs(x) u> -k -> true
4778 // fabs(x) o< -k -> false
4779 // fabs(x) u< -k -> fcmp uno x, x
4780 switch (Pred) {
4781 case FCmpInst::FCMP_OGT:
4782 case FCmpInst::FCMP_OGE:
4783 return {Src, ~fcNan, fcNan};
4784 case FCmpInst::FCMP_UGT:
4785 case FCmpInst::FCMP_UGE:
4786 return {Src, fcAllFlags, fcNone};
4787 case FCmpInst::FCMP_OLT:
4788 case FCmpInst::FCMP_OLE:
4789 return {Src, fcNone, fcAllFlags};
4790 case FCmpInst::FCMP_ULT:
4791 case FCmpInst::FCMP_ULE:
4792 return {Src, fcNan, ~fcNan};
4793 default:
4794 break;
4797 return {nullptr, fcAllFlags, fcAllFlags};
4800 FPClassTest ClassesLE = fcNegInf | fcNegNormal;
4801 FPClassTest ClassesGE = fcPositive | fcNegZero | fcNegSubnormal;
4803 if (IsDenormalRHS)
4804 ClassesLE |= fcNegSubnormal;
4805 else
4806 ClassesGE |= fcNegNormal;
4808 switch (Pred) {
4809 case FCmpInst::FCMP_OGT:
4810 case FCmpInst::FCMP_OGE:
4811 return {Src, ClassesGE, ~ClassesGE | RHSClass};
4812 case FCmpInst::FCMP_UGT:
4813 case FCmpInst::FCMP_UGE:
4814 return {Src, ClassesGE | fcNan, ~(ClassesGE | fcNan) | RHSClass};
4815 case FCmpInst::FCMP_OLT:
4816 case FCmpInst::FCMP_OLE:
4817 return {Src, ClassesLE, ~ClassesLE | RHSClass};
4818 case FCmpInst::FCMP_ULT:
4819 case FCmpInst::FCMP_ULE:
4820 return {Src, ClassesLE | fcNan, ~(ClassesLE | fcNan) | RHSClass};
4821 default:
4822 break;
4824 } else if (IsPositiveRHS) {
4825 FPClassTest ClassesGE = fcPosNormal | fcPosInf;
4826 FPClassTest ClassesLE = fcNegative | fcPosZero | fcPosSubnormal;
4827 if (IsDenormalRHS)
4828 ClassesGE |= fcPosSubnormal;
4829 else
4830 ClassesLE |= fcPosNormal;
4832 if (IsFabs) {
4833 ClassesGE = llvm::inverse_fabs(ClassesGE);
4834 ClassesLE = llvm::inverse_fabs(ClassesLE);
4837 switch (Pred) {
4838 case FCmpInst::FCMP_OGT:
4839 case FCmpInst::FCMP_OGE:
4840 return {Src, ClassesGE, ~ClassesGE | RHSClass};
4841 case FCmpInst::FCMP_UGT:
4842 case FCmpInst::FCMP_UGE:
4843 return {Src, ClassesGE | fcNan, ~(ClassesGE | fcNan) | RHSClass};
4844 case FCmpInst::FCMP_OLT:
4845 case FCmpInst::FCMP_OLE:
4846 return {Src, ClassesLE, ~ClassesLE | RHSClass};
4847 case FCmpInst::FCMP_ULT:
4848 case FCmpInst::FCMP_ULE:
4849 return {Src, ClassesLE | fcNan, ~(ClassesLE | fcNan) | RHSClass};
4850 default:
4851 break;
4855 return {nullptr, fcAllFlags, fcAllFlags};
4858 std::tuple<Value *, FPClassTest, FPClassTest>
4859 llvm::fcmpImpliesClass(CmpInst::Predicate Pred, const Function &F, Value *LHS,
4860 const APFloat &ConstRHS, bool LookThroughSrc) {
4861 // We can refine checks against smallest normal / largest denormal to an
4862 // exact class test.
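// For instance (a sketch of the __builtin_isnormal-style idiom),
//   fcmp oge fabs(x), smallest_normal
// refines to the exact test is_fpclass(x, fcNormal | fcInf) rather than a
// pair of inexact masks.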
4863 if (!ConstRHS.isNegative() && ConstRHS.isSmallestNormalized()) {
4864 Value *Src = LHS;
4865 const bool IsFabs = LookThroughSrc && match(LHS, m_FAbs(m_Value(Src)));
4867 FPClassTest Mask;
4868 // Match pattern that's used in __builtin_isnormal.
4869 switch (Pred) {
4870 case FCmpInst::FCMP_OLT:
4871 case FCmpInst::FCMP_UGE: {
4872 // fcmp olt x, smallest_normal -> fcNegInf|fcNegNormal|fcSubnormal|fcZero
4873 // fcmp olt fabs(x), smallest_normal -> fcSubnormal|fcZero
4874 // fcmp uge x, smallest_normal -> fcNan|fcPosNormal|fcPosInf
4875 // fcmp uge fabs(x), smallest_normal -> ~(fcSubnormal|fcZero)
4876 Mask = fcZero | fcSubnormal;
4877 if (!IsFabs)
4878 Mask |= fcNegNormal | fcNegInf;
4880 break;
4882 case FCmpInst::FCMP_OGE:
4883 case FCmpInst::FCMP_ULT: {
4884 // fcmp oge x, smallest_normal -> fcPosNormal | fcPosInf
4885 // fcmp oge fabs(x), smallest_normal -> fcInf | fcNormal
4886 // fcmp ult x, smallest_normal -> ~(fcPosNormal | fcPosInf)
4887 // fcmp ult fabs(x), smallest_normal -> ~(fcInf | fcNormal)
4888 Mask = fcPosInf | fcPosNormal;
4889 if (IsFabs)
4890 Mask |= fcNegInf | fcNegNormal;
4891 break;
4893 default:
4894 return fcmpImpliesClass(Pred, F, LHS, ConstRHS.classify(),
4895 LookThroughSrc);
4898 // Invert the comparison for the unordered cases.
4899 if (FCmpInst::isUnordered(Pred))
4900 Mask = ~Mask;
4902 return exactClass(Src, Mask);
4905 return fcmpImpliesClass(Pred, F, LHS, ConstRHS.classify(), LookThroughSrc);
4908 std::tuple<Value *, FPClassTest, FPClassTest>
4909 llvm::fcmpImpliesClass(CmpInst::Predicate Pred, const Function &F, Value *LHS,
4910 Value *RHS, bool LookThroughSrc) {
4911 const APFloat *ConstRHS;
4912 if (!match(RHS, m_APFloatAllowPoison(ConstRHS)))
4913 return {nullptr, fcAllFlags, fcAllFlags};
4915 // TODO: Just call computeKnownFPClass for RHS to handle non-constants.
4916 return fcmpImpliesClass(Pred, F, LHS, *ConstRHS, LookThroughSrc);
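// Refine KnownFromContext for V from a branch or assume condition Cond known
// to be true or false at CxtI. For example (a sketch): if the condition
// "fcmp ogt double %v, 0.0" is known to hold, %v must be positive, non-zero
// and non-NaN, so all other class bits are recorded as known-not below.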
4919 static void computeKnownFPClassFromCond(const Value *V, Value *Cond,
4920 unsigned Depth, bool CondIsTrue,
4921 const Instruction *CxtI,
4922 KnownFPClass &KnownFromContext) {
4923 Value *A, *B;
4924 if (Depth < MaxAnalysisRecursionDepth &&
4925 (CondIsTrue ? match(Cond, m_LogicalAnd(m_Value(A), m_Value(B)))
4926 : match(Cond, m_LogicalOr(m_Value(A), m_Value(B))))) {
4927 computeKnownFPClassFromCond(V, A, Depth + 1, CondIsTrue, CxtI,
4928 KnownFromContext);
4929 computeKnownFPClassFromCond(V, B, Depth + 1, CondIsTrue, CxtI,
4930 KnownFromContext);
4931 return;
4933 CmpPredicate Pred;
4934 Value *LHS;
4935 uint64_t ClassVal = 0;
4936 const APFloat *CRHS;
4937 const APInt *RHS;
4938 if (match(Cond, m_FCmp(Pred, m_Value(LHS), m_APFloat(CRHS)))) {
4939 auto [CmpVal, MaskIfTrue, MaskIfFalse] = fcmpImpliesClass(
4940 Pred, *CxtI->getParent()->getParent(), LHS, *CRHS, LHS != V);
4941 if (CmpVal == V)
4942 KnownFromContext.knownNot(~(CondIsTrue ? MaskIfTrue : MaskIfFalse));
4943 } else if (match(Cond, m_Intrinsic<Intrinsic::is_fpclass>(
4944 m_Specific(V), m_ConstantInt(ClassVal)))) {
4945 FPClassTest Mask = static_cast<FPClassTest>(ClassVal);
4946 KnownFromContext.knownNot(CondIsTrue ? ~Mask : Mask);
4947 } else if (match(Cond, m_ICmp(Pred, m_ElementWiseBitCast(m_Specific(V)),
4948 m_APInt(RHS)))) {
4949 bool TrueIfSigned;
4950 if (!isSignBitCheck(Pred, *RHS, TrueIfSigned))
4951 return;
4952 if (TrueIfSigned == CondIsTrue)
4953 KnownFromContext.signBitMustBeOne();
4954 else
4955 KnownFromContext.signBitMustBeZero();
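// Collect class information for V that is implied by its context: dominating
// branch conditions (via the DomConditionCache) and llvm.assume calls. A
// sketch of the assume case:
//   %cmp = fcmp ord double %v, 0.0
//   call void @llvm.assume(i1 %cmp)
// lets a later query on %v conclude that it is known never NaN.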
4959 static KnownFPClass computeKnownFPClassFromContext(const Value *V,
4960 const SimplifyQuery &Q) {
4961 KnownFPClass KnownFromContext;
4963 if (!Q.CxtI)
4964 return KnownFromContext;
4966 if (Q.DC && Q.DT) {
4967 // Handle dominating conditions.
4968 for (BranchInst *BI : Q.DC->conditionsFor(V)) {
4969 Value *Cond = BI->getCondition();
4971 BasicBlockEdge Edge0(BI->getParent(), BI->getSuccessor(0));
4972 if (Q.DT->dominates(Edge0, Q.CxtI->getParent()))
4973 computeKnownFPClassFromCond(V, Cond, /*Depth=*/0, /*CondIsTrue=*/true,
4974 Q.CxtI, KnownFromContext);
4976 BasicBlockEdge Edge1(BI->getParent(), BI->getSuccessor(1));
4977 if (Q.DT->dominates(Edge1, Q.CxtI->getParent()))
4978 computeKnownFPClassFromCond(V, Cond, /*Depth=*/0, /*CondIsTrue=*/false,
4979 Q.CxtI, KnownFromContext);
4983 if (!Q.AC)
4984 return KnownFromContext;
4986 // Try to restrict the floating-point classes based on information from
4987 // assumptions.
4988 for (auto &AssumeVH : Q.AC->assumptionsFor(V)) {
4989 if (!AssumeVH)
4990 continue;
4991 CallInst *I = cast<CallInst>(AssumeVH);
4993 assert(I->getFunction() == Q.CxtI->getParent()->getParent() &&
4994 "Got assumption for the wrong function!");
4995 assert(I->getIntrinsicID() == Intrinsic::assume &&
4996 "must be an assume intrinsic");
4998 if (!isValidAssumeForContext(I, Q.CxtI, Q.DT))
4999 continue;
5001 computeKnownFPClassFromCond(V, I->getArgOperand(0), /*Depth=*/0,
5002 /*CondIsTrue=*/true, Q.CxtI, KnownFromContext);
5005 return KnownFromContext;
5008 void computeKnownFPClass(const Value *V, const APInt &DemandedElts,
5009 FPClassTest InterestedClasses, KnownFPClass &Known,
5010 unsigned Depth, const SimplifyQuery &Q);
5012 static void computeKnownFPClass(const Value *V, KnownFPClass &Known,
5013 FPClassTest InterestedClasses, unsigned Depth,
5014 const SimplifyQuery &Q) {
5015 auto *FVTy = dyn_cast<FixedVectorType>(V->getType());
5016 APInt DemandedElts =
5017 FVTy ? APInt::getAllOnes(FVTy->getNumElements()) : APInt(1, 1);
5018 computeKnownFPClass(V, DemandedElts, InterestedClasses, Known, Depth, Q);
5021 static void computeKnownFPClassForFPTrunc(const Operator *Op,
5022 const APInt &DemandedElts,
5023 FPClassTest InterestedClasses,
5024 KnownFPClass &Known, unsigned Depth,
5025 const SimplifyQuery &Q) {
5026 if ((InterestedClasses &
5027 (KnownFPClass::OrderedLessThanZeroMask | fcNan)) == fcNone)
5028 return;
5030 KnownFPClass KnownSrc;
5031 computeKnownFPClass(Op->getOperand(0), DemandedElts, InterestedClasses,
5032 KnownSrc, Depth + 1, Q);
5034 // Sign should be preserved
5035 // TODO: Handle cannot be ordered greater than zero
5036 if (KnownSrc.cannotBeOrderedLessThanZero())
5037 Known.knownNot(KnownFPClass::OrderedLessThanZeroMask);
5039 Known.propagateNaN(KnownSrc, true);
5041 // Infinity needs a range check.
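// Main worker for the computeKnownFPClass family. A minimal usage sketch
// (names as declared in this file):
//   KnownFPClass Known;
//   computeKnownFPClass(V, /*DemandedElts=*/APInt(1, 1), fcNan, Known,
//                       /*Depth=*/0, Q);
//   if (Known.isKnownNeverNaN()) { /* ... */ }
// Scalars use a single demanded "element"; fixed vectors pass one bit per
// lane.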
5044 void computeKnownFPClass(const Value *V, const APInt &DemandedElts,
5045 FPClassTest InterestedClasses, KnownFPClass &Known,
5046 unsigned Depth, const SimplifyQuery &Q) {
5047 assert(Known.isUnknown() && "should not be called with known information");
5049 if (!DemandedElts) {
5050 // No demanded elts, better to assume we don't know anything.
5051 Known.resetAll();
5052 return;
5055 assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");
5057 if (auto *CFP = dyn_cast<ConstantFP>(V)) {
5058 Known.KnownFPClasses = CFP->getValueAPF().classify();
5059 Known.SignBit = CFP->isNegative();
5060 return;
5063 if (isa<ConstantAggregateZero>(V)) {
5064 Known.KnownFPClasses = fcPosZero;
5065 Known.SignBit = false;
5066 return;
5069 if (isa<PoisonValue>(V)) {
5070 Known.KnownFPClasses = fcNone;
5071 Known.SignBit = false;
5072 return;
5075 // Try to handle fixed width vector constants
5076 auto *VFVTy = dyn_cast<FixedVectorType>(V->getType());
5077 const Constant *CV = dyn_cast<Constant>(V);
5078 if (VFVTy && CV) {
5079 Known.KnownFPClasses = fcNone;
5080 bool SignBitAllZero = true;
5081 bool SignBitAllOne = true;
5083 // For vectors, merge the class and sign bit of each demanded element.
5084 unsigned NumElts = VFVTy->getNumElements();
5085 for (unsigned i = 0; i != NumElts; ++i) {
5086 if (!DemandedElts[i])
5087 continue;
5089 Constant *Elt = CV->getAggregateElement(i);
5090 if (!Elt) {
5091 Known = KnownFPClass();
5092 return;
5094 if (isa<PoisonValue>(Elt))
5095 continue;
5096 auto *CElt = dyn_cast<ConstantFP>(Elt);
5097 if (!CElt) {
5098 Known = KnownFPClass();
5099 return;
5102 const APFloat &C = CElt->getValueAPF();
5103 Known.KnownFPClasses |= C.classify();
5104 if (C.isNegative())
5105 SignBitAllZero = false;
5106 else
5107 SignBitAllOne = false;
5109 if (SignBitAllOne != SignBitAllZero)
5110 Known.SignBit = SignBitAllOne;
5111 return;
5114 FPClassTest KnownNotFromFlags = fcNone;
5115 if (const auto *CB = dyn_cast<CallBase>(V))
5116 KnownNotFromFlags |= CB->getRetNoFPClass();
5117 else if (const auto *Arg = dyn_cast<Argument>(V))
5118 KnownNotFromFlags |= Arg->getNoFPClass();
5120 const Operator *Op = dyn_cast<Operator>(V);
5121 if (const FPMathOperator *FPOp = dyn_cast_or_null<FPMathOperator>(Op)) {
5122 if (FPOp->hasNoNaNs())
5123 KnownNotFromFlags |= fcNan;
5124 if (FPOp->hasNoInfs())
5125 KnownNotFromFlags |= fcInf;
5128 KnownFPClass AssumedClasses = computeKnownFPClassFromContext(V, Q);
5129 KnownNotFromFlags |= ~AssumedClasses.KnownFPClasses;
5131 // We no longer need to find out about these bits from inputs if we can
5132 // assume this from flags/attributes.
5133 InterestedClasses &= ~KnownNotFromFlags;
5135 auto ClearClassesFromFlags = make_scope_exit([=, &Known] {
5136 Known.knownNot(KnownNotFromFlags);
5137 if (!Known.SignBit && AssumedClasses.SignBit) {
5138 if (*AssumedClasses.SignBit)
5139 Known.signBitMustBeOne();
5140 else
5141 Known.signBitMustBeZero();
5145 if (!Op)
5146 return;
5148 // All recursive calls that increase depth must come after this.
5149 if (Depth == MaxAnalysisRecursionDepth)
5150 return;
5152 const unsigned Opc = Op->getOpcode();
5153 switch (Opc) {
5154 case Instruction::FNeg: {
5155 computeKnownFPClass(Op->getOperand(0), DemandedElts, InterestedClasses,
5156 Known, Depth + 1, Q);
5157 Known.fneg();
5158 break;
5160 case Instruction::Select: {
5161 Value *Cond = Op->getOperand(0);
5162 Value *LHS = Op->getOperand(1);
5163 Value *RHS = Op->getOperand(2);
5165 FPClassTest FilterLHS = fcAllFlags;
5166 FPClassTest FilterRHS = fcAllFlags;
5168 Value *TestedValue = nullptr;
5169 FPClassTest MaskIfTrue = fcAllFlags;
5170 FPClassTest MaskIfFalse = fcAllFlags;
5171 uint64_t ClassVal = 0;
5172 const Function *F = cast<Instruction>(Op)->getFunction();
5173 CmpPredicate Pred;
5174 Value *CmpLHS, *CmpRHS;
5175 if (F && match(Cond, m_FCmp(Pred, m_Value(CmpLHS), m_Value(CmpRHS)))) {
5176 // If the select filters out a value based on the class, it no longer
5177 // participates in the class of the result
5179 // TODO: In some degenerate cases we can infer something if we try again
5180 // without looking through sign operations.
5181 bool LookThroughFAbsFNeg = CmpLHS != LHS && CmpLHS != RHS;
5182 std::tie(TestedValue, MaskIfTrue, MaskIfFalse) =
5183 fcmpImpliesClass(Pred, *F, CmpLHS, CmpRHS, LookThroughFAbsFNeg);
5184 } else if (match(Cond,
5185 m_Intrinsic<Intrinsic::is_fpclass>(
5186 m_Value(TestedValue), m_ConstantInt(ClassVal)))) {
5187 FPClassTest TestedMask = static_cast<FPClassTest>(ClassVal);
5188 MaskIfTrue = TestedMask;
5189 MaskIfFalse = ~TestedMask;
5192 if (TestedValue == LHS) {
5193 // match !isnan(x) ? x : y
5194 FilterLHS = MaskIfTrue;
5195 } else if (TestedValue == RHS) { // && IsExactClass
5196 // match !isnan(x) ? y : x
5197 FilterRHS = MaskIfFalse;
5200 KnownFPClass Known2;
5201 computeKnownFPClass(LHS, DemandedElts, InterestedClasses & FilterLHS, Known,
5202 Depth + 1, Q);
5203 Known.KnownFPClasses &= FilterLHS;
5205 computeKnownFPClass(RHS, DemandedElts, InterestedClasses & FilterRHS,
5206 Known2, Depth + 1, Q);
5207 Known2.KnownFPClasses &= FilterRHS;
5209 Known |= Known2;
5210 break;
5212 case Instruction::Call: {
5213 const CallInst *II = cast<CallInst>(Op);
5214 const Intrinsic::ID IID = II->getIntrinsicID();
5215 switch (IID) {
5216 case Intrinsic::fabs: {
5217 if ((InterestedClasses & (fcNan | fcPositive)) != fcNone) {
5218 // If we only care about the sign bit we don't need to inspect the
5219 // operand.
5220 computeKnownFPClass(II->getArgOperand(0), DemandedElts,
5221 InterestedClasses, Known, Depth + 1, Q);
5224 Known.fabs();
5225 break;
5227 case Intrinsic::copysign: {
5228 KnownFPClass KnownSign;
5230 computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedClasses,
5231 Known, Depth + 1, Q);
5232 computeKnownFPClass(II->getArgOperand(1), DemandedElts, InterestedClasses,
5233 KnownSign, Depth + 1, Q);
5234 Known.copysign(KnownSign);
5235 break;
5237 case Intrinsic::fma:
5238 case Intrinsic::fmuladd: {
5239 if ((InterestedClasses & fcNegative) == fcNone)
5240 break;
5242 if (II->getArgOperand(0) != II->getArgOperand(1))
5243 break;
5245 // The multiply cannot be -0 and therefore the add can't be -0
5246 Known.knownNot(fcNegZero);
5248 // x * x + y is non-negative if y is non-negative.
5249 KnownFPClass KnownAddend;
5250 computeKnownFPClass(II->getArgOperand(2), DemandedElts, InterestedClasses,
5251 KnownAddend, Depth + 1, Q);
5253 if (KnownAddend.cannotBeOrderedLessThanZero())
5254 Known.knownNot(fcNegative);
5255 break;
5257 case Intrinsic::sqrt:
5258 case Intrinsic::experimental_constrained_sqrt: {
5259 KnownFPClass KnownSrc;
5260 FPClassTest InterestedSrcs = InterestedClasses;
5261 if (InterestedClasses & fcNan)
5262 InterestedSrcs |= KnownFPClass::OrderedLessThanZeroMask;
5264 computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedSrcs,
5265 KnownSrc, Depth + 1, Q);
5267 if (KnownSrc.isKnownNeverPosInfinity())
5268 Known.knownNot(fcPosInf);
5269 if (KnownSrc.isKnownNever(fcSNan))
5270 Known.knownNot(fcSNan);
5272 // Any negative value besides -0 returns a nan.
5273 if (KnownSrc.isKnownNeverNaN() && KnownSrc.cannotBeOrderedLessThanZero())
5274 Known.knownNot(fcNan);
5276 // The only negative value that can be returned is -0 for -0 inputs.
5277 Known.knownNot(fcNegInf | fcNegSubnormal | fcNegNormal);
5279 // If the input denormal mode could be PreserveSign, a negative
5280 // subnormal input could produce a negative zero output.
5281 const Function *F = II->getFunction();
5282 if (Q.IIQ.hasNoSignedZeros(II) ||
5283 (F && KnownSrc.isKnownNeverLogicalNegZero(*F, II->getType())))
5284 Known.knownNot(fcNegZero);
5286 break;
5288 case Intrinsic::sin:
5289 case Intrinsic::cos: {
5290 // Return NaN on infinite inputs.
5291 KnownFPClass KnownSrc;
5292 computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedClasses,
5293 KnownSrc, Depth + 1, Q);
5294 Known.knownNot(fcInf);
5295 if (KnownSrc.isKnownNeverNaN() && KnownSrc.isKnownNeverInfinity())
5296 Known.knownNot(fcNan);
5297 break;
5299 case Intrinsic::maxnum:
5300 case Intrinsic::minnum:
5301 case Intrinsic::minimum:
5302 case Intrinsic::maximum: {
5303 KnownFPClass KnownLHS, KnownRHS;
5304 computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedClasses,
5305 KnownLHS, Depth + 1, Q);
5306 computeKnownFPClass(II->getArgOperand(1), DemandedElts, InterestedClasses,
5307 KnownRHS, Depth + 1, Q);
5309 bool NeverNaN = KnownLHS.isKnownNeverNaN() || KnownRHS.isKnownNeverNaN();
5310 Known = KnownLHS | KnownRHS;
5312 // If either operand is not NaN, the result is not NaN.
5313 if (NeverNaN && (IID == Intrinsic::minnum || IID == Intrinsic::maxnum))
5314 Known.knownNot(fcNan);
5316 if (IID == Intrinsic::maxnum) {
5317 // If at least one operand is known to be positive, the result must be
5318 // positive.
5319 if ((KnownLHS.cannotBeOrderedLessThanZero() &&
5320 KnownLHS.isKnownNeverNaN()) ||
5321 (KnownRHS.cannotBeOrderedLessThanZero() &&
5322 KnownRHS.isKnownNeverNaN()))
5323 Known.knownNot(KnownFPClass::OrderedLessThanZeroMask);
5324 } else if (IID == Intrinsic::maximum) {
5325 // If at least one operand is known to be positive, the result must be
5326 // positive.
5327 if (KnownLHS.cannotBeOrderedLessThanZero() ||
5328 KnownRHS.cannotBeOrderedLessThanZero())
5329 Known.knownNot(KnownFPClass::OrderedLessThanZeroMask);
5330 } else if (IID == Intrinsic::minnum) {
5331 // If at least one operand is known to be negative, the result must be
5332 // negative.
5333 if ((KnownLHS.cannotBeOrderedGreaterThanZero() &&
5334 KnownLHS.isKnownNeverNaN()) ||
5335 (KnownRHS.cannotBeOrderedGreaterThanZero() &&
5336 KnownRHS.isKnownNeverNaN()))
5337 Known.knownNot(KnownFPClass::OrderedGreaterThanZeroMask);
5338 } else {
5339 // If at least one operand is known to be negative, the result must be
5340 // negative.
5341 if (KnownLHS.cannotBeOrderedGreaterThanZero() ||
5342 KnownRHS.cannotBeOrderedGreaterThanZero())
5343 Known.knownNot(KnownFPClass::OrderedGreaterThanZeroMask);
5346 // Fixup zero handling if denormals could be returned as a zero.
5348 // As there's no spec for denormal flushing, be conservative with the
5349 // treatment of denormals that could be flushed to zero. For older
5350 // subtargets on AMDGPU the min/max instructions would not flush the
5351 // output and return the original value.
5353 if ((Known.KnownFPClasses & fcZero) != fcNone &&
5354 !Known.isKnownNeverSubnormal()) {
5355 const Function *Parent = II->getFunction();
5356 if (!Parent)
5357 break;
5359 DenormalMode Mode = Parent->getDenormalMode(
5360 II->getType()->getScalarType()->getFltSemantics());
5361 if (Mode != DenormalMode::getIEEE())
5362 Known.KnownFPClasses |= fcZero;
5365 if (Known.isKnownNeverNaN()) {
5366 if (KnownLHS.SignBit && KnownRHS.SignBit &&
5367 *KnownLHS.SignBit == *KnownRHS.SignBit) {
5368 if (*KnownLHS.SignBit)
5369 Known.signBitMustBeOne();
5370 else
5371 Known.signBitMustBeZero();
5372 } else if ((IID == Intrinsic::maximum || IID == Intrinsic::minimum) ||
5373 ((KnownLHS.isKnownNeverNegZero() ||
5374 KnownRHS.isKnownNeverPosZero()) &&
5375 (KnownLHS.isKnownNeverPosZero() ||
5376 KnownRHS.isKnownNeverNegZero()))) {
5377 if ((IID == Intrinsic::maximum || IID == Intrinsic::maxnum) &&
5378 (KnownLHS.SignBit == false || KnownRHS.SignBit == false))
5379 Known.signBitMustBeZero();
5380 else if ((IID == Intrinsic::minimum || IID == Intrinsic::minnum) &&
5381 (KnownLHS.SignBit == true || KnownRHS.SignBit == true))
5382 Known.signBitMustBeOne();
5385 break;
5387 case Intrinsic::canonicalize: {
5388 KnownFPClass KnownSrc;
5389 computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedClasses,
5390 KnownSrc, Depth + 1, Q);
5392 // This is essentially a stronger form of
5393 // propagateCanonicalizingSrc. Other "canonicalizing" operations don't
5394 // actually have an IR canonicalization guarantee.
5396 // Canonicalize may flush denormals to zero, so we have to consider the
5397 // denormal mode to preserve known-not-0 knowledge.
5398 Known.KnownFPClasses = KnownSrc.KnownFPClasses | fcZero | fcQNan;
5400 // Stronger version of propagateNaN
5401 // Canonicalize is guaranteed to quiet signaling nans.
5402 if (KnownSrc.isKnownNeverNaN())
5403 Known.knownNot(fcNan);
5404 else
5405 Known.knownNot(fcSNan);
5407 const Function *F = II->getFunction();
5408 if (!F)
5409 break;
5411 // If the parent function flushes denormals, the canonical output cannot
5412 // be a denormal.
5413 const fltSemantics &FPType =
5414 II->getType()->getScalarType()->getFltSemantics();
5415 DenormalMode DenormMode = F->getDenormalMode(FPType);
5416 if (DenormMode == DenormalMode::getIEEE()) {
5417 if (KnownSrc.isKnownNever(fcPosZero))
5418 Known.knownNot(fcPosZero);
5419 if (KnownSrc.isKnownNever(fcNegZero))
5420 Known.knownNot(fcNegZero);
5421 break;
5424 if (DenormMode.inputsAreZero() || DenormMode.outputsAreZero())
5425 Known.knownNot(fcSubnormal);
5427 if (DenormMode.Input == DenormalMode::PositiveZero ||
5428 (DenormMode.Output == DenormalMode::PositiveZero &&
5429 DenormMode.Input == DenormalMode::IEEE))
5430 Known.knownNot(fcNegZero);
5432 break;
5434 case Intrinsic::vector_reduce_fmax:
5435 case Intrinsic::vector_reduce_fmin:
5436 case Intrinsic::vector_reduce_fmaximum:
5437 case Intrinsic::vector_reduce_fminimum: {
5438 // reduce min/max will choose an element from one of the vector elements,
5439 // so we can infer any class information that is common to all elements.
5440 Known = computeKnownFPClass(II->getArgOperand(0), II->getFastMathFlags(),
5441 InterestedClasses, Depth + 1, Q);
5442 // Can only propagate sign if output is never NaN.
5443 if (!Known.isKnownNeverNaN())
5444 Known.SignBit.reset();
5445 break;
5447 // reverse preserves all characteristics of the input vector's elements.
5448 case Intrinsic::vector_reverse:
5449 Known = computeKnownFPClass(
5450 II->getArgOperand(0), DemandedElts.reverseBits(),
5451 II->getFastMathFlags(), InterestedClasses, Depth + 1, Q);
5452 break;
5453 case Intrinsic::trunc:
5454 case Intrinsic::floor:
5455 case Intrinsic::ceil:
5456 case Intrinsic::rint:
5457 case Intrinsic::nearbyint:
5458 case Intrinsic::round:
5459 case Intrinsic::roundeven: {
5460 KnownFPClass KnownSrc;
5461 FPClassTest InterestedSrcs = InterestedClasses;
5462 if (InterestedSrcs & fcPosFinite)
5463 InterestedSrcs |= fcPosFinite;
5464 if (InterestedSrcs & fcNegFinite)
5465 InterestedSrcs |= fcNegFinite;
5466 computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedSrcs,
5467 KnownSrc, Depth + 1, Q);
5469 // Integer results cannot be subnormal.
5470 Known.knownNot(fcSubnormal);
5472 Known.propagateNaN(KnownSrc, true);
5474 // Pass through infinities, except PPC_FP128 is a special case for
5475 // intrinsics other than trunc.
5476 if (IID == Intrinsic::trunc || !V->getType()->isMultiUnitFPType()) {
5477 if (KnownSrc.isKnownNeverPosInfinity())
5478 Known.knownNot(fcPosInf);
5479 if (KnownSrc.isKnownNeverNegInfinity())
5480 Known.knownNot(fcNegInf);
5483 // Rounding a negative value up to 0 produces -0.
5484 if (KnownSrc.isKnownNever(fcPosFinite))
5485 Known.knownNot(fcPosFinite);
5486 if (KnownSrc.isKnownNever(fcNegFinite))
5487 Known.knownNot(fcNegFinite);
5489 break;
5491 case Intrinsic::exp:
5492 case Intrinsic::exp2:
5493 case Intrinsic::exp10: {
5494 Known.knownNot(fcNegative);
5495 if ((InterestedClasses & fcNan) == fcNone)
5496 break;
5498 KnownFPClass KnownSrc;
5499 computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedClasses,
5500 KnownSrc, Depth + 1, Q);
5501 if (KnownSrc.isKnownNeverNaN()) {
5502 Known.knownNot(fcNan);
5503 Known.signBitMustBeZero();
5506 break;
5508 case Intrinsic::fptrunc_round: {
5509 computeKnownFPClassForFPTrunc(Op, DemandedElts, InterestedClasses, Known,
5510 Depth, Q);
5511 break;
5513 case Intrinsic::log:
5514 case Intrinsic::log10:
5515 case Intrinsic::log2:
5516 case Intrinsic::experimental_constrained_log:
5517 case Intrinsic::experimental_constrained_log10:
5518 case Intrinsic::experimental_constrained_log2: {
5519 // log(+inf) -> +inf
5520 // log([+-]0.0) -> -inf
5521 // log(-inf) -> nan
5522 // log(-x) -> nan
5523 if ((InterestedClasses & (fcNan | fcInf)) == fcNone)
5524 break;
5526 FPClassTest InterestedSrcs = InterestedClasses;
5527 if ((InterestedClasses & fcNegInf) != fcNone)
5528 InterestedSrcs |= fcZero | fcSubnormal;
5529 if ((InterestedClasses & fcNan) != fcNone)
5530 InterestedSrcs |= fcNan | (fcNegative & ~fcNan);
5532 KnownFPClass KnownSrc;
5533 computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedSrcs,
5534 KnownSrc, Depth + 1, Q);
5536 if (KnownSrc.isKnownNeverPosInfinity())
5537 Known.knownNot(fcPosInf);
5539 if (KnownSrc.isKnownNeverNaN() && KnownSrc.cannotBeOrderedLessThanZero())
5540 Known.knownNot(fcNan);
5542 const Function *F = II->getFunction();
5543 if (F && KnownSrc.isKnownNeverLogicalZero(*F, II->getType()))
5544 Known.knownNot(fcNegInf);
5546 break;
5548 case Intrinsic::powi: {
5549 if ((InterestedClasses & fcNegative) == fcNone)
5550 break;
5552 const Value *Exp = II->getArgOperand(1);
5553 Type *ExpTy = Exp->getType();
5554 unsigned BitWidth = ExpTy->getScalarType()->getIntegerBitWidth();
5555 KnownBits ExponentKnownBits(BitWidth);
5556 computeKnownBits(Exp, isa<VectorType>(ExpTy) ? DemandedElts : APInt(1, 1),
5557 ExponentKnownBits, Depth + 1, Q);
5559 if (ExponentKnownBits.Zero[0]) { // Is even
5560 Known.knownNot(fcNegative);
5561 break;
5564 // Given that exp is an integer, here are the
5565 // ways that pow can return a negative value:
5567 // pow(x, exp) --> negative if exp is odd and x is negative.
5568 // pow(-0, exp) --> -inf if exp is negative odd.
5569 // pow(-0, exp) --> -0 if exp is positive odd.
5570 // pow(-inf, exp) --> -0 if exp is negative odd.
5571 // pow(-inf, exp) --> -inf if exp is positive odd.
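// E.g. for powi(x, 2) the low bit of the exponent is known zero, so the
// even-exponent check above already proves the result cannot be negative;
// the query on the base below handles the remaining odd-exponent cases.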
5572 KnownFPClass KnownSrc;
5573 computeKnownFPClass(II->getArgOperand(0), DemandedElts, fcNegative,
5574 KnownSrc, Depth + 1, Q);
5575 if (KnownSrc.isKnownNever(fcNegative))
5576 Known.knownNot(fcNegative);
5577 break;
5579 case Intrinsic::ldexp: {
5580 KnownFPClass KnownSrc;
5581 computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedClasses,
5582 KnownSrc, Depth + 1, Q);
5583 Known.propagateNaN(KnownSrc, /*PropagateSign=*/true);
5585 // Sign is preserved, but underflows may produce zeroes.
5586 if (KnownSrc.isKnownNever(fcNegative))
5587 Known.knownNot(fcNegative);
5588 else if (KnownSrc.cannotBeOrderedLessThanZero())
5589 Known.knownNot(KnownFPClass::OrderedLessThanZeroMask);
5591 if (KnownSrc.isKnownNever(fcPositive))
5592 Known.knownNot(fcPositive);
5593 else if (KnownSrc.cannotBeOrderedGreaterThanZero())
5594 Known.knownNot(KnownFPClass::OrderedGreaterThanZeroMask);
5596 // Can refine inf/zero handling based on the exponent operand.
5597 const FPClassTest ExpInfoMask = fcZero | fcSubnormal | fcInf;
5598 if ((InterestedClasses & ExpInfoMask) == fcNone)
5599 break;
5600 if ((KnownSrc.KnownFPClasses & ExpInfoMask) == fcNone)
5601 break;
5603 const fltSemantics &Flt =
5604 II->getType()->getScalarType()->getFltSemantics();
5605 unsigned Precision = APFloat::semanticsPrecision(Flt);
5606 const Value *ExpArg = II->getArgOperand(1);
5607 ConstantRange ExpRange = computeConstantRange(
5608 ExpArg, true, Q.IIQ.UseInstrInfo, Q.AC, Q.CxtI, Q.DT, Depth + 1);
5610 const int MantissaBits = Precision - 1;
5611 if (ExpRange.getSignedMin().sge(static_cast<int64_t>(MantissaBits)))
5612 Known.knownNot(fcSubnormal);
5614 const Function *F = II->getFunction();
5615 const APInt *ConstVal = ExpRange.getSingleElement();
5616 if (ConstVal && ConstVal->isZero()) {
5617 // ldexp(x, 0) -> x, so propagate everything.
5618 Known.propagateCanonicalizingSrc(KnownSrc, *F, II->getType());
5619 } else if (ExpRange.isAllNegative()) {
5620 // If we know the power is <= 0, can't introduce inf
5621 if (KnownSrc.isKnownNeverPosInfinity())
5622 Known.knownNot(fcPosInf);
5623 if (KnownSrc.isKnownNeverNegInfinity())
5624 Known.knownNot(fcNegInf);
5625 } else if (ExpRange.isAllNonNegative()) {
5626 // If we know the power is >= 0, can't introduce subnormal or zero
5627 if (KnownSrc.isKnownNeverPosSubnormal())
5628 Known.knownNot(fcPosSubnormal);
5629 if (KnownSrc.isKnownNeverNegSubnormal())
5630 Known.knownNot(fcNegSubnormal);
5631 if (F && KnownSrc.isKnownNeverLogicalPosZero(*F, II->getType()))
5632 Known.knownNot(fcPosZero);
5633 if (F && KnownSrc.isKnownNeverLogicalNegZero(*F, II->getType()))
5634 Known.knownNot(fcNegZero);
5637 break;
5639 case Intrinsic::arithmetic_fence: {
5640 computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedClasses,
5641 Known, Depth + 1, Q);
5642 break;
5644 case Intrinsic::experimental_constrained_sitofp:
5645 case Intrinsic::experimental_constrained_uitofp:
5646 // Cannot produce nan
5647 Known.knownNot(fcNan);
5649 // sitofp and uitofp turn into +0.0 for zero.
5650 Known.knownNot(fcNegZero);
5652 // Integers cannot be subnormal
5653 Known.knownNot(fcSubnormal);
5655 if (IID == Intrinsic::experimental_constrained_uitofp)
5656 Known.signBitMustBeZero();
5658 // TODO: Copy inf handling from instructions
5659 break;
5660 default:
5661 break;
5664 break;
5666 case Instruction::FAdd:
5667 case Instruction::FSub: {
5668 KnownFPClass KnownLHS, KnownRHS;
5669 bool WantNegative =
5670 Op->getOpcode() == Instruction::FAdd &&
5671 (InterestedClasses & KnownFPClass::OrderedLessThanZeroMask) != fcNone;
5672 bool WantNaN = (InterestedClasses & fcNan) != fcNone;
5673 bool WantNegZero = (InterestedClasses & fcNegZero) != fcNone;
5675 if (!WantNaN && !WantNegative && !WantNegZero)
5676 break;
5678 FPClassTest InterestedSrcs = InterestedClasses;
5679 if (WantNegative)
5680 InterestedSrcs |= KnownFPClass::OrderedLessThanZeroMask;
5681 if (InterestedClasses & fcNan)
5682 InterestedSrcs |= fcInf;
5683 computeKnownFPClass(Op->getOperand(1), DemandedElts, InterestedSrcs,
5684 KnownRHS, Depth + 1, Q);
5686 if ((WantNaN && KnownRHS.isKnownNeverNaN()) ||
5687 (WantNegative && KnownRHS.cannotBeOrderedLessThanZero()) ||
5688 WantNegZero || Opc == Instruction::FSub) {
5690 // RHS is canonically cheaper to compute. Skip inspecting the LHS if
5691 // there's no point.
5692 computeKnownFPClass(Op->getOperand(0), DemandedElts, InterestedSrcs,
5693 KnownLHS, Depth + 1, Q);
5694 // Adding positive and negative infinity produces NaN.
5695 // TODO: Check sign of infinities.
5696 if (KnownLHS.isKnownNeverNaN() && KnownRHS.isKnownNeverNaN() &&
5697 (KnownLHS.isKnownNeverInfinity() || KnownRHS.isKnownNeverInfinity()))
5698 Known.knownNot(fcNan);
5700 // FIXME: Context function should always be passed in separately
5701 const Function *F = cast<Instruction>(Op)->getFunction();
5703 if (Op->getOpcode() == Instruction::FAdd) {
5704 if (KnownLHS.cannotBeOrderedLessThanZero() &&
5705 KnownRHS.cannotBeOrderedLessThanZero())
5706 Known.knownNot(KnownFPClass::OrderedLessThanZeroMask);
5707 if (!F)
5708 break;
5710 // (fadd x, 0.0) is guaranteed to return +0.0, not -0.0.
5711 if ((KnownLHS.isKnownNeverLogicalNegZero(*F, Op->getType()) ||
5712 KnownRHS.isKnownNeverLogicalNegZero(*F, Op->getType())) &&
5713 // Make sure output negative denormal can't flush to -0
5714 outputDenormalIsIEEEOrPosZero(*F, Op->getType()))
5715 Known.knownNot(fcNegZero);
5716 } else {
5717 if (!F)
5718 break;
5720 // Only fsub -0, +0 can return -0
5721 if ((KnownLHS.isKnownNeverLogicalNegZero(*F, Op->getType()) ||
5722 KnownRHS.isKnownNeverLogicalPosZero(*F, Op->getType())) &&
5723 // Make sure output negative denormal can't flush to -0
5724 outputDenormalIsIEEEOrPosZero(*F, Op->getType()))
5725 Known.knownNot(fcNegZero);
5729 break;
5731 case Instruction::FMul: {
5732 // X * X is always non-negative or a NaN.
5733 if (Op->getOperand(0) == Op->getOperand(1))
5734 Known.knownNot(fcNegative);
5736 if ((InterestedClasses & fcNan) != fcNan)
5737 break;
5739 // fcSubnormal is only needed in case of DAZ.
5740 const FPClassTest NeedForNan = fcNan | fcInf | fcZero | fcSubnormal;
5742 KnownFPClass KnownLHS, KnownRHS;
5743 computeKnownFPClass(Op->getOperand(1), DemandedElts, NeedForNan, KnownRHS,
5744 Depth + 1, Q);
5745 if (!KnownRHS.isKnownNeverNaN())
5746 break;
5748 computeKnownFPClass(Op->getOperand(0), DemandedElts, NeedForNan, KnownLHS,
5749 Depth + 1, Q);
5750 if (!KnownLHS.isKnownNeverNaN())
5751 break;
5753 if (KnownLHS.SignBit && KnownRHS.SignBit) {
5754 if (*KnownLHS.SignBit == *KnownRHS.SignBit)
5755 Known.signBitMustBeZero();
5756 else
5757 Known.signBitMustBeOne();
5760 // Only 0 * +/-inf produces NaN.
5761 if (KnownLHS.isKnownNeverInfinity() && KnownRHS.isKnownNeverInfinity()) {
5762 Known.knownNot(fcNan);
5763 break;
5766 const Function *F = cast<Instruction>(Op)->getFunction();
5767 if (!F)
5768 break;
5770 if ((KnownRHS.isKnownNeverInfinity() ||
5771 KnownLHS.isKnownNeverLogicalZero(*F, Op->getType())) &&
5772 (KnownLHS.isKnownNeverInfinity() ||
5773 KnownRHS.isKnownNeverLogicalZero(*F, Op->getType())))
5774 Known.knownNot(fcNan);
5776 break;
5778 case Instruction::FDiv:
5779 case Instruction::FRem: {
5780 if (Op->getOperand(0) == Op->getOperand(1)) {
5781 // TODO: Could filter out snan if we inspect the operand
5782 if (Op->getOpcode() == Instruction::FDiv) {
5783 // X / X is always exactly 1.0 or a NaN.
5784 Known.KnownFPClasses = fcNan | fcPosNormal;
5785 } else {
5786 // X % X is always exactly [+-]0.0 or a NaN.
5787 Known.KnownFPClasses = fcNan | fcZero;
5790 break;
5793 const bool WantNan = (InterestedClasses & fcNan) != fcNone;
5794 const bool WantNegative = (InterestedClasses & fcNegative) != fcNone;
5795 const bool WantPositive =
5796 Opc == Instruction::FRem && (InterestedClasses & fcPositive) != fcNone;
5797 if (!WantNan && !WantNegative && !WantPositive)
5798 break;
5800 KnownFPClass KnownLHS, KnownRHS;
5802 computeKnownFPClass(Op->getOperand(1), DemandedElts,
5803 fcNan | fcInf | fcZero | fcNegative, KnownRHS,
5804 Depth + 1, Q);
5806 bool KnowSomethingUseful =
5807 KnownRHS.isKnownNeverNaN() || KnownRHS.isKnownNever(fcNegative);
5809 if (KnowSomethingUseful || WantPositive) {
5810 const FPClassTest InterestedLHS =
5811 WantPositive ? fcAllFlags
5812 : fcNan | fcInf | fcZero | fcSubnormal | fcNegative;
5814 computeKnownFPClass(Op->getOperand(0), DemandedElts,
5815 InterestedClasses & InterestedLHS, KnownLHS,
5816 Depth + 1, Q);
5819 const Function *F = cast<Instruction>(Op)->getFunction();
5821 if (Op->getOpcode() == Instruction::FDiv) {
5822 // Only 0/0, Inf/Inf produce NaN.
5823 if (KnownLHS.isKnownNeverNaN() && KnownRHS.isKnownNeverNaN() &&
5824 (KnownLHS.isKnownNeverInfinity() ||
5825 KnownRHS.isKnownNeverInfinity()) &&
5826 ((F && KnownLHS.isKnownNeverLogicalZero(*F, Op->getType())) ||
5827 (F && KnownRHS.isKnownNeverLogicalZero(*F, Op->getType())))) {
5828 Known.knownNot(fcNan);
5831 // X / -0.0 is -Inf (or NaN).
5832 // +X / +X is +X
5833 if (KnownLHS.isKnownNever(fcNegative) && KnownRHS.isKnownNever(fcNegative))
5834 Known.knownNot(fcNegative);
5835 } else {
5836 // Inf REM x and x REM 0 produce NaN.
5837 if (KnownLHS.isKnownNeverNaN() && KnownRHS.isKnownNeverNaN() &&
5838 KnownLHS.isKnownNeverInfinity() && F &&
5839 KnownRHS.isKnownNeverLogicalZero(*F, Op->getType())) {
5840 Known.knownNot(fcNan);
5843 // The sign for frem is the same as the first operand.
5844 if (KnownLHS.cannotBeOrderedLessThanZero())
5845 Known.knownNot(KnownFPClass::OrderedLessThanZeroMask);
5846 if (KnownLHS.cannotBeOrderedGreaterThanZero())
5847 Known.knownNot(KnownFPClass::OrderedGreaterThanZeroMask);
5849 // See if we can be more aggressive about the sign of 0.
5850 if (KnownLHS.isKnownNever(fcNegative))
5851 Known.knownNot(fcNegative);
5852 if (KnownLHS.isKnownNever(fcPositive))
5853 Known.knownNot(fcPositive);
5856 break;
5858 case Instruction::FPExt: {
5859 // Infinity, nan and zero propagate from source.
5860 computeKnownFPClass(Op->getOperand(0), DemandedElts, InterestedClasses,
5861 Known, Depth + 1, Q);
5863 const fltSemantics &DstTy =
5864 Op->getType()->getScalarType()->getFltSemantics();
5865 const fltSemantics &SrcTy =
5866 Op->getOperand(0)->getType()->getScalarType()->getFltSemantics();
5868 // All subnormal inputs should be in the normal range in the result type.
5869 if (APFloat::isRepresentableAsNormalIn(SrcTy, DstTy)) {
5870 if (Known.KnownFPClasses & fcPosSubnormal)
5871 Known.KnownFPClasses |= fcPosNormal;
5872 if (Known.KnownFPClasses & fcNegSubnormal)
5873 Known.KnownFPClasses |= fcNegNormal;
5874 Known.knownNot(fcSubnormal);
5877 // Sign bit of a nan isn't guaranteed.
5878 if (!Known.isKnownNeverNaN())
5879 Known.SignBit = std::nullopt;
5880 break;
5882 case Instruction::FPTrunc: {
5883 computeKnownFPClassForFPTrunc(Op, DemandedElts, InterestedClasses, Known,
5884 Depth, Q);
5885 break;
5887 case Instruction::SIToFP:
5888 case Instruction::UIToFP: {
5889 // Cannot produce nan
5890 Known.knownNot(fcNan);
5892 // Integers cannot be subnormal
5893 Known.knownNot(fcSubnormal);
5895 // sitofp and uitofp turn into +0.0 for zero.
5896 Known.knownNot(fcNegZero);
5897 if (Op->getOpcode() == Instruction::UIToFP)
5898 Known.signBitMustBeZero();
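// Whether an infinity can be produced depends on the integer width versus
// the FP exponent range checked below. For example (a sketch):
// sitofp i32 -> float can never yield an infinity (IntSize = 31 <= 127),
// while uitofp i128 -> float can, since 2^128 - 1 rounds up to +inf.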
5900 if (InterestedClasses & fcInf) {
5901 // Get width of largest magnitude integer (remove a bit if signed).
5902 // This still works for a signed minimum value because the largest FP
5903 // value is scaled by some fraction close to 2.0 (1.0 + 0.xxxx).
5904 int IntSize = Op->getOperand(0)->getType()->getScalarSizeInBits();
5905 if (Op->getOpcode() == Instruction::SIToFP)
5906 --IntSize;
5908 // If the exponent of the largest finite FP value can hold the largest
5909 // integer, the result of the cast must be finite.
5910 Type *FPTy = Op->getType()->getScalarType();
5911 if (ilogb(APFloat::getLargest(FPTy->getFltSemantics())) >= IntSize)
5912 Known.knownNot(fcInf);
5915 break;
5917 case Instruction::ExtractElement: {
5918 // Look through extract element. If the index is non-constant or
5919 // out-of-range, demand all elements; otherwise just the extracted element.
5920 const Value *Vec = Op->getOperand(0);
5921 const Value *Idx = Op->getOperand(1);
5922 auto *CIdx = dyn_cast<ConstantInt>(Idx);
5924 if (auto *VecTy = dyn_cast<FixedVectorType>(Vec->getType())) {
5925 unsigned NumElts = VecTy->getNumElements();
5926 APInt DemandedVecElts = APInt::getAllOnes(NumElts);
5927 if (CIdx && CIdx->getValue().ult(NumElts))
5928 DemandedVecElts = APInt::getOneBitSet(NumElts, CIdx->getZExtValue());
5929 return computeKnownFPClass(Vec, DemandedVecElts, InterestedClasses, Known,
5930 Depth + 1, Q);
5933 break;
5935 case Instruction::InsertElement: {
5936 if (isa<ScalableVectorType>(Op->getType()))
5937 return;
5939 const Value *Vec = Op->getOperand(0);
5940 const Value *Elt = Op->getOperand(1);
5941 auto *CIdx = dyn_cast<ConstantInt>(Op->getOperand(2));
5942 unsigned NumElts = DemandedElts.getBitWidth();
5943 APInt DemandedVecElts = DemandedElts;
5944 bool NeedsElt = true;
5945 // If we know the index we are inserting to, clear it from the Vec check.
5946 if (CIdx && CIdx->getValue().ult(NumElts)) {
5947 DemandedVecElts.clearBit(CIdx->getZExtValue());
5948 NeedsElt = DemandedElts[CIdx->getZExtValue()];
5951 // Do we demand the inserted element?
5952 if (NeedsElt) {
5953 computeKnownFPClass(Elt, Known, InterestedClasses, Depth + 1, Q);
5954 // If we don't know any bits, early out.
5955 if (Known.isUnknown())
5956 break;
5957 } else {
5958 Known.KnownFPClasses = fcNone;
5961 // Do we need any more elements from Vec?
5962 if (!DemandedVecElts.isZero()) {
5963 KnownFPClass Known2;
5964 computeKnownFPClass(Vec, DemandedVecElts, InterestedClasses, Known2,
5965 Depth + 1, Q);
5966 Known |= Known2;
5969 break;
5971 case Instruction::ShuffleVector: {
5972 // For undef elements, we don't know anything about the common state of
5973 // the shuffle result.
5974 APInt DemandedLHS, DemandedRHS;
5975 auto *Shuf = dyn_cast<ShuffleVectorInst>(Op);
5976 if (!Shuf || !getShuffleDemandedElts(Shuf, DemandedElts, DemandedLHS, DemandedRHS))
5977 return;
5979 if (!!DemandedLHS) {
5980 const Value *LHS = Shuf->getOperand(0);
5981 computeKnownFPClass(LHS, DemandedLHS, InterestedClasses, Known,
5982 Depth + 1, Q);
5984 // If we don't know any bits, early out.
5985 if (Known.isUnknown())
5986 break;
5987 } else {
5988 Known.KnownFPClasses = fcNone;
5991 if (!!DemandedRHS) {
5992 KnownFPClass Known2;
5993 const Value *RHS = Shuf->getOperand(1);
5994 computeKnownFPClass(RHS, DemandedRHS, InterestedClasses, Known2,
5995 Depth + 1, Q);
5996 Known |= Known2;
5999 break;
6001 case Instruction::ExtractValue: {
6002 const ExtractValueInst *Extract = cast<ExtractValueInst>(Op);
6003 ArrayRef<unsigned> Indices = Extract->getIndices();
6004 const Value *Src = Extract->getAggregateOperand();
6005 if (isa<StructType>(Src->getType()) && Indices.size() == 1 &&
6006 Indices[0] == 0) {
6007 if (const auto *II = dyn_cast<IntrinsicInst>(Src)) {
6008 switch (II->getIntrinsicID()) {
6009 case Intrinsic::frexp: {
6010 Known.knownNot(fcSubnormal);
6012 KnownFPClass KnownSrc;
6013 computeKnownFPClass(II->getArgOperand(0), DemandedElts,
6014 InterestedClasses, KnownSrc, Depth + 1, Q);
6016 const Function *F = cast<Instruction>(Op)->getFunction();
6018 if (KnownSrc.isKnownNever(fcNegative))
6019 Known.knownNot(fcNegative);
6020 else {
6021 if (F && KnownSrc.isKnownNeverLogicalNegZero(*F, Op->getType()))
6022 Known.knownNot(fcNegZero);
6023 if (KnownSrc.isKnownNever(fcNegInf))
6024 Known.knownNot(fcNegInf);
6027 if (KnownSrc.isKnownNever(fcPositive))
6028 Known.knownNot(fcPositive);
6029 else {
6030 if (F && KnownSrc.isKnownNeverLogicalPosZero(*F, Op->getType()))
6031 Known.knownNot(fcPosZero);
6032 if (KnownSrc.isKnownNever(fcPosInf))
6033 Known.knownNot(fcPosInf);
6036 Known.propagateNaN(KnownSrc);
6037 return;
6039 default:
6040 break;
6045 computeKnownFPClass(Src, DemandedElts, InterestedClasses, Known, Depth + 1,
6047 break;
6049 case Instruction::PHI: {
6050 const PHINode *P = cast<PHINode>(Op);
6051 // Unreachable blocks may have zero-operand PHI nodes.
6052 if (P->getNumIncomingValues() == 0)
6053 break;
6055 // Otherwise take the union of the known FP classes of the incoming values,
6056 // taking conservative care to avoid excessive recursion.
6057 const unsigned PhiRecursionLimit = MaxAnalysisRecursionDepth - 2;
6059 if (Depth < PhiRecursionLimit) {
6060 // Skip if every incoming value is a reference to this PHI itself or undef.
6061 if (isa_and_nonnull<UndefValue>(P->hasConstantValue()))
6062 break;
6064 bool First = true;
6066 for (const Use &U : P->operands()) {
6067 Value *IncValue;
6068 Instruction *CxtI;
6069 breakSelfRecursivePHI(&U, P, IncValue, CxtI);
6070 // Skip direct self references.
6071 if (IncValue == P)
6072 continue;
6074 KnownFPClass KnownSrc;
6075 // Recurse, but cap the recursion to two levels, because we don't want
6076 // to waste time spinning around in loops. We need at least depth 2 to
6077 // detect known sign bits.
6078 computeKnownFPClass(IncValue, DemandedElts, InterestedClasses, KnownSrc,
6079 PhiRecursionLimit,
6080 Q.getWithoutCondContext().getWithInstruction(CxtI));
6082 if (First) {
6083 Known = KnownSrc;
6084 First = false;
6085 } else {
6086 Known |= KnownSrc;
6089 if (Known.KnownFPClasses == fcAllFlags)
6090 break;
6094 break;
6096 case Instruction::BitCast: {
6097 const Value *Src;
6098 if (!match(Op, m_ElementWiseBitCast(m_Value(Src))) ||
6099 !Src->getType()->isIntOrIntVectorTy())
6100 break;
6102 const Type *Ty = Op->getType()->getScalarType();
6103 KnownBits Bits(Ty->getScalarSizeInBits());
6104 computeKnownBits(Src, DemandedElts, Bits, Depth + 1, Q);
6106 // Transfer information from the sign bit.
6107 if (Bits.isNonNegative())
6108 Known.signBitMustBeZero();
6109 else if (Bits.isNegative())
6110 Known.signBitMustBeOne();
6112 if (Ty->isIEEE()) {
6113 // IEEE floats are NaN when all bits of the exponent plus at least one of
6114 // the fraction bits are 1. This means:
6115 // - If we assume unknown bits are 0 and the value is NaN, it will
6116 // always be NaN
6117 // - If we assume unknown bits are 1 and the value is not NaN, it can
6118 // never be NaN
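// For example (an f32 sketch): if the known-one bits already cover the full
// exponent 0x7f800000 plus at least one mantissa bit, the value must be a
// NaN; if the exponent cannot be all ones even with every unknown bit set,
// the value can never be a NaN.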
6119 if (APFloat(Ty->getFltSemantics(), Bits.One).isNaN())
6120 Known.KnownFPClasses = fcNan;
6121 else if (!APFloat(Ty->getFltSemantics(), ~Bits.Zero).isNaN())
6122 Known.knownNot(fcNan);
6124 // Build KnownBits representing Inf and check if it must be equal or
6125 // unequal to this value.
6126 auto InfKB = KnownBits::makeConstant(
6127 APFloat::getInf(Ty->getFltSemantics()).bitcastToAPInt());
6128 InfKB.Zero.clearSignBit();
6129 if (const auto InfResult = KnownBits::eq(Bits, InfKB)) {
6130 assert(!InfResult.value());
6131 Known.knownNot(fcInf);
6132 } else if (Bits == InfKB) {
6133 Known.KnownFPClasses = fcInf;
6136 // Build KnownBits representing Zero and check if it must be equal or
6137 // unequal to this value.
6138 auto ZeroKB = KnownBits::makeConstant(
6139 APFloat::getZero(Ty->getFltSemantics()).bitcastToAPInt());
6140 ZeroKB.Zero.clearSignBit();
6141 if (const auto ZeroResult = KnownBits::eq(Bits, ZeroKB)) {
6142 assert(!ZeroResult.value());
6143 Known.knownNot(fcZero);
6144 } else if (Bits == ZeroKB) {
6145 Known.KnownFPClasses = fcZero;
6149 break;
6151 default:
6152 break;
6156 KnownFPClass llvm::computeKnownFPClass(const Value *V,
6157 const APInt &DemandedElts,
6158 FPClassTest InterestedClasses,
6159 unsigned Depth,
6160 const SimplifyQuery &SQ) {
6161 KnownFPClass KnownClasses;
6162 ::computeKnownFPClass(V, DemandedElts, InterestedClasses, KnownClasses, Depth,
6163 SQ);
6164 return KnownClasses;
6167 KnownFPClass llvm::computeKnownFPClass(const Value *V,
6168 FPClassTest InterestedClasses,
6169 unsigned Depth,
6170 const SimplifyQuery &SQ) {
6171 KnownFPClass Known;
6172 ::computeKnownFPClass(V, Known, InterestedClasses, Depth, SQ);
6173 return Known;
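// isBytewiseValue returns the single byte that, when splatted, reproduces
// V's store pattern, or nullptr if no such byte exists. Illustrative cases
// (a sketch of the handling below):
//   i32 0xAAAAAAAA --> i8 0xAA
//   float +0.0     --> i8 0
//   i16 0xABCD     --> nullptr (the bytes differ)
//   undef          --> an undef i8 (it can match any byte)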
6176 Value *llvm::isBytewiseValue(Value *V, const DataLayout &DL) {
6178 // All byte-wide stores are splatable, even of arbitrary variables.
6179 if (V->getType()->isIntegerTy(8))
6180 return V;
6182 LLVMContext &Ctx = V->getContext();
6184 // Undef matches any byte; don't care.
6185 auto *UndefInt8 = UndefValue::get(Type::getInt8Ty(Ctx));
6186 if (isa<UndefValue>(V))
6187 return UndefInt8;
6189 // Return poison for zero-sized type.
6190 if (DL.getTypeStoreSize(V->getType()).isZero())
6191 return PoisonValue::get(Type::getInt8Ty(Ctx));
6193 Constant *C = dyn_cast<Constant>(V);
6194 if (!C) {
6195 // Conceptually, we could handle things like:
6196 // %a = zext i8 %X to i16
6197 // %b = shl i16 %a, 8
6198 // %c = or i16 %a, %b
6199 // but until there is an example that actually needs this, it doesn't seem
6200 // worth worrying about.
6201 return nullptr;
6204 // Handle 'null' ConstantAggregateZero etc.
6205 if (C->isNullValue())
6206 return Constant::getNullValue(Type::getInt8Ty(Ctx));
6208 // Constant floating-point values can be handled as integer values if the
6209 // corresponding integer value is "byteable". An important case is 0.0.
6210 if (ConstantFP *CFP = dyn_cast<ConstantFP>(C)) {
6211 Type *Ty = nullptr;
6212 if (CFP->getType()->isHalfTy())
6213 Ty = Type::getInt16Ty(Ctx);
6214 else if (CFP->getType()->isFloatTy())
6215 Ty = Type::getInt32Ty(Ctx);
6216 else if (CFP->getType()->isDoubleTy())
6217 Ty = Type::getInt64Ty(Ctx);
6218 // Don't handle long double formats, which have strange constraints.
6219 return Ty ? isBytewiseValue(ConstantExpr::getBitCast(CFP, Ty), DL)
6220 : nullptr;
6223 // We can handle constant integers whose width is a multiple of 8 bits.
6224 if (ConstantInt *CI = dyn_cast<ConstantInt>(C)) {
6225 if (CI->getBitWidth() % 8 == 0) {
6226 assert(CI->getBitWidth() > 8 && "8 bits should be handled above!");
6227 if (!CI->getValue().isSplat(8))
6228 return nullptr;
6229 return ConstantInt::get(Ctx, CI->getValue().trunc(8));
6233 if (auto *CE = dyn_cast<ConstantExpr>(C)) {
6234 if (CE->getOpcode() == Instruction::IntToPtr) {
6235 if (auto *PtrTy = dyn_cast<PointerType>(CE->getType())) {
6236 unsigned BitWidth = DL.getPointerSizeInBits(PtrTy->getAddressSpace());
6237 if (Constant *Op = ConstantFoldIntegerCast(
6238 CE->getOperand(0), Type::getIntNTy(Ctx, BitWidth), false, DL))
6239 return isBytewiseValue(Op, DL);
6244 auto Merge = [&](Value *LHS, Value *RHS) -> Value * {
6245 if (LHS == RHS)
6246 return LHS;
6247 if (!LHS || !RHS)
6248 return nullptr;
6249 if (LHS == UndefInt8)
6250 return RHS;
6251 if (RHS == UndefInt8)
6252 return LHS;
6253 return nullptr;
6256 if (ConstantDataSequential *CA = dyn_cast<ConstantDataSequential>(C)) {
6257 Value *Val = UndefInt8;
6258 for (unsigned I = 0, E = CA->getNumElements(); I != E; ++I)
6259 if (!(Val = Merge(Val, isBytewiseValue(CA->getElementAsConstant(I), DL))))
6260 return nullptr;
6261 return Val;
6264 if (isa<ConstantAggregate>(C)) {
6265 Value *Val = UndefInt8;
6266 for (Value *Op : C->operands())
6267 if (!(Val = Merge(Val, isBytewiseValue(Op, DL))))
6268 return nullptr;
6269 return Val;
6272 // Don't try to handle the handful of other constants.
6273 return nullptr;
6276 // This is the recursive version of BuildSubAggregate. It takes a few different
6277 // arguments. Idxs is the index within the nested struct From that we are
6278 // looking at now (which is of type IndexedType). IdxSkip is the number of
6279 // indices from Idxs that should be left out when inserting into the resulting
6280 // struct. To is the result struct built so far; new insertvalue instructions
6281 // build on that.
6282 static Value *BuildSubAggregate(Value *From, Value *To, Type *IndexedType,
6283 SmallVectorImpl<unsigned> &Idxs,
6284 unsigned IdxSkip,
6285 BasicBlock::iterator InsertBefore) {
6286 StructType *STy = dyn_cast<StructType>(IndexedType);
6287 if (STy) {
6288 // Save the original To argument so we can modify it
6289 Value *OrigTo = To;
6290 // General case, the type indexed by Idxs is a struct
6291 for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
6292 // Process each struct element recursively
6293 Idxs.push_back(i);
6294 Value *PrevTo = To;
6295 To = BuildSubAggregate(From, To, STy->getElementType(i), Idxs, IdxSkip,
6296 InsertBefore);
6297 Idxs.pop_back();
6298 if (!To) {
6299 // Couldn't find any inserted value for this index? Cleanup
6300 while (PrevTo != OrigTo) {
6301 InsertValueInst* Del = cast<InsertValueInst>(PrevTo);
6302 PrevTo = Del->getAggregateOperand();
6303 Del->eraseFromParent();
6305 // Stop processing elements
6306 break;
6309 // If we successfully found a value for each of our subaggregates
6310 if (To)
6311 return To;
6313 // Base case, the type indexed by Idxs is not a struct, or not all of
6314 // the struct's elements had a value that was inserted directly. In the latter
6315 // case, perhaps we can't determine each of the subelements individually, but
6316 // we might be able to find the complete struct somewhere.
6318 // Find the value that is at that particular spot
6319 Value *V = FindInsertedValue(From, Idxs);
6321 if (!V)
6322 return nullptr;
6324 // Insert the value in the new (sub) aggregate
6325 return InsertValueInst::Create(To, V, ArrayRef(Idxs).slice(IdxSkip), "tmp",
6326 InsertBefore);
6329 // This helper takes a nested struct and extracts a part of it (which is again a
6330 // struct) into a new value. For example, given the struct:
6331 // { a, { b, { c, d }, e } }
6332 // and the indices "1, 1" this returns
6333 // { c, d }.
6335 // It does this by inserting an insertvalue for each element in the resulting
6336 // struct, as opposed to just inserting a single struct. This will only work if
6337 // each of the elements of the substruct is known (i.e., inserted into From by an
6338 // insertvalue instruction somewhere).
6340 // All inserted insertvalue instructions are inserted before InsertBefore
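// Illustrative sketch (not part of the original source; value names and the
// element type are hypothetical): for the { c, d } example above, with both
// scalar elements previously inserted into From via insertvalue, a call like
// BuildSubAggregate(From, {1, 1}, InsertBefore) would emit IR roughly like:
//   %x = insertvalue { i32, i32 } poison, i32 %c, 0
//   %y = insertvalue { i32, i32 } %x, i32 %d, 1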
6341 static Value *BuildSubAggregate(Value *From, ArrayRef<unsigned> idx_range,
6342 BasicBlock::iterator InsertBefore) {
6343 Type *IndexedType = ExtractValueInst::getIndexedType(From->getType(),
6344 idx_range);
6345 Value *To = PoisonValue::get(IndexedType);
6346 SmallVector<unsigned, 10> Idxs(idx_range);
6347 unsigned IdxSkip = Idxs.size();
6349 return BuildSubAggregate(From, To, IndexedType, Idxs, IdxSkip, InsertBefore);
6352 /// Given an aggregate and a sequence of indices, see if the scalar value
6353 /// indexed is already around as a register, for example if it was inserted
6354 /// directly into the aggregate.
6356 /// If InsertBefore is not null, this function will duplicate (modified)
6357 /// insertvalues when a part of a nested struct is extracted.
6358 Value *
6359 llvm::FindInsertedValue(Value *V, ArrayRef<unsigned> idx_range,
6360 std::optional<BasicBlock::iterator> InsertBefore) {
6361 // Nothing to index? Just return V then (this is useful at the end of our
6362 // recursion).
6363 if (idx_range.empty())
6364 return V;
6365 // We have indices, so V should have an indexable type.
6366 assert((V->getType()->isStructTy() || V->getType()->isArrayTy()) &&
6367 "Not looking at a struct or array?");
6368 assert(ExtractValueInst::getIndexedType(V->getType(), idx_range) &&
6369 "Invalid indices for type?");
6371 if (Constant *C = dyn_cast<Constant>(V)) {
6372 C = C->getAggregateElement(idx_range[0]);
6373 if (!C) return nullptr;
6374 return FindInsertedValue(C, idx_range.slice(1), InsertBefore);
6377 if (InsertValueInst *I = dyn_cast<InsertValueInst>(V)) {
6378 // Loop the indices for the insertvalue instruction in parallel with the
6379 // requested indices
6380 const unsigned *req_idx = idx_range.begin();
6381 for (const unsigned *i = I->idx_begin(), *e = I->idx_end();
6382 i != e; ++i, ++req_idx) {
6383 if (req_idx == idx_range.end()) {
6384 // We can't handle this without inserting insertvalues
6385 if (!InsertBefore)
6386 return nullptr;
6388 // The requested index identifies a part of a nested aggregate. Handle
6389 // this specially. For example,
6390 // %A = insertvalue { i32, {i32, i32 } } undef, i32 10, 1, 0
6391 // %B = insertvalue { i32, {i32, i32 } } %A, i32 11, 1, 1
6392 // %C = extractvalue {i32, { i32, i32 } } %B, 1
6393 // This can be changed into
6394 // %A = insertvalue {i32, i32 } undef, i32 10, 0
6395 // %C = insertvalue {i32, i32 } %A, i32 11, 1
6396 // which allows the unused 0,0 element from the nested struct to be
6397 // removed.
6398 return BuildSubAggregate(V, ArrayRef(idx_range.begin(), req_idx),
6399 *InsertBefore);
6402 // This insertvalue inserts something other than what we are looking for.
6403 // See if the (aggregate) value inserted into has the value we are
6404 // looking for, then.
6405 if (*req_idx != *i)
6406 return FindInsertedValue(I->getAggregateOperand(), idx_range,
6407 InsertBefore);
6409 // If we end up here, the indices of the insertvalue match with those
6410 // requested (though possibly only partially). Now we recursively look at
6411 // the inserted value, passing any remaining indices.
6412 return FindInsertedValue(I->getInsertedValueOperand(),
6413 ArrayRef(req_idx, idx_range.end()), InsertBefore);
6416 if (ExtractValueInst *I = dyn_cast<ExtractValueInst>(V)) {
6417 // If we're extracting a value from an aggregate that was extracted from
6418 // something else, we can extract from that something else directly instead.
6419 // However, we will need to chain I's indices with the requested indices.
6421 // Calculate the number of indices required
6422 unsigned size = I->getNumIndices() + idx_range.size();
6423 // Allocate some space to put the new indices in
6424 SmallVector<unsigned, 5> Idxs;
6425 Idxs.reserve(size);
6426 // Add indices from the extract value instruction
6427 Idxs.append(I->idx_begin(), I->idx_end());
6429 // Add requested indices
6430 Idxs.append(idx_range.begin(), idx_range.end());
6432 assert(Idxs.size() == size
6433 && "Number of indices added not correct?");
6435 return FindInsertedValue(I->getAggregateOperand(), Idxs, InsertBefore);
6437 // Otherwise, we don't know (e.g., when extracting from a function return value
6438 // or a load instruction).
6439 return nullptr;
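// Illustrative example (not part of the original source; the global name and
// array length are hypothetical): with CharSize == 8, the helper below accepts
//   getelementptr [13 x i8], ptr @str, i64 0, i64 %idx
// because it has exactly three operands, the source element type is an array
// of i8, and the first index is the constant zero.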
6442 bool llvm::isGEPBasedOnPointerToString(const GEPOperator *GEP,
6443 unsigned CharSize) {
6444 // Make sure the GEP has exactly three arguments.
6445 if (GEP->getNumOperands() != 3)
6446 return false;
6448 // Make sure the index-ee is a pointer to an array of \p CharSize integers.
6450 ArrayType *AT = dyn_cast<ArrayType>(GEP->getSourceElementType());
6451 if (!AT || !AT->getElementType()->isIntegerTy(CharSize))
6452 return false;
6454 // Check to make sure that the first operand of the GEP is an integer and
6455 // has value 0 so that we are sure we're indexing into the initializer.
6456 const ConstantInt *FirstIdx = dyn_cast<ConstantInt>(GEP->getOperand(1));
6457 if (!FirstIdx || !FirstIdx->isZero())
6458 return false;
6460 return true;
6463 // If V refers to an initialized global constant, set Slice either to
6464 // its initializer if the size of its elements equals ElementSize, or,
6465 // for ElementSize == 8, to its representation as an array of unsigned
6466 // char. Return true on success.
6467 // Offset is in units of ElementSize-sized elements.
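// Illustrative example (not part of the original source; the global name is
// hypothetical): for
//   @a = constant [4 x i16] [i16 1, i16 2, i16 3, i16 4]
// a query with ElementSize == 16 and Offset == 1 is expected to succeed with
// Slice.Array set to the initializer, Slice.Offset == 1 and Slice.Length == 3.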
6468 bool llvm::getConstantDataArrayInfo(const Value *V,
6469 ConstantDataArraySlice &Slice,
6470 unsigned ElementSize, uint64_t Offset) {
6471 assert(V && "V should not be null.");
6472 assert((ElementSize % 8) == 0 &&
6473 "ElementSize expected to be a multiple of the size of a byte.");
6474 unsigned ElementSizeInBytes = ElementSize / 8;
6476 // Drill down into the pointer expression V, ignoring any intervening
6477 // casts, and determine the identity of the object it references along
6478 // with the cumulative byte offset into it.
6479 const GlobalVariable *GV =
6480 dyn_cast<GlobalVariable>(getUnderlyingObject(V));
6481 if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer())
6482 // Fail if V is not based on a constant global object.
6483 return false;
6485 const DataLayout &DL = GV->getDataLayout();
6486 APInt Off(DL.getIndexTypeSizeInBits(V->getType()), 0);
6488 if (GV != V->stripAndAccumulateConstantOffsets(DL, Off,
6489 /*AllowNonInbounds*/ true))
6490 // Fail if a constant offset could not be determined.
6491 return false;
6493 uint64_t StartIdx = Off.getLimitedValue();
6494 if (StartIdx == UINT64_MAX)
6495 // Fail if the constant offset is excessive.
6496 return false;
6498 // Off/StartIdx is in units of bytes, so we need to convert it to a number
6499 // of elements. Simply bail out if that isn't possible.
6500 if ((StartIdx % ElementSizeInBytes) != 0)
6501 return false;
6503 Offset += StartIdx / ElementSizeInBytes;
6504 ConstantDataArray *Array = nullptr;
6505 ArrayType *ArrayTy = nullptr;
6507 if (GV->getInitializer()->isNullValue()) {
6508 Type *GVTy = GV->getValueType();
6509 uint64_t SizeInBytes = DL.getTypeStoreSize(GVTy).getFixedValue();
6510 uint64_t Length = SizeInBytes / ElementSizeInBytes;
6512 Slice.Array = nullptr;
6513 Slice.Offset = 0;
6514 // Return an empty Slice for undersized constants to let callers
6515 // transform even undefined library calls into simpler, well-defined
6516 // expressions. This is preferable to making the calls although it
6517 // prevents sanitizers from detecting such calls.
6518 Slice.Length = Length < Offset ? 0 : Length - Offset;
6519 return true;
6522 auto *Init = const_cast<Constant *>(GV->getInitializer());
6523 if (auto *ArrayInit = dyn_cast<ConstantDataArray>(Init)) {
6524 Type *InitElTy = ArrayInit->getElementType();
6525 if (InitElTy->isIntegerTy(ElementSize)) {
6526 // If Init is an initializer for an array of the expected type
6527 // and size, use it as is.
6528 Array = ArrayInit;
6529 ArrayTy = ArrayInit->getType();
6533 if (!Array) {
6534 if (ElementSize != 8)
6535 // TODO: Handle conversions to larger integral types.
6536 return false;
6538 // Otherwise extract the portion of the initializer starting
6539 // at Offset as an array of bytes, and reset Offset.
6540 Init = ReadByteArrayFromGlobal(GV, Offset);
6541 if (!Init)
6542 return false;
6544 Offset = 0;
6545 Array = dyn_cast<ConstantDataArray>(Init);
6546 ArrayTy = dyn_cast<ArrayType>(Init->getType());
6549 uint64_t NumElts = ArrayTy->getArrayNumElements();
6550 if (Offset > NumElts)
6551 return false;
6553 Slice.Array = Array;
6554 Slice.Offset = Offset;
6555 Slice.Length = NumElts - Offset;
6556 return true;
6559 /// Extract bytes from the initializer of the constant array V, which need
6560 /// not be a nul-terminated string. On success, store the bytes in Str and
6561 /// return true. When TrimAtNul is set, Str will contain only the bytes up
6562 /// to but not including the first nul. Return false on failure.
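// Illustrative example (not part of the original source; the global name is
// hypothetical): for
//   @s = private constant [6 x i8] c"hello\00"
// a query on a pointer to @s with TrimAtNul == true is expected to return true
// and set Str to "hello"; with TrimAtNul == false, Str would also include the
// trailing nul byte.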
6563 bool llvm::getConstantStringInfo(const Value *V, StringRef &Str,
6564 bool TrimAtNul) {
6565 ConstantDataArraySlice Slice;
6566 if (!getConstantDataArrayInfo(V, Slice, 8))
6567 return false;
6569 if (Slice.Array == nullptr) {
6570 if (TrimAtNul) {
6571 // Return a nul-terminated string even for an empty Slice. This is
6572 // safe because all existing SimplifyLibcalls callers require string
6573 // arguments and the behavior of the functions they fold is undefined
6574 // otherwise. Folding the calls this way is preferable to making
6575 // the undefined library calls, even though it prevents sanitizers
6576 // from reporting such calls.
6577 Str = StringRef();
6578 return true;
6580 if (Slice.Length == 1) {
6581 Str = StringRef("", 1);
6582 return true;
6584 // We cannot instantiate a StringRef as we do not have an appropriate string
6585 // of 0s at hand.
6586 return false;
6589 // Start out with the entire array in the StringRef.
6590 Str = Slice.Array->getAsString();
6591 // Skip over 'offset' bytes.
6592 Str = Str.substr(Slice.Offset);
6594 if (TrimAtNul) {
6595 // Trim off the \0 and anything after it. If the array is not nul
6596 // terminated, we just return the rest of the string. The client may know
6597 // some other way that the string is length-bound.
6598 Str = Str.substr(0, Str.find('\0'));
6600 return true;
6603 // These next two are very similar to the above, but also look through PHI
6604 // nodes.
6605 // TODO: See if we can integrate these two together.
6607 /// If we can compute the length of the string pointed to by
6608 /// the specified pointer, return 'len+1'. If we can't, return 0.
6609 static uint64_t GetStringLengthH(const Value *V,
6610 SmallPtrSetImpl<const PHINode*> &PHIs,
6611 unsigned CharSize) {
6612 // Look through noop bitcast instructions.
6613 V = V->stripPointerCasts();
6615 // If this is a PHI node, there are two cases: either we have already seen it
6616 // or we haven't.
6617 if (const PHINode *PN = dyn_cast<PHINode>(V)) {
6618 if (!PHIs.insert(PN).second)
6619 return ~0ULL; // already in the set.
6621 // If it was new, see if all the input strings are the same length.
6622 uint64_t LenSoFar = ~0ULL;
6623 for (Value *IncValue : PN->incoming_values()) {
6624 uint64_t Len = GetStringLengthH(IncValue, PHIs, CharSize);
6625 if (Len == 0) return 0; // Unknown length -> unknown.
6627 if (Len == ~0ULL) continue;
6629 if (Len != LenSoFar && LenSoFar != ~0ULL)
6630 return 0; // Disagree -> unknown.
6631 LenSoFar = Len;
6634 // Success, all agree.
6635 return LenSoFar;
6638 // strlen(select(c,x,y)) -> strlen(x) ^ strlen(y)
6639 if (const SelectInst *SI = dyn_cast<SelectInst>(V)) {
6640 uint64_t Len1 = GetStringLengthH(SI->getTrueValue(), PHIs, CharSize);
6641 if (Len1 == 0) return 0;
6642 uint64_t Len2 = GetStringLengthH(SI->getFalseValue(), PHIs, CharSize);
6643 if (Len2 == 0) return 0;
6644 if (Len1 == ~0ULL) return Len2;
6645 if (Len2 == ~0ULL) return Len1;
6646 if (Len1 != Len2) return 0;
6647 return Len1;
6650 // Otherwise, see if we can read the string.
6651 ConstantDataArraySlice Slice;
6652 if (!getConstantDataArrayInfo(V, Slice, CharSize))
6653 return 0;
6655 if (Slice.Array == nullptr)
6656 // Zeroinitializer (including an empty one).
6657 return 1;
6659 // Search for the first nul character. Return a conservative result even
6660 // when there is no nul. This is safe since otherwise the string function
6661 // being folded (such as strlen) is undefined, and folding can be preferable
6662 // to making the undefined library call.
6663 unsigned NullIndex = 0;
6664 for (unsigned E = Slice.Length; NullIndex < E; ++NullIndex) {
6665 if (Slice.Array->getElementAsInteger(Slice.Offset + NullIndex) == 0)
6666 break;
6669 return NullIndex + 1;
6672 /// If we can compute the length of the string pointed to by
6673 /// the specified pointer, return 'len+1'. If we can't, return 0.
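// Illustrative example (not part of the original source; the global name is
// hypothetical): for
//   @s = private constant [4 x i8] c"abc\00"
// GetStringLength on a pointer to @s returns 4 (strlen("abc") + 1), and it
// returns 0 when the pointed-to contents cannot be determined.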
6674 uint64_t llvm::GetStringLength(const Value *V, unsigned CharSize) {
6675 if (!V->getType()->isPointerTy())
6676 return 0;
6678 SmallPtrSet<const PHINode*, 32> PHIs;
6679 uint64_t Len = GetStringLengthH(V, PHIs, CharSize);
6680 // If Len is ~0ULL, we had an infinite phi cycle: this is dead code, so return
6681 // 1, the length of an empty string.
6682 return Len == ~0ULL ? 1 : Len;
6685 const Value *
6686 llvm::getArgumentAliasingToReturnedPointer(const CallBase *Call,
6687 bool MustPreserveNullness) {
6688 assert(Call &&
6689 "getArgumentAliasingToReturnedPointer only works on nonnull calls");
6690 if (const Value *RV = Call->getReturnedArgOperand())
6691 return RV;
6692 // This can be used only as an aliasing property.
6693 if (isIntrinsicReturningPointerAliasingArgumentWithoutCapturing(
6694 Call, MustPreserveNullness))
6695 return Call->getArgOperand(0);
6696 return nullptr;
6699 bool llvm::isIntrinsicReturningPointerAliasingArgumentWithoutCapturing(
6700 const CallBase *Call, bool MustPreserveNullness) {
6701 switch (Call->getIntrinsicID()) {
6702 case Intrinsic::launder_invariant_group:
6703 case Intrinsic::strip_invariant_group:
6704 case Intrinsic::aarch64_irg:
6705 case Intrinsic::aarch64_tagp:
6706 // The amdgcn_make_buffer_rsrc function does not alter the address of the
6707 // input pointer (and thus preserves null-ness for the purposes of escape
6708 // analysis, which is where the MustPreserveNullness flag comes into play).
6709 // However, it will not necessarily map ptr addrspace(N) null to ptr
6710 // addrspace(8) null, aka the "null descriptor", which has "all loads return
6711 // 0, all stores are dropped" semantics. Given the context of this intrinsic
6712 // list, no one should be relying on such a strict interpretation of
6713 // MustPreserveNullness (and, at time of writing, they are not), but we
6714 // document this fact out of an abundance of caution.
6715 case Intrinsic::amdgcn_make_buffer_rsrc:
6716 return true;
6717 case Intrinsic::ptrmask:
6718 return !MustPreserveNullness;
6719 case Intrinsic::threadlocal_address:
6720 // The underlying variable changes with the thread ID. The thread ID may change
6721 // at coroutine suspend points.
6722 return !Call->getParent()->getParent()->isPresplitCoroutine();
6723 default:
6724 return false;
6728 /// \p PN defines a loop-variant pointer to an object. Check if the
6729 /// previous iteration of the loop was referring to the same object as \p PN.
6730 static bool isSameUnderlyingObjectInLoop(const PHINode *PN,
6731 const LoopInfo *LI) {
6732 // Find the loop-defined value.
6733 Loop *L = LI->getLoopFor(PN->getParent());
6734 if (PN->getNumIncomingValues() != 2)
6735 return true;
6737 // Find the value from previous iteration.
6738 auto *PrevValue = dyn_cast<Instruction>(PN->getIncomingValue(0));
6739 if (!PrevValue || LI->getLoopFor(PrevValue->getParent()) != L)
6740 PrevValue = dyn_cast<Instruction>(PN->getIncomingValue(1));
6741 if (!PrevValue || LI->getLoopFor(PrevValue->getParent()) != L)
6742 return true;
6744 // If a new pointer is loaded in the loop, the pointer references a different
6745 // object in every iteration. E.g.:
6746 // for (i)
6747 // int *p = a[i];
6748 // ...
6749 if (auto *Load = dyn_cast<LoadInst>(PrevValue))
6750 if (!L->isLoopInvariant(Load->getPointerOperand()))
6751 return false;
6752 return true;
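// Illustrative example (not part of the original source; value names are
// hypothetical): for IR such as
//   %a = alloca [16 x i8]
//   %p = getelementptr inbounds [16 x i8], ptr %a, i64 0, i64 4
//   %q = addrspacecast ptr %p to ptr addrspace(1)
// getUnderlyingObject(%q) looks through the addrspacecast and the GEP and
// returns the alloca %a.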
6755 const Value *llvm::getUnderlyingObject(const Value *V, unsigned MaxLookup) {
6756 for (unsigned Count = 0; MaxLookup == 0 || Count < MaxLookup; ++Count) {
6757 if (auto *GEP = dyn_cast<GEPOperator>(V)) {
6758 const Value *PtrOp = GEP->getPointerOperand();
6759 if (!PtrOp->getType()->isPointerTy()) // Only handle scalar pointer base.
6760 return V;
6761 V = PtrOp;
6762 } else if (Operator::getOpcode(V) == Instruction::BitCast ||
6763 Operator::getOpcode(V) == Instruction::AddrSpaceCast) {
6764 Value *NewV = cast<Operator>(V)->getOperand(0);
6765 if (!NewV->getType()->isPointerTy())
6766 return V;
6767 V = NewV;
6768 } else if (auto *GA = dyn_cast<GlobalAlias>(V)) {
6769 if (GA->isInterposable())
6770 return V;
6771 V = GA->getAliasee();
6772 } else {
6773 if (auto *PHI = dyn_cast<PHINode>(V)) {
6774 // Look through single-arg phi nodes created by LCSSA.
6775 if (PHI->getNumIncomingValues() == 1) {
6776 V = PHI->getIncomingValue(0);
6777 continue;
6779 } else if (auto *Call = dyn_cast<CallBase>(V)) {
6780 // CaptureTracking can know about special capturing properties of some
6781 // intrinsics like launder.invariant.group, that can't be expressed with
6782 // the attributes, but have properties like returning aliasing pointer.
6783 // Because some analyses may assume that a nocapture pointer is not
6784 // returned from some special intrinsics (because the function would have
6785 // to be marked with the returned attribute), it is crucial to use this
6786 // function, as it is kept in sync with CaptureTracking. Not using it may
6787 // cause weird miscompilations where two aliasing pointers are assumed to
6788 // be noalias.
6789 if (auto *RP = getArgumentAliasingToReturnedPointer(Call, false)) {
6790 V = RP;
6791 continue;
6795 return V;
6797 assert(V->getType()->isPointerTy() && "Unexpected operand type!");
6799 return V;
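// Illustrative example (not part of the original source; value names are
// hypothetical): for
//   %p = select i1 %c, ptr %a, ptr %b
// getUnderlyingObjects(%p, Objects) collects the underlying objects of both
// %a and %b, whereas getUnderlyingObject(%p) stops at the select because it
// only follows a single chain.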
6802 void llvm::getUnderlyingObjects(const Value *V,
6803 SmallVectorImpl<const Value *> &Objects,
6804 const LoopInfo *LI, unsigned MaxLookup) {
6805 SmallPtrSet<const Value *, 4> Visited;
6806 SmallVector<const Value *, 4> Worklist;
6807 Worklist.push_back(V);
6808 do {
6809 const Value *P = Worklist.pop_back_val();
6810 P = getUnderlyingObject(P, MaxLookup);
6812 if (!Visited.insert(P).second)
6813 continue;
6815 if (auto *SI = dyn_cast<SelectInst>(P)) {
6816 Worklist.push_back(SI->getTrueValue());
6817 Worklist.push_back(SI->getFalseValue());
6818 continue;
6821 if (auto *PN = dyn_cast<PHINode>(P)) {
6822 // If this PHI changes the underlying object in every iteration of the
6823 // loop, don't look through it. Consider:
6824 // int **A;
6825 // for (i) {
6826 // Prev = Curr; // Prev = PHI (Prev_0, Curr)
6827 // Curr = A[i];
6828 // *Prev, *Curr;
6830 // Prev is tracking Curr one iteration behind so they refer to different
6831 // underlying objects.
6832 if (!LI || !LI->isLoopHeader(PN->getParent()) ||
6833 isSameUnderlyingObjectInLoop(PN, LI))
6834 append_range(Worklist, PN->incoming_values());
6835 else
6836 Objects.push_back(P);
6837 continue;
6840 Objects.push_back(P);
6841 } while (!Worklist.empty());
6844 const Value *llvm::getUnderlyingObjectAggressive(const Value *V) {
6845 const unsigned MaxVisited = 8;
6847 SmallPtrSet<const Value *, 8> Visited;
6848 SmallVector<const Value *, 8> Worklist;
6849 Worklist.push_back(V);
6850 const Value *Object = nullptr;
6851 // Used as a fallback if we can't find a common underlying object through
6852 // recursion.
6853 bool First = true;
6854 const Value *FirstObject = getUnderlyingObject(V);
6855 do {
6856 const Value *P = Worklist.pop_back_val();
6857 P = First ? FirstObject : getUnderlyingObject(P);
6858 First = false;
6860 if (!Visited.insert(P).second)
6861 continue;
6863 if (Visited.size() == MaxVisited)
6864 return FirstObject;
6866 if (auto *SI = dyn_cast<SelectInst>(P)) {
6867 Worklist.push_back(SI->getTrueValue());
6868 Worklist.push_back(SI->getFalseValue());
6869 continue;
6872 if (auto *PN = dyn_cast<PHINode>(P)) {
6873 append_range(Worklist, PN->incoming_values());
6874 continue;
6877 if (!Object)
6878 Object = P;
6879 else if (Object != P)
6880 return FirstObject;
6881 } while (!Worklist.empty());
6883 return Object ? Object : FirstObject;
6886 /// This is the function that does the work of looking through basic
6887 /// ptrtoint+arithmetic+inttoptr sequences.
6888 static const Value *getUnderlyingObjectFromInt(const Value *V) {
6889 do {
6890 if (const Operator *U = dyn_cast<Operator>(V)) {
6891 // If we find a ptrtoint, we can transfer control back to the
6892 // regular getUnderlyingObjectFromInt.
6893 if (U->getOpcode() == Instruction::PtrToInt)
6894 return U->getOperand(0);
6895 // If we find an add of a constant, a multiplied value, or a phi, it's
6896 // likely that the other operand will lead us to the base
6897 // object. We don't have to worry about the case where the
6898 // object address is somehow being computed by the multiply,
6899 // because our callers only care when the result is an
6900 // identifiable object.
6901 if (U->getOpcode() != Instruction::Add ||
6902 (!isa<ConstantInt>(U->getOperand(1)) &&
6903 Operator::getOpcode(U->getOperand(1)) != Instruction::Mul &&
6904 !isa<PHINode>(U->getOperand(1))))
6905 return V;
6906 V = U->getOperand(0);
6907 } else {
6908 return V;
6910 assert(V->getType()->isIntegerTy() && "Unexpected operand type!");
6911 } while (true);
6914 /// This is a wrapper around getUnderlyingObjects and adds support for basic
6915 /// ptrtoint+arithmetic+inttoptr sequences.
6916 /// It returns false if an unidentified object is found in getUnderlyingObjects.
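// Illustrative example (not part of the original source; value names are
// hypothetical): for a sequence such as
//   %i = ptrtoint ptr %g to i64
//   %j = add i64 %i, 16
//   %p = inttoptr i64 %j to ptr
// this function is expected to look through the inttoptr/add/ptrtoint chain
// and report %g, provided %g is an identifiable object such as a global
// variable or an alloca.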
6917 bool llvm::getUnderlyingObjectsForCodeGen(const Value *V,
6918 SmallVectorImpl<Value *> &Objects) {
6919 SmallPtrSet<const Value *, 16> Visited;
6920 SmallVector<const Value *, 4> Working(1, V);
6921 do {
6922 V = Working.pop_back_val();
6924 SmallVector<const Value *, 4> Objs;
6925 getUnderlyingObjects(V, Objs);
6927 for (const Value *V : Objs) {
6928 if (!Visited.insert(V).second)
6929 continue;
6930 if (Operator::getOpcode(V) == Instruction::IntToPtr) {
6931 const Value *O =
6932 getUnderlyingObjectFromInt(cast<User>(V)->getOperand(0));
6933 if (O->getType()->isPointerTy()) {
6934 Working.push_back(O);
6935 continue;
6938 // If getUnderlyingObjects fails to find an identifiable object,
6939 // getUnderlyingObjectsForCodeGen also fails for safety.
6940 if (!isIdentifiedObject(V)) {
6941 Objects.clear();
6942 return false;
6944 Objects.push_back(const_cast<Value *>(V));
6946 } while (!Working.empty());
6947 return true;
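// Illustrative example (not part of the original source; value names are
// hypothetical): for
//   %a   = alloca i64
//   %gep = getelementptr i8, ptr %a, i64 0
//   %sel = select i1 %c, ptr %a, ptr %gep
// findAllocaForValue(%sel) walks through the select and the GEP and returns
// %a, since every path leads to the same alloca; it returns nullptr when two
// different allocas (or an unknown value) are reachable.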
6950 AllocaInst *llvm::findAllocaForValue(Value *V, bool OffsetZero) {
6951 AllocaInst *Result = nullptr;
6952 SmallPtrSet<Value *, 4> Visited;
6953 SmallVector<Value *, 4> Worklist;
6955 auto AddWork = [&](Value *V) {
6956 if (Visited.insert(V).second)
6957 Worklist.push_back(V);
6960 AddWork(V);
6961 do {
6962 V = Worklist.pop_back_val();
6963 assert(Visited.count(V));
6965 if (AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
6966 if (Result && Result != AI)
6967 return nullptr;
6968 Result = AI;
6969 } else if (CastInst *CI = dyn_cast<CastInst>(V)) {
6970 AddWork(CI->getOperand(0));
6971 } else if (PHINode *PN = dyn_cast<PHINode>(V)) {
6972 for (Value *IncValue : PN->incoming_values())
6973 AddWork(IncValue);
6974 } else if (auto *SI = dyn_cast<SelectInst>(V)) {
6975 AddWork(SI->getTrueValue());
6976 AddWork(SI->getFalseValue());
6977 } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(V)) {
6978 if (OffsetZero && !GEP->hasAllZeroIndices())
6979 return nullptr;
6980 AddWork(GEP->getPointerOperand());
6981 } else if (CallBase *CB = dyn_cast<CallBase>(V)) {
6982 Value *Returned = CB->getReturnedArgOperand();
6983 if (Returned)
6984 AddWork(Returned);
6985 else
6986 return nullptr;
6987 } else {
6988 return nullptr;
6990 } while (!Worklist.empty());
6992 return Result;
6995 static bool onlyUsedByLifetimeMarkersOrDroppableInstsHelper(
6996 const Value *V, bool AllowLifetime, bool AllowDroppable) {
6997 for (const User *U : V->users()) {
6998 const IntrinsicInst *II = dyn_cast<IntrinsicInst>(U);
6999 if (!II)
7000 return false;
7002 if (AllowLifetime && II->isLifetimeStartOrEnd())
7003 continue;
7005 if (AllowDroppable && II->isDroppable())
7006 continue;
7008 return false;
7010 return true;
7013 bool llvm::onlyUsedByLifetimeMarkers(const Value *V) {
7014 return onlyUsedByLifetimeMarkersOrDroppableInstsHelper(
7015 V, /* AllowLifetime */ true, /* AllowDroppable */ false);
7017 bool llvm::onlyUsedByLifetimeMarkersOrDroppableInsts(const Value *V) {
7018 return onlyUsedByLifetimeMarkersOrDroppableInstsHelper(
7019 V, /* AllowLifetime */ true, /* AllowDroppable */ true);
7022 bool llvm::isNotCrossLaneOperation(const Instruction *I) {
7023 if (auto *II = dyn_cast<IntrinsicInst>(I))
7024 return isTriviallyVectorizable(II->getIntrinsicID());
7025 auto *Shuffle = dyn_cast<ShuffleVectorInst>(I);
7026 return (!Shuffle || Shuffle->isSelect()) &&
7027 !isa<CallBase, BitCastInst, ExtractElementInst>(I);
7030 bool llvm::isSafeToSpeculativelyExecute(const Instruction *Inst,
7031 const Instruction *CtxI,
7032 AssumptionCache *AC,
7033 const DominatorTree *DT,
7034 const TargetLibraryInfo *TLI,
7035 bool UseVariableInfo) {
7036 return isSafeToSpeculativelyExecuteWithOpcode(Inst->getOpcode(), Inst, CtxI,
7037 AC, DT, TLI, UseVariableInfo);
7040 bool llvm::isSafeToSpeculativelyExecuteWithOpcode(
7041 unsigned Opcode, const Instruction *Inst, const Instruction *CtxI,
7042 AssumptionCache *AC, const DominatorTree *DT, const TargetLibraryInfo *TLI,
7043 bool UseVariableInfo) {
7044 #ifndef NDEBUG
7045 if (Inst->getOpcode() != Opcode) {
7046 // Check that the operands are actually compatible with the Opcode override.
7047 auto hasEqualReturnAndLeadingOperandTypes =
7048 [](const Instruction *Inst, unsigned NumLeadingOperands) {
7049 if (Inst->getNumOperands() < NumLeadingOperands)
7050 return false;
7051 const Type *ExpectedType = Inst->getType();
7052 for (unsigned ItOp = 0; ItOp < NumLeadingOperands; ++ItOp)
7053 if (Inst->getOperand(ItOp)->getType() != ExpectedType)
7054 return false;
7055 return true;
7057 assert(!Instruction::isBinaryOp(Opcode) ||
7058 hasEqualReturnAndLeadingOperandTypes(Inst, 2));
7059 assert(!Instruction::isUnaryOp(Opcode) ||
7060 hasEqualReturnAndLeadingOperandTypes(Inst, 1));
7062 #endif
7064 switch (Opcode) {
7065 default:
7066 return true;
7067 case Instruction::UDiv:
7068 case Instruction::URem: {
7069 // x / y is undefined if y == 0.
7070 const APInt *V;
7071 if (match(Inst->getOperand(1), m_APInt(V)))
7072 return *V != 0;
7073 return false;
7075 case Instruction::SDiv:
7076 case Instruction::SRem: {
7077 // x / y is undefined if y == 0, or if x == INT_MIN and y == -1.
7078 const APInt *Numerator, *Denominator;
7079 if (!match(Inst->getOperand(1), m_APInt(Denominator)))
7080 return false;
7081 // We cannot hoist this division if the denominator is 0.
7082 if (*Denominator == 0)
7083 return false;
7084 // It's safe to hoist if the denominator is not 0 or -1.
7085 if (!Denominator->isAllOnes())
7086 return true;
7087 // At this point we know that the denominator is -1. It is safe to hoist as
7088 // long we know that the numerator is not INT_MIN.
7089 if (match(Inst->getOperand(0), m_APInt(Numerator)))
7090 return !Numerator->isMinSignedValue();
7091 // The numerator *might* be MinSignedValue.
7092 return false;
7094 case Instruction::Load: {
7095 if (!UseVariableInfo)
7096 return false;
7098 const LoadInst *LI = dyn_cast<LoadInst>(Inst);
7099 if (!LI)
7100 return false;
7101 if (mustSuppressSpeculation(*LI))
7102 return false;
7103 const DataLayout &DL = LI->getDataLayout();
7104 return isDereferenceableAndAlignedPointer(LI->getPointerOperand(),
7105 LI->getType(), LI->getAlign(), DL,
7106 CtxI, AC, DT, TLI);
7108 case Instruction::Call: {
7109 auto *CI = dyn_cast<const CallInst>(Inst);
7110 if (!CI)
7111 return false;
7112 const Function *Callee = CI->getCalledFunction();
7114 // The called function could have undefined behavior or side-effects, even
7115 // if marked readnone nounwind.
7116 return Callee && Callee->isSpeculatable();
7118 case Instruction::VAArg:
7119 case Instruction::Alloca:
7120 case Instruction::Invoke:
7121 case Instruction::CallBr:
7122 case Instruction::PHI:
7123 case Instruction::Store:
7124 case Instruction::Ret:
7125 case Instruction::Br:
7126 case Instruction::IndirectBr:
7127 case Instruction::Switch:
7128 case Instruction::Unreachable:
7129 case Instruction::Fence:
7130 case Instruction::AtomicRMW:
7131 case Instruction::AtomicCmpXchg:
7132 case Instruction::LandingPad:
7133 case Instruction::Resume:
7134 case Instruction::CatchSwitch:
7135 case Instruction::CatchPad:
7136 case Instruction::CatchRet:
7137 case Instruction::CleanupPad:
7138 case Instruction::CleanupRet:
7139 return false; // Misc instructions which have effects
7143 bool llvm::mayHaveNonDefUseDependency(const Instruction &I) {
7144 if (I.mayReadOrWriteMemory())
7145 // Memory dependency possible
7146 return true;
7147 if (!isSafeToSpeculativelyExecute(&I))
7148 // Can't move above a maythrow call or infinite loop. Or if an
7149 // inalloca alloca, above a stacksave call.
7150 return true;
7151 if (!isGuaranteedToTransferExecutionToSuccessor(&I))
7152 // 1) Can't reorder two inf-loop calls, even if readonly
7153 // 2) Also can't reorder an inf-loop call below an instruction which isn't
7154 // safe to speculatively execute. (Inverse of the above.)
7155 return true;
7156 return false;
7159 /// Convert ConstantRange OverflowResult into ValueTracking OverflowResult.
7160 static OverflowResult mapOverflowResult(ConstantRange::OverflowResult OR) {
7161 switch (OR) {
7162 case ConstantRange::OverflowResult::MayOverflow:
7163 return OverflowResult::MayOverflow;
7164 case ConstantRange::OverflowResult::AlwaysOverflowsLow:
7165 return OverflowResult::AlwaysOverflowsLow;
7166 case ConstantRange::OverflowResult::AlwaysOverflowsHigh:
7167 return OverflowResult::AlwaysOverflowsHigh;
7168 case ConstantRange::OverflowResult::NeverOverflows:
7169 return OverflowResult::NeverOverflows;
7171 llvm_unreachable("Unknown OverflowResult");
7174 /// Combine constant ranges from computeConstantRange() and computeKnownBits().
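// Illustrative example (not part of the original source): for an i8 value
// queried with ForSigned == false whose known bits say the top four bits are
// zero, fromKnownBits() yields [0, 16); if computeConstantRange()
// independently yields [3, 100), the intersection returned here is [3, 16).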
7175 ConstantRange
7176 llvm::computeConstantRangeIncludingKnownBits(const WithCache<const Value *> &V,
7177 bool ForSigned,
7178 const SimplifyQuery &SQ) {
7179 ConstantRange CR1 =
7180 ConstantRange::fromKnownBits(V.getKnownBits(SQ), ForSigned);
7181 ConstantRange CR2 = computeConstantRange(V, ForSigned, SQ.IIQ.UseInstrInfo);
7182 ConstantRange::PreferredRangeType RangeType =
7183 ForSigned ? ConstantRange::Signed : ConstantRange::Unsigned;
7184 return CR1.intersectWith(CR2, RangeType);
7187 OverflowResult llvm::computeOverflowForUnsignedMul(const Value *LHS,
7188 const Value *RHS,
7189 const SimplifyQuery &SQ,
7190 bool IsNSW) {
7191 KnownBits LHSKnown = computeKnownBits(LHS, /*Depth=*/0, SQ);
7192 KnownBits RHSKnown = computeKnownBits(RHS, /*Depth=*/0, SQ);
7194 // mul nsw of two non-negative numbers is also nuw.
7195 if (IsNSW && LHSKnown.isNonNegative() && RHSKnown.isNonNegative())
7196 return OverflowResult::NeverOverflows;
7198 ConstantRange LHSRange = ConstantRange::fromKnownBits(LHSKnown, false);
7199 ConstantRange RHSRange = ConstantRange::fromKnownBits(RHSKnown, false);
7200 return mapOverflowResult(LHSRange.unsignedMulMayOverflow(RHSRange));
7203 OverflowResult llvm::computeOverflowForSignedMul(const Value *LHS,
7204 const Value *RHS,
7205 const SimplifyQuery &SQ) {
7206 // Multiplying n * m significant bits yields a result of n + m significant
7207 // bits. If the total number of significant bits does not exceed the
7208 // result bit width (minus 1), there is no overflow.
7209 // This means if we have enough leading sign bits in the operands
7210 // we can guarantee that the result does not overflow.
7211 // Ref: "Hacker's Delight" by Henry Warren
7212 unsigned BitWidth = LHS->getType()->getScalarSizeInBits();
7214 // Note that underestimating the number of sign bits gives a more
7215 // conservative answer.
7216 unsigned SignBits =
7217 ::ComputeNumSignBits(LHS, 0, SQ) + ::ComputeNumSignBits(RHS, 0, SQ);
7219 // First handle the easy case: if we have enough sign bits there's
7220 // definitely no overflow.
7221 if (SignBits > BitWidth + 1)
7222 return OverflowResult::NeverOverflows;
7224 // There are two ambiguous cases where there can be no overflow:
7225 // SignBits == BitWidth + 1 and
7226 // SignBits == BitWidth
7227 // The second case is difficult to check, therefore we only handle the
7228 // first case.
7229 if (SignBits == BitWidth + 1) {
7230 // It overflows only when both arguments are negative and the true
7231 // product is exactly the minimum negative number.
7232 // E.g. mul i16 with 17 sign bits: 0xff00 * 0xff80 = 0x8000
7233 // For simplicity we just check if at least one side is not negative.
7234 KnownBits LHSKnown = computeKnownBits(LHS, /*Depth=*/0, SQ);
7235 KnownBits RHSKnown = computeKnownBits(RHS, /*Depth=*/0, SQ);
7236 if (LHSKnown.isNonNegative() || RHSKnown.isNonNegative())
7237 return OverflowResult::NeverOverflows;
7239 return OverflowResult::MayOverflow;
7242 OverflowResult
7243 llvm::computeOverflowForUnsignedAdd(const WithCache<const Value *> &LHS,
7244 const WithCache<const Value *> &RHS,
7245 const SimplifyQuery &SQ) {
7246 ConstantRange LHSRange =
7247 computeConstantRangeIncludingKnownBits(LHS, /*ForSigned=*/false, SQ);
7248 ConstantRange RHSRange =
7249 computeConstantRangeIncludingKnownBits(RHS, /*ForSigned=*/false, SQ);
7250 return mapOverflowResult(LHSRange.unsignedAddMayOverflow(RHSRange));
7253 static OverflowResult
7254 computeOverflowForSignedAdd(const WithCache<const Value *> &LHS,
7255 const WithCache<const Value *> &RHS,
7256 const AddOperator *Add, const SimplifyQuery &SQ) {
7257 if (Add && Add->hasNoSignedWrap()) {
7258 return OverflowResult::NeverOverflows;
7261 // If LHS and RHS each have at least two sign bits, the addition will look
7262 // like
7264 // XX..... +
7265 // YY.....
7267 // If the carry into the most significant position is 0, X and Y can't both
7268 // be 1 and therefore the carry out of the addition is also 0.
7270 // If the carry into the most significant position is 1, X and Y can't both
7271 // be 0 and therefore the carry out of the addition is also 1.
7273 // Since the carry into the most significant position is always equal to
7274 // the carry out of the addition, there is no signed overflow.
7275 if (::ComputeNumSignBits(LHS, 0, SQ) > 1 &&
7276 ::ComputeNumSignBits(RHS, 0, SQ) > 1)
7277 return OverflowResult::NeverOverflows;
7279 ConstantRange LHSRange =
7280 computeConstantRangeIncludingKnownBits(LHS, /*ForSigned=*/true, SQ);
7281 ConstantRange RHSRange =
7282 computeConstantRangeIncludingKnownBits(RHS, /*ForSigned=*/true, SQ);
7283 OverflowResult OR =
7284 mapOverflowResult(LHSRange.signedAddMayOverflow(RHSRange));
7285 if (OR != OverflowResult::MayOverflow)
7286 return OR;
7288 // The remaining code needs Add to be available. Return early if it is not.
7289 if (!Add)
7290 return OverflowResult::MayOverflow;
7292 // If the sign of Add is the same as at least one of the operands, this add
7293 // CANNOT overflow. If this can be determined from the known bits of the
7294 // operands the above signedAddMayOverflow() check will have already done so.
7295 // The only other way to improve on the known bits is from an assumption, so
7296 // call computeKnownBitsFromContext() directly.
7297 bool LHSOrRHSKnownNonNegative =
7298 (LHSRange.isAllNonNegative() || RHSRange.isAllNonNegative());
7299 bool LHSOrRHSKnownNegative =
7300 (LHSRange.isAllNegative() || RHSRange.isAllNegative());
7301 if (LHSOrRHSKnownNonNegative || LHSOrRHSKnownNegative) {
7302 KnownBits AddKnown(LHSRange.getBitWidth());
7303 computeKnownBitsFromContext(Add, AddKnown, /*Depth=*/0, SQ);
7304 if ((AddKnown.isNonNegative() && LHSOrRHSKnownNonNegative) ||
7305 (AddKnown.isNegative() && LHSOrRHSKnownNegative))
7306 return OverflowResult::NeverOverflows;
7309 return OverflowResult::MayOverflow;
7312 OverflowResult llvm::computeOverflowForUnsignedSub(const Value *LHS,
7313 const Value *RHS,
7314 const SimplifyQuery &SQ) {
7315 // X - (X % ?)
7316 // The remainder of a value can't have greater magnitude than itself,
7317 // so the subtraction can't overflow.
7319 // X - (X -nuw ?)
7320 // In the minimal case, this would simplify to "?", so there's no subtract
7321 // at all. But if this analysis is used to peek through casts, for example,
7322 // then determining no-overflow may allow other transforms.
7324 // TODO: There are other patterns like this.
7325 // See simplifyICmpWithBinOpOnLHS() for candidates.
7326 if (match(RHS, m_URem(m_Specific(LHS), m_Value())) ||
7327 match(RHS, m_NUWSub(m_Specific(LHS), m_Value())))
7328 if (isGuaranteedNotToBeUndef(LHS, SQ.AC, SQ.CxtI, SQ.DT))
7329 return OverflowResult::NeverOverflows;
7331 if (auto C = isImpliedByDomCondition(CmpInst::ICMP_UGE, LHS, RHS, SQ.CxtI,
7332 SQ.DL)) {
7333 if (*C)
7334 return OverflowResult::NeverOverflows;
7335 return OverflowResult::AlwaysOverflowsLow;
7338 ConstantRange LHSRange =
7339 computeConstantRangeIncludingKnownBits(LHS, /*ForSigned=*/false, SQ);
7340 ConstantRange RHSRange =
7341 computeConstantRangeIncludingKnownBits(RHS, /*ForSigned=*/false, SQ);
7342 return mapOverflowResult(LHSRange.unsignedSubMayOverflow(RHSRange));
7345 OverflowResult llvm::computeOverflowForSignedSub(const Value *LHS,
7346 const Value *RHS,
7347 const SimplifyQuery &SQ) {
7348 // X - (X % ?)
7349 // The remainder of a value can't have greater magnitude than itself,
7350 // so the subtraction can't overflow.
7352 // X - (X -nsw ?)
7353 // In the minimal case, this would simplify to "?", so there's no subtract
7354 // at all. But if this analysis is used to peek through casts, for example,
7355 // then determining no-overflow may allow other transforms.
7356 if (match(RHS, m_SRem(m_Specific(LHS), m_Value())) ||
7357 match(RHS, m_NSWSub(m_Specific(LHS), m_Value())))
7358 if (isGuaranteedNotToBeUndef(LHS, SQ.AC, SQ.CxtI, SQ.DT))
7359 return OverflowResult::NeverOverflows;
7361 // If LHS and RHS each have at least two sign bits, the subtraction
7362 // cannot overflow.
7363 if (::ComputeNumSignBits(LHS, 0, SQ) > 1 &&
7364 ::ComputeNumSignBits(RHS, 0, SQ) > 1)
7365 return OverflowResult::NeverOverflows;
7367 ConstantRange LHSRange =
7368 computeConstantRangeIncludingKnownBits(LHS, /*ForSigned=*/true, SQ);
7369 ConstantRange RHSRange =
7370 computeConstantRangeIncludingKnownBits(RHS, /*ForSigned=*/true, SQ);
7371 return mapOverflowResult(LHSRange.signedSubMayOverflow(RHSRange));
7374 bool llvm::isOverflowIntrinsicNoWrap(const WithOverflowInst *WO,
7375 const DominatorTree &DT) {
7376 SmallVector<const BranchInst *, 2> GuardingBranches;
7377 SmallVector<const ExtractValueInst *, 2> Results;
7379 for (const User *U : WO->users()) {
7380 if (const auto *EVI = dyn_cast<ExtractValueInst>(U)) {
7381 assert(EVI->getNumIndices() == 1 && "Obvious from CI's type");
7383 if (EVI->getIndices()[0] == 0)
7384 Results.push_back(EVI);
7385 else {
7386 assert(EVI->getIndices()[0] == 1 && "Obvious from CI's type");
7388 for (const auto *U : EVI->users())
7389 if (const auto *B = dyn_cast<BranchInst>(U)) {
7390 assert(B->isConditional() && "How else is it using an i1?");
7391 GuardingBranches.push_back(B);
7394 } else {
7395 // We are using the aggregate directly in a way we don't want to analyze
7396 // here (storing it to a global, say).
7397 return false;
7401 auto AllUsesGuardedByBranch = [&](const BranchInst *BI) {
7402 BasicBlockEdge NoWrapEdge(BI->getParent(), BI->getSuccessor(1));
7403 if (!NoWrapEdge.isSingleEdge())
7404 return false;
7406 // Check if all users of the add are provably no-wrap.
7407 for (const auto *Result : Results) {
7408 // If the extractvalue itself is not executed on overflow, then we don't
7409 // need to check each use separately, since domination is transitive.
7410 if (DT.dominates(NoWrapEdge, Result->getParent()))
7411 continue;
7413 for (const auto &RU : Result->uses())
7414 if (!DT.dominates(NoWrapEdge, RU))
7415 return false;
7418 return true;
7421 return llvm::any_of(GuardingBranches, AllUsesGuardedByBranch);
7424 /// Shifts return poison if shiftwidth is larger than the bitwidth.
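// Illustrative example (not part of the original source): for a shift of a
// <2 x i32> vector, a constant shift-amount operand of <i32 3, i32 7> is known
// to be in range, whereas <i32 3, i32 35> is not, because 35 >= 32.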
7425 static bool shiftAmountKnownInRange(const Value *ShiftAmount) {
7426 auto *C = dyn_cast<Constant>(ShiftAmount);
7427 if (!C)
7428 return false;
7430 // Shifts return poison if shiftwidth is larger than the bitwidth.
7431 SmallVector<const Constant *, 4> ShiftAmounts;
7432 if (auto *FVTy = dyn_cast<FixedVectorType>(C->getType())) {
7433 unsigned NumElts = FVTy->getNumElements();
7434 for (unsigned i = 0; i < NumElts; ++i)
7435 ShiftAmounts.push_back(C->getAggregateElement(i));
7436 } else if (isa<ScalableVectorType>(C->getType()))
7437 return false; // Can't tell, just return false to be safe
7438 else
7439 ShiftAmounts.push_back(C);
7441 bool Safe = llvm::all_of(ShiftAmounts, [](const Constant *C) {
7442 auto *CI = dyn_cast_or_null<ConstantInt>(C);
7443 return CI && CI->getValue().ult(C->getType()->getIntegerBitWidth());
7446 return Safe;
7449 enum class UndefPoisonKind {
7450 PoisonOnly = (1 << 0),
7451 UndefOnly = (1 << 1),
7452 UndefOrPoison = PoisonOnly | UndefOnly,
7455 static bool includesPoison(UndefPoisonKind Kind) {
7456 return (unsigned(Kind) & unsigned(UndefPoisonKind::PoisonOnly)) != 0;
7459 static bool includesUndef(UndefPoisonKind Kind) {
7460 return (unsigned(Kind) & unsigned(UndefPoisonKind::UndefOnly)) != 0;
7463 static bool canCreateUndefOrPoison(const Operator *Op, UndefPoisonKind Kind,
7464 bool ConsiderFlagsAndMetadata) {
7466 if (ConsiderFlagsAndMetadata && includesPoison(Kind) &&
7467 Op->hasPoisonGeneratingAnnotations())
7468 return true;
7470 unsigned Opcode = Op->getOpcode();
7472 // Check whether opcode is a poison/undef-generating operation
7473 switch (Opcode) {
7474 case Instruction::Shl:
7475 case Instruction::AShr:
7476 case Instruction::LShr:
7477 return includesPoison(Kind) && !shiftAmountKnownInRange(Op->getOperand(1));
7478 case Instruction::FPToSI:
7479 case Instruction::FPToUI:
7480 // fptosi/ui yields poison if the resulting value does not fit in the
7481 // destination type.
7482 return true;
7483 case Instruction::Call:
7484 if (auto *II = dyn_cast<IntrinsicInst>(Op)) {
7485 switch (II->getIntrinsicID()) {
7486 // TODO: Add more intrinsics.
7487 case Intrinsic::ctlz:
7488 case Intrinsic::cttz:
7489 case Intrinsic::abs:
7490 if (cast<ConstantInt>(II->getArgOperand(1))->isNullValue())
7491 return false;
7492 break;
7493 case Intrinsic::ctpop:
7494 case Intrinsic::bswap:
7495 case Intrinsic::bitreverse:
7496 case Intrinsic::fshl:
7497 case Intrinsic::fshr:
7498 case Intrinsic::smax:
7499 case Intrinsic::smin:
7500 case Intrinsic::umax:
7501 case Intrinsic::umin:
7502 case Intrinsic::ptrmask:
7503 case Intrinsic::fptoui_sat:
7504 case Intrinsic::fptosi_sat:
7505 case Intrinsic::sadd_with_overflow:
7506 case Intrinsic::ssub_with_overflow:
7507 case Intrinsic::smul_with_overflow:
7508 case Intrinsic::uadd_with_overflow:
7509 case Intrinsic::usub_with_overflow:
7510 case Intrinsic::umul_with_overflow:
7511 case Intrinsic::sadd_sat:
7512 case Intrinsic::uadd_sat:
7513 case Intrinsic::ssub_sat:
7514 case Intrinsic::usub_sat:
7515 return false;
7516 case Intrinsic::sshl_sat:
7517 case Intrinsic::ushl_sat:
7518 return includesPoison(Kind) &&
7519 !shiftAmountKnownInRange(II->getArgOperand(1));
7520 case Intrinsic::fma:
7521 case Intrinsic::fmuladd:
7522 case Intrinsic::sqrt:
7523 case Intrinsic::powi:
7524 case Intrinsic::sin:
7525 case Intrinsic::cos:
7526 case Intrinsic::pow:
7527 case Intrinsic::log:
7528 case Intrinsic::log10:
7529 case Intrinsic::log2:
7530 case Intrinsic::exp:
7531 case Intrinsic::exp2:
7532 case Intrinsic::exp10:
7533 case Intrinsic::fabs:
7534 case Intrinsic::copysign:
7535 case Intrinsic::floor:
7536 case Intrinsic::ceil:
7537 case Intrinsic::trunc:
7538 case Intrinsic::rint:
7539 case Intrinsic::nearbyint:
7540 case Intrinsic::round:
7541 case Intrinsic::roundeven:
7542 case Intrinsic::fptrunc_round:
7543 case Intrinsic::canonicalize:
7544 case Intrinsic::arithmetic_fence:
7545 case Intrinsic::minnum:
7546 case Intrinsic::maxnum:
7547 case Intrinsic::minimum:
7548 case Intrinsic::maximum:
7549 case Intrinsic::is_fpclass:
7550 case Intrinsic::ldexp:
7551 case Intrinsic::frexp:
7552 return false;
7553 case Intrinsic::lround:
7554 case Intrinsic::llround:
7555 case Intrinsic::lrint:
7556 case Intrinsic::llrint:
7557 // If the value doesn't fit, an unspecified value is returned (but this
7558 // is not poison).
7559 return false;
7562 [[fallthrough]];
7563 case Instruction::CallBr:
7564 case Instruction::Invoke: {
7565 const auto *CB = cast<CallBase>(Op);
7566 return !CB->hasRetAttr(Attribute::NoUndef);
7568 case Instruction::InsertElement:
7569 case Instruction::ExtractElement: {
7570 // If the index exceeds the length of the vector, the result is poison.
7571 auto *VTy = cast<VectorType>(Op->getOperand(0)->getType());
7572 unsigned IdxOp = Op->getOpcode() == Instruction::InsertElement ? 2 : 1;
7573 auto *Idx = dyn_cast<ConstantInt>(Op->getOperand(IdxOp));
7574 if (includesPoison(Kind))
7575 return !Idx ||
7576 Idx->getValue().uge(VTy->getElementCount().getKnownMinValue());
7577 return false;
7579 case Instruction::ShuffleVector: {
7580 ArrayRef<int> Mask = isa<ConstantExpr>(Op)
7581 ? cast<ConstantExpr>(Op)->getShuffleMask()
7582 : cast<ShuffleVectorInst>(Op)->getShuffleMask();
7583 return includesPoison(Kind) && is_contained(Mask, PoisonMaskElem);
7585 case Instruction::FNeg:
7586 case Instruction::PHI:
7587 case Instruction::Select:
7588 case Instruction::URem:
7589 case Instruction::SRem:
7590 case Instruction::ExtractValue:
7591 case Instruction::InsertValue:
7592 case Instruction::Freeze:
7593 case Instruction::ICmp:
7594 case Instruction::FCmp:
7595 case Instruction::FAdd:
7596 case Instruction::FSub:
7597 case Instruction::FMul:
7598 case Instruction::FDiv:
7599 case Instruction::FRem:
7600 return false;
7601 case Instruction::GetElementPtr:
7602 // inbounds is handled above
7603 // TODO: what about inrange on constexpr?
7604 return false;
7605 default: {
7606 const auto *CE = dyn_cast<ConstantExpr>(Op);
7607 if (isa<CastInst>(Op) || (CE && CE->isCast()))
7608 return false;
7609 else if (Instruction::isBinaryOp(Opcode))
7610 return false;
7611 // Be conservative and return true.
7612 return true;
7617 bool llvm::canCreateUndefOrPoison(const Operator *Op,
7618 bool ConsiderFlagsAndMetadata) {
7619 return ::canCreateUndefOrPoison(Op, UndefPoisonKind::UndefOrPoison,
7620 ConsiderFlagsAndMetadata);
7623 bool llvm::canCreatePoison(const Operator *Op, bool ConsiderFlagsAndMetadata) {
7624 return ::canCreateUndefOrPoison(Op, UndefPoisonKind::PoisonOnly,
7625 ConsiderFlagsAndMetadata);
7628 static bool directlyImpliesPoison(const Value *ValAssumedPoison, const Value *V,
7629 unsigned Depth) {
7630 if (ValAssumedPoison == V)
7631 return true;
7633 const unsigned MaxDepth = 2;
7634 if (Depth >= MaxDepth)
7635 return false;
7637 if (const auto *I = dyn_cast<Instruction>(V)) {
7638 if (any_of(I->operands(), [=](const Use &Op) {
7639 return propagatesPoison(Op) &&
7640 directlyImpliesPoison(ValAssumedPoison, Op, Depth + 1);
7642 return true;
7644 // V = extractvalue V0, idx
7645 // V2 = extractvalue V0, idx2
7646 // V0's elements are either all poison or all non-poison (e.g., add_with_overflow).
7647 const WithOverflowInst *II;
7648 if (match(I, m_ExtractValue(m_WithOverflowInst(II))) &&
7649 (match(ValAssumedPoison, m_ExtractValue(m_Specific(II))) ||
7650 llvm::is_contained(II->args(), ValAssumedPoison)))
7651 return true;
7653 return false;
7656 static bool impliesPoison(const Value *ValAssumedPoison, const Value *V,
7657 unsigned Depth) {
7658 if (isGuaranteedNotToBePoison(ValAssumedPoison))
7659 return true;
7661 if (directlyImpliesPoison(ValAssumedPoison, V, /* Depth */ 0))
7662 return true;
7664 const unsigned MaxDepth = 2;
7665 if (Depth >= MaxDepth)
7666 return false;
7668 const auto *I = dyn_cast<Instruction>(ValAssumedPoison);
7669 if (I && !canCreatePoison(cast<Operator>(I))) {
7670 return all_of(I->operands(), [=](const Value *Op) {
7671 return impliesPoison(Op, V, Depth + 1);
7674 return false;
7677 bool llvm::impliesPoison(const Value *ValAssumedPoison, const Value *V) {
7678 return ::impliesPoison(ValAssumedPoison, V, /* Depth */ 0);
7681 static bool programUndefinedIfUndefOrPoison(const Value *V, bool PoisonOnly);
7683 static bool isGuaranteedNotToBeUndefOrPoison(
7684 const Value *V, AssumptionCache *AC, const Instruction *CtxI,
7685 const DominatorTree *DT, unsigned Depth, UndefPoisonKind Kind) {
7686 if (Depth >= MaxAnalysisRecursionDepth)
7687 return false;
7689 if (isa<MetadataAsValue>(V))
7690 return false;
7692 if (const auto *A = dyn_cast<Argument>(V)) {
7693 if (A->hasAttribute(Attribute::NoUndef) ||
7694 A->hasAttribute(Attribute::Dereferenceable) ||
7695 A->hasAttribute(Attribute::DereferenceableOrNull))
7696 return true;
7699 if (auto *C = dyn_cast<Constant>(V)) {
7700 if (isa<PoisonValue>(C))
7701 return !includesPoison(Kind);
7703 if (isa<UndefValue>(C))
7704 return !includesUndef(Kind);
7706 if (isa<ConstantInt>(C) || isa<GlobalVariable>(C) || isa<ConstantFP>(V) ||
7707 isa<ConstantPointerNull>(C) || isa<Function>(C))
7708 return true;
7710 if (C->getType()->isVectorTy() && !isa<ConstantExpr>(C)) {
7711 if (includesUndef(Kind) && C->containsUndefElement())
7712 return false;
7713 if (includesPoison(Kind) && C->containsPoisonElement())
7714 return false;
7715 return !C->containsConstantExpression();
7719 // Strip cast operations from a pointer value.
7720 // Note that stripPointerCastsSameRepresentation can strip off getelementptr
7721 // inbounds with zero offset. To guarantee that the result isn't poison, the
7722 // stripped pointer is checked as it has to be pointing into an allocated
7723 // object or be `null` to ensure that `inbounds` getelementptrs with a
7724 // zero offset could not produce poison.
7725 // It can strip off addrspacecasts that do not change the bit representation
7726 // as well. We believe that such an addrspacecast is equivalent to a no-op.
7727 auto *StrippedV = V->stripPointerCastsSameRepresentation();
7728 if (isa<AllocaInst>(StrippedV) || isa<GlobalVariable>(StrippedV) ||
7729 isa<Function>(StrippedV) || isa<ConstantPointerNull>(StrippedV))
7730 return true;
7732 auto OpCheck = [&](const Value *V) {
7733 return isGuaranteedNotToBeUndefOrPoison(V, AC, CtxI, DT, Depth + 1, Kind);
7736 if (auto *Opr = dyn_cast<Operator>(V)) {
7737 // If the value is a freeze instruction, then it can never
7738 // be undef or poison.
7739 if (isa<FreezeInst>(V))
7740 return true;
7742 if (const auto *CB = dyn_cast<CallBase>(V)) {
7743 if (CB->hasRetAttr(Attribute::NoUndef) ||
7744 CB->hasRetAttr(Attribute::Dereferenceable) ||
7745 CB->hasRetAttr(Attribute::DereferenceableOrNull))
7746 return true;
7749 if (const auto *PN = dyn_cast<PHINode>(V)) {
7750 unsigned Num = PN->getNumIncomingValues();
7751 bool IsWellDefined = true;
7752 for (unsigned i = 0; i < Num; ++i) {
7753 auto *TI = PN->getIncomingBlock(i)->getTerminator();
7754 if (!isGuaranteedNotToBeUndefOrPoison(PN->getIncomingValue(i), AC, TI,
7755 DT, Depth + 1, Kind)) {
7756 IsWellDefined = false;
7757 break;
7760 if (IsWellDefined)
7761 return true;
7762 } else if (!::canCreateUndefOrPoison(Opr, Kind,
7763 /*ConsiderFlagsAndMetadata*/ true) &&
7764 all_of(Opr->operands(), OpCheck))
7765 return true;
7768 if (auto *I = dyn_cast<LoadInst>(V))
7769 if (I->hasMetadata(LLVMContext::MD_noundef) ||
7770 I->hasMetadata(LLVMContext::MD_dereferenceable) ||
7771 I->hasMetadata(LLVMContext::MD_dereferenceable_or_null))
7772 return true;
7774 if (programUndefinedIfUndefOrPoison(V, !includesUndef(Kind)))
7775 return true;
7777 // CxtI may be null or a cloned instruction.
7778 if (!CtxI || !CtxI->getParent() || !DT)
7779 return false;
7781 auto *DNode = DT->getNode(CtxI->getParent());
7782 if (!DNode)
7783 // Unreachable block
7784 return false;
7786 // If V is used as a branch condition before reaching CtxI, V cannot be
7787 // undef or poison.
7788 // br V, BB1, BB2
7789 // BB1:
7790 // CtxI ; V cannot be undef or poison here
7791 auto *Dominator = DNode->getIDom();
7792 // This check is purely for compile time reasons: we can skip the IDom walk
7793 // if what we are checking for includes undef and the value is not an integer.
7794 if (!includesUndef(Kind) || V->getType()->isIntegerTy())
7795 while (Dominator) {
7796 auto *TI = Dominator->getBlock()->getTerminator();
7798 Value *Cond = nullptr;
7799 if (auto BI = dyn_cast_or_null<BranchInst>(TI)) {
7800 if (BI->isConditional())
7801 Cond = BI->getCondition();
7802 } else if (auto SI = dyn_cast_or_null<SwitchInst>(TI)) {
7803 Cond = SI->getCondition();
7806 if (Cond) {
7807 if (Cond == V)
7808 return true;
7809 else if (!includesUndef(Kind) && isa<Operator>(Cond)) {
7810 // For poison, we can analyze further
7811 auto *Opr = cast<Operator>(Cond);
7812 if (any_of(Opr->operands(), [V](const Use &U) {
7813 return V == U && propagatesPoison(U);
7815 return true;
7819 Dominator = Dominator->getIDom();
7822 if (getKnowledgeValidInContext(V, {Attribute::NoUndef}, CtxI, DT, AC))
7823 return true;
7825 return false;
7828 bool llvm::isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC,
7829 const Instruction *CtxI,
7830 const DominatorTree *DT,
7831 unsigned Depth) {
7832 return ::isGuaranteedNotToBeUndefOrPoison(V, AC, CtxI, DT, Depth,
7833 UndefPoisonKind::UndefOrPoison);
7836 bool llvm::isGuaranteedNotToBePoison(const Value *V, AssumptionCache *AC,
7837 const Instruction *CtxI,
7838 const DominatorTree *DT, unsigned Depth) {
7839 return ::isGuaranteedNotToBeUndefOrPoison(V, AC, CtxI, DT, Depth,
7840 UndefPoisonKind::PoisonOnly);
7843 bool llvm::isGuaranteedNotToBeUndef(const Value *V, AssumptionCache *AC,
7844 const Instruction *CtxI,
7845 const DominatorTree *DT, unsigned Depth) {
7846 return ::isGuaranteedNotToBeUndefOrPoison(V, AC, CtxI, DT, Depth,
7847 UndefPoisonKind::UndefOnly);
7850 /// Return true if undefined behavior would provably be executed on the path to
7851 /// OnPathTo if Root produced a poison result. Note that this doesn't say
7852 /// anything about whether OnPathTo is actually executed or whether Root is
7853 /// actually poison. This can be used to assess whether a new use of Root can
7854 /// be added at a location which is control equivalent with OnPathTo (such as
7855 /// immediately before it) without introducing UB which didn't previously
7856 /// exist. Note that a false result conveys no information.
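// Illustrative sketch (not part of the original source): if Root is used as
// the divisor of a udiv or as the pointer operand of a store, and that user
// dominates OnPathTo, then poison at Root would provably trigger UB on the
// path, so this is expected to return true.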
7857 bool llvm::mustExecuteUBIfPoisonOnPathTo(Instruction *Root,
7858 Instruction *OnPathTo,
7859 DominatorTree *DT) {
7860 // Basic approach is to assume Root is poison, propagate poison forward
7861 // through all users we can easily track, and then check whether any of those
7862 // users are provable UB and must execute before our exiting block might
7863 // exit.
7865 // The set of all recursive users we've visited (which are assumed to all be
7866 // poison because of said visit)
7867 SmallSet<const Value *, 16> KnownPoison;
7868 SmallVector<const Instruction*, 16> Worklist;
7869 Worklist.push_back(Root);
7870 while (!Worklist.empty()) {
7871 const Instruction *I = Worklist.pop_back_val();
7873 // If we know this must trigger UB on a path leading our target.
7874 if (mustTriggerUB(I, KnownPoison) && DT->dominates(I, OnPathTo))
7875 return true;
7877 // If we can't analyze propagation through this instruction, just skip it
7878 // and transitive users. Safe as false is a conservative result.
7879 if (I != Root && !any_of(I->operands(), [&KnownPoison](const Use &U) {
7880 return KnownPoison.contains(U) && propagatesPoison(U);
7882 continue;
7884 if (KnownPoison.insert(I).second)
7885 for (const User *User : I->users())
7886 Worklist.push_back(cast<Instruction>(User));
7889 // Might be non-UB, or might have a path we couldn't prove must execute on
7890 // the way to the exiting bb.
7891 return false;
7894 OverflowResult llvm::computeOverflowForSignedAdd(const AddOperator *Add,
7895 const SimplifyQuery &SQ) {
7896 return ::computeOverflowForSignedAdd(Add->getOperand(0), Add->getOperand(1),
7897 Add, SQ);
7900 OverflowResult
7901 llvm::computeOverflowForSignedAdd(const WithCache<const Value *> &LHS,
7902 const WithCache<const Value *> &RHS,
7903 const SimplifyQuery &SQ) {
7904 return ::computeOverflowForSignedAdd(LHS, RHS, nullptr, SQ);
7907 bool llvm::isGuaranteedToTransferExecutionToSuccessor(const Instruction *I) {
7908 // Note: An atomic operation isn't guaranteed to return in a reasonable amount
7909 // of time because it's possible for another thread to interfere with it for an
7910 // arbitrary length of time, but programs aren't allowed to rely on that.
7912 // If there is no successor, then execution can't transfer to it.
7913 if (isa<ReturnInst>(I))
7914 return false;
7915 if (isa<UnreachableInst>(I))
7916 return false;
7918 // Note: Do not add new checks here; instead, change Instruction::mayThrow or
7919 // Instruction::willReturn.
7921 // FIXME: Move this check into Instruction::willReturn.
7922 if (isa<CatchPadInst>(I)) {
7923 switch (classifyEHPersonality(I->getFunction()->getPersonalityFn())) {
7924 default:
7925 // A catchpad may invoke exception object constructors and such, which
7926 // in some languages can be arbitrary code, so be conservative by default.
7927 return false;
7928 case EHPersonality::CoreCLR:
7929 // For CoreCLR, it just involves a type test.
7930 return true;
7934 // An instruction that returns without throwing must transfer control flow
7935 // to a successor.
7936 return !I->mayThrow() && I->willReturn();
7939 bool llvm::isGuaranteedToTransferExecutionToSuccessor(const BasicBlock *BB) {
7940 // TODO: This is slightly conservative for invoke instruction since exiting
7941 // via an exception *is* normal control for them.
7942 for (const Instruction &I : *BB)
7943 if (!isGuaranteedToTransferExecutionToSuccessor(&I))
7944 return false;
7945 return true;
7948 bool llvm::isGuaranteedToTransferExecutionToSuccessor(
7949 BasicBlock::const_iterator Begin, BasicBlock::const_iterator End,
7950 unsigned ScanLimit) {
7951 return isGuaranteedToTransferExecutionToSuccessor(make_range(Begin, End),
7952 ScanLimit);
7955 bool llvm::isGuaranteedToTransferExecutionToSuccessor(
7956 iterator_range<BasicBlock::const_iterator> Range, unsigned ScanLimit) {
7957 assert(ScanLimit && "scan limit must be non-zero");
7958 for (const Instruction &I : Range) {
7959 if (isa<DbgInfoIntrinsic>(I))
7960 continue;
7961 if (--ScanLimit == 0)
7962 return false;
7963 if (!isGuaranteedToTransferExecutionToSuccessor(&I))
7964 return false;
7966 return true;
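// Illustrative sketch (assumes a caller-provided instruction I): check that
// every instruction after I in its block is guaranteed to reach the block's
// terminator, scanning at most 32 instructions, mirroring the limits used
// elsewhere in this file.
//
// \code
//   static bool restOfBlockTransfersExecution(const Instruction *I) {
//     return isGuaranteedToTransferExecutionToSuccessor(
//         std::next(I->getIterator()), I->getParent()->end(), /*ScanLimit=*/32);
//   }
// \endcode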
7969 bool llvm::isGuaranteedToExecuteForEveryIteration(const Instruction *I,
7970 const Loop *L) {
7971 // The loop header is guaranteed to be executed for every iteration.
7973 // FIXME: Relax this constraint to cover all basic blocks that are
7974 // guaranteed to be executed at every iteration.
7975 if (I->getParent() != L->getHeader()) return false;
7977 for (const Instruction &LI : *L->getHeader()) {
7978 if (&LI == I) return true;
7979 if (!isGuaranteedToTransferExecutionToSuccessor(&LI)) return false;
7981 llvm_unreachable("Instruction not contained in its own parent basic block.");
7984 bool llvm::propagatesPoison(const Use &PoisonOp) {
7985 const Operator *I = cast<Operator>(PoisonOp.getUser());
7986 switch (I->getOpcode()) {
7987 case Instruction::Freeze:
7988 case Instruction::PHI:
7989 case Instruction::Invoke:
7990 return false;
7991 case Instruction::Select:
7992 return PoisonOp.getOperandNo() == 0;
7993 case Instruction::Call:
7994 if (auto *II = dyn_cast<IntrinsicInst>(I)) {
7995 switch (II->getIntrinsicID()) {
7996 // TODO: Add more intrinsics.
7997 case Intrinsic::sadd_with_overflow:
7998 case Intrinsic::ssub_with_overflow:
7999 case Intrinsic::smul_with_overflow:
8000 case Intrinsic::uadd_with_overflow:
8001 case Intrinsic::usub_with_overflow:
8002 case Intrinsic::umul_with_overflow:
8003 // If an input is a vector containing a poison element, the
8004 // corresponding lanes of the two output vectors (calculated
8005 // results and overflow bits) are poison.
8006 return true;
8007 case Intrinsic::ctpop:
8008 case Intrinsic::ctlz:
8009 case Intrinsic::cttz:
8010 case Intrinsic::abs:
8011 case Intrinsic::smax:
8012 case Intrinsic::smin:
8013 case Intrinsic::umax:
8014 case Intrinsic::umin:
8015 case Intrinsic::bitreverse:
8016 case Intrinsic::bswap:
8017 case Intrinsic::sadd_sat:
8018 case Intrinsic::ssub_sat:
8019 case Intrinsic::sshl_sat:
8020 case Intrinsic::uadd_sat:
8021 case Intrinsic::usub_sat:
8022 case Intrinsic::ushl_sat:
8023 return true;
8026 return false;
8027 case Instruction::ICmp:
8028 case Instruction::FCmp:
8029 case Instruction::GetElementPtr:
8030 return true;
8031 default:
8032 if (isa<BinaryOperator>(I) || isa<UnaryOperator>(I) || isa<CastInst>(I))
8033 return true;
8035 // Be conservative and return false.
8036 return false;
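// Sketch of a typical caller pattern (the worklist in
// mustExecuteUBIfPoisonOnPathTo above is the in-tree example): given an
// instruction I assumed to produce poison, collect the users that are poison
// as well because the corresponding use propagates poison.
//
// \code
//   static void collectPoisonedUsers(Instruction *I,
//                                    SmallVectorImpl<Instruction *> &Out) {
//     for (Use &U : I->uses())
//       if (propagatesPoison(U))
//         Out.push_back(cast<Instruction>(U.getUser()));
//   }
// \endcode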
8040 /// Enumerates all operands of \p I that are guaranteed to not be undef or
8041 /// poison. If the callback \p Handle returns true, stop processing and return
8042 /// true. Otherwise, return false.
8043 template <typename CallableT>
8044 static bool handleGuaranteedWellDefinedOps(const Instruction *I,
8045 const CallableT &Handle) {
8046 switch (I->getOpcode()) {
8047 case Instruction::Store:
8048 if (Handle(cast<StoreInst>(I)->getPointerOperand()))
8049 return true;
8050 break;
8052 case Instruction::Load:
8053 if (Handle(cast<LoadInst>(I)->getPointerOperand()))
8054 return true;
8055 break;
8057 // Since the dereferenceable attribute implies noundef, atomic operations
8058 // also implicitly have noundef pointers.
8059 case Instruction::AtomicCmpXchg:
8060 if (Handle(cast<AtomicCmpXchgInst>(I)->getPointerOperand()))
8061 return true;
8062 break;
8064 case Instruction::AtomicRMW:
8065 if (Handle(cast<AtomicRMWInst>(I)->getPointerOperand()))
8066 return true;
8067 break;
8069 case Instruction::Call:
8070 case Instruction::Invoke: {
8071 const CallBase *CB = cast<CallBase>(I);
8072 if (CB->isIndirectCall() && Handle(CB->getCalledOperand()))
8073 return true;
8074 for (unsigned i = 0; i < CB->arg_size(); ++i)
8075 if ((CB->paramHasAttr(i, Attribute::NoUndef) ||
8076 CB->paramHasAttr(i, Attribute::Dereferenceable) ||
8077 CB->paramHasAttr(i, Attribute::DereferenceableOrNull)) &&
8078 Handle(CB->getArgOperand(i)))
8079 return true;
8080 break;
8082 case Instruction::Ret:
8083 if (I->getFunction()->hasRetAttribute(Attribute::NoUndef) &&
8084 Handle(I->getOperand(0)))
8085 return true;
8086 break;
8087 case Instruction::Switch:
8088 if (Handle(cast<SwitchInst>(I)->getCondition()))
8089 return true;
8090 break;
8091 case Instruction::Br: {
8092 auto *BR = cast<BranchInst>(I);
8093 if (BR->isConditional() && Handle(BR->getCondition()))
8094 return true;
8095 break;
8097 default:
8098 break;
8101 return false;
8104 void llvm::getGuaranteedWellDefinedOps(
8105 const Instruction *I, SmallVectorImpl<const Value *> &Operands) {
8106 handleGuaranteedWellDefinedOps(I, [&](const Value *V) {
8107 Operands.push_back(V);
8108 return false;
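// A small usage sketch (the helper name is illustrative): test whether a
// specific value V is required to be well defined (noundef) as an operand of
// I, e.g. because it is a dereferenced pointer or a noundef call argument.
//
// \code
//   static bool mustBeWellDefinedOperand(const Instruction *I, const Value *V) {
//     SmallVector<const Value *, 4> Ops;
//     getGuaranteedWellDefinedOps(I, Ops);
//     return llvm::is_contained(Ops, V);
//   }
// \endcode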
8112 /// Enumerates all operands of \p I that are guaranteed to not be poison.
8113 template <typename CallableT>
8114 static bool handleGuaranteedNonPoisonOps(const Instruction *I,
8115 const CallableT &Handle) {
8116 if (handleGuaranteedWellDefinedOps(I, Handle))
8117 return true;
8118 switch (I->getOpcode()) {
8119 // Divisors of these operations are allowed to be partially undef.
8120 case Instruction::UDiv:
8121 case Instruction::SDiv:
8122 case Instruction::URem:
8123 case Instruction::SRem:
8124 return Handle(I->getOperand(1));
8125 default:
8126 return false;
8130 void llvm::getGuaranteedNonPoisonOps(const Instruction *I,
8131 SmallVectorImpl<const Value *> &Operands) {
8132 handleGuaranteedNonPoisonOps(I, [&](const Value *V) {
8133 Operands.push_back(V);
8134 return false;
8138 bool llvm::mustTriggerUB(const Instruction *I,
8139 const SmallPtrSetImpl<const Value *> &KnownPoison) {
8140 return handleGuaranteedNonPoisonOps(
8141 I, [&](const Value *V) { return KnownPoison.count(V); });
8144 static bool programUndefinedIfUndefOrPoison(const Value *V,
8145 bool PoisonOnly) {
8146 // We currently only look for uses of values within the same basic
8147 // block, as that makes it easier to guarantee that the uses will be
8148 // executed given that Inst is executed.
8150 // FIXME: Expand this to consider uses beyond the same basic block. To do
8151 // this, look out for the distinction between post-dominance and strong
8152 // post-dominance.
8153 const BasicBlock *BB = nullptr;
8154 BasicBlock::const_iterator Begin;
8155 if (const auto *Inst = dyn_cast<Instruction>(V)) {
8156 BB = Inst->getParent();
8157 Begin = Inst->getIterator();
8158 Begin++;
8159 } else if (const auto *Arg = dyn_cast<Argument>(V)) {
8160 if (Arg->getParent()->isDeclaration())
8161 return false;
8162 BB = &Arg->getParent()->getEntryBlock();
8163 Begin = BB->begin();
8164 } else {
8165 return false;
8168 // Limit number of instructions we look at, to avoid scanning through large
8169 // blocks. The current limit is chosen arbitrarily.
8170 unsigned ScanLimit = 32;
8171 BasicBlock::const_iterator End = BB->end();
8173 if (!PoisonOnly) {
8174 // Since undef does not propagate eagerly, be conservative & just check
8175 // whether a value is directly passed to an instruction that must take
8176 // well-defined operands.
8178 for (const auto &I : make_range(Begin, End)) {
8179 if (isa<DbgInfoIntrinsic>(I))
8180 continue;
8181 if (--ScanLimit == 0)
8182 break;
8184 if (handleGuaranteedWellDefinedOps(&I, [V](const Value *WellDefinedOp) {
8185 return WellDefinedOp == V;
8187 return true;
8189 if (!isGuaranteedToTransferExecutionToSuccessor(&I))
8190 break;
8192 return false;
8195 // Set of instructions that we have proved will yield poison if Inst
8196 // does.
8197 SmallSet<const Value *, 16> YieldsPoison;
8198 SmallSet<const BasicBlock *, 4> Visited;
8200 YieldsPoison.insert(V);
8201 Visited.insert(BB);
8203 while (true) {
8204 for (const auto &I : make_range(Begin, End)) {
8205 if (isa<DbgInfoIntrinsic>(I))
8206 continue;
8207 if (--ScanLimit == 0)
8208 return false;
8209 if (mustTriggerUB(&I, YieldsPoison))
8210 return true;
8211 if (!isGuaranteedToTransferExecutionToSuccessor(&I))
8212 return false;
8214 // If an operand is poison and propagates it, mark I as yielding poison.
8215 for (const Use &Op : I.operands()) {
8216 if (YieldsPoison.count(Op) && propagatesPoison(Op)) {
8217 YieldsPoison.insert(&I);
8218 break;
8222 // Special handling for select, which returns poison if its operand 0 is
8223 // poison (handled in the loop above) *or* if both its true/false operands
8224 // are poison (handled here).
8225 if (I.getOpcode() == Instruction::Select &&
8226 YieldsPoison.count(I.getOperand(1)) &&
8227 YieldsPoison.count(I.getOperand(2))) {
8228 YieldsPoison.insert(&I);
8232 BB = BB->getSingleSuccessor();
8233 if (!BB || !Visited.insert(BB).second)
8234 break;
8236 Begin = BB->getFirstNonPHIIt();
8237 End = BB->end();
8239 return false;
8242 bool llvm::programUndefinedIfUndefOrPoison(const Instruction *Inst) {
8243 return ::programUndefinedIfUndefOrPoison(Inst, false);
8246 bool llvm::programUndefinedIfPoison(const Instruction *Inst) {
8247 return ::programUndefinedIfUndefOrPoison(Inst, true);
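// Hedged sketch combining the queries above: in any well-defined execution,
// an executed instruction cannot be poison if either the value itself is
// guaranteed non-poison or the program would be UB whenever it were poison.
//
// \code
//   static bool cannotBePoisonIfExecuted(const Instruction *I) {
//     return isGuaranteedNotToBePoison(I, /*AC=*/nullptr, /*CtxI=*/I,
//                                      /*DT=*/nullptr, /*Depth=*/0) ||
//            programUndefinedIfPoison(I);
//   }
// \endcode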
8250 static bool isKnownNonNaN(const Value *V, FastMathFlags FMF) {
8251 if (FMF.noNaNs())
8252 return true;
8254 if (auto *C = dyn_cast<ConstantFP>(V))
8255 return !C->isNaN();
8257 if (auto *C = dyn_cast<ConstantDataVector>(V)) {
8258 if (!C->getElementType()->isFloatingPointTy())
8259 return false;
8260 for (unsigned I = 0, E = C->getNumElements(); I < E; ++I) {
8261 if (C->getElementAsAPFloat(I).isNaN())
8262 return false;
8264 return true;
8267 if (isa<ConstantAggregateZero>(V))
8268 return true;
8270 return false;
8273 static bool isKnownNonZero(const Value *V) {
8274 if (auto *C = dyn_cast<ConstantFP>(V))
8275 return !C->isZero();
8277 if (auto *C = dyn_cast<ConstantDataVector>(V)) {
8278 if (!C->getElementType()->isFloatingPointTy())
8279 return false;
8280 for (unsigned I = 0, E = C->getNumElements(); I < E; ++I) {
8281 if (C->getElementAsAPFloat(I).isZero())
8282 return false;
8284 return true;
8287 return false;
8290 /// Match a clamp pattern for float types without caring about NaNs or signed
8291 /// zeros. Given the non-min/max outer cmp/select from the clamp pattern, this
8292 /// function recognizes whether it can be substituted by a "canonical" min/max
8293 /// pattern.
8294 static SelectPatternResult matchFastFloatClamp(CmpInst::Predicate Pred,
8295 Value *CmpLHS, Value *CmpRHS,
8296 Value *TrueVal, Value *FalseVal,
8297 Value *&LHS, Value *&RHS) {
8298 // Try to match
8299 // X < C1 ? C1 : Min(X, C2) --> Max(C1, Min(X, C2))
8300 // X > C1 ? C1 : Max(X, C2) --> Min(C1, Max(X, C2))
8301 // and return description of the outer Max/Min.
8303 // First, check if select has inverse order:
8304 if (CmpRHS == FalseVal) {
8305 std::swap(TrueVal, FalseVal);
8306 Pred = CmpInst::getInversePredicate(Pred);
8309 // Assume success now. If there's no match, callers should not use these anyway.
8310 LHS = TrueVal;
8311 RHS = FalseVal;
8313 const APFloat *FC1;
8314 if (CmpRHS != TrueVal || !match(CmpRHS, m_APFloat(FC1)) || !FC1->isFinite())
8315 return {SPF_UNKNOWN, SPNB_NA, false};
8317 const APFloat *FC2;
8318 switch (Pred) {
8319 case CmpInst::FCMP_OLT:
8320 case CmpInst::FCMP_OLE:
8321 case CmpInst::FCMP_ULT:
8322 case CmpInst::FCMP_ULE:
8323 if (match(FalseVal, m_OrdOrUnordFMin(m_Specific(CmpLHS), m_APFloat(FC2))) &&
8324 *FC1 < *FC2)
8325 return {SPF_FMAXNUM, SPNB_RETURNS_ANY, false};
8326 break;
8327 case CmpInst::FCMP_OGT:
8328 case CmpInst::FCMP_OGE:
8329 case CmpInst::FCMP_UGT:
8330 case CmpInst::FCMP_UGE:
8331 if (match(FalseVal, m_OrdOrUnordFMax(m_Specific(CmpLHS), m_APFloat(FC2))) &&
8332 *FC1 > *FC2)
8333 return {SPF_FMINNUM, SPNB_RETURNS_ANY, false};
8334 break;
8335 default:
8336 break;
8339 return {SPF_UNKNOWN, SPNB_NA, false};
8342 /// Recognize variations of:
8343 /// CLAMP(v,l,h) ==> ((v) < (l) ? (l) : ((v) > (h) ? (h) : (v)))
8344 static SelectPatternResult matchClamp(CmpInst::Predicate Pred,
8345 Value *CmpLHS, Value *CmpRHS,
8346 Value *TrueVal, Value *FalseVal) {
8347 // Swap the select operands and predicate to match the patterns below.
8348 if (CmpRHS != TrueVal) {
8349 Pred = ICmpInst::getSwappedPredicate(Pred);
8350 std::swap(TrueVal, FalseVal);
8352 const APInt *C1;
8353 if (CmpRHS == TrueVal && match(CmpRHS, m_APInt(C1))) {
8354 const APInt *C2;
8355 // (X <s C1) ? C1 : SMIN(X, C2) ==> SMAX(SMIN(X, C2), C1)
8356 if (match(FalseVal, m_SMin(m_Specific(CmpLHS), m_APInt(C2))) &&
8357 C1->slt(*C2) && Pred == CmpInst::ICMP_SLT)
8358 return {SPF_SMAX, SPNB_NA, false};
8360 // (X >s C1) ? C1 : SMAX(X, C2) ==> SMIN(SMAX(X, C2), C1)
8361 if (match(FalseVal, m_SMax(m_Specific(CmpLHS), m_APInt(C2))) &&
8362 C1->sgt(*C2) && Pred == CmpInst::ICMP_SGT)
8363 return {SPF_SMIN, SPNB_NA, false};
8365 // (X <u C1) ? C1 : UMIN(X, C2) ==> UMAX(UMIN(X, C2), C1)
8366 if (match(FalseVal, m_UMin(m_Specific(CmpLHS), m_APInt(C2))) &&
8367 C1->ult(*C2) && Pred == CmpInst::ICMP_ULT)
8368 return {SPF_UMAX, SPNB_NA, false};
8370 // (X >u C1) ? C1 : UMAX(X, C2) ==> UMIN(UMAX(X, C2), C1)
8371 if (match(FalseVal, m_UMax(m_Specific(CmpLHS), m_APInt(C2))) &&
8372 C1->ugt(*C2) && Pred == CmpInst::ICMP_UGT)
8373 return {SPF_UMIN, SPNB_NA, false};
8375 return {SPF_UNKNOWN, SPNB_NA, false};
8378 /// Recognize variations of:
8379 /// a < c ? min(a,b) : min(b,c) ==> min(min(a,b),min(b,c))
8380 static SelectPatternResult matchMinMaxOfMinMax(CmpInst::Predicate Pred,
8381 Value *CmpLHS, Value *CmpRHS,
8382 Value *TVal, Value *FVal,
8383 unsigned Depth) {
8384 // TODO: Allow FP min/max with nnan/nsz.
8385 assert(CmpInst::isIntPredicate(Pred) && "Expected integer comparison");
8387 Value *A = nullptr, *B = nullptr;
8388 SelectPatternResult L = matchSelectPattern(TVal, A, B, nullptr, Depth + 1);
8389 if (!SelectPatternResult::isMinOrMax(L.Flavor))
8390 return {SPF_UNKNOWN, SPNB_NA, false};
8392 Value *C = nullptr, *D = nullptr;
8393 SelectPatternResult R = matchSelectPattern(FVal, C, D, nullptr, Depth + 1);
8394 if (L.Flavor != R.Flavor)
8395 return {SPF_UNKNOWN, SPNB_NA, false};
8397 // We have something like: x Pred y ? min(a, b) : min(c, d).
8398 // Try to match the compare to the min/max operations of the select operands.
8399 // First, make sure we have the right compare predicate.
8400 switch (L.Flavor) {
8401 case SPF_SMIN:
8402 if (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE) {
8403 Pred = ICmpInst::getSwappedPredicate(Pred);
8404 std::swap(CmpLHS, CmpRHS);
8406 if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE)
8407 break;
8408 return {SPF_UNKNOWN, SPNB_NA, false};
8409 case SPF_SMAX:
8410 if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE) {
8411 Pred = ICmpInst::getSwappedPredicate(Pred);
8412 std::swap(CmpLHS, CmpRHS);
8414 if (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE)
8415 break;
8416 return {SPF_UNKNOWN, SPNB_NA, false};
8417 case SPF_UMIN:
8418 if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE) {
8419 Pred = ICmpInst::getSwappedPredicate(Pred);
8420 std::swap(CmpLHS, CmpRHS);
8422 if (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_ULE)
8423 break;
8424 return {SPF_UNKNOWN, SPNB_NA, false};
8425 case SPF_UMAX:
8426 if (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_ULE) {
8427 Pred = ICmpInst::getSwappedPredicate(Pred);
8428 std::swap(CmpLHS, CmpRHS);
8430 if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE)
8431 break;
8432 return {SPF_UNKNOWN, SPNB_NA, false};
8433 default:
8434 return {SPF_UNKNOWN, SPNB_NA, false};
8437 // If there is a common operand in the already matched min/max and the other
8438 // min/max operands match the compare operands (either directly or inverted),
8439 // then this is min/max of the same flavor.
8441 // a pred c ? m(a, b) : m(c, b) --> m(m(a, b), m(c, b))
8442 // ~c pred ~a ? m(a, b) : m(c, b) --> m(m(a, b), m(c, b))
8443 if (D == B) {
8444 if ((CmpLHS == A && CmpRHS == C) || (match(C, m_Not(m_Specific(CmpLHS))) &&
8445 match(A, m_Not(m_Specific(CmpRHS)))))
8446 return {L.Flavor, SPNB_NA, false};
8448 // a pred d ? m(a, b) : m(b, d) --> m(m(a, b), m(b, d))
8449 // ~d pred ~a ? m(a, b) : m(b, d) --> m(m(a, b), m(b, d))
8450 if (C == B) {
8451 if ((CmpLHS == A && CmpRHS == D) || (match(D, m_Not(m_Specific(CmpLHS))) &&
8452 match(A, m_Not(m_Specific(CmpRHS)))))
8453 return {L.Flavor, SPNB_NA, false};
8455 // b pred c ? m(a, b) : m(c, a) --> m(m(a, b), m(c, a))
8456 // ~c pred ~b ? m(a, b) : m(c, a) --> m(m(a, b), m(c, a))
8457 if (D == A) {
8458 if ((CmpLHS == B && CmpRHS == C) || (match(C, m_Not(m_Specific(CmpLHS))) &&
8459 match(B, m_Not(m_Specific(CmpRHS)))))
8460 return {L.Flavor, SPNB_NA, false};
8462 // b pred d ? m(a, b) : m(a, d) --> m(m(a, b), m(a, d))
8463 // ~d pred ~b ? m(a, b) : m(a, d) --> m(m(a, b), m(a, d))
8464 if (C == A) {
8465 if ((CmpLHS == B && CmpRHS == D) || (match(D, m_Not(m_Specific(CmpLHS))) &&
8466 match(B, m_Not(m_Specific(CmpRHS)))))
8467 return {L.Flavor, SPNB_NA, false};
8470 return {SPF_UNKNOWN, SPNB_NA, false};
8473 /// If the input value is the result of a 'not' op, constant integer, or vector
8474 /// splat of a constant integer, return the bitwise-not source value.
8475 /// TODO: This could be extended to handle non-splat vector integer constants.
8476 static Value *getNotValue(Value *V) {
8477 Value *NotV;
8478 if (match(V, m_Not(m_Value(NotV))))
8479 return NotV;
8481 const APInt *C;
8482 if (match(V, m_APInt(C)))
8483 return ConstantInt::get(V->getType(), ~(*C));
8485 return nullptr;
8488 /// Match non-obvious integer minimum and maximum sequences.
8489 static SelectPatternResult matchMinMax(CmpInst::Predicate Pred,
8490 Value *CmpLHS, Value *CmpRHS,
8491 Value *TrueVal, Value *FalseVal,
8492 Value *&LHS, Value *&RHS,
8493 unsigned Depth) {
8494 // Assume success. If there's no match, callers should not use these anyway.
8495 LHS = TrueVal;
8496 RHS = FalseVal;
8498 SelectPatternResult SPR = matchClamp(Pred, CmpLHS, CmpRHS, TrueVal, FalseVal);
8499 if (SPR.Flavor != SelectPatternFlavor::SPF_UNKNOWN)
8500 return SPR;
8502 SPR = matchMinMaxOfMinMax(Pred, CmpLHS, CmpRHS, TrueVal, FalseVal, Depth);
8503 if (SPR.Flavor != SelectPatternFlavor::SPF_UNKNOWN)
8504 return SPR;
8506 // Look through 'not' ops to find disguised min/max.
8507 // (X > Y) ? ~X : ~Y ==> (~X < ~Y) ? ~X : ~Y ==> MIN(~X, ~Y)
8508 // (X < Y) ? ~X : ~Y ==> (~X > ~Y) ? ~X : ~Y ==> MAX(~X, ~Y)
8509 if (CmpLHS == getNotValue(TrueVal) && CmpRHS == getNotValue(FalseVal)) {
8510 switch (Pred) {
8511 case CmpInst::ICMP_SGT: return {SPF_SMIN, SPNB_NA, false};
8512 case CmpInst::ICMP_SLT: return {SPF_SMAX, SPNB_NA, false};
8513 case CmpInst::ICMP_UGT: return {SPF_UMIN, SPNB_NA, false};
8514 case CmpInst::ICMP_ULT: return {SPF_UMAX, SPNB_NA, false};
8515 default: break;
8519 // (X > Y) ? ~Y : ~X ==> (~X < ~Y) ? ~Y : ~X ==> MAX(~Y, ~X)
8520 // (X < Y) ? ~Y : ~X ==> (~X > ~Y) ? ~Y : ~X ==> MIN(~Y, ~X)
8521 if (CmpLHS == getNotValue(FalseVal) && CmpRHS == getNotValue(TrueVal)) {
8522 switch (Pred) {
8523 case CmpInst::ICMP_SGT: return {SPF_SMAX, SPNB_NA, false};
8524 case CmpInst::ICMP_SLT: return {SPF_SMIN, SPNB_NA, false};
8525 case CmpInst::ICMP_UGT: return {SPF_UMAX, SPNB_NA, false};
8526 case CmpInst::ICMP_ULT: return {SPF_UMIN, SPNB_NA, false};
8527 default: break;
8531 if (Pred != CmpInst::ICMP_SGT && Pred != CmpInst::ICMP_SLT)
8532 return {SPF_UNKNOWN, SPNB_NA, false};
8534 const APInt *C1;
8535 if (!match(CmpRHS, m_APInt(C1)))
8536 return {SPF_UNKNOWN, SPNB_NA, false};
8538 // An unsigned min/max can be written with a signed compare.
8539 const APInt *C2;
8540 if ((CmpLHS == TrueVal && match(FalseVal, m_APInt(C2))) ||
8541 (CmpLHS == FalseVal && match(TrueVal, m_APInt(C2)))) {
8542 // Is the sign bit set?
8543 // (X <s 0) ? X : MAXVAL ==> (X >u MAXVAL) ? X : MAXVAL ==> UMAX
8544 // (X <s 0) ? MAXVAL : X ==> (X >u MAXVAL) ? MAXVAL : X ==> UMIN
8545 if (Pred == CmpInst::ICMP_SLT && C1->isZero() && C2->isMaxSignedValue())
8546 return {CmpLHS == TrueVal ? SPF_UMAX : SPF_UMIN, SPNB_NA, false};
8548 // Is the sign bit clear?
8549 // (X >s -1) ? MINVAL : X ==> (X <u MINVAL) ? MINVAL : X ==> UMAX
8550 // (X >s -1) ? X : MINVAL ==> (X <u MINVAL) ? X : MINVAL ==> UMIN
8551 if (Pred == CmpInst::ICMP_SGT && C1->isAllOnes() && C2->isMinSignedValue())
8552 return {CmpLHS == FalseVal ? SPF_UMAX : SPF_UMIN, SPNB_NA, false};
8555 return {SPF_UNKNOWN, SPNB_NA, false};
8558 bool llvm::isKnownNegation(const Value *X, const Value *Y, bool NeedNSW,
8559 bool AllowPoison) {
8560 assert(X && Y && "Invalid operand");
8562 auto IsNegationOf = [&](const Value *X, const Value *Y) {
8563 if (!match(X, m_Neg(m_Specific(Y))))
8564 return false;
8566 auto *BO = cast<BinaryOperator>(X);
8567 if (NeedNSW && !BO->hasNoSignedWrap())
8568 return false;
8570 auto *Zero = cast<Constant>(BO->getOperand(0));
8571 if (!AllowPoison && !Zero->isNullValue())
8572 return false;
8574 return true;
8577 // X = -Y or Y = -X
8578 if (IsNegationOf(X, Y) || IsNegationOf(Y, X))
8579 return true;
8581 // X = sub (A, B), Y = sub (B, A) || X = sub nsw (A, B), Y = sub nsw (B, A)
8582 Value *A, *B;
8583 return (!NeedNSW && (match(X, m_Sub(m_Value(A), m_Value(B))) &&
8584 match(Y, m_Sub(m_Specific(B), m_Specific(A))))) ||
8585 (NeedNSW && (match(X, m_NSWSub(m_Value(A), m_Value(B))) &&
8586 match(Y, m_NSWSub(m_Specific(B), m_Specific(A)))));
8589 bool llvm::isKnownInversion(const Value *X, const Value *Y) {
8590 // Handle X = icmp pred A, B, Y = icmp pred A, C.
8591 Value *A, *B, *C;
8592 CmpPredicate Pred1, Pred2;
8593 if (!match(X, m_ICmp(Pred1, m_Value(A), m_Value(B))) ||
8594 !match(Y, m_c_ICmp(Pred2, m_Specific(A), m_Value(C))))
8595 return false;
8597 // They must both have samesign flag or not.
8598 if (cast<ICmpInst>(X)->hasSameSign() != cast<ICmpInst>(Y)->hasSameSign())
8599 return false;
8601 if (B == C)
8602 return Pred1 == ICmpInst::getInversePredicate(Pred2);
8604 // Try to infer the relationship from constant ranges.
8605 const APInt *RHSC1, *RHSC2;
8606 if (!match(B, m_APInt(RHSC1)) || !match(C, m_APInt(RHSC2)))
8607 return false;
8609 // Sign bits of two RHSCs should match.
8610 if (cast<ICmpInst>(X)->hasSameSign() &&
8611 RHSC1->isNonNegative() != RHSC2->isNonNegative())
8612 return false;
8614 const auto CR1 = ConstantRange::makeExactICmpRegion(Pred1, *RHSC1);
8615 const auto CR2 = ConstantRange::makeExactICmpRegion(Pred2, *RHSC2);
8617 return CR1.inverse() == CR2;
8620 SelectPatternResult llvm::getSelectPattern(CmpInst::Predicate Pred,
8621 SelectPatternNaNBehavior NaNBehavior,
8622 bool Ordered) {
8623 switch (Pred) {
8624 default:
8625 return {SPF_UNKNOWN, SPNB_NA, false}; // Equality.
8626 case ICmpInst::ICMP_UGT:
8627 case ICmpInst::ICMP_UGE:
8628 return {SPF_UMAX, SPNB_NA, false};
8629 case ICmpInst::ICMP_SGT:
8630 case ICmpInst::ICMP_SGE:
8631 return {SPF_SMAX, SPNB_NA, false};
8632 case ICmpInst::ICMP_ULT:
8633 case ICmpInst::ICMP_ULE:
8634 return {SPF_UMIN, SPNB_NA, false};
8635 case ICmpInst::ICMP_SLT:
8636 case ICmpInst::ICMP_SLE:
8637 return {SPF_SMIN, SPNB_NA, false};
8638 case FCmpInst::FCMP_UGT:
8639 case FCmpInst::FCMP_UGE:
8640 case FCmpInst::FCMP_OGT:
8641 case FCmpInst::FCMP_OGE:
8642 return {SPF_FMAXNUM, NaNBehavior, Ordered};
8643 case FCmpInst::FCMP_ULT:
8644 case FCmpInst::FCMP_ULE:
8645 case FCmpInst::FCMP_OLT:
8646 case FCmpInst::FCMP_OLE:
8647 return {SPF_FMINNUM, NaNBehavior, Ordered};
8651 std::optional<std::pair<CmpPredicate, Constant *>>
8652 llvm::getFlippedStrictnessPredicateAndConstant(CmpPredicate Pred, Constant *C) {
8653 assert(ICmpInst::isRelational(Pred) && ICmpInst::isIntPredicate(Pred) &&
8654 "Only for relational integer predicates.");
8655 if (isa<UndefValue>(C))
8656 return std::nullopt;
8658 Type *Type = C->getType();
8659 bool IsSigned = ICmpInst::isSigned(Pred);
8661 CmpInst::Predicate UnsignedPred = ICmpInst::getUnsignedPredicate(Pred);
8662 bool WillIncrement =
8663 UnsignedPred == ICmpInst::ICMP_ULE || UnsignedPred == ICmpInst::ICMP_UGT;
8665 // Check if the constant operand can be safely incremented/decremented
8666 // without overflowing/underflowing.
8667 auto ConstantIsOk = [WillIncrement, IsSigned](ConstantInt *C) {
8668 return WillIncrement ? !C->isMaxValue(IsSigned) : !C->isMinValue(IsSigned);
8671 Constant *SafeReplacementConstant = nullptr;
8672 if (auto *CI = dyn_cast<ConstantInt>(C)) {
8673 // Bail out if the constant can't be safely incremented/decremented.
8674 if (!ConstantIsOk(CI))
8675 return std::nullopt;
8676 } else if (auto *FVTy = dyn_cast<FixedVectorType>(Type)) {
8677 unsigned NumElts = FVTy->getNumElements();
8678 for (unsigned i = 0; i != NumElts; ++i) {
8679 Constant *Elt = C->getAggregateElement(i);
8680 if (!Elt)
8681 return std::nullopt;
8683 if (isa<UndefValue>(Elt))
8684 continue;
8686 // Bail out if we can't determine if this constant is min/max or if we
8687 // know that this constant is min/max.
8688 auto *CI = dyn_cast<ConstantInt>(Elt);
8689 if (!CI || !ConstantIsOk(CI))
8690 return std::nullopt;
8692 if (!SafeReplacementConstant)
8693 SafeReplacementConstant = CI;
8695 } else if (isa<VectorType>(C->getType())) {
8696 // Handle scalable splat
8697 Value *SplatC = C->getSplatValue();
8698 auto *CI = dyn_cast_or_null<ConstantInt>(SplatC);
8699 // Bail out if the constant can't be safely incremented/decremented.
8700 if (!CI || !ConstantIsOk(CI))
8701 return std::nullopt;
8702 } else {
8703 // ConstantExpr?
8704 return std::nullopt;
8707 // It may not be safe to change a compare predicate in the presence of
8708 // undefined elements, so replace those elements with the first safe constant
8709 // that we found.
8710 // TODO: in case of poison, it is safe; let's replace undefs only.
8711 if (C->containsUndefOrPoisonElement()) {
8712 assert(SafeReplacementConstant && "Replacement constant not set");
8713 C = Constant::replaceUndefsWith(C, SafeReplacementConstant);
8716 CmpInst::Predicate NewPred = CmpInst::getFlippedStrictnessPredicate(Pred);
8718 // Increment or decrement the constant.
8719 Constant *OneOrNegOne = ConstantInt::get(Type, WillIncrement ? 1 : -1, true);
8720 Constant *NewC = ConstantExpr::getAdd(C, OneOrNegOne);
8722 return std::make_pair(NewPred, NewC);
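// Worked example for getFlippedStrictnessPredicateAndConstant (the values are
// illustrative): for Pred = ICMP_SLT and C = i8 10 the result is
// {ICMP_SLE, i8 9}; for Pred = ICMP_UGT and C = i8 7 it is {ICMP_UGE, i8 8}.
// A guarded call sketch that checks the documented preconditions first:
//
// \code
//   static std::optional<std::pair<CmpPredicate, Constant *>>
//   tryFlipStrictness(CmpPredicate Pred, Constant *C) {
//     if (!ICmpInst::isRelational(Pred) || !ICmpInst::isIntPredicate(Pred))
//       return std::nullopt;
//     return getFlippedStrictnessPredicateAndConstant(Pred, C);
//   }
// \endcode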
8725 static SelectPatternResult matchSelectPattern(CmpInst::Predicate Pred,
8726 FastMathFlags FMF,
8727 Value *CmpLHS, Value *CmpRHS,
8728 Value *TrueVal, Value *FalseVal,
8729 Value *&LHS, Value *&RHS,
8730 unsigned Depth) {
8731 bool HasMismatchedZeros = false;
8732 if (CmpInst::isFPPredicate(Pred)) {
8733 // IEEE-754 ignores the sign of 0.0 in comparisons. So if the select has one
8734 // 0.0 operand, set the compare's 0.0 operands to that same value for the
8735 // purpose of identifying min/max. Disregard vector constants with undefined
8736 // elements because those can not be back-propagated for analysis.
8737 Value *OutputZeroVal = nullptr;
8738 if (match(TrueVal, m_AnyZeroFP()) && !match(FalseVal, m_AnyZeroFP()) &&
8739 !cast<Constant>(TrueVal)->containsUndefOrPoisonElement())
8740 OutputZeroVal = TrueVal;
8741 else if (match(FalseVal, m_AnyZeroFP()) && !match(TrueVal, m_AnyZeroFP()) &&
8742 !cast<Constant>(FalseVal)->containsUndefOrPoisonElement())
8743 OutputZeroVal = FalseVal;
8745 if (OutputZeroVal) {
8746 if (match(CmpLHS, m_AnyZeroFP()) && CmpLHS != OutputZeroVal) {
8747 HasMismatchedZeros = true;
8748 CmpLHS = OutputZeroVal;
8750 if (match(CmpRHS, m_AnyZeroFP()) && CmpRHS != OutputZeroVal) {
8751 HasMismatchedZeros = true;
8752 CmpRHS = OutputZeroVal;
8757 LHS = CmpLHS;
8758 RHS = CmpRHS;
8760 // Signed zero may return inconsistent results between implementations.
8761 // (0.0 <= -0.0) ? 0.0 : -0.0 // Returns 0.0
8762 // minNum(0.0, -0.0) // May return -0.0 or 0.0 (IEEE 754-2008 5.3.1)
8763 // Therefore, we behave conservatively and only proceed if at least one of the
8764 // operands is known to not be zero or if we don't care about signed zero.
8765 switch (Pred) {
8766 default: break;
8767 case CmpInst::FCMP_OGT: case CmpInst::FCMP_OLT:
8768 case CmpInst::FCMP_UGT: case CmpInst::FCMP_ULT:
8769 if (!HasMismatchedZeros)
8770 break;
8771 [[fallthrough]];
8772 case CmpInst::FCMP_OGE: case CmpInst::FCMP_OLE:
8773 case CmpInst::FCMP_UGE: case CmpInst::FCMP_ULE:
8774 if (!FMF.noSignedZeros() && !isKnownNonZero(CmpLHS) &&
8775 !isKnownNonZero(CmpRHS))
8776 return {SPF_UNKNOWN, SPNB_NA, false};
8779 SelectPatternNaNBehavior NaNBehavior = SPNB_NA;
8780 bool Ordered = false;
8782 // When given one NaN and one non-NaN input:
8783 // - maxnum/minnum (C99 fmaxf()/fminf()) return the non-NaN input.
8784 // - A simple C99 (a < b ? a : b) construction will return 'b' (as the
8785 // ordered comparison fails), which could be NaN or non-NaN.
8786 // so here we discover exactly what NaN behavior is required/accepted.
8787 if (CmpInst::isFPPredicate(Pred)) {
8788 bool LHSSafe = isKnownNonNaN(CmpLHS, FMF);
8789 bool RHSSafe = isKnownNonNaN(CmpRHS, FMF);
8791 if (LHSSafe && RHSSafe) {
8792 // Both operands are known non-NaN.
8793 NaNBehavior = SPNB_RETURNS_ANY;
8794 } else if (CmpInst::isOrdered(Pred)) {
8795 // An ordered comparison will return false when given a NaN, so it
8796 // returns the RHS.
8797 Ordered = true;
8798 if (LHSSafe)
8799 // LHS is non-NaN, so if RHS is NaN then NaN will be returned.
8800 NaNBehavior = SPNB_RETURNS_NAN;
8801 else if (RHSSafe)
8802 NaNBehavior = SPNB_RETURNS_OTHER;
8803 else
8804 // Completely unsafe.
8805 return {SPF_UNKNOWN, SPNB_NA, false};
8806 } else {
8807 Ordered = false;
8808 // An unordered comparison will return true when given a NaN, so it
8809 // returns the LHS.
8810 if (LHSSafe)
8811 // LHS is non-NaN, so if RHS is NaN then non-NaN will be returned.
8812 NaNBehavior = SPNB_RETURNS_OTHER;
8813 else if (RHSSafe)
8814 NaNBehavior = SPNB_RETURNS_NAN;
8815 else
8816 // Completely unsafe.
8817 return {SPF_UNKNOWN, SPNB_NA, false};
8821 if (TrueVal == CmpRHS && FalseVal == CmpLHS) {
8822 std::swap(CmpLHS, CmpRHS);
8823 Pred = CmpInst::getSwappedPredicate(Pred);
8824 if (NaNBehavior == SPNB_RETURNS_NAN)
8825 NaNBehavior = SPNB_RETURNS_OTHER;
8826 else if (NaNBehavior == SPNB_RETURNS_OTHER)
8827 NaNBehavior = SPNB_RETURNS_NAN;
8828 Ordered = !Ordered;
8831 // ([if]cmp X, Y) ? X : Y
8832 if (TrueVal == CmpLHS && FalseVal == CmpRHS)
8833 return getSelectPattern(Pred, NaNBehavior, Ordered);
8835 if (isKnownNegation(TrueVal, FalseVal)) {
8836 // Sign-extending LHS does not change its sign, so TrueVal/FalseVal can
8837 // match against either LHS or sext(LHS).
8838 auto MaybeSExtCmpLHS =
8839 m_CombineOr(m_Specific(CmpLHS), m_SExt(m_Specific(CmpLHS)));
8840 auto ZeroOrAllOnes = m_CombineOr(m_ZeroInt(), m_AllOnes());
8841 auto ZeroOrOne = m_CombineOr(m_ZeroInt(), m_One());
8842 if (match(TrueVal, MaybeSExtCmpLHS)) {
8843 // Set the return values. If the compare uses the negated value (-X >s 0),
8844 // swap the return values because the negated value is always 'RHS'.
8845 LHS = TrueVal;
8846 RHS = FalseVal;
8847 if (match(CmpLHS, m_Neg(m_Specific(FalseVal))))
8848 std::swap(LHS, RHS);
8850 // (X >s 0) ? X : -X or (X >s -1) ? X : -X --> ABS(X)
8851 // (-X >s 0) ? -X : X or (-X >s -1) ? -X : X --> ABS(X)
8852 if (Pred == ICmpInst::ICMP_SGT && match(CmpRHS, ZeroOrAllOnes))
8853 return {SPF_ABS, SPNB_NA, false};
8855 // (X >=s 0) ? X : -X or (X >=s 1) ? X : -X --> ABS(X)
8856 if (Pred == ICmpInst::ICMP_SGE && match(CmpRHS, ZeroOrOne))
8857 return {SPF_ABS, SPNB_NA, false};
8859 // (X <s 0) ? X : -X or (X <s 1) ? X : -X --> NABS(X)
8860 // (-X <s 0) ? -X : X or (-X <s 1) ? -X : X --> NABS(X)
8861 if (Pred == ICmpInst::ICMP_SLT && match(CmpRHS, ZeroOrOne))
8862 return {SPF_NABS, SPNB_NA, false};
8864 else if (match(FalseVal, MaybeSExtCmpLHS)) {
8865 // Set the return values. If the compare uses the negated value (-X >s 0),
8866 // swap the return values because the negated value is always 'RHS'.
8867 LHS = FalseVal;
8868 RHS = TrueVal;
8869 if (match(CmpLHS, m_Neg(m_Specific(TrueVal))))
8870 std::swap(LHS, RHS);
8872 // (X >s 0) ? -X : X or (X >s -1) ? -X : X --> NABS(X)
8873 // (-X >s 0) ? X : -X or (-X >s -1) ? X : -X --> NABS(X)
8874 if (Pred == ICmpInst::ICMP_SGT && match(CmpRHS, ZeroOrAllOnes))
8875 return {SPF_NABS, SPNB_NA, false};
8877 // (X <s 0) ? -X : X or (X <s 1) ? -X : X --> ABS(X)
8878 // (-X <s 0) ? X : -X or (-X <s 1) ? X : -X --> ABS(X)
8879 if (Pred == ICmpInst::ICMP_SLT && match(CmpRHS, ZeroOrOne))
8880 return {SPF_ABS, SPNB_NA, false};
8884 if (CmpInst::isIntPredicate(Pred))
8885 return matchMinMax(Pred, CmpLHS, CmpRHS, TrueVal, FalseVal, LHS, RHS, Depth);
8887 // According to (IEEE 754-2008 5.3.1), minNum(0.0, -0.0) and similar
8888 // may return either -0.0 or 0.0, so fcmp/select pair has stricter
8889 // semantics than minNum. Be conservative in such case.
8890 if (NaNBehavior != SPNB_RETURNS_ANY ||
8891 (!FMF.noSignedZeros() && !isKnownNonZero(CmpLHS) &&
8892 !isKnownNonZero(CmpRHS)))
8893 return {SPF_UNKNOWN, SPNB_NA, false};
8895 return matchFastFloatClamp(Pred, CmpLHS, CmpRHS, TrueVal, FalseVal, LHS, RHS);
8898 static Value *lookThroughCastConst(CmpInst *CmpI, Type *SrcTy, Constant *C,
8899 Instruction::CastOps *CastOp) {
8900 const DataLayout &DL = CmpI->getDataLayout();
8902 Constant *CastedTo = nullptr;
8903 switch (*CastOp) {
8904 case Instruction::ZExt:
8905 if (CmpI->isUnsigned())
8906 CastedTo = ConstantExpr::getTrunc(C, SrcTy);
8907 break;
8908 case Instruction::SExt:
8909 if (CmpI->isSigned())
8910 CastedTo = ConstantExpr::getTrunc(C, SrcTy, true);
8911 break;
8912 case Instruction::Trunc:
8913 Constant *CmpConst;
8914 if (match(CmpI->getOperand(1), m_Constant(CmpConst)) &&
8915 CmpConst->getType() == SrcTy) {
8916 // Here we have the following case:
8918 // %cond = cmp iN %x, CmpConst
8919 // %tr = trunc iN %x to iK
8920 // %narrowsel = select i1 %cond, iK %t, iK C
8922 // We can always move trunc after select operation:
8924 // %cond = cmp iN %x, CmpConst
8925 // %widesel = select i1 %cond, iN %x, iN CmpConst
8926 // %tr = trunc iN %widesel to iK
8928 // Note that C could be extended in any way because we don't care about
8929 // upper bits after truncation. It can't be the abs pattern, because that
8930 // would look like:
8932 // select i1 %cond, x, -x.
8934 // So only a min/max pattern can be matched. Such a match requires the widened
8935 // C == CmpConst. That is why we set the widened C = CmpConst; the condition
8936 // trunc(CmpConst) == C is checked below.
8937 CastedTo = CmpConst;
8938 } else {
8939 unsigned ExtOp = CmpI->isSigned() ? Instruction::SExt : Instruction::ZExt;
8940 CastedTo = ConstantFoldCastOperand(ExtOp, C, SrcTy, DL);
8942 break;
8943 case Instruction::FPTrunc:
8944 CastedTo = ConstantFoldCastOperand(Instruction::FPExt, C, SrcTy, DL);
8945 break;
8946 case Instruction::FPExt:
8947 CastedTo = ConstantFoldCastOperand(Instruction::FPTrunc, C, SrcTy, DL);
8948 break;
8949 case Instruction::FPToUI:
8950 CastedTo = ConstantFoldCastOperand(Instruction::UIToFP, C, SrcTy, DL);
8951 break;
8952 case Instruction::FPToSI:
8953 CastedTo = ConstantFoldCastOperand(Instruction::SIToFP, C, SrcTy, DL);
8954 break;
8955 case Instruction::UIToFP:
8956 CastedTo = ConstantFoldCastOperand(Instruction::FPToUI, C, SrcTy, DL);
8957 break;
8958 case Instruction::SIToFP:
8959 CastedTo = ConstantFoldCastOperand(Instruction::FPToSI, C, SrcTy, DL);
8960 break;
8961 default:
8962 break;
8965 if (!CastedTo)
8966 return nullptr;
8968 // Make sure the cast doesn't lose any information.
8969 Constant *CastedBack =
8970 ConstantFoldCastOperand(*CastOp, CastedTo, C->getType(), DL);
8971 if (CastedBack && CastedBack != C)
8972 return nullptr;
8974 return CastedTo;
8977 /// Helps to match a select pattern in case of a type mismatch.
8979 /// The function handles the case when the types of the true and false values
8980 /// of a select instruction differ from the type of the cmp instruction's
8981 /// operands because of a cast instruction. It checks whether it is legal to
8982 /// move the cast operation after the "select". If yes, it returns the new
8983 /// second value of the "select" (with the assumption that the cast is moved):
8984 /// 1. As operand of cast instruction when both values of "select" are same cast
8985 /// instructions.
8986 /// 2. As restored constant (by applying reverse cast operation) when the first
8987 /// value of the "select" is a cast operation and the second value is a
8988 /// constant. It is implemented in lookThroughCastConst().
8989 /// 3. As the widened cmp operand, when one operand is a cast instruction and
8990 /// the other is not; the sel(cmp) operands are integers of different widths.
8991 /// NOTE: We return only the new second value because the first value could be
8992 /// accessed as operand of cast instruction.
8993 static Value *lookThroughCast(CmpInst *CmpI, Value *V1, Value *V2,
8994 Instruction::CastOps *CastOp) {
8995 auto *Cast1 = dyn_cast<CastInst>(V1);
8996 if (!Cast1)
8997 return nullptr;
8999 *CastOp = Cast1->getOpcode();
9000 Type *SrcTy = Cast1->getSrcTy();
9001 if (auto *Cast2 = dyn_cast<CastInst>(V2)) {
9002 // If V1 and V2 are both the same cast from the same type, look through V1.
9003 if (*CastOp == Cast2->getOpcode() && SrcTy == Cast2->getSrcTy())
9004 return Cast2->getOperand(0);
9005 return nullptr;
9008 auto *C = dyn_cast<Constant>(V2);
9009 if (C)
9010 return lookThroughCastConst(CmpI, SrcTy, C, CastOp);
9012 Value *CastedTo = nullptr;
9013 if (*CastOp == Instruction::Trunc) {
9014 if (match(CmpI->getOperand(1), m_ZExtOrSExt(m_Specific(V2)))) {
9015 // Here we have the following case:
9016 // %y_ext = sext iK %y to iN
9017 // %cond = cmp iN %x, %y_ext
9018 // %tr = trunc iN %x to iK
9019 // %narrowsel = select i1 %cond, iK %tr, iK %y
9021 // We can always move trunc after select operation:
9022 // %y_ext = sext iK %y to iN
9023 // %cond = cmp iN %x, %y_ext
9024 // %widesel = select i1 %cond, iN %x, iN %y_ext
9025 // %tr = trunc iN %widesel to iK
9026 assert(V2->getType() == Cast1->getType() &&
9027 "V2 and Cast1 should be the same type.");
9028 CastedTo = CmpI->getOperand(1);
9032 return CastedTo;
9034 SelectPatternResult llvm::matchSelectPattern(Value *V, Value *&LHS, Value *&RHS,
9035 Instruction::CastOps *CastOp,
9036 unsigned Depth) {
9037 if (Depth >= MaxAnalysisRecursionDepth)
9038 return {SPF_UNKNOWN, SPNB_NA, false};
9040 SelectInst *SI = dyn_cast<SelectInst>(V);
9041 if (!SI) return {SPF_UNKNOWN, SPNB_NA, false};
9043 CmpInst *CmpI = dyn_cast<CmpInst>(SI->getCondition());
9044 if (!CmpI) return {SPF_UNKNOWN, SPNB_NA, false};
9046 Value *TrueVal = SI->getTrueValue();
9047 Value *FalseVal = SI->getFalseValue();
9049 return llvm::matchDecomposedSelectPattern(CmpI, TrueVal, FalseVal, LHS, RHS,
9050 CastOp, Depth);
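// A minimal usage sketch (the helper name and V are illustrative): detect a
// select that implements a signed maximum and return its operands.
//
// \code
//   static bool matchSMaxSelect(Value *V, Value *&A, Value *&B) {
//     SelectPatternResult SPR =
//         matchSelectPattern(V, A, B, /*CastOp=*/nullptr, /*Depth=*/0);
//     return SPR.Flavor == SPF_SMAX;
//   }
// \endcode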
9053 SelectPatternResult llvm::matchDecomposedSelectPattern(
9054 CmpInst *CmpI, Value *TrueVal, Value *FalseVal, Value *&LHS, Value *&RHS,
9055 Instruction::CastOps *CastOp, unsigned Depth) {
9056 CmpInst::Predicate Pred = CmpI->getPredicate();
9057 Value *CmpLHS = CmpI->getOperand(0);
9058 Value *CmpRHS = CmpI->getOperand(1);
9059 FastMathFlags FMF;
9060 if (isa<FPMathOperator>(CmpI))
9061 FMF = CmpI->getFastMathFlags();
9063 // Bail out early.
9064 if (CmpI->isEquality())
9065 return {SPF_UNKNOWN, SPNB_NA, false};
9067 // Deal with type mismatches.
9068 if (CastOp && CmpLHS->getType() != TrueVal->getType()) {
9069 if (Value *C = lookThroughCast(CmpI, TrueVal, FalseVal, CastOp)) {
9070 // If this is a potential fmin/fmax with a cast to integer, then ignore
9071 // -0.0 because there is no corresponding integer value.
9072 if (*CastOp == Instruction::FPToSI || *CastOp == Instruction::FPToUI)
9073 FMF.setNoSignedZeros();
9074 return ::matchSelectPattern(Pred, FMF, CmpLHS, CmpRHS,
9075 cast<CastInst>(TrueVal)->getOperand(0), C,
9076 LHS, RHS, Depth);
9078 if (Value *C = lookThroughCast(CmpI, FalseVal, TrueVal, CastOp)) {
9079 // If this is a potential fmin/fmax with a cast to integer, then ignore
9080 // -0.0 because there is no corresponding integer value.
9081 if (*CastOp == Instruction::FPToSI || *CastOp == Instruction::FPToUI)
9082 FMF.setNoSignedZeros();
9083 return ::matchSelectPattern(Pred, FMF, CmpLHS, CmpRHS,
9084 C, cast<CastInst>(FalseVal)->getOperand(0),
9085 LHS, RHS, Depth);
9088 return ::matchSelectPattern(Pred, FMF, CmpLHS, CmpRHS, TrueVal, FalseVal,
9089 LHS, RHS, Depth);
9092 CmpInst::Predicate llvm::getMinMaxPred(SelectPatternFlavor SPF, bool Ordered) {
9093 if (SPF == SPF_SMIN) return ICmpInst::ICMP_SLT;
9094 if (SPF == SPF_UMIN) return ICmpInst::ICMP_ULT;
9095 if (SPF == SPF_SMAX) return ICmpInst::ICMP_SGT;
9096 if (SPF == SPF_UMAX) return ICmpInst::ICMP_UGT;
9097 if (SPF == SPF_FMINNUM)
9098 return Ordered ? FCmpInst::FCMP_OLT : FCmpInst::FCMP_ULT;
9099 if (SPF == SPF_FMAXNUM)
9100 return Ordered ? FCmpInst::FCMP_OGT : FCmpInst::FCMP_UGT;
9101 llvm_unreachable("unhandled!");
9104 Intrinsic::ID llvm::getMinMaxIntrinsic(SelectPatternFlavor SPF) {
9105 switch (SPF) {
9106 case SelectPatternFlavor::SPF_UMIN:
9107 return Intrinsic::umin;
9108 case SelectPatternFlavor::SPF_UMAX:
9109 return Intrinsic::umax;
9110 case SelectPatternFlavor::SPF_SMIN:
9111 return Intrinsic::smin;
9112 case SelectPatternFlavor::SPF_SMAX:
9113 return Intrinsic::smax;
9114 default:
9115 llvm_unreachable("Unexpected SPF");
9119 SelectPatternFlavor llvm::getInverseMinMaxFlavor(SelectPatternFlavor SPF) {
9120 if (SPF == SPF_SMIN) return SPF_SMAX;
9121 if (SPF == SPF_UMIN) return SPF_UMAX;
9122 if (SPF == SPF_SMAX) return SPF_SMIN;
9123 if (SPF == SPF_UMAX) return SPF_UMIN;
9124 llvm_unreachable("unhandled!");
9127 Intrinsic::ID llvm::getInverseMinMaxIntrinsic(Intrinsic::ID MinMaxID) {
9128 switch (MinMaxID) {
9129 case Intrinsic::smax: return Intrinsic::smin;
9130 case Intrinsic::smin: return Intrinsic::smax;
9131 case Intrinsic::umax: return Intrinsic::umin;
9132 case Intrinsic::umin: return Intrinsic::umax;
9133 // Please note that the next four intrinsics may produce the same result for
9134 // the original and inverted case even if X != Y, because NaN is handled specially.
9135 case Intrinsic::maximum: return Intrinsic::minimum;
9136 case Intrinsic::minimum: return Intrinsic::maximum;
9137 case Intrinsic::maxnum: return Intrinsic::minnum;
9138 case Intrinsic::minnum: return Intrinsic::maxnum;
9139 default: llvm_unreachable("Unexpected intrinsic");
9143 APInt llvm::getMinMaxLimit(SelectPatternFlavor SPF, unsigned BitWidth) {
9144 switch (SPF) {
9145 case SPF_SMAX: return APInt::getSignedMaxValue(BitWidth);
9146 case SPF_SMIN: return APInt::getSignedMinValue(BitWidth);
9147 case SPF_UMAX: return APInt::getMaxValue(BitWidth);
9148 case SPF_UMIN: return APInt::getMinValue(BitWidth);
9149 default: llvm_unreachable("Unexpected flavor");
9153 std::pair<Intrinsic::ID, bool>
9154 llvm::canConvertToMinOrMaxIntrinsic(ArrayRef<Value *> VL) {
9155 // Check if VL contains select instructions that can be folded into a min/max
9156 // vector intrinsic and return the intrinsic if it is possible.
9157 // TODO: Support floating point min/max.
9158 bool AllCmpSingleUse = true;
9159 SelectPatternResult SelectPattern;
9160 SelectPattern.Flavor = SPF_UNKNOWN;
9161 if (all_of(VL, [&SelectPattern, &AllCmpSingleUse](Value *I) {
9162 Value *LHS, *RHS;
9163 auto CurrentPattern = matchSelectPattern(I, LHS, RHS);
9164 if (!SelectPatternResult::isMinOrMax(CurrentPattern.Flavor))
9165 return false;
9166 if (SelectPattern.Flavor != SPF_UNKNOWN &&
9167 SelectPattern.Flavor != CurrentPattern.Flavor)
9168 return false;
9169 SelectPattern = CurrentPattern;
9170 AllCmpSingleUse &=
9171 match(I, m_Select(m_OneUse(m_Value()), m_Value(), m_Value()));
9172 return true;
9173 })) {
9174 switch (SelectPattern.Flavor) {
9175 case SPF_SMIN:
9176 return {Intrinsic::smin, AllCmpSingleUse};
9177 case SPF_UMIN:
9178 return {Intrinsic::umin, AllCmpSingleUse};
9179 case SPF_SMAX:
9180 return {Intrinsic::smax, AllCmpSingleUse};
9181 case SPF_UMAX:
9182 return {Intrinsic::umax, AllCmpSingleUse};
9183 case SPF_FMAXNUM:
9184 return {Intrinsic::maxnum, AllCmpSingleUse};
9185 case SPF_FMINNUM:
9186 return {Intrinsic::minnum, AllCmpSingleUse};
9187 default:
9188 llvm_unreachable("unexpected select pattern flavor");
9191 return {Intrinsic::not_intrinsic, false};
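// Usage sketch for canConvertToMinOrMaxIntrinsic (a hypothetical caller, e.g.
// a vectorizer looking at one value per lane): if every value in VL is the
// same min/max select flavor, the returned intrinsic ID identifies it, and the
// bool reports whether all feeding compares were single-use.
//
// \code
//   static bool isUniformUMin(ArrayRef<Value *> VL) {
//     auto [IID, AllCmpSingleUse] = canConvertToMinOrMaxIntrinsic(VL);
//     (void)AllCmpSingleUse;
//     return IID == Intrinsic::umin;
//   }
// \endcode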
9194 bool llvm::matchSimpleRecurrence(const PHINode *P, BinaryOperator *&BO,
9195 Value *&Start, Value *&Step) {
9196 // Handle the case of a simple two-predecessor recurrence PHI.
9197 // There's a lot more that could theoretically be done here, but
9198 // this is sufficient to catch some interesting cases.
9199 if (P->getNumIncomingValues() != 2)
9200 return false;
9202 for (unsigned i = 0; i != 2; ++i) {
9203 Value *L = P->getIncomingValue(i);
9204 Value *R = P->getIncomingValue(!i);
9205 auto *LU = dyn_cast<BinaryOperator>(L);
9206 if (!LU)
9207 continue;
9208 unsigned Opcode = LU->getOpcode();
9210 switch (Opcode) {
9211 default:
9212 continue;
9213 // TODO: Expand list -- xor, gep, uadd.sat etc.
9214 case Instruction::LShr:
9215 case Instruction::AShr:
9216 case Instruction::Shl:
9217 case Instruction::Add:
9218 case Instruction::Sub:
9219 case Instruction::UDiv:
9220 case Instruction::URem:
9221 case Instruction::And:
9222 case Instruction::Or:
9223 case Instruction::Mul:
9224 case Instruction::FMul: {
9225 Value *LL = LU->getOperand(0);
9226 Value *LR = LU->getOperand(1);
9227 // Find a recurrence.
9228 if (LL == P)
9229 L = LR;
9230 else if (LR == P)
9231 L = LL;
9232 else
9233 continue; // Check for recurrence with L and R flipped.
9235 break; // Match!
9239 // We have matched a recurrence of the form:
9240 // %iv = [R, %entry], [%iv.next, %backedge]
9241 // %iv.next = binop %iv, L
9242 // OR
9243 // %iv = [R, %entry], [%iv.next, %backedge]
9244 // %iv.next = binop L, %iv
9245 BO = LU;
9246 Start = R;
9247 Step = L;
9248 return true;
9250 return false;
9253 bool llvm::matchSimpleRecurrence(const BinaryOperator *I, PHINode *&P,
9254 Value *&Start, Value *&Step) {
9255 BinaryOperator *BO = nullptr;
9256 P = dyn_cast<PHINode>(I->getOperand(0));
9257 if (!P)
9258 P = dyn_cast<PHINode>(I->getOperand(1));
9259 return P && matchSimpleRecurrence(P, BO, Start, Step) && BO == I;
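// Sketch of a caller of the PHI form above (the names are illustrative):
// detect an induction-style recurrence
//   %iv = phi [ %start, %entry ], [ %iv.next, %latch ]
//   %iv.next = add %iv, %step
// and report whether its step operation is an Add.
//
// \code
//   static bool isAddRecurrence(const PHINode *P, Value *&Start, Value *&Step) {
//     BinaryOperator *BO = nullptr;
//     return matchSimpleRecurrence(P, BO, Start, Step) &&
//            BO->getOpcode() == Instruction::Add;
//   }
// \endcode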
9262 /// Return true if "icmp Pred LHS RHS" is always true.
9263 static bool isTruePredicate(CmpInst::Predicate Pred, const Value *LHS,
9264 const Value *RHS) {
9265 if (ICmpInst::isTrueWhenEqual(Pred) && LHS == RHS)
9266 return true;
9268 switch (Pred) {
9269 default:
9270 return false;
9272 case CmpInst::ICMP_SLE: {
9273 const APInt *C;
9275 // LHS s<= LHS +_{nsw} C if C >= 0
9276 // LHS s<= LHS | C if C >= 0
9277 if (match(RHS, m_NSWAdd(m_Specific(LHS), m_APInt(C))) ||
9278 match(RHS, m_Or(m_Specific(LHS), m_APInt(C))))
9279 return !C->isNegative();
9281 // LHS s<= smax(LHS, V) for any V
9282 if (match(RHS, m_c_SMax(m_Specific(LHS), m_Value())))
9283 return true;
9285 // smin(RHS, V) s<= RHS for any V
9286 if (match(LHS, m_c_SMin(m_Specific(RHS), m_Value())))
9287 return true;
9289 // Match A to (X +_{nsw} CA) and B to (X +_{nsw} CB)
9290 const Value *X;
9291 const APInt *CLHS, *CRHS;
9292 if (match(LHS, m_NSWAddLike(m_Value(X), m_APInt(CLHS))) &&
9293 match(RHS, m_NSWAddLike(m_Specific(X), m_APInt(CRHS))))
9294 return CLHS->sle(*CRHS);
9296 return false;
9299 case CmpInst::ICMP_ULE: {
9300 // LHS u<= LHS +_{nuw} V for any V
9301 if (match(RHS, m_c_Add(m_Specific(LHS), m_Value())) &&
9302 cast<OverflowingBinaryOperator>(RHS)->hasNoUnsignedWrap())
9303 return true;
9305 // LHS u<= LHS | V for any V
9306 if (match(RHS, m_c_Or(m_Specific(LHS), m_Value())))
9307 return true;
9309 // LHS u<= umax(LHS, V) for any V
9310 if (match(RHS, m_c_UMax(m_Specific(LHS), m_Value())))
9311 return true;
9313 // RHS >> V u<= RHS for any V
9314 if (match(LHS, m_LShr(m_Specific(RHS), m_Value())))
9315 return true;
9317 // RHS u/ C_ugt_1 u<= RHS
9318 const APInt *C;
9319 if (match(LHS, m_UDiv(m_Specific(RHS), m_APInt(C))) && C->ugt(1))
9320 return true;
9322 // RHS & V u<= RHS for any V
9323 if (match(LHS, m_c_And(m_Specific(RHS), m_Value())))
9324 return true;
9326 // umin(RHS, V) u<= RHS for any V
9327 if (match(LHS, m_c_UMin(m_Specific(RHS), m_Value())))
9328 return true;
9330 // Match A to (X +_{nuw} CA) and B to (X +_{nuw} CB)
9331 const Value *X;
9332 const APInt *CLHS, *CRHS;
9333 if (match(LHS, m_NUWAddLike(m_Value(X), m_APInt(CLHS))) &&
9334 match(RHS, m_NUWAddLike(m_Specific(X), m_APInt(CRHS))))
9335 return CLHS->ule(*CRHS);
9337 return false;
9342 /// Return true if "icmp Pred BLHS BRHS" is true whenever "icmp Pred
9343 /// ALHS ARHS" is true. Otherwise, return std::nullopt.
9344 static std::optional<bool>
9345 isImpliedCondOperands(CmpInst::Predicate Pred, const Value *ALHS,
9346 const Value *ARHS, const Value *BLHS, const Value *BRHS) {
9347 switch (Pred) {
9348 default:
9349 return std::nullopt;
9351 case CmpInst::ICMP_SLT:
9352 case CmpInst::ICMP_SLE:
9353 if (isTruePredicate(CmpInst::ICMP_SLE, BLHS, ALHS) &&
9354 isTruePredicate(CmpInst::ICMP_SLE, ARHS, BRHS))
9355 return true;
9356 return std::nullopt;
9358 case CmpInst::ICMP_SGT:
9359 case CmpInst::ICMP_SGE:
9360 if (isTruePredicate(CmpInst::ICMP_SLE, ALHS, BLHS) &&
9361 isTruePredicate(CmpInst::ICMP_SLE, BRHS, ARHS))
9362 return true;
9363 return std::nullopt;
9365 case CmpInst::ICMP_ULT:
9366 case CmpInst::ICMP_ULE:
9367 if (isTruePredicate(CmpInst::ICMP_ULE, BLHS, ALHS) &&
9368 isTruePredicate(CmpInst::ICMP_ULE, ARHS, BRHS))
9369 return true;
9370 return std::nullopt;
9372 case CmpInst::ICMP_UGT:
9373 case CmpInst::ICMP_UGE:
9374 if (isTruePredicate(CmpInst::ICMP_ULE, ALHS, BLHS) &&
9375 isTruePredicate(CmpInst::ICMP_ULE, BRHS, ARHS))
9376 return true;
9377 return std::nullopt;
9381 /// Return true if "icmp LPred X, LCR" implies "icmp RPred X, RCR" is true.
9382 /// Return false if "icmp LPred X, LCR" implies "icmp RPred X, RCR" is false.
9383 /// Otherwise, return std::nullopt if we can't infer anything.
9384 static std::optional<bool>
9385 isImpliedCondCommonOperandWithCR(CmpPredicate LPred, const ConstantRange &LCR,
9386 CmpPredicate RPred, const ConstantRange &RCR) {
9387 auto CRImpliesPred = [&](ConstantRange CR,
9388 CmpInst::Predicate Pred) -> std::optional<bool> {
9389 // If all true values for lhs and true for rhs, lhs implies rhs
9390 if (CR.icmp(Pred, RCR))
9391 return true;
9393 // If there is no overlap, lhs implies not rhs
9394 if (CR.icmp(CmpInst::getInversePredicate(Pred), RCR))
9395 return false;
9397 return std::nullopt;
9399 if (auto Res = CRImpliesPred(ConstantRange::makeAllowedICmpRegion(LPred, LCR),
9400 RPred))
9401 return Res;
9402 if (LPred.hasSameSign() ^ RPred.hasSameSign()) {
9403 LPred = LPred.hasSameSign() ? ICmpInst::getFlippedSignednessPredicate(LPred)
9404 : static_cast<CmpInst::Predicate>(LPred);
9405 RPred = RPred.hasSameSign() ? ICmpInst::getFlippedSignednessPredicate(RPred)
9406 : static_cast<CmpInst::Predicate>(RPred);
9407 return CRImpliesPred(ConstantRange::makeAllowedICmpRegion(LPred, LCR),
9408 RPred);
9410 return std::nullopt;
9413 /// Return true if LHS implies RHS (expanded to its components as "R0 RPred R1")
9414 /// is true. Return false if LHS implies RHS is false. Otherwise, return
9415 /// std::nullopt if we can't infer anything.
9416 static std::optional<bool>
9417 isImpliedCondICmps(const ICmpInst *LHS, CmpPredicate RPred, const Value *R0,
9418 const Value *R1, const DataLayout &DL, bool LHSIsTrue) {
9419 Value *L0 = LHS->getOperand(0);
9420 Value *L1 = LHS->getOperand(1);
9422 // The rest of the logic assumes the LHS condition is true. If that's not the
9423 // case, invert the predicate to make it so.
9424 CmpPredicate LPred =
9425 LHSIsTrue ? LHS->getCmpPredicate() : LHS->getInverseCmpPredicate();
9427 // We can have non-canonical operands, so try to normalize any common operand
9428 // to L0/R0.
9429 if (L0 == R1) {
9430 std::swap(R0, R1);
9431 RPred = ICmpInst::getSwappedCmpPredicate(RPred);
9433 if (R0 == L1) {
9434 std::swap(L0, L1);
9435 LPred = ICmpInst::getSwappedCmpPredicate(LPred);
9437 if (L1 == R1) {
9438 // If we have L0 == R0 and L1 == R1, then make L1/R1 the constants.
9439 if (L0 != R0 || match(L0, m_ImmConstant())) {
9440 std::swap(L0, L1);
9441 LPred = ICmpInst::getSwappedCmpPredicate(LPred);
9442 std::swap(R0, R1);
9443 RPred = ICmpInst::getSwappedCmpPredicate(RPred);
9447 // See if we can infer anything if operand-0 matches and we have at least one
9448 // constant.
9449 const APInt *Unused;
9450 if (L0 == R0 && (match(L1, m_APInt(Unused)) || match(R1, m_APInt(Unused)))) {
9451 // Potential TODO: We could also further use the constant range of L0/R0 to
9452 // further constraint the constant ranges. At the moment this leads to
9453 // several regressions related to not transforming `multi_use(A + C0) eq/ne
9454 // C1` (see discussion: D58633).
9455 ConstantRange LCR = computeConstantRange(
9456 L1, ICmpInst::isSigned(LPred), /* UseInstrInfo=*/true, /*AC=*/nullptr,
9457 /*CxtI=*/nullptr, /*DT=*/nullptr, MaxAnalysisRecursionDepth - 1);
9458 ConstantRange RCR = computeConstantRange(
9459 R1, ICmpInst::isSigned(RPred), /* UseInstrInfo=*/true, /*AC=*/nullptr,
9460 /*CxtI=*/nullptr, /*DT=*/nullptr, MaxAnalysisRecursionDepth - 1);
9461 // Even if L1/R1 are not both constant, we can still sometimes deduce
9462 // relationship from a single constant. For example X u> Y implies X != 0.
9463 if (auto R = isImpliedCondCommonOperandWithCR(LPred, LCR, RPred, RCR))
9464 return R;
9465 // If both L1/R1 were exact constant ranges and we didn't get anything
9466 // here, we won't be able to deduce this.
9467 if (match(L1, m_APInt(Unused)) && match(R1, m_APInt(Unused)))
9468 return std::nullopt;
9471 // Can we infer anything when the two compares have matching operands?
9472 if (L0 == R0 && L1 == R1)
9473 return ICmpInst::isImpliedByMatchingCmp(LPred, RPred);
9475 // It only really makes sense in the context of signed comparison for "X - Y
9476 // must be positive if X >= Y and no overflow".
9477 // Take SGT as an example: L0:x > L1:y and C >= 0
9478 // ==> R0:(x -nsw y) < R1:(-C) is false
9479 CmpInst::Predicate SignedLPred = LPred.getPreferredSignedPredicate();
9480 if ((SignedLPred == ICmpInst::ICMP_SGT ||
9481 SignedLPred == ICmpInst::ICMP_SGE) &&
9482 match(R0, m_NSWSub(m_Specific(L0), m_Specific(L1)))) {
9483 if (match(R1, m_NonPositive()) &&
9484 ICmpInst::isImpliedByMatchingCmp(SignedLPred, RPred) == false)
9485 return false;
9488 // Take SLT as an example: L0:x < L1:y and C <= 0
9489 // ==> R0:(x -nsw y) < R1:(-C) is true
9490 if ((SignedLPred == ICmpInst::ICMP_SLT ||
9491 SignedLPred == ICmpInst::ICMP_SLE) &&
9492 match(R0, m_NSWSub(m_Specific(L0), m_Specific(L1)))) {
9493 if (match(R1, m_NonNegative()) &&
9494 ICmpInst::isImpliedByMatchingCmp(SignedLPred, RPred) == true)
9495 return true;
9498 // L0 = R0 = L1 + R1, L0 >=u L1 implies R0 >=u R1, L0 <u L1 implies R0 <u R1
9499 if (L0 == R0 &&
9500 (LPred == ICmpInst::ICMP_ULT || LPred == ICmpInst::ICMP_UGE) &&
9501 (RPred == ICmpInst::ICMP_ULT || RPred == ICmpInst::ICMP_UGE) &&
9502 match(L0, m_c_Add(m_Specific(L1), m_Specific(R1))))
9503 return CmpPredicate::getMatching(LPred, RPred).has_value();
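// For illustration of the check above: `(a + b) u< a` and `(a + b) u< b` are
// equivalent, as both hold exactly when the unsigned addition wraps, so
// matching predicates imply each other and mismatched ones imply the opposite.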
9505 if (auto P = CmpPredicate::getMatching(LPred, RPred))
9506 return isImpliedCondOperands(*P, L0, L1, R0, R1);
9508 return std::nullopt;
9511 /// Return true if LHS implies RHS is true. Return false if LHS implies RHS is
9512 /// false. Otherwise, return std::nullopt if we can't infer anything. We
9513 /// expect the RHS to be an icmp and the LHS to be an 'and', 'or', or a 'select'
9514 /// instruction.
9515 static std::optional<bool>
9516 isImpliedCondAndOr(const Instruction *LHS, CmpPredicate RHSPred,
9517 const Value *RHSOp0, const Value *RHSOp1,
9518 const DataLayout &DL, bool LHSIsTrue, unsigned Depth) {
9519 // The LHS must be an 'or', 'and', or a 'select' instruction.
9520 assert((LHS->getOpcode() == Instruction::And ||
9521 LHS->getOpcode() == Instruction::Or ||
9522 LHS->getOpcode() == Instruction::Select) &&
9523 "Expected LHS to be 'and', 'or', or 'select'.");
9525 assert(Depth <= MaxAnalysisRecursionDepth && "Hit recursion limit");
9527 // If the result of an 'or' is false, then we know both legs of the 'or' are
9528 // false. Similarly, if the result of an 'and' is true, then we know both
9529 // legs of the 'and' are true.
9530 const Value *ALHS, *ARHS;
9531 if ((!LHSIsTrue && match(LHS, m_LogicalOr(m_Value(ALHS), m_Value(ARHS)))) ||
9532 (LHSIsTrue && match(LHS, m_LogicalAnd(m_Value(ALHS), m_Value(ARHS))))) {
9533 // FIXME: Make this non-recursive.
9534 if (std::optional<bool> Implication = isImpliedCondition(
9535 ALHS, RHSPred, RHSOp0, RHSOp1, DL, LHSIsTrue, Depth + 1))
9536 return Implication;
9537 if (std::optional<bool> Implication = isImpliedCondition(
9538 ARHS, RHSPred, RHSOp0, RHSOp1, DL, LHSIsTrue, Depth + 1))
9539 return Implication;
9540 return std::nullopt;
9542 return std::nullopt;
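// Illustration for the decomposition above: if `(x u< 4) && (y u< 4)` is
// known true, then `x u< 10` follows from the first conjunct alone.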
9545 std::optional<bool>
9546 llvm::isImpliedCondition(const Value *LHS, CmpPredicate RHSPred,
9547 const Value *RHSOp0, const Value *RHSOp1,
9548 const DataLayout &DL, bool LHSIsTrue, unsigned Depth) {
9549 // Bail out when we hit the limit.
9550 if (Depth == MaxAnalysisRecursionDepth)
9551 return std::nullopt;
9553 // A mismatch occurs when we compare a scalar cmp to a vector cmp, for
9554 // example.
9555 if (RHSOp0->getType()->isVectorTy() != LHS->getType()->isVectorTy())
9556 return std::nullopt;
9558 assert(LHS->getType()->isIntOrIntVectorTy(1) &&
9559 "Expected integer type only!");
9561 // Match not
9562 if (match(LHS, m_Not(m_Value(LHS))))
9563 LHSIsTrue = !LHSIsTrue;
9565 // Both LHS and RHS are icmps.
9566 const ICmpInst *LHSCmp = dyn_cast<ICmpInst>(LHS);
9567 if (LHSCmp)
9568 return isImpliedCondICmps(LHSCmp, RHSPred, RHSOp0, RHSOp1, DL, LHSIsTrue);
9570 // The LHS should be an 'or', 'and', or a 'select' instruction. We expect
9571 // the RHS to be an icmp.
9572 // FIXME: Add support for and/or/select on the RHS.
9573 if (const Instruction *LHSI = dyn_cast<Instruction>(LHS)) {
9574 if ((LHSI->getOpcode() == Instruction::And ||
9575 LHSI->getOpcode() == Instruction::Or ||
9576 LHSI->getOpcode() == Instruction::Select))
9577 return isImpliedCondAndOr(LHSI, RHSPred, RHSOp0, RHSOp1, DL, LHSIsTrue,
9578 Depth);
9580 return std::nullopt;
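// Usage sketch for the overload above (illustrative only; the values and
// names are hypothetical): given `%c = icmp ult i32 %x, 10` that is known to
// be true, a caller could ask
//   isImpliedCondition(C, ICmpInst::ICMP_ULT, X,
//                      ConstantInt::get(X->getType(), 20), DL,
//                      /*LHSIsTrue=*/true)
// and would get back `true`, since `x u< 10` implies `x u< 20`.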
9583 std::optional<bool> llvm::isImpliedCondition(const Value *LHS, const Value *RHS,
9584 const DataLayout &DL,
9585 bool LHSIsTrue, unsigned Depth) {
9586 // LHS ==> RHS by definition
9587 if (LHS == RHS)
9588 return LHSIsTrue;
9590 // Match not
9591 bool InvertRHS = false;
9592 if (match(RHS, m_Not(m_Value(RHS)))) {
9593 if (LHS == RHS)
9594 return !LHSIsTrue;
9595 InvertRHS = true;
9598 if (const ICmpInst *RHSCmp = dyn_cast<ICmpInst>(RHS)) {
9599 if (auto Implied = isImpliedCondition(
9600 LHS, RHSCmp->getCmpPredicate(), RHSCmp->getOperand(0),
9601 RHSCmp->getOperand(1), DL, LHSIsTrue, Depth))
9602 return InvertRHS ? !*Implied : *Implied;
9603 return std::nullopt;
9606 if (Depth == MaxAnalysisRecursionDepth)
9607 return std::nullopt;
9609 // LHS ==> (RHS1 || RHS2) if LHS ==> RHS1 or LHS ==> RHS2
9610 // LHS ==> !(RHS1 && RHS2) if LHS ==> !RHS1 or LHS ==> !RHS2
9611 const Value *RHS1, *RHS2;
9612 if (match(RHS, m_LogicalOr(m_Value(RHS1), m_Value(RHS2)))) {
9613 if (std::optional<bool> Imp =
9614 isImpliedCondition(LHS, RHS1, DL, LHSIsTrue, Depth + 1))
9615 if (*Imp == true)
9616 return !InvertRHS;
9617 if (std::optional<bool> Imp =
9618 isImpliedCondition(LHS, RHS2, DL, LHSIsTrue, Depth + 1))
9619 if (*Imp == true)
9620 return !InvertRHS;
9622 if (match(RHS, m_LogicalAnd(m_Value(RHS1), m_Value(RHS2)))) {
9623 if (std::optional<bool> Imp =
9624 isImpliedCondition(LHS, RHS1, DL, LHSIsTrue, Depth + 1))
9625 if (*Imp == false)
9626 return InvertRHS;
9627 if (std::optional<bool> Imp =
9628 isImpliedCondition(LHS, RHS2, DL, LHSIsTrue, Depth + 1))
9629 if (*Imp == false)
9630 return InvertRHS;
9633 return std::nullopt;
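// Illustration for the or/and handling above: `x u< 8` implies
// `(x u< 10) || y` is true (via the first disjunct) and implies
// `(x u> 10) && y` is false (via the first conjunct).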
9636 // Returns a pair (Condition, ConditionIsTrue), where Condition is a branch
9637 // condition dominating ContextI, or nullptr if no condition is found.
9638 static std::pair<Value *, bool>
9639 getDomPredecessorCondition(const Instruction *ContextI) {
9640 if (!ContextI || !ContextI->getParent())
9641 return {nullptr, false};
9643 // TODO: This is a poor/cheap way to determine dominance. Should we use a
9644 // dominator tree (e.g., from a SimplifyQuery) instead?
9645 const BasicBlock *ContextBB = ContextI->getParent();
9646 const BasicBlock *PredBB = ContextBB->getSinglePredecessor();
9647 if (!PredBB)
9648 return {nullptr, false};
9650 // We need a conditional branch in the predecessor.
9651 Value *PredCond;
9652 BasicBlock *TrueBB, *FalseBB;
9653 if (!match(PredBB->getTerminator(), m_Br(m_Value(PredCond), TrueBB, FalseBB)))
9654 return {nullptr, false};
9656 // The branch should get simplified. Don't bother simplifying this condition.
9657 if (TrueBB == FalseBB)
9658 return {nullptr, false};
9660 assert((TrueBB == ContextBB || FalseBB == ContextBB) &&
9661 "Predecessor block does not point to successor?");
9663 // Is this condition implied by the predecessor condition?
9664 return {PredCond, TrueBB == ContextBB};
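// For illustration, given
//   pred:
//     %cond = icmp eq i32 %x, 0
//     br i1 %cond, label %bb, label %other
//   bb:            ; ContextI lives here; pred is its only predecessor
// the function above returns {%cond, /*ConditionIsTrue=*/true}.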
9667 std::optional<bool> llvm::isImpliedByDomCondition(const Value *Cond,
9668 const Instruction *ContextI,
9669 const DataLayout &DL) {
9670 assert(Cond->getType()->isIntOrIntVectorTy(1) && "Condition must be bool");
9671 auto PredCond = getDomPredecessorCondition(ContextI);
9672 if (PredCond.first)
9673 return isImpliedCondition(PredCond.first, Cond, DL, PredCond.second);
9674 return std::nullopt;
9677 std::optional<bool> llvm::isImpliedByDomCondition(CmpPredicate Pred,
9678 const Value *LHS,
9679 const Value *RHS,
9680 const Instruction *ContextI,
9681 const DataLayout &DL) {
9682 auto PredCond = getDomPredecessorCondition(ContextI);
9683 if (PredCond.first)
9684 return isImpliedCondition(PredCond.first, Pred, LHS, RHS, DL,
9685 PredCond.second);
9686 return std::nullopt;
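// Illustration for the dominating-condition queries above: if the sole
// predecessor ends in `br i1 (icmp eq i32 %x, 0)` and ContextI sits in the
// true successor, a query for (eq, %x, 0) returns true and one for
// (ne, %x, 0) returns false.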
9689 static void setLimitsForBinOp(const BinaryOperator &BO, APInt &Lower,
9690 APInt &Upper, const InstrInfoQuery &IIQ,
9691 bool PreferSignedRange) {
9692 unsigned Width = Lower.getBitWidth();
9693 const APInt *C;
9694 switch (BO.getOpcode()) {
9695 case Instruction::Add:
9696 if (match(BO.getOperand(1), m_APInt(C)) && !C->isZero()) {
9697 bool HasNSW = IIQ.hasNoSignedWrap(&BO);
9698 bool HasNUW = IIQ.hasNoUnsignedWrap(&BO);
9700 // If the caller expects a signed compare, then try to use a signed range.
9701 // Otherwise if both no-wraps are set, use the unsigned range because it
9702 // is never larger than the signed range. Example:
9703 // "add nuw nsw i8 X, -2" is unsigned [254,255] vs. signed [-128, 125].
9704 if (PreferSignedRange && HasNSW && HasNUW)
9705 HasNUW = false;
9707 if (HasNUW) {
9708 // 'add nuw x, C' produces [C, UINT_MAX].
9709 Lower = *C;
9710 } else if (HasNSW) {
9711 if (C->isNegative()) {
9712 // 'add nsw x, -C' produces [SINT_MIN, SINT_MAX - C].
9713 Lower = APInt::getSignedMinValue(Width);
9714 Upper = APInt::getSignedMaxValue(Width) + *C + 1;
9715 } else {
9716 // 'add nsw x, +C' produces [SINT_MIN + C, SINT_MAX].
9717 Lower = APInt::getSignedMinValue(Width) + *C;
9718 Upper = APInt::getSignedMaxValue(Width) + 1;
9722 break;
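// Illustration for the Add case above (i8): 'add nuw i8 %x, 16' gives
// [16, 255], and 'add nsw i8 %x, -2' gives [-128, 125].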
9724 case Instruction::And:
9725 if (match(BO.getOperand(1), m_APInt(C)))
9726 // 'and x, C' produces [0, C].
9727 Upper = *C + 1;
9728 // X & -X is a power of two or zero, so we can cap the value at the
9729 // maximum power of two.
9730 if (match(BO.getOperand(0), m_Neg(m_Specific(BO.getOperand(1)))) ||
9731 match(BO.getOperand(1), m_Neg(m_Specific(BO.getOperand(0)))))
9732 Upper = APInt::getSignedMinValue(Width) + 1;
9733 break;
9735 case Instruction::Or:
9736 if (match(BO.getOperand(1), m_APInt(C)))
9737 // 'or x, C' produces [C, UINT_MAX].
9738 Lower = *C;
9739 break;
9741 case Instruction::AShr:
9742 if (match(BO.getOperand(1), m_APInt(C)) && C->ult(Width)) {
9743 // 'ashr x, C' produces [INT_MIN >> C, INT_MAX >> C].
9744 Lower = APInt::getSignedMinValue(Width).ashr(*C);
9745 Upper = APInt::getSignedMaxValue(Width).ashr(*C) + 1;
9746 } else if (match(BO.getOperand(0), m_APInt(C))) {
9747 unsigned ShiftAmount = Width - 1;
9748 if (!C->isZero() && IIQ.isExact(&BO))
9749 ShiftAmount = C->countr_zero();
9750 if (C->isNegative()) {
9751 // 'ashr C, x' produces [C, C >> (Width-1)]
9752 Lower = *C;
9753 Upper = C->ashr(ShiftAmount) + 1;
9754 } else {
9755 // 'ashr C, x' produces [C >> (Width-1), C]
9756 Lower = C->ashr(ShiftAmount);
9757 Upper = *C + 1;
9760 break;
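// Illustration for the AShr case above (i8): 'ashr i8 %x, 2' gives [-32, 31],
// and 'ashr i8 -100, %x' (without 'exact') gives [-100, -1].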
9762 case Instruction::LShr:
9763 if (match(BO.getOperand(1), m_APInt(C)) && C->ult(Width)) {
9764 // 'lshr x, C' produces [0, UINT_MAX >> C].
9765 Upper = APInt::getAllOnes(Width).lshr(*C) + 1;
9766 } else if (match(BO.getOperand(0), m_APInt(C))) {
9767 // 'lshr C, x' produces [C >> (Width-1), C].
9768 unsigned ShiftAmount = Width - 1;
9769 if (!C->isZero() && IIQ.isExact(&BO))
9770 ShiftAmount = C->countr_zero();
9771 Lower = C->lshr(ShiftAmount);
9772 Upper = *C + 1;
9774 break;
9776 case Instruction::Shl:
9777 if (match(BO.getOperand(0), m_APInt(C))) {
9778 if (IIQ.hasNoUnsignedWrap(&BO)) {
9779 // 'shl nuw C, x' produces [C, C << CLZ(C)]
9780 Lower = *C;
9781 Upper = Lower.shl(Lower.countl_zero()) + 1;
9782 } else if (BO.hasNoSignedWrap()) { // TODO: What if both nuw+nsw?
9783 if (C->isNegative()) {
9784 // 'shl nsw C, x' produces [C << CLO(C)-1, C]
9785 unsigned ShiftAmount = C->countl_one() - 1;
9786 Lower = C->shl(ShiftAmount);
9787 Upper = *C + 1;
9788 } else {
9789 // 'shl nsw C, x' produces [C, C << CLZ(C)-1]
9790 unsigned ShiftAmount = C->countl_zero() - 1;
9791 Lower = *C;
9792 Upper = C->shl(ShiftAmount) + 1;
9794 } else {
9795 // If lowbit is set, value can never be zero.
9796 if ((*C)[0])
9797 Lower = APInt::getOneBitSet(Width, 0);
9798 // If we are shifting a constant, the largest it can be is when the longest
9799 // sequence of consecutive ones is shifted to the high bits (breaking
9800 // ties in favor of the higher sequence). At the moment we take a liberal
9801 // upper bound on this by just popcounting the constant.
9802 // TODO: There may be a bitwise trick for finding the longest/highest
9803 // consecutive sequence of ones (the naive method is an O(Width) loop).
9804 Upper = APInt::getHighBitsSet(Width, C->popcount()) + 1;
9806 } else if (match(BO.getOperand(1), m_APInt(C)) && C->ult(Width)) {
9807 Upper = APInt::getBitsSetFrom(Width, C->getZExtValue()) + 1;
9809 break;
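// Illustration for the Shl case above (i8): 'shl nuw i8 3, %x' gives [3, 192],
// 'shl nsw i8 3, %x' gives [3, 96], and 'shl i8 %x, 4' gives [0, 240].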
9811 case Instruction::SDiv:
9812 if (match(BO.getOperand(1), m_APInt(C))) {
9813 APInt IntMin = APInt::getSignedMinValue(Width);
9814 APInt IntMax = APInt::getSignedMaxValue(Width);
9815 if (C->isAllOnes()) {
9816 // 'sdiv x, -1' produces [INT_MIN + 1, INT_MAX].
9818 Lower = IntMin + 1;
9819 Upper = IntMax + 1;
9820 } else if (C->countl_zero() < Width - 1) {
9821 // 'sdiv x, C' produces [INT_MIN / C, INT_MAX / C]
9822 // where C != -1 and C != 0 and C != 1
9823 Lower = IntMin.sdiv(*C);
9824 Upper = IntMax.sdiv(*C);
9825 if (Lower.sgt(Upper))
9826 std::swap(Lower, Upper);
9827 Upper = Upper + 1;
9828 assert(Upper != Lower && "Upper part of range has wrapped!");
9830 } else if (match(BO.getOperand(0), m_APInt(C))) {
9831 if (C->isMinSignedValue()) {
9832 // 'sdiv INT_MIN, x' produces [INT_MIN, INT_MIN / -2].
9833 Lower = *C;
9834 Upper = Lower.lshr(1) + 1;
9835 } else {
9836 // 'sdiv C, x' produces [-|C|, |C|].
9837 Upper = C->abs() + 1;
9838 Lower = (-Upper) + 1;
9841 break;
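// Illustration for the SDiv case above (i8): 'sdiv i8 %x, 3' gives [-42, 42],
// and 'sdiv i8 -128, %x' gives [-128, 64].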
9843 case Instruction::UDiv:
9844 if (match(BO.getOperand(1), m_APInt(C)) && !C->isZero()) {
9845 // 'udiv x, C' produces [0, UINT_MAX / C].
9846 Upper = APInt::getMaxValue(Width).udiv(*C) + 1;
9847 } else if (match(BO.getOperand(0), m_APInt(C))) {
9848 // 'udiv C, x' produces [0, C].
9849 Upper = *C + 1;
9851 break;
9853 case Instruction::SRem:
9854 if (match(BO.getOperand(1), m_APInt(C))) {
9855 // 'srem x, C' produces (-|C|, |C|).
9856 Upper = C->abs();
9857 Lower = (-Upper) + 1;
9858 } else if (match(BO.getOperand(0), m_APInt(C))) {
9859 if (C->isNegative()) {
9860 // 'srem -|C|, x' produces [-|C|, 0].
9861 Upper = 1;
9862 Lower = *C;
9863 } else {
9864 // 'srem |C|, x' produces [0, |C|].
9865 Upper = *C + 1;
9868 break;
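// Illustration for the SRem case above (i8): 'srem i8 %x, 5' gives [-4, 4],
// and 'srem i8 -7, %x' gives [-7, 0].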
9870 case Instruction::URem:
9871 if (match(BO.getOperand(1), m_APInt(C)))
9872 // 'urem x, C' produces [0, C).
9873 Upper = *C;
9874 else if (match(BO.getOperand(0), m_APInt(C)))
9875 // 'urem C, x' produces [0, C].
9876 Upper = *C + 1;
9877 break;
9879 default:
9880 break;
9884 static ConstantRange getRangeForIntrinsic(const IntrinsicInst &II,
9885 bool UseInstrInfo) {
9886 unsigned Width = II.getType()->getScalarSizeInBits();
9887 const APInt *C;
9888 switch (II.getIntrinsicID()) {
9889 case Intrinsic::ctlz:
9890 case Intrinsic::cttz: {
9891 APInt Upper(Width, Width);
9892 if (!UseInstrInfo || !match(II.getArgOperand(1), m_One()))
9893 Upper += 1;
9894 // Maximum of set/clear bits is the bit width.
9895 return ConstantRange::getNonEmpty(APInt::getZero(Width), Upper);
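// Illustration for the ctlz/cttz case above (i8): the result range is [0, 8];
// with the is-zero-poison flag set it narrows to [0, 7].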
9897 case Intrinsic::ctpop:
9898 // Maximum of set/clear bits is the bit width.
9899 return ConstantRange::getNonEmpty(APInt::getZero(Width),
9900 APInt(Width, Width) + 1);
9901 case Intrinsic::uadd_sat:
9902 // uadd.sat(x, C) produces [C, UINT_MAX].
9903 if (match(II.getOperand(0), m_APInt(C)) ||
9904 match(II.getOperand(1), m_APInt(C)))
9905 return ConstantRange::getNonEmpty(*C, APInt::getZero(Width));
9906 break;
9907 case Intrinsic::sadd_sat:
9908 if (match(II.getOperand(0), m_APInt(C)) ||
9909 match(II.getOperand(1), m_APInt(C))) {
9910 if (C->isNegative())
9911 // sadd.sat(x, -C) produces [SINT_MIN, SINT_MAX + (-C)].
9912 return ConstantRange::getNonEmpty(APInt::getSignedMinValue(Width),
9913 APInt::getSignedMaxValue(Width) + *C + 1);
9916 // sadd.sat(x, +C) produces [SINT_MIN + C, SINT_MAX].
9917 return ConstantRange::getNonEmpty(APInt::getSignedMinValue(Width) + *C,
9918 APInt::getSignedMaxValue(Width) + 1);
9920 break;
9921 case Intrinsic::usub_sat:
9922 // usub.sat(C, x) produces [0, C].
9923 if (match(II.getOperand(0), m_APInt(C)))
9924 return ConstantRange::getNonEmpty(APInt::getZero(Width), *C + 1);
9926 // usub.sat(x, C) produces [0, UINT_MAX - C].
9927 if (match(II.getOperand(1), m_APInt(C)))
9928 return ConstantRange::getNonEmpty(APInt::getZero(Width),
9929 APInt::getMaxValue(Width) - *C + 1);
9930 break;
9931 case Intrinsic::ssub_sat:
9932 if (match(II.getOperand(0), m_APInt(C))) {
9933 if (C->isNegative())
9934 // ssub.sat(-C, x) produces [SINT_MIN, -SINT_MIN + (-C)].
9935 return ConstantRange::getNonEmpty(APInt::getSignedMinValue(Width),
9936 *C - APInt::getSignedMinValue(Width) + 1);
9939 // ssub.sat(+C, x) produces [-SINT_MAX + C, SINT_MAX].
9940 return ConstantRange::getNonEmpty(*C - APInt::getSignedMaxValue(Width),
9941 APInt::getSignedMaxValue(Width) + 1);
9942 } else if (match(II.getOperand(1), m_APInt(C))) {
9943 if (C->isNegative())
9944 // ssub.sat(x, -C) produces [SINT_MIN - (-C), SINT_MAX]:
9945 return ConstantRange::getNonEmpty(APInt::getSignedMinValue(Width) - *C,
9946 APInt::getSignedMaxValue(Width) + 1);
9948 // ssub.sat(x, +C) produces [SINT_MIN, SINT_MAX - C].
9949 return ConstantRange::getNonEmpty(APInt::getSignedMinValue(Width),
9950 APInt::getSignedMaxValue(Width) - *C + 1);
9953 break;
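// Illustration for the ssub.sat case above (i8): ssub.sat(i8 -10, %x) gives
// [-128, 118], and ssub.sat(%x, i8 10) gives [-128, 117].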
9954 case Intrinsic::umin:
9955 case Intrinsic::umax:
9956 case Intrinsic::smin:
9957 case Intrinsic::smax:
9958 if (!match(II.getOperand(0), m_APInt(C)) &&
9959 !match(II.getOperand(1), m_APInt(C)))
9960 break;
9962 switch (II.getIntrinsicID()) {
9963 case Intrinsic::umin:
9964 return ConstantRange::getNonEmpty(APInt::getZero(Width), *C + 1);
9965 case Intrinsic::umax:
9966 return ConstantRange::getNonEmpty(*C, APInt::getZero(Width));
9967 case Intrinsic::smin:
9968 return ConstantRange::getNonEmpty(APInt::getSignedMinValue(Width),
9969 *C + 1);
9970 case Intrinsic::smax:
9971 return ConstantRange::getNonEmpty(*C,
9972 APInt::getSignedMaxValue(Width) + 1);
9973 default:
9974 llvm_unreachable("Must be min/max intrinsic");
9976 break;
9977 case Intrinsic::abs:
9978 // If abs of SIGNED_MIN is poison, then the result is [0..SIGNED_MAX],
9979 // otherwise it is [0..SIGNED_MIN], as -SIGNED_MIN == SIGNED_MIN.
9980 if (match(II.getOperand(1), m_One()))
9981 return ConstantRange::getNonEmpty(APInt::getZero(Width),
9982 APInt::getSignedMaxValue(Width) + 1);
9984 return ConstantRange::getNonEmpty(APInt::getZero(Width),
9985 APInt::getSignedMinValue(Width) + 1);
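// Illustration for the abs case above (i8): with the int-min-is-poison flag
// the range is [0, 127]; without it, INT_MIN itself is also possible because
// abs(-128) wraps back to -128.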
9986 case Intrinsic::vscale:
9987 if (!II.getParent() || !II.getFunction())
9988 break;
9989 return getVScaleRange(II.getFunction(), Width);
9990 case Intrinsic::scmp:
9991 case Intrinsic::ucmp:
9992 return ConstantRange::getNonEmpty(APInt::getAllOnes(Width),
9993 APInt(Width, 2));
9994 default:
9995 break;
9998 return ConstantRange::getFull(Width);
10001 static ConstantRange getRangeForSelectPattern(const SelectInst &SI,
10002 const InstrInfoQuery &IIQ) {
10003 unsigned BitWidth = SI.getType()->getScalarSizeInBits();
10004 const Value *LHS = nullptr, *RHS = nullptr;
10005 SelectPatternResult R = matchSelectPattern(&SI, LHS, RHS);
10006 if (R.Flavor == SPF_UNKNOWN)
10007 return ConstantRange::getFull(BitWidth);
10009 if (R.Flavor == SelectPatternFlavor::SPF_ABS) {
10010 // If the negation part of the abs (in RHS) has the NSW flag,
10011 // then the result of abs(X) is [0..SIGNED_MAX],
10012 // otherwise it is [0..SIGNED_MIN], as -SIGNED_MIN == SIGNED_MIN.
10013 if (match(RHS, m_Neg(m_Specific(LHS))) &&
10014 IIQ.hasNoSignedWrap(cast<Instruction>(RHS)))
10015 return ConstantRange::getNonEmpty(APInt::getZero(BitWidth),
10016 APInt::getSignedMaxValue(BitWidth) + 1);
10018 return ConstantRange::getNonEmpty(APInt::getZero(BitWidth),
10019 APInt::getSignedMinValue(BitWidth) + 1);
10022 if (R.Flavor == SelectPatternFlavor::SPF_NABS) {
10023 // The result of -abs(X) is <= 0.
10024 return ConstantRange::getNonEmpty(APInt::getSignedMinValue(BitWidth),
10025 APInt(BitWidth, 1));
10028 const APInt *C;
10029 if (!match(LHS, m_APInt(C)) && !match(RHS, m_APInt(C)))
10030 return ConstantRange::getFull(BitWidth);
10032 switch (R.Flavor) {
10033 case SPF_UMIN:
10034 return ConstantRange::getNonEmpty(APInt::getZero(BitWidth), *C + 1);
10035 case SPF_UMAX:
10036 return ConstantRange::getNonEmpty(*C, APInt::getZero(BitWidth));
10037 case SPF_SMIN:
10038 return ConstantRange::getNonEmpty(APInt::getSignedMinValue(BitWidth),
10039 *C + 1);
10040 case SPF_SMAX:
10041 return ConstantRange::getNonEmpty(*C,
10042 APInt::getSignedMaxValue(BitWidth) + 1);
10043 default:
10044 return ConstantRange::getFull(BitWidth);
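// Illustration for the select-pattern ranges above:
// `select (icmp slt i32 %x, 5), i32 %x, i32 5` matches SPF_SMIN with C == 5,
// so its range is [INT_MIN, 5].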
10048 static void setLimitForFPToI(const Instruction *I, APInt &Lower, APInt &Upper) {
10049 // The maximum representable value of a half is 65504. For floats, the
10050 // maximum value is 3.4e38, which requires roughly 129 bits.
10051 unsigned BitWidth = I->getType()->getScalarSizeInBits();
10052 if (!I->getOperand(0)->getType()->getScalarType()->isHalfTy())
10053 return;
10054 if (isa<FPToSIInst>(I) && BitWidth >= 17) {
10055 Lower = APInt(BitWidth, -65504, true);
10056 Upper = APInt(BitWidth, 65505);
10059 if (isa<FPToUIInst>(I) && BitWidth >= 16) {
10060 // For a fptoui the lower limit is left as 0.
10061 Upper = APInt(BitWidth, 65505);
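// Illustration for the limits above: `fptosi half %h to i32` gets
// [-65504, 65504], and `fptoui half %h to i16` gets [0, 65504].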
10065 ConstantRange llvm::computeConstantRange(const Value *V, bool ForSigned,
10066 bool UseInstrInfo, AssumptionCache *AC,
10067 const Instruction *CtxI,
10068 const DominatorTree *DT,
10069 unsigned Depth) {
10070 assert(V->getType()->isIntOrIntVectorTy() && "Expected integer instruction");
10072 if (Depth == MaxAnalysisRecursionDepth)
10073 return ConstantRange::getFull(V->getType()->getScalarSizeInBits());
10075 if (auto *C = dyn_cast<Constant>(V))
10076 return C->toConstantRange();
10078 unsigned BitWidth = V->getType()->getScalarSizeInBits();
10079 InstrInfoQuery IIQ(UseInstrInfo);
10080 ConstantRange CR = ConstantRange::getFull(BitWidth);
10081 if (auto *BO = dyn_cast<BinaryOperator>(V)) {
10082 APInt Lower = APInt(BitWidth, 0);
10083 APInt Upper = APInt(BitWidth, 0);
10084 // TODO: Return ConstantRange.
10085 setLimitsForBinOp(*BO, Lower, Upper, IIQ, ForSigned);
10086 CR = ConstantRange::getNonEmpty(Lower, Upper);
10087 } else if (auto *II = dyn_cast<IntrinsicInst>(V))
10088 CR = getRangeForIntrinsic(*II, UseInstrInfo);
10089 else if (auto *SI = dyn_cast<SelectInst>(V)) {
10090 ConstantRange CRTrue = computeConstantRange(
10091 SI->getTrueValue(), ForSigned, UseInstrInfo, AC, CtxI, DT, Depth + 1);
10092 ConstantRange CRFalse = computeConstantRange(
10093 SI->getFalseValue(), ForSigned, UseInstrInfo, AC, CtxI, DT, Depth + 1);
10094 CR = CRTrue.unionWith(CRFalse);
10095 CR = CR.intersectWith(getRangeForSelectPattern(*SI, IIQ));
10096 } else if (isa<FPToUIInst>(V) || isa<FPToSIInst>(V)) {
10097 APInt Lower = APInt(BitWidth, 0);
10098 APInt Upper = APInt(BitWidth, 0);
10099 // TODO: Return ConstantRange.
10100 setLimitForFPToI(cast<Instruction>(V), Lower, Upper);
10101 CR = ConstantRange::getNonEmpty(Lower, Upper);
10102 } else if (const auto *A = dyn_cast<Argument>(V))
10103 if (std::optional<ConstantRange> Range = A->getRange())
10104 CR = *Range;
10106 if (auto *I = dyn_cast<Instruction>(V)) {
10107 if (auto *Range = IIQ.getMetadata(I, LLVMContext::MD_range))
10108 CR = CR.intersectWith(getConstantRangeFromMetadata(*Range));
10110 if (const auto *CB = dyn_cast<CallBase>(V))
10111 if (std::optional<ConstantRange> Range = CB->getRange())
10112 CR = CR.intersectWith(*Range);
10115 if (CtxI && AC) {
10116 // Try to restrict the range based on information from assumptions.
10117 for (auto &AssumeVH : AC->assumptionsFor(V)) {
10118 if (!AssumeVH)
10119 continue;
10120 CallInst *I = cast<CallInst>(AssumeVH);
10121 assert(I->getParent()->getParent() == CtxI->getParent()->getParent() &&
10122 "Got assumption for the wrong function!");
10123 assert(I->getIntrinsicID() == Intrinsic::assume &&
10124 "must be an assume intrinsic");
10126 if (!isValidAssumeForContext(I, CtxI, DT))
10127 continue;
10128 Value *Arg = I->getArgOperand(0);
10129 ICmpInst *Cmp = dyn_cast<ICmpInst>(Arg);
10130 // Currently we just use information from comparisons.
10131 if (!Cmp || Cmp->getOperand(0) != V)
10132 continue;
10133 // TODO: Set "ForSigned" parameter via Cmp->isSigned()?
10134 ConstantRange RHS =
10135 computeConstantRange(Cmp->getOperand(1), /* ForSigned */ false,
10136 UseInstrInfo, AC, I, DT, Depth + 1);
10137 CR = CR.intersectWith(
10138 ConstantRange::makeAllowedICmpRegion(Cmp->getPredicate(), RHS));
10142 return CR;
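// Usage sketch for computeConstantRange above (illustrative only; the values
// and names are hypothetical): for `%a = add nuw nsw i8 %x, 1`,
//   computeConstantRange(A, /*ForSigned=*/false)
// returns [1, 255] based on the nuw flag, while ForSigned=true prefers the
// signed view and returns [-127, 127].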
10145 static void
10146 addValueAffectedByCondition(Value *V,
10147 function_ref<void(Value *)> InsertAffected) {
10148 assert(V != nullptr);
10149 if (isa<Argument>(V) || isa<GlobalValue>(V)) {
10150 InsertAffected(V);
10151 } else if (auto *I = dyn_cast<Instruction>(V)) {
10152 InsertAffected(V);
10154 // Peek through unary operators to find the source of the condition.
10155 Value *Op;
10156 if (match(I, m_CombineOr(m_PtrToInt(m_Value(Op)), m_Trunc(m_Value(Op))))) {
10157 if (isa<Instruction>(Op) || isa<Argument>(Op))
10158 InsertAffected(Op);
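// Illustration for the helper above: given the value `trunc i64 %x to i32`,
// it reports both the trunc instruction and the underlying %x as affected
// (when %x is itself an instruction or argument).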
10163 void llvm::findValuesAffectedByCondition(
10164 Value *Cond, bool IsAssume, function_ref<void(Value *)> InsertAffected) {
10165 auto AddAffected = [&InsertAffected](Value *V) {
10166 addValueAffectedByCondition(V, InsertAffected);
10169 auto AddCmpOperands = [&AddAffected, IsAssume](Value *LHS, Value *RHS) {
10170 if (IsAssume) {
10171 AddAffected(LHS);
10172 AddAffected(RHS);
10173 } else if (match(RHS, m_Constant()))
10174 AddAffected(LHS);
10177 SmallVector<Value *, 8> Worklist;
10178 SmallPtrSet<Value *, 8> Visited;
10179 Worklist.push_back(Cond);
10180 while (!Worklist.empty()) {
10181 Value *V = Worklist.pop_back_val();
10182 if (!Visited.insert(V).second)
10183 continue;
10185 CmpPredicate Pred;
10186 Value *A, *B, *X;
10188 if (IsAssume) {
10189 AddAffected(V);
10190 if (match(V, m_Not(m_Value(X))))
10191 AddAffected(X);
10194 if (match(V, m_LogicalOp(m_Value(A), m_Value(B)))) {
10195 // assume(A && B) is split to -> assume(A); assume(B);
10196 // assume(!(A || B)) is split to -> assume(!A); assume(!B);
10197 // Finally, assume(A || B) / assume(!(A && B)) generally don't provide
10198 // enough information to be worth handling (intersection of information as
10199 // opposed to union).
10200 if (!IsAssume) {
10201 Worklist.push_back(A);
10202 Worklist.push_back(B);
10204 } else if (match(V, m_ICmp(Pred, m_Value(A), m_Value(B)))) {
10205 AddCmpOperands(A, B);
10207 bool HasRHSC = match(B, m_ConstantInt());
10208 if (ICmpInst::isEquality(Pred)) {
10209 if (HasRHSC) {
10210 Value *Y;
10211 // (X & C) or (X | C) or (X ^ C).
10212 // (X << C) or (X >>_s C) or (X >>_u C).
10213 if (match(A, m_BitwiseLogic(m_Value(X), m_ConstantInt())) ||
10214 match(A, m_Shift(m_Value(X), m_ConstantInt())))
10215 AddAffected(X);
10216 else if (match(A, m_And(m_Value(X), m_Value(Y))) ||
10217 match(A, m_Or(m_Value(X), m_Value(Y)))) {
10218 AddAffected(X);
10219 AddAffected(Y);
10222 } else {
10223 if (HasRHSC) {
10224 // Handle (A + C1) u< C2, which is the canonical form of
10225 // A > C3 && A < C4.
10226 if (match(A, m_AddLike(m_Value(X), m_ConstantInt())))
10227 AddAffected(X);
10229 if (ICmpInst::isUnsigned(Pred)) {
10230 Value *Y;
10231 // X & Y u> C -> X >u C && Y >u C
10232 // X | Y u< C -> X u< C && Y u< C
10233 // X nuw+ Y u< C -> X u< C && Y u< C
10234 if (match(A, m_And(m_Value(X), m_Value(Y))) ||
10235 match(A, m_Or(m_Value(X), m_Value(Y))) ||
10236 match(A, m_NUWAdd(m_Value(X), m_Value(Y)))) {
10237 AddAffected(X);
10238 AddAffected(Y);
10240 // X nuw- Y u> C -> X u> C
10241 if (match(A, m_NUWSub(m_Value(X), m_Value())))
10242 AddAffected(X);
10246 // Handle icmp slt/sgt (bitcast X to int), 0/-1, which is supported
10247 // by computeKnownFPClass().
10248 if (match(A, m_ElementWiseBitCast(m_Value(X)))) {
10249 if (Pred == ICmpInst::ICMP_SLT && match(B, m_Zero()))
10250 InsertAffected(X);
10251 else if (Pred == ICmpInst::ICMP_SGT && match(B, m_AllOnes()))
10252 InsertAffected(X);
10256 if (HasRHSC && match(A, m_Intrinsic<Intrinsic::ctpop>(m_Value(X))))
10257 AddAffected(X);
10258 } else if (match(V, m_FCmp(Pred, m_Value(A), m_Value(B)))) {
10259 AddCmpOperands(A, B);
10261 // fcmp fneg(x), y
10262 // fcmp fabs(x), y
10263 // fcmp fneg(fabs(x)), y
10264 if (match(A, m_FNeg(m_Value(A))))
10265 AddAffected(A);
10266 if (match(A, m_FAbs(m_Value(A))))
10267 AddAffected(A);
10269 } else if (match(V, m_Intrinsic<Intrinsic::is_fpclass>(m_Value(A),
10270 m_Value()))) {
10271 // Handle patterns that computeKnownFPClass() supports.
10272 AddAffected(A);