1 //===- ValueTracking.cpp - Walk computations to compute properties --------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains routines that help analyze properties that chains of
10 // computations have.
12 //===----------------------------------------------------------------------===//
14 #include "llvm/Analysis/ValueTracking.h"
15 #include "llvm/ADT/APFloat.h"
16 #include "llvm/ADT/APInt.h"
17 #include "llvm/ADT/ArrayRef.h"
18 #include "llvm/ADT/STLExtras.h"
19 #include "llvm/ADT/ScopeExit.h"
20 #include "llvm/ADT/SmallPtrSet.h"
21 #include "llvm/ADT/SmallSet.h"
22 #include "llvm/ADT/SmallVector.h"
23 #include "llvm/ADT/StringRef.h"
24 #include "llvm/ADT/iterator_range.h"
25 #include "llvm/Analysis/AliasAnalysis.h"
26 #include "llvm/Analysis/AssumeBundleQueries.h"
27 #include "llvm/Analysis/AssumptionCache.h"
28 #include "llvm/Analysis/ConstantFolding.h"
29 #include "llvm/Analysis/DomConditionCache.h"
30 #include "llvm/Analysis/GuardUtils.h"
31 #include "llvm/Analysis/InstructionSimplify.h"
32 #include "llvm/Analysis/Loads.h"
33 #include "llvm/Analysis/LoopInfo.h"
34 #include "llvm/Analysis/TargetLibraryInfo.h"
35 #include "llvm/Analysis/VectorUtils.h"
36 #include "llvm/Analysis/WithCache.h"
37 #include "llvm/IR/Argument.h"
38 #include "llvm/IR/Attributes.h"
39 #include "llvm/IR/BasicBlock.h"
40 #include "llvm/IR/Constant.h"
41 #include "llvm/IR/ConstantRange.h"
42 #include "llvm/IR/Constants.h"
43 #include "llvm/IR/DerivedTypes.h"
44 #include "llvm/IR/DiagnosticInfo.h"
45 #include "llvm/IR/Dominators.h"
46 #include "llvm/IR/EHPersonalities.h"
47 #include "llvm/IR/Function.h"
48 #include "llvm/IR/GetElementPtrTypeIterator.h"
49 #include "llvm/IR/GlobalAlias.h"
50 #include "llvm/IR/GlobalValue.h"
51 #include "llvm/IR/GlobalVariable.h"
52 #include "llvm/IR/InstrTypes.h"
53 #include "llvm/IR/Instruction.h"
54 #include "llvm/IR/Instructions.h"
55 #include "llvm/IR/IntrinsicInst.h"
56 #include "llvm/IR/Intrinsics.h"
57 #include "llvm/IR/IntrinsicsAArch64.h"
58 #include "llvm/IR/IntrinsicsAMDGPU.h"
59 #include "llvm/IR/IntrinsicsRISCV.h"
60 #include "llvm/IR/IntrinsicsX86.h"
61 #include "llvm/IR/LLVMContext.h"
62 #include "llvm/IR/Metadata.h"
63 #include "llvm/IR/Module.h"
64 #include "llvm/IR/Operator.h"
65 #include "llvm/IR/PatternMatch.h"
66 #include "llvm/IR/Type.h"
67 #include "llvm/IR/User.h"
68 #include "llvm/IR/Value.h"
69 #include "llvm/Support/Casting.h"
70 #include "llvm/Support/CommandLine.h"
71 #include "llvm/Support/Compiler.h"
72 #include "llvm/Support/ErrorHandling.h"
73 #include "llvm/Support/KnownBits.h"
74 #include "llvm/Support/MathExtras.h"
75 #include "llvm/TargetParser/RISCVTargetParser.h"
76 #include <algorithm>
77 #include <cassert>
78 #include <cstdint>
79 #include <optional>
80 #include <utility>
82 using namespace llvm;
83 using namespace llvm::PatternMatch;
85 // Controls the number of uses of the value searched for possible
86 // dominating comparisons.
87 static cl::opt<unsigned> DomConditionsMaxUses("dom-conditions-max-uses",
88 cl::Hidden, cl::init(20));
91 /// Returns the bitwidth of the given scalar or pointer type. For vector types,
92 /// returns the element type's bitwidth.
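/// For illustration (a sketch of the behavior below): an i32 or <4 x i32>
/// value yields 32 via getScalarSizeInBits(); a pointer type reports 0 there,
/// so the DataLayout pointer size (e.g. 64 on typical 64-bit targets) is
/// returned instead.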
93 static unsigned getBitWidth(Type *Ty, const DataLayout &DL) {
94 if (unsigned BitWidth = Ty->getScalarSizeInBits())
95 return BitWidth;
97 return DL.getPointerTypeSizeInBits(Ty);
100 // Given the provided Value and, potentially, a context instruction, return
101 // the preferred context instruction (if any).
102 static const Instruction *safeCxtI(const Value *V, const Instruction *CxtI) {
103 // If we've been provided with a context instruction, then use that (provided
104 // it has been inserted).
105 if (CxtI && CxtI->getParent())
106 return CxtI;
108 // If the value is really an already-inserted instruction, then use that.
109 CxtI = dyn_cast<Instruction>(V);
110 if (CxtI && CxtI->getParent())
111 return CxtI;
113 return nullptr;
116 static bool getShuffleDemandedElts(const ShuffleVectorInst *Shuf,
117 const APInt &DemandedElts,
118 APInt &DemandedLHS, APInt &DemandedRHS) {
119 if (isa<ScalableVectorType>(Shuf->getType())) {
120 assert(DemandedElts == APInt(1,1));
121 DemandedLHS = DemandedRHS = DemandedElts;
122 return true;
125 int NumElts =
126 cast<FixedVectorType>(Shuf->getOperand(0)->getType())->getNumElements();
127 return llvm::getShuffleDemandedElts(NumElts, Shuf->getShuffleMask(),
128 DemandedElts, DemandedLHS, DemandedRHS);
131 static void computeKnownBits(const Value *V, const APInt &DemandedElts,
132 KnownBits &Known, unsigned Depth,
133 const SimplifyQuery &Q);
135 void llvm::computeKnownBits(const Value *V, KnownBits &Known, unsigned Depth,
136 const SimplifyQuery &Q) {
137 // Since the number of lanes in a scalable vector is unknown at compile time,
138 // we track one bit which is implicitly broadcast to all lanes. This means
139 // that all lanes in a scalable vector are considered demanded.
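// For illustration: an <8 x i16> value gets an 8-bit all-ones DemandedElts,
// while a scalar or <vscale x 4 x i32> value uses the single-bit APInt(1, 1).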
140 auto *FVTy = dyn_cast<FixedVectorType>(V->getType());
141 APInt DemandedElts =
142 FVTy ? APInt::getAllOnes(FVTy->getNumElements()) : APInt(1, 1);
143 ::computeKnownBits(V, DemandedElts, Known, Depth, Q);
146 void llvm::computeKnownBits(const Value *V, KnownBits &Known,
147 const DataLayout &DL, unsigned Depth,
148 AssumptionCache *AC, const Instruction *CxtI,
149 const DominatorTree *DT, bool UseInstrInfo) {
150 computeKnownBits(
151 V, Known, Depth,
152 SimplifyQuery(DL, DT, AC, safeCxtI(V, CxtI), UseInstrInfo));
155 KnownBits llvm::computeKnownBits(const Value *V, const DataLayout &DL,
156 unsigned Depth, AssumptionCache *AC,
157 const Instruction *CxtI,
158 const DominatorTree *DT, bool UseInstrInfo) {
159 return computeKnownBits(
160 V, Depth, SimplifyQuery(DL, DT, AC, safeCxtI(V, CxtI), UseInstrInfo));
163 KnownBits llvm::computeKnownBits(const Value *V, const APInt &DemandedElts,
164 const DataLayout &DL, unsigned Depth,
165 AssumptionCache *AC, const Instruction *CxtI,
166 const DominatorTree *DT, bool UseInstrInfo) {
167 return computeKnownBits(
168 V, DemandedElts, Depth,
169 SimplifyQuery(DL, DT, AC, safeCxtI(V, CxtI), UseInstrInfo));
172 static bool haveNoCommonBitsSetSpecialCases(const Value *LHS, const Value *RHS,
173 const SimplifyQuery &SQ) {
174 // Look for an inverted mask: (X & ~M) op (Y & M).
176 Value *M;
177 if (match(LHS, m_c_And(m_Not(m_Value(M)), m_Value())) &&
178 match(RHS, m_c_And(m_Specific(M), m_Value())) &&
179 isGuaranteedNotToBeUndef(M, SQ.AC, SQ.CxtI, SQ.DT))
180 return true;
183 // X op (Y & ~X)
184 if (match(RHS, m_c_And(m_Not(m_Specific(LHS)), m_Value())) &&
185 isGuaranteedNotToBeUndef(LHS, SQ.AC, SQ.CxtI, SQ.DT))
186 return true;
188 // X op ((X & Y) ^ Y) -- this is the canonical form of the previous pattern
189 // for constant Y.
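// For illustration of why this is safe: ((X & Y) ^ Y) simplifies to (~X & Y),
// which can never share a set bit with X.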
190 Value *Y;
191 if (match(RHS,
192 m_c_Xor(m_c_And(m_Specific(LHS), m_Value(Y)), m_Deferred(Y))) &&
193 isGuaranteedNotToBeUndef(LHS, SQ.AC, SQ.CxtI, SQ.DT) &&
194 isGuaranteedNotToBeUndef(Y, SQ.AC, SQ.CxtI, SQ.DT))
195 return true;
197 // Peek through extends to find a 'not' of the other side:
198 // (ext Y) op ext(~Y)
199 if (match(LHS, m_ZExtOrSExt(m_Value(Y))) &&
200 match(RHS, m_ZExtOrSExt(m_Not(m_Specific(Y)))) &&
201 isGuaranteedNotToBeUndef(Y, SQ.AC, SQ.CxtI, SQ.DT))
202 return true;
204 // Look for: (A & B) op ~(A | B)
206 Value *A, *B;
207 if (match(LHS, m_And(m_Value(A), m_Value(B))) &&
208 match(RHS, m_Not(m_c_Or(m_Specific(A), m_Specific(B)))) &&
209 isGuaranteedNotToBeUndef(A, SQ.AC, SQ.CxtI, SQ.DT) &&
210 isGuaranteedNotToBeUndef(B, SQ.AC, SQ.CxtI, SQ.DT))
211 return true;
214 // Look for: (X << V) op (Y >> (BitWidth - V))
215 // or (X >> V) op (Y << (BitWidth - V))
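// For illustration: with BitWidth == 8 and V == 3, (X >> 3) can only have
// bits 0..4 set while (Y << 5) can only have bits 5..7 set, so the two sides
// cannot share a set bit (assuming in-range shift amounts).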
217 const Value *V;
218 const APInt *R;
219 if (((match(RHS, m_Shl(m_Value(), m_Sub(m_APInt(R), m_Value(V)))) &&
220 match(LHS, m_LShr(m_Value(), m_Specific(V)))) ||
221 (match(RHS, m_LShr(m_Value(), m_Sub(m_APInt(R), m_Value(V)))) &&
222 match(LHS, m_Shl(m_Value(), m_Specific(V))))) &&
223 R->uge(LHS->getType()->getScalarSizeInBits()))
224 return true;
227 return false;
230 bool llvm::haveNoCommonBitsSet(const WithCache<const Value *> &LHSCache,
231 const WithCache<const Value *> &RHSCache,
232 const SimplifyQuery &SQ) {
233 const Value *LHS = LHSCache.getValue();
234 const Value *RHS = RHSCache.getValue();
236 assert(LHS->getType() == RHS->getType() &&
237 "LHS and RHS should have the same type");
238 assert(LHS->getType()->isIntOrIntVectorTy() &&
239 "LHS and RHS should be integers");
241 if (haveNoCommonBitsSetSpecialCases(LHS, RHS, SQ) ||
242 haveNoCommonBitsSetSpecialCases(RHS, LHS, SQ))
243 return true;
245 return KnownBits::haveNoCommonBitsSet(LHSCache.getKnownBits(SQ),
246 RHSCache.getKnownBits(SQ));
249 bool llvm::isOnlyUsedInZeroComparison(const Instruction *I) {
250 return !I->user_empty() && all_of(I->users(), [](const User *U) {
251 return match(U, m_ICmp(m_Value(), m_Zero()));
255 bool llvm::isOnlyUsedInZeroEqualityComparison(const Instruction *I) {
256 return !I->user_empty() && all_of(I->users(), [](const User *U) {
257 CmpPredicate P;
258 return match(U, m_ICmp(P, m_Value(), m_Zero())) && ICmpInst::isEquality(P);
262 bool llvm::isKnownToBeAPowerOfTwo(const Value *V, const DataLayout &DL,
263 bool OrZero, unsigned Depth,
264 AssumptionCache *AC, const Instruction *CxtI,
265 const DominatorTree *DT, bool UseInstrInfo) {
266 return ::isKnownToBeAPowerOfTwo(
267 V, OrZero, Depth,
268 SimplifyQuery(DL, DT, AC, safeCxtI(V, CxtI), UseInstrInfo));
271 static bool isKnownNonZero(const Value *V, const APInt &DemandedElts,
272 const SimplifyQuery &Q, unsigned Depth);
274 bool llvm::isKnownNonNegative(const Value *V, const SimplifyQuery &SQ,
275 unsigned Depth) {
276 return computeKnownBits(V, Depth, SQ).isNonNegative();
279 bool llvm::isKnownPositive(const Value *V, const SimplifyQuery &SQ,
280 unsigned Depth) {
281 if (auto *CI = dyn_cast<ConstantInt>(V))
282 return CI->getValue().isStrictlyPositive();
284 // If `isKnownNonNegative` ever becomes more sophisticated, make sure to keep
285 // this updated.
286 KnownBits Known = computeKnownBits(V, Depth, SQ);
287 return Known.isNonNegative() &&
288 (Known.isNonZero() || isKnownNonZero(V, SQ, Depth));
291 bool llvm::isKnownNegative(const Value *V, const SimplifyQuery &SQ,
292 unsigned Depth) {
293 return computeKnownBits(V, Depth, SQ).isNegative();
296 static bool isKnownNonEqual(const Value *V1, const Value *V2,
297 const APInt &DemandedElts, unsigned Depth,
298 const SimplifyQuery &Q);
300 bool llvm::isKnownNonEqual(const Value *V1, const Value *V2,
301 const SimplifyQuery &Q, unsigned Depth) {
302 // We don't support looking through casts.
303 if (V1 == V2 || V1->getType() != V2->getType())
304 return false;
305 auto *FVTy = dyn_cast<FixedVectorType>(V1->getType());
306 APInt DemandedElts =
307 FVTy ? APInt::getAllOnes(FVTy->getNumElements()) : APInt(1, 1);
308 return ::isKnownNonEqual(V1, V2, DemandedElts, Depth, Q);
311 bool llvm::MaskedValueIsZero(const Value *V, const APInt &Mask,
312 const SimplifyQuery &SQ, unsigned Depth) {
313 KnownBits Known(Mask.getBitWidth());
314 computeKnownBits(V, Known, Depth, SQ);
315 return Mask.isSubsetOf(Known.Zero);
318 static unsigned ComputeNumSignBits(const Value *V, const APInt &DemandedElts,
319 unsigned Depth, const SimplifyQuery &Q);
321 static unsigned ComputeNumSignBits(const Value *V, unsigned Depth,
322 const SimplifyQuery &Q) {
323 auto *FVTy = dyn_cast<FixedVectorType>(V->getType());
324 APInt DemandedElts =
325 FVTy ? APInt::getAllOnes(FVTy->getNumElements()) : APInt(1, 1);
326 return ComputeNumSignBits(V, DemandedElts, Depth, Q);
329 unsigned llvm::ComputeNumSignBits(const Value *V, const DataLayout &DL,
330 unsigned Depth, AssumptionCache *AC,
331 const Instruction *CxtI,
332 const DominatorTree *DT, bool UseInstrInfo) {
333 return ::ComputeNumSignBits(
334 V, Depth, SimplifyQuery(DL, DT, AC, safeCxtI(V, CxtI), UseInstrInfo));
337 unsigned llvm::ComputeMaxSignificantBits(const Value *V, const DataLayout &DL,
338 unsigned Depth, AssumptionCache *AC,
339 const Instruction *CxtI,
340 const DominatorTree *DT) {
341 unsigned SignBits = ComputeNumSignBits(V, DL, Depth, AC, CxtI, DT);
342 return V->getType()->getScalarSizeInBits() - SignBits + 1;
345 static void computeKnownBitsAddSub(bool Add, const Value *Op0, const Value *Op1,
346 bool NSW, bool NUW,
347 const APInt &DemandedElts,
348 KnownBits &KnownOut, KnownBits &Known2,
349 unsigned Depth, const SimplifyQuery &Q) {
350 computeKnownBits(Op1, DemandedElts, KnownOut, Depth + 1, Q);
352 // If one operand is unknown and we have no nowrap information,
353 // the result will be unknown independently of the second operand.
354 if (KnownOut.isUnknown() && !NSW && !NUW)
355 return;
357 computeKnownBits(Op0, DemandedElts, Known2, Depth + 1, Q);
358 KnownOut = KnownBits::computeForAddSub(Add, NSW, NUW, Known2, KnownOut);
361 static void computeKnownBitsMul(const Value *Op0, const Value *Op1, bool NSW,
362 bool NUW, const APInt &DemandedElts,
363 KnownBits &Known, KnownBits &Known2,
364 unsigned Depth, const SimplifyQuery &Q) {
365 computeKnownBits(Op1, DemandedElts, Known, Depth + 1, Q);
366 computeKnownBits(Op0, DemandedElts, Known2, Depth + 1, Q);
368 bool isKnownNegative = false;
369 bool isKnownNonNegative = false;
370 // If the multiplication is known not to overflow, compute the sign bit.
371 if (NSW) {
372 if (Op0 == Op1) {
373 // The product of a number with itself is non-negative.
374 isKnownNonNegative = true;
375 } else {
376 bool isKnownNonNegativeOp1 = Known.isNonNegative();
377 bool isKnownNonNegativeOp0 = Known2.isNonNegative();
378 bool isKnownNegativeOp1 = Known.isNegative();
379 bool isKnownNegativeOp0 = Known2.isNegative();
380 // The product of two numbers with the same sign is non-negative.
381 isKnownNonNegative = (isKnownNegativeOp1 && isKnownNegativeOp0) ||
382 (isKnownNonNegativeOp1 && isKnownNonNegativeOp0);
383 if (!isKnownNonNegative && NUW) {
384 // mul nuw nsw with a factor > 1 is non-negative.
385 KnownBits One = KnownBits::makeConstant(APInt(Known.getBitWidth(), 1));
386 isKnownNonNegative = KnownBits::sgt(Known, One).value_or(false) ||
387 KnownBits::sgt(Known2, One).value_or(false);
390 // The product of a negative number and a non-negative number is either
391 // negative or zero.
392 if (!isKnownNonNegative)
393 isKnownNegative =
394 (isKnownNegativeOp1 && isKnownNonNegativeOp0 &&
395 Known2.isNonZero()) ||
396 (isKnownNegativeOp0 && isKnownNonNegativeOp1 && Known.isNonZero());
400 bool SelfMultiply = Op0 == Op1;
401 if (SelfMultiply)
402 SelfMultiply &=
403 isGuaranteedNotToBeUndef(Op0, Q.AC, Q.CxtI, Q.DT, Depth + 1);
404 Known = KnownBits::mul(Known, Known2, SelfMultiply);
406 // Only make use of no-wrap flags if we failed to compute the sign bit
407 // directly. This matters if the multiplication always overflows, in
408 // which case we prefer to follow the result of the direct computation,
409 // though as the program is invoking undefined behaviour we can choose
410 // whatever we like here.
411 if (isKnownNonNegative && !Known.isNegative())
412 Known.makeNonNegative();
413 else if (isKnownNegative && !Known.isNonNegative())
414 Known.makeNegative();
417 void llvm::computeKnownBitsFromRangeMetadata(const MDNode &Ranges,
418 KnownBits &Known) {
419 unsigned BitWidth = Known.getBitWidth();
420 unsigned NumRanges = Ranges.getNumOperands() / 2;
421 assert(NumRanges >= 1);
423 Known.Zero.setAllBits();
424 Known.One.setAllBits();
426 for (unsigned i = 0; i < NumRanges; ++i) {
427 ConstantInt *Lower =
428 mdconst::extract<ConstantInt>(Ranges.getOperand(2 * i + 0));
429 ConstantInt *Upper =
430 mdconst::extract<ConstantInt>(Ranges.getOperand(2 * i + 1));
431 ConstantRange Range(Lower->getValue(), Upper->getValue());
433 // The first CommonPrefixBits of all values in Range are equal.
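// For illustration: for the half-open range [0x60, 0x70) on an i8, the
// unsigned min and max are 0x60 and 0x6F, their xor is 0x0F, so the top four
// bits (0110) become known below.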
434 unsigned CommonPrefixBits =
435 (Range.getUnsignedMax() ^ Range.getUnsignedMin()).countl_zero();
436 APInt Mask = APInt::getHighBitsSet(BitWidth, CommonPrefixBits);
437 APInt UnsignedMax = Range.getUnsignedMax().zextOrTrunc(BitWidth);
438 Known.One &= UnsignedMax & Mask;
439 Known.Zero &= ~UnsignedMax & Mask;
443 static bool isEphemeralValueOf(const Instruction *I, const Value *E) {
444 SmallVector<const Value *, 16> WorkSet(1, I);
445 SmallPtrSet<const Value *, 32> Visited;
446 SmallPtrSet<const Value *, 16> EphValues;
448 // The instruction defining an assumption's condition itself is always
449 // considered ephemeral to that assumption (even if it has other
450 // non-ephemeral users). See r246696's test case for an example.
451 if (is_contained(I->operands(), E))
452 return true;
454 while (!WorkSet.empty()) {
455 const Value *V = WorkSet.pop_back_val();
456 if (!Visited.insert(V).second)
457 continue;
459 // If all uses of this value are ephemeral, then so is this value.
460 if (llvm::all_of(V->users(), [&](const User *U) {
461 return EphValues.count(U);
462 })) {
463 if (V == E)
464 return true;
466 if (V == I || (isa<Instruction>(V) &&
467 !cast<Instruction>(V)->mayHaveSideEffects() &&
468 !cast<Instruction>(V)->isTerminator())) {
469 EphValues.insert(V);
470 if (const User *U = dyn_cast<User>(V))
471 append_range(WorkSet, U->operands());
476 return false;
479 // Is this an intrinsic that cannot be speculated but also cannot trap?
480 bool llvm::isAssumeLikeIntrinsic(const Instruction *I) {
481 if (const IntrinsicInst *CI = dyn_cast<IntrinsicInst>(I))
482 return CI->isAssumeLikeIntrinsic();
484 return false;
487 bool llvm::isValidAssumeForContext(const Instruction *Inv,
488 const Instruction *CxtI,
489 const DominatorTree *DT,
490 bool AllowEphemerals) {
491 // There are two restrictions on the use of an assume:
492 // 1. The assume must dominate the context (or the control flow must
493 // reach the assume whenever it reaches the context).
494 // 2. The context must not be in the assume's set of ephemeral values
495 // (otherwise we will use the assume to prove that the condition
496 // feeding the assume is trivially true, thus causing the removal of
497 // the assume).
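// A hypothetical IR sketch of restriction 1 (names are illustrative only):
//   call void @llvm.assume(i1 %cond)   ; Inv
//   %use = add i32 %x, 1               ; CxtI -- the assume comes first in
//                                      ; the block, so it may be used here.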
499 if (Inv->getParent() == CxtI->getParent()) {
500 // If Inv and CxtI are in the same block, check if the assume (Inv) is first
501 // in the BB.
502 if (Inv->comesBefore(CxtI))
503 return true;
505 // Don't let an assume affect itself - this would cause the problems
506 // `isEphemeralValueOf` is trying to prevent, and it would also make
507 // the loop below go out of bounds.
508 if (!AllowEphemerals && Inv == CxtI)
509 return false;
511 // The context comes first, but they're both in the same block.
512 // Make sure there is nothing in between that might interrupt
513 // the control flow, not even CxtI itself.
514 // We limit the scan distance between the assume and its context instruction
515 // to avoid a compile-time explosion. This limit is chosen arbitrarily, so
516 // it can be adjusted if needed (could be turned into a cl::opt).
517 auto Range = make_range(CxtI->getIterator(), Inv->getIterator());
518 if (!isGuaranteedToTransferExecutionToSuccessor(Range, 15))
519 return false;
521 return AllowEphemerals || !isEphemeralValueOf(Inv, CxtI);
524 // Inv and CxtI are in different blocks.
525 if (DT) {
526 if (DT->dominates(Inv, CxtI))
527 return true;
528 } else if (Inv->getParent() == CxtI->getParent()->getSinglePredecessor() ||
529 Inv->getParent()->isEntryBlock()) {
530 // We don't have a DT, but this trivially dominates.
531 return true;
534 return false;
537 // TODO: cmpExcludesZero misses many cases where `RHS` is non-constant but
538 // we still have enough information about `RHS` to conclude non-zero. For
539 // example Pred=EQ, RHS=isKnownNonZero. cmpExcludesZero is called in loops
540 // so the extra compile time may not be worth it, but possibly a second API
541 // should be created for use outside of loops.
542 static bool cmpExcludesZero(CmpInst::Predicate Pred, const Value *RHS) {
543 // v u> y implies v != 0.
544 if (Pred == ICmpInst::ICMP_UGT)
545 return true;
547 // Special-case v != 0 to also handle v != null.
548 if (Pred == ICmpInst::ICMP_NE)
549 return match(RHS, m_Zero());
551 // All other predicates - rely on generic ConstantRange handling.
552 const APInt *C;
553 auto Zero = APInt::getZero(RHS->getType()->getScalarSizeInBits());
554 if (match(RHS, m_APInt(C))) {
555 ConstantRange TrueValues = ConstantRange::makeExactICmpRegion(Pred, *C);
556 return !TrueValues.contains(Zero);
559 auto *VC = dyn_cast<ConstantDataVector>(RHS);
560 if (VC == nullptr)
561 return false;
563 for (unsigned ElemIdx = 0, NElem = VC->getNumElements(); ElemIdx < NElem;
564 ++ElemIdx) {
565 ConstantRange TrueValues = ConstantRange::makeExactICmpRegion(
566 Pred, VC->getElementAsAPInt(ElemIdx));
567 if (TrueValues.contains(Zero))
568 return false;
570 return true;
573 static void breakSelfRecursivePHI(const Use *U, const PHINode *PHI,
574 Value *&ValOut, Instruction *&CtxIOut,
575 const PHINode **PhiOut = nullptr) {
576 ValOut = U->get();
577 if (ValOut == PHI)
578 return;
579 CtxIOut = PHI->getIncomingBlock(*U)->getTerminator();
580 if (PhiOut)
581 *PhiOut = PHI;
582 Value *V;
583 // If the Use is a select of this phi, compute the analysis on the other arm
584 // to break the recursion.
585 // TODO: Min/Max
586 if (match(ValOut, m_Select(m_Value(), m_Specific(PHI), m_Value(V))) ||
587 match(ValOut, m_Select(m_Value(), m_Value(V), m_Specific(PHI))))
588 ValOut = V;
590 // As with the select case above, if this phi is a 2-operand phi, compute the
591 // analysis on the other incoming value to break the recursion.
592 // TODO: We could handle any number of incoming edges as long as we only have
593 // two unique values.
594 if (auto *IncPhi = dyn_cast<PHINode>(ValOut);
595 IncPhi && IncPhi->getNumIncomingValues() == 2) {
596 for (int Idx = 0; Idx < 2; ++Idx) {
597 if (IncPhi->getIncomingValue(Idx) == PHI) {
598 ValOut = IncPhi->getIncomingValue(1 - Idx);
599 if (PhiOut)
600 *PhiOut = IncPhi;
601 CtxIOut = IncPhi->getIncomingBlock(1 - Idx)->getTerminator();
602 break;
608 static bool isKnownNonZeroFromAssume(const Value *V, const SimplifyQuery &Q) {
609 // Use of assumptions is context-sensitive. If we don't have a context, we
610 // cannot use them!
611 if (!Q.AC || !Q.CxtI)
612 return false;
614 for (AssumptionCache::ResultElem &Elem : Q.AC->assumptionsFor(V)) {
615 if (!Elem.Assume)
616 continue;
618 AssumeInst *I = cast<AssumeInst>(Elem.Assume);
619 assert(I->getFunction() == Q.CxtI->getFunction() &&
620 "Got assumption for the wrong function!");
622 if (Elem.Index != AssumptionCache::ExprResultIdx) {
623 if (!V->getType()->isPointerTy())
624 continue;
625 if (RetainedKnowledge RK = getKnowledgeFromBundle(
626 *I, I->bundle_op_info_begin()[Elem.Index])) {
627 if (RK.WasOn == V &&
628 (RK.AttrKind == Attribute::NonNull ||
629 (RK.AttrKind == Attribute::Dereferenceable &&
630 !NullPointerIsDefined(Q.CxtI->getFunction(),
631 V->getType()->getPointerAddressSpace()))) &&
632 isValidAssumeForContext(I, Q.CxtI, Q.DT))
633 return true;
635 continue;
638 // Warning: This loop can end up being somewhat performance sensitive.
639 // We're running this loop once for each value queried, resulting in a
640 // runtime of ~O(#assumes * #values).
642 Value *RHS;
643 CmpPredicate Pred;
644 auto m_V = m_CombineOr(m_Specific(V), m_PtrToInt(m_Specific(V)));
645 if (!match(I->getArgOperand(0), m_c_ICmp(Pred, m_V, m_Value(RHS))))
646 continue;
648 if (cmpExcludesZero(Pred, RHS) && isValidAssumeForContext(I, Q.CxtI, Q.DT))
649 return true;
652 return false;
655 static void computeKnownBitsFromCmp(const Value *V, CmpInst::Predicate Pred,
656 Value *LHS, Value *RHS, KnownBits &Known,
657 const SimplifyQuery &Q) {
658 if (RHS->getType()->isPointerTy()) {
659 // Handle comparison of pointer to null explicitly, as it will not be
660 // covered by the m_APInt() logic below.
661 if (LHS == V && match(RHS, m_Zero())) {
662 switch (Pred) {
663 case ICmpInst::ICMP_EQ:
664 Known.setAllZero();
665 break;
666 case ICmpInst::ICMP_SGE:
667 case ICmpInst::ICMP_SGT:
668 Known.makeNonNegative();
669 break;
670 case ICmpInst::ICMP_SLT:
671 Known.makeNegative();
672 break;
673 default:
674 break;
677 return;
680 unsigned BitWidth = Known.getBitWidth();
681 auto m_V =
682 m_CombineOr(m_Specific(V), m_PtrToIntSameSize(Q.DL, m_Specific(V)));
684 Value *Y;
685 const APInt *Mask, *C;
686 uint64_t ShAmt;
687 switch (Pred) {
688 case ICmpInst::ICMP_EQ:
689 // assume(V = C)
690 if (match(LHS, m_V) && match(RHS, m_APInt(C))) {
691 Known = Known.unionWith(KnownBits::makeConstant(*C));
692 // assume(V & Mask = C)
693 } else if (match(LHS, m_c_And(m_V, m_Value(Y))) &&
694 match(RHS, m_APInt(C))) {
695 // For one bits in Mask, we can propagate bits from C to V.
696 Known.One |= *C;
697 if (match(Y, m_APInt(Mask)))
698 Known.Zero |= ~*C & *Mask;
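    // For illustration: assume((V & 0b1100) == 0b0100) makes bit 2 known one
    // and bit 3 known zero; bits outside the mask stay unknown.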
699 // assume(V | Mask = C)
700 } else if (match(LHS, m_c_Or(m_V, m_Value(Y))) && match(RHS, m_APInt(C))) {
701 // For zero bits in Mask, we can propagate bits from C to V.
702 Known.Zero |= ~*C;
703 if (match(Y, m_APInt(Mask)))
704 Known.One |= *C & ~*Mask;
705 // assume(V ^ Mask = C)
706 } else if (match(LHS, m_Xor(m_V, m_APInt(Mask))) &&
707 match(RHS, m_APInt(C))) {
708 // Equivalent to assume(V == Mask ^ C)
709 Known = Known.unionWith(KnownBits::makeConstant(*C ^ *Mask));
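    // For illustration: assume((V ^ 0b0101) == 0b0011) pins V to exactly
    // 0b0110.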
710 // assume(V << ShAmt = C)
711 } else if (match(LHS, m_Shl(m_V, m_ConstantInt(ShAmt))) &&
712 match(RHS, m_APInt(C)) && ShAmt < BitWidth) {
713 // For those bits in C that are known, we can propagate them to known
714 // bits in V shifted to the right by ShAmt.
715 KnownBits RHSKnown = KnownBits::makeConstant(*C);
716 RHSKnown.Zero.lshrInPlace(ShAmt);
717 RHSKnown.One.lshrInPlace(ShAmt);
718 Known = Known.unionWith(RHSKnown);
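    // For illustration (i8): assume((V << 4) == 0xF0) makes V's low four bits
    // known ones; the bits shifted out of V remain unknown.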
719 // assume(V >> ShAmt = C)
720 } else if (match(LHS, m_Shr(m_V, m_ConstantInt(ShAmt))) &&
721 match(RHS, m_APInt(C)) && ShAmt < BitWidth) {
722 KnownBits RHSKnown = KnownBits::makeConstant(*C);
723 // For those bits in RHS that are known, we can propagate them to the
724 // corresponding bits of V, shifted left by ShAmt.
725 Known.Zero |= RHSKnown.Zero << ShAmt;
726 Known.One |= RHSKnown.One << ShAmt;
728 break;
729 case ICmpInst::ICMP_NE: {
730 // assume (V & B != 0) where B is a power of 2
731 const APInt *BPow2;
732 if (match(LHS, m_And(m_V, m_Power2(BPow2))) && match(RHS, m_Zero()))
733 Known.One |= *BPow2;
734 break;
736 default:
737 if (match(RHS, m_APInt(C))) {
738 const APInt *Offset = nullptr;
739 if (match(LHS, m_CombineOr(m_V, m_AddLike(m_V, m_APInt(Offset))))) {
740 ConstantRange LHSRange = ConstantRange::makeAllowedICmpRegion(Pred, *C);
741 if (Offset)
742 LHSRange = LHSRange.sub(*Offset);
743 Known = Known.unionWith(LHSRange.toKnownBits());
745 if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE) {
746 // X & Y u> C -> X u> C && Y u> C
747 // X nuw- Y u> C -> X u> C
748 if (match(LHS, m_c_And(m_V, m_Value())) ||
749 match(LHS, m_NUWSub(m_V, m_Value())))
750 Known.One.setHighBits(
751 (*C + (Pred == ICmpInst::ICMP_UGT)).countLeadingOnes());
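      // For illustration (i8): assume((V & Y) u> 0xF0) implies V u>= 0xF1, so
      // the top four bits of V become known ones.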
753 if (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_ULE) {
754 // X | Y u< C -> X u< C && Y u< C
755 // X nuw+ Y u< C -> X u< C && Y u< C
756 if (match(LHS, m_c_Or(m_V, m_Value())) ||
757 match(LHS, m_c_NUWAdd(m_V, m_Value()))) {
758 Known.Zero.setHighBits(
759 (*C - (Pred == ICmpInst::ICMP_ULT)).countLeadingZeros());
763 break;
767 static void computeKnownBitsFromICmpCond(const Value *V, ICmpInst *Cmp,
768 KnownBits &Known,
769 const SimplifyQuery &SQ, bool Invert) {
770 ICmpInst::Predicate Pred =
771 Invert ? Cmp->getInversePredicate() : Cmp->getPredicate();
772 Value *LHS = Cmp->getOperand(0);
773 Value *RHS = Cmp->getOperand(1);
775 // Handle icmp pred (trunc V), C
776 if (match(LHS, m_Trunc(m_Specific(V)))) {
777 KnownBits DstKnown(LHS->getType()->getScalarSizeInBits());
778 computeKnownBitsFromCmp(LHS, Pred, LHS, RHS, DstKnown, SQ);
779 Known = Known.unionWith(DstKnown.anyext(Known.getBitWidth()));
780 return;
783 computeKnownBitsFromCmp(V, Pred, LHS, RHS, Known, SQ);
786 static void computeKnownBitsFromCond(const Value *V, Value *Cond,
787 KnownBits &Known, unsigned Depth,
788 const SimplifyQuery &SQ, bool Invert) {
789 Value *A, *B;
790 if (Depth < MaxAnalysisRecursionDepth &&
791 match(Cond, m_LogicalOp(m_Value(A), m_Value(B)))) {
792 KnownBits Known2(Known.getBitWidth());
793 KnownBits Known3(Known.getBitWidth());
794 computeKnownBitsFromCond(V, A, Known2, Depth + 1, SQ, Invert);
795 computeKnownBitsFromCond(V, B, Known3, Depth + 1, SQ, Invert);
796 if (Invert ? match(Cond, m_LogicalOr(m_Value(), m_Value()))
797 : match(Cond, m_LogicalAnd(m_Value(), m_Value())))
798 Known2 = Known2.unionWith(Known3);
799 else
800 Known2 = Known2.intersectWith(Known3);
801 Known = Known.unionWith(Known2);
804 if (auto *Cmp = dyn_cast<ICmpInst>(Cond))
805 computeKnownBitsFromICmpCond(V, Cmp, Known, SQ, Invert);
808 void llvm::computeKnownBitsFromContext(const Value *V, KnownBits &Known,
809 unsigned Depth, const SimplifyQuery &Q) {
810 // Handle injected condition.
811 if (Q.CC && Q.CC->AffectedValues.contains(V))
812 computeKnownBitsFromCond(V, Q.CC->Cond, Known, Depth, Q, Q.CC->Invert);
814 if (!Q.CxtI)
815 return;
817 if (Q.DC && Q.DT) {
818 // Handle dominating conditions.
819 for (BranchInst *BI : Q.DC->conditionsFor(V)) {
820 BasicBlockEdge Edge0(BI->getParent(), BI->getSuccessor(0));
821 if (Q.DT->dominates(Edge0, Q.CxtI->getParent()))
822 computeKnownBitsFromCond(V, BI->getCondition(), Known, Depth, Q,
823 /*Invert*/ false);
825 BasicBlockEdge Edge1(BI->getParent(), BI->getSuccessor(1));
826 if (Q.DT->dominates(Edge1, Q.CxtI->getParent()))
827 computeKnownBitsFromCond(V, BI->getCondition(), Known, Depth, Q,
828 /*Invert*/ true);
831 if (Known.hasConflict())
832 Known.resetAll();
835 if (!Q.AC)
836 return;
838 unsigned BitWidth = Known.getBitWidth();
840 // Note that the patterns below need to be kept in sync with the code
841 // in AssumptionCache::updateAffectedValues.
843 for (AssumptionCache::ResultElem &Elem : Q.AC->assumptionsFor(V)) {
844 if (!Elem.Assume)
845 continue;
847 AssumeInst *I = cast<AssumeInst>(Elem.Assume);
848 assert(I->getParent()->getParent() == Q.CxtI->getParent()->getParent() &&
849 "Got assumption for the wrong function!");
851 if (Elem.Index != AssumptionCache::ExprResultIdx) {
852 if (!V->getType()->isPointerTy())
853 continue;
854 if (RetainedKnowledge RK = getKnowledgeFromBundle(
855 *I, I->bundle_op_info_begin()[Elem.Index])) {
856 // Allow AllowEphemerals in isValidAssumeForContext, as the CxtI might
857 // be the producer of the pointer in the bundle. At the moment, align
858 // assumptions aren't optimized away.
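      // For illustration: an "align"(ptr %p, i64 16) bundle on a hypothetical
      // pointer %p lets us mark its low Log2_64(16) == 4 bits as known zero.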
859 if (RK.WasOn == V && RK.AttrKind == Attribute::Alignment &&
860 isPowerOf2_64(RK.ArgValue) &&
861 isValidAssumeForContext(I, Q.CxtI, Q.DT, /*AllowEphemerals*/ true))
862 Known.Zero.setLowBits(Log2_64(RK.ArgValue));
864 continue;
867 // Warning: This loop can end up being somewhat performance sensitive.
868 // We're running this loop once for each value queried, resulting in a
869 // runtime of ~O(#assumes * #values).
871 Value *Arg = I->getArgOperand(0);
873 if (Arg == V && isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
874 assert(BitWidth == 1 && "assume operand is not i1?");
875 (void)BitWidth;
876 Known.setAllOnes();
877 return;
879 if (match(Arg, m_Not(m_Specific(V))) &&
880 isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
881 assert(BitWidth == 1 && "assume operand is not i1?");
882 (void)BitWidth;
883 Known.setAllZero();
884 return;
887 // The remaining tests are all recursive, so bail out if we hit the limit.
888 if (Depth == MaxAnalysisRecursionDepth)
889 continue;
891 ICmpInst *Cmp = dyn_cast<ICmpInst>(Arg);
892 if (!Cmp)
893 continue;
895 if (!isValidAssumeForContext(I, Q.CxtI, Q.DT))
896 continue;
898 computeKnownBitsFromICmpCond(V, Cmp, Known, Q, /*Invert=*/false);
901 // Conflicting assumption: Undefined behavior will occur on this execution
902 // path.
903 if (Known.hasConflict())
904 Known.resetAll();
907 /// Compute known bits from a shift operator, including those with a
908 /// non-constant shift amount. Known is the output of this function. Known2 is a
909 /// pre-allocated temporary with the same bit width as Known and on return
910 /// contains the known bits of the shifted value's source. KF is an
911 /// operator-specific function that, given the known bits and a shift amount,
912 /// computes the implied known bits of the shift operator's result for that
913 /// shift amount. The results from calling KF are conservatively
914 /// combined for all permitted shift amounts.
915 static void computeKnownBitsFromShiftOperator(
916 const Operator *I, const APInt &DemandedElts, KnownBits &Known,
917 KnownBits &Known2, unsigned Depth, const SimplifyQuery &Q,
918 function_ref<KnownBits(const KnownBits &, const KnownBits &, bool)> KF) {
919 computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);
920 computeKnownBits(I->getOperand(1), DemandedElts, Known, Depth + 1, Q);
921 // To limit compile-time impact, only query isKnownNonZero() if we know at
922 // least something about the shift amount.
923 bool ShAmtNonZero =
924 Known.isNonZero() ||
925 (Known.getMaxValue().ult(Known.getBitWidth()) &&
926 isKnownNonZero(I->getOperand(1), DemandedElts, Q, Depth + 1));
927 Known = KF(Known2, Known, ShAmtNonZero);
930 static KnownBits
931 getKnownBitsFromAndXorOr(const Operator *I, const APInt &DemandedElts,
932 const KnownBits &KnownLHS, const KnownBits &KnownRHS,
933 unsigned Depth, const SimplifyQuery &Q) {
934 unsigned BitWidth = KnownLHS.getBitWidth();
935 KnownBits KnownOut(BitWidth);
936 bool IsAnd = false;
937 bool HasKnownOne = !KnownLHS.One.isZero() || !KnownRHS.One.isZero();
938 Value *X = nullptr, *Y = nullptr;
940 switch (I->getOpcode()) {
941 case Instruction::And:
942 KnownOut = KnownLHS & KnownRHS;
943 IsAnd = true;
944 // and(x, -x) is a common idiom that clears all but the lowest set
945 // bit. If we have a single known bit in x, we can clear all bits
946 // above it.
947 // TODO: instcombine often reassociates independent `and` which can hide
948 // this pattern. Try to match and(x, and(-x, y)) / and(and(x, y), -x).
949 if (HasKnownOne && match(I, m_c_And(m_Value(X), m_Neg(m_Deferred(X))))) {
950 // -(-x) == x, so use whichever of LHS/RHS gets us the better result.
951 if (KnownLHS.countMaxTrailingZeros() <= KnownRHS.countMaxTrailingZeros())
952 KnownOut = KnownLHS.blsi();
953 else
954 KnownOut = KnownRHS.blsi();
956 break;
957 case Instruction::Or:
958 KnownOut = KnownLHS | KnownRHS;
959 break;
960 case Instruction::Xor:
961 KnownOut = KnownLHS ^ KnownRHS;
962 // xor(x, x-1) is a common idiom that clears all bits above the lowest set
963 // bit. If we have a single known bit in x, we can clear all bits
964 // above it.
965 // TODO: xor(x, x-1) is often rewritten as xor(x, x-C) where C !=
966 // -1, but for the purpose of demanded bits (xor(x, x-C) &
967 // Demanded) == (xor(x, x-1) & Demanded). Extend the xor pattern
968 // to use an arbitrary C when xor(x, x-C) behaves the same as xor(x, x-1).
969 if (HasKnownOne &&
970 match(I, m_c_Xor(m_Value(X), m_Add(m_Deferred(X), m_AllOnes())))) {
971 const KnownBits &XBits = I->getOperand(0) == X ? KnownLHS : KnownRHS;
972 KnownOut = XBits.blsmsk();
974 break;
975 default:
976 llvm_unreachable("Invalid Op used in 'analyzeKnownBitsFromAndXorOr'");
979 // and(x, add (x, -1)) is a common idiom that always clears the low bit;
980 // xor/or(x, add (x, -1)) is an idiom that will always set the low bit.
981 // Here we handle the more general case of adding any odd number by
982 // matching the form and/xor/or(x, add(x, y)) where y is odd.
983 // TODO: This could be generalized to clearing any bit set in y where the
984 // following bit is known to be unset in y.
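// For illustration: in and(x, x + 1) we have y == 1 (odd), so bit 0 of the
// result is known zero; in or(x, x - 1) the same reasoning sets bit 0.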
985 if (!KnownOut.Zero[0] && !KnownOut.One[0] &&
986 (match(I, m_c_BinOp(m_Value(X), m_c_Add(m_Deferred(X), m_Value(Y)))) ||
987 match(I, m_c_BinOp(m_Value(X), m_Sub(m_Deferred(X), m_Value(Y)))) ||
988 match(I, m_c_BinOp(m_Value(X), m_Sub(m_Value(Y), m_Deferred(X)))))) {
989 KnownBits KnownY(BitWidth);
990 computeKnownBits(Y, DemandedElts, KnownY, Depth + 1, Q);
991 if (KnownY.countMinTrailingOnes() > 0) {
992 if (IsAnd)
993 KnownOut.Zero.setBit(0);
994 else
995 KnownOut.One.setBit(0);
998 return KnownOut;
1001 static KnownBits computeKnownBitsForHorizontalOperation(
1002 const Operator *I, const APInt &DemandedElts, unsigned Depth,
1003 const SimplifyQuery &Q,
1004 const function_ref<KnownBits(const KnownBits &, const KnownBits &)>
1005 KnownBitsFunc) {
1006 APInt DemandedEltsLHS, DemandedEltsRHS;
1007 getHorizDemandedEltsForFirstOperand(Q.DL.getTypeSizeInBits(I->getType()),
1008 DemandedElts, DemandedEltsLHS,
1009 DemandedEltsRHS);
1011 const auto ComputeForSingleOpFunc =
1012 [Depth, &Q, KnownBitsFunc](const Value *Op, APInt &DemandedEltsOp) {
1013 return KnownBitsFunc(
1014 computeKnownBits(Op, DemandedEltsOp, Depth + 1, Q),
1015 computeKnownBits(Op, DemandedEltsOp << 1, Depth + 1, Q));
1018 if (DemandedEltsRHS.isZero())
1019 return ComputeForSingleOpFunc(I->getOperand(0), DemandedEltsLHS);
1020 if (DemandedEltsLHS.isZero())
1021 return ComputeForSingleOpFunc(I->getOperand(1), DemandedEltsRHS);
1023 return ComputeForSingleOpFunc(I->getOperand(0), DemandedEltsLHS)
1024 .intersectWith(ComputeForSingleOpFunc(I->getOperand(1), DemandedEltsRHS));
1027 // Public so this can be used in `SimplifyDemandedUseBits`.
1028 KnownBits llvm::analyzeKnownBitsFromAndXorOr(const Operator *I,
1029 const KnownBits &KnownLHS,
1030 const KnownBits &KnownRHS,
1031 unsigned Depth,
1032 const SimplifyQuery &SQ) {
1033 auto *FVTy = dyn_cast<FixedVectorType>(I->getType());
1034 APInt DemandedElts =
1035 FVTy ? APInt::getAllOnes(FVTy->getNumElements()) : APInt(1, 1);
1037 return getKnownBitsFromAndXorOr(I, DemandedElts, KnownLHS, KnownRHS, Depth,
1038 SQ);
1041 ConstantRange llvm::getVScaleRange(const Function *F, unsigned BitWidth) {
1042 Attribute Attr = F->getFnAttribute(Attribute::VScaleRange);
1043 // Without vscale_range, we only know that vscale is non-zero.
1044 if (!Attr.isValid())
1045 return ConstantRange(APInt(BitWidth, 1), APInt::getZero(BitWidth));
1047 unsigned AttrMin = Attr.getVScaleRangeMin();
1048 // Minimum is larger than vscale width, result is always poison.
1049 if ((unsigned)llvm::bit_width(AttrMin) > BitWidth)
1050 return ConstantRange::getEmpty(BitWidth);
1052 APInt Min(BitWidth, AttrMin);
1053 std::optional<unsigned> AttrMax = Attr.getVScaleRangeMax();
1054 if (!AttrMax || (unsigned)llvm::bit_width(*AttrMax) > BitWidth)
1055 return ConstantRange(Min, APInt::getZero(BitWidth));
1057 return ConstantRange(Min, APInt(BitWidth, *AttrMax) + 1);
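// For illustration (assuming a wide enough BitWidth): vscale_range(2,16)
// yields the range [2, 17), while a missing attribute yields the wrapped
// range [1, 0), i.e. any non-zero vscale.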
1060 void llvm::adjustKnownBitsForSelectArm(KnownBits &Known, Value *Cond,
1061 Value *Arm, bool Invert, unsigned Depth,
1062 const SimplifyQuery &Q) {
1063 // If we have a constant arm, we are done.
1064 if (Known.isConstant())
1065 return;
1067 // See what condition implies about the bits of the select arm.
1068 KnownBits CondRes(Known.getBitWidth());
1069 computeKnownBitsFromCond(Arm, Cond, CondRes, Depth + 1, Q, Invert);
1070 // If we don't get any information from the condition, no reason to
1071 // proceed.
1072 if (CondRes.isUnknown())
1073 return;
1075 // We can have a conflict if the condition is dead. I.e. if we have
1076 // (x | 64) < 32 ? (x | 64) : y
1077 // we will have a conflict at bit 6 from the condition/the `or`.
1078 // In that case just return. It's not particularly important
1079 // what we do, as this select is going to be simplified soon.
1080 CondRes = CondRes.unionWith(Known);
1081 if (CondRes.hasConflict())
1082 return;
1084 // Finally make sure the information we found is valid. This is relatively
1085 // expensive so it's left for the very end.
1086 if (!isGuaranteedNotToBeUndef(Arm, Q.AC, Q.CxtI, Q.DT, Depth + 1))
1087 return;
1089 // Finally, we know we get information from the condition and it's valid,
1090 // so return it.
1091 Known = CondRes;
1094 // Match a signed min+max clamp pattern like smax(smin(In, CHigh), CLow).
1095 // Returns the input and lower/upper bounds.
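// For illustration (with a hypothetical value %x): smax(smin(%x, 255), 0)
// clamps %x to [0, 255]; the match produces In = %x, CLow = 0, CHigh = 255.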
1096 static bool isSignedMinMaxClamp(const Value *Select, const Value *&In,
1097 const APInt *&CLow, const APInt *&CHigh) {
1098 assert(isa<Operator>(Select) &&
1099 cast<Operator>(Select)->getOpcode() == Instruction::Select &&
1100 "Input should be a Select!");
1102 const Value *LHS = nullptr, *RHS = nullptr;
1103 SelectPatternFlavor SPF = matchSelectPattern(Select, LHS, RHS).Flavor;
1104 if (SPF != SPF_SMAX && SPF != SPF_SMIN)
1105 return false;
1107 if (!match(RHS, m_APInt(CLow)))
1108 return false;
1110 const Value *LHS2 = nullptr, *RHS2 = nullptr;
1111 SelectPatternFlavor SPF2 = matchSelectPattern(LHS, LHS2, RHS2).Flavor;
1112 if (getInverseMinMaxFlavor(SPF) != SPF2)
1113 return false;
1115 if (!match(RHS2, m_APInt(CHigh)))
1116 return false;
1118 if (SPF == SPF_SMIN)
1119 std::swap(CLow, CHigh);
1121 In = LHS2;
1122 return CLow->sle(*CHigh);
1125 static bool isSignedMinMaxIntrinsicClamp(const IntrinsicInst *II,
1126 const APInt *&CLow,
1127 const APInt *&CHigh) {
1128 assert((II->getIntrinsicID() == Intrinsic::smin ||
1129 II->getIntrinsicID() == Intrinsic::smax) &&
1130 "Must be smin/smax");
1132 Intrinsic::ID InverseID = getInverseMinMaxIntrinsic(II->getIntrinsicID());
1133 auto *InnerII = dyn_cast<IntrinsicInst>(II->getArgOperand(0));
1134 if (!InnerII || InnerII->getIntrinsicID() != InverseID ||
1135 !match(II->getArgOperand(1), m_APInt(CLow)) ||
1136 !match(InnerII->getArgOperand(1), m_APInt(CHigh)))
1137 return false;
1139 if (II->getIntrinsicID() == Intrinsic::smin)
1140 std::swap(CLow, CHigh);
1141 return CLow->sle(*CHigh);
1144 static void unionWithMinMaxIntrinsicClamp(const IntrinsicInst *II,
1145 KnownBits &Known) {
1146 const APInt *CLow, *CHigh;
1147 if (isSignedMinMaxIntrinsicClamp(II, CLow, CHigh))
1148 Known = Known.unionWith(
1149 ConstantRange::getNonEmpty(*CLow, *CHigh + 1).toKnownBits());
1152 static void computeKnownBitsFromOperator(const Operator *I,
1153 const APInt &DemandedElts,
1154 KnownBits &Known, unsigned Depth,
1155 const SimplifyQuery &Q) {
1156 unsigned BitWidth = Known.getBitWidth();
1158 KnownBits Known2(BitWidth);
1159 switch (I->getOpcode()) {
1160 default: break;
1161 case Instruction::Load:
1162 if (MDNode *MD =
1163 Q.IIQ.getMetadata(cast<LoadInst>(I), LLVMContext::MD_range))
1164 computeKnownBitsFromRangeMetadata(*MD, Known);
1165 break;
1166 case Instruction::And:
1167 computeKnownBits(I->getOperand(1), DemandedElts, Known, Depth + 1, Q);
1168 computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);
1170 Known = getKnownBitsFromAndXorOr(I, DemandedElts, Known2, Known, Depth, Q);
1171 break;
1172 case Instruction::Or:
1173 computeKnownBits(I->getOperand(1), DemandedElts, Known, Depth + 1, Q);
1174 computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);
1176 Known = getKnownBitsFromAndXorOr(I, DemandedElts, Known2, Known, Depth, Q);
1177 break;
1178 case Instruction::Xor:
1179 computeKnownBits(I->getOperand(1), DemandedElts, Known, Depth + 1, Q);
1180 computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);
1182 Known = getKnownBitsFromAndXorOr(I, DemandedElts, Known2, Known, Depth, Q);
1183 break;
1184 case Instruction::Mul: {
1185 bool NSW = Q.IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(I));
1186 bool NUW = Q.IIQ.hasNoUnsignedWrap(cast<OverflowingBinaryOperator>(I));
1187 computeKnownBitsMul(I->getOperand(0), I->getOperand(1), NSW, NUW,
1188 DemandedElts, Known, Known2, Depth, Q);
1189 break;
1191 case Instruction::UDiv: {
1192 computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
1193 computeKnownBits(I->getOperand(1), DemandedElts, Known2, Depth + 1, Q);
1194 Known =
1195 KnownBits::udiv(Known, Known2, Q.IIQ.isExact(cast<BinaryOperator>(I)));
1196 break;
1198 case Instruction::SDiv: {
1199 computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
1200 computeKnownBits(I->getOperand(1), DemandedElts, Known2, Depth + 1, Q);
1201 Known =
1202 KnownBits::sdiv(Known, Known2, Q.IIQ.isExact(cast<BinaryOperator>(I)));
1203 break;
1205 case Instruction::Select: {
1206 auto ComputeForArm = [&](Value *Arm, bool Invert) {
1207 KnownBits Res(Known.getBitWidth());
1208 computeKnownBits(Arm, DemandedElts, Res, Depth + 1, Q);
1209 adjustKnownBitsForSelectArm(Res, I->getOperand(0), Arm, Invert, Depth, Q);
1210 return Res;
1212 // Only known if known in both the LHS and RHS.
1213 Known =
1214 ComputeForArm(I->getOperand(1), /*Invert=*/false)
1215 .intersectWith(ComputeForArm(I->getOperand(2), /*Invert=*/true));
1216 break;
1218 case Instruction::FPTrunc:
1219 case Instruction::FPExt:
1220 case Instruction::FPToUI:
1221 case Instruction::FPToSI:
1222 case Instruction::SIToFP:
1223 case Instruction::UIToFP:
1224 break; // Can't work with floating point.
1225 case Instruction::PtrToInt:
1226 case Instruction::IntToPtr:
1227 // Fall through and handle them the same as zext/trunc.
1228 [[fallthrough]];
1229 case Instruction::ZExt:
1230 case Instruction::Trunc: {
1231 Type *SrcTy = I->getOperand(0)->getType();
1233 unsigned SrcBitWidth;
1234 // Note that we handle pointer operands here because of inttoptr/ptrtoint
1235 // which fall through here.
1236 Type *ScalarTy = SrcTy->getScalarType();
1237 SrcBitWidth = ScalarTy->isPointerTy() ?
1238 Q.DL.getPointerTypeSizeInBits(ScalarTy) :
1239 Q.DL.getTypeSizeInBits(ScalarTy);
1241 assert(SrcBitWidth && "SrcBitWidth can't be zero");
1242 Known = Known.anyextOrTrunc(SrcBitWidth);
1243 computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
1244 if (auto *Inst = dyn_cast<PossiblyNonNegInst>(I);
1245 Inst && Inst->hasNonNeg() && !Known.isNegative())
1246 Known.makeNonNegative();
1247 Known = Known.zextOrTrunc(BitWidth);
1248 break;
1250 case Instruction::BitCast: {
1251 Type *SrcTy = I->getOperand(0)->getType();
1252 if (SrcTy->isIntOrPtrTy() &&
1253 // TODO: For now, not handling conversions like:
1254 // (bitcast i64 %x to <2 x i32>)
1255 !I->getType()->isVectorTy()) {
1256 computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
1257 break;
1260 const Value *V;
1261 // Handle bitcast from floating point to integer.
1262 if (match(I, m_ElementWiseBitCast(m_Value(V))) &&
1263 V->getType()->isFPOrFPVectorTy()) {
1264 Type *FPType = V->getType()->getScalarType();
1265 KnownFPClass Result =
1266 computeKnownFPClass(V, DemandedElts, fcAllFlags, Depth + 1, Q);
1267 FPClassTest FPClasses = Result.KnownFPClasses;
1269 // TODO: Treat it as zero/poison if the use of I is unreachable.
1270 if (FPClasses == fcNone)
1271 break;
1273 if (Result.isKnownNever(fcNormal | fcSubnormal | fcNan)) {
1274 Known.Zero.setAllBits();
1275 Known.One.setAllBits();
1277 if (FPClasses & fcInf)
1278 Known = Known.intersectWith(KnownBits::makeConstant(
1279 APFloat::getInf(FPType->getFltSemantics()).bitcastToAPInt()));
1281 if (FPClasses & fcZero)
1282 Known = Known.intersectWith(KnownBits::makeConstant(
1283 APInt::getZero(FPType->getScalarSizeInBits())));
1285 Known.Zero.clearSignBit();
1286 Known.One.clearSignBit();
1289 if (Result.SignBit) {
1290 if (*Result.SignBit)
1291 Known.makeNegative();
1292 else
1293 Known.makeNonNegative();
1296 break;
1299 // Handle cast from vector integer type to scalar or vector integer.
1300 auto *SrcVecTy = dyn_cast<FixedVectorType>(SrcTy);
1301 if (!SrcVecTy || !SrcVecTy->getElementType()->isIntegerTy() ||
1302 !I->getType()->isIntOrIntVectorTy() ||
1303 isa<ScalableVectorType>(I->getType()))
1304 break;
1306 // Look through a cast from narrow vector elements to wider type.
1307 // Examples: v4i32 -> v2i64, v3i8 -> v24
1308 unsigned SubBitWidth = SrcVecTy->getScalarSizeInBits();
1309 if (BitWidth % SubBitWidth == 0) {
1310 // Known bits are automatically intersected across demanded elements of a
1311 // vector. So for example, if a bit is computed as known zero, it must be
1312 // zero across all demanded elements of the vector.
1314 // For this bitcast, each demanded element of the output is sub-divided
1315 // across a set of smaller vector elements in the source vector. To get
1316 // the known bits for an entire element of the output, compute the known
1317 // bits for each sub-element sequentially. This is done by shifting the
1318 // one-set-bit demanded elements parameter across the sub-elements for
1319 // consecutive calls to computeKnownBits. We are using the demanded
1320 // elements parameter as a mask operator.
1322 // The known bits of each sub-element are then inserted into place
1323 // (dependent on endian) to form the full result of known bits.
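// For illustration: when bitcasting <4 x i16> to <2 x i32> on a little-endian
// target, element 0 of the result takes source element 0 as its low half and
// source element 1 as its high half.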
1324 unsigned NumElts = DemandedElts.getBitWidth();
1325 unsigned SubScale = BitWidth / SubBitWidth;
1326 APInt SubDemandedElts = APInt::getZero(NumElts * SubScale);
1327 for (unsigned i = 0; i != NumElts; ++i) {
1328 if (DemandedElts[i])
1329 SubDemandedElts.setBit(i * SubScale);
1332 KnownBits KnownSrc(SubBitWidth);
1333 for (unsigned i = 0; i != SubScale; ++i) {
1334 computeKnownBits(I->getOperand(0), SubDemandedElts.shl(i), KnownSrc,
1335 Depth + 1, Q);
1336 unsigned ShiftElt = Q.DL.isLittleEndian() ? i : SubScale - 1 - i;
1337 Known.insertBits(KnownSrc, ShiftElt * SubBitWidth);
1340 break;
1342 case Instruction::SExt: {
1343 // Compute the bits in the result that are not present in the input.
1344 unsigned SrcBitWidth = I->getOperand(0)->getType()->getScalarSizeInBits();
1346 Known = Known.trunc(SrcBitWidth);
1347 computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
1348 // If the sign bit of the input is known set or clear, then we know the
1349 // top bits of the result.
1350 Known = Known.sext(BitWidth);
1351 break;
1353 case Instruction::Shl: {
1354 bool NUW = Q.IIQ.hasNoUnsignedWrap(cast<OverflowingBinaryOperator>(I));
1355 bool NSW = Q.IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(I));
1356 auto KF = [NUW, NSW](const KnownBits &KnownVal, const KnownBits &KnownAmt,
1357 bool ShAmtNonZero) {
1358 return KnownBits::shl(KnownVal, KnownAmt, NUW, NSW, ShAmtNonZero);
1360 computeKnownBitsFromShiftOperator(I, DemandedElts, Known, Known2, Depth, Q,
1361 KF);
1362 // Trailing zeros of a left-shifted constant never decrease.
1363 const APInt *C;
1364 if (match(I->getOperand(0), m_APInt(C)))
1365 Known.Zero.setLowBits(C->countr_zero());
1366 break;
1368 case Instruction::LShr: {
1369 bool Exact = Q.IIQ.isExact(cast<BinaryOperator>(I));
1370 auto KF = [Exact](const KnownBits &KnownVal, const KnownBits &KnownAmt,
1371 bool ShAmtNonZero) {
1372 return KnownBits::lshr(KnownVal, KnownAmt, ShAmtNonZero, Exact);
1374 computeKnownBitsFromShiftOperator(I, DemandedElts, Known, Known2, Depth, Q,
1375 KF);
1376 // Leading zeros of a right-shifted constant never decrease.
1377 const APInt *C;
1378 if (match(I->getOperand(0), m_APInt(C)))
1379 Known.Zero.setHighBits(C->countl_zero());
1380 break;
1382 case Instruction::AShr: {
1383 bool Exact = Q.IIQ.isExact(cast<BinaryOperator>(I));
1384 auto KF = [Exact](const KnownBits &KnownVal, const KnownBits &KnownAmt,
1385 bool ShAmtNonZero) {
1386 return KnownBits::ashr(KnownVal, KnownAmt, ShAmtNonZero, Exact);
1388 computeKnownBitsFromShiftOperator(I, DemandedElts, Known, Known2, Depth, Q,
1389 KF);
1390 break;
1392 case Instruction::Sub: {
1393 bool NSW = Q.IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(I));
1394 bool NUW = Q.IIQ.hasNoUnsignedWrap(cast<OverflowingBinaryOperator>(I));
1395 computeKnownBitsAddSub(false, I->getOperand(0), I->getOperand(1), NSW, NUW,
1396 DemandedElts, Known, Known2, Depth, Q);
1397 break;
1399 case Instruction::Add: {
1400 bool NSW = Q.IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(I));
1401 bool NUW = Q.IIQ.hasNoUnsignedWrap(cast<OverflowingBinaryOperator>(I));
1402 computeKnownBitsAddSub(true, I->getOperand(0), I->getOperand(1), NSW, NUW,
1403 DemandedElts, Known, Known2, Depth, Q);
1404 break;
1406 case Instruction::SRem:
1407 computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
1408 computeKnownBits(I->getOperand(1), DemandedElts, Known2, Depth + 1, Q);
1409 Known = KnownBits::srem(Known, Known2);
1410 break;
1412 case Instruction::URem:
1413 computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
1414 computeKnownBits(I->getOperand(1), DemandedElts, Known2, Depth + 1, Q);
1415 Known = KnownBits::urem(Known, Known2);
1416 break;
1417 case Instruction::Alloca:
1418 Known.Zero.setLowBits(Log2(cast<AllocaInst>(I)->getAlign()));
1419 break;
1420 case Instruction::GetElementPtr: {
1421 // Analyze all of the subscripts of this getelementptr instruction
1422 // to determine if we can prove known low zero bits.
1423 computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
1424 // Accumulate the constant indices in a separate variable
1425 // to minimize the number of calls to computeForAddSub.
1426 APInt AccConstIndices(BitWidth, 0, /*IsSigned*/ true);
1428 gep_type_iterator GTI = gep_type_begin(I);
1429 for (unsigned i = 1, e = I->getNumOperands(); i != e; ++i, ++GTI) {
1430 // TrailZ can only become smaller, short-circuit if we hit zero.
1431 if (Known.isUnknown())
1432 break;
1434 Value *Index = I->getOperand(i);
1436 // Handle case when index is zero.
1437 Constant *CIndex = dyn_cast<Constant>(Index);
1438 if (CIndex && CIndex->isZeroValue())
1439 continue;
1441 if (StructType *STy = GTI.getStructTypeOrNull()) {
1442 // Handle struct member offset arithmetic.
1444 assert(CIndex &&
1445 "Access to structure field must be known at compile time");
1447 if (CIndex->getType()->isVectorTy())
1448 Index = CIndex->getSplatValue();
1450 unsigned Idx = cast<ConstantInt>(Index)->getZExtValue();
1451 const StructLayout *SL = Q.DL.getStructLayout(STy);
1452 uint64_t Offset = SL->getElementOffset(Idx);
1453 AccConstIndices += Offset;
1454 continue;
1457 // Handle array index arithmetic.
1458 Type *IndexedTy = GTI.getIndexedType();
1459 if (!IndexedTy->isSized()) {
1460 Known.resetAll();
1461 break;
1464 unsigned IndexBitWidth = Index->getType()->getScalarSizeInBits();
1465 KnownBits IndexBits(IndexBitWidth);
1466 computeKnownBits(Index, IndexBits, Depth + 1, Q);
1467 TypeSize IndexTypeSize = GTI.getSequentialElementStride(Q.DL);
1468 uint64_t TypeSizeInBytes = IndexTypeSize.getKnownMinValue();
1469 KnownBits ScalingFactor(IndexBitWidth);
1470 // Multiply by current sizeof type.
1471 // &A[i] == A + i * sizeof(*A[i]).
1472 if (IndexTypeSize.isScalable()) {
1473 // For scalable types the only thing we know about sizeof is
1474 // that this is a multiple of the minimum size.
1475 ScalingFactor.Zero.setLowBits(llvm::countr_zero(TypeSizeInBytes));
1476 } else if (IndexBits.isConstant()) {
1477 APInt IndexConst = IndexBits.getConstant();
1478 APInt ScalingFactor(IndexBitWidth, TypeSizeInBytes);
1479 IndexConst *= ScalingFactor;
1480 AccConstIndices += IndexConst.sextOrTrunc(BitWidth);
1481 continue;
1482 } else {
1483 ScalingFactor =
1484 KnownBits::makeConstant(APInt(IndexBitWidth, TypeSizeInBytes));
1486 IndexBits = KnownBits::mul(IndexBits, ScalingFactor);
1488 // If the offsets have a different width from the pointer, according
1489 // to the language reference we need to sign-extend or truncate them
1490 // to the width of the pointer.
1491 IndexBits = IndexBits.sextOrTrunc(BitWidth);
1493 // Note that inbounds does *not* guarantee nsw for the addition, as only
1494 // the offset is signed, while the base address is unsigned.
1495 Known = KnownBits::add(Known, IndexBits);
1497 if (!Known.isUnknown() && !AccConstIndices.isZero()) {
1498 KnownBits Index = KnownBits::makeConstant(AccConstIndices);
1499 Known = KnownBits::add(Known, Index);
1501 break;
1503 case Instruction::PHI: {
1504 const PHINode *P = cast<PHINode>(I);
1505 BinaryOperator *BO = nullptr;
1506 Value *R = nullptr, *L = nullptr;
1507 if (matchSimpleRecurrence(P, BO, R, L)) {
1508 // Handle the case of a simple two-predecessor recurrence PHI.
1509 // There's a lot more that could theoretically be done here, but
1510 // this is sufficient to catch some interesting cases.
1511 unsigned Opcode = BO->getOpcode();
1513 switch (Opcode) {
1514 // If this is a shift recurrence, we know the bits being shifted in. We
1515 // can combine that with information about the start value of the
1516 // recurrence to conclude facts about the result. If this is a udiv
1517 // recurrence, we know that the result can never exceed either the
1518 // numerator or the start value, whichever is greater.
1519 case Instruction::LShr:
1520 case Instruction::AShr:
1521 case Instruction::Shl:
1522 case Instruction::UDiv:
1523 if (BO->getOperand(0) != I)
1524 break;
1525 [[fallthrough]];
1527 // For a urem recurrence, the result can never exceed the start value. The
1528 // phi could either be the numerator or the denominator.
1529 case Instruction::URem: {
1530 // We have matched a recurrence of the form:
1531 // %iv = [R, %entry], [%iv.next, %backedge]
1532 // %iv.next = shift_op %iv, L
1534 // Recurse with the phi context to avoid concern about whether facts
1535 // inferred hold at the original context instruction. TODO: It may be
1536 // correct to use the original context. If warranted, explore and
1537 // add sufficient tests to cover.
1538 SimplifyQuery RecQ = Q.getWithoutCondContext();
1539 RecQ.CxtI = P;
1540 computeKnownBits(R, DemandedElts, Known2, Depth + 1, RecQ);
1541 switch (Opcode) {
1542 case Instruction::Shl:
1543 // A shl recurrence can only increase the number of trailing zeros.
1544 Known.Zero.setLowBits(Known2.countMinTrailingZeros());
1545 break;
1546 case Instruction::LShr:
1547 case Instruction::UDiv:
1548 case Instruction::URem:
1549 // lshr, udiv, and urem recurrences will preserve the leading zeros of
1550 // the start value.
1551 Known.Zero.setHighBits(Known2.countMinLeadingZeros());
1552 break;
1553 case Instruction::AShr:
1554 // An ashr recurrence will extend the initial sign bit
1555 Known.Zero.setHighBits(Known2.countMinLeadingZeros());
1556 Known.One.setHighBits(Known2.countMinLeadingOnes());
1557 break;
1559 break;
1562 // Check for operations that have the property that if
1563 // both their operands have low zero bits, the result
1564 // will have low zero bits.
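// For example, if both the start value and the step are multiples of 4, every
// value of the recurrence is also a multiple of 4, so two trailing zero bits
// are known in the result.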
1565 case Instruction::Add:
1566 case Instruction::Sub:
1567 case Instruction::And:
1568 case Instruction::Or:
1569 case Instruction::Mul: {
1570 // Change the context instruction to the "edge" that flows into the
1571 // phi. This is important because that is where the value is actually
1572 // "evaluated" even though it is used later somewhere else. (see also
1573 // D69571).
1574 SimplifyQuery RecQ = Q.getWithoutCondContext();
1576 unsigned OpNum = P->getOperand(0) == R ? 0 : 1;
1577 Instruction *RInst = P->getIncomingBlock(OpNum)->getTerminator();
1578 Instruction *LInst = P->getIncomingBlock(1 - OpNum)->getTerminator();
1580 // Ok, we have a PHI of the form L op= R. Check for low
1581 // zero bits.
1582 RecQ.CxtI = RInst;
1583 computeKnownBits(R, DemandedElts, Known2, Depth + 1, RecQ);
1585 // We need to take the minimum number of known bits
1586 KnownBits Known3(BitWidth);
1587 RecQ.CxtI = LInst;
1588 computeKnownBits(L, DemandedElts, Known3, Depth + 1, RecQ);
1590 Known.Zero.setLowBits(std::min(Known2.countMinTrailingZeros(),
1591 Known3.countMinTrailingZeros()));
1593 auto *OverflowOp = dyn_cast<OverflowingBinaryOperator>(BO);
1594 if (!OverflowOp || !Q.IIQ.hasNoSignedWrap(OverflowOp))
1595 break;
1597 switch (Opcode) {
1598 // If initial value of recurrence is nonnegative, and we are adding
1599 // a nonnegative number with nsw, the result can only be nonnegative
1600 // or poison value regardless of the number of times we execute the
1601 // add in phi recurrence. If initial value is negative and we are
1602 // adding a negative number with nsw, the result can only be
1603 // negative or poison value. Similar arguments apply to sub and mul.
1605 // (add non-negative, non-negative) --> non-negative
1606 // (add negative, negative) --> negative
1607 case Instruction::Add: {
1608 if (Known2.isNonNegative() && Known3.isNonNegative())
1609 Known.makeNonNegative();
1610 else if (Known2.isNegative() && Known3.isNegative())
1611 Known.makeNegative();
1612 break;
1615 // (sub nsw non-negative, negative) --> non-negative
1616 // (sub nsw negative, non-negative) --> negative
1617 case Instruction::Sub: {
1618 if (BO->getOperand(0) != I)
1619 break;
1620 if (Known2.isNonNegative() && Known3.isNegative())
1621 Known.makeNonNegative();
1622 else if (Known2.isNegative() && Known3.isNonNegative())
1623 Known.makeNegative();
1624 break;
1627 // (mul nsw non-negative, non-negative) --> non-negative
1628 case Instruction::Mul:
1629 if (Known2.isNonNegative() && Known3.isNonNegative())
1630 Known.makeNonNegative();
1631 break;
1633 default:
1634 break;
1636 break;
1639 default:
1640 break;
1644 // Unreachable blocks may have zero-operand PHI nodes.
1645 if (P->getNumIncomingValues() == 0)
1646 break;
1648 // Otherwise take the unions of the known bit sets of the operands,
1649 // taking conservative care to avoid excessive recursion.
1650 if (Depth < MaxAnalysisRecursionDepth - 1 && Known.isUnknown()) {
1651 // Skip if every incoming value refers back to the PHI itself.
1652 if (isa_and_nonnull<UndefValue>(P->hasConstantValue()))
1653 break;
1655 Known.Zero.setAllBits();
1656 Known.One.setAllBits();
1657 for (const Use &U : P->operands()) {
1658 Value *IncValue;
1659 const PHINode *CxtPhi;
1660 Instruction *CxtI;
1661 breakSelfRecursivePHI(&U, P, IncValue, CxtI, &CxtPhi);
1662 // Skip direct self references.
1663 if (IncValue == P)
1664 continue;
1666 // Change the context instruction to the "edge" that flows into the
1667 // phi. This is important because that is where the value is actually
1668 // "evaluated" even though it is used later somewhere else. (see also
1669 // D69571).
1670 SimplifyQuery RecQ = Q.getWithoutCondContext().getWithInstruction(CxtI);
1672 Known2 = KnownBits(BitWidth);
1674 // Recurse, but cap the recursion to one level, because we don't
1675 // want to waste time spinning around in loops.
1676 // TODO: See if we can base recursion limiter on number of incoming phi
1677 // edges so we don't overly clamp analysis.
1678 computeKnownBits(IncValue, DemandedElts, Known2,
1679 MaxAnalysisRecursionDepth - 1, RecQ);
1681 // See if we can further use a conditional branch into the phi
1682 // to help us determine the range of the value.
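// Illustrative example: if the predecessor ends in
//   br i1 (icmp ult i32 %inc, 16), label %phi.block, label %other
// then on the edge into the phi the incoming value %inc is known to be u< 16,
// so its high bits are known zero.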
1683 if (!Known2.isConstant()) {
1684 CmpPredicate Pred;
1685 const APInt *RHSC;
1686 BasicBlock *TrueSucc, *FalseSucc;
1687 // TODO: Use RHS Value and compute range from its known bits.
1688 if (match(RecQ.CxtI,
1689 m_Br(m_c_ICmp(Pred, m_Specific(IncValue), m_APInt(RHSC)),
1690 m_BasicBlock(TrueSucc), m_BasicBlock(FalseSucc)))) {
1691 // Check for cases of duplicate successors.
1692 if ((TrueSucc == CxtPhi->getParent()) !=
1693 (FalseSucc == CxtPhi->getParent())) {
1694 // If we're using the false successor, invert the predicate.
1695 if (FalseSucc == CxtPhi->getParent())
1696 Pred = CmpInst::getInversePredicate(Pred);
1697 // Get the knownbits implied by the incoming phi condition.
1698 auto CR = ConstantRange::makeExactICmpRegion(Pred, *RHSC);
1699 KnownBits KnownUnion = Known2.unionWith(CR.toKnownBits());
1700 // We can have conflicts here if we are analyzing dead code (it is
1701 // impossible for us to reach this BB based on the icmp).
1702 if (KnownUnion.hasConflict()) {
1703 // No reason to continue analyzing in a known dead region, so
1704 // just resetAll and break. This will cause us to also exit the
1705 // outer loop.
1706 Known.resetAll();
1707 break;
1709 Known2 = KnownUnion;
1714 Known = Known.intersectWith(Known2);
1715 // If all bits have been ruled out, there's no need to check
1716 // more operands.
1717 if (Known.isUnknown())
1718 break;
1721 break;
1723 case Instruction::Call:
1724 case Instruction::Invoke: {
1725 // If range metadata is attached to this call, set known bits from that,
1726 // and then intersect with known bits based on other properties of the
1727 // function.
1728 if (MDNode *MD =
1729 Q.IIQ.getMetadata(cast<Instruction>(I), LLVMContext::MD_range))
1730 computeKnownBitsFromRangeMetadata(*MD, Known);
1732 const auto *CB = cast<CallBase>(I);
1734 if (std::optional<ConstantRange> Range = CB->getRange())
1735 Known = Known.unionWith(Range->toKnownBits());
1737 if (const Value *RV = CB->getReturnedArgOperand()) {
1738 if (RV->getType() == I->getType()) {
1739 computeKnownBits(RV, Known2, Depth + 1, Q);
1740 Known = Known.unionWith(Known2);
1741 // If the function doesn't return properly for all input values
1742 // (e.g. unreachable exits) then there might be conflicts between the
1743 // argument value and the range metadata. Simply discard the known bits
1744 // in case of conflicts.
1745 if (Known.hasConflict())
1746 Known.resetAll();
1749 if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
1750 switch (II->getIntrinsicID()) {
1751 default:
1752 break;
1753 case Intrinsic::abs: {
1754 computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);
1755 bool IntMinIsPoison = match(II->getArgOperand(1), m_One());
1756 Known = Known2.abs(IntMinIsPoison);
1757 break;
1759 case Intrinsic::bitreverse:
1760 computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);
1761 Known.Zero |= Known2.Zero.reverseBits();
1762 Known.One |= Known2.One.reverseBits();
1763 break;
1764 case Intrinsic::bswap:
1765 computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);
1766 Known.Zero |= Known2.Zero.byteSwap();
1767 Known.One |= Known2.One.byteSwap();
1768 break;
1769 case Intrinsic::ctlz: {
1770 computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);
1771 // If we have a known 1, its position is our upper bound.
1772 unsigned PossibleLZ = Known2.countMaxLeadingZeros();
1773 // If this call is poison for 0 input, the result will be less than 2^n.
1774 if (II->getArgOperand(1) == ConstantInt::getTrue(II->getContext()))
1775 PossibleLZ = std::min(PossibleLZ, BitWidth - 1);
1776 unsigned LowBits = llvm::bit_width(PossibleLZ);
1777 Known.Zero.setBitsFrom(LowBits);
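// E.g. for i32, if a zero input is poison the count is at most 31, so only
// the low 5 bits of the result can possibly be set.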
1778 break;
1780 case Intrinsic::cttz: {
1781 computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);
1782 // If we have a known 1, its position is our upper bound.
1783 unsigned PossibleTZ = Known2.countMaxTrailingZeros();
1784 // If this call is poison for 0 input, the result will be less than 2^n.
1785 if (II->getArgOperand(1) == ConstantInt::getTrue(II->getContext()))
1786 PossibleTZ = std::min(PossibleTZ, BitWidth - 1);
1787 unsigned LowBits = llvm::bit_width(PossibleTZ);
1788 Known.Zero.setBitsFrom(LowBits);
1789 break;
1791 case Intrinsic::ctpop: {
1792 computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);
1793 // We can bound the space the count needs. Also, bits known to be zero
1794 // can't contribute to the population.
1795 unsigned BitsPossiblySet = Known2.countMaxPopulation();
1796 unsigned LowBits = llvm::bit_width(BitsPossiblySet);
1797 Known.Zero.setBitsFrom(LowBits);
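// E.g. if at most 7 bits of the input can possibly be set, the population
// count fits in 3 bits, so every result bit above bit 2 is known zero.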
1798 // TODO: we could bound Known.One using the lower bound on the number
1799 // of bits which might be set, as provided by Known2's minimum popcount.
1800 break;
1802 case Intrinsic::fshr:
1803 case Intrinsic::fshl: {
1804 const APInt *SA;
1805 if (!match(I->getOperand(2), m_APInt(SA)))
1806 break;
1808 // Normalize to funnel shift left.
1809 uint64_t ShiftAmt = SA->urem(BitWidth);
1810 if (II->getIntrinsicID() == Intrinsic::fshr)
1811 ShiftAmt = BitWidth - ShiftAmt;
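// Conceptually, fshl(X, Y, s) returns the top BitWidth bits of the
// concatenation X:Y shifted left by s, i.e. (X << s) | (Y u>> (BitWidth - s))
// for s != 0, which is what the known-bits computation below mirrors.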
1813 KnownBits Known3(BitWidth);
1814 computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);
1815 computeKnownBits(I->getOperand(1), DemandedElts, Known3, Depth + 1, Q);
1817 Known.Zero =
1818 Known2.Zero.shl(ShiftAmt) | Known3.Zero.lshr(BitWidth - ShiftAmt);
1819 Known.One =
1820 Known2.One.shl(ShiftAmt) | Known3.One.lshr(BitWidth - ShiftAmt);
1821 break;
1823 case Intrinsic::uadd_sat:
1824 computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
1825 computeKnownBits(I->getOperand(1), DemandedElts, Known2, Depth + 1, Q);
1826 Known = KnownBits::uadd_sat(Known, Known2);
1827 break;
1828 case Intrinsic::usub_sat:
1829 computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
1830 computeKnownBits(I->getOperand(1), DemandedElts, Known2, Depth + 1, Q);
1831 Known = KnownBits::usub_sat(Known, Known2);
1832 break;
1833 case Intrinsic::sadd_sat:
1834 computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
1835 computeKnownBits(I->getOperand(1), DemandedElts, Known2, Depth + 1, Q);
1836 Known = KnownBits::sadd_sat(Known, Known2);
1837 break;
1838 case Intrinsic::ssub_sat:
1839 computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
1840 computeKnownBits(I->getOperand(1), DemandedElts, Known2, Depth + 1, Q);
1841 Known = KnownBits::ssub_sat(Known, Known2);
1842 break;
1843 // Vec reverse preserves bits from input vec.
1844 case Intrinsic::vector_reverse:
1845 computeKnownBits(I->getOperand(0), DemandedElts.reverseBits(), Known,
1846 Depth + 1, Q);
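// (Element i of the result is element NumElts-1-i of the source, so the
// demanded-element mask is reversed to match the source layout.)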
1847 break;
1848 // For min/max/and/or reductions, any bit common to every element in the
1849 // input vector is set in the output.
1850 case Intrinsic::vector_reduce_and:
1851 case Intrinsic::vector_reduce_or:
1852 case Intrinsic::vector_reduce_umax:
1853 case Intrinsic::vector_reduce_umin:
1854 case Intrinsic::vector_reduce_smax:
1855 case Intrinsic::vector_reduce_smin:
1856 computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
1857 break;
1858 case Intrinsic::vector_reduce_xor: {
1859 computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
1860 // The zeros common to all vecs are zero in the output.
1861 // If the number of elements is odd, then the common ones remain. If the
1862 // number of elements is even, then the common ones become zeros.
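// E.g. xor-reducing an even number of identical lanes cancels every common
// one bit, while an odd number of lanes leaves the common ones intact.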
1863 auto *VecTy = cast<VectorType>(I->getOperand(0)->getType());
1864 // Even, so the ones become zeros.
1865 bool EvenCnt = VecTy->getElementCount().isKnownEven();
1866 if (EvenCnt)
1867 Known.Zero |= Known.One;
1868 // The element count may be even (unknown for scalable vectors), so clear the ones.
1869 if (VecTy->isScalableTy() || EvenCnt)
1870 Known.One.clearAllBits();
1871 break;
1873 case Intrinsic::umin:
1874 computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
1875 computeKnownBits(I->getOperand(1), DemandedElts, Known2, Depth + 1, Q);
1876 Known = KnownBits::umin(Known, Known2);
1877 break;
1878 case Intrinsic::umax:
1879 computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
1880 computeKnownBits(I->getOperand(1), DemandedElts, Known2, Depth + 1, Q);
1881 Known = KnownBits::umax(Known, Known2);
1882 break;
1883 case Intrinsic::smin:
1884 computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
1885 computeKnownBits(I->getOperand(1), DemandedElts, Known2, Depth + 1, Q);
1886 Known = KnownBits::smin(Known, Known2);
1887 unionWithMinMaxIntrinsicClamp(II, Known);
1888 break;
1889 case Intrinsic::smax:
1890 computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
1891 computeKnownBits(I->getOperand(1), DemandedElts, Known2, Depth + 1, Q);
1892 Known = KnownBits::smax(Known, Known2);
1893 unionWithMinMaxIntrinsicClamp(II, Known);
1894 break;
1895 case Intrinsic::ptrmask: {
1896 computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
1898 const Value *Mask = I->getOperand(1);
1899 Known2 = KnownBits(Mask->getType()->getScalarSizeInBits());
1900 computeKnownBits(Mask, DemandedElts, Known2, Depth + 1, Q);
1901 // TODO: 1-extend would be more precise.
1902 Known &= Known2.anyextOrTrunc(BitWidth);
1903 break;
1905 case Intrinsic::x86_sse2_pmulh_w:
1906 case Intrinsic::x86_avx2_pmulh_w:
1907 case Intrinsic::x86_avx512_pmulh_w_512:
1908 computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
1909 computeKnownBits(I->getOperand(1), DemandedElts, Known2, Depth + 1, Q);
1910 Known = KnownBits::mulhs(Known, Known2);
1911 break;
1912 case Intrinsic::x86_sse2_pmulhu_w:
1913 case Intrinsic::x86_avx2_pmulhu_w:
1914 case Intrinsic::x86_avx512_pmulhu_w_512:
1915 computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
1916 computeKnownBits(I->getOperand(1), DemandedElts, Known2, Depth + 1, Q);
1917 Known = KnownBits::mulhu(Known, Known2);
1918 break;
1919 case Intrinsic::x86_sse42_crc32_64_64:
1920 Known.Zero.setBitsFrom(32);
1921 break;
1922 case Intrinsic::x86_ssse3_phadd_d_128:
1923 case Intrinsic::x86_ssse3_phadd_w_128:
1924 case Intrinsic::x86_avx2_phadd_d:
1925 case Intrinsic::x86_avx2_phadd_w: {
1926 Known = computeKnownBitsForHorizontalOperation(
1927 I, DemandedElts, Depth, Q,
1928 [](const KnownBits &KnownLHS, const KnownBits &KnownRHS) {
1929 return KnownBits::add(KnownLHS, KnownRHS);
1931 break;
1933 case Intrinsic::x86_ssse3_phadd_sw_128:
1934 case Intrinsic::x86_avx2_phadd_sw: {
1935 Known = computeKnownBitsForHorizontalOperation(I, DemandedElts, Depth,
1936 Q, KnownBits::sadd_sat);
1937 break;
1939 case Intrinsic::x86_ssse3_phsub_d_128:
1940 case Intrinsic::x86_ssse3_phsub_w_128:
1941 case Intrinsic::x86_avx2_phsub_d:
1942 case Intrinsic::x86_avx2_phsub_w: {
1943 Known = computeKnownBitsForHorizontalOperation(
1944 I, DemandedElts, Depth, Q,
1945 [](const KnownBits &KnownLHS, const KnownBits &KnownRHS) {
1946 return KnownBits::sub(KnownLHS, KnownRHS);
1948 break;
1950 case Intrinsic::x86_ssse3_phsub_sw_128:
1951 case Intrinsic::x86_avx2_phsub_sw: {
1952 Known = computeKnownBitsForHorizontalOperation(I, DemandedElts, Depth,
1953 Q, KnownBits::ssub_sat);
1954 break;
1956 case Intrinsic::riscv_vsetvli:
1957 case Intrinsic::riscv_vsetvlimax: {
1958 bool HasAVL = II->getIntrinsicID() == Intrinsic::riscv_vsetvli;
1959 const ConstantRange Range = getVScaleRange(II->getFunction(), BitWidth);
1960 uint64_t SEW = RISCVVType::decodeVSEW(
1961 cast<ConstantInt>(II->getArgOperand(HasAVL))->getZExtValue());
1962 RISCVII::VLMUL VLMUL = static_cast<RISCVII::VLMUL>(
1963 cast<ConstantInt>(II->getArgOperand(1 + HasAVL))->getZExtValue());
1964 uint64_t MaxVLEN =
1965 Range.getUnsignedMax().getZExtValue() * RISCV::RVVBitsPerBlock;
1966 uint64_t MaxVL = MaxVLEN / RISCVVType::getSEWLMULRatio(SEW, VLMUL);
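// Illustrative example: with vscale_range(2,2), SEW=32 and LMUL=1,
// MaxVLEN = 2 * 64 = 128 and MaxVL = 128 / 32 = 4, so the result needs at
// most 3 bits and the bits above that are known zero.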
1968 // The result of vsetvli must not be larger than AVL.
1969 if (HasAVL)
1970 if (auto *CI = dyn_cast<ConstantInt>(II->getArgOperand(0)))
1971 MaxVL = std::min(MaxVL, CI->getZExtValue());
1973 unsigned KnownZeroFirstBit = Log2_32(MaxVL) + 1;
1974 if (BitWidth > KnownZeroFirstBit)
1975 Known.Zero.setBitsFrom(KnownZeroFirstBit);
1976 break;
1978 case Intrinsic::vscale: {
1979 if (!II->getParent() || !II->getFunction())
1980 break;
1982 Known = getVScaleRange(II->getFunction(), BitWidth).toKnownBits();
1983 break;
1987 break;
1989 case Instruction::ShuffleVector: {
1990 auto *Shuf = dyn_cast<ShuffleVectorInst>(I);
1991 // FIXME: Do we need to handle ConstantExpr involving shufflevectors?
1992 if (!Shuf) {
1993 Known.resetAll();
1994 return;
1996 // For undef elements, we don't know anything about the common state of
1997 // the shuffle result.
1998 APInt DemandedLHS, DemandedRHS;
1999 if (!getShuffleDemandedElts(Shuf, DemandedElts, DemandedLHS, DemandedRHS)) {
2000 Known.resetAll();
2001 return;
2003 Known.One.setAllBits();
2004 Known.Zero.setAllBits();
2005 if (!!DemandedLHS) {
2006 const Value *LHS = Shuf->getOperand(0);
2007 computeKnownBits(LHS, DemandedLHS, Known, Depth + 1, Q);
2008 // If we don't know any bits, early out.
2009 if (Known.isUnknown())
2010 break;
2012 if (!!DemandedRHS) {
2013 const Value *RHS = Shuf->getOperand(1);
2014 computeKnownBits(RHS, DemandedRHS, Known2, Depth + 1, Q);
2015 Known = Known.intersectWith(Known2);
2017 break;
2019 case Instruction::InsertElement: {
2020 if (isa<ScalableVectorType>(I->getType())) {
2021 Known.resetAll();
2022 return;
2024 const Value *Vec = I->getOperand(0);
2025 const Value *Elt = I->getOperand(1);
2026 auto *CIdx = dyn_cast<ConstantInt>(I->getOperand(2));
2027 unsigned NumElts = DemandedElts.getBitWidth();
2028 APInt DemandedVecElts = DemandedElts;
2029 bool NeedsElt = true;
2030 // If we know the index we are inserting to, clear it from the Vec check.
2031 if (CIdx && CIdx->getValue().ult(NumElts)) {
2032 DemandedVecElts.clearBit(CIdx->getZExtValue());
2033 NeedsElt = DemandedElts[CIdx->getZExtValue()];
2036 Known.One.setAllBits();
2037 Known.Zero.setAllBits();
2038 if (NeedsElt) {
2039 computeKnownBits(Elt, Known, Depth + 1, Q);
2040 // If we don't know any bits, early out.
2041 if (Known.isUnknown())
2042 break;
2045 if (!DemandedVecElts.isZero()) {
2046 computeKnownBits(Vec, DemandedVecElts, Known2, Depth + 1, Q);
2047 Known = Known.intersectWith(Known2);
2049 break;
2051 case Instruction::ExtractElement: {
2052 // Look through extract element. If the index is non-constant or
2053 // out-of-range demand all elements, otherwise just the extracted element.
2054 const Value *Vec = I->getOperand(0);
2055 const Value *Idx = I->getOperand(1);
2056 auto *CIdx = dyn_cast<ConstantInt>(Idx);
2057 if (isa<ScalableVectorType>(Vec->getType())) {
2058 // FIXME: there's probably *something* we can do with scalable vectors
2059 Known.resetAll();
2060 break;
2062 unsigned NumElts = cast<FixedVectorType>(Vec->getType())->getNumElements();
2063 APInt DemandedVecElts = APInt::getAllOnes(NumElts);
2064 if (CIdx && CIdx->getValue().ult(NumElts))
2065 DemandedVecElts = APInt::getOneBitSet(NumElts, CIdx->getZExtValue());
2066 computeKnownBits(Vec, DemandedVecElts, Known, Depth + 1, Q);
2067 break;
2069 case Instruction::ExtractValue:
2070 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I->getOperand(0))) {
2071 const ExtractValueInst *EVI = cast<ExtractValueInst>(I);
2072 if (EVI->getNumIndices() != 1) break;
2073 if (EVI->getIndices()[0] == 0) {
2074 switch (II->getIntrinsicID()) {
2075 default: break;
2076 case Intrinsic::uadd_with_overflow:
2077 case Intrinsic::sadd_with_overflow:
2078 computeKnownBitsAddSub(
2079 true, II->getArgOperand(0), II->getArgOperand(1), /*NSW=*/false,
2080 /* NUW=*/false, DemandedElts, Known, Known2, Depth, Q);
2081 break;
2082 case Intrinsic::usub_with_overflow:
2083 case Intrinsic::ssub_with_overflow:
2084 computeKnownBitsAddSub(
2085 false, II->getArgOperand(0), II->getArgOperand(1), /*NSW=*/false,
2086 /* NUW=*/false, DemandedElts, Known, Known2, Depth, Q);
2087 break;
2088 case Intrinsic::umul_with_overflow:
2089 case Intrinsic::smul_with_overflow:
2090 computeKnownBitsMul(II->getArgOperand(0), II->getArgOperand(1), false,
2091 false, DemandedElts, Known, Known2, Depth, Q);
2092 break;
2096 break;
2097 case Instruction::Freeze:
2098 if (isGuaranteedNotToBePoison(I->getOperand(0), Q.AC, Q.CxtI, Q.DT,
2099 Depth + 1))
2100 computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
2101 break;
2105 /// Determine which bits of V are known to be either zero or one and return
2106 /// them.
2107 KnownBits llvm::computeKnownBits(const Value *V, const APInt &DemandedElts,
2108 unsigned Depth, const SimplifyQuery &Q) {
2109 KnownBits Known(getBitWidth(V->getType(), Q.DL));
2110 ::computeKnownBits(V, DemandedElts, Known, Depth, Q);
2111 return Known;
2114 /// Determine which bits of V are known to be either zero or one and return
2115 /// them.
2116 KnownBits llvm::computeKnownBits(const Value *V, unsigned Depth,
2117 const SimplifyQuery &Q) {
2118 KnownBits Known(getBitWidth(V->getType(), Q.DL));
2119 computeKnownBits(V, Known, Depth, Q);
2120 return Known;
2123 /// Determine which bits of V are known to be either zero or one and return
2124 /// them in the Known bit set.
2126 /// NOTE: we cannot consider 'undef' to be "IsZero" here. The problem is that
2127 /// we cannot optimize based on the assumption that it is zero without changing
2128 /// it to be an explicit zero. If we don't change it to zero, other code could
2129 /// be optimized based on the contradictory assumption that it is non-zero.
2130 /// Because instcombine aggressively folds operations with undef args anyway,
2131 /// this won't lose us code quality.
2133 /// This function is defined on values with integer type, values with pointer
2134 /// type, and vectors of integers. In the case
2135 /// where V is a vector, the known zero and known one values are the
2136 /// same width as the vector element, and the bit is set only if it is true
2137 /// for all of the demanded elements in the vector specified by DemandedElts.
2138 void computeKnownBits(const Value *V, const APInt &DemandedElts,
2139 KnownBits &Known, unsigned Depth,
2140 const SimplifyQuery &Q) {
2141 if (!DemandedElts) {
2142 // No demanded elts, better to assume we don't know anything.
2143 Known.resetAll();
2144 return;
2147 assert(V && "No Value?");
2148 assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");
2150 #ifndef NDEBUG
2151 Type *Ty = V->getType();
2152 unsigned BitWidth = Known.getBitWidth();
2154 assert((Ty->isIntOrIntVectorTy(BitWidth) || Ty->isPtrOrPtrVectorTy()) &&
2155 "Not integer or pointer type!");
2157 if (auto *FVTy = dyn_cast<FixedVectorType>(Ty)) {
2158 assert(
2159 FVTy->getNumElements() == DemandedElts.getBitWidth() &&
2160 "DemandedElt width should equal the fixed vector number of elements");
2161 } else {
2162 assert(DemandedElts == APInt(1, 1) &&
2163 "DemandedElt width should be 1 for scalars or scalable vectors");
2166 Type *ScalarTy = Ty->getScalarType();
2167 if (ScalarTy->isPointerTy()) {
2168 assert(BitWidth == Q.DL.getPointerTypeSizeInBits(ScalarTy) &&
2169 "V and Known should have same BitWidth");
2170 } else {
2171 assert(BitWidth == Q.DL.getTypeSizeInBits(ScalarTy) &&
2172 "V and Known should have same BitWidth");
2174 #endif
2176 const APInt *C;
2177 if (match(V, m_APInt(C))) {
2178 // We know all of the bits for a scalar constant or a splat vector constant!
2179 Known = KnownBits::makeConstant(*C);
2180 return;
2182 // Null and aggregate-zero are all-zeros.
2183 if (isa<ConstantPointerNull>(V) || isa<ConstantAggregateZero>(V)) {
2184 Known.setAllZero();
2185 return;
2187 // Handle a constant vector by taking the intersection of the known bits of
2188 // each element.
2189 if (const ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(V)) {
2190 assert(!isa<ScalableVectorType>(V->getType()));
2191 // We know that CDV must be a vector of integers. Take the intersection of
2192 // each element.
2193 Known.Zero.setAllBits(); Known.One.setAllBits();
2194 for (unsigned i = 0, e = CDV->getNumElements(); i != e; ++i) {
2195 if (!DemandedElts[i])
2196 continue;
2197 APInt Elt = CDV->getElementAsAPInt(i);
2198 Known.Zero &= ~Elt;
2199 Known.One &= Elt;
2201 if (Known.hasConflict())
2202 Known.resetAll();
2203 return;
2206 if (const auto *CV = dyn_cast<ConstantVector>(V)) {
2207 assert(!isa<ScalableVectorType>(V->getType()));
2208 // We know that CV must be a vector of integers. Take the intersection of
2209 // each element.
2210 Known.Zero.setAllBits(); Known.One.setAllBits();
2211 for (unsigned i = 0, e = CV->getNumOperands(); i != e; ++i) {
2212 if (!DemandedElts[i])
2213 continue;
2214 Constant *Element = CV->getAggregateElement(i);
2215 if (isa<PoisonValue>(Element))
2216 continue;
2217 auto *ElementCI = dyn_cast_or_null<ConstantInt>(Element);
2218 if (!ElementCI) {
2219 Known.resetAll();
2220 return;
2222 const APInt &Elt = ElementCI->getValue();
2223 Known.Zero &= ~Elt;
2224 Known.One &= Elt;
2226 if (Known.hasConflict())
2227 Known.resetAll();
2228 return;
2231 // Start out not knowing anything.
2232 Known.resetAll();
2234 // We can't imply anything about undefs.
2235 if (isa<UndefValue>(V))
2236 return;
2238 // There's no point in looking through other users of ConstantData for
2239 // assumptions. Confirm that we've handled them all.
2240 assert(!isa<ConstantData>(V) && "Unhandled constant data!");
2242 if (const auto *A = dyn_cast<Argument>(V))
2243 if (std::optional<ConstantRange> Range = A->getRange())
2244 Known = Range->toKnownBits();
2246 // All recursive calls that increase depth must come after this.
2247 if (Depth == MaxAnalysisRecursionDepth)
2248 return;
2250 // A weak GlobalAlias is totally unknown. A non-weak GlobalAlias has
2251 // the bits of its aliasee.
2252 if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
2253 if (!GA->isInterposable())
2254 computeKnownBits(GA->getAliasee(), Known, Depth + 1, Q);
2255 return;
2258 if (const Operator *I = dyn_cast<Operator>(V))
2259 computeKnownBitsFromOperator(I, DemandedElts, Known, Depth, Q);
2260 else if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
2261 if (std::optional<ConstantRange> CR = GV->getAbsoluteSymbolRange())
2262 Known = CR->toKnownBits();
2265 // Aligned pointers have trailing zeros - refine Known.Zero set
2266 if (isa<PointerType>(V->getType())) {
2267 Align Alignment = V->getPointerAlignment(Q.DL);
2268 Known.Zero.setLowBits(Log2(Alignment));
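// E.g. a pointer with 16-byte alignment has its low 4 bits known to be zero.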
2271 // computeKnownBitsFromContext strictly refines Known.
2272 // Therefore, we run them after computeKnownBitsFromOperator.
2274 // Check whether we can determine known bits from context such as assumes.
2275 computeKnownBitsFromContext(V, Known, Depth, Q);
2278 /// Try to detect a recurrence in which the value of the induction variable is
2279 /// always a power of two (or zero).
2280 static bool isPowerOfTwoRecurrence(const PHINode *PN, bool OrZero,
2281 unsigned Depth, SimplifyQuery &Q) {
2282 BinaryOperator *BO = nullptr;
2283 Value *Start = nullptr, *Step = nullptr;
2284 if (!matchSimpleRecurrence(PN, BO, Start, Step))
2285 return false;
2287 // Initial value must be a power of two.
2288 for (const Use &U : PN->operands()) {
2289 if (U.get() == Start) {
2290 // Initial value comes from a different BB, need to adjust context
2291 // instruction for analysis.
2292 Q.CxtI = PN->getIncomingBlock(U)->getTerminator();
2293 if (!isKnownToBeAPowerOfTwo(Start, OrZero, Depth, Q))
2294 return false;
2298 // Except for Mul, the induction variable must be on the left side of the
2299 // increment expression, otherwise its value can be arbitrary.
2300 if (BO->getOpcode() != Instruction::Mul && BO->getOperand(1) != Step)
2301 return false;
2303 Q.CxtI = BO->getParent()->getTerminator();
2304 switch (BO->getOpcode()) {
2305 case Instruction::Mul:
2306 // Power of two is closed under multiplication.
2307 return (OrZero || Q.IIQ.hasNoUnsignedWrap(BO) ||
2308 Q.IIQ.hasNoSignedWrap(BO)) &&
2309 isKnownToBeAPowerOfTwo(Step, OrZero, Depth, Q);
2310 case Instruction::SDiv:
2311 // Start value must not be signmask for signed division, so simply being a
2312 // power of two is not sufficient, and it has to be a constant.
2313 if (!match(Start, m_Power2()) || match(Start, m_SignMask()))
2314 return false;
2315 [[fallthrough]];
2316 case Instruction::UDiv:
2317 // Divisor must be a power of two.
2318 // If OrZero is false, we cannot guarantee the induction variable is non-zero
2319 // after division (same for Shr) unless the division is exact.
2320 return (OrZero || Q.IIQ.isExact(BO)) &&
2321 isKnownToBeAPowerOfTwo(Step, false, Depth, Q);
2322 case Instruction::Shl:
2323 return OrZero || Q.IIQ.hasNoUnsignedWrap(BO) || Q.IIQ.hasNoSignedWrap(BO);
2324 case Instruction::AShr:
2325 if (!match(Start, m_Power2()) || match(Start, m_SignMask()))
2326 return false;
2327 [[fallthrough]];
2328 case Instruction::LShr:
2329 return OrZero || Q.IIQ.isExact(BO);
2330 default:
2331 return false;
2335 /// Return true if we can infer that \p V is known to be a power of 2 from
2336 /// dominating condition \p Cond (e.g., ctpop(V) == 1).
2337 static bool isImpliedToBeAPowerOfTwoFromCond(const Value *V, bool OrZero,
2338 const Value *Cond,
2339 bool CondIsTrue) {
2340 CmpPredicate Pred;
2341 const APInt *RHSC;
2342 if (!match(Cond, m_ICmp(Pred, m_Intrinsic<Intrinsic::ctpop>(m_Specific(V)),
2343 m_APInt(RHSC))))
2344 return false;
2345 if (!CondIsTrue)
2346 Pred = ICmpInst::getInversePredicate(Pred);
2347 // ctpop(V) u< 2
2348 if (OrZero && Pred == ICmpInst::ICMP_ULT && *RHSC == 2)
2349 return true;
2350 // ctpop(V) == 1
2351 return Pred == ICmpInst::ICMP_EQ && *RHSC == 1;
2354 /// Return true if the given value is known to have exactly one
2355 /// bit set when defined. For vectors return true if every element is known to
2356 /// be a power of two when defined. Supports values with integer or pointer
2357 /// types and vectors of integers.
2358 bool llvm::isKnownToBeAPowerOfTwo(const Value *V, bool OrZero, unsigned Depth,
2359 const SimplifyQuery &Q) {
2360 assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");
2362 if (isa<Constant>(V))
2363 return OrZero ? match(V, m_Power2OrZero()) : match(V, m_Power2());
2365 // i1 is by definition a power of 2 or zero.
2366 if (OrZero && V->getType()->getScalarSizeInBits() == 1)
2367 return true;
2369 // Try to infer from assumptions.
2370 if (Q.AC && Q.CxtI) {
2371 for (auto &AssumeVH : Q.AC->assumptionsFor(V)) {
2372 if (!AssumeVH)
2373 continue;
2374 CallInst *I = cast<CallInst>(AssumeVH);
2375 if (isImpliedToBeAPowerOfTwoFromCond(V, OrZero, I->getArgOperand(0),
2376 /*CondIsTrue=*/true) &&
2377 isValidAssumeForContext(I, Q.CxtI, Q.DT))
2378 return true;
2382 // Handle dominating conditions.
2383 if (Q.DC && Q.CxtI && Q.DT) {
2384 for (BranchInst *BI : Q.DC->conditionsFor(V)) {
2385 Value *Cond = BI->getCondition();
2387 BasicBlockEdge Edge0(BI->getParent(), BI->getSuccessor(0));
2388 if (isImpliedToBeAPowerOfTwoFromCond(V, OrZero, Cond,
2389 /*CondIsTrue=*/true) &&
2390 Q.DT->dominates(Edge0, Q.CxtI->getParent()))
2391 return true;
2393 BasicBlockEdge Edge1(BI->getParent(), BI->getSuccessor(1));
2394 if (isImpliedToBeAPowerOfTwoFromCond(V, OrZero, Cond,
2395 /*CondIsTrue=*/false) &&
2396 Q.DT->dominates(Edge1, Q.CxtI->getParent()))
2397 return true;
2401 auto *I = dyn_cast<Instruction>(V);
2402 if (!I)
2403 return false;
2405 if (Q.CxtI && match(V, m_VScale())) {
2406 const Function *F = Q.CxtI->getFunction();
2407 // The vscale_range indicates vscale is a power-of-two.
2408 return F->hasFnAttribute(Attribute::VScaleRange);
2411 // 1 << X is clearly a power of two if the one is not shifted off the end. If
2412 // it is shifted off the end then the result is undefined.
2413 if (match(I, m_Shl(m_One(), m_Value())))
2414 return true;
2416 // (signmask) >>l X is clearly a power of two if the one is not shifted off
2417 // the bottom. If it is shifted off the bottom then the result is undefined.
2418 if (match(I, m_LShr(m_SignMask(), m_Value())))
2419 return true;
2421 // The remaining tests are all recursive, so bail out if we hit the limit.
2422 if (Depth++ == MaxAnalysisRecursionDepth)
2423 return false;
2425 switch (I->getOpcode()) {
2426 case Instruction::ZExt:
2427 return isKnownToBeAPowerOfTwo(I->getOperand(0), OrZero, Depth, Q);
2428 case Instruction::Trunc:
2429 return OrZero && isKnownToBeAPowerOfTwo(I->getOperand(0), OrZero, Depth, Q);
2430 case Instruction::Shl:
2431 if (OrZero || Q.IIQ.hasNoUnsignedWrap(I) || Q.IIQ.hasNoSignedWrap(I))
2432 return isKnownToBeAPowerOfTwo(I->getOperand(0), OrZero, Depth, Q);
2433 return false;
2434 case Instruction::LShr:
2435 if (OrZero || Q.IIQ.isExact(cast<BinaryOperator>(I)))
2436 return isKnownToBeAPowerOfTwo(I->getOperand(0), OrZero, Depth, Q);
2437 return false;
2438 case Instruction::UDiv:
2439 if (Q.IIQ.isExact(cast<BinaryOperator>(I)))
2440 return isKnownToBeAPowerOfTwo(I->getOperand(0), OrZero, Depth, Q);
2441 return false;
2442 case Instruction::Mul:
2443 return isKnownToBeAPowerOfTwo(I->getOperand(1), OrZero, Depth, Q) &&
2444 isKnownToBeAPowerOfTwo(I->getOperand(0), OrZero, Depth, Q) &&
2445 (OrZero || isKnownNonZero(I, Q, Depth));
2446 case Instruction::And:
2447 // A power of two and'd with anything is a power of two or zero.
2448 if (OrZero &&
2449 (isKnownToBeAPowerOfTwo(I->getOperand(1), /*OrZero*/ true, Depth, Q) ||
2450 isKnownToBeAPowerOfTwo(I->getOperand(0), /*OrZero*/ true, Depth, Q)))
2451 return true;
2452 // X & (-X) is always a power of two or zero.
2453 if (match(I->getOperand(0), m_Neg(m_Specific(I->getOperand(1)))) ||
2454 match(I->getOperand(1), m_Neg(m_Specific(I->getOperand(0)))))
2455 return OrZero || isKnownNonZero(I->getOperand(0), Q, Depth);
2456 return false;
2457 case Instruction::Add: {
2458 // Adding a power-of-two or zero to the same power-of-two or zero yields
2459 // either the original power-of-two, a larger power-of-two, or zero.
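// E.g. if P is a power of two, P + (P & M) is either P (when P & M == 0) or
// 2*P (when P & M == P), i.e. a power of two unless the doubling wraps to zero.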
2460 const OverflowingBinaryOperator *VOBO = cast<OverflowingBinaryOperator>(V);
2461 if (OrZero || Q.IIQ.hasNoUnsignedWrap(VOBO) ||
2462 Q.IIQ.hasNoSignedWrap(VOBO)) {
2463 if (match(I->getOperand(0),
2464 m_c_And(m_Specific(I->getOperand(1)), m_Value())) &&
2465 isKnownToBeAPowerOfTwo(I->getOperand(1), OrZero, Depth, Q))
2466 return true;
2467 if (match(I->getOperand(1),
2468 m_c_And(m_Specific(I->getOperand(0)), m_Value())) &&
2469 isKnownToBeAPowerOfTwo(I->getOperand(0), OrZero, Depth, Q))
2470 return true;
2472 unsigned BitWidth = V->getType()->getScalarSizeInBits();
2473 KnownBits LHSBits(BitWidth);
2474 computeKnownBits(I->getOperand(0), LHSBits, Depth, Q);
2476 KnownBits RHSBits(BitWidth);
2477 computeKnownBits(I->getOperand(1), RHSBits, Depth, Q);
2478 // If i8 V is a power of two or zero:
2479 // ZeroBits: 1 1 1 0 1 1 1 1
2480 // ~ZeroBits: 0 0 0 1 0 0 0 0
2481 if ((~(LHSBits.Zero & RHSBits.Zero)).isPowerOf2())
2482 // If OrZero isn't set, we cannot give back a zero result.
2483 // Make sure either the LHS or RHS has a bit set.
2484 if (OrZero || RHSBits.One.getBoolValue() || LHSBits.One.getBoolValue())
2485 return true;
2488 // LShr(UINT_MAX, Y) + 1 is a power of two (if add is nuw) or zero.
2489 if (OrZero || Q.IIQ.hasNoUnsignedWrap(VOBO))
2490 if (match(I, m_Add(m_LShr(m_AllOnes(), m_Value()), m_One())))
2491 return true;
2492 return false;
2494 case Instruction::Select:
2495 return isKnownToBeAPowerOfTwo(I->getOperand(1), OrZero, Depth, Q) &&
2496 isKnownToBeAPowerOfTwo(I->getOperand(2), OrZero, Depth, Q);
2497 case Instruction::PHI: {
2498 // A PHI node is power of two if all incoming values are power of two, or if
2499 // it is an induction variable where in each step its value is a power of
2500 // two.
2501 auto *PN = cast<PHINode>(I);
2502 SimplifyQuery RecQ = Q.getWithoutCondContext();
2504 // Check if it is an induction variable and always power of two.
2505 if (isPowerOfTwoRecurrence(PN, OrZero, Depth, RecQ))
2506 return true;
2508 // Recursively check all incoming values. Limit recursion to 2 levels, so
2509 // that search complexity is limited to number of operands^2.
2510 unsigned NewDepth = std::max(Depth, MaxAnalysisRecursionDepth - 1);
2511 return llvm::all_of(PN->operands(), [&](const Use &U) {
2512 // The value is a power of 2 if it comes from the PHI node itself by induction.
2513 if (U.get() == PN)
2514 return true;
2516 // Change the context instruction to the incoming block where it is
2517 // evaluated.
2518 RecQ.CxtI = PN->getIncomingBlock(U)->getTerminator();
2519 return isKnownToBeAPowerOfTwo(U.get(), OrZero, NewDepth, RecQ);
2522 case Instruction::Invoke:
2523 case Instruction::Call: {
2524 if (auto *II = dyn_cast<IntrinsicInst>(I)) {
2525 switch (II->getIntrinsicID()) {
2526 case Intrinsic::umax:
2527 case Intrinsic::smax:
2528 case Intrinsic::umin:
2529 case Intrinsic::smin:
2530 return isKnownToBeAPowerOfTwo(II->getArgOperand(1), OrZero, Depth, Q) &&
2531 isKnownToBeAPowerOfTwo(II->getArgOperand(0), OrZero, Depth, Q);
2532 // bswap/bitreverse just move bits around but don't change any 1s/0s,
2533 // and thus don't change pow2/non-pow2 status.
2534 case Intrinsic::bitreverse:
2535 case Intrinsic::bswap:
2536 return isKnownToBeAPowerOfTwo(II->getArgOperand(0), OrZero, Depth, Q);
2537 case Intrinsic::fshr:
2538 case Intrinsic::fshl:
2539 // If Op0 == Op1, this is a rotate. is_pow2(rotate(x, y)) == is_pow2(x)
2540 if (II->getArgOperand(0) == II->getArgOperand(1))
2541 return isKnownToBeAPowerOfTwo(II->getArgOperand(0), OrZero, Depth, Q);
2542 break;
2543 default:
2544 break;
2547 return false;
2549 default:
2550 return false;
2554 /// Test whether a GEP's result is known to be non-null.
2556 /// Uses properties inherent in a GEP to try to determine whether it is known
2557 /// to be non-null.
2559 /// Currently this routine does not support vector GEPs.
2560 static bool isGEPKnownNonNull(const GEPOperator *GEP, unsigned Depth,
2561 const SimplifyQuery &Q) {
2562 const Function *F = nullptr;
2563 if (const Instruction *I = dyn_cast<Instruction>(GEP))
2564 F = I->getFunction();
2566 // If the gep has nuw, or is inbounds in an address space where the null
2567 // pointer is invalid, the GEP may be null only if the base pointer is null and the offset is zero.
2568 if (!GEP->hasNoUnsignedWrap() &&
2569 !(GEP->isInBounds() &&
2570 !NullPointerIsDefined(F, GEP->getPointerAddressSpace())))
2571 return false;
2573 // FIXME: Support vector-GEPs.
2574 assert(GEP->getType()->isPointerTy() && "We only support plain pointer GEP");
2576 // If the base pointer is non-null, we cannot walk to a null address with an
2577 // inbounds GEP in address space zero.
2578 if (isKnownNonZero(GEP->getPointerOperand(), Q, Depth))
2579 return true;
2581 // Walk the GEP operands and see if any operand introduces a non-zero offset.
2582 // If so, then the GEP cannot produce a null pointer, as doing so would
2583 // inherently violate the inbounds contract within address space zero.
2584 for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP);
2585 GTI != GTE; ++GTI) {
2586 // Struct types are easy -- they must always be indexed by a constant.
2587 if (StructType *STy = GTI.getStructTypeOrNull()) {
2588 ConstantInt *OpC = cast<ConstantInt>(GTI.getOperand());
2589 unsigned ElementIdx = OpC->getZExtValue();
2590 const StructLayout *SL = Q.DL.getStructLayout(STy);
2591 uint64_t ElementOffset = SL->getElementOffset(ElementIdx);
2592 if (ElementOffset > 0)
2593 return true;
2594 continue;
2597 // If we have a zero-sized type, the index doesn't matter. Keep looping.
2598 if (GTI.getSequentialElementStride(Q.DL).isZero())
2599 continue;
2601 // Fast path the constant operand case both for efficiency and so we don't
2602 // increment Depth when just zipping down an all-constant GEP.
2603 if (ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand())) {
2604 if (!OpC->isZero())
2605 return true;
2606 continue;
2609 // We post-increment Depth here because while isKnownNonZero increments it
2610 // as well, when we pop back up that increment won't persist. We don't want
2611 // to recurse 10k times just because we have 10k GEP operands. We don't
2612 // bail completely out because we want to handle constant GEPs regardless
2613 // of depth.
2614 if (Depth++ >= MaxAnalysisRecursionDepth)
2615 continue;
2617 if (isKnownNonZero(GTI.getOperand(), Q, Depth))
2618 return true;
2621 return false;
2624 static bool isKnownNonNullFromDominatingCondition(const Value *V,
2625 const Instruction *CtxI,
2626 const DominatorTree *DT) {
2627 assert(!isa<Constant>(V) && "Called for constant?");
2629 if (!CtxI || !DT)
2630 return false;
2632 unsigned NumUsesExplored = 0;
2633 for (auto &U : V->uses()) {
2634 // Avoid massive lists
2635 if (NumUsesExplored >= DomConditionsMaxUses)
2636 break;
2637 NumUsesExplored++;
2639 const Instruction *UI = cast<Instruction>(U.getUser());
2640 // If the value is used as an argument to a call or invoke, then argument
2641 // attributes may provide an answer about null-ness.
2642 if (V->getType()->isPointerTy()) {
2643 if (const auto *CB = dyn_cast<CallBase>(UI)) {
2644 if (CB->isArgOperand(&U) &&
2645 CB->paramHasNonNullAttr(CB->getArgOperandNo(&U),
2646 /*AllowUndefOrPoison=*/false) &&
2647 DT->dominates(CB, CtxI))
2648 return true;
2652 // If the value is used as the pointer of a load/store, then it must be non-null.
2653 if (V == getLoadStorePointerOperand(UI)) {
2654 if (!NullPointerIsDefined(UI->getFunction(),
2655 V->getType()->getPointerAddressSpace()) &&
2656 DT->dominates(UI, CtxI))
2657 return true;
2660 if ((match(UI, m_IDiv(m_Value(), m_Specific(V))) ||
2661 match(UI, m_IRem(m_Value(), m_Specific(V)))) &&
2662 isValidAssumeForContext(UI, CtxI, DT))
2663 return true;
2665 // Consider only compare instructions uniquely controlling a branch
2666 Value *RHS;
2667 CmpPredicate Pred;
2668 if (!match(UI, m_c_ICmp(Pred, m_Specific(V), m_Value(RHS))))
2669 continue;
2671 bool NonNullIfTrue;
2672 if (cmpExcludesZero(Pred, RHS))
2673 NonNullIfTrue = true;
2674 else if (cmpExcludesZero(CmpInst::getInversePredicate(Pred), RHS))
2675 NonNullIfTrue = false;
2676 else
2677 continue;
2679 SmallVector<const User *, 4> WorkList;
2680 SmallPtrSet<const User *, 4> Visited;
2681 for (const auto *CmpU : UI->users()) {
2682 assert(WorkList.empty() && "Should be!");
2683 if (Visited.insert(CmpU).second)
2684 WorkList.push_back(CmpU);
2686 while (!WorkList.empty()) {
2687 auto *Curr = WorkList.pop_back_val();
2689 // If a user is an AND, add all its users to the work list. We only
2690 // propagate the "pred != null" condition through AND because it is only
2691 // correct to assume that all conditions of an AND are met in the true branch.
2692 // TODO: Support similar logic for OR and the EQ predicate?
2693 if (NonNullIfTrue)
2694 if (match(Curr, m_LogicalAnd(m_Value(), m_Value()))) {
2695 for (const auto *CurrU : Curr->users())
2696 if (Visited.insert(CurrU).second)
2697 WorkList.push_back(CurrU);
2698 continue;
2701 if (const BranchInst *BI = dyn_cast<BranchInst>(Curr)) {
2702 assert(BI->isConditional() && "uses a comparison!");
2704 BasicBlock *NonNullSuccessor =
2705 BI->getSuccessor(NonNullIfTrue ? 0 : 1);
2706 BasicBlockEdge Edge(BI->getParent(), NonNullSuccessor);
2707 if (Edge.isSingleEdge() && DT->dominates(Edge, CtxI->getParent()))
2708 return true;
2709 } else if (NonNullIfTrue && isGuard(Curr) &&
2710 DT->dominates(cast<Instruction>(Curr), CtxI)) {
2711 return true;
2717 return false;
2720 /// Does the 'Range' metadata (which must be a valid MD_range operand list)
2721 /// ensure that the value it's attached to is never Value? 'RangeType' is
2722 /// the type of the value described by the range.
2723 static bool rangeMetadataExcludesValue(const MDNode* Ranges, const APInt& Value) {
2724 const unsigned NumRanges = Ranges->getNumOperands() / 2;
2725 assert(NumRanges >= 1);
2726 for (unsigned i = 0; i < NumRanges; ++i) {
2727 ConstantInt *Lower =
2728 mdconst::extract<ConstantInt>(Ranges->getOperand(2 * i + 0));
2729 ConstantInt *Upper =
2730 mdconst::extract<ConstantInt>(Ranges->getOperand(2 * i + 1));
2731 ConstantRange Range(Lower->getValue(), Upper->getValue());
2732 if (Range.contains(Value))
2733 return false;
2735 return true;
2738 /// Try to detect a recurrence that monotonically increases/decreases from a
2739 /// non-zero starting value. These are common as induction variables.
2740 static bool isNonZeroRecurrence(const PHINode *PN) {
2741 BinaryOperator *BO = nullptr;
2742 Value *Start = nullptr, *Step = nullptr;
2743 const APInt *StartC, *StepC;
2744 if (!matchSimpleRecurrence(PN, BO, Start, Step) ||
2745 !match(Start, m_APInt(StartC)) || StartC->isZero())
2746 return false;
2748 switch (BO->getOpcode()) {
2749 case Instruction::Add:
2750 // Starting from non-zero and stepping away from zero can never wrap back
2751 // to zero.
2752 return BO->hasNoUnsignedWrap() ||
2753 (BO->hasNoSignedWrap() && match(Step, m_APInt(StepC)) &&
2754 StartC->isNegative() == StepC->isNegative());
2755 case Instruction::Mul:
2756 return (BO->hasNoUnsignedWrap() || BO->hasNoSignedWrap()) &&
2757 match(Step, m_APInt(StepC)) && !StepC->isZero();
2758 case Instruction::Shl:
2759 return BO->hasNoUnsignedWrap() || BO->hasNoSignedWrap();
2760 case Instruction::AShr:
2761 case Instruction::LShr:
2762 return BO->isExact();
2763 default:
2764 return false;
2768 static bool matchOpWithOpEqZero(Value *Op0, Value *Op1) {
2769 return match(Op0, m_ZExtOrSExt(m_SpecificICmp(ICmpInst::ICMP_EQ,
2770 m_Specific(Op1), m_Zero()))) ||
2771 match(Op1, m_ZExtOrSExt(m_SpecificICmp(ICmpInst::ICMP_EQ,
2772 m_Specific(Op0), m_Zero())));
2775 static bool isNonZeroAdd(const APInt &DemandedElts, unsigned Depth,
2776 const SimplifyQuery &Q, unsigned BitWidth, Value *X,
2777 Value *Y, bool NSW, bool NUW) {
2778 // (X + (X != 0)) is non zero
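// (If X == 0 the extended compare contributes a non-zero 1 or -1; if X != 0
// the compare is 0 and X itself makes the sum non-zero.)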
2779 if (matchOpWithOpEqZero(X, Y))
2780 return true;
2782 if (NUW)
2783 return isKnownNonZero(Y, DemandedElts, Q, Depth) ||
2784 isKnownNonZero(X, DemandedElts, Q, Depth);
2786 KnownBits XKnown = computeKnownBits(X, DemandedElts, Depth, Q);
2787 KnownBits YKnown = computeKnownBits(Y, DemandedElts, Depth, Q);
2789 // If X and Y are both non-negative (as signed values) then their sum is not
2790 // zero unless both X and Y are zero.
2791 if (XKnown.isNonNegative() && YKnown.isNonNegative())
2792 if (isKnownNonZero(Y, DemandedElts, Q, Depth) ||
2793 isKnownNonZero(X, DemandedElts, Q, Depth))
2794 return true;
2796 // If X and Y are both negative (as signed values) then their sum is not
2797 // zero unless both X and Y equal INT_MIN.
2798 if (XKnown.isNegative() && YKnown.isNegative()) {
2799 APInt Mask = APInt::getSignedMaxValue(BitWidth);
2800 // The sign bit of X is set. If some other bit is set then X is not equal
2801 // to INT_MIN.
2802 if (XKnown.One.intersects(Mask))
2803 return true;
2804 // The sign bit of Y is set. If some other bit is set then Y is not equal
2805 // to INT_MIN.
2806 if (YKnown.One.intersects(Mask))
2807 return true;
2810 // The sum of a non-negative number and a power of two is not zero.
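// (If X + P wrapped to zero, X would have to equal 2^BitWidth - P, which has
// its sign bit set for every power of two P, contradicting X being non-negative.)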
2811 if (XKnown.isNonNegative() &&
2812 isKnownToBeAPowerOfTwo(Y, /*OrZero*/ false, Depth, Q))
2813 return true;
2814 if (YKnown.isNonNegative() &&
2815 isKnownToBeAPowerOfTwo(X, /*OrZero*/ false, Depth, Q))
2816 return true;
2818 return KnownBits::add(XKnown, YKnown, NSW, NUW).isNonZero();
2821 static bool isNonZeroSub(const APInt &DemandedElts, unsigned Depth,
2822 const SimplifyQuery &Q, unsigned BitWidth, Value *X,
2823 Value *Y) {
2824 // (X - (X != 0)) is non zero
2825 // ((X != 0) - X) is non zero
2826 if (matchOpWithOpEqZero(X, Y))
2827 return true;
2829 // TODO: Move this case into isKnownNonEqual().
2830 if (auto *C = dyn_cast<Constant>(X))
2831 if (C->isNullValue() && isKnownNonZero(Y, DemandedElts, Q, Depth))
2832 return true;
2834 return ::isKnownNonEqual(X, Y, DemandedElts, Depth, Q);
2837 static bool isNonZeroMul(const APInt &DemandedElts, unsigned Depth,
2838 const SimplifyQuery &Q, unsigned BitWidth, Value *X,
2839 Value *Y, bool NSW, bool NUW) {
2840 // If X and Y are non-zero then so is X * Y as long as the multiplication
2841 // does not overflow.
2842 if (NSW || NUW)
2843 return isKnownNonZero(X, DemandedElts, Q, Depth) &&
2844 isKnownNonZero(Y, DemandedElts, Q, Depth);
2846 // If either X or Y is odd, then if the other is non-zero the result can't
2847 // be zero.
2848 KnownBits XKnown = computeKnownBits(X, DemandedElts, Depth, Q);
2849 if (XKnown.One[0])
2850 return isKnownNonZero(Y, DemandedElts, Q, Depth);
2852 KnownBits YKnown = computeKnownBits(Y, DemandedElts, Depth, Q);
2853 if (YKnown.One[0])
2854 return XKnown.isNonZero() || isKnownNonZero(X, DemandedElts, Q, Depth);
2856 // If there exists any subset of X (sX) and subset of Y (sY) s.t sX * sY is
2857 // non-zero, then X * Y is non-zero. We can find sX and sY by just taking
2858 // the lowest known One of X and Y. If they are non-zero, the result
2859 // must be non-zero. We can check if LSB(X) * LSB(Y) != 0 by doing
2860 // X.countMaxTrailingZeros() + Y.countMaxTrailingZeros() < BitWidth.
2861 return (XKnown.countMaxTrailingZeros() + YKnown.countMaxTrailingZeros()) <
2862 BitWidth;
2865 static bool isNonZeroShift(const Operator *I, const APInt &DemandedElts,
2866 unsigned Depth, const SimplifyQuery &Q,
2867 const KnownBits &KnownVal) {
2868 auto ShiftOp = [&](const APInt &Lhs, const APInt &Rhs) {
2869 switch (I->getOpcode()) {
2870 case Instruction::Shl:
2871 return Lhs.shl(Rhs);
2872 case Instruction::LShr:
2873 return Lhs.lshr(Rhs);
2874 case Instruction::AShr:
2875 return Lhs.ashr(Rhs);
2876 default:
2877 llvm_unreachable("Unknown Shift Opcode");
2881 auto InvShiftOp = [&](const APInt &Lhs, const APInt &Rhs) {
2882 switch (I->getOpcode()) {
2883 case Instruction::Shl:
2884 return Lhs.lshr(Rhs);
2885 case Instruction::LShr:
2886 case Instruction::AShr:
2887 return Lhs.shl(Rhs);
2888 default:
2889 llvm_unreachable("Unknown Shift Opcode");
2893 if (KnownVal.isUnknown())
2894 return false;
2896 KnownBits KnownCnt =
2897 computeKnownBits(I->getOperand(1), DemandedElts, Depth, Q);
2898 APInt MaxShift = KnownCnt.getMaxValue();
2899 unsigned NumBits = KnownVal.getBitWidth();
2900 if (MaxShift.uge(NumBits))
2901 return false;
2903 if (!ShiftOp(KnownVal.One, MaxShift).isZero())
2904 return true;
2906 // If all of the bits shifted out are known to be zero, and Val is known
2907 // non-zero then at least one non-zero bit must remain.
2908 if (InvShiftOp(KnownVal.Zero, NumBits - MaxShift)
2909 .eq(InvShiftOp(APInt::getAllOnes(NumBits), NumBits - MaxShift)) &&
2910 isKnownNonZero(I->getOperand(0), DemandedElts, Q, Depth))
2911 return true;
2913 return false;
2916 static bool isKnownNonZeroFromOperator(const Operator *I,
2917 const APInt &DemandedElts,
2918 unsigned Depth, const SimplifyQuery &Q) {
2919 unsigned BitWidth = getBitWidth(I->getType()->getScalarType(), Q.DL);
2920 switch (I->getOpcode()) {
2921 case Instruction::Alloca:
2922 // Alloca never returns null, malloc might.
2923 return I->getType()->getPointerAddressSpace() == 0;
2924 case Instruction::GetElementPtr:
2925 if (I->getType()->isPointerTy())
2926 return isGEPKnownNonNull(cast<GEPOperator>(I), Depth, Q);
2927 break;
2928 case Instruction::BitCast: {
2929 // We need to be a bit careful here. We can only peek through the bitcast
2930 // if the scalar size of elements in the operand is smaller than, and evenly
2931 // divides, the scalar size they are being cast to. Take three cases:
2933 // 1) Unsafe:
2934 // bitcast <2 x i16> %NonZero to <4 x i8>
2936 // %NonZero can have 2 non-zero i16 elements, but isKnownNonZero on a
2937 // <4 x i8> requires that all 4 i8 elements be non-zero which isn't
2938 // guaranteed (imagine just the sign bit set in the 2 i16 elements).
2940 // 2) Unsafe:
2941 // bitcast <4 x i3> %NonZero to <3 x i4>
2943 // Even though the scalar size of the src (`i3`) is smaller than the
2944 // scalar size of the dst `i4`, because `i4` is not a multiple of `i3`,
2945 // it's possible for the `3 x i4` elements to be zero because there are
2946 // some elements in the destination that don't contain any full src
2947 // element.
2949 // 3) Safe:
2950 // bitcast <4 x i8> %NonZero to <2 x i16>
2952 // This is always safe as non-zero in the 4 i8 elements implies
2953 // non-zero in the combination of any two adjacent ones. Since i16 is a
2954 // multiple of i8, each i16 is guaranteed to cover 2 full i8 elements.
2955 // This all implies the 2 i16 elements are non-zero.
2956 Type *FromTy = I->getOperand(0)->getType();
2957 if ((FromTy->isIntOrIntVectorTy() || FromTy->isPtrOrPtrVectorTy()) &&
2958 (BitWidth % getBitWidth(FromTy->getScalarType(), Q.DL)) == 0)
2959 return isKnownNonZero(I->getOperand(0), Q, Depth);
2960 } break;
2961 case Instruction::IntToPtr:
2962 // Note that we have to take special care to avoid looking through
2963 // truncating casts, e.g., int2ptr/ptr2int with appropriate sizes, as well
2964 // as casts that can alter the value, e.g., AddrSpaceCasts.
2965 if (!isa<ScalableVectorType>(I->getType()) &&
2966 Q.DL.getTypeSizeInBits(I->getOperand(0)->getType()).getFixedValue() <=
2967 Q.DL.getTypeSizeInBits(I->getType()).getFixedValue())
2968 return isKnownNonZero(I->getOperand(0), DemandedElts, Q, Depth);
2969 break;
2970 case Instruction::PtrToInt:
2971 // Similar to int2ptr above, we can look through ptr2int here if the cast
2972 // is a no-op or an extend and not a truncate.
2973 if (!isa<ScalableVectorType>(I->getType()) &&
2974 Q.DL.getTypeSizeInBits(I->getOperand(0)->getType()).getFixedValue() <=
2975 Q.DL.getTypeSizeInBits(I->getType()).getFixedValue())
2976 return isKnownNonZero(I->getOperand(0), DemandedElts, Q, Depth);
2977 break;
2978 case Instruction::Trunc:
2979 // nuw/nsw trunc preserves zero/non-zero status of input.
2980 if (auto *TI = dyn_cast<TruncInst>(I))
2981 if (TI->hasNoSignedWrap() || TI->hasNoUnsignedWrap())
2982 return isKnownNonZero(TI->getOperand(0), DemandedElts, Q, Depth);
2983 break;
2985 case Instruction::Sub:
2986 return isNonZeroSub(DemandedElts, Depth, Q, BitWidth, I->getOperand(0),
2987 I->getOperand(1));
2988 case Instruction::Xor:
2989 // (X ^ (X != 0)) is non zero
2990 if (matchOpWithOpEqZero(I->getOperand(0), I->getOperand(1)))
2991 return true;
2992 break;
2993 case Instruction::Or:
2994 // (X | (X != 0)) is non zero
2995 if (matchOpWithOpEqZero(I->getOperand(0), I->getOperand(1)))
2996 return true;
2997 // X | Y != 0 if X != 0 or Y != 0.
2998 return isKnownNonZero(I->getOperand(1), DemandedElts, Q, Depth) ||
2999 isKnownNonZero(I->getOperand(0), DemandedElts, Q, Depth);
3000 case Instruction::SExt:
3001 case Instruction::ZExt:
3002 // ext X != 0 if X != 0.
3003 return isKnownNonZero(I->getOperand(0), DemandedElts, Q, Depth);
3005 case Instruction::Shl: {
3006 // shl nsw/nuw can't remove any non-zero bits.
3007 const OverflowingBinaryOperator *BO = cast<OverflowingBinaryOperator>(I);
3008 if (Q.IIQ.hasNoUnsignedWrap(BO) || Q.IIQ.hasNoSignedWrap(BO))
3009 return isKnownNonZero(I->getOperand(0), DemandedElts, Q, Depth);
3011 // shl X, Y != 0 if X is odd. Note that the value of the shift is undefined
3012 // if the lowest bit is shifted off the end.
3013 KnownBits Known(BitWidth);
3014 computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth, Q);
3015 if (Known.One[0])
3016 return true;
3018 return isNonZeroShift(I, DemandedElts, Depth, Q, Known);
3020 case Instruction::LShr:
3021 case Instruction::AShr: {
3022 // shr exact can only shift out zero bits.
3023 const PossiblyExactOperator *BO = cast<PossiblyExactOperator>(I);
3024 if (BO->isExact())
3025 return isKnownNonZero(I->getOperand(0), DemandedElts, Q, Depth);
3027 // shr X, Y != 0 if X is negative. Note that the value of the shift is not
3028 // defined if the sign bit is shifted off the end.
3029 KnownBits Known =
3030 computeKnownBits(I->getOperand(0), DemandedElts, Depth, Q);
3031 if (Known.isNegative())
3032 return true;
3034 return isNonZeroShift(I, DemandedElts, Depth, Q, Known);
3036 case Instruction::UDiv:
3037 case Instruction::SDiv: {
3038 // X / Y
3039 // div exact can only produce a zero if the dividend is zero.
3040 if (cast<PossiblyExactOperator>(I)->isExact())
3041 return isKnownNonZero(I->getOperand(0), DemandedElts, Q, Depth);
3043 KnownBits XKnown =
3044 computeKnownBits(I->getOperand(0), DemandedElts, Depth, Q);
3045 // If X is fully unknown we won't be able to figure anything out so don't
3046 // bother computing known bits for Y.
3047 if (XKnown.isUnknown())
3048 return false;
3050 KnownBits YKnown =
3051 computeKnownBits(I->getOperand(1), DemandedElts, Depth, Q);
3052 if (I->getOpcode() == Instruction::SDiv) {
3053 // For signed division we need to compare the absolute values of the operands.
3054 XKnown = XKnown.abs(/*IntMinIsPoison*/ false);
3055 YKnown = YKnown.abs(/*IntMinIsPoison*/ false);
3057 // If X u>= Y then div is non zero (0/0 is UB).
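// For example, udiv i8 %x, %y where %x is known to have bit 7 set (so
// %x u>= 128) and %y is known to have bit 7 clear (so %y u<= 127): then
// %x u>= %y and the quotient is at least 1.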
3058 std::optional<bool> XUgeY = KnownBits::uge(XKnown, YKnown);
3059 // If X is totally unknown or X u< Y we won't be able to prove non-zero
3060 // with compute known bits so just return early.
3061 return XUgeY && *XUgeY;
3063 case Instruction::Add: {
3064 // X + Y.
3066 // If the add has the nuw flag, then the result is non-zero if either X or Y
3067 // is non-zero.
3068 auto *BO = cast<OverflowingBinaryOperator>(I);
3069 return isNonZeroAdd(DemandedElts, Depth, Q, BitWidth, I->getOperand(0),
3070 I->getOperand(1), Q.IIQ.hasNoSignedWrap(BO),
3071 Q.IIQ.hasNoUnsignedWrap(BO));
3073 case Instruction::Mul: {
3074 const OverflowingBinaryOperator *BO = cast<OverflowingBinaryOperator>(I);
3075 return isNonZeroMul(DemandedElts, Depth, Q, BitWidth, I->getOperand(0),
3076 I->getOperand(1), Q.IIQ.hasNoSignedWrap(BO),
3077 Q.IIQ.hasNoUnsignedWrap(BO));
3079 case Instruction::Select: {
3080 // (C ? X : Y) != 0 if X != 0 and Y != 0.
3082 // First check if the arm is non-zero using `isKnownNonZero`. If that fails,
3083 // then see if the select condition implies the arm is non-zero. For example
3084 // (X != 0 ? X : Y), we know the true arm is non-zero as the `X` "return" is
3085 // dominated by `X != 0`.
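// Symmetrically, in (X == 0 ? Y : X) the false arm X is only selected when
// X != 0, which is handled below by inverting the predicate for the false arm.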
3086 auto SelectArmIsNonZero = [&](bool IsTrueArm) {
3087 Value *Op;
3088 Op = IsTrueArm ? I->getOperand(1) : I->getOperand(2);
3089 // Op is trivially non-zero.
3090 if (isKnownNonZero(Op, DemandedElts, Q, Depth))
3091 return true;
3093 // The condition of the select dominates the true/false arm. Check if the
3094 // condition implies that a given arm is non-zero.
3095 Value *X;
3096 CmpPredicate Pred;
3097 if (!match(I->getOperand(0), m_c_ICmp(Pred, m_Specific(Op), m_Value(X))))
3098 return false;
3100 if (!IsTrueArm)
3101 Pred = ICmpInst::getInversePredicate(Pred);
3103 return cmpExcludesZero(Pred, X);
3106 if (SelectArmIsNonZero(/* IsTrueArm */ true) &&
3107 SelectArmIsNonZero(/* IsTrueArm */ false))
3108 return true;
3109 break;
3111 case Instruction::PHI: {
3112 auto *PN = cast<PHINode>(I);
3113 if (Q.IIQ.UseInstrInfo && isNonZeroRecurrence(PN))
3114 return true;
3116 // Check if all incoming values are non-zero using recursion.
3117 SimplifyQuery RecQ = Q.getWithoutCondContext();
3118 unsigned NewDepth = std::max(Depth, MaxAnalysisRecursionDepth - 1);
3119 return llvm::all_of(PN->operands(), [&](const Use &U) {
3120 if (U.get() == PN)
3121 return true;
3122 RecQ.CxtI = PN->getIncomingBlock(U)->getTerminator();
3123 // Check if the branch on the phi excludes zero.
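// For example, if the incoming block ends in
//   %c = icmp ne i32 %v, 0
//   br i1 %c, label %phi.bb, label %other
// then on the edge into %phi.bb the incoming value %v is known non-zero.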
3124 CmpPredicate Pred;
3125 Value *X;
3126 BasicBlock *TrueSucc, *FalseSucc;
3127 if (match(RecQ.CxtI,
3128 m_Br(m_c_ICmp(Pred, m_Specific(U.get()), m_Value(X)),
3129 m_BasicBlock(TrueSucc), m_BasicBlock(FalseSucc)))) {
3130 // Check for cases of duplicate successors.
3131 if ((TrueSucc == PN->getParent()) != (FalseSucc == PN->getParent())) {
3132 // If we're using the false successor, invert the predicate.
3133 if (FalseSucc == PN->getParent())
3134 Pred = CmpInst::getInversePredicate(Pred);
3135 if (cmpExcludesZero(Pred, X))
3136 return true;
3139 // Finally recurse on the edge and check it directly.
3140 return isKnownNonZero(U.get(), DemandedElts, RecQ, NewDepth);
3143 case Instruction::InsertElement: {
3144 if (isa<ScalableVectorType>(I->getType()))
3145 break;
3147 const Value *Vec = I->getOperand(0);
3148 const Value *Elt = I->getOperand(1);
3149 auto *CIdx = dyn_cast<ConstantInt>(I->getOperand(2));
3151 unsigned NumElts = DemandedElts.getBitWidth();
3152 APInt DemandedVecElts = DemandedElts;
3153 bool SkipElt = false;
3154 // If we know the index we are inserting to, clear it from the Vec check.
3155 if (CIdx && CIdx->getValue().ult(NumElts)) {
3156 DemandedVecElts.clearBit(CIdx->getZExtValue());
3157 SkipElt = !DemandedElts[CIdx->getZExtValue()];
3160 // Result is non-zero if Elt is non-zero (or not demanded) and the rest of
3161 // the demanded elts in Vec are non-zero.
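// For example, insertelement <2 x i8> %v, i8 %x, i32 0 is known non-zero for
// both lanes if %x is non-zero and lane 1 of %v is non-zero.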
3162 return (SkipElt || isKnownNonZero(Elt, Q, Depth)) &&
3163 (DemandedVecElts.isZero() ||
3164 isKnownNonZero(Vec, DemandedVecElts, Q, Depth));
3166 case Instruction::ExtractElement:
3167 if (const auto *EEI = dyn_cast<ExtractElementInst>(I)) {
3168 const Value *Vec = EEI->getVectorOperand();
3169 const Value *Idx = EEI->getIndexOperand();
3170 auto *CIdx = dyn_cast<ConstantInt>(Idx);
3171 if (auto *VecTy = dyn_cast<FixedVectorType>(Vec->getType())) {
3172 unsigned NumElts = VecTy->getNumElements();
3173 APInt DemandedVecElts = APInt::getAllOnes(NumElts);
3174 if (CIdx && CIdx->getValue().ult(NumElts))
3175 DemandedVecElts = APInt::getOneBitSet(NumElts, CIdx->getZExtValue());
3176 return isKnownNonZero(Vec, DemandedVecElts, Q, Depth);
3179 break;
3180 case Instruction::ShuffleVector: {
3181 auto *Shuf = dyn_cast<ShuffleVectorInst>(I);
3182 if (!Shuf)
3183 break;
3184 APInt DemandedLHS, DemandedRHS;
3185 // For undef elements, we don't know anything about the common state of
3186 // the shuffle result.
3187 if (!getShuffleDemandedElts(Shuf, DemandedElts, DemandedLHS, DemandedRHS))
3188 break;
3189 // If demanded elements for both vecs are non-zero, the shuffle is non-zero.
3190 return (DemandedRHS.isZero() ||
3191 isKnownNonZero(Shuf->getOperand(1), DemandedRHS, Q, Depth)) &&
3192 (DemandedLHS.isZero() ||
3193 isKnownNonZero(Shuf->getOperand(0), DemandedLHS, Q, Depth));
3195 case Instruction::Freeze:
3196 return isKnownNonZero(I->getOperand(0), Q, Depth) &&
3197 isGuaranteedNotToBePoison(I->getOperand(0), Q.AC, Q.CxtI, Q.DT,
3198 Depth);
3199 case Instruction::Load: {
3200 auto *LI = cast<LoadInst>(I);
3201 // A load tagged with nonnull metadata, or with dereferenceable metadata when
3202 // null is not a defined address in that address space, is never null.
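// For example, a pointer load carrying !nonnull metadata, or an integer load
// annotated with !range !{i32 1, i32 100} (which excludes 0), is known
// non-zero.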
3203 if (auto *PtrT = dyn_cast<PointerType>(I->getType())) {
3204 if (Q.IIQ.getMetadata(LI, LLVMContext::MD_nonnull) ||
3205 (Q.IIQ.getMetadata(LI, LLVMContext::MD_dereferenceable) &&
3206 !NullPointerIsDefined(LI->getFunction(), PtrT->getAddressSpace())))
3207 return true;
3208 } else if (MDNode *Ranges = Q.IIQ.getMetadata(LI, LLVMContext::MD_range)) {
3209 return rangeMetadataExcludesValue(Ranges, APInt::getZero(BitWidth));
3212 // No need to fall through to computeKnownBits as range metadata is already
3213 // handled in isKnownNonZero.
3214 return false;
3216 case Instruction::ExtractValue: {
3217 const WithOverflowInst *WO;
3218 if (match(I, m_ExtractValue<0>(m_WithOverflowInst(WO)))) {
3219 switch (WO->getBinaryOp()) {
3220 default:
3221 break;
3222 case Instruction::Add:
3223 return isNonZeroAdd(DemandedElts, Depth, Q, BitWidth,
3224 WO->getArgOperand(0), WO->getArgOperand(1),
3225 /*NSW=*/false,
3226 /*NUW=*/false);
3227 case Instruction::Sub:
3228 return isNonZeroSub(DemandedElts, Depth, Q, BitWidth,
3229 WO->getArgOperand(0), WO->getArgOperand(1));
3230 case Instruction::Mul:
3231 return isNonZeroMul(DemandedElts, Depth, Q, BitWidth,
3232 WO->getArgOperand(0), WO->getArgOperand(1),
3233 /*NSW=*/false, /*NUW=*/false);
3234 break;
3237 break;
3239 case Instruction::Call:
3240 case Instruction::Invoke: {
3241 const auto *Call = cast<CallBase>(I);
3242 if (I->getType()->isPointerTy()) {
3243 if (Call->isReturnNonNull())
3244 return true;
3245 if (const auto *RP = getArgumentAliasingToReturnedPointer(Call, true))
3246 return isKnownNonZero(RP, Q, Depth);
3247 } else {
3248 if (MDNode *Ranges = Q.IIQ.getMetadata(Call, LLVMContext::MD_range))
3249 return rangeMetadataExcludesValue(Ranges, APInt::getZero(BitWidth));
3250 if (std::optional<ConstantRange> Range = Call->getRange()) {
3251 const APInt ZeroValue(Range->getBitWidth(), 0);
3252 if (!Range->contains(ZeroValue))
3253 return true;
3255 if (const Value *RV = Call->getReturnedArgOperand())
3256 if (RV->getType() == I->getType() && isKnownNonZero(RV, Q, Depth))
3257 return true;
3260 if (auto *II = dyn_cast<IntrinsicInst>(I)) {
3261 switch (II->getIntrinsicID()) {
3262 case Intrinsic::sshl_sat:
3263 case Intrinsic::ushl_sat:
3264 case Intrinsic::abs:
3265 case Intrinsic::bitreverse:
3266 case Intrinsic::bswap:
3267 case Intrinsic::ctpop:
3268 return isKnownNonZero(II->getArgOperand(0), DemandedElts, Q, Depth);
3269 // NB: We don't handle usub_sat here, as in any case where we can prove it is
3270 // non-zero, we will fold it to `sub nuw` in InstCombine.
3271 case Intrinsic::ssub_sat:
3272 return isNonZeroSub(DemandedElts, Depth, Q, BitWidth,
3273 II->getArgOperand(0), II->getArgOperand(1));
3274 case Intrinsic::sadd_sat:
3275 return isNonZeroAdd(DemandedElts, Depth, Q, BitWidth,
3276 II->getArgOperand(0), II->getArgOperand(1),
3277 /*NSW=*/true, /* NUW=*/false);
3278 // Vec reverse preserves zero/non-zero status from input vec.
3279 case Intrinsic::vector_reverse:
3280 return isKnownNonZero(II->getArgOperand(0), DemandedElts.reverseBits(),
3281 Q, Depth);
3282 // umax/umin/smax/smin/or of all non-zero elements is always non-zero.
3283 case Intrinsic::vector_reduce_or:
3284 case Intrinsic::vector_reduce_umax:
3285 case Intrinsic::vector_reduce_umin:
3286 case Intrinsic::vector_reduce_smax:
3287 case Intrinsic::vector_reduce_smin:
3288 return isKnownNonZero(II->getArgOperand(0), Q, Depth);
3289 case Intrinsic::umax:
3290 case Intrinsic::uadd_sat:
3291 // umax(X, (X != 0)) is non zero
3292 // X +usat (X != 0) is non zero
3293 if (matchOpWithOpEqZero(II->getArgOperand(0), II->getArgOperand(1)))
3294 return true;
3296 return isKnownNonZero(II->getArgOperand(1), DemandedElts, Q, Depth) ||
3297 isKnownNonZero(II->getArgOperand(0), DemandedElts, Q, Depth);
3298 case Intrinsic::smax: {
3299 // If either arg is strictly positive the result is non-zero. Otherwise
3300 // the result is non-zero if both ops are non-zero.
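// For example, smax(i8 %x, i8 1) is always at least 1; and if both operands
// are non-zero, smax returns one of them and is therefore non-zero as well.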
3301 auto IsNonZero = [&](Value *Op, std::optional<bool> &OpNonZero,
3302 const KnownBits &OpKnown) {
3303 if (!OpNonZero.has_value())
3304 OpNonZero = OpKnown.isNonZero() ||
3305 isKnownNonZero(Op, DemandedElts, Q, Depth);
3306 return *OpNonZero;
3308 // Avoid re-computing isKnownNonZero.
3309 std::optional<bool> Op0NonZero, Op1NonZero;
3310 KnownBits Op1Known =
3311 computeKnownBits(II->getArgOperand(1), DemandedElts, Depth, Q);
3312 if (Op1Known.isNonNegative() &&
3313 IsNonZero(II->getArgOperand(1), Op1NonZero, Op1Known))
3314 return true;
3315 KnownBits Op0Known =
3316 computeKnownBits(II->getArgOperand(0), DemandedElts, Depth, Q);
3317 if (Op0Known.isNonNegative() &&
3318 IsNonZero(II->getArgOperand(0), Op0NonZero, Op0Known))
3319 return true;
3320 return IsNonZero(II->getArgOperand(1), Op1NonZero, Op1Known) &&
3321 IsNonZero(II->getArgOperand(0), Op0NonZero, Op0Known);
3323 case Intrinsic::smin: {
3324 // If either arg is negative the result is non-zero. Otherwise
3325 // the result is non-zero if both ops are non-zero.
3326 KnownBits Op1Known =
3327 computeKnownBits(II->getArgOperand(1), DemandedElts, Depth, Q);
3328 if (Op1Known.isNegative())
3329 return true;
3330 KnownBits Op0Known =
3331 computeKnownBits(II->getArgOperand(0), DemandedElts, Depth, Q);
3332 if (Op0Known.isNegative())
3333 return true;
3335 if (Op1Known.isNonZero() && Op0Known.isNonZero())
3336 return true;
3338 [[fallthrough]];
3339 case Intrinsic::umin:
3340 return isKnownNonZero(II->getArgOperand(0), DemandedElts, Q, Depth) &&
3341 isKnownNonZero(II->getArgOperand(1), DemandedElts, Q, Depth);
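// cttz(X) is non-zero whenever the low bit of X is known zero: either X == 0
// (cttz returns the bit width, which is non-zero) or X is a non-zero even
// value (cttz >= 1).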
3342 case Intrinsic::cttz:
3343 return computeKnownBits(II->getArgOperand(0), DemandedElts, Depth, Q)
3344 .Zero[0];
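// Similarly, ctlz(X) is non-zero whenever X is known non-negative: either
// X == 0 (ctlz returns the bit width) or the sign bit is clear, so there is
// at least one leading zero.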
3345 case Intrinsic::ctlz:
3346 return computeKnownBits(II->getArgOperand(0), DemandedElts, Depth, Q)
3347 .isNonNegative();
3348 case Intrinsic::fshr:
3349 case Intrinsic::fshl:
3350 // If Op0 == Op1, this is a rotate. rotate(x, y) != 0 iff x != 0.
3351 if (II->getArgOperand(0) == II->getArgOperand(1))
3352 return isKnownNonZero(II->getArgOperand(0), DemandedElts, Q, Depth);
3353 break;
3354 case Intrinsic::vscale:
3355 return true;
3356 case Intrinsic::experimental_get_vector_length:
3357 return isKnownNonZero(I->getOperand(0), Q, Depth);
3358 default:
3359 break;
3361 break;
3364 return false;
3368 KnownBits Known(BitWidth);
3369 computeKnownBits(I, DemandedElts, Known, Depth, Q);
3370 return Known.One != 0;
3373 /// Return true if the given value is known to be non-zero when defined. For
3374 /// vectors, return true if every demanded element is known to be non-zero when
3375 /// defined. For pointers, if the context instruction and dominator tree are
3376 /// specified, perform context-sensitive analysis and return true if the
3377 /// pointer couldn't possibly be null at the specified instruction.
3378 /// Supports values with integer or pointer type and vectors of integers.
3379 bool isKnownNonZero(const Value *V, const APInt &DemandedElts,
3380 const SimplifyQuery &Q, unsigned Depth) {
3381 Type *Ty = V->getType();
3383 #ifndef NDEBUG
3384 assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");
3386 if (auto *FVTy = dyn_cast<FixedVectorType>(Ty)) {
3387 assert(
3388 FVTy->getNumElements() == DemandedElts.getBitWidth() &&
3389 "DemandedElt width should equal the fixed vector number of elements");
3390 } else {
3391 assert(DemandedElts == APInt(1, 1) &&
3392 "DemandedElt width should be 1 for scalars");
3394 #endif
3396 if (auto *C = dyn_cast<Constant>(V)) {
3397 if (C->isNullValue())
3398 return false;
3399 if (isa<ConstantInt>(C))
3400 // Must be non-zero due to null test above.
3401 return true;
3403 // For constant vectors, check that all elements are poison or known
3404 // non-zero to determine that the whole vector is known non-zero.
3405 if (auto *VecTy = dyn_cast<FixedVectorType>(Ty)) {
3406 for (unsigned i = 0, e = VecTy->getNumElements(); i != e; ++i) {
3407 if (!DemandedElts[i])
3408 continue;
3409 Constant *Elt = C->getAggregateElement(i);
3410 if (!Elt || Elt->isNullValue())
3411 return false;
3412 if (!isa<PoisonValue>(Elt) && !isa<ConstantInt>(Elt))
3413 return false;
3415 return true;
3418 // A constant ptrauth can be null iff its base pointer can be.
3419 if (auto *CPA = dyn_cast<ConstantPtrAuth>(V))
3420 return isKnownNonZero(CPA->getPointer(), DemandedElts, Q, Depth);
3422 // A global variable in address space 0 is non-null unless it is extern weak
3423 // or an absolute symbol reference. Other address spaces may have null as a
3424 // valid address for a global, so we can't assume anything.
3425 if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
3426 if (!GV->isAbsoluteSymbolRef() && !GV->hasExternalWeakLinkage() &&
3427 GV->getType()->getAddressSpace() == 0)
3428 return true;
3431 // For constant expressions, fall through to the Operator code below.
3432 if (!isa<ConstantExpr>(V))
3433 return false;
3436 if (const auto *A = dyn_cast<Argument>(V))
3437 if (std::optional<ConstantRange> Range = A->getRange()) {
3438 const APInt ZeroValue(Range->getBitWidth(), 0);
3439 if (!Range->contains(ZeroValue))
3440 return true;
3443 if (!isa<Constant>(V) && isKnownNonZeroFromAssume(V, Q))
3444 return true;
3446 // Some of the tests below are recursive, so bail out if we hit the limit.
3447 if (Depth++ >= MaxAnalysisRecursionDepth)
3448 return false;
3450 // Check for pointer simplifications.
3452 if (PointerType *PtrTy = dyn_cast<PointerType>(Ty)) {
3453 // A byval or inalloca argument is non-null as long as the null pointer is not
3454 // a valid address in its address space. A nonnull argument is assumed never 0.
3455 if (const Argument *A = dyn_cast<Argument>(V)) {
3456 if (((A->hasPassPointeeByValueCopyAttr() &&
3457 !NullPointerIsDefined(A->getParent(), PtrTy->getAddressSpace())) ||
3458 A->hasNonNullAttr()))
3459 return true;
3463 if (const auto *I = dyn_cast<Operator>(V))
3464 if (isKnownNonZeroFromOperator(I, DemandedElts, Depth, Q))
3465 return true;
3467 if (!isa<Constant>(V) &&
3468 isKnownNonNullFromDominatingCondition(V, Q.CxtI, Q.DT))
3469 return true;
3471 return false;
3474 bool llvm::isKnownNonZero(const Value *V, const SimplifyQuery &Q,
3475 unsigned Depth) {
3476 auto *FVTy = dyn_cast<FixedVectorType>(V->getType());
3477 APInt DemandedElts =
3478 FVTy ? APInt::getAllOnes(FVTy->getNumElements()) : APInt(1, 1);
3479 return ::isKnownNonZero(V, DemandedElts, Q, Depth);
3482 /// If the pair of operators are the same invertible function, return the
3483 /// operands of the function corresponding to each input. Otherwise,
3484 /// return std::nullopt. An invertible function is one that is 1-to-1 and maps
3485 /// every input value to exactly one output value. This is equivalent to
3486 /// saying that Op1 and Op2 are equal exactly when the specified pair of
3487 /// operands are equal, (except that Op1 and Op2 may be poison more often.)
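/// For example, for Op1 = (add %x, %c) and Op2 = (add %y, %c) this returns the
/// pair (%x, %y), since the two adds are equal exactly when %x == %y.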
3488 static std::optional<std::pair<Value*, Value*>>
3489 getInvertibleOperands(const Operator *Op1,
3490 const Operator *Op2) {
3491 if (Op1->getOpcode() != Op2->getOpcode())
3492 return std::nullopt;
3494 auto getOperands = [&](unsigned OpNum) -> auto {
3495 return std::make_pair(Op1->getOperand(OpNum), Op2->getOperand(OpNum));
3498 switch (Op1->getOpcode()) {
3499 default:
3500 break;
3501 case Instruction::Or:
3502 if (!cast<PossiblyDisjointInst>(Op1)->isDisjoint() ||
3503 !cast<PossiblyDisjointInst>(Op2)->isDisjoint())
3504 break;
3505 [[fallthrough]];
3506 case Instruction::Xor:
3507 case Instruction::Add: {
3508 Value *Other;
3509 if (match(Op2, m_c_BinOp(m_Specific(Op1->getOperand(0)), m_Value(Other))))
3510 return std::make_pair(Op1->getOperand(1), Other);
3511 if (match(Op2, m_c_BinOp(m_Specific(Op1->getOperand(1)), m_Value(Other))))
3512 return std::make_pair(Op1->getOperand(0), Other);
3513 break;
3515 case Instruction::Sub:
3516 if (Op1->getOperand(0) == Op2->getOperand(0))
3517 return getOperands(1);
3518 if (Op1->getOperand(1) == Op2->getOperand(1))
3519 return getOperands(0);
3520 break;
3521 case Instruction::Mul: {
3522 // invertible if A * B == (A * B) mod 2^N where A and B are integers and N is
3523 // the bitwidth. The nsw case is non-obvious, but proven by
3524 // alive2: https://alive2.llvm.org/ce/z/Z6D5qK
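// For example, mul nuw i8 %x, 3 == mul nuw i8 %y, 3 implies %x == %y: with no
// unsigned wrap the products are equal as integers, and 3 is non-zero.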
3525 auto *OBO1 = cast<OverflowingBinaryOperator>(Op1);
3526 auto *OBO2 = cast<OverflowingBinaryOperator>(Op2);
3527 if ((!OBO1->hasNoUnsignedWrap() || !OBO2->hasNoUnsignedWrap()) &&
3528 (!OBO1->hasNoSignedWrap() || !OBO2->hasNoSignedWrap()))
3529 break;
3531 // Assume operand order has been canonicalized
3532 if (Op1->getOperand(1) == Op2->getOperand(1) &&
3533 isa<ConstantInt>(Op1->getOperand(1)) &&
3534 !cast<ConstantInt>(Op1->getOperand(1))->isZero())
3535 return getOperands(0);
3536 break;
3538 case Instruction::Shl: {
3539 // Same as multiplies, with the difference that we don't need to check
3540 // for a non-zero multiply. Shifts always multiply by non-zero.
3541 auto *OBO1 = cast<OverflowingBinaryOperator>(Op1);
3542 auto *OBO2 = cast<OverflowingBinaryOperator>(Op2);
3543 if ((!OBO1->hasNoUnsignedWrap() || !OBO2->hasNoUnsignedWrap()) &&
3544 (!OBO1->hasNoSignedWrap() || !OBO2->hasNoSignedWrap()))
3545 break;
3547 if (Op1->getOperand(1) == Op2->getOperand(1))
3548 return getOperands(0);
3549 break;
3551 case Instruction::AShr:
3552 case Instruction::LShr: {
3553 auto *PEO1 = cast<PossiblyExactOperator>(Op1);
3554 auto *PEO2 = cast<PossiblyExactOperator>(Op2);
3555 if (!PEO1->isExact() || !PEO2->isExact())
3556 break;
3558 if (Op1->getOperand(1) == Op2->getOperand(1))
3559 return getOperands(0);
3560 break;
3562 case Instruction::SExt:
3563 case Instruction::ZExt:
3564 if (Op1->getOperand(0)->getType() == Op2->getOperand(0)->getType())
3565 return getOperands(0);
3566 break;
3567 case Instruction::PHI: {
3568 const PHINode *PN1 = cast<PHINode>(Op1);
3569 const PHINode *PN2 = cast<PHINode>(Op2);
3571 // If PN1 and PN2 are both recurrences, can we prove the entire recurrences
3572 // are a single invertible function of the start values? Note that repeated
3573 // application of an invertible function is also invertible
3574 BinaryOperator *BO1 = nullptr;
3575 Value *Start1 = nullptr, *Step1 = nullptr;
3576 BinaryOperator *BO2 = nullptr;
3577 Value *Start2 = nullptr, *Step2 = nullptr;
3578 if (PN1->getParent() != PN2->getParent() ||
3579 !matchSimpleRecurrence(PN1, BO1, Start1, Step1) ||
3580 !matchSimpleRecurrence(PN2, BO2, Start2, Step2))
3581 break;
3583 auto Values = getInvertibleOperands(cast<Operator>(BO1),
3584 cast<Operator>(BO2));
3585 if (!Values)
3586 break;
3588 // We have to be careful of mutually defined recurrences here. Ex:
3589 // * X_i = X_(i-1) OP Y_(i-1), and Y_i = X_(i-1) OP V
3590 // * X_i = Y_i = X_(i-1) OP Y_(i-1)
3591 // The invertibility of these is complicated, and not worth reasoning
3592 // about (yet?).
3593 if (Values->first != PN1 || Values->second != PN2)
3594 break;
3596 return std::make_pair(Start1, Start2);
3599 return std::nullopt;
3602 /// Return true if V1 == (binop V2, X), where X is known non-zero.
3603 /// Only handle a small subset of binops where (binop V2, X) with non-zero X
3604 /// implies V2 != V1.
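/// For example, if V1 == (add V2, %x) and %x is known non-zero, then V1 != V2;
/// the same reasoning applies to xor and to 'or disjoint'.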
3605 static bool isModifyingBinopOfNonZero(const Value *V1, const Value *V2,
3606 const APInt &DemandedElts, unsigned Depth,
3607 const SimplifyQuery &Q) {
3608 const BinaryOperator *BO = dyn_cast<BinaryOperator>(V1);
3609 if (!BO)
3610 return false;
3611 switch (BO->getOpcode()) {
3612 default:
3613 break;
3614 case Instruction::Or:
3615 if (!cast<PossiblyDisjointInst>(V1)->isDisjoint())
3616 break;
3617 [[fallthrough]];
3618 case Instruction::Xor:
3619 case Instruction::Add:
3620 Value *Op = nullptr;
3621 if (V2 == BO->getOperand(0))
3622 Op = BO->getOperand(1);
3623 else if (V2 == BO->getOperand(1))
3624 Op = BO->getOperand(0);
3625 else
3626 return false;
3627 return isKnownNonZero(Op, DemandedElts, Q, Depth + 1);
3629 return false;
3632 /// Return true if V2 == V1 * C, where V1 is known non-zero, C is not 0/1 and
3633 /// the multiplication is nuw or nsw.
3634 static bool isNonEqualMul(const Value *V1, const Value *V2,
3635 const APInt &DemandedElts, unsigned Depth,
3636 const SimplifyQuery &Q) {
3637 if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(V2)) {
3638 const APInt *C;
3639 return match(OBO, m_Mul(m_Specific(V1), m_APInt(C))) &&
3640 (OBO->hasNoUnsignedWrap() || OBO->hasNoSignedWrap()) &&
3641 !C->isZero() && !C->isOne() &&
3642 isKnownNonZero(V1, DemandedElts, Q, Depth + 1);
3644 return false;
3647 /// Return true if V2 == V1 << C, where V1 is known non-zero, C is not 0 and
3648 /// the shift is nuw or nsw.
3649 static bool isNonEqualShl(const Value *V1, const Value *V2,
3650 const APInt &DemandedElts, unsigned Depth,
3651 const SimplifyQuery &Q) {
3652 if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(V2)) {
3653 const APInt *C;
3654 return match(OBO, m_Shl(m_Specific(V1), m_APInt(C))) &&
3655 (OBO->hasNoUnsignedWrap() || OBO->hasNoSignedWrap()) &&
3656 !C->isZero() && isKnownNonZero(V1, DemandedElts, Q, Depth + 1);
3658 return false;
3661 static bool isNonEqualPHIs(const PHINode *PN1, const PHINode *PN2,
3662 const APInt &DemandedElts, unsigned Depth,
3663 const SimplifyQuery &Q) {
3664 // Check that the two PHIs are in the same block.
3665 if (PN1->getParent() != PN2->getParent())
3666 return false;
3668 SmallPtrSet<const BasicBlock *, 8> VisitedBBs;
3669 bool UsedFullRecursion = false;
3670 for (const BasicBlock *IncomBB : PN1->blocks()) {
3671 if (!VisitedBBs.insert(IncomBB).second)
3672 continue; // Don't reprocess blocks that we have dealt with already.
3673 const Value *IV1 = PN1->getIncomingValueForBlock(IncomBB);
3674 const Value *IV2 = PN2->getIncomingValueForBlock(IncomBB);
3675 const APInt *C1, *C2;
3676 if (match(IV1, m_APInt(C1)) && match(IV2, m_APInt(C2)) && *C1 != *C2)
3677 continue;
3679 // Only one pair of phi operands is allowed for full recursion.
3680 if (UsedFullRecursion)
3681 return false;
3683 SimplifyQuery RecQ = Q.getWithoutCondContext();
3684 RecQ.CxtI = IncomBB->getTerminator();
3685 if (!isKnownNonEqual(IV1, IV2, DemandedElts, Depth + 1, RecQ))
3686 return false;
3687 UsedFullRecursion = true;
3689 return true;
3692 static bool isNonEqualSelect(const Value *V1, const Value *V2,
3693 const APInt &DemandedElts, unsigned Depth,
3694 const SimplifyQuery &Q) {
3695 const SelectInst *SI1 = dyn_cast<SelectInst>(V1);
3696 if (!SI1)
3697 return false;
3699 if (const SelectInst *SI2 = dyn_cast<SelectInst>(V2)) {
3700 const Value *Cond1 = SI1->getCondition();
3701 const Value *Cond2 = SI2->getCondition();
3702 if (Cond1 == Cond2)
3703 return isKnownNonEqual(SI1->getTrueValue(), SI2->getTrueValue(),
3704 DemandedElts, Depth + 1, Q) &&
3705 isKnownNonEqual(SI1->getFalseValue(), SI2->getFalseValue(),
3706 DemandedElts, Depth + 1, Q);
3708 return isKnownNonEqual(SI1->getTrueValue(), V2, DemandedElts, Depth + 1, Q) &&
3709 isKnownNonEqual(SI1->getFalseValue(), V2, DemandedElts, Depth + 1, Q);
3712 // Check to see if A is both a GEP and the incoming value for a PHI in a loop,
3713 // and B is either a ptr or another GEP. If the PHI has 2 incoming values, one
3714 // of them being the recursive GEP A and the other a ptr with the same base as
3715 // B and at the same or a higher offset, then A != B, because each iteration
3716 // only moves the pointer further past B (the recursive GEP's offset is > 0).
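// For instance (illustrative IR): A = getelementptr inbounds i8, ptr %a.phi,
// i64 4, where %a.phi = phi ptr [ %start, %entry ], [ A, %loop ]. If %start
// shares B's base at an equal or higher offset, every iteration moves A
// further past B, so A cannot equal B.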
3717 static bool isNonEqualPointersWithRecursiveGEP(const Value *A, const Value *B,
3718 const SimplifyQuery &Q) {
3719 if (!A->getType()->isPointerTy() || !B->getType()->isPointerTy())
3720 return false;
3722 auto *GEPA = dyn_cast<GEPOperator>(A);
3723 if (!GEPA || GEPA->getNumIndices() != 1 || !isa<Constant>(GEPA->idx_begin()))
3724 return false;
3726 // Handle 2 incoming PHI values with one being a recursive GEP.
3727 auto *PN = dyn_cast<PHINode>(GEPA->getPointerOperand());
3728 if (!PN || PN->getNumIncomingValues() != 2)
3729 return false;
3731 // Search for the recursive GEP as an incoming operand, and record that as
3732 // Step.
3733 Value *Start = nullptr;
3734 Value *Step = const_cast<Value *>(A);
3735 if (PN->getIncomingValue(0) == Step)
3736 Start = PN->getIncomingValue(1);
3737 else if (PN->getIncomingValue(1) == Step)
3738 Start = PN->getIncomingValue(0);
3739 else
3740 return false;
3742 // The other incoming value's base should match B's base. Then:
3743 // StartOffset >= OffsetB && StepOffset > 0?
3744 // StartOffset <= OffsetB && StepOffset < 0?
3745 // A and B are non-equal if either of the above holds.
3746 // We use stripAndAccumulateInBoundsConstantOffsets to restrict the
3747 // optimization to inbounds GEPs only.
3748 unsigned IndexWidth = Q.DL.getIndexTypeSizeInBits(Start->getType());
3749 APInt StartOffset(IndexWidth, 0);
3750 Start = Start->stripAndAccumulateInBoundsConstantOffsets(Q.DL, StartOffset);
3751 APInt StepOffset(IndexWidth, 0);
3752 Step = Step->stripAndAccumulateInBoundsConstantOffsets(Q.DL, StepOffset);
3754 // Check if Base Pointer of Step matches the PHI.
3755 if (Step != PN)
3756 return false;
3757 APInt OffsetB(IndexWidth, 0);
3758 B = B->stripAndAccumulateInBoundsConstantOffsets(Q.DL, OffsetB);
3759 return Start == B &&
3760 ((StartOffset.sge(OffsetB) && StepOffset.isStrictlyPositive()) ||
3761 (StartOffset.sle(OffsetB) && StepOffset.isNegative()));
3764 /// Return true if it is known that V1 != V2.
3765 static bool isKnownNonEqual(const Value *V1, const Value *V2,
3766 const APInt &DemandedElts, unsigned Depth,
3767 const SimplifyQuery &Q) {
3768 if (V1 == V2)
3769 return false;
3770 if (V1->getType() != V2->getType())
3771 // We can't look through casts yet.
3772 return false;
3774 if (Depth >= MaxAnalysisRecursionDepth)
3775 return false;
3777 // See if we can recurse through (exactly one of) our operands. This
3778 // requires our operation be 1-to-1 and map every input value to exactly
3779 // one output value. Such an operation is invertible.
3780 auto *O1 = dyn_cast<Operator>(V1);
3781 auto *O2 = dyn_cast<Operator>(V2);
3782 if (O1 && O2 && O1->getOpcode() == O2->getOpcode()) {
3783 if (auto Values = getInvertibleOperands(O1, O2))
3784 return isKnownNonEqual(Values->first, Values->second, DemandedElts,
3785 Depth + 1, Q);
3787 if (const PHINode *PN1 = dyn_cast<PHINode>(V1)) {
3788 const PHINode *PN2 = cast<PHINode>(V2);
3789 // FIXME: This is missing a generalization to handle the case where one is
3790 // a PHI and another one isn't.
3791 if (isNonEqualPHIs(PN1, PN2, DemandedElts, Depth, Q))
3792 return true;
3796 if (isModifyingBinopOfNonZero(V1, V2, DemandedElts, Depth, Q) ||
3797 isModifyingBinopOfNonZero(V2, V1, DemandedElts, Depth, Q))
3798 return true;
3800 if (isNonEqualMul(V1, V2, DemandedElts, Depth, Q) ||
3801 isNonEqualMul(V2, V1, DemandedElts, Depth, Q))
3802 return true;
3804 if (isNonEqualShl(V1, V2, DemandedElts, Depth, Q) ||
3805 isNonEqualShl(V2, V1, DemandedElts, Depth, Q))
3806 return true;
3808 if (V1->getType()->isIntOrIntVectorTy()) {
3809 // Are any known bits in V1 contradictory to known bits in V2? If V1
3810 // has a known zero where V2 has a known one, they must not be equal.
3811 KnownBits Known1 = computeKnownBits(V1, DemandedElts, Depth, Q);
3812 if (!Known1.isUnknown()) {
3813 KnownBits Known2 = computeKnownBits(V2, DemandedElts, Depth, Q);
3814 if (Known1.Zero.intersects(Known2.One) ||
3815 Known2.Zero.intersects(Known1.One))
3816 return true;
3820 if (isNonEqualSelect(V1, V2, DemandedElts, Depth, Q) ||
3821 isNonEqualSelect(V2, V1, DemandedElts, Depth, Q))
3822 return true;
3824 if (isNonEqualPointersWithRecursiveGEP(V1, V2, Q) ||
3825 isNonEqualPointersWithRecursiveGEP(V2, V1, Q))
3826 return true;
3828 Value *A, *B;
3829 // PtrToInts are NonEqual if their Ptrs are NonEqual.
3830 // Check PtrToInt type matches the pointer size.
3831 if (match(V1, m_PtrToIntSameSize(Q.DL, m_Value(A))) &&
3832 match(V2, m_PtrToIntSameSize(Q.DL, m_Value(B))))
3833 return isKnownNonEqual(A, B, DemandedElts, Depth + 1, Q);
3835 return false;
3838 /// For vector constants, loop over the elements and find the constant with the
3839 /// minimum number of sign bits. Return 0 if the value is not a vector constant
3840 /// or if any element was not analyzed; otherwise, return the count for the
3841 /// element with the minimum number of sign bits.
3842 static unsigned computeNumSignBitsVectorConstant(const Value *V,
3843 const APInt &DemandedElts,
3844 unsigned TyBits) {
3845 const auto *CV = dyn_cast<Constant>(V);
3846 if (!CV || !isa<FixedVectorType>(CV->getType()))
3847 return 0;
3849 unsigned MinSignBits = TyBits;
3850 unsigned NumElts = cast<FixedVectorType>(CV->getType())->getNumElements();
3851 for (unsigned i = 0; i != NumElts; ++i) {
3852 if (!DemandedElts[i])
3853 continue;
3854 // If we find a non-ConstantInt, bail out.
3855 auto *Elt = dyn_cast_or_null<ConstantInt>(CV->getAggregateElement(i));
3856 if (!Elt)
3857 return 0;
3859 MinSignBits = std::min(MinSignBits, Elt->getValue().getNumSignBits());
3862 return MinSignBits;
3865 static unsigned ComputeNumSignBitsImpl(const Value *V,
3866 const APInt &DemandedElts,
3867 unsigned Depth, const SimplifyQuery &Q);
3869 static unsigned ComputeNumSignBits(const Value *V, const APInt &DemandedElts,
3870 unsigned Depth, const SimplifyQuery &Q) {
3871 unsigned Result = ComputeNumSignBitsImpl(V, DemandedElts, Depth, Q);
3872 assert(Result > 0 && "At least one sign bit needs to be present!");
3873 return Result;
3876 /// Return the number of times the sign bit of the register is replicated into
3877 /// the other bits. We know that at least 1 bit is always equal to the sign bit
3878 /// (itself), but other cases can give us information. For example, immediately
3879 /// after an "ashr X, 2", we know that the top 3 bits are all equal to each
3880 /// other, so we return 3. For vectors, return the number of sign bits for the
3881 /// vector element with the minimum number of known sign bits of the demanded
3882 /// elements in the vector specified by DemandedElts.
3883 static unsigned ComputeNumSignBitsImpl(const Value *V,
3884 const APInt &DemandedElts,
3885 unsigned Depth, const SimplifyQuery &Q) {
3886 Type *Ty = V->getType();
3887 #ifndef NDEBUG
3888 assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");
3890 if (auto *FVTy = dyn_cast<FixedVectorType>(Ty)) {
3891 assert(
3892 FVTy->getNumElements() == DemandedElts.getBitWidth() &&
3893 "DemandedElt width should equal the fixed vector number of elements");
3894 } else {
3895 assert(DemandedElts == APInt(1, 1) &&
3896 "DemandedElt width should be 1 for scalars");
3898 #endif
3900 // We return the minimum number of sign bits that are guaranteed to be present
3901 // in V, so for undef we have to conservatively return 1. We don't have the
3902 // same behavior for poison though -- that's a FIXME today.
3904 Type *ScalarTy = Ty->getScalarType();
3905 unsigned TyBits = ScalarTy->isPointerTy() ?
3906 Q.DL.getPointerTypeSizeInBits(ScalarTy) :
3907 Q.DL.getTypeSizeInBits(ScalarTy);
3909 unsigned Tmp, Tmp2;
3910 unsigned FirstAnswer = 1;
3912 // Note that ConstantInt is handled by the general computeKnownBits case
3913 // below.
3915 if (Depth == MaxAnalysisRecursionDepth)
3916 return 1;
3918 if (auto *U = dyn_cast<Operator>(V)) {
3919 switch (Operator::getOpcode(V)) {
3920 default: break;
3921 case Instruction::SExt:
3922 Tmp = TyBits - U->getOperand(0)->getType()->getScalarSizeInBits();
3923 return ComputeNumSignBits(U->getOperand(0), DemandedElts, Depth + 1, Q) +
3924 Tmp;
3926 case Instruction::SDiv: {
3927 const APInt *Denominator;
3928 // sdiv X, C -> adds log(C) sign bits.
3929 if (match(U->getOperand(1), m_APInt(Denominator))) {
3931 // Ignore non-positive denominator.
3932 if (!Denominator->isStrictlyPositive())
3933 break;
3935 // Calculate the incoming numerator bits.
3936 unsigned NumBits =
3937 ComputeNumSignBits(U->getOperand(0), DemandedElts, Depth + 1, Q);
3939 // Add floor(log(C)) bits to the numerator bits.
3940 return std::min(TyBits, NumBits + Denominator->logBase2());
3942 break;
3945 case Instruction::SRem: {
3946 Tmp = ComputeNumSignBits(U->getOperand(0), DemandedElts, Depth + 1, Q);
3948 const APInt *Denominator;
3949 // srem X, C -> we know that the result is within [-C+1,C) when C is a
3950 // positive constant. This lets us put a lower bound on the number of sign
3951 // bits.
3952 if (match(U->getOperand(1), m_APInt(Denominator))) {
3954 // Ignore non-positive denominator.
3955 if (Denominator->isStrictlyPositive()) {
3956 // Calculate the leading sign bit constraints by examining the
3957 // denominator. Given that the denominator is positive, there are two
3958 // cases:
3960 // 1. The numerator is positive. The result range is [0,C) and
3961 // [0,C) u< (1 << ceilLogBase2(C)).
3963 // 2. The numerator is negative. Then the result range is (-C,0] and
3964 // integers in (-C,0] are either 0 or >u (-1 << ceilLogBase2(C)).
3966 // Thus a lower bound on the number of sign bits is `TyBits -
3967 // ceilLogBase2(C)`.
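// For example, srem i32 %x, 16: the result lies in (-16, 16), and since
// ceilLogBase2(16) == 4, at least 32 - 4 == 28 sign bits are known.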
3969 unsigned ResBits = TyBits - Denominator->ceilLogBase2();
3970 Tmp = std::max(Tmp, ResBits);
3973 return Tmp;
3976 case Instruction::AShr: {
3977 Tmp = ComputeNumSignBits(U->getOperand(0), DemandedElts, Depth + 1, Q);
3978 // ashr X, C -> adds C sign bits. Vectors too.
3979 const APInt *ShAmt;
3980 if (match(U->getOperand(1), m_APInt(ShAmt))) {
3981 if (ShAmt->uge(TyBits))
3982 break; // Bad shift.
3983 unsigned ShAmtLimited = ShAmt->getZExtValue();
3984 Tmp += ShAmtLimited;
3985 if (Tmp > TyBits) Tmp = TyBits;
3987 return Tmp;
3989 case Instruction::Shl: {
3990 const APInt *ShAmt;
3991 Value *X = nullptr;
3992 if (match(U->getOperand(1), m_APInt(ShAmt))) {
3993 // shl destroys sign bits.
3994 if (ShAmt->uge(TyBits))
3995 break; // Bad shift.
3996 // We can look through a zext (more or less treating it as a sext) if
3997 // all extended bits are shifted out.
3998 if (match(U->getOperand(0), m_ZExt(m_Value(X))) &&
3999 ShAmt->uge(TyBits - X->getType()->getScalarSizeInBits())) {
4000 Tmp = ComputeNumSignBits(X, DemandedElts, Depth + 1, Q);
4001 Tmp += TyBits - X->getType()->getScalarSizeInBits();
4002 } else
4003 Tmp =
4004 ComputeNumSignBits(U->getOperand(0), DemandedElts, Depth + 1, Q);
4005 if (ShAmt->uge(Tmp))
4006 break; // Shifted all sign bits out.
4007 Tmp2 = ShAmt->getZExtValue();
4008 return Tmp - Tmp2;
4010 break;
4012 case Instruction::And:
4013 case Instruction::Or:
4014 case Instruction::Xor: // NOT is handled here.
4015 // Logical binary ops preserve the number of sign bits at the worst.
4016 Tmp = ComputeNumSignBits(U->getOperand(0), DemandedElts, Depth + 1, Q);
4017 if (Tmp != 1) {
4018 Tmp2 = ComputeNumSignBits(U->getOperand(1), DemandedElts, Depth + 1, Q);
4019 FirstAnswer = std::min(Tmp, Tmp2);
4020 // We computed what we know about the sign bits as our first
4021 // answer. Now proceed to the generic code that uses
4022 // computeKnownBits, and pick whichever answer is better.
4024 break;
4026 case Instruction::Select: {
4027 // If we have a clamp pattern, we know that the number of sign bits will
4028 // be the minimum of the clamp min/max range.
4029 const Value *X;
4030 const APInt *CLow, *CHigh;
4031 if (isSignedMinMaxClamp(U, X, CLow, CHigh))
4032 return std::min(CLow->getNumSignBits(), CHigh->getNumSignBits());
4034 Tmp = ComputeNumSignBits(U->getOperand(1), DemandedElts, Depth + 1, Q);
4035 if (Tmp == 1)
4036 break;
4037 Tmp2 = ComputeNumSignBits(U->getOperand(2), DemandedElts, Depth + 1, Q);
4038 return std::min(Tmp, Tmp2);
4041 case Instruction::Add:
4042 // Add can have at most one carry bit. Thus we know that the output
4043 // is, at worst, one more bit than the inputs.
4044 Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
4045 if (Tmp == 1) break;
4047 // Special case decrementing a value (ADD X, -1):
4048 if (const auto *CRHS = dyn_cast<Constant>(U->getOperand(1)))
4049 if (CRHS->isAllOnesValue()) {
4050 KnownBits Known(TyBits);
4051 computeKnownBits(U->getOperand(0), DemandedElts, Known, Depth + 1, Q);
4053 // If the input is known to be 0 or 1, the output is 0/-1, which is
4054 // all sign bits set.
4055 if ((Known.Zero | 1).isAllOnes())
4056 return TyBits;
4058 // If we are subtracting one from a non-negative number, there is no carry
4059 // out of the result.
4060 if (Known.isNonNegative())
4061 return Tmp;
4064 Tmp2 = ComputeNumSignBits(U->getOperand(1), DemandedElts, Depth + 1, Q);
4065 if (Tmp2 == 1)
4066 break;
4067 return std::min(Tmp, Tmp2) - 1;
4069 case Instruction::Sub:
4070 Tmp2 = ComputeNumSignBits(U->getOperand(1), DemandedElts, Depth + 1, Q);
4071 if (Tmp2 == 1)
4072 break;
4074 // Handle NEG.
4075 if (const auto *CLHS = dyn_cast<Constant>(U->getOperand(0)))
4076 if (CLHS->isNullValue()) {
4077 KnownBits Known(TyBits);
4078 computeKnownBits(U->getOperand(1), DemandedElts, Known, Depth + 1, Q);
4079 // If the input is known to be 0 or 1, the output is 0/-1, which is
4080 // all sign bits set.
4081 if ((Known.Zero | 1).isAllOnes())
4082 return TyBits;
4084 // If the input is known to be non-negative (the sign bit is known clear),
4085 // the output of the NEG has the same number of sign bits as the
4086 // input.
4087 if (Known.isNonNegative())
4088 return Tmp2;
4090 // Otherwise, we treat this like a SUB.
4093 // Sub can have at most one carry bit. Thus we know that the output
4094 // is, at worst, one more bit than the inputs.
4095 Tmp = ComputeNumSignBits(U->getOperand(0), DemandedElts, Depth + 1, Q);
4096 if (Tmp == 1)
4097 break;
4098 return std::min(Tmp, Tmp2) - 1;
4100 case Instruction::Mul: {
4101 // The output of the Mul can be at most twice the valid bits in the
4102 // inputs.
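// For example, if both i32 operands have at least 20 sign bits, each has at
// most 13 valid bits (32 - 20 + 1), the product needs at most 26 valid bits,
// and the result therefore has at least 32 - 26 + 1 == 7 sign bits.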
4103 unsigned SignBitsOp0 =
4104 ComputeNumSignBits(U->getOperand(0), DemandedElts, Depth + 1, Q);
4105 if (SignBitsOp0 == 1)
4106 break;
4107 unsigned SignBitsOp1 =
4108 ComputeNumSignBits(U->getOperand(1), DemandedElts, Depth + 1, Q);
4109 if (SignBitsOp1 == 1)
4110 break;
4111 unsigned OutValidBits =
4112 (TyBits - SignBitsOp0 + 1) + (TyBits - SignBitsOp1 + 1);
4113 return OutValidBits > TyBits ? 1 : TyBits - OutValidBits + 1;
4116 case Instruction::PHI: {
4117 const PHINode *PN = cast<PHINode>(U);
4118 unsigned NumIncomingValues = PN->getNumIncomingValues();
4119 // Don't analyze large in-degree PHIs.
4120 if (NumIncomingValues > 4) break;
4121 // Unreachable blocks may have zero-operand PHI nodes.
4122 if (NumIncomingValues == 0) break;
4124 // Take the minimum of all incoming values. This can't infinitely loop
4125 // because of our depth threshold.
4126 SimplifyQuery RecQ = Q.getWithoutCondContext();
4127 Tmp = TyBits;
4128 for (unsigned i = 0, e = NumIncomingValues; i != e; ++i) {
4129 if (Tmp == 1) return Tmp;
4130 RecQ.CxtI = PN->getIncomingBlock(i)->getTerminator();
4131 Tmp = std::min(Tmp, ComputeNumSignBits(PN->getIncomingValue(i),
4132 DemandedElts, Depth + 1, RecQ));
4134 return Tmp;
4137 case Instruction::Trunc: {
4138 // If the input contained enough sign bits that some remain after the
4139 // truncation, then we can make use of that. Otherwise we don't know
4140 // anything.
4141 Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
4142 unsigned OperandTyBits = U->getOperand(0)->getType()->getScalarSizeInBits();
4143 if (Tmp > (OperandTyBits - TyBits))
4144 return Tmp - (OperandTyBits - TyBits);
4146 return 1;
4149 case Instruction::ExtractElement:
4150 // Look through extract element. At the moment we keep this simple and
4151 // skip tracking the specific element. But at least we might find
4152 // information valid for all elements of the vector (for example if vector
4153 // is sign extended, shifted, etc).
4154 return ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
4156 case Instruction::ShuffleVector: {
4157 // Collect the minimum number of sign bits that are shared by every vector
4158 // element referenced by the shuffle.
4159 auto *Shuf = dyn_cast<ShuffleVectorInst>(U);
4160 if (!Shuf) {
4161 // FIXME: Add support for shufflevector constant expressions.
4162 return 1;
4164 APInt DemandedLHS, DemandedRHS;
4165 // For undef elements, we don't know anything about the common state of
4166 // the shuffle result.
4167 if (!getShuffleDemandedElts(Shuf, DemandedElts, DemandedLHS, DemandedRHS))
4168 return 1;
4169 Tmp = std::numeric_limits<unsigned>::max();
4170 if (!!DemandedLHS) {
4171 const Value *LHS = Shuf->getOperand(0);
4172 Tmp = ComputeNumSignBits(LHS, DemandedLHS, Depth + 1, Q);
4174 // If we don't know anything, early out and try computeKnownBits
4175 // fall-back.
4176 if (Tmp == 1)
4177 break;
4178 if (!!DemandedRHS) {
4179 const Value *RHS = Shuf->getOperand(1);
4180 Tmp2 = ComputeNumSignBits(RHS, DemandedRHS, Depth + 1, Q);
4181 Tmp = std::min(Tmp, Tmp2);
4183 // If we don't know anything, early out and try computeKnownBits
4184 // fall-back.
4185 if (Tmp == 1)
4186 break;
4187 assert(Tmp <= TyBits && "Failed to determine minimum sign bits");
4188 return Tmp;
4190 case Instruction::Call: {
4191 if (const auto *II = dyn_cast<IntrinsicInst>(U)) {
4192 switch (II->getIntrinsicID()) {
4193 default:
4194 break;
4195 case Intrinsic::abs:
4196 Tmp =
4197 ComputeNumSignBits(U->getOperand(0), DemandedElts, Depth + 1, Q);
4198 if (Tmp == 1)
4199 break;
4201 // Absolute value reduces number of sign bits by at most 1.
4202 return Tmp - 1;
4203 case Intrinsic::smin:
4204 case Intrinsic::smax: {
4205 const APInt *CLow, *CHigh;
4206 if (isSignedMinMaxIntrinsicClamp(II, CLow, CHigh))
4207 return std::min(CLow->getNumSignBits(), CHigh->getNumSignBits());
4215 // Finally, if we can prove that the top bits of the result are 0's or 1's,
4216 // use this information.
4218 // If we can examine all elements of a vector constant successfully, we're
4219 // done (we can't do any better than that). If not, keep trying.
4220 if (unsigned VecSignBits =
4221 computeNumSignBitsVectorConstant(V, DemandedElts, TyBits))
4222 return VecSignBits;
4224 KnownBits Known(TyBits);
4225 computeKnownBits(V, DemandedElts, Known, Depth, Q);
4227 // If we know that the sign bit is either zero or one, determine the number of
4228 // identical bits in the top of the input value.
4229 return std::max(FirstAnswer, Known.countMinSignBits());
4232 Intrinsic::ID llvm::getIntrinsicForCallSite(const CallBase &CB,
4233 const TargetLibraryInfo *TLI) {
4234 const Function *F = CB.getCalledFunction();
4235 if (!F)
4236 return Intrinsic::not_intrinsic;
4238 if (F->isIntrinsic())
4239 return F->getIntrinsicID();
4241 // We are going to infer semantics of a library function based on mapping it
4242 // to an LLVM intrinsic. Check that the library function is available from
4243 // this callbase and in this environment.
4244 LibFunc Func;
4245 if (F->hasLocalLinkage() || !TLI || !TLI->getLibFunc(CB, Func) ||
4246 !CB.onlyReadsMemory())
4247 return Intrinsic::not_intrinsic;
4249 switch (Func) {
4250 default:
4251 break;
4252 case LibFunc_sin:
4253 case LibFunc_sinf:
4254 case LibFunc_sinl:
4255 return Intrinsic::sin;
4256 case LibFunc_cos:
4257 case LibFunc_cosf:
4258 case LibFunc_cosl:
4259 return Intrinsic::cos;
4260 case LibFunc_tan:
4261 case LibFunc_tanf:
4262 case LibFunc_tanl:
4263 return Intrinsic::tan;
4264 case LibFunc_asin:
4265 case LibFunc_asinf:
4266 case LibFunc_asinl:
4267 return Intrinsic::asin;
4268 case LibFunc_acos:
4269 case LibFunc_acosf:
4270 case LibFunc_acosl:
4271 return Intrinsic::acos;
4272 case LibFunc_atan:
4273 case LibFunc_atanf:
4274 case LibFunc_atanl:
4275 return Intrinsic::atan;
4276 case LibFunc_atan2:
4277 case LibFunc_atan2f:
4278 case LibFunc_atan2l:
4279 return Intrinsic::atan2;
4280 case LibFunc_sinh:
4281 case LibFunc_sinhf:
4282 case LibFunc_sinhl:
4283 return Intrinsic::sinh;
4284 case LibFunc_cosh:
4285 case LibFunc_coshf:
4286 case LibFunc_coshl:
4287 return Intrinsic::cosh;
4288 case LibFunc_tanh:
4289 case LibFunc_tanhf:
4290 case LibFunc_tanhl:
4291 return Intrinsic::tanh;
4292 case LibFunc_exp:
4293 case LibFunc_expf:
4294 case LibFunc_expl:
4295 return Intrinsic::exp;
4296 case LibFunc_exp2:
4297 case LibFunc_exp2f:
4298 case LibFunc_exp2l:
4299 return Intrinsic::exp2;
4300 case LibFunc_exp10:
4301 case LibFunc_exp10f:
4302 case LibFunc_exp10l:
4303 return Intrinsic::exp10;
4304 case LibFunc_log:
4305 case LibFunc_logf:
4306 case LibFunc_logl:
4307 return Intrinsic::log;
4308 case LibFunc_log10:
4309 case LibFunc_log10f:
4310 case LibFunc_log10l:
4311 return Intrinsic::log10;
4312 case LibFunc_log2:
4313 case LibFunc_log2f:
4314 case LibFunc_log2l:
4315 return Intrinsic::log2;
4316 case LibFunc_fabs:
4317 case LibFunc_fabsf:
4318 case LibFunc_fabsl:
4319 return Intrinsic::fabs;
4320 case LibFunc_fmin:
4321 case LibFunc_fminf:
4322 case LibFunc_fminl:
4323 return Intrinsic::minnum;
4324 case LibFunc_fmax:
4325 case LibFunc_fmaxf:
4326 case LibFunc_fmaxl:
4327 return Intrinsic::maxnum;
4328 case LibFunc_copysign:
4329 case LibFunc_copysignf:
4330 case LibFunc_copysignl:
4331 return Intrinsic::copysign;
4332 case LibFunc_floor:
4333 case LibFunc_floorf:
4334 case LibFunc_floorl:
4335 return Intrinsic::floor;
4336 case LibFunc_ceil:
4337 case LibFunc_ceilf:
4338 case LibFunc_ceill:
4339 return Intrinsic::ceil;
4340 case LibFunc_trunc:
4341 case LibFunc_truncf:
4342 case LibFunc_truncl:
4343 return Intrinsic::trunc;
4344 case LibFunc_rint:
4345 case LibFunc_rintf:
4346 case LibFunc_rintl:
4347 return Intrinsic::rint;
4348 case LibFunc_nearbyint:
4349 case LibFunc_nearbyintf:
4350 case LibFunc_nearbyintl:
4351 return Intrinsic::nearbyint;
4352 case LibFunc_round:
4353 case LibFunc_roundf:
4354 case LibFunc_roundl:
4355 return Intrinsic::round;
4356 case LibFunc_roundeven:
4357 case LibFunc_roundevenf:
4358 case LibFunc_roundevenl:
4359 return Intrinsic::roundeven;
4360 case LibFunc_pow:
4361 case LibFunc_powf:
4362 case LibFunc_powl:
4363 return Intrinsic::pow;
4364 case LibFunc_sqrt:
4365 case LibFunc_sqrtf:
4366 case LibFunc_sqrtl:
4367 return Intrinsic::sqrt;
4370 return Intrinsic::not_intrinsic;
4373 /// Return true if it's possible to assume IEEE treatment of input denormals in
4374 /// \p F for a value of type \p Ty.
4375 static bool inputDenormalIsIEEE(const Function &F, const Type *Ty) {
4376 Ty = Ty->getScalarType();
4377 return F.getDenormalMode(Ty->getFltSemantics()).Input == DenormalMode::IEEE;
4380 static bool inputDenormalIsIEEEOrPosZero(const Function &F, const Type *Ty) {
4381 Ty = Ty->getScalarType();
4382 DenormalMode Mode = F.getDenormalMode(Ty->getFltSemantics());
4383 return Mode.Input == DenormalMode::IEEE ||
4384 Mode.Input == DenormalMode::PositiveZero;
4387 static bool outputDenormalIsIEEEOrPosZero(const Function &F, const Type *Ty) {
4388 Ty = Ty->getScalarType();
4389 DenormalMode Mode = F.getDenormalMode(Ty->getFltSemantics());
4390 return Mode.Output == DenormalMode::IEEE ||
4391 Mode.Output == DenormalMode::PositiveZero;
4394 bool KnownFPClass::isKnownNeverLogicalZero(const Function &F, Type *Ty) const {
4395 return isKnownNeverZero() &&
4396 (isKnownNeverSubnormal() || inputDenormalIsIEEE(F, Ty));
4399 bool KnownFPClass::isKnownNeverLogicalNegZero(const Function &F,
4400 Type *Ty) const {
4401 return isKnownNeverNegZero() &&
4402 (isKnownNeverNegSubnormal() || inputDenormalIsIEEEOrPosZero(F, Ty));
4405 bool KnownFPClass::isKnownNeverLogicalPosZero(const Function &F,
4406 Type *Ty) const {
4407 if (!isKnownNeverPosZero())
4408 return false;
4410 // If we know there are no denormals, nothing can be flushed to zero.
4411 if (isKnownNeverSubnormal())
4412 return true;
4414 DenormalMode Mode = F.getDenormalMode(Ty->getScalarType()->getFltSemantics());
4415 switch (Mode.Input) {
4416 case DenormalMode::IEEE:
4417 return true;
4418 case DenormalMode::PreserveSign:
4419 // Negative subnormal won't flush to +0
4420 return isKnownNeverPosSubnormal();
4421 case DenormalMode::PositiveZero:
4422 default:
4423 // Both positive and negative subnormal could flush to +0
4424 return false;
4427 llvm_unreachable("covered switch over denormal mode");
4430 void KnownFPClass::propagateDenormal(const KnownFPClass &Src, const Function &F,
4431 Type *Ty) {
4432 KnownFPClasses = Src.KnownFPClasses;
4433 // If both +0 and -0 are already possible for the source, a flushed denormal
4434 // input cannot add any new classes, so there is nothing to check.
4435 if (!Src.isKnownNeverPosZero() && !Src.isKnownNeverNegZero())
4436 return;
4438 // If we know the input can't be a denormal, it can't be flushed to 0.
4439 if (Src.isKnownNeverSubnormal())
4440 return;
4442 DenormalMode Mode = F.getDenormalMode(Ty->getScalarType()->getFltSemantics());
4444 if (!Src.isKnownNeverPosSubnormal() && Mode != DenormalMode::getIEEE())
4445 KnownFPClasses |= fcPosZero;
4447 if (!Src.isKnownNeverNegSubnormal() && Mode != DenormalMode::getIEEE()) {
4448 if (Mode != DenormalMode::getPositiveZero())
4449 KnownFPClasses |= fcNegZero;
4451 if (Mode.Input == DenormalMode::PositiveZero ||
4452 Mode.Output == DenormalMode::PositiveZero ||
4453 Mode.Input == DenormalMode::Dynamic ||
4454 Mode.Output == DenormalMode::Dynamic)
4455 KnownFPClasses |= fcPosZero;
4459 void KnownFPClass::propagateCanonicalizingSrc(const KnownFPClass &Src,
4460 const Function &F, Type *Ty) {
4461 propagateDenormal(Src, F, Ty);
4462 propagateNaN(Src, /*PreserveSign=*/true);
4465 /// Given an exploded icmp instruction, return true if the comparison only
4466 /// checks the sign bit. If it only checks the sign bit, set TrueIfSigned to
4467 /// indicate whether the comparison is true when the input's sign bit is set.
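/// For example, (icmp slt i32 %x, 0) and (icmp ugt i32 %x, 2147483647) both
/// check only the sign bit of %x, with TrueIfSigned == true.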
4468 bool llvm::isSignBitCheck(ICmpInst::Predicate Pred, const APInt &RHS,
4469 bool &TrueIfSigned) {
4470 switch (Pred) {
4471 case ICmpInst::ICMP_SLT: // True if LHS s< 0
4472 TrueIfSigned = true;
4473 return RHS.isZero();
4474 case ICmpInst::ICMP_SLE: // True if LHS s<= -1
4475 TrueIfSigned = true;
4476 return RHS.isAllOnes();
4477 case ICmpInst::ICMP_SGT: // True if LHS s> -1
4478 TrueIfSigned = false;
4479 return RHS.isAllOnes();
4480 case ICmpInst::ICMP_SGE: // True if LHS s>= 0
4481 TrueIfSigned = false;
4482 return RHS.isZero();
4483 case ICmpInst::ICMP_UGT:
4484 // True if LHS u> RHS and RHS == sign-bit-mask - 1
4485 TrueIfSigned = true;
4486 return RHS.isMaxSignedValue();
4487 case ICmpInst::ICMP_UGE:
4488 // True if LHS u>= RHS and RHS == sign-bit-mask (2^7, 2^15, 2^31, etc)
4489 TrueIfSigned = true;
4490 return RHS.isMinSignedValue();
4491 case ICmpInst::ICMP_ULT:
4492 // True if LHS u< RHS and RHS == sign-bit-mask (2^7, 2^15, 2^31, etc)
4493 TrueIfSigned = false;
4494 return RHS.isMinSignedValue();
4495 case ICmpInst::ICMP_ULE:
4496 // True if LHS u<= RHS and RHS == sign-bit-mask - 1
4497 TrueIfSigned = false;
4498 return RHS.isMaxSignedValue();
4499 default:
4500 return false;
4504 /// Returns a pair of values, which if passed to llvm.is.fpclass, returns the
4505 /// same result as an fcmp with the given operands.
4506 std::pair<Value *, FPClassTest> llvm::fcmpToClassTest(FCmpInst::Predicate Pred,
4507 const Function &F,
4508 Value *LHS, Value *RHS,
4509 bool LookThroughSrc) {
4510 const APFloat *ConstRHS;
4511 if (!match(RHS, m_APFloatAllowPoison(ConstRHS)))
4512 return {nullptr, fcAllFlags};
4514 return fcmpToClassTest(Pred, F, LHS, ConstRHS, LookThroughSrc);
4517 std::pair<Value *, FPClassTest>
4518 llvm::fcmpToClassTest(FCmpInst::Predicate Pred, const Function &F, Value *LHS,
4519 const APFloat *ConstRHS, bool LookThroughSrc) {
4521 auto [Src, ClassIfTrue, ClassIfFalse] =
4522 fcmpImpliesClass(Pred, F, LHS, *ConstRHS, LookThroughSrc);
4523 if (Src && ClassIfTrue == ~ClassIfFalse)
4524 return {Src, ClassIfTrue};
4525 return {nullptr, fcAllFlags};
4528 /// Return the return value for fcmpImpliesClass for a compare that produces an
4529 /// exact class test.
4530 static std::tuple<Value *, FPClassTest, FPClassTest> exactClass(Value *V,
4531 FPClassTest M) {
4532 return {V, M, ~M};
4535 std::tuple<Value *, FPClassTest, FPClassTest>
4536 llvm::fcmpImpliesClass(CmpInst::Predicate Pred, const Function &F, Value *LHS,
4537 FPClassTest RHSClass, bool LookThroughSrc) {
4538 assert(RHSClass != fcNone);
4539 Value *Src = LHS;
4541 if (Pred == FCmpInst::FCMP_TRUE)
4542 return exactClass(Src, fcAllFlags);
4544 if (Pred == FCmpInst::FCMP_FALSE)
4545 return exactClass(Src, fcNone);
4547 const FPClassTest OrigClass = RHSClass;
4549 const bool IsNegativeRHS = (RHSClass & fcNegative) == RHSClass;
4550 const bool IsPositiveRHS = (RHSClass & fcPositive) == RHSClass;
4551 const bool IsNaN = (RHSClass & ~fcNan) == fcNone;
4553 if (IsNaN) {
4554 // fcmp o__ x, nan -> false
4555 // fcmp u__ x, nan -> true
4556 return exactClass(Src, CmpInst::isOrdered(Pred) ? fcNone : fcAllFlags);
4559 // fcmp ord x, zero|normal|subnormal|inf -> ~fcNan
4560 if (Pred == FCmpInst::FCMP_ORD)
4561 return exactClass(Src, ~fcNan);
4563 // fcmp uno x, zero|normal|subnormal|inf -> fcNan
4564 if (Pred == FCmpInst::FCMP_UNO)
4565 return exactClass(Src, fcNan);
4567 const bool IsFabs = LookThroughSrc && match(LHS, m_FAbs(m_Value(Src)));
4568 if (IsFabs)
4569 RHSClass = llvm::inverse_fabs(RHSClass);
4571 const bool IsZero = (OrigClass & fcZero) == OrigClass;
4572 if (IsZero) {
4573 assert(Pred != FCmpInst::FCMP_ORD && Pred != FCmpInst::FCMP_UNO);
4574 // A compare against zero only corresponds exactly to an fcZero class test if
4575 // input denormals are not flushed.
4576 // TODO: Handle DAZ by expanding masks to cover subnormal cases.
4577 if (!inputDenormalIsIEEE(F, LHS->getType()))
4578 return {nullptr, fcAllFlags, fcAllFlags};
4580 switch (Pred) {
4581 case FCmpInst::FCMP_OEQ: // Match x == 0.0
4582 return exactClass(Src, fcZero);
4583 case FCmpInst::FCMP_UEQ: // Match isnan(x) || (x == 0.0)
4584 return exactClass(Src, fcZero | fcNan);
4585 case FCmpInst::FCMP_UNE: // Match (x != 0.0)
4586 return exactClass(Src, ~fcZero);
4587 case FCmpInst::FCMP_ONE: // Match !isnan(x) && x != 0.0
4588 return exactClass(Src, ~fcNan & ~fcZero);
4589 case FCmpInst::FCMP_ORD:
4590 // Canonical form of ord/uno is with a zero. We could also handle
4591 // non-canonical other non-NaN constants or LHS == RHS.
4592 return exactClass(Src, ~fcNan);
4593 case FCmpInst::FCMP_UNO:
4594 return exactClass(Src, fcNan);
4595 case FCmpInst::FCMP_OGT: // x > 0
4596 return exactClass(Src, fcPosSubnormal | fcPosNormal | fcPosInf);
4597 case FCmpInst::FCMP_UGT: // isnan(x) || x > 0
4598 return exactClass(Src, fcPosSubnormal | fcPosNormal | fcPosInf | fcNan);
4599 case FCmpInst::FCMP_OGE: // x >= 0
4600 return exactClass(Src, fcPositive | fcNegZero);
4601 case FCmpInst::FCMP_UGE: // isnan(x) || x >= 0
4602 return exactClass(Src, fcPositive | fcNegZero | fcNan);
4603 case FCmpInst::FCMP_OLT: // x < 0
4604 return exactClass(Src, fcNegSubnormal | fcNegNormal | fcNegInf);
4605 case FCmpInst::FCMP_ULT: // isnan(x) || x < 0
4606 return exactClass(Src, fcNegSubnormal | fcNegNormal | fcNegInf | fcNan);
4607 case FCmpInst::FCMP_OLE: // x <= 0
4608 return exactClass(Src, fcNegative | fcPosZero);
4609 case FCmpInst::FCMP_ULE: // isnan(x) || x <= 0
4610 return exactClass(Src, fcNegative | fcPosZero | fcNan);
4611 default:
4612 llvm_unreachable("all compare types are handled");
4615 return {nullptr, fcAllFlags, fcAllFlags};
4618 const bool IsDenormalRHS = (OrigClass & fcSubnormal) == OrigClass;
4620 const bool IsInf = (OrigClass & fcInf) == OrigClass;
4621 if (IsInf) {
4622 FPClassTest Mask = fcAllFlags;
4624 switch (Pred) {
4625 case FCmpInst::FCMP_OEQ:
4626 case FCmpInst::FCMP_UNE: {
4627 // Match __builtin_isinf patterns
4629 // fcmp oeq x, +inf -> is_fpclass x, fcPosInf
4630 // fcmp oeq fabs(x), +inf -> is_fpclass x, fcInf
4631 // fcmp oeq x, -inf -> is_fpclass x, fcNegInf
4632 // fcmp oeq fabs(x), -inf -> is_fpclass x, 0 -> false
4634 // fcmp une x, +inf -> is_fpclass x, ~fcPosInf
4635 // fcmp une fabs(x), +inf -> is_fpclass x, ~fcInf
4636 // fcmp une x, -inf -> is_fpclass x, ~fcNegInf
4637 // fcmp une fabs(x), -inf -> is_fpclass x, fcAllFlags -> true
4638 if (IsNegativeRHS) {
4639 Mask = fcNegInf;
4640 if (IsFabs)
4641 Mask = fcNone;
4642 } else {
4643 Mask = fcPosInf;
4644 if (IsFabs)
4645 Mask |= fcNegInf;
4647 break;
4649 case FCmpInst::FCMP_ONE:
4650 case FCmpInst::FCMP_UEQ: {
4651 // Match __builtin_isinf patterns
4652 // fcmp one x, -inf -> is_fpclass x, ~fcNegInf & ~fcNan
4653 // fcmp one fabs(x), -inf -> is_fpclass x, ~fcNan
4654 // fcmp one x, +inf -> is_fpclass x, ~fcPosInf & ~fcNan
4655 // fcmp one fabs(x), +inf -> is_fpclass x, ~fcInf & ~fcNan
4657 // fcmp ueq x, +inf -> is_fpclass x, fcPosInf|fcNan
4658 // fcmp ueq (fabs x), +inf -> is_fpclass x, fcInf|fcNan
4659 // fcmp ueq x, -inf -> is_fpclass x, fcNegInf|fcNan
4660 // fcmp ueq fabs(x), -inf -> is_fpclass x, fcNan
4661 if (IsNegativeRHS) {
4662 Mask = ~fcNegInf & ~fcNan;
4663 if (IsFabs)
4664 Mask = ~fcNan;
4665 } else {
4666 Mask = ~fcPosInf & ~fcNan;
4667 if (IsFabs)
4668 Mask &= ~fcNegInf;
4671 break;
4673 case FCmpInst::FCMP_OLT:
4674 case FCmpInst::FCMP_UGE: {
4675 if (IsNegativeRHS) {
4676 // No value is ordered and less than negative infinity.
4677 // All values are either unordered with, or at least, negative infinity.
4678 // fcmp olt x, -inf -> false
4679 // fcmp uge x, -inf -> true
4680 Mask = fcNone;
4681 break;
4684 // fcmp olt fabs(x), +inf -> fcFinite
4685 // fcmp uge fabs(x), +inf -> ~fcFinite
4686 // fcmp olt x, +inf -> fcFinite|fcNegInf
4687 // fcmp uge x, +inf -> ~(fcFinite|fcNegInf)
4688 Mask = fcFinite;
4689 if (!IsFabs)
4690 Mask |= fcNegInf;
4691 break;
4693 case FCmpInst::FCMP_OGE:
4694 case FCmpInst::FCMP_ULT: {
4695 if (IsNegativeRHS) {
4696 // fcmp oge x, -inf -> ~fcNan
4697 // fcmp oge fabs(x), -inf -> ~fcNan
4698 // fcmp ult x, -inf -> fcNan
4699 // fcmp ult fabs(x), -inf -> fcNan
4700 Mask = ~fcNan;
4701 break;
4704 // fcmp oge fabs(x), +inf -> fcInf
4705 // fcmp oge x, +inf -> fcPosInf
4706 // fcmp ult fabs(x), +inf -> ~fcInf
4707 // fcmp ult x, +inf -> ~fcPosInf
4708 Mask = fcPosInf;
4709 if (IsFabs)
4710 Mask |= fcNegInf;
4711 break;
4713 case FCmpInst::FCMP_OGT:
4714 case FCmpInst::FCMP_ULE: {
4715 if (IsNegativeRHS) {
4716 // fcmp ogt x, -inf -> fcmp one x, -inf
4717 // fcmp ogt fabs(x), -inf -> fcmp ord x, x
4718 // fcmp ule x, -inf -> fcmp ueq x, -inf
4719 // fcmp ule fabs(x), -inf -> fcmp uno x, x
4720 Mask = IsFabs ? ~fcNan : ~(fcNegInf | fcNan);
4721 break;
4724 // No value is ordered and greater than infinity.
4725 Mask = fcNone;
4726 break;
4728 case FCmpInst::FCMP_OLE:
4729 case FCmpInst::FCMP_UGT: {
4730 if (IsNegativeRHS) {
4731 Mask = IsFabs ? fcNone : fcNegInf;
4732 break;
4735 // fcmp ole x, +inf -> fcmp ord x, x
4736 // fcmp ole fabs(x), +inf -> fcmp ord x, x
4737 // fcmp ole x, -inf -> fcmp oeq x, -inf
4738 // fcmp ole fabs(x), -inf -> false
4739 Mask = ~fcNan;
4740 break;
4742 default:
4743 llvm_unreachable("all compare types are handled");
4746 // Invert the comparison for the unordered cases.
4747 if (FCmpInst::isUnordered(Pred))
4748 Mask = ~Mask;
4750 return exactClass(Src, Mask);
4753 if (Pred == FCmpInst::FCMP_OEQ)
4754 return {Src, RHSClass, fcAllFlags};
4756 if (Pred == FCmpInst::FCMP_UEQ) {
4757 FPClassTest Class = RHSClass | fcNan;
4758 return {Src, Class, ~fcNan};
4761 if (Pred == FCmpInst::FCMP_ONE)
4762 return {Src, ~fcNan, RHSClass | fcNan};
4764 if (Pred == FCmpInst::FCMP_UNE)
4765 return {Src, fcAllFlags, RHSClass};
4767 assert((RHSClass == fcNone || RHSClass == fcPosNormal ||
4768 RHSClass == fcNegNormal || RHSClass == fcNormal ||
4769 RHSClass == fcPosSubnormal || RHSClass == fcNegSubnormal ||
4770 RHSClass == fcSubnormal) &&
4771 "should have been recognized as an exact class test");
4773 if (IsNegativeRHS) {
4774 // TODO: Handle fneg(fabs)
4775 if (IsFabs) {
4776 // fabs(x) o> -k -> fcmp ord x, x
4777 // fabs(x) u> -k -> true
4778 // fabs(x) o< -k -> false
4779 // fabs(x) u< -k -> fcmp uno x, x
4780 switch (Pred) {
4781 case FCmpInst::FCMP_OGT:
4782 case FCmpInst::FCMP_OGE:
4783 return {Src, ~fcNan, fcNan};
4784 case FCmpInst::FCMP_UGT:
4785 case FCmpInst::FCMP_UGE:
4786 return {Src, fcAllFlags, fcNone};
4787 case FCmpInst::FCMP_OLT:
4788 case FCmpInst::FCMP_OLE:
4789 return {Src, fcNone, fcAllFlags};
4790 case FCmpInst::FCMP_ULT:
4791 case FCmpInst::FCMP_ULE:
4792 return {Src, fcNan, ~fcNan};
4793 default:
4794 break;
4797 return {nullptr, fcAllFlags, fcAllFlags};
4800 FPClassTest ClassesLE = fcNegInf | fcNegNormal;
4801 FPClassTest ClassesGE = fcPositive | fcNegZero | fcNegSubnormal;
4803 if (IsDenormalRHS)
4804 ClassesLE |= fcNegSubnormal;
4805 else
4806 ClassesGE |= fcNegNormal;
4808 switch (Pred) {
4809 case FCmpInst::FCMP_OGT:
4810 case FCmpInst::FCMP_OGE:
4811 return {Src, ClassesGE, ~ClassesGE | RHSClass};
4812 case FCmpInst::FCMP_UGT:
4813 case FCmpInst::FCMP_UGE:
4814 return {Src, ClassesGE | fcNan, ~(ClassesGE | fcNan) | RHSClass};
4815 case FCmpInst::FCMP_OLT:
4816 case FCmpInst::FCMP_OLE:
4817 return {Src, ClassesLE, ~ClassesLE | RHSClass};
4818 case FCmpInst::FCMP_ULT:
4819 case FCmpInst::FCMP_ULE:
4820 return {Src, ClassesLE | fcNan, ~(ClassesLE | fcNan) | RHSClass};
4821 default:
4822 break;
4824 } else if (IsPositiveRHS) {
4825 FPClassTest ClassesGE = fcPosNormal | fcPosInf;
4826 FPClassTest ClassesLE = fcNegative | fcPosZero | fcPosSubnormal;
4827 if (IsDenormalRHS)
4828 ClassesGE |= fcPosSubnormal;
4829 else
4830 ClassesLE |= fcPosNormal;
4832 if (IsFabs) {
4833 ClassesGE = llvm::inverse_fabs(ClassesGE);
4834 ClassesLE = llvm::inverse_fabs(ClassesLE);
4837 switch (Pred) {
4838 case FCmpInst::FCMP_OGT:
4839 case FCmpInst::FCMP_OGE:
4840 return {Src, ClassesGE, ~ClassesGE | RHSClass};
4841 case FCmpInst::FCMP_UGT:
4842 case FCmpInst::FCMP_UGE:
4843 return {Src, ClassesGE | fcNan, ~(ClassesGE | fcNan) | RHSClass};
4844 case FCmpInst::FCMP_OLT:
4845 case FCmpInst::FCMP_OLE:
4846 return {Src, ClassesLE, ~ClassesLE | RHSClass};
4847 case FCmpInst::FCMP_ULT:
4848 case FCmpInst::FCMP_ULE:
4849 return {Src, ClassesLE | fcNan, ~(ClassesLE | fcNan) | RHSClass};
4850 default:
4851 break;
4855 return {nullptr, fcAllFlags, fcAllFlags};
4858 std::tuple<Value *, FPClassTest, FPClassTest>
4859 llvm::fcmpImpliesClass(CmpInst::Predicate Pred, const Function &F, Value *LHS,
4860 const APFloat &ConstRHS, bool LookThroughSrc) {
4861 // We can refine checks against smallest normal / largest denormal to an
4862 // exact class test.
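// For instance (a sketch of the __builtin_isnormal-style idiom),
//   fcmp oge fabs(x), smallest_normal
// refines to the exact test is_fpclass(x, fcNormal | fcInf) rather than a
// pair of inexact masks.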
4863 if (!ConstRHS.isNegative() && ConstRHS.isSmallestNormalized()) {
4864 Value *Src = LHS;
4865 const bool IsFabs = LookThroughSrc && match(LHS, m_FAbs(m_Value(Src)));
4867 FPClassTest Mask;
4868 // Match pattern that's used in __builtin_isnormal.
4869 switch (Pred) {
4870 case FCmpInst::FCMP_OLT:
4871 case FCmpInst::FCMP_UGE: {
4872 // fcmp olt x, smallest_normal -> fcNegInf|fcNegNormal|fcSubnormal|fcZero
4873 // fcmp olt fabs(x), smallest_normal -> fcSubnormal|fcZero
4874 // fcmp uge x, smallest_normal -> fcNan|fcPosNormal|fcPosInf
4875 // fcmp uge fabs(x), smallest_normal -> ~(fcSubnormal|fcZero)
4876 Mask = fcZero | fcSubnormal;
4877 if (!IsFabs)
4878 Mask |= fcNegNormal | fcNegInf;
4880 break;
4882 case FCmpInst::FCMP_OGE:
4883 case FCmpInst::FCMP_ULT: {
4884 // fcmp oge x, smallest_normal -> fcPosNormal | fcPosInf
4885 // fcmp oge fabs(x), smallest_normal -> fcInf | fcNormal
4886 // fcmp ult x, smallest_normal -> ~(fcPosNormal | fcPosInf)
4887 // fcmp ult fabs(x), smallest_normal -> ~(fcInf | fcNormal)
4888 Mask = fcPosInf | fcPosNormal;
4889 if (IsFabs)
4890 Mask |= fcNegInf | fcNegNormal;
4891 break;
4893 default:
4894 return fcmpImpliesClass(Pred, F, LHS, ConstRHS.classify(),
4895 LookThroughSrc);
4898 // Invert the comparison for the unordered cases.
4899 if (FCmpInst::isUnordered(Pred))
4900 Mask = ~Mask;
4902 return exactClass(Src, Mask);
4905 return fcmpImpliesClass(Pred, F, LHS, ConstRHS.classify(), LookThroughSrc);
4908 std::tuple<Value *, FPClassTest, FPClassTest>
4909 llvm::fcmpImpliesClass(CmpInst::Predicate Pred, const Function &F, Value *LHS,
4910 Value *RHS, bool LookThroughSrc) {
4911 const APFloat *ConstRHS;
4912 if (!match(RHS, m_APFloatAllowPoison(ConstRHS)))
4913 return {nullptr, fcAllFlags, fcAllFlags};
4915 // TODO: Just call computeKnownFPClass for RHS to handle non-constants.
4916 return fcmpImpliesClass(Pred, F, LHS, *ConstRHS, LookThroughSrc);
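// Refine KnownFromContext for V from a branch or assume condition Cond known
// to be true or false at CxtI. For example (a sketch): if the condition
// "fcmp ogt double %v, 0.0" is known to hold, %v must be positive, non-zero
// and non-NaN, so all other class bits are recorded as known-not below.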
4919 static void computeKnownFPClassFromCond(const Value *V, Value *Cond,
4920 unsigned Depth, bool CondIsTrue,
4921 const Instruction *CxtI,
4922 KnownFPClass &KnownFromContext) {
4923 Value *A, *B;
4924 if (Depth < MaxAnalysisRecursionDepth &&
4925 (CondIsTrue ? match(Cond, m_LogicalAnd(m_Value(A), m_Value(B)))
4926 : match(Cond, m_LogicalOr(m_Value(A), m_Value(B))))) {
4927 computeKnownFPClassFromCond(V, A, Depth + 1, CondIsTrue, CxtI,
4928 KnownFromContext);
4929 computeKnownFPClassFromCond(V, B, Depth + 1, CondIsTrue, CxtI,
4930 KnownFromContext);
4931 return;
4933 CmpPredicate Pred;
4934 Value *LHS;
4935 uint64_t ClassVal = 0;
4936 const APFloat *CRHS;
4937 const APInt *RHS;
4938 if (match(Cond, m_FCmp(Pred, m_Value(LHS), m_APFloat(CRHS)))) {
4939 auto [CmpVal, MaskIfTrue, MaskIfFalse] = fcmpImpliesClass(
4940 Pred, *CxtI->getParent()->getParent(), LHS, *CRHS, LHS != V);
4941 if (CmpVal == V)
4942 KnownFromContext.knownNot(~(CondIsTrue ? MaskIfTrue : MaskIfFalse));
4943 } else if (match(Cond, m_Intrinsic<Intrinsic::is_fpclass>(
4944 m_Specific(V), m_ConstantInt(ClassVal)))) {
4945 FPClassTest Mask = static_cast<FPClassTest>(ClassVal);
4946 KnownFromContext.knownNot(CondIsTrue ? ~Mask : Mask);
4947 } else if (match(Cond, m_ICmp(Pred, m_ElementWiseBitCast(m_Specific(V)),
4948 m_APInt(RHS)))) {
4949 bool TrueIfSigned;
4950 if (!isSignBitCheck(Pred, *RHS, TrueIfSigned))
4951 return;
4952 if (TrueIfSigned == CondIsTrue)
4953 KnownFromContext.signBitMustBeOne();
4954 else
4955 KnownFromContext.signBitMustBeZero();
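// Collect class information for V that is implied by its context: dominating
// branch conditions (via the DomConditionCache) and llvm.assume calls. A
// sketch of the assume case:
//   %cmp = fcmp ord double %v, 0.0
//   call void @llvm.assume(i1 %cmp)
// lets a later query on %v conclude that it is known never NaN.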
4959 static KnownFPClass computeKnownFPClassFromContext(const Value *V,
4960 const SimplifyQuery &Q) {
4961 KnownFPClass KnownFromContext;
4963 if (!Q.CxtI)
4964 return KnownFromContext;
4966 if (Q.DC && Q.DT) {
4967 // Handle dominating conditions.
4968 for (BranchInst *BI : Q.DC->conditionsFor(V)) {
4969 Value *Cond = BI->getCondition();
4971 BasicBlockEdge Edge0(BI->getParent(), BI->getSuccessor(0));
4972 if (Q.DT->dominates(Edge0, Q.CxtI->getParent()))
4973 computeKnownFPClassFromCond(V, Cond, /*Depth=*/0, /*CondIsTrue=*/true,
4974 Q.CxtI, KnownFromContext);
4976 BasicBlockEdge Edge1(BI->getParent(), BI->getSuccessor(1));
4977 if (Q.DT->dominates(Edge1, Q.CxtI->getParent()))
4978 computeKnownFPClassFromCond(V, Cond, /*Depth=*/0, /*CondIsTrue=*/false,
4979 Q.CxtI, KnownFromContext);
4983 if (!Q.AC)
4984 return KnownFromContext;
4986 // Try to restrict the floating-point classes based on information from
4987 // assumptions.
4988 for (auto &AssumeVH : Q.AC->assumptionsFor(V)) {
4989 if (!AssumeVH)
4990 continue;
4991 CallInst *I = cast<CallInst>(AssumeVH);
4993 assert(I->getFunction() == Q.CxtI->getParent()->getParent() &&
4994 "Got assumption for the wrong function!");
4995 assert(I->getIntrinsicID() == Intrinsic::assume &&
4996 "must be an assume intrinsic");
4998 if (!isValidAssumeForContext(I, Q.CxtI, Q.DT))
4999 continue;
5001 computeKnownFPClassFromCond(V, I->getArgOperand(0), /*Depth=*/0,
5002 /*CondIsTrue=*/true, Q.CxtI, KnownFromContext);
5005 return KnownFromContext;
5008 void computeKnownFPClass(const Value *V, const APInt &DemandedElts,
5009 FPClassTest InterestedClasses, KnownFPClass &Known,
5010 unsigned Depth, const SimplifyQuery &Q);
5012 static void computeKnownFPClass(const Value *V, KnownFPClass &Known,
5013 FPClassTest InterestedClasses, unsigned Depth,
5014 const SimplifyQuery &Q) {
5015 auto *FVTy = dyn_cast<FixedVectorType>(V->getType());
5016 APInt DemandedElts =
5017 FVTy ? APInt::getAllOnes(FVTy->getNumElements()) : APInt(1, 1);
5018 computeKnownFPClass(V, DemandedElts, InterestedClasses, Known, Depth, Q);
5021 static void computeKnownFPClassForFPTrunc(const Operator *Op,
5022 const APInt &DemandedElts,
5023 FPClassTest InterestedClasses,
5024 KnownFPClass &Known, unsigned Depth,
5025 const SimplifyQuery &Q) {
5026 if ((InterestedClasses &
5027 (KnownFPClass::OrderedLessThanZeroMask | fcNan)) == fcNone)
5028 return;
5030 KnownFPClass KnownSrc;
5031 computeKnownFPClass(Op->getOperand(0), DemandedElts, InterestedClasses,
5032 KnownSrc, Depth + 1, Q);
5034 // Sign should be preserved
5035 // TODO: Handle cannot be ordered greater than zero
5036 if (KnownSrc.cannotBeOrderedLessThanZero())
5037 Known.knownNot(KnownFPClass::OrderedLessThanZeroMask);
5039 Known.propagateNaN(KnownSrc, true);
5041 // Infinity needs a range check.
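// Main worker for the computeKnownFPClass family. A minimal usage sketch
// (names as declared in this file):
//   KnownFPClass Known;
//   computeKnownFPClass(V, /*DemandedElts=*/APInt(1, 1), fcNan, Known,
//                       /*Depth=*/0, Q);
//   if (Known.isKnownNeverNaN()) { /* ... */ }
// Scalars use a single demanded "element"; fixed vectors pass one bit per
// lane.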
5044 void computeKnownFPClass(const Value *V, const APInt &DemandedElts,
5045 FPClassTest InterestedClasses, KnownFPClass &Known,
5046 unsigned Depth, const SimplifyQuery &Q) {
5047 assert(Known.isUnknown() && "should not be called with known information");
5049 if (!DemandedElts) {
5050 // No demanded elts, better to assume we don't know anything.
5051 Known.resetAll();
5052 return;
5055 assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");
5057 if (auto *CFP = dyn_cast<ConstantFP>(V)) {
5058 Known.KnownFPClasses = CFP->getValueAPF().classify();
5059 Known.SignBit = CFP->isNegative();
5060 return;
5063 if (isa<ConstantAggregateZero>(V)) {
5064 Known.KnownFPClasses = fcPosZero;
5065 Known.SignBit = false;
5066 return;
5069 if (isa<PoisonValue>(V)) {
5070 Known.KnownFPClasses = fcNone;
5071 Known.SignBit = false;
5072 return;
5075 // Try to handle fixed width vector constants
5076 auto *VFVTy = dyn_cast<FixedVectorType>(V->getType());
5077 const Constant *CV = dyn_cast<Constant>(V);
5078 if (VFVTy && CV) {
5079 Known.KnownFPClasses = fcNone;
5080 bool SignBitAllZero = true;
5081 bool SignBitAllOne = true;
5083 // For vectors, merge the class and sign bit of each demanded element.
5084 unsigned NumElts = VFVTy->getNumElements();
5085 for (unsigned i = 0; i != NumElts; ++i) {
5086 if (!DemandedElts[i])
5087 continue;
5089 Constant *Elt = CV->getAggregateElement(i);
5090 if (!Elt) {
5091 Known = KnownFPClass();
5092 return;
5094 if (isa<PoisonValue>(Elt))
5095 continue;
5096 auto *CElt = dyn_cast<ConstantFP>(Elt);
5097 if (!CElt) {
5098 Known = KnownFPClass();
5099 return;
5102 const APFloat &C = CElt->getValueAPF();
5103 Known.KnownFPClasses |= C.classify();
5104 if (C.isNegative())
5105 SignBitAllZero = false;
5106 else
5107 SignBitAllOne = false;
5109 if (SignBitAllOne != SignBitAllZero)
5110 Known.SignBit = SignBitAllOne;
5111 return;
5114 FPClassTest KnownNotFromFlags = fcNone;
5115 if (const auto *CB = dyn_cast<CallBase>(V))
5116 KnownNotFromFlags |= CB->getRetNoFPClass();
5117 else if (const auto *Arg = dyn_cast<Argument>(V))
5118 KnownNotFromFlags |= Arg->getNoFPClass();
5120 const Operator *Op = dyn_cast<Operator>(V);
5121 if (const FPMathOperator *FPOp = dyn_cast_or_null<FPMathOperator>(Op)) {
5122 if (FPOp->hasNoNaNs())
5123 KnownNotFromFlags |= fcNan;
5124 if (FPOp->hasNoInfs())
5125 KnownNotFromFlags |= fcInf;
5128 KnownFPClass AssumedClasses = computeKnownFPClassFromContext(V, Q);
5129 KnownNotFromFlags |= ~AssumedClasses.KnownFPClasses;
5131 // We no longer need to find out about these bits from inputs if we can
5132 // assume this from flags/attributes.
5133 InterestedClasses &= ~KnownNotFromFlags;
5135 auto ClearClassesFromFlags = make_scope_exit([=, &Known] {
5136 Known.knownNot(KnownNotFromFlags);
5137 if (!Known.SignBit && AssumedClasses.SignBit) {
5138 if (*AssumedClasses.SignBit)
5139 Known.signBitMustBeOne();
5140 else
5141 Known.signBitMustBeZero();
5145 if (!Op)
5146 return;
5148 // All recursive calls that increase depth must come after this.
5149 if (Depth == MaxAnalysisRecursionDepth)
5150 return;
5152 const unsigned Opc = Op->getOpcode();
5153 switch (Opc) {
5154 case Instruction::FNeg: {
5155 computeKnownFPClass(Op->getOperand(0), DemandedElts, InterestedClasses,
5156 Known, Depth + 1, Q);
5157 Known.fneg();
5158 break;
5160 case Instruction::Select: {
5161 Value *Cond = Op->getOperand(0);
5162 Value *LHS = Op->getOperand(1);
5163 Value *RHS = Op->getOperand(2);
5165 FPClassTest FilterLHS = fcAllFlags;
5166 FPClassTest FilterRHS = fcAllFlags;
5168 Value *TestedValue = nullptr;
5169 FPClassTest MaskIfTrue = fcAllFlags;
5170 FPClassTest MaskIfFalse = fcAllFlags;
5171 uint64_t ClassVal = 0;
5172 const Function *F = cast<Instruction>(Op)->getFunction();
5173 CmpPredicate Pred;
5174 Value *CmpLHS, *CmpRHS;
5175 if (F && match(Cond, m_FCmp(Pred, m_Value(CmpLHS), m_Value(CmpRHS)))) {
5176 // If the select filters out a value based on the class, it no longer
5177 // participates in the class of the result
5179 // TODO: In some degenerate cases we can infer something if we try again
5180 // without looking through sign operations.
5181 bool LookThroughFAbsFNeg = CmpLHS != LHS && CmpLHS != RHS;
5182 std::tie(TestedValue, MaskIfTrue, MaskIfFalse) =
5183 fcmpImpliesClass(Pred, *F, CmpLHS, CmpRHS, LookThroughFAbsFNeg);
5184 } else if (match(Cond,
5185 m_Intrinsic<Intrinsic::is_fpclass>(
5186 m_Value(TestedValue), m_ConstantInt(ClassVal)))) {
5187 FPClassTest TestedMask = static_cast<FPClassTest>(ClassVal);
5188 MaskIfTrue = TestedMask;
5189 MaskIfFalse = ~TestedMask;
5192 if (TestedValue == LHS) {
5193 // match !isnan(x) ? x : y
5194 FilterLHS = MaskIfTrue;
5195 } else if (TestedValue == RHS) { // && IsExactClass
5196 // match !isnan(x) ? y : x
5197 FilterRHS = MaskIfFalse;
5200 KnownFPClass Known2;
5201 computeKnownFPClass(LHS, DemandedElts, InterestedClasses & FilterLHS, Known,
5202 Depth + 1, Q);
5203 Known.KnownFPClasses &= FilterLHS;
5205 computeKnownFPClass(RHS, DemandedElts, InterestedClasses & FilterRHS,
5206 Known2, Depth + 1, Q);
5207 Known2.KnownFPClasses &= FilterRHS;
5209 Known |= Known2;
5210 break;
5212 case Instruction::Call: {
5213 const CallInst *II = cast<CallInst>(Op);
5214 const Intrinsic::ID IID = II->getIntrinsicID();
5215 switch (IID) {
5216 case Intrinsic::fabs: {
5217 if ((InterestedClasses & (fcNan | fcPositive)) != fcNone) {
5218 // If we only care about the sign bit we don't need to inspect the
5219 // operand.
5220 computeKnownFPClass(II->getArgOperand(0), DemandedElts,
5221 InterestedClasses, Known, Depth + 1, Q);
5224 Known.fabs();
5225 break;
5227 case Intrinsic::copysign: {
5228 KnownFPClass KnownSign;
5230 computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedClasses,
5231 Known, Depth + 1, Q);
5232 computeKnownFPClass(II->getArgOperand(1), DemandedElts, InterestedClasses,
5233 KnownSign, Depth + 1, Q);
5234 Known.copysign(KnownSign);
5235 break;
5237 case Intrinsic::fma:
5238 case Intrinsic::fmuladd: {
5239 if ((InterestedClasses & fcNegative) == fcNone)
5240 break;
5242 if (II->getArgOperand(0) != II->getArgOperand(1))
5243 break;
5245 // The multiply cannot be -0 and therefore the add can't be -0
5246 Known.knownNot(fcNegZero);
5248 // x * x + y is non-negative if y is non-negative.
5249 KnownFPClass KnownAddend;
5250 computeKnownFPClass(II->getArgOperand(2), DemandedElts, InterestedClasses,
5251 KnownAddend, Depth + 1, Q);
5253 if (KnownAddend.cannotBeOrderedLessThanZero())
5254 Known.knownNot(fcNegative);
5255 break;
5257 case Intrinsic::sqrt:
5258 case Intrinsic::experimental_constrained_sqrt: {
5259 KnownFPClass KnownSrc;
5260 FPClassTest InterestedSrcs = InterestedClasses;
5261 if (InterestedClasses & fcNan)
5262 InterestedSrcs |= KnownFPClass::OrderedLessThanZeroMask;
5264 computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedSrcs,
5265 KnownSrc, Depth + 1, Q);
5267 if (KnownSrc.isKnownNeverPosInfinity())
5268 Known.knownNot(fcPosInf);
5269 if (KnownSrc.isKnownNever(fcSNan))
5270 Known.knownNot(fcSNan);
5272 // Any negative value besides -0 returns a nan.
5273 if (KnownSrc.isKnownNeverNaN() && KnownSrc.cannotBeOrderedLessThanZero())
5274 Known.knownNot(fcNan);
5276 // The only negative value that can be returned is -0 for -0 inputs.
5277 Known.knownNot(fcNegInf | fcNegSubnormal | fcNegNormal);
5279 // If the input denormal mode could be PreserveSign, a negative
5280 // subnormal input could produce a negative zero output.
5281 const Function *F = II->getFunction();
5282 if (Q.IIQ.hasNoSignedZeros(II) ||
5283 (F && KnownSrc.isKnownNeverLogicalNegZero(*F, II->getType())))
5284 Known.knownNot(fcNegZero);
5286 break;
5288 case Intrinsic::sin:
5289 case Intrinsic::cos: {
5290 // Return NaN on infinite inputs.
5291 KnownFPClass KnownSrc;
5292 computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedClasses,
5293 KnownSrc, Depth + 1, Q);
5294 Known.knownNot(fcInf);
5295 if (KnownSrc.isKnownNeverNaN() && KnownSrc.isKnownNeverInfinity())
5296 Known.knownNot(fcNan);
5297 break;
5299 case Intrinsic::maxnum:
5300 case Intrinsic::minnum:
5301 case Intrinsic::minimum:
5302 case Intrinsic::maximum: {
5303 KnownFPClass KnownLHS, KnownRHS;
5304 computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedClasses,
5305 KnownLHS, Depth + 1, Q);
5306 computeKnownFPClass(II->getArgOperand(1), DemandedElts, InterestedClasses,
5307 KnownRHS, Depth + 1, Q);
5309 bool NeverNaN = KnownLHS.isKnownNeverNaN() || KnownRHS.isKnownNeverNaN();
5310 Known = KnownLHS | KnownRHS;
5312 // If either operand is not NaN, the result is not NaN.
5313 if (NeverNaN && (IID == Intrinsic::minnum || IID == Intrinsic::maxnum))
5314 Known.knownNot(fcNan);
5316 if (IID == Intrinsic::maxnum) {
5317 // If at least one operand is known to be positive, the result must be
5318 // positive.
5319 if ((KnownLHS.cannotBeOrderedLessThanZero() &&
5320 KnownLHS.isKnownNeverNaN()) ||
5321 (KnownRHS.cannotBeOrderedLessThanZero() &&
5322 KnownRHS.isKnownNeverNaN()))
5323 Known.knownNot(KnownFPClass::OrderedLessThanZeroMask);
5324 } else if (IID == Intrinsic::maximum) {
5325 // If at least one operand is known to be positive, the result must be
5326 // positive.
5327 if (KnownLHS.cannotBeOrderedLessThanZero() ||
5328 KnownRHS.cannotBeOrderedLessThanZero())
5329 Known.knownNot(KnownFPClass::OrderedLessThanZeroMask);
5330 } else if (IID == Intrinsic::minnum) {
5331 // If at least one operand is known to be negative, the result must be
5332 // negative.
5333 if ((KnownLHS.cannotBeOrderedGreaterThanZero() &&
5334 KnownLHS.isKnownNeverNaN()) ||
5335 (KnownRHS.cannotBeOrderedGreaterThanZero() &&
5336 KnownRHS.isKnownNeverNaN()))
5337 Known.knownNot(KnownFPClass::OrderedGreaterThanZeroMask);
5338 } else {
5339 // If at least one operand is known to be negative, the result must be
5340 // negative.
5341 if (KnownLHS.cannotBeOrderedGreaterThanZero() ||
5342 KnownRHS.cannotBeOrderedGreaterThanZero())
5343 Known.knownNot(KnownFPClass::OrderedGreaterThanZeroMask);
5346 // Fixup zero handling if denormals could be returned as a zero.
5348 // As there's no spec for denormal flushing, be conservative with the
5349 // treatment of denormals that could be flushed to zero. For older
5350 // subtargets on AMDGPU the min/max instructions would not flush the
5351 // output and return the original value.
5353 if ((Known.KnownFPClasses & fcZero) != fcNone &&
5354 !Known.isKnownNeverSubnormal()) {
5355 const Function *Parent = II->getFunction();
5356 if (!Parent)
5357 break;
5359 DenormalMode Mode = Parent->getDenormalMode(
5360 II->getType()->getScalarType()->getFltSemantics());
5361 if (Mode != DenormalMode::getIEEE())
5362 Known.KnownFPClasses |= fcZero;
5365 if (Known.isKnownNeverNaN()) {
5366 if (KnownLHS.SignBit && KnownRHS.SignBit &&
5367 *KnownLHS.SignBit == *KnownRHS.SignBit) {
5368 if (*KnownLHS.SignBit)
5369 Known.signBitMustBeOne();
5370 else
5371 Known.signBitMustBeZero();
5372 } else if ((IID == Intrinsic::maximum || IID == Intrinsic::minimum) ||
5373 ((KnownLHS.isKnownNeverNegZero() ||
5374 KnownRHS.isKnownNeverPosZero()) &&
5375 (KnownLHS.isKnownNeverPosZero() ||
5376 KnownRHS.isKnownNeverNegZero()))) {
5377 if ((IID == Intrinsic::maximum || IID == Intrinsic::maxnum) &&
5378 (KnownLHS.SignBit == false || KnownRHS.SignBit == false))
5379 Known.signBitMustBeZero();
5380 else if ((IID == Intrinsic::minimum || IID == Intrinsic::minnum) &&
5381 (KnownLHS.SignBit == true || KnownRHS.SignBit == true))
5382 Known.signBitMustBeOne();
5385 break;
5387 case Intrinsic::canonicalize: {
5388 KnownFPClass KnownSrc;
5389 computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedClasses,
5390 KnownSrc, Depth + 1, Q);
5392 // This is essentially a stronger form of
5393 // propagateCanonicalizingSrc. Other "canonicalizing" operations don't
5394 // actually have an IR canonicalization guarantee.
5396 // Canonicalize may flush denormals to zero, so we have to consider the
5397 // denormal mode to preserve known-not-0 knowledge.
5398 Known.KnownFPClasses = KnownSrc.KnownFPClasses | fcZero | fcQNan;
5400 // Stronger version of propagateNaN
5401 // Canonicalize is guaranteed to quiet signaling nans.
5402 if (KnownSrc.isKnownNeverNaN())
5403 Known.knownNot(fcNan);
5404 else
5405 Known.knownNot(fcSNan);
5407 const Function *F = II->getFunction();
5408 if (!F)
5409 break;
5411 // If the parent function flushes denormals, the canonical output cannot
5412 // be a denormal.
5413 const fltSemantics &FPType =
5414 II->getType()->getScalarType()->getFltSemantics();
5415 DenormalMode DenormMode = F->getDenormalMode(FPType);
5416 if (DenormMode == DenormalMode::getIEEE()) {
5417 if (KnownSrc.isKnownNever(fcPosZero))
5418 Known.knownNot(fcPosZero);
5419 if (KnownSrc.isKnownNever(fcNegZero))
5420 Known.knownNot(fcNegZero);
5421 break;
5424 if (DenormMode.inputsAreZero() || DenormMode.outputsAreZero())
5425 Known.knownNot(fcSubnormal);
5427 if (DenormMode.Input == DenormalMode::PositiveZero ||
5428 (DenormMode.Output == DenormalMode::PositiveZero &&
5429 DenormMode.Input == DenormalMode::IEEE))
5430 Known.knownNot(fcNegZero);
5432 break;
5434 case Intrinsic::vector_reduce_fmax:
5435 case Intrinsic::vector_reduce_fmin:
5436 case Intrinsic::vector_reduce_fmaximum:
5437 case Intrinsic::vector_reduce_fminimum: {
5438 // reduce min/max will choose an element from one of the vector elements,
5439 // so we can infer any class information that is common to all elements.
5440 Known = computeKnownFPClass(II->getArgOperand(0), II->getFastMathFlags(),
5441 InterestedClasses, Depth + 1, Q);
5442 // Can only propagate sign if output is never NaN.
5443 if (!Known.isKnownNeverNaN())
5444 Known.SignBit.reset();
5445 break;
5447 // reverse preserves all characteristics of the input vector's elements.
5448 case Intrinsic::vector_reverse:
5449 Known = computeKnownFPClass(
5450 II->getArgOperand(0), DemandedElts.reverseBits(),
5451 II->getFastMathFlags(), InterestedClasses, Depth + 1, Q);
5452 break;
5453 case Intrinsic::trunc:
5454 case Intrinsic::floor:
5455 case Intrinsic::ceil:
5456 case Intrinsic::rint:
5457 case Intrinsic::nearbyint:
5458 case Intrinsic::round:
5459 case Intrinsic::roundeven: {
5460 KnownFPClass KnownSrc;
5461 FPClassTest InterestedSrcs = InterestedClasses;
5462 if (InterestedSrcs & fcPosFinite)
5463 InterestedSrcs |= fcPosFinite;
5464 if (InterestedSrcs & fcNegFinite)
5465 InterestedSrcs |= fcNegFinite;
5466 computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedSrcs,
5467 KnownSrc, Depth + 1, Q);
5469 // Integer results cannot be subnormal.
5470 Known.knownNot(fcSubnormal);
5472 Known.propagateNaN(KnownSrc, true);
5474 // Pass through infinities, except PPC_FP128 is a special case for
5475 // intrinsics other than trunc.
5476 if (IID == Intrinsic::trunc || !V->getType()->isMultiUnitFPType()) {
5477 if (KnownSrc.isKnownNeverPosInfinity())
5478 Known.knownNot(fcPosInf);
5479 if (KnownSrc.isKnownNeverNegInfinity())
5480 Known.knownNot(fcNegInf);
5483 // Rounding a negative value up to 0 produces -0.
5484 if (KnownSrc.isKnownNever(fcPosFinite))
5485 Known.knownNot(fcPosFinite);
5486 if (KnownSrc.isKnownNever(fcNegFinite))
5487 Known.knownNot(fcNegFinite);
5489 break;
5491 case Intrinsic::exp:
5492 case Intrinsic::exp2:
5493 case Intrinsic::exp10: {
5494 Known.knownNot(fcNegative);
5495 if ((InterestedClasses & fcNan) == fcNone)
5496 break;
5498 KnownFPClass KnownSrc;
5499 computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedClasses,
5500 KnownSrc, Depth + 1, Q);
5501 if (KnownSrc.isKnownNeverNaN()) {
5502 Known.knownNot(fcNan);
5503 Known.signBitMustBeZero();
5506 break;
5508 case Intrinsic::fptrunc_round: {
5509 computeKnownFPClassForFPTrunc(Op, DemandedElts, InterestedClasses, Known,
5510 Depth, Q);
5511 break;
5513 case Intrinsic::log:
5514 case Intrinsic::log10:
5515 case Intrinsic::log2:
5516 case Intrinsic::experimental_constrained_log:
5517 case Intrinsic::experimental_constrained_log10:
5518 case Intrinsic::experimental_constrained_log2: {
5519 // log(+inf) -> +inf
5520 // log([+-]0.0) -> -inf
5521 // log(-inf) -> nan
5522 // log(-x) -> nan
5523 if ((InterestedClasses & (fcNan | fcInf)) == fcNone)
5524 break;
5526 FPClassTest InterestedSrcs = InterestedClasses;
5527 if ((InterestedClasses & fcNegInf) != fcNone)
5528 InterestedSrcs |= fcZero | fcSubnormal;
5529 if ((InterestedClasses & fcNan) != fcNone)
5530 InterestedSrcs |= fcNan | (fcNegative & ~fcNan);
5532 KnownFPClass KnownSrc;
5533 computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedSrcs,
5534 KnownSrc, Depth + 1, Q);
5536 if (KnownSrc.isKnownNeverPosInfinity())
5537 Known.knownNot(fcPosInf);
5539 if (KnownSrc.isKnownNeverNaN() && KnownSrc.cannotBeOrderedLessThanZero())
5540 Known.knownNot(fcNan);
5542 const Function *F = II->getFunction();
5543 if (F && KnownSrc.isKnownNeverLogicalZero(*F, II->getType()))
5544 Known.knownNot(fcNegInf);
5546 break;
5548 case Intrinsic::powi: {
5549 if ((InterestedClasses & fcNegative) == fcNone)
5550 break;
5552 const Value *Exp = II->getArgOperand(1);
5553 Type *ExpTy = Exp->getType();
5554 unsigned BitWidth = ExpTy->getScalarType()->getIntegerBitWidth();
5555 KnownBits ExponentKnownBits(BitWidth);
5556 computeKnownBits(Exp, isa<VectorType>(ExpTy) ? DemandedElts : APInt(1, 1),
5557 ExponentKnownBits, Depth + 1, Q);
5559 if (ExponentKnownBits.Zero[0]) { // Is even
5560 Known.knownNot(fcNegative);
5561 break;
5564 // Given that exp is an integer, here are the
5565 // ways that pow can return a negative value:
5567 // pow(x, exp) --> negative if exp is odd and x is negative.
5568 // pow(-0, exp) --> -inf if exp is negative odd.
5569 // pow(-0, exp) --> -0 if exp is positive odd.
5570 // pow(-inf, exp) --> -0 if exp is negative odd.
5571 // pow(-inf, exp) --> -inf if exp is positive odd.
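// E.g. for powi(x, 2) the low bit of the exponent is known zero, so the
// even-exponent check above already proves the result cannot be negative;
// the query on the base below handles the remaining odd-exponent cases.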
5572 KnownFPClass KnownSrc;
5573 computeKnownFPClass(II->getArgOperand(0), DemandedElts, fcNegative,
5574 KnownSrc, Depth + 1, Q);
5575 if (KnownSrc.isKnownNever(fcNegative))
5576 Known.knownNot(fcNegative);
5577 break;
5579 case Intrinsic::ldexp: {
5580 KnownFPClass KnownSrc;
5581 computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedClasses,
5582 KnownSrc, Depth + 1, Q);
5583 Known.propagateNaN(KnownSrc, /*PropagateSign=*/true);
5585 // Sign is preserved, but underflows may produce zeroes.
5586 if (KnownSrc.isKnownNever(fcNegative))
5587 Known.knownNot(fcNegative);
5588 else if (KnownSrc.cannotBeOrderedLessThanZero())
5589 Known.knownNot(KnownFPClass::OrderedLessThanZeroMask);
5591 if (KnownSrc.isKnownNever(fcPositive))
5592 Known.knownNot(fcPositive);
5593 else if (KnownSrc.cannotBeOrderedGreaterThanZero())
5594 Known.knownNot(KnownFPClass::OrderedGreaterThanZeroMask);
5596 // Can refine inf/zero handling based on the exponent operand.
5597 const FPClassTest ExpInfoMask = fcZero | fcSubnormal | fcInf;
5598 if ((InterestedClasses & ExpInfoMask) == fcNone)
5599 break;
5600 if ((KnownSrc.KnownFPClasses & ExpInfoMask) == fcNone)
5601 break;
5603 const fltSemantics &Flt =
5604 II->getType()->getScalarType()->getFltSemantics();
5605 unsigned Precision = APFloat::semanticsPrecision(Flt);
5606 const Value *ExpArg = II->getArgOperand(1);
5607 ConstantRange ExpRange = computeConstantRange(
5608 ExpArg, true, Q.IIQ.UseInstrInfo, Q.AC, Q.CxtI, Q.DT, Depth + 1);
5610 const int MantissaBits = Precision - 1;
5611 if (ExpRange.getSignedMin().sge(static_cast<int64_t>(MantissaBits)))
5612 Known.knownNot(fcSubnormal);
5614 const Function *F = II->getFunction();
5615 const APInt *ConstVal = ExpRange.getSingleElement();
5616 if (ConstVal && ConstVal->isZero()) {
5617 // ldexp(x, 0) -> x, so propagate everything.
5618 Known.propagateCanonicalizingSrc(KnownSrc, *F, II->getType());
5619 } else if (ExpRange.isAllNegative()) {
5620 // If we know the power is <= 0, can't introduce inf
5621 if (KnownSrc.isKnownNeverPosInfinity())
5622 Known.knownNot(fcPosInf);
5623 if (KnownSrc.isKnownNeverNegInfinity())
5624 Known.knownNot(fcNegInf);
5625 } else if (ExpRange.isAllNonNegative()) {
5626 // If we know the power is >= 0, can't introduce subnormal or zero
5627 if (KnownSrc.isKnownNeverPosSubnormal())
5628 Known.knownNot(fcPosSubnormal);
5629 if (KnownSrc.isKnownNeverNegSubnormal())
5630 Known.knownNot(fcNegSubnormal);
5631 if (F && KnownSrc.isKnownNeverLogicalPosZero(*F, II->getType()))
5632 Known.knownNot(fcPosZero);
5633 if (F && KnownSrc.isKnownNeverLogicalNegZero(*F, II->getType()))
5634 Known.knownNot(fcNegZero);
5637 break;
5639 case Intrinsic::arithmetic_fence: {
5640 computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedClasses,
5641 Known, Depth + 1, Q);
5642 break;
5644 case Intrinsic::experimental_constrained_sitofp:
5645 case Intrinsic::experimental_constrained_uitofp:
5646 // Cannot produce nan
5647 Known.knownNot(fcNan);
5649 // sitofp and uitofp turn into +0.0 for zero.
5650 Known.knownNot(fcNegZero);
5652 // Integers cannot be subnormal
5653 Known.knownNot(fcSubnormal);
5655 if (IID == Intrinsic::experimental_constrained_uitofp)
5656 Known.signBitMustBeZero();
5658 // TODO: Copy inf handling from instructions
5659 break;
5660 default:
5661 break;
5664 break;
5666 case Instruction::FAdd:
5667 case Instruction::FSub: {
5668 KnownFPClass KnownLHS, KnownRHS;
5669 bool WantNegative =
5670 Op->getOpcode() == Instruction::FAdd &&
5671 (InterestedClasses & KnownFPClass::OrderedLessThanZeroMask) != fcNone;
5672 bool WantNaN = (InterestedClasses & fcNan) != fcNone;
5673 bool WantNegZero = (InterestedClasses & fcNegZero) != fcNone;
5675 if (!WantNaN && !WantNegative && !WantNegZero)
5676 break;
5678 FPClassTest InterestedSrcs = InterestedClasses;
5679 if (WantNegative)
5680 InterestedSrcs |= KnownFPClass::OrderedLessThanZeroMask;
5681 if (InterestedClasses & fcNan)
5682 InterestedSrcs |= fcInf;
5683 computeKnownFPClass(Op->getOperand(1), DemandedElts, InterestedSrcs,
5684 KnownRHS, Depth + 1, Q);
5686 if ((WantNaN && KnownRHS.isKnownNeverNaN()) ||
5687 (WantNegative && KnownRHS.cannotBeOrderedLessThanZero()) ||
5688 WantNegZero || Opc == Instruction::FSub) {
5690 // RHS is canonically cheaper to compute. Skip inspecting the LHS if
5691 // there's no point.
5692 computeKnownFPClass(Op->getOperand(0), DemandedElts, InterestedSrcs,
5693 KnownLHS, Depth + 1, Q);
5694 // Adding positive and negative infinity produces NaN.
5695 // TODO: Check sign of infinities.
5696 if (KnownLHS.isKnownNeverNaN() && KnownRHS.isKnownNeverNaN() &&
5697 (KnownLHS.isKnownNeverInfinity() || KnownRHS.isKnownNeverInfinity()))
5698 Known.knownNot(fcNan);
5700 // FIXME: Context function should always be passed in separately
5701 const Function *F = cast<Instruction>(Op)->getFunction();
5703 if (Op->getOpcode() == Instruction::FAdd) {
5704 if (KnownLHS.cannotBeOrderedLessThanZero() &&
5705 KnownRHS.cannotBeOrderedLessThanZero())
5706 Known.knownNot(KnownFPClass::OrderedLessThanZeroMask);
5707 if (!F)
5708 break;
5710 // (fadd x, 0.0) is guaranteed to return +0.0, not -0.0.
5711 if ((KnownLHS.isKnownNeverLogicalNegZero(*F, Op->getType()) ||
5712 KnownRHS.isKnownNeverLogicalNegZero(*F, Op->getType())) &&
5713 // Make sure output negative denormal can't flush to -0
5714 outputDenormalIsIEEEOrPosZero(*F, Op->getType()))
5715 Known.knownNot(fcNegZero);
5716 } else {
5717 if (!F)
5718 break;
5720 // Only fsub -0, +0 can return -0
5721 if ((KnownLHS.isKnownNeverLogicalNegZero(*F, Op->getType()) ||
5722 KnownRHS.isKnownNeverLogicalPosZero(*F, Op->getType())) &&
5723 // Make sure output negative denormal can't flush to -0
5724 outputDenormalIsIEEEOrPosZero(*F, Op->getType()))
5725 Known.knownNot(fcNegZero);
5729 break;
5731 case Instruction::FMul: {
5732 // X * X is always non-negative or a NaN.
5733 if (Op->getOperand(0) == Op->getOperand(1))
5734 Known.knownNot(fcNegative);
5736 if ((InterestedClasses & fcNan) != fcNan)
5737 break;
5739 // fcSubnormal is only needed in case of DAZ.
5740 const FPClassTest NeedForNan = fcNan | fcInf | fcZero | fcSubnormal;
5742 KnownFPClass KnownLHS, KnownRHS;
5743 computeKnownFPClass(Op->getOperand(1), DemandedElts, NeedForNan, KnownRHS,
5744 Depth + 1, Q);
5745 if (!KnownRHS.isKnownNeverNaN())
5746 break;
5748 computeKnownFPClass(Op->getOperand(0), DemandedElts, NeedForNan, KnownLHS,
5749 Depth + 1, Q);
5750 if (!KnownLHS.isKnownNeverNaN())
5751 break;
5753 if (KnownLHS.SignBit && KnownRHS.SignBit) {
5754 if (*KnownLHS.SignBit == *KnownRHS.SignBit)
5755 Known.signBitMustBeZero();
5756 else
5757 Known.signBitMustBeOne();
5760 // Only 0 * +/-inf produces NaN.
5761 if (KnownLHS.isKnownNeverInfinity() && KnownRHS.isKnownNeverInfinity()) {
5762 Known.knownNot(fcNan);
5763 break;
5766 const Function *F = cast<Instruction>(Op)->getFunction();
5767 if (!F)
5768 break;
5770 if ((KnownRHS.isKnownNeverInfinity() ||
5771 KnownLHS.isKnownNeverLogicalZero(*F, Op->getType())) &&
5772 (KnownLHS.isKnownNeverInfinity() ||
5773 KnownRHS.isKnownNeverLogicalZero(*F, Op->getType())))
5774 Known.knownNot(fcNan);
5776 break;
5778 case Instruction::FDiv:
5779 case Instruction::FRem: {
5780 if (Op->getOperand(0) == Op->getOperand(1)) {
5781 // TODO: Could filter out snan if we inspect the operand
5782 if (Op->getOpcode() == Instruction::FDiv) {
5783 // X / X is always exactly 1.0 or a NaN.
5784 Known.KnownFPClasses = fcNan | fcPosNormal;
5785 } else {
5786 // X % X is always exactly [+-]0.0 or a NaN.
5787 Known.KnownFPClasses = fcNan | fcZero;
5790 break;
5793 const bool WantNan = (InterestedClasses & fcNan) != fcNone;
5794 const bool WantNegative = (InterestedClasses & fcNegative) != fcNone;
5795 const bool WantPositive =
5796 Opc == Instruction::FRem && (InterestedClasses & fcPositive) != fcNone;
5797 if (!WantNan && !WantNegative && !WantPositive)
5798 break;
5800 KnownFPClass KnownLHS, KnownRHS;
5802 computeKnownFPClass(Op->getOperand(1), DemandedElts,
5803 fcNan | fcInf | fcZero | fcNegative, KnownRHS,
5804 Depth + 1, Q);
5806 bool KnowSomethingUseful =
5807 KnownRHS.isKnownNeverNaN() || KnownRHS.isKnownNever(fcNegative);
5809 if (KnowSomethingUseful || WantPositive) {
5810 const FPClassTest InterestedLHS =
5811 WantPositive ? fcAllFlags
5812 : fcNan | fcInf | fcZero | fcSubnormal | fcNegative;
5814 computeKnownFPClass(Op->getOperand(0), DemandedElts,
5815 InterestedClasses & InterestedLHS, KnownLHS,
5816 Depth + 1, Q);
5819 const Function *F = cast<Instruction>(Op)->getFunction();
5821 if (Op->getOpcode() == Instruction::FDiv) {
5822 // Only 0/0, Inf/Inf produce NaN.
5823 if (KnownLHS.isKnownNeverNaN() && KnownRHS.isKnownNeverNaN() &&
5824 (KnownLHS.isKnownNeverInfinity() ||
5825 KnownRHS.isKnownNeverInfinity()) &&
5826 ((F && KnownLHS.isKnownNeverLogicalZero(*F, Op->getType())) ||
5827 (F && KnownRHS.isKnownNeverLogicalZero(*F, Op->getType())))) {
5828 Known.knownNot(fcNan);
5831 // X / -0.0 is -Inf (or NaN).
5832 // +X / +X is +X
5833 if (KnownLHS.isKnownNever(fcNegative) && KnownRHS.isKnownNever(fcNegative))
5834 Known.knownNot(fcNegative);
5835 } else {
5836 // Inf REM x and x REM 0 produce NaN.
5837 if (KnownLHS.isKnownNeverNaN() && KnownRHS.isKnownNeverNaN() &&
5838 KnownLHS.isKnownNeverInfinity() && F &&
5839 KnownRHS.isKnownNeverLogicalZero(*F, Op->getType())) {
5840 Known.knownNot(fcNan);
5843 // The sign for frem is the same as the first operand.
5844 if (KnownLHS.cannotBeOrderedLessThanZero())
5845 Known.knownNot(KnownFPClass::OrderedLessThanZeroMask);
5846 if (KnownLHS.cannotBeOrderedGreaterThanZero())
5847 Known.knownNot(KnownFPClass::OrderedGreaterThanZeroMask);
5849 // See if we can be more aggressive about the sign of 0.
5850 if (KnownLHS.isKnownNever(fcNegative))
5851 Known.knownNot(fcNegative);
5852 if (KnownLHS.isKnownNever(fcPositive))
5853 Known.knownNot(fcPositive);
5856 break;
5858 case Instruction::FPExt: {
5859 // Infinity, nan and zero propagate from source.
5860 computeKnownFPClass(Op->getOperand(0), DemandedElts, InterestedClasses,
5861 Known, Depth + 1, Q);
5863 const fltSemantics &DstTy =
5864 Op->getType()->getScalarType()->getFltSemantics();
5865 const fltSemantics &SrcTy =
5866 Op->getOperand(0)->getType()->getScalarType()->getFltSemantics();
5868 // All subnormal inputs should be in the normal range in the result type.
5869 if (APFloat::isRepresentableAsNormalIn(SrcTy, DstTy)) {
5870 if (Known.KnownFPClasses & fcPosSubnormal)
5871 Known.KnownFPClasses |= fcPosNormal;
5872 if (Known.KnownFPClasses & fcNegSubnormal)
5873 Known.KnownFPClasses |= fcNegNormal;
5874 Known.knownNot(fcSubnormal);
5877 // Sign bit of a nan isn't guaranteed.
5878 if (!Known.isKnownNeverNaN())
5879 Known.SignBit = std::nullopt;
5880 break;
5882 case Instruction::FPTrunc: {
5883 computeKnownFPClassForFPTrunc(Op, DemandedElts, InterestedClasses, Known,
5884 Depth, Q);
5885 break;
5887 case Instruction::SIToFP:
5888 case Instruction::UIToFP: {
5889 // Cannot produce nan
5890 Known.knownNot(fcNan);
5892 // Integers cannot be subnormal
5893 Known.knownNot(fcSubnormal);
5895 // sitofp and uitofp turn into +0.0 for zero.
5896 Known.knownNot(fcNegZero);
5897 if (Op->getOpcode() == Instruction::UIToFP)
5898 Known.signBitMustBeZero();
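// Whether an infinity can be produced depends on the integer width versus
// the FP exponent range checked below. For example (a sketch):
// sitofp i32 -> float can never yield an infinity (IntSize = 31 <= 127),
// while uitofp i128 -> float can, since 2^128 - 1 rounds up to +inf.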
5900 if (InterestedClasses & fcInf) {
5901 // Get width of largest magnitude integer (remove a bit if signed).
5902 // This still works for a signed minimum value because the largest FP
5903 // value is scaled by some fraction close to 2.0 (1.0 + 0.xxxx).
5904 int IntSize = Op->getOperand(0)->getType()->getScalarSizeInBits();
5905 if (Op->getOpcode() == Instruction::SIToFP)
5906 --IntSize;
5908 // If the exponent of the largest finite FP value can hold the largest
5909 // integer, the result of the cast must be finite.
5910 Type *FPTy = Op->getType()->getScalarType();
5911 if (ilogb(APFloat::getLargest(FPTy->getFltSemantics())) >= IntSize)
5912 Known.knownNot(fcInf);
5915 break;
5917 case Instruction::ExtractElement: {
5918 // Look through extract element. If the index is non-constant or
5919 // out-of-range, demand all elements; otherwise just the extracted element.
5920 const Value *Vec = Op->getOperand(0);
5921 const Value *Idx = Op->getOperand(1);
5922 auto *CIdx = dyn_cast<ConstantInt>(Idx);
5924 if (auto *VecTy = dyn_cast<FixedVectorType>(Vec->getType())) {
5925 unsigned NumElts = VecTy->getNumElements();
5926 APInt DemandedVecElts = APInt::getAllOnes(NumElts);
5927 if (CIdx && CIdx->getValue().ult(NumElts))
5928 DemandedVecElts = APInt::getOneBitSet(NumElts, CIdx->getZExtValue());
5929 return computeKnownFPClass(Vec, DemandedVecElts, InterestedClasses, Known,
5930 Depth + 1, Q);
5933 break;
5935 case Instruction::InsertElement: {
5936 if (isa<ScalableVectorType>(Op->getType()))
5937 return;
5939 const Value *Vec = Op->getOperand(0);
5940 const Value *Elt = Op->getOperand(1);
5941 auto *CIdx = dyn_cast<ConstantInt>(Op->getOperand(2));
5942 unsigned NumElts = DemandedElts.getBitWidth();
5943 APInt DemandedVecElts = DemandedElts;
5944 bool NeedsElt = true;
5945 // If we know the index we are inserting to, clear it from the Vec check.
5946 if (CIdx && CIdx->getValue().ult(NumElts)) {
5947 DemandedVecElts.clearBit(CIdx->getZExtValue());
5948 NeedsElt = DemandedElts[CIdx->getZExtValue()];
5951 // Do we demand the inserted element?
5952 if (NeedsElt) {
5953 computeKnownFPClass(Elt, Known, InterestedClasses, Depth + 1, Q);
5954 // If we don't know any bits, early out.
5955 if (Known.isUnknown())
5956 break;
5957 } else {
5958 Known.KnownFPClasses = fcNone;
5961 // Do we need any more elements from Vec?
5962 if (!DemandedVecElts.isZero()) {
5963 KnownFPClass Known2;
5964 computeKnownFPClass(Vec, DemandedVecElts, InterestedClasses, Known2,
5965 Depth + 1, Q);
5966 Known |= Known2;
5969 break;
5971 case Instruction::ShuffleVector: {
5972 // For undef elements, we don't know anything about the common state of
5973 // the shuffle result.
5974 APInt DemandedLHS, DemandedRHS;
5975 auto *Shuf = dyn_cast<ShuffleVectorInst>(Op);
5976 if (!Shuf || !getShuffleDemandedElts(Shuf, DemandedElts, DemandedLHS, DemandedRHS))
5977 return;
5979 if (!!DemandedLHS) {
5980 const Value *LHS = Shuf->getOperand(0);
5981 computeKnownFPClass(LHS, DemandedLHS, InterestedClasses, Known,
5982 Depth + 1, Q);
5984 // If we don't know any bits, early out.
5985 if (Known.isUnknown())
5986 break;
5987 } else {
5988 Known.KnownFPClasses = fcNone;
5991 if (!!DemandedRHS) {
5992 KnownFPClass Known2;
5993 const Value *RHS = Shuf->getOperand(1);
5994 computeKnownFPClass(RHS, DemandedRHS, InterestedClasses, Known2,
5995 Depth + 1, Q);
5996 Known |= Known2;
5999 break;
6001 case Instruction::ExtractValue: {
6002 const ExtractValueInst *Extract = cast<ExtractValueInst>(Op);
6003 ArrayRef<unsigned> Indices = Extract->getIndices();
6004 const Value *Src = Extract->getAggregateOperand();
6005 if (isa<StructType>(Src->getType()) && Indices.size() == 1 &&
6006 Indices[0] == 0) {
6007 if (const auto *II = dyn_cast<IntrinsicInst>(Src)) {
6008 switch (II->getIntrinsicID()) {
6009 case Intrinsic::frexp: {
6010 Known.knownNot(fcSubnormal);
6012 KnownFPClass KnownSrc;
6013 computeKnownFPClass(II->getArgOperand(0), DemandedElts,
6014 InterestedClasses, KnownSrc, Depth + 1, Q);
6016 const Function *F = cast<Instruction>(Op)->getFunction();
6018 if (KnownSrc.isKnownNever(fcNegative))
6019 Known.knownNot(fcNegative);
6020 else {
6021 if (F && KnownSrc.isKnownNeverLogicalNegZero(*F, Op->getType()))
6022 Known.knownNot(fcNegZero);
6023 if (KnownSrc.isKnownNever(fcNegInf))
6024 Known.knownNot(fcNegInf);
6027 if (KnownSrc.isKnownNever(fcPositive))
6028 Known.knownNot(fcPositive);
6029 else {
6030 if (F && KnownSrc.isKnownNeverLogicalPosZero(*F, Op->getType()))
6031 Known.knownNot(fcPosZero);
6032 if (KnownSrc.isKnownNever(fcPosInf))
6033 Known.knownNot(fcPosInf);
6036 Known.propagateNaN(KnownSrc);
6037 return;
6039 default:
6040 break;
6045 computeKnownFPClass(Src, DemandedElts, InterestedClasses, Known, Depth + 1,
6047 break;
6049 case Instruction::PHI: {
6050 const PHINode *P = cast<PHINode>(Op);
6051 // Unreachable blocks may have zero-operand PHI nodes.
6052 if (P->getNumIncomingValues() == 0)
6053 break;
6055 // Otherwise take the union of the known FP classes of the incoming values,
6056 // taking conservative care to avoid excessive recursion.
6057 const unsigned PhiRecursionLimit = MaxAnalysisRecursionDepth - 2;
6059 if (Depth < PhiRecursionLimit) {
6060 // Skip if every incoming value is a reference to this PHI itself or undef.
6061 if (isa_and_nonnull<UndefValue>(P->hasConstantValue()))
6062 break;
6064 bool First = true;
6066 for (const Use &U : P->operands()) {
6067 Value *IncValue;
6068 Instruction *CxtI;
6069 breakSelfRecursivePHI(&U, P, IncValue, CxtI);
6070 // Skip direct self references.
6071 if (IncValue == P)
6072 continue;
6074 KnownFPClass KnownSrc;
6075 // Recurse, but cap the recursion to two levels, because we don't want
6076 // to waste time spinning around in loops. We need at least depth 2 to
6077 // detect known sign bits.
6078 computeKnownFPClass(IncValue, DemandedElts, InterestedClasses, KnownSrc,
6079 PhiRecursionLimit,
6080 Q.getWithoutCondContext().getWithInstruction(CxtI));
6082 if (First) {
6083 Known = KnownSrc;
6084 First = false;
6085 } else {
6086 Known |= KnownSrc;
6089 if (Known.KnownFPClasses == fcAllFlags)
6090 break;
6094 break;
6096 case Instruction::BitCast: {
6097 const Value *Src;
6098 if (!match(Op, m_ElementWiseBitCast(m_Value(Src))) ||
6099 !Src->getType()->isIntOrIntVectorTy())
6100 break;
6102 const Type *Ty = Op->getType()->getScalarType();
6103 KnownBits Bits(Ty->getScalarSizeInBits());
6104 computeKnownBits(Src, DemandedElts, Bits, Depth + 1, Q);
6106 // Transfer information from the sign bit.
6107 if (Bits.isNonNegative())
6108 Known.signBitMustBeZero();
6109 else if (Bits.isNegative())
6110 Known.signBitMustBeOne();
6112 if (Ty->isIEEE()) {
6113 // IEEE floats are NaN when all bits of the exponent plus at least one of
6114 // the fraction bits are 1. This means:
6115 // - If we assume unknown bits are 0 and the value is NaN, it will
6116 // always be NaN
6117 // - If we assume unknown bits are 1 and the value is not NaN, it can
6118 // never be NaN
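// For example (an f32 sketch): if the known-one bits already cover the full
// exponent 0x7f800000 plus at least one mantissa bit, the value must be a
// NaN; if the exponent cannot be all ones even with every unknown bit set,
// the value can never be a NaN.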
6119 if (APFloat(Ty->getFltSemantics(), Bits.One).isNaN())
6120 Known.KnownFPClasses = fcNan;
6121 else if (!APFloat(Ty->getFltSemantics(), ~Bits.Zero).isNaN())
6122 Known.knownNot(fcNan);
6124 // Build KnownBits representing Inf and check if it must be equal or
6125 // unequal to this value.
6126 auto InfKB = KnownBits::makeConstant(
6127 APFloat::getInf(Ty->getFltSemantics()).bitcastToAPInt());
6128 InfKB.Zero.clearSignBit();
6129 if (const auto InfResult = KnownBits::eq(Bits, InfKB)) {
6130 assert(!InfResult.value());
6131 Known.knownNot(fcInf);
6132 } else if (Bits == InfKB) {
6133 Known.KnownFPClasses = fcInf;
6136 // Build KnownBits representing Zero and check if it must be equal or
6137 // unequal to this value.
6138 auto ZeroKB = KnownBits::makeConstant(
6139 APFloat::getZero(Ty->getFltSemantics()).bitcastToAPInt());
6140 ZeroKB.Zero.clearSignBit();
6141 if (const auto ZeroResult = KnownBits::eq(Bits, ZeroKB)) {
6142 assert(!ZeroResult.value());
6143 Known.knownNot(fcZero);
6144 } else if (Bits == ZeroKB) {
6145 Known.KnownFPClasses = fcZero;
6149 break;
6151 default:
6152 break;
6156 KnownFPClass llvm::computeKnownFPClass(const Value *V,
6157 const APInt &DemandedElts,
6158 FPClassTest InterestedClasses,
6159 unsigned Depth,
6160 const SimplifyQuery &SQ) {
6161 KnownFPClass KnownClasses;
6162 ::computeKnownFPClass(V, DemandedElts, InterestedClasses, KnownClasses, Depth,
6163 SQ);
6164 return KnownClasses;
6167 KnownFPClass llvm::computeKnownFPClass(const Value *V,
6168 FPClassTest InterestedClasses,
6169 unsigned Depth,
6170 const SimplifyQuery &SQ) {
6171 KnownFPClass Known;
6172 ::computeKnownFPClass(V, Known, InterestedClasses, Depth, SQ);
6173 return Known;
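// isBytewiseValue returns the single byte that, when splatted, reproduces
// V's store pattern, or nullptr if no such byte exists. Illustrative cases
// (a sketch of the handling below):
//   i32 0xAAAAAAAA --> i8 0xAA
//   float +0.0     --> i8 0
//   i16 0xABCD     --> nullptr (the bytes differ)
//   undef          --> an undef i8 (it can match any byte)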
6176 Value *llvm::isBytewiseValue(Value *V, const DataLayout &DL) {
6178 // All byte-wide stores are splatable, even of arbitrary variables.
6179 if (V->getType()->isIntegerTy(8))
6180 return V;
6182 LLVMContext &Ctx = V->getContext();
6184 // Undef matches any byte; don't care.
6185 auto *UndefInt8 = UndefValue::get(Type::getInt8Ty(Ctx));
6186 if (isa<UndefValue>(V))
6187 return UndefInt8;
6189 // Return poison for zero-sized type.
6190 if (DL.getTypeStoreSize(V->getType()).isZero())
6191 return PoisonValue::get(Type::getInt8Ty(Ctx));
6193 Constant *C = dyn_cast<Constant>(V);
6194 if (!C) {
6195 // Conceptually, we could handle things like:
6196 // %a = zext i8 %X to i16
6197 // %b = shl i16 %a, 8
6198 // %c = or i16 %a, %b
6199 // but until there is an example that actually needs this, it doesn't seem
6200 // worth worrying about.
6201 return nullptr;
6204 // Handle 'null' ConstantAggregateZero etc.
6205 if (C->isNullValue())
6206 return Constant::getNullValue(Type::getInt8Ty(Ctx));
6208 // Constant floating-point values can be handled as integer values if the
6209 // corresponding integer value is "byteable". An important case is 0.0.
6210 if (ConstantFP *CFP = dyn_cast<ConstantFP>(C)) {
6211 Type *Ty = nullptr;
6212 if (CFP->getType()->isHalfTy())
6213 Ty = Type::getInt16Ty(Ctx);
6214 else if (CFP->getType()->isFloatTy())
6215 Ty = Type::getInt32Ty(Ctx);
6216 else if (CFP->getType()->isDoubleTy())
6217 Ty = Type::getInt64Ty(Ctx);
6218 // Don't handle long double formats, which have strange constraints.
6219 return Ty ? isBytewiseValue(ConstantExpr::getBitCast(CFP, Ty), DL)
6220 : nullptr;
6223 // We can handle constant integers whose width is a multiple of 8 bits.
6224 if (ConstantInt *CI = dyn_cast<ConstantInt>(C)) {
6225 if (CI->getBitWidth() % 8 == 0) {
6226 assert(CI->getBitWidth() > 8 && "8 bits should be handled above!");
6227 if (!CI->getValue().isSplat(8))
6228 return nullptr;
6229 return ConstantInt::get(Ctx, CI->getValue().trunc(8));
6233 if (auto *CE = dyn_cast<ConstantExpr>(C)) {
6234 if (CE->getOpcode() == Instruction::IntToPtr) {
6235 if (auto *PtrTy = dyn_cast<PointerType>(CE->getType())) {
6236 unsigned BitWidth = DL.getPointerSizeInBits(PtrTy->getAddressSpace());
6237 if (Constant *Op = ConstantFoldIntegerCast(
6238 CE->getOperand(0), Type::getIntNTy(Ctx, BitWidth), false, DL))
6239 return isBytewiseValue(Op, DL);
6244 auto Merge = [&](Value *LHS, Value *RHS) -> Value * {
6245 if (LHS == RHS)
6246 return LHS;
6247 if (!LHS || !RHS)
6248 return nullptr;
6249 if (LHS == UndefInt8)
6250 return RHS;
6251 if (RHS == UndefInt8)
6252 return LHS;
6253 return nullptr;
6256 if (ConstantDataSequential *CA = dyn_cast<ConstantDataSequential>(C)) {
6257 Value *Val = UndefInt8;
6258 for (unsigned I = 0, E = CA->getNumElements(); I != E; ++I)
6259 if (!(Val = Merge(Val, isBytewiseValue(CA->getElementAsConstant(I), DL))))
6260 return nullptr;
6261 return Val;
6264 if (isa<ConstantAggregate>(C)) {
6265 Value *Val = UndefInt8;
6266 for (Value *Op : C->operands())
6267 if (!(Val = Merge(Val, isBytewiseValue(Op, DL))))
6268 return nullptr;
6269 return Val;
6272 // Don't try to handle the handful of other constants.
6273 return nullptr;
6276 // This is the recursive version of BuildSubAggregate. It takes a few different
6277 // arguments. Idxs is the index within the nested struct From that we are
6278 // looking at now (which is of type IndexedType). IdxSkip is the number of
6279 // indices from Idxs that should be left out when inserting into the resulting
6280 // struct. To is the result struct built so far; new insertvalue instructions
6281 // build on that.
6282 static Value *BuildSubAggregate(Value *From, Value *To, Type *IndexedType,
6283 SmallVectorImpl<unsigned> &Idxs,
6284 unsigned IdxSkip,
6285 BasicBlock::iterator InsertBefore) {
6286 StructType *STy = dyn_cast<StructType>(IndexedType);
6287 if (STy) {
6288 // Save the original To argument so we can modify it
6289 Value *OrigTo = To;
6290 // General case, the type indexed by Idxs is a struct
6291 for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
6292 // Process each struct element recursively
6293 Idxs.push_back(i);
6294 Value *PrevTo = To;
6295 To = BuildSubAggregate(From, To, STy->getElementType(i), Idxs, IdxSkip,
6296 InsertBefore);
6297 Idxs.pop_back();
6298 if (!To) {
6299 // Couldn't find any inserted value for this index? Cleanup
6300 while (PrevTo != OrigTo) {
6301 InsertValueInst* Del = cast<InsertValueInst>(PrevTo);
6302 PrevTo = Del->getAggregateOperand();
6303 Del->eraseFromParent();
6305 // Stop processing elements
6306 break;
6309 // If we successfully found a value for each of our subaggregates
6310 if (To)
6311 return To;
6313 // Base case, the type indexed by Idxs is not a struct, or not all of
6314 // the struct's elements had a value that was inserted directly. In the latter
6315 // case, perhaps we can't determine each of the subelements individually, but
6316 // we might be able to find the complete struct somewhere.
6318 // Find the value that is at that particular spot
6319 Value *V = FindInsertedValue(From, Idxs);
6321 if (!V)
6322 return nullptr;
6324 // Insert the value in the new (sub) aggregate
6325 return InsertValueInst::Create(To, V, ArrayRef(Idxs).slice(IdxSkip), "tmp",
6326 InsertBefore);
6329 // This helper takes a nested struct and extracts a part of it (which is again a
6330 // struct) into a new value. For example, given the struct:
6331 // { a, { b, { c, d }, e } }
6332 // and the indices "1, 1" this returns
6333 // { c, d }.
6335 // It does this by inserting an insertvalue for each element in the resulting
6336 // struct, as opposed to just inserting a single struct. This will only work if
6337 // each of the elements of the substruct is known (i.e., inserted into From by an
6338 // insertvalue instruction somewhere).
6340 // All inserted insertvalue instructions are inserted before InsertBefore
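// Illustrative sketch (not part of the original source; value names and the
// element type are hypothetical): for the { c, d } example above, with both
// scalar elements previously inserted into From via insertvalue, a call like
// BuildSubAggregate(From, {1, 1}, InsertBefore) would emit IR roughly like:
//   %x = insertvalue { i32, i32 } poison, i32 %c, 0
//   %y = insertvalue { i32, i32 } %x, i32 %d, 1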
6341 static Value *BuildSubAggregate(Value *From, ArrayRef<unsigned> idx_range,
6342 BasicBlock::iterator InsertBefore) {
6343 Type *IndexedType = ExtractValueInst::getIndexedType(From->getType(),
6344 idx_range);
6345 Value *To = PoisonValue::get(IndexedType);
6346 SmallVector<unsigned, 10> Idxs(idx_range);
6347 unsigned IdxSkip = Idxs.size();
6349 return BuildSubAggregate(From, To, IndexedType, Idxs, IdxSkip, InsertBefore);
6352 /// Given an aggregate and a sequence of indices, see if the scalar value
6353 /// indexed is already around as a register, for example if it was inserted
6354 /// directly into the aggregate.
6356 /// If InsertBefore is not null, this function will duplicate (modified)
6357 /// insertvalues when a part of a nested struct is extracted.
6358 Value *
6359 llvm::FindInsertedValue(Value *V, ArrayRef<unsigned> idx_range,
6360 std::optional<BasicBlock::iterator> InsertBefore) {
6361 // Nothing to index? Just return V then (this is useful at the end of our
6362 // recursion).
6363 if (idx_range.empty())
6364 return V;
6365 // We have indices, so V should have an indexable type.
6366 assert((V->getType()->isStructTy() || V->getType()->isArrayTy()) &&
6367 "Not looking at a struct or array?");
6368 assert(ExtractValueInst::getIndexedType(V->getType(), idx_range) &&
6369 "Invalid indices for type?");
6371 if (Constant *C = dyn_cast<Constant>(V)) {
6372 C = C->getAggregateElement(idx_range[0]);
6373 if (!C) return nullptr;
6374 return FindInsertedValue(C, idx_range.slice(1), InsertBefore);
6377 if (InsertValueInst *I = dyn_cast<InsertValueInst>(V)) {
6378 // Loop the indices for the insertvalue instruction in parallel with the
6379 // requested indices
6380 const unsigned *req_idx = idx_range.begin();
6381 for (const unsigned *i = I->idx_begin(), *e = I->idx_end();
6382 i != e; ++i, ++req_idx) {
6383 if (req_idx == idx_range.end()) {
6384 // We can't handle this without inserting insertvalues
6385 if (!InsertBefore)
6386 return nullptr;
6388 // The requested index identifies a part of a nested aggregate. Handle
6389 // this specially. For example,
6390 // %A = insertvalue { i32, {i32, i32 } } undef, i32 10, 1, 0
6391 // %B = insertvalue { i32, {i32, i32 } } %A, i32 11, 1, 1
6392 // %C = extractvalue {i32, { i32, i32 } } %B, 1
6393 // This can be changed into
6394 // %A = insertvalue {i32, i32 } undef, i32 10, 0
6395 // %C = insertvalue {i32, i32 } %A, i32 11, 1
6396 // which allows the unused 0,0 element from the nested struct to be
6397 // removed.
6398 return BuildSubAggregate(V, ArrayRef(idx_range.begin(), req_idx),
6399 *InsertBefore);
6402 // This insertvalue inserts something other than what we are looking for.
6403 // See if the (aggregate) value inserted into has the value we are
6404 // looking for, then.
6405 if (*req_idx != *i)
6406 return FindInsertedValue(I->getAggregateOperand(), idx_range,
6407 InsertBefore);
6409 // If we end up here, the indices of the insertvalue match with those
6410 // requested (though possibly only partially). Now we recursively look at
6411 // the inserted value, passing any remaining indices.
6412 return FindInsertedValue(I->getInsertedValueOperand(),
6413 ArrayRef(req_idx, idx_range.end()), InsertBefore);
6416 if (ExtractValueInst *I = dyn_cast<ExtractValueInst>(V)) {
6417 // If we're extracting a value from an aggregate that was extracted from
6418 // something else, we can extract from that something else directly instead.
6419 // However, we will need to chain I's indices with the requested indices.
6421 // Calculate the number of indices required
6422 unsigned size = I->getNumIndices() + idx_range.size();
6423 // Allocate some space to put the new indices in
6424 SmallVector<unsigned, 5> Idxs;
6425 Idxs.reserve(size);
6426 // Add indices from the extract value instruction
6427 Idxs.append(I->idx_begin(), I->idx_end());
6429 // Add requested indices
6430 Idxs.append(idx_range.begin(), idx_range.end());
6432 assert(Idxs.size() == size
6433 && "Number of indices added not correct?");
6435 return FindInsertedValue(I->getAggregateOperand(), Idxs, InsertBefore);
6437 // Otherwise, we don't know (e.g., when extracting from a function return value
6438 // or a load instruction).
6439 return nullptr;
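// Illustrative example (not part of the original source; the global name and
// array length are hypothetical): with CharSize == 8, the helper below accepts
//   getelementptr [13 x i8], ptr @str, i64 0, i64 %idx
// because it has exactly three operands, the source element type is an array
// of i8, and the first index is the constant zero.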
6442 bool llvm::isGEPBasedOnPointerToString(const GEPOperator *GEP,
6443 unsigned CharSize) {
6444 // Make sure the GEP has exactly three arguments.
6445 if (GEP->getNumOperands() != 3)
6446 return false;
6448 // Make sure the index-ee is a pointer to an array of \p CharSize integers.
6450 ArrayType *AT = dyn_cast<ArrayType>(GEP->getSourceElementType());
6451 if (!AT || !AT->getElementType()->isIntegerTy(CharSize))
6452 return false;
6454 // Check to make sure that the first operand of the GEP is an integer and
6455 // has value 0 so that we are sure we're indexing into the initializer.
6456 const ConstantInt *FirstIdx = dyn_cast<ConstantInt>(GEP->getOperand(1));
6457 if (!FirstIdx || !FirstIdx->isZero())
6458 return false;
6460 return true;
6463 // If V refers to an initialized global constant, set Slice either to
6464 // its initializer if the size of its elements equals ElementSize, or,
6465 // for ElementSize == 8, to its representation as an array of unsigned
6466 // char. Return true on success.
6467 // Offset is in units of ElementSize-sized elements.
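// Illustrative example (not part of the original source; the global name is
// hypothetical): for
//   @a = constant [4 x i16] [i16 1, i16 2, i16 3, i16 4]
// a query with ElementSize == 16 and Offset == 1 is expected to succeed with
// Slice.Array set to the initializer, Slice.Offset == 1 and Slice.Length == 3.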
6468 bool llvm::getConstantDataArrayInfo(const Value *V,
6469 ConstantDataArraySlice &Slice,
6470 unsigned ElementSize, uint64_t Offset) {
6471 assert(V && "V should not be null.");
6472 assert((ElementSize % 8) == 0 &&
6473 "ElementSize expected to be a multiple of the size of a byte.");
6474 unsigned ElementSizeInBytes = ElementSize / 8;
6476 // Drill down into the pointer expression V, ignoring any intervening
6477 // casts, and determine the identity of the object it references along
6478 // with the cumulative byte offset into it.
6479 const GlobalVariable *GV =
6480 dyn_cast<GlobalVariable>(getUnderlyingObject(V));
6481 if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer())
6482 // Fail if V is not based on a constant global object.
6483 return false;
6485 const DataLayout &DL = GV->getDataLayout();
6486 APInt Off(DL.getIndexTypeSizeInBits(V->getType()), 0);
6488 if (GV != V->stripAndAccumulateConstantOffsets(DL, Off,
6489 /*AllowNonInbounds*/ true))
6490 // Fail if a constant offset could not be determined.
6491 return false;
6493 uint64_t StartIdx = Off.getLimitedValue();
6494 if (StartIdx == UINT64_MAX)
6495 // Fail if the constant offset is excessive.
6496 return false;
6498 // Off/StartIdx is in units of bytes, so we need to convert it to a number
6499 // of elements. Simply bail out if that isn't possible.
6500 if ((StartIdx % ElementSizeInBytes) != 0)
6501 return false;
6503 Offset += StartIdx / ElementSizeInBytes;
6504 ConstantDataArray *Array = nullptr;
6505 ArrayType *ArrayTy = nullptr;
6507 if (GV->getInitializer()->isNullValue()) {
6508 Type *GVTy = GV->getValueType();
6509 uint64_t SizeInBytes = DL.getTypeStoreSize(GVTy).getFixedValue();
6510 uint64_t Length = SizeInBytes / ElementSizeInBytes;
6512 Slice.Array = nullptr;
6513 Slice.Offset = 0;
6514 // Return an empty Slice for undersized constants to let callers
6515 // transform even undefined library calls into simpler, well-defined
6516 // expressions. This is preferable to making the calls although it
6517 // prevents sanitizers from detecting such calls.
6518 Slice.Length = Length < Offset ? 0 : Length - Offset;
6519 return true;
6522 auto *Init = const_cast<Constant *>(GV->getInitializer());
6523 if (auto *ArrayInit = dyn_cast<ConstantDataArray>(Init)) {
6524 Type *InitElTy = ArrayInit->getElementType();
6525 if (InitElTy->isIntegerTy(ElementSize)) {
6526 // If Init is an initializer for an array of the expected type
6527 // and size, use it as is.
6528 Array = ArrayInit;
6529 ArrayTy = ArrayInit->getType();
6533 if (!Array) {
6534 if (ElementSize != 8)
6535 // TODO: Handle conversions to larger integral types.
6536 return false;
6538 // Otherwise extract the portion of the initializer starting
6539 // at Offset as an array of bytes, and reset Offset.
6540 Init = ReadByteArrayFromGlobal(GV, Offset);
6541 if (!Init)
6542 return false;
6544 Offset = 0;
6545 Array = dyn_cast<ConstantDataArray>(Init);
6546 ArrayTy = dyn_cast<ArrayType>(Init->getType());
6549 uint64_t NumElts = ArrayTy->getArrayNumElements();
6550 if (Offset > NumElts)
6551 return false;
6553 Slice.Array = Array;
6554 Slice.Offset = Offset;
6555 Slice.Length = NumElts - Offset;
6556 return true;
6559 /// Extract bytes from the initializer of the constant array V, which need
6560 /// not be a nul-terminated string. On success, store the bytes in Str and
6561 /// return true. When TrimAtNul is set, Str will contain only the bytes up
6562 /// to but not including the first nul. Return false on failure.
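// Illustrative example (not part of the original source; the global name is
// hypothetical): for
//   @s = private constant [6 x i8] c"hello\00"
// a query on a pointer to @s with TrimAtNul == true is expected to return true
// and set Str to "hello"; with TrimAtNul == false, Str would also include the
// trailing nul byte.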
6563 bool llvm::getConstantStringInfo(const Value *V, StringRef &Str,
6564 bool TrimAtNul) {
6565 ConstantDataArraySlice Slice;
6566 if (!getConstantDataArrayInfo(V, Slice, 8))
6567 return false;
6569 if (Slice.Array == nullptr) {
6570 if (TrimAtNul) {
6571 // Return a nul-terminated string even for an empty Slice. This is
6572 // safe because all existing SimplifyLibcalls callers require string
6573 // arguments and the behavior of the functions they fold is undefined
6574 // otherwise. Folding the calls this way is preferable to making
6575 // the undefined library calls, even though it prevents sanitizers
6576 // from reporting such calls.
6577 Str = StringRef();
6578 return true;
6580 if (Slice.Length == 1) {
6581 Str = StringRef("", 1);
6582 return true;
6584 // We cannot instantiate a StringRef as we do not have an appropriate string
6585 // of 0s at hand.
6586 return false;
6589 // Start out with the entire array in the StringRef.
6590 Str = Slice.Array->getAsString();
6591 // Skip over 'offset' bytes.
6592 Str = Str.substr(Slice.Offset);
6594 if (TrimAtNul) {
6595 // Trim off the \0 and anything after it. If the array is not nul
6596 // terminated, we just return the rest of the string. The client may know
6597 // some other way that the string is length-bound.
6598 Str = Str.substr(0, Str.find('\0'));
6600 return true;
6603 // These next two are very similar to the above, but also look through PHI
6604 // nodes.
6605 // TODO: See if we can integrate these two together.
6607 /// If we can compute the length of the string pointed to by
6608 /// the specified pointer, return 'len+1'. If we can't, return 0.
6609 static uint64_t GetStringLengthH(const Value *V,
6610 SmallPtrSetImpl<const PHINode*> &PHIs,
6611 unsigned CharSize) {
6612 // Look through noop bitcast instructions.
6613 V = V->stripPointerCasts();
6615 // If this is a PHI node, there are two cases: either we have already seen it
6616 // or we haven't.
6617 if (const PHINode *PN = dyn_cast<PHINode>(V)) {
6618 if (!PHIs.insert(PN).second)
6619 return ~0ULL; // already in the set.
6621 // If it was new, see if all the input strings are the same length.
6622 uint64_t LenSoFar = ~0ULL;
6623 for (Value *IncValue : PN->incoming_values()) {
6624 uint64_t Len = GetStringLengthH(IncValue, PHIs, CharSize);
6625 if (Len == 0) return 0; // Unknown length -> unknown.
6627 if (Len == ~0ULL) continue;
6629 if (Len != LenSoFar && LenSoFar != ~0ULL)
6630 return 0; // Disagree -> unknown.
6631 LenSoFar = Len;
6634 // Success, all agree.
6635 return LenSoFar;
6638 // strlen(select(c,x,y)) -> strlen(x) ^ strlen(y)
6639 if (const SelectInst *SI = dyn_cast<SelectInst>(V)) {
6640 uint64_t Len1 = GetStringLengthH(SI->getTrueValue(), PHIs, CharSize);
6641 if (Len1 == 0) return 0;
6642 uint64_t Len2 = GetStringLengthH(SI->getFalseValue(), PHIs, CharSize);
6643 if (Len2 == 0) return 0;
6644 if (Len1 == ~0ULL) return Len2;
6645 if (Len2 == ~0ULL) return Len1;
6646 if (Len1 != Len2) return 0;
6647 return Len1;
6650 // Otherwise, see if we can read the string.
6651 ConstantDataArraySlice Slice;
6652 if (!getConstantDataArrayInfo(V, Slice, CharSize))
6653 return 0;
6655 if (Slice.Array == nullptr)
6656 // Zeroinitializer (including an empty one).
6657 return 1;
6659 // Search for the first nul character. Return a conservative result even
6660 // when there is no nul. This is safe since otherwise the string function
6661 // being folded (such as strlen) is undefined, and folding can be preferable
6662 // to making the undefined library call.
6663 unsigned NullIndex = 0;
6664 for (unsigned E = Slice.Length; NullIndex < E; ++NullIndex) {
6665 if (Slice.Array->getElementAsInteger(Slice.Offset + NullIndex) == 0)
6666 break;
6669 return NullIndex + 1;
6672 /// If we can compute the length of the string pointed to by
6673 /// the specified pointer, return 'len+1'. If we can't, return 0.
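// Illustrative example (not part of the original source; the global name is
// hypothetical): for
//   @s = private constant [4 x i8] c"abc\00"
// GetStringLength on a pointer to @s returns 4 (strlen("abc") + 1), and it
// returns 0 when the pointed-to contents cannot be determined.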
6674 uint64_t llvm::GetStringLength(const Value *V, unsigned CharSize) {
6675 if (!V->getType()->isPointerTy())
6676 return 0;
6678 SmallPtrSet<const PHINode*, 32> PHIs;
6679 uint64_t Len = GetStringLengthH(V, PHIs, CharSize);
6680 // If Len is ~0ULL, we had an infinite phi cycle: this is dead code, so return
6681 // 1, the length of an empty string.
6682 return Len == ~0ULL ? 1 : Len;
6685 const Value *
6686 llvm::getArgumentAliasingToReturnedPointer(const CallBase *Call,
6687 bool MustPreserveNullness) {
6688 assert(Call &&
6689 "getArgumentAliasingToReturnedPointer only works on nonnull calls");
6690 if (const Value *RV = Call->getReturnedArgOperand())
6691 return RV;
6692 // This can be used only as an aliasing property.
6693 if (isIntrinsicReturningPointerAliasingArgumentWithoutCapturing(
6694 Call, MustPreserveNullness))
6695 return Call->getArgOperand(0);
6696 return nullptr;
6699 bool llvm::isIntrinsicReturningPointerAliasingArgumentWithoutCapturing(
6700 const CallBase *Call, bool MustPreserveNullness) {
6701 switch (Call->getIntrinsicID()) {
6702 case Intrinsic::launder_invariant_group:
6703 case Intrinsic::strip_invariant_group:
6704 case Intrinsic::aarch64_irg:
6705 case Intrinsic::aarch64_tagp:
6706 // The amdgcn_make_buffer_rsrc function does not alter the address of the
6707 // input pointer (and thus preserves null-ness for the purposes of escape
6708 // analysis, which is where the MustPreserveNullness flag comes into play).
6709 // However, it will not necessarily map ptr addrspace(N) null to ptr
6710 // addrspace(8) null, aka the "null descriptor", which has "all loads return
6711 // 0, all stores are dropped" semantics. Given the context of this intrinsic
6712 // list, no one should be relying on such a strict interpretation of
6713 // MustPreserveNullness (and, at time of writing, they are not), but we
6714 // document this fact out of an abundance of caution.
6715 case Intrinsic::amdgcn_make_buffer_rsrc:
6716 return true;
6717 case Intrinsic::ptrmask:
6718 return !MustPreserveNullness;
6719 case Intrinsic::threadlocal_address:
6720 // The underlying variable changes with the thread ID. The thread ID may change
6721 // at coroutine suspend points.
6722 return !Call->getParent()->getParent()->isPresplitCoroutine();
6723 default:
6724 return false;
6728 /// \p PN defines a loop-variant pointer to an object. Check if the
6729 /// previous iteration of the loop was referring to the same object as \p PN.
6730 static bool isSameUnderlyingObjectInLoop(const PHINode *PN,
6731 const LoopInfo *LI) {
6732 // Find the loop-defined value.
6733 Loop *L = LI->getLoopFor(PN->getParent());
6734 if (PN->getNumIncomingValues() != 2)
6735 return true;
6737 // Find the value from previous iteration.
6738 auto *PrevValue = dyn_cast<Instruction>(PN->getIncomingValue(0));
6739 if (!PrevValue || LI->getLoopFor(PrevValue->getParent()) != L)
6740 PrevValue = dyn_cast<Instruction>(PN->getIncomingValue(1));
6741 if (!PrevValue || LI->getLoopFor(PrevValue->getParent()) != L)
6742 return true;
6744 // If a new pointer is loaded in the loop, the pointer references a different
6745 // object in every iteration. E.g.:
6746 // for (i)
6747 // int *p = a[i];
6748 // ...
6749 if (auto *Load = dyn_cast<LoadInst>(PrevValue))
6750 if (!L->isLoopInvariant(Load->getPointerOperand()))
6751 return false;
6752 return true;
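// Illustrative example (not part of the original source; value names are
// hypothetical): for IR such as
//   %a = alloca [16 x i8]
//   %p = getelementptr inbounds [16 x i8], ptr %a, i64 0, i64 4
//   %q = addrspacecast ptr %p to ptr addrspace(1)
// getUnderlyingObject(%q) looks through the addrspacecast and the GEP and
// returns the alloca %a.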
6755 const Value *llvm::getUnderlyingObject(const Value *V, unsigned MaxLookup) {
6756 for (unsigned Count = 0; MaxLookup == 0 || Count < MaxLookup; ++Count) {
6757 if (auto *GEP = dyn_cast<GEPOperator>(V)) {
6758 const Value *PtrOp = GEP->getPointerOperand();
6759 if (!PtrOp->getType()->isPointerTy()) // Only handle scalar pointer base.
6760 return V;
6761 V = PtrOp;
6762 } else if (Operator::getOpcode(V) == Instruction::BitCast ||
6763 Operator::getOpcode(V) == Instruction::AddrSpaceCast) {
6764 Value *NewV = cast<Operator>(V)->getOperand(0);
6765 if (!NewV->getType()->isPointerTy())
6766 return V;
6767 V = NewV;
6768 } else if (auto *GA = dyn_cast<GlobalAlias>(V)) {
6769 if (GA->isInterposable())
6770 return V;
6771 V = GA->getAliasee();
6772 } else {
6773 if (auto *PHI = dyn_cast<PHINode>(V)) {
6774 // Look through single-arg phi nodes created by LCSSA.
6775 if (PHI->getNumIncomingValues() == 1) {
6776 V = PHI->getIncomingValue(0);
6777 continue;
6779 } else if (auto *Call = dyn_cast<CallBase>(V)) {
6780 // CaptureTracking can know about special capturing properties of some
6781 // intrinsics like launder.invariant.group, that can't be expressed with
6782 // the attributes, but have properties like returning aliasing pointer.
6783 // Because some analyses may assume that a nocapture pointer is not
6784 // returned from some special intrinsics (because the function would have
6785 // to be marked with the returned attribute), it is crucial to use this
6786 // function, as it is kept in sync with CaptureTracking. Not using it may
6787 // cause weird miscompilations where two aliasing pointers are assumed to
6788 // be noalias.
6789 if (auto *RP = getArgumentAliasingToReturnedPointer(Call, false)) {
6790 V = RP;
6791 continue;
6795 return V;
6797 assert(V->getType()->isPointerTy() && "Unexpected operand type!");
6799 return V;
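// Illustrative example (not part of the original source; value names are
// hypothetical): for
//   %p = select i1 %c, ptr %a, ptr %b
// getUnderlyingObjects(%p, Objects) collects the underlying objects of both
// %a and %b, whereas getUnderlyingObject(%p) stops at the select because it
// only follows a single chain.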
6802 void llvm::getUnderlyingObjects(const Value *V,
6803 SmallVectorImpl<const Value *> &Objects,
6804 const LoopInfo *LI, unsigned MaxLookup) {
6805 SmallPtrSet<const Value *, 4> Visited;
6806 SmallVector<const Value *, 4> Worklist;
6807 Worklist.push_back(V);
6808 do {
6809 const Value *P = Worklist.pop_back_val();
6810 P = getUnderlyingObject(P, MaxLookup);
6812 if (!Visited.insert(P).second)
6813 continue;
6815 if (auto *SI = dyn_cast<SelectInst>(P)) {
6816 Worklist.push_back(SI->getTrueValue());
6817 Worklist.push_back(SI->getFalseValue());
6818 continue;
6821 if (auto *PN = dyn_cast<PHINode>(P)) {
6822 // If this PHI changes the underlying object in every iteration of the
6823 // loop, don't look through it. Consider:
6824 // int **A;
6825 // for (i) {
6826 // Prev = Curr; // Prev = PHI (Prev_0, Curr)
6827 // Curr = A[i];
6828 // *Prev, *Curr;
6830 // Prev is tracking Curr one iteration behind so they refer to different
6831 // underlying objects.
6832 if (!LI || !LI->isLoopHeader(PN->getParent()) ||
6833 isSameUnderlyingObjectInLoop(PN, LI))
6834 append_range(Worklist, PN->incoming_values());
6835 else
6836 Objects.push_back(P);
6837 continue;
6840 Objects.push_back(P);
6841 } while (!Worklist.empty());
6844 const Value *llvm::getUnderlyingObjectAggressive(const Value *V) {
6845 const unsigned MaxVisited = 8;
6847 SmallPtrSet<const Value *, 8> Visited;
6848 SmallVector<const Value *, 8> Worklist;
6849 Worklist.push_back(V);
6850 const Value *Object = nullptr;
6851 // Used as a fallback if we can't find a common underlying object through
6852 // recursion.
6853 bool First = true;
6854 const Value *FirstObject = getUnderlyingObject(V);
6855 do {
6856 const Value *P = Worklist.pop_back_val();
6857 P = First ? FirstObject : getUnderlyingObject(P);
6858 First = false;
6860 if (!Visited.insert(P).second)
6861 continue;
6863 if (Visited.size() == MaxVisited)
6864 return FirstObject;
6866 if (auto *SI = dyn_cast<SelectInst>(P)) {
6867 Worklist.push_back(SI->getTrueValue());
6868 Worklist.push_back(SI->getFalseValue());
6869 continue;
6872 if (auto *PN = dyn_cast<PHINode>(P)) {
6873 append_range(Worklist, PN->incoming_values());
6874 continue;
6877 if (!Object)
6878 Object = P;
6879 else if (Object != P)
6880 return FirstObject;
6881 } while (!Worklist.empty());
6883 return Object ? Object : FirstObject;
6886 /// This is the function that does the work of looking through basic
6887 /// ptrtoint+arithmetic+inttoptr sequences.
6888 static const Value *getUnderlyingObjectFromInt(const Value *V) {
6889 do {
6890 if (const Operator *U = dyn_cast<Operator>(V)) {
6891 // If we find a ptrtoint, we can transfer control back to the
6892 // regular getUnderlyingObjectFromInt.
6893 if (U->getOpcode() == Instruction::PtrToInt)
6894 return U->getOperand(0);
6895 // If we find an add of a constant, a multiplied value, or a phi, it's
6896 // likely that the other operand will lead us to the base
6897 // object. We don't have to worry about the case where the
6898 // object address is somehow being computed by the multiply,
6899 // because our callers only care when the result is an
6900 // identifiable object.
6901 if (U->getOpcode() != Instruction::Add ||
6902 (!isa<ConstantInt>(U->getOperand(1)) &&
6903 Operator::getOpcode(U->getOperand(1)) != Instruction::Mul &&
6904 !isa<PHINode>(U->getOperand(1))))
6905 return V;
6906 V = U->getOperand(0);
6907 } else {
6908 return V;
6910 assert(V->getType()->isIntegerTy() && "Unexpected operand type!");
6911 } while (true);
6914 /// This is a wrapper around getUnderlyingObjects and adds support for basic
6915 /// ptrtoint+arithmetic+inttoptr sequences.
6916 /// It returns false if an unidentified object is found in getUnderlyingObjects.
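// Illustrative example (not part of the original source; value names are
// hypothetical): for a sequence such as
//   %i = ptrtoint ptr %g to i64
//   %j = add i64 %i, 16
//   %p = inttoptr i64 %j to ptr
// this function is expected to look through the inttoptr/add/ptrtoint chain
// and report %g, provided %g is an identifiable object such as a global
// variable or an alloca.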
6917 bool llvm::getUnderlyingObjectsForCodeGen(const Value *V,
6918 SmallVectorImpl<Value *> &Objects) {
6919 SmallPtrSet<const Value *, 16> Visited;
6920 SmallVector<const Value *, 4> Working(1, V);
6921 do {
6922 V = Working.pop_back_val();
6924 SmallVector<const Value *, 4> Objs;
6925 getUnderlyingObjects(V, Objs);
6927 for (const Value *V : Objs) {
6928 if (!Visited.insert(V).second)
6929 continue;
6930 if (Operator::getOpcode(V) == Instruction::IntToPtr) {
6931 const Value *O =
6932 getUnderlyingObjectFromInt(cast<User>(V)->getOperand(0));
6933 if (O->getType()->isPointerTy()) {
6934 Working.push_back(O);
6935 continue;
6938 // If getUnderlyingObjects fails to find an identifiable object,
6939 // getUnderlyingObjectsForCodeGen also fails for safety.
6940 if (!isIdentifiedObject(V)) {
6941 Objects.clear();
6942 return false;
6944 Objects.push_back(const_cast<Value *>(V));
6946 } while (!Working.empty());
6947 return true;
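// Illustrative example (not part of the original source; value names are
// hypothetical): for
//   %a   = alloca i64
//   %gep = getelementptr i8, ptr %a, i64 0
//   %sel = select i1 %c, ptr %a, ptr %gep
// findAllocaForValue(%sel) walks through the select and the GEP and returns
// %a, since every path leads to the same alloca; it returns nullptr when two
// different allocas (or an unknown value) are reachable.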
6950 AllocaInst *llvm::findAllocaForValue(Value *V, bool OffsetZero) {
6951 AllocaInst *Result = nullptr;
6952 SmallPtrSet<Value *, 4> Visited;
6953 SmallVector<Value *, 4> Worklist;
6955 auto AddWork = [&](Value *V) {
6956 if (Visited.insert(V).second)
6957 Worklist.push_back(V);
6960 AddWork(V);
6961 do {
6962 V = Worklist.pop_back_val();
6963 assert(Visited.count(V));
6965 if (AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
6966 if (Result && Result != AI)
6967 return nullptr;
6968 Result = AI;
6969 } else if (CastInst *CI = dyn_cast<CastInst>(V)) {
6970 AddWork(CI->getOperand(0));
6971 } else if (PHINode *PN = dyn_cast<PHINode>(V)) {
6972 for (Value *IncValue : PN->incoming_values())
6973 AddWork(IncValue);
6974 } else if (auto *SI = dyn_cast<SelectInst>(V)) {
6975 AddWork(SI->getTrueValue());
6976 AddWork(SI->getFalseValue());
6977 } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(V)) {
6978 if (OffsetZero && !GEP->hasAllZeroIndices())
6979 return nullptr;
6980 AddWork(GEP->getPointerOperand());
6981 } else if (CallBase *CB = dyn_cast<CallBase>(V)) {
6982 Value *Returned = CB->getReturnedArgOperand();
6983 if (Returned)
6984 AddWork(Returned);
6985 else
6986 return nullptr;
6987 } else {
6988 return nullptr;
6990 } while (!Worklist.empty());
6992 return Result;
6995 static bool onlyUsedByLifetimeMarkersOrDroppableInstsHelper(
6996 const Value *V, bool AllowLifetime, bool AllowDroppable) {
6997 for (const User *U : V->users()) {
6998 const IntrinsicInst *II = dyn_cast<IntrinsicInst>(U);
6999 if (!II)
7000 return false;
7002 if (AllowLifetime && II->isLifetimeStartOrEnd())
7003 continue;
7005 if (AllowDroppable && II->isDroppable())
7006 continue;
7008 return false;
7010 return true;
7013 bool llvm::onlyUsedByLifetimeMarkers(const Value *V) {
7014 return onlyUsedByLifetimeMarkersOrDroppableInstsHelper(
7015 V, /* AllowLifetime */ true, /* AllowDroppable */ false);
7017 bool llvm::onlyUsedByLifetimeMarkersOrDroppableInsts(const Value *V) {
7018 return onlyUsedByLifetimeMarkersOrDroppableInstsHelper(
7019 V, /* AllowLifetime */ true, /* AllowDroppable */ true);
7022 bool llvm::isNotCrossLaneOperation(const Instruction *I) {
7023 if (auto *II = dyn_cast<IntrinsicInst>(I))
7024 return isTriviallyVectorizable(II->getIntrinsicID());
7025 auto *Shuffle = dyn_cast<ShuffleVectorInst>(I);
7026 return (!Shuffle || Shuffle->isSelect()) &&
7027 !isa<CallBase, BitCastInst, ExtractElementInst>(I);
7030 bool llvm::isSafeToSpeculativelyExecute(const Instruction *Inst,
7031 const Instruction *CtxI,
7032 AssumptionCache *AC,
7033 const DominatorTree *DT,
7034 const TargetLibraryInfo *TLI,
7035 bool UseVariableInfo) {
7036 return isSafeToSpeculativelyExecuteWithOpcode(Inst->getOpcode(), Inst, CtxI,
7037 AC, DT, TLI, UseVariableInfo);
7040 bool llvm::isSafeToSpeculativelyExecuteWithOpcode(
7041 unsigned Opcode, const Instruction *Inst, const Instruction *CtxI,
7042 AssumptionCache *AC, const DominatorTree *DT, const TargetLibraryInfo *TLI,
7043 bool UseVariableInfo) {
7044 #ifndef NDEBUG
7045 if (Inst->getOpcode() != Opcode) {
7046 // Check that the operands are actually compatible with the Opcode override.
7047 auto hasEqualReturnAndLeadingOperandTypes =
7048 [](const Instruction *Inst, unsigned NumLeadingOperands) {
7049 if (Inst->getNumOperands() < NumLeadingOperands)
7050 return false;
7051 const Type *ExpectedType = Inst->getType();
7052 for (unsigned ItOp = 0; ItOp < NumLeadingOperands; ++ItOp)
7053 if (Inst->getOperand(ItOp)->getType() != ExpectedType)
7054 return false;
7055 return true;
7057 assert(!Instruction::isBinaryOp(Opcode) ||
7058 hasEqualReturnAndLeadingOperandTypes(Inst, 2));
7059 assert(!Instruction::isUnaryOp(Opcode) ||
7060 hasEqualReturnAndLeadingOperandTypes(Inst, 1));
7062 #endif
7064 switch (Opcode) {
7065 default:
7066 return true;
7067 case Instruction::UDiv:
7068 case Instruction::URem: {
7069 // x / y is undefined if y == 0.
7070 const APInt *V;
7071 if (match(Inst->getOperand(1), m_APInt(V)))
7072 return *V != 0;
7073 return false;
7075 case Instruction::SDiv:
7076 case Instruction::SRem: {
7077 // x / y is undefined if y == 0, or if x == INT_MIN and y == -1.
7078 const APInt *Numerator, *Denominator;
7079 if (!match(Inst->getOperand(1), m_APInt(Denominator)))
7080 return false;
7081 // We cannot hoist this division if the denominator is 0.
7082 if (*Denominator == 0)
7083 return false;
7084 // It's safe to hoist if the denominator is not 0 or -1.
7085 if (!Denominator->isAllOnes())
7086 return true;
7087 // At this point we know that the denominator is -1. It is safe to hoist as
7088 // long we know that the numerator is not INT_MIN.
7089 if (match(Inst->getOperand(0), m_APInt(Numerator)))
7090 return !Numerator->isMinSignedValue();
7091 // The numerator *might* be MinSignedValue.
7092 return false;
7094 case Instruction::Load: {
7095 if (!UseVariableInfo)
7096 return false;
7098 const LoadInst *LI = dyn_cast<LoadInst>(Inst);
7099 if (!LI)
7100 return false;
7101 if (mustSuppressSpeculation(*LI))
7102 return false;
7103 const DataLayout &DL = LI->getDataLayout();
7104 return isDereferenceableAndAlignedPointer(LI->getPointerOperand(),
7105 LI->getType(), LI->getAlign(), DL,
7106 CtxI, AC, DT, TLI);
7108 case Instruction::Call: {
7109 auto *CI = dyn_cast<const CallInst>(Inst);
7110 if (!CI)
7111 return false;
7112 const Function *Callee = CI->getCalledFunction();
7114 // The called function could have undefined behavior or side-effects, even
7115 // if marked readnone nounwind.
7116 return Callee && Callee->isSpeculatable();
7118 case Instruction::VAArg:
7119 case Instruction::Alloca:
7120 case Instruction::Invoke:
7121 case Instruction::CallBr:
7122 case Instruction::PHI:
7123 case Instruction::Store:
7124 case Instruction::Ret:
7125 case Instruction::Br:
7126 case Instruction::IndirectBr:
7127 case Instruction::Switch:
7128 case Instruction::Unreachable:
7129 case Instruction::Fence:
7130 case Instruction::AtomicRMW:
7131 case Instruction::AtomicCmpXchg:
7132 case Instruction::LandingPad:
7133 case Instruction::Resume:
7134 case Instruction::CatchSwitch:
7135 case Instruction::CatchPad:
7136 case Instruction::CatchRet:
7137 case Instruction::CleanupPad:
7138 case Instruction::CleanupRet:
7139 return false; // Misc instructions which have effects
7143 bool llvm::mayHaveNonDefUseDependency(const Instruction &I) {
7144 if (I.mayReadOrWriteMemory())
7145 // Memory dependency possible
7146 return true;
7147 if (!isSafeToSpeculativelyExecute(&I))
7148 // Can't move above a maythrow call or infinite loop. Or if an
7149 // inalloca alloca, above a stacksave call.
7150 return true;
7151 if (!isGuaranteedToTransferExecutionToSuccessor(&I))
7152 // 1) Can't reorder two inf-loop calls, even if readonly
7153 // 2) Also can't reorder an inf-loop call below an instruction which isn't
7154 // safe to speculatively execute. (Inverse of the above.)
7155 return true;
7156 return false;
7159 /// Convert ConstantRange OverflowResult into ValueTracking OverflowResult.
7160 static OverflowResult mapOverflowResult(ConstantRange::OverflowResult OR) {
7161 switch (OR) {
7162 case ConstantRange::OverflowResult::MayOverflow:
7163 return OverflowResult::MayOverflow;
7164 case ConstantRange::OverflowResult::AlwaysOverflowsLow:
7165 return OverflowResult::AlwaysOverflowsLow;
7166 case ConstantRange::OverflowResult::AlwaysOverflowsHigh:
7167 return OverflowResult::AlwaysOverflowsHigh;
7168 case ConstantRange::OverflowResult::NeverOverflows:
7169 return OverflowResult::NeverOverflows;
7171 llvm_unreachable("Unknown OverflowResult");
7174 /// Combine constant ranges from computeConstantRange() and computeKnownBits().
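// Illustrative example (not part of the original source): for an i8 value
// queried with ForSigned == false whose known bits say the top four bits are
// zero, fromKnownBits() yields [0, 16); if computeConstantRange()
// independently yields [3, 100), the intersection returned here is [3, 16).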
7175 ConstantRange
7176 llvm::computeConstantRangeIncludingKnownBits(const WithCache<const Value *> &V,
7177 bool ForSigned,
7178 const SimplifyQuery &SQ) {
7179 ConstantRange CR1 =
7180 ConstantRange::fromKnownBits(V.getKnownBits(SQ), ForSigned);
7181 ConstantRange CR2 = computeConstantRange(V, ForSigned, SQ.IIQ.UseInstrInfo);
7182 ConstantRange::PreferredRangeType RangeType =
7183 ForSigned ? ConstantRange::Signed : ConstantRange::Unsigned;
7184 return CR1.intersectWith(CR2, RangeType);
7187 OverflowResult llvm::computeOverflowForUnsignedMul(const Value *LHS,
7188 const Value *RHS,
7189 const SimplifyQuery &SQ,
7190 bool IsNSW) {
7191 KnownBits LHSKnown = computeKnownBits(LHS, /*Depth=*/0, SQ);
7192 KnownBits RHSKnown = computeKnownBits(RHS, /*Depth=*/0, SQ);
7194 // mul nsw of two non-negative numbers is also nuw.
7195 if (IsNSW && LHSKnown.isNonNegative() && RHSKnown.isNonNegative())
7196 return OverflowResult::NeverOverflows;
7198 ConstantRange LHSRange = ConstantRange::fromKnownBits(LHSKnown, false);
7199 ConstantRange RHSRange = ConstantRange::fromKnownBits(RHSKnown, false);
7200 return mapOverflowResult(LHSRange.unsignedMulMayOverflow(RHSRange));
7203 OverflowResult llvm::computeOverflowForSignedMul(const Value *LHS,
7204 const Value *RHS,
7205 const SimplifyQuery &SQ) {
7206 // Multiplying n * m significant bits yields a result of n + m significant
7207 // bits. If the total number of significant bits does not exceed the
7208 // result bit width (minus 1), there is no overflow.
7209 // This means if we have enough leading sign bits in the operands
7210 // we can guarantee that the result does not overflow.
7211 // Ref: "Hacker's Delight" by Henry Warren
7212 unsigned BitWidth = LHS->getType()->getScalarSizeInBits();
7214 // Note that underestimating the number of sign bits gives a more
7215 // conservative answer.
7216 unsigned SignBits =
7217 ::ComputeNumSignBits(LHS, 0, SQ) + ::ComputeNumSignBits(RHS, 0, SQ);
7219 // First handle the easy case: if we have enough sign bits there's
7220 // definitely no overflow.
7221 if (SignBits > BitWidth + 1)
7222 return OverflowResult::NeverOverflows;
7224 // There are two ambiguous cases where there can be no overflow:
7225 // SignBits == BitWidth + 1 and
7226 // SignBits == BitWidth
7227 // The second case is difficult to check, therefore we only handle the
7228 // first case.
7229 if (SignBits == BitWidth + 1) {
7230 // It overflows only when both arguments are negative and the true
7231 // product is exactly the minimum negative number.
7232 // E.g. mul i16 with 17 sign bits: 0xff00 * 0xff80 = 0x8000
7233 // For simplicity we just check if at least one side is not negative.
7234 KnownBits LHSKnown = computeKnownBits(LHS, /*Depth=*/0, SQ);
7235 KnownBits RHSKnown = computeKnownBits(RHS, /*Depth=*/0, SQ);
7236 if (LHSKnown.isNonNegative() || RHSKnown.isNonNegative())
7237 return OverflowResult::NeverOverflows;
7239 return OverflowResult::MayOverflow;
7242 OverflowResult
7243 llvm::computeOverflowForUnsignedAdd(const WithCache<const Value *> &LHS,
7244 const WithCache<const Value *> &RHS,
7245 const SimplifyQuery &SQ) {
7246 ConstantRange LHSRange =
7247 computeConstantRangeIncludingKnownBits(LHS, /*ForSigned=*/false, SQ);
7248 ConstantRange RHSRange =
7249 computeConstantRangeIncludingKnownBits(RHS, /*ForSigned=*/false, SQ);
7250 return mapOverflowResult(LHSRange.unsignedAddMayOverflow(RHSRange));
7253 static OverflowResult
7254 computeOverflowForSignedAdd(const WithCache<const Value *> &LHS,
7255 const WithCache<const Value *> &RHS,
7256 const AddOperator *Add, const SimplifyQuery &SQ) {
7257 if (Add && Add->hasNoSignedWrap()) {
7258 return OverflowResult::NeverOverflows;
7261 // If LHS and RHS each have at least two sign bits, the addition will look
7262 // like
7264 // XX..... +
7265 // YY.....
7267 // If the carry into the most significant position is 0, X and Y can't both
7268 // be 1 and therefore the carry out of the addition is also 0.
7270 // If the carry into the most significant position is 1, X and Y can't both
7271 // be 0 and therefore the carry out of the addition is also 1.
7273 // Since the carry into the most significant position is always equal to
7274 // the carry out of the addition, there is no signed overflow.
7275 if (::ComputeNumSignBits(LHS, 0, SQ) > 1 &&
7276 ::ComputeNumSignBits(RHS, 0, SQ) > 1)
7277 return OverflowResult::NeverOverflows;
7279 ConstantRange LHSRange =
7280 computeConstantRangeIncludingKnownBits(LHS, /*ForSigned=*/true, SQ);
7281 ConstantRange RHSRange =
7282 computeConstantRangeIncludingKnownBits(RHS, /*ForSigned=*/true, SQ);
7283 OverflowResult OR =
7284 mapOverflowResult(LHSRange.signedAddMayOverflow(RHSRange));
7285 if (OR != OverflowResult::MayOverflow)
7286 return OR;
7288 // The remaining code needs Add to be available. Return early if it is not.
7289 if (!Add)
7290 return OverflowResult::MayOverflow;
7292 // If the sign of Add is the same as at least one of the operands, this add
7293 // CANNOT overflow. If this can be determined from the known bits of the
7294 // operands the above signedAddMayOverflow() check will have already done so.
7295 // The only other way to improve on the known bits is from an assumption, so
7296 // call computeKnownBitsFromContext() directly.
7297 bool LHSOrRHSKnownNonNegative =
7298 (LHSRange.isAllNonNegative() || RHSRange.isAllNonNegative());
7299 bool LHSOrRHSKnownNegative =
7300 (LHSRange.isAllNegative() || RHSRange.isAllNegative());
7301 if (LHSOrRHSKnownNonNegative || LHSOrRHSKnownNegative) {
7302 KnownBits AddKnown(LHSRange.getBitWidth());
7303 computeKnownBitsFromContext(Add, AddKnown, /*Depth=*/0, SQ);
7304 if ((AddKnown.isNonNegative() && LHSOrRHSKnownNonNegative) ||
7305 (AddKnown.isNegative() && LHSOrRHSKnownNegative))
7306 return OverflowResult::NeverOverflows;
7309 return OverflowResult::MayOverflow;
7312 OverflowResult llvm::computeOverflowForUnsignedSub(const Value *LHS,
7313 const Value *RHS,
7314 const SimplifyQuery &SQ) {
7315 // X - (X % ?)
7316 // The remainder of a value can't have greater magnitude than itself,
7317 // so the subtraction can't overflow.
7319 // X - (X -nuw ?)
7320 // In the minimal case, this would simplify to "?", so there's no subtract
7321 // at all. But if this analysis is used to peek through casts, for example,
7322 // then determining no-overflow may allow other transforms.
7324 // TODO: There are other patterns like this.
7325 // See simplifyICmpWithBinOpOnLHS() for candidates.
7326 if (match(RHS, m_URem(m_Specific(LHS), m_Value())) ||
7327 match(RHS, m_NUWSub(m_Specific(LHS), m_Value())))
7328 if (isGuaranteedNotToBeUndef(LHS, SQ.AC, SQ.CxtI, SQ.DT))
7329 return OverflowResult::NeverOverflows;
7331 if (auto C = isImpliedByDomCondition(CmpInst::ICMP_UGE, LHS, RHS, SQ.CxtI,
7332 SQ.DL)) {
7333 if (*C)
7334 return OverflowResult::NeverOverflows;
7335 return OverflowResult::AlwaysOverflowsLow;
7338 ConstantRange LHSRange =
7339 computeConstantRangeIncludingKnownBits(LHS, /*ForSigned=*/false, SQ);
7340 ConstantRange RHSRange =
7341 computeConstantRangeIncludingKnownBits(RHS, /*ForSigned=*/false, SQ);
7342 return mapOverflowResult(LHSRange.unsignedSubMayOverflow(RHSRange));
7345 OverflowResult llvm::computeOverflowForSignedSub(const Value *LHS,
7346 const Value *RHS,
7347 const SimplifyQuery &SQ) {
7348 // X - (X % ?)
7349 // The remainder of a value can't have greater magnitude than itself,
7350 // so the subtraction can't overflow.
7352 // X - (X -nsw ?)
7353 // In the minimal case, this would simplify to "?", so there's no subtract
7354 // at all. But if this analysis is used to peek through casts, for example,
7355 // then determining no-overflow may allow other transforms.
7356 if (match(RHS, m_SRem(m_Specific(LHS), m_Value())) ||
7357 match(RHS, m_NSWSub(m_Specific(LHS), m_Value())))
7358 if (isGuaranteedNotToBeUndef(LHS, SQ.AC, SQ.CxtI, SQ.DT))
7359 return OverflowResult::NeverOverflows;
7361 // If LHS and RHS each have at least two sign bits, the subtraction
7362 // cannot overflow.
7363 if (::ComputeNumSignBits(LHS, 0, SQ) > 1 &&
7364 ::ComputeNumSignBits(RHS, 0, SQ) > 1)
7365 return OverflowResult::NeverOverflows;
7367 ConstantRange LHSRange =
7368 computeConstantRangeIncludingKnownBits(LHS, /*ForSigned=*/true, SQ);
7369 ConstantRange RHSRange =
7370 computeConstantRangeIncludingKnownBits(RHS, /*ForSigned=*/true, SQ);
7371 return mapOverflowResult(LHSRange.signedSubMayOverflow(RHSRange));
7374 bool llvm::isOverflowIntrinsicNoWrap(const WithOverflowInst *WO,
7375 const DominatorTree &DT) {
7376 SmallVector<const BranchInst *, 2> GuardingBranches;
7377 SmallVector<const ExtractValueInst *, 2> Results;
7379 for (const User *U : WO->users()) {
7380 if (const auto *EVI = dyn_cast<ExtractValueInst>(U)) {
7381 assert(EVI->getNumIndices() == 1 && "Obvious from CI's type");
7383 if (EVI->getIndices()[0] == 0)
7384 Results.push_back(EVI);
7385 else {
7386 assert(EVI->getIndices()[0] == 1 && "Obvious from CI's type");
7388 for (const auto *U : EVI->users())
7389 if (const auto *B = dyn_cast<BranchInst>(U)) {
7390 assert(B->isConditional() && "How else is it using an i1?");
7391 GuardingBranches.push_back(B);
7394 } else {
7395 // We are using the aggregate directly in a way we don't want to analyze
7396 // here (storing it to a global, say).
7397 return false;
7401 auto AllUsesGuardedByBranch = [&](const BranchInst *BI) {
7402 BasicBlockEdge NoWrapEdge(BI->getParent(), BI->getSuccessor(1));
7403 if (!NoWrapEdge.isSingleEdge())
7404 return false;
7406 // Check if all users of the add are provably no-wrap.
7407 for (const auto *Result : Results) {
7408 // If the extractvalue itself is not executed on overflow, then we don't
7409 // need to check each use separately, since domination is transitive.
7410 if (DT.dominates(NoWrapEdge, Result->getParent()))
7411 continue;
7413 for (const auto &RU : Result->uses())
7414 if (!DT.dominates(NoWrapEdge, RU))
7415 return false;
7418 return true;
7421 return llvm::any_of(GuardingBranches, AllUsesGuardedByBranch);
7424 /// Shifts return poison if shiftwidth is larger than the bitwidth.
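// Illustrative example (not part of the original source): for a shift of a
// <2 x i32> vector, a constant shift-amount operand of <i32 3, i32 7> is known
// to be in range, whereas <i32 3, i32 35> is not, because 35 >= 32.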
7425 static bool shiftAmountKnownInRange(const Value *ShiftAmount) {
7426 auto *C = dyn_cast<Constant>(ShiftAmount);
7427 if (!C)
7428 return false;
7430 // Shifts return poison if shiftwidth is larger than the bitwidth.
7431 SmallVector<const Constant *, 4> ShiftAmounts;
7432 if (auto *FVTy = dyn_cast<FixedVectorType>(C->getType())) {
7433 unsigned NumElts = FVTy->getNumElements();
7434 for (unsigned i = 0; i < NumElts; ++i)
7435 ShiftAmounts.push_back(C->getAggregateElement(i));
7436 } else if (isa<ScalableVectorType>(C->getType()))
7437 return false; // Can't tell, just return false to be safe
7438 else
7439 ShiftAmounts.push_back(C);
7441 bool Safe = llvm::all_of(ShiftAmounts, [](const Constant *C) {
7442 auto *CI = dyn_cast_or_null<ConstantInt>(C);
7443 return CI && CI->getValue().ult(C->getType()->getIntegerBitWidth());
7446 return Safe;
7449 enum class UndefPoisonKind {
7450 PoisonOnly = (1 << 0),
7451 UndefOnly = (1 << 1),
7452 UndefOrPoison = PoisonOnly | UndefOnly,
7455 static bool includesPoison(UndefPoisonKind Kind) {
7456 return (unsigned(Kind) & unsigned(UndefPoisonKind::PoisonOnly)) != 0;
7459 static bool includesUndef(UndefPoisonKind Kind) {
7460 return (unsigned(Kind) & unsigned(UndefPoisonKind::UndefOnly)) != 0;
7463 static bool canCreateUndefOrPoison(const Operator *Op, UndefPoisonKind Kind,
7464 bool ConsiderFlagsAndMetadata) {
7466 if (ConsiderFlagsAndMetadata && includesPoison(Kind) &&
7467 Op->hasPoisonGeneratingAnnotations())
7468 return true;
7470 unsigned Opcode = Op->getOpcode();
7472 // Check whether opcode is a poison/undef-generating operation
7473 switch (Opcode) {
7474 case Instruction::Shl:
7475 case Instruction::AShr:
7476 case Instruction::LShr:
7477 return includesPoison(Kind) && !shiftAmountKnownInRange(Op->getOperand(1));
7478 case Instruction::FPToSI:
7479 case Instruction::FPToUI:
7480 // fptosi/ui yields poison if the resulting value does not fit in the
7481 // destination type.
7482 return true;
7483 case Instruction::Call:
7484 if (auto *II = dyn_cast<IntrinsicInst>(Op)) {
7485 switch (II->getIntrinsicID()) {
7486 // TODO: Add more intrinsics.
7487 case Intrinsic::ctlz:
7488 case Intrinsic::cttz:
7489 case Intrinsic::abs:
7490 if (cast<ConstantInt>(II->getArgOperand(1))->isNullValue())
7491 return false;
7492 break;
7493 case Intrinsic::ctpop:
7494 case Intrinsic::bswap:
7495 case Intrinsic::bitreverse:
7496 case Intrinsic::fshl:
7497 case Intrinsic::fshr:
7498 case Intrinsic::smax:
7499 case Intrinsic::smin:
7500 case Intrinsic::umax:
7501 case Intrinsic::umin:
7502 case Intrinsic::ptrmask:
7503 case Intrinsic::fptoui_sat:
7504 case Intrinsic::fptosi_sat:
7505 case Intrinsic::sadd_with_overflow:
7506 case Intrinsic::ssub_with_overflow:
7507 case Intrinsic::smul_with_overflow:
7508 case Intrinsic::uadd_with_overflow:
7509 case Intrinsic::usub_with_overflow:
7510 case Intrinsic::umul_with_overflow:
7511 case Intrinsic::sadd_sat:
7512 case Intrinsic::uadd_sat:
7513 case Intrinsic::ssub_sat:
7514 case Intrinsic::usub_sat:
7515 return false;
7516 case Intrinsic::sshl_sat:
7517 case Intrinsic::ushl_sat:
7518 return includesPoison(Kind) &&
7519 !shiftAmountKnownInRange(II->getArgOperand(1));
7520 case Intrinsic::fma:
7521 case Intrinsic::fmuladd:
7522 case Intrinsic::sqrt:
7523 case Intrinsic::powi:
7524 case Intrinsic::sin:
7525 case Intrinsic::cos:
7526 case Intrinsic::pow:
7527 case Intrinsic::log:
7528 case Intrinsic::log10:
7529 case Intrinsic::log2:
7530 case Intrinsic::exp:
7531 case Intrinsic::exp2:
7532 case Intrinsic::exp10:
7533 case Intrinsic::fabs:
7534 case Intrinsic::copysign:
7535 case Intrinsic::floor:
7536 case Intrinsic::ceil:
7537 case Intrinsic::trunc:
7538 case Intrinsic::rint:
7539 case Intrinsic::nearbyint:
7540 case Intrinsic::round:
7541 case Intrinsic::roundeven:
7542 case Intrinsic::fptrunc_round:
7543 case Intrinsic::canonicalize:
7544 case Intrinsic::arithmetic_fence:
7545 case Intrinsic::minnum:
7546 case Intrinsic::maxnum:
7547 case Intrinsic::minimum:
7548 case Intrinsic::maximum:
7549 case Intrinsic::is_fpclass:
7550 case Intrinsic::ldexp:
7551 case Intrinsic::frexp:
7552 return false;
7553 case Intrinsic::lround:
7554 case Intrinsic::llround:
7555 case Intrinsic::lrint:
7556 case Intrinsic::llrint:
7557 // If the value doesn't fit, an unspecified value is returned (but this
7558 // is not poison).
7559 return false;
7562 [[fallthrough]];
7563 case Instruction::CallBr:
7564 case Instruction::Invoke: {
7565 const auto *CB = cast<CallBase>(Op);
7566 return !CB->hasRetAttr(Attribute::NoUndef);
7568 case Instruction::InsertElement:
7569 case Instruction::ExtractElement: {
7570 // If the index exceeds the length of the vector, the result is poison.
7571 auto *VTy = cast<VectorType>(Op->getOperand(0)->getType());
7572 unsigned IdxOp = Op->getOpcode() == Instruction::InsertElement ? 2 : 1;
7573 auto *Idx = dyn_cast<ConstantInt>(Op->getOperand(IdxOp));
7574 if (includesPoison(Kind))
7575 return !Idx ||
7576 Idx->getValue().uge(VTy->getElementCount().getKnownMinValue());
7577 return false;
7579 case Instruction::ShuffleVector: {
7580 ArrayRef<int> Mask = isa<ConstantExpr>(Op)
7581 ? cast<ConstantExpr>(Op)->getShuffleMask()
7582 : cast<ShuffleVectorInst>(Op)->getShuffleMask();
7583 return includesPoison(Kind) && is_contained(Mask, PoisonMaskElem);
7585 case Instruction::FNeg:
7586 case Instruction::PHI:
7587 case Instruction::Select:
7588 case Instruction::URem:
7589 case Instruction::SRem:
7590 case Instruction::ExtractValue:
7591 case Instruction::InsertValue:
7592 case Instruction::Freeze:
7593 case Instruction::ICmp:
7594 case Instruction::FCmp:
7595 case Instruction::FAdd:
7596 case Instruction::FSub:
7597 case Instruction::FMul:
7598 case Instruction::FDiv:
7599 case Instruction::FRem:
7600 return false;
7601 case Instruction::GetElementPtr:
7602 // inbounds is handled above
7603 // TODO: what about inrange on constexpr?
7604 return false;
7605 default: {
7606 const auto *CE = dyn_cast<ConstantExpr>(Op);
7607 if (isa<CastInst>(Op) || (CE && CE->isCast()))
7608 return false;
7609 else if (Instruction::isBinaryOp(Opcode))
7610 return false;
7611 // Be conservative and return true.
7612 return true;
7617 bool llvm::canCreateUndefOrPoison(const Operator *Op,
7618 bool ConsiderFlagsAndMetadata) {
7619 return ::canCreateUndefOrPoison(Op, UndefPoisonKind::UndefOrPoison,
7620 ConsiderFlagsAndMetadata);
7623 bool llvm::canCreatePoison(const Operator *Op, bool ConsiderFlagsAndMetadata) {
7624 return ::canCreateUndefOrPoison(Op, UndefPoisonKind::PoisonOnly,
7625 ConsiderFlagsAndMetadata);
7628 static bool directlyImpliesPoison(const Value *ValAssumedPoison, const Value *V,
7629 unsigned Depth) {
7630 if (ValAssumedPoison == V)
7631 return true;
7633 const unsigned MaxDepth = 2;
7634 if (Depth >= MaxDepth)
7635 return false;
7637 if (const auto *I = dyn_cast<Instruction>(V)) {
7638 if (any_of(I->operands(), [=](const Use &Op) {
7639 return propagatesPoison(Op) &&
7640 directlyImpliesPoison(ValAssumedPoison, Op, Depth + 1);
7642 return true;
7644 // V = extractvalue V0, idx
7645 // V2 = extractvalue V0, idx2
7646 // V0's elements are either all poison or all non-poison (e.g., add_with_overflow).
7647 const WithOverflowInst *II;
7648 if (match(I, m_ExtractValue(m_WithOverflowInst(II))) &&
7649 (match(ValAssumedPoison, m_ExtractValue(m_Specific(II))) ||
7650 llvm::is_contained(II->args(), ValAssumedPoison)))
7651 return true;
7653 return false;
7656 static bool impliesPoison(const Value *ValAssumedPoison, const Value *V,
7657 unsigned Depth) {
7658 if (isGuaranteedNotToBePoison(ValAssumedPoison))
7659 return true;
7661 if (directlyImpliesPoison(ValAssumedPoison, V, /* Depth */ 0))
7662 return true;
7664 const unsigned MaxDepth = 2;
7665 if (Depth >= MaxDepth)
7666 return false;
7668 const auto *I = dyn_cast<Instruction>(ValAssumedPoison);
7669 if (I && !canCreatePoison(cast<Operator>(I))) {
7670 return all_of(I->operands(), [=](const Value *Op) {
7671 return impliesPoison(Op, V, Depth + 1);
7674 return false;
7677 bool llvm::impliesPoison(const Value *ValAssumedPoison, const Value *V) {
7678 return ::impliesPoison(ValAssumedPoison, V, /* Depth */ 0);
7681 static bool programUndefinedIfUndefOrPoison(const Value *V, bool PoisonOnly);
7683 static bool isGuaranteedNotToBeUndefOrPoison(
7684 const Value *V, AssumptionCache *AC, const Instruction *CtxI,
7685 const DominatorTree *DT, unsigned Depth, UndefPoisonKind Kind) {
7686 if (Depth >= MaxAnalysisRecursionDepth)
7687 return false;
7689 if (isa<MetadataAsValue>(V))
7690 return false;
7692 if (const auto *A = dyn_cast<Argument>(V)) {
7693 if (A->hasAttribute(Attribute::NoUndef) ||
7694 A->hasAttribute(Attribute::Dereferenceable) ||
7695 A->hasAttribute(Attribute::DereferenceableOrNull))
7696 return true;
7699 if (auto *C = dyn_cast<Constant>(V)) {
7700 if (isa<PoisonValue>(C))
7701 return !includesPoison(Kind);
7703 if (isa<UndefValue>(C))
7704 return !includesUndef(Kind);
7706 if (isa<ConstantInt>(C) || isa<GlobalVariable>(C) || isa<ConstantFP>(V) ||
7707 isa<ConstantPointerNull>(C) || isa<Function>(C))
7708 return true;
7710 if (C->getType()->isVectorTy() && !isa<ConstantExpr>(C)) {
7711 if (includesUndef(Kind) && C->containsUndefElement())
7712 return false;
7713 if (includesPoison(Kind) && C->containsPoisonElement())
7714 return false;
7715 return !C->containsConstantExpression();
7719 // Strip cast operations from a pointer value.
7720 // Note that stripPointerCastsSameRepresentation can strip off getelementptr
7721 // inbounds with zero offset. To guarantee that the result isn't poison, the
7722 // stripped pointer is checked as it has to be pointing into an allocated
7723 // object or be `null` to ensure that `inbounds` getelementptrs with a
7724 // zero offset could not produce poison.
7725 // It can strip off addrspacecasts that do not change the bit representation
7726 // as well. We believe that such an addrspacecast is equivalent to a no-op.
7727 auto *StrippedV = V->stripPointerCastsSameRepresentation();
7728 if (isa<AllocaInst>(StrippedV) || isa<GlobalVariable>(StrippedV) ||
7729 isa<Function>(StrippedV) || isa<ConstantPointerNull>(StrippedV))
7730 return true;
7732 auto OpCheck = [&](const Value *V) {
7733 return isGuaranteedNotToBeUndefOrPoison(V, AC, CtxI, DT, Depth + 1, Kind);
7736 if (auto *Opr = dyn_cast<Operator>(V)) {
7737 // If the value is a freeze instruction, then it can never
7738 // be undef or poison.
7739 if (isa<FreezeInst>(V))
7740 return true;
7742 if (const auto *CB = dyn_cast<CallBase>(V)) {
7743 if (CB->hasRetAttr(Attribute::NoUndef) ||
7744 CB->hasRetAttr(Attribute::Dereferenceable) ||
7745 CB->hasRetAttr(Attribute::DereferenceableOrNull))
7746 return true;
7749 if (const auto *PN = dyn_cast<PHINode>(V)) {
7750 unsigned Num = PN->getNumIncomingValues();
7751 bool IsWellDefined = true;
7752 for (unsigned i = 0; i < Num; ++i) {
7753 auto *TI = PN->getIncomingBlock(i)->getTerminator();
7754 if (!isGuaranteedNotToBeUndefOrPoison(PN->getIncomingValue(i), AC, TI,
7755 DT, Depth + 1, Kind)) {
7756 IsWellDefined = false;
7757 break;
7760 if (IsWellDefined)
7761 return true;
7762 } else if (!::canCreateUndefOrPoison(Opr, Kind,
7763 /*ConsiderFlagsAndMetadata*/ true) &&
7764 all_of(Opr->operands(), OpCheck))
7765 return true;
7768 if (auto *I = dyn_cast<LoadInst>(V))
7769 if (I->hasMetadata(LLVMContext::MD_noundef) ||
7770 I->hasMetadata(LLVMContext::MD_dereferenceable) ||
7771 I->hasMetadata(LLVMContext::MD_dereferenceable_or_null))
7772 return true;
7774 if (programUndefinedIfUndefOrPoison(V, !includesUndef(Kind)))
7775 return true;
7777 // CxtI may be null or a cloned instruction.
7778 if (!CtxI || !CtxI->getParent() || !DT)
7779 return false;
7781 auto *DNode = DT->getNode(CtxI->getParent());
7782 if (!DNode)
7783 // Unreachable block
7784 return false;
7786 // If V is used as a branch condition before reaching CtxI, V cannot be
7787 // undef or poison.
7788 // br V, BB1, BB2
7789 // BB1:
7790 // CtxI ; V cannot be undef or poison here
7791 auto *Dominator = DNode->getIDom();
7792 // This check is purely for compile time reasons: we can skip the IDom walk
7793 // if what we are checking for includes undef and the value is not an integer.
7794 if (!includesUndef(Kind) || V->getType()->isIntegerTy())
7795 while (Dominator) {
7796 auto *TI = Dominator->getBlock()->getTerminator();
7798 Value *Cond = nullptr;
7799 if (auto BI = dyn_cast_or_null<BranchInst>(TI)) {
7800 if (BI->isConditional())
7801 Cond = BI->getCondition();
7802 } else if (auto SI = dyn_cast_or_null<SwitchInst>(TI)) {
7803 Cond = SI->getCondition();
7806 if (Cond) {
7807 if (Cond == V)
7808 return true;
7809 else if (!includesUndef(Kind) && isa<Operator>(Cond)) {
7810 // For poison, we can analyze further
7811 auto *Opr = cast<Operator>(Cond);
7812 if (any_of(Opr->operands(), [V](const Use &U) {
7813 return V == U && propagatesPoison(U);
7815 return true;
7819 Dominator = Dominator->getIDom();
7822 if (getKnowledgeValidInContext(V, {Attribute::NoUndef}, CtxI, DT, AC))
7823 return true;
7825 return false;
7828 bool llvm::isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC,
7829 const Instruction *CtxI,
7830 const DominatorTree *DT,
7831 unsigned Depth) {
7832 return ::isGuaranteedNotToBeUndefOrPoison(V, AC, CtxI, DT, Depth,
7833 UndefPoisonKind::UndefOrPoison);
7836 bool llvm::isGuaranteedNotToBePoison(const Value *V, AssumptionCache *AC,
7837 const Instruction *CtxI,
7838 const DominatorTree *DT, unsigned Depth) {
7839 return ::isGuaranteedNotToBeUndefOrPoison(V, AC, CtxI, DT, Depth,
7840 UndefPoisonKind::PoisonOnly);
7843 bool llvm::isGuaranteedNotToBeUndef(const Value *V, AssumptionCache *AC,
7844 const Instruction *CtxI,
7845 const DominatorTree *DT, unsigned Depth) {
7846 return ::isGuaranteedNotToBeUndefOrPoison(V, AC, CtxI, DT, Depth,
7847 UndefPoisonKind::UndefOnly);
7850 /// Return true if undefined behavior would provably be executed on the path to
7851 /// OnPathTo if Root produced a poison result. Note that this doesn't say
7852 /// anything about whether OnPathTo is actually executed or whether Root is
7853 /// actually poison. This can be used to assess whether a new use of Root can
7854 /// be added at a location which is control equivalent with OnPathTo (such as
7855 /// immediately before it) without introducing UB which didn't previously
7856 /// exist. Note that a false result conveys no information.
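// Illustrative sketch (not part of the original source): if Root is used as
// the divisor of a udiv or as the pointer operand of a store, and that user
// dominates OnPathTo, then poison at Root would provably trigger UB on the
// path, so this is expected to return true.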
7857 bool llvm::mustExecuteUBIfPoisonOnPathTo(Instruction *Root,
7858 Instruction *OnPathTo,
7859 DominatorTree *DT) {
7860 // Basic approach is to assume Root is poison, propagate poison forward
7861 // through all users we can easily track, and then check whether any of those
7862 // users are provable UB and must execute before our exiting block might
7863 // exit.
7865 // The set of all recursive users we've visited (which are assumed to all be
7866 // poison because of said visit)
7867 SmallSet<const Value *, 16> KnownPoison;
7868 SmallVector<const Instruction*, 16> Worklist;
7869 Worklist.push_back(Root);
7870 while (!Worklist.empty()) {
7871 const Instruction *I = Worklist.pop_back_val();
7873 // If we know this must trigger UB on a path leading our target.
7874 if (mustTriggerUB(I, KnownPoison) && DT->dominates(I, OnPathTo))
7875 return true;
7877 // If we can't analyze propagation through this instruction, just skip it
7878 // and transitive users. Safe as false is a conservative result.
7879 if (I != Root && !any_of(I->operands(), [&KnownPoison](const Use &U) {
7880 return KnownPoison.contains(U) && propagatesPoison(U);
7882 continue;
7884 if (KnownPoison.insert(I).second)
7885 for (const User *User : I->users())
7886 Worklist.push_back(cast<Instruction>(User));
7889 // Might be non-UB, or might have a path we couldn't prove must execute on
7890 // the way to the exiting bb.
7891 return false;
7894 OverflowResult llvm::computeOverflowForSignedAdd(const AddOperator *Add,
7895 const SimplifyQuery &SQ) {
7896 return ::computeOverflowForSignedAdd(Add->getOperand(0), Add->getOperand(1),
7897 Add, SQ);
7900 OverflowResult
7901 llvm::computeOverflowForSignedAdd(const WithCache<const Value *> &LHS,
7902 const WithCache<const Value *> &RHS,
7903 const SimplifyQuery &SQ) {
7904 return ::computeOverflowForSignedAdd(LHS, RHS, nullptr, SQ);
7907 bool llvm::isGuaranteedToTransferExecutionToSuccessor(const Instruction *I) {
7908 // Note: An atomic operation isn't guaranteed to return in a reasonable amount
7909 // of time because it's possible for another thread to interfere with it for an
7910 // arbitrary length of time, but programs aren't allowed to rely on that.
7912 // If there is no successor, then execution can't transfer to it.
7913 if (isa<ReturnInst>(I))
7914 return false;
7915 if (isa<UnreachableInst>(I))
7916 return false;
7918 // Note: Do not add new checks here; instead, change Instruction::mayThrow or
7919 // Instruction::willReturn.
7921 // FIXME: Move this check into Instruction::willReturn.
7922 if (isa<CatchPadInst>(I)) {
7923 switch (classifyEHPersonality(I->getFunction()->getPersonalityFn())) {
7924 default:
7925 // A catchpad may invoke exception object constructors and such, which
7926 // in some languages can be arbitrary code, so be conservative by default.
7927 return false;
7928 case EHPersonality::CoreCLR:
7929 // For CoreCLR, it just involves a type test.
7930 return true;
7934 // An instruction that returns without throwing must transfer control flow
7935 // to a successor.
7936 return !I->mayThrow() && I->willReturn();
7939 bool llvm::isGuaranteedToTransferExecutionToSuccessor(const BasicBlock *BB) {
7940 // TODO: This is slightly conservative for invoke instruction since exiting
7941 // via an exception *is* normal control for them.
7942 for (const Instruction &I : *BB)
7943 if (!isGuaranteedToTransferExecutionToSuccessor(&I))
7944 return false;
7945 return true;
7948 bool llvm::isGuaranteedToTransferExecutionToSuccessor(
7949 BasicBlock::const_iterator Begin, BasicBlock::const_iterator End,
7950 unsigned ScanLimit) {
7951 return isGuaranteedToTransferExecutionToSuccessor(make_range(Begin, End),
7952 ScanLimit);
7955 bool llvm::isGuaranteedToTransferExecutionToSuccessor(
7956 iterator_range<BasicBlock::const_iterator> Range, unsigned ScanLimit) {
7957 assert(ScanLimit && "scan limit must be non-zero");
7958 for (const Instruction &I : Range) {
7959 if (isa<DbgInfoIntrinsic>(I))
7960 continue;
7961 if (--ScanLimit == 0)
7962 return false;
7963 if (!isGuaranteedToTransferExecutionToSuccessor(&I))
7964 return false;
7966 return true;
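// Illustrative sketch (assumes a caller-provided instruction I): check that
// every instruction after I in its block is guaranteed to reach the block's
// terminator, scanning at most 32 instructions, mirroring the limits used
// elsewhere in this file.
//
// \code
//   static bool restOfBlockTransfersExecution(const Instruction *I) {
//     return isGuaranteedToTransferExecutionToSuccessor(
//         std::next(I->getIterator()), I->getParent()->end(), /*ScanLimit=*/32);
//   }
// \endcode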
7969 bool llvm::isGuaranteedToExecuteForEveryIteration(const Instruction *I,
7970 const Loop *L) {
7971 // The loop header is guaranteed to be executed for every iteration.
7973 // FIXME: Relax this constraint to cover all basic blocks that are
7974 // guaranteed to be executed at every iteration.
7975 if (I->getParent() != L->getHeader()) return false;
7977 for (const Instruction &LI : *L->getHeader()) {
7978 if (&LI == I) return true;
7979 if (!isGuaranteedToTransferExecutionToSuccessor(&LI)) return false;
7981 llvm_unreachable("Instruction not contained in its own parent basic block.");
7984 bool llvm::propagatesPoison(const Use &PoisonOp) {
7985 const Operator *I = cast<Operator>(PoisonOp.getUser());
7986 switch (I->getOpcode()) {
7987 case Instruction::Freeze:
7988 case Instruction::PHI:
7989 case Instruction::Invoke:
7990 return false;
7991 case Instruction::Select:
7992 return PoisonOp.getOperandNo() == 0;
7993 case Instruction::Call:
7994 if (auto *II = dyn_cast<IntrinsicInst>(I)) {
7995 switch (II->getIntrinsicID()) {
7996 // TODO: Add more intrinsics.
7997 case Intrinsic::sadd_with_overflow:
7998 case Intrinsic::ssub_with_overflow:
7999 case Intrinsic::smul_with_overflow:
8000 case Intrinsic::uadd_with_overflow:
8001 case Intrinsic::usub_with_overflow:
8002 case Intrinsic::umul_with_overflow:
8003 // If an input is a vector containing a poison element, the
8004 // corresponding lanes of the two output vectors (calculated
8005 // results and overflow bits) are poison.
8006 return true;
8007 case Intrinsic::ctpop:
8008 case Intrinsic::ctlz:
8009 case Intrinsic::cttz:
8010 case Intrinsic::abs:
8011 case Intrinsic::smax:
8012 case Intrinsic::smin:
8013 case Intrinsic::umax:
8014 case Intrinsic::umin:
8015 case Intrinsic::bitreverse:
8016 case Intrinsic::bswap:
8017 case Intrinsic::sadd_sat:
8018 case Intrinsic::ssub_sat:
8019 case Intrinsic::sshl_sat:
8020 case Intrinsic::uadd_sat:
8021 case Intrinsic::usub_sat:
8022 case Intrinsic::ushl_sat:
8023 return true;
8026 return false;
8027 case Instruction::ICmp:
8028 case Instruction::FCmp:
8029 case Instruction::GetElementPtr:
8030 return true;
8031 default:
8032 if (isa<BinaryOperator>(I) || isa<UnaryOperator>(I) || isa<CastInst>(I))
8033 return true;
8035 // Be conservative and return false.
8036 return false;
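// Sketch of a typical caller pattern (the worklist in
// mustExecuteUBIfPoisonOnPathTo above is the in-tree example): given an
// instruction I assumed to produce poison, collect the users that are poison
// as well because the corresponding use propagates poison.
//
// \code
//   static void collectPoisonedUsers(Instruction *I,
//                                    SmallVectorImpl<Instruction *> &Out) {
//     for (Use &U : I->uses())
//       if (propagatesPoison(U))
//         Out.push_back(cast<Instruction>(U.getUser()));
//   }
// \endcode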
8040 /// Enumerates all operands of \p I that are guaranteed to not be undef or
8041 /// poison. If the callback \p Handle returns true, stop processing and return
8042 /// true. Otherwise, return false.
8043 template <typename CallableT>
8044 static bool handleGuaranteedWellDefinedOps(const Instruction *I,
8045 const CallableT &Handle) {
8046 switch (I->getOpcode()) {
8047 case Instruction::Store:
8048 if (Handle(cast<StoreInst>(I)->getPointerOperand()))
8049 return true;
8050 break;
8052 case Instruction::Load:
8053 if (Handle(cast<LoadInst>(I)->getPointerOperand()))
8054 return true;
8055 break;
8057 // Since the dereferenceable attribute implies noundef, atomic operations
8058 // also implicitly have noundef pointers.
8059 case Instruction::AtomicCmpXchg:
8060 if (Handle(cast<AtomicCmpXchgInst>(I)->getPointerOperand()))
8061 return true;
8062 break;
8064 case Instruction::AtomicRMW:
8065 if (Handle(cast<AtomicRMWInst>(I)->getPointerOperand()))
8066 return true;
8067 break;
8069 case Instruction::Call:
8070 case Instruction::Invoke: {
8071 const CallBase *CB = cast<CallBase>(I);
8072 if (CB->isIndirectCall() && Handle(CB->getCalledOperand()))
8073 return true;
8074 for (unsigned i = 0; i < CB->arg_size(); ++i)
8075 if ((CB->paramHasAttr(i, Attribute::NoUndef) ||
8076 CB->paramHasAttr(i, Attribute::Dereferenceable) ||
8077 CB->paramHasAttr(i, Attribute::DereferenceableOrNull)) &&
8078 Handle(CB->getArgOperand(i)))
8079 return true;
8080 break;
8082 case Instruction::Ret:
8083 if (I->getFunction()->hasRetAttribute(Attribute::NoUndef) &&
8084 Handle(I->getOperand(0)))
8085 return true;
8086 break;
8087 case Instruction::Switch:
8088 if (Handle(cast<SwitchInst>(I)->getCondition()))
8089 return true;
8090 break;
8091 case Instruction::Br: {
8092 auto *BR = cast<BranchInst>(I);
8093 if (BR->isConditional() && Handle(BR->getCondition()))
8094 return true;
8095 break;
8097 default:
8098 break;
8101 return false;
8104 void llvm::getGuaranteedWellDefinedOps(
8105 const Instruction *I, SmallVectorImpl<const Value *> &Operands) {
8106 handleGuaranteedWellDefinedOps(I, [&](const Value *V) {
8107 Operands.push_back(V);
8108 return false;
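// A small usage sketch (the helper name is illustrative): test whether a
// specific value V is required to be well defined (noundef) as an operand of
// I, e.g. because it is a dereferenced pointer or a noundef call argument.
//
// \code
//   static bool mustBeWellDefinedOperand(const Instruction *I, const Value *V) {
//     SmallVector<const Value *, 4> Ops;
//     getGuaranteedWellDefinedOps(I, Ops);
//     return llvm::is_contained(Ops, V);
//   }
// \endcode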
8112 /// Enumerates all operands of \p I that are guaranteed to not be poison.
8113 template <typename CallableT>
8114 static bool handleGuaranteedNonPoisonOps(const Instruction *I,
8115 const CallableT &Handle) {
8116 if (handleGuaranteedWellDefinedOps(I, Handle))
8117 return true;
8118 switch (I->getOpcode()) {
8119 // Divisors of these operations are allowed to be partially undef.
8120 case Instruction::UDiv:
8121 case Instruction::SDiv:
8122 case Instruction::URem:
8123 case Instruction::SRem:
8124 return Handle(I->getOperand(1));
8125 default:
8126 return false;
8130 void llvm::getGuaranteedNonPoisonOps(const Instruction *I,
8131 SmallVectorImpl<const Value *> &Operands) {
8132 handleGuaranteedNonPoisonOps(I, [&](const Value *V) {
8133 Operands.push_back(V);
8134 return false;
8138 bool llvm::mustTriggerUB(const Instruction *I,
8139 const SmallPtrSetImpl<const Value *> &KnownPoison) {
8140 return handleGuaranteedNonPoisonOps(
8141 I, [&](const Value *V) { return KnownPoison.count(V); });
8144 static bool programUndefinedIfUndefOrPoison(const Value *V,
8145 bool PoisonOnly) {
8146 // We currently only look for uses of values within the same basic
8147 // block, as that makes it easier to guarantee that the uses will be
8148 // executed given that Inst is executed.
8150 // FIXME: Expand this to consider uses beyond the same basic block. To do
8151 // this, look out for the distinction between post-dominance and strong
8152 // post-dominance.
8153 const BasicBlock *BB = nullptr;
8154 BasicBlock::const_iterator Begin;
8155 if (const auto *Inst = dyn_cast<Instruction>(V)) {
8156 BB = Inst->getParent();
8157 Begin = Inst->getIterator();
8158 Begin++;
8159 } else if (const auto *Arg = dyn_cast<Argument>(V)) {
8160 if (Arg->getParent()->isDeclaration())
8161 return false;
8162 BB = &Arg->getParent()->getEntryBlock();
8163 Begin = BB->begin();
8164 } else {
8165 return false;
8168 // Limit number of instructions we look at, to avoid scanning through large
8169 // blocks. The current limit is chosen arbitrarily.
8170 unsigned ScanLimit = 32;
8171 BasicBlock::const_iterator End = BB->end();
8173 if (!PoisonOnly) {
8174 // Since undef does not propagate eagerly, be conservative & just check
8175 // whether a value is directly passed to an instruction that must take
8176 // well-defined operands.
8178 for (const auto &I : make_range(Begin, End)) {
8179 if (isa<DbgInfoIntrinsic>(I))
8180 continue;
8181 if (--ScanLimit == 0)
8182 break;
8184 if (handleGuaranteedWellDefinedOps(&I, [V](const Value *WellDefinedOp) {
8185 return WellDefinedOp == V;
8187 return true;
8189 if (!isGuaranteedToTransferExecutionToSuccessor(&I))
8190 break;
8192 return false;
8195 // Set of instructions that we have proved will yield poison if Inst
8196 // does.
8197 SmallSet<const Value *, 16> YieldsPoison;
8198 SmallSet<const BasicBlock *, 4> Visited;
8200 YieldsPoison.insert(V);
8201 Visited.insert(BB);
8203 while (true) {
8204 for (const auto &I : make_range(Begin, End)) {
8205 if (isa<DbgInfoIntrinsic>(I))
8206 continue;
8207 if (--ScanLimit == 0)
8208 return false;
8209 if (mustTriggerUB(&I, YieldsPoison))
8210 return true;
8211 if (!isGuaranteedToTransferExecutionToSuccessor(&I))
8212 return false;
8214 // If an operand is poison and propagates it, mark I as yielding poison.
8215 for (const Use &Op : I.operands()) {
8216 if (YieldsPoison.count(Op) && propagatesPoison(Op)) {
8217 YieldsPoison.insert(&I);
8218 break;
8222 // Special handling for select, which returns poison if its operand 0 is
8223 // poison (handled in the loop above) *or* if both its true/false operands
8224 // are poison (handled here).
8225 if (I.getOpcode() == Instruction::Select &&
8226 YieldsPoison.count(I.getOperand(1)) &&
8227 YieldsPoison.count(I.getOperand(2))) {
8228 YieldsPoison.insert(&I);
8232 BB = BB->getSingleSuccessor();
8233 if (!BB || !Visited.insert(BB).second)
8234 break;
8236 Begin = BB->getFirstNonPHIIt();
8237 End = BB->end();
8239 return false;
8242 bool llvm::programUndefinedIfUndefOrPoison(const Instruction *Inst) {
8243 return ::programUndefinedIfUndefOrPoison(Inst, false);
8246 bool llvm::programUndefinedIfPoison(const Instruction *Inst) {
8247 return ::programUndefinedIfUndefOrPoison(Inst, true);
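// Hedged sketch combining the queries above: in any well-defined execution,
// an executed instruction cannot be poison if either the value itself is
// guaranteed non-poison or the program would be UB whenever it were poison.
//
// \code
//   static bool cannotBePoisonIfExecuted(const Instruction *I) {
//     return isGuaranteedNotToBePoison(I, /*AC=*/nullptr, /*CtxI=*/I,
//                                      /*DT=*/nullptr, /*Depth=*/0) ||
//            programUndefinedIfPoison(I);
//   }
// \endcode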
8250 static bool isKnownNonNaN(const Value *V, FastMathFlags FMF) {
8251 if (FMF.noNaNs())
8252 return true;
8254 if (auto *C = dyn_cast<ConstantFP>(V))
8255 return !C->isNaN();
8257 if (auto *C = dyn_cast<ConstantDataVector>(V)) {
8258 if (!C->getElementType()->isFloatingPointTy())
8259 return false;
8260 for (unsigned I = 0, E = C->getNumElements(); I < E; ++I) {
8261 if (C->getElementAsAPFloat(I).isNaN())
8262 return false;
8264 return true;
8267 if (isa<ConstantAggregateZero>(V))
8268 return true;
8270 return false;
8273 static bool isKnownNonZero(const Value *V) {
8274 if (auto *C = dyn_cast<ConstantFP>(V))
8275 return !C->isZero();
8277 if (auto *C = dyn_cast<ConstantDataVector>(V)) {
8278 if (!C->getElementType()->isFloatingPointTy())
8279 return false;
8280 for (unsigned I = 0, E = C->getNumElements(); I < E; ++I) {
8281 if (C->getElementAsAPFloat(I).isZero())
8282 return false;
8284 return true;
8287 return false;
8290 /// Match a clamp pattern for float types without caring about NaNs or signed
8291 /// zeros. Given the non-min/max outer cmp/select from the clamp pattern, this
8292 /// function recognizes whether it can be substituted by a "canonical" min/max
8293 /// pattern.
8294 static SelectPatternResult matchFastFloatClamp(CmpInst::Predicate Pred,
8295 Value *CmpLHS, Value *CmpRHS,
8296 Value *TrueVal, Value *FalseVal,
8297 Value *&LHS, Value *&RHS) {
8298 // Try to match
8299 // X < C1 ? C1 : Min(X, C2) --> Max(C1, Min(X, C2))
8300 // X > C1 ? C1 : Max(X, C2) --> Min(C1, Max(X, C2))
8301 // and return description of the outer Max/Min.
8303 // First, check if select has inverse order:
8304 if (CmpRHS == FalseVal) {
8305 std::swap(TrueVal, FalseVal);
8306 Pred = CmpInst::getInversePredicate(Pred);
8309 // Assume success now. If there's no match, callers should not use these anyway.
8310 LHS = TrueVal;
8311 RHS = FalseVal;
8313 const APFloat *FC1;
8314 if (CmpRHS != TrueVal || !match(CmpRHS, m_APFloat(FC1)) || !FC1->isFinite())
8315 return {SPF_UNKNOWN, SPNB_NA, false};
8317 const APFloat *FC2;
8318 switch (Pred) {
8319 case CmpInst::FCMP_OLT:
8320 case CmpInst::FCMP_OLE:
8321 case CmpInst::FCMP_ULT:
8322 case CmpInst::FCMP_ULE:
8323 if (match(FalseVal, m_OrdOrUnordFMin(m_Specific(CmpLHS), m_APFloat(FC2))) &&
8324 *FC1 < *FC2)
8325 return {SPF_FMAXNUM, SPNB_RETURNS_ANY, false};
8326 break;
8327 case CmpInst::FCMP_OGT:
8328 case CmpInst::FCMP_OGE:
8329 case CmpInst::FCMP_UGT:
8330 case CmpInst::FCMP_UGE:
8331 if (match(FalseVal, m_OrdOrUnordFMax(m_Specific(CmpLHS), m_APFloat(FC2))) &&
8332 *FC1 > *FC2)
8333 return {SPF_FMINNUM, SPNB_RETURNS_ANY, false};
8334 break;
8335 default:
8336 break;
8339 return {SPF_UNKNOWN, SPNB_NA, false};
8342 /// Recognize variations of:
8343 /// CLAMP(v,l,h) ==> ((v) < (l) ? (l) : ((v) > (h) ? (h) : (v)))
8344 static SelectPatternResult matchClamp(CmpInst::Predicate Pred,
8345 Value *CmpLHS, Value *CmpRHS,
8346 Value *TrueVal, Value *FalseVal) {
8347 // Swap the select operands and predicate to match the patterns below.
8348 if (CmpRHS != TrueVal) {
8349 Pred = ICmpInst::getSwappedPredicate(Pred);
8350 std::swap(TrueVal, FalseVal);
8352 const APInt *C1;
8353 if (CmpRHS == TrueVal && match(CmpRHS, m_APInt(C1))) {
8354 const APInt *C2;
8355 // (X <s C1) ? C1 : SMIN(X, C2) ==> SMAX(SMIN(X, C2), C1)
8356 if (match(FalseVal, m_SMin(m_Specific(CmpLHS), m_APInt(C2))) &&
8357 C1->slt(*C2) && Pred == CmpInst::ICMP_SLT)
8358 return {SPF_SMAX, SPNB_NA, false};
8360 // (X >s C1) ? C1 : SMAX(X, C2) ==> SMIN(SMAX(X, C2), C1)
8361 if (match(FalseVal, m_SMax(m_Specific(CmpLHS), m_APInt(C2))) &&
8362 C1->sgt(*C2) && Pred == CmpInst::ICMP_SGT)
8363 return {SPF_SMIN, SPNB_NA, false};
8365 // (X <u C1) ? C1 : UMIN(X, C2) ==> UMAX(UMIN(X, C2), C1)
8366 if (match(FalseVal, m_UMin(m_Specific(CmpLHS), m_APInt(C2))) &&
8367 C1->ult(*C2) && Pred == CmpInst::ICMP_ULT)
8368 return {SPF_UMAX, SPNB_NA, false};
8370 // (X >u C1) ? C1 : UMAX(X, C2) ==> UMIN(UMAX(X, C2), C1)
8371 if (match(FalseVal, m_UMax(m_Specific(CmpLHS), m_APInt(C2))) &&
8372 C1->ugt(*C2) && Pred == CmpInst::ICMP_UGT)
8373 return {SPF_UMIN, SPNB_NA, false};
8375 return {SPF_UNKNOWN, SPNB_NA, false};
8378 /// Recognize variations of:
8379 /// a < c ? min(a,b) : min(b,c) ==> min(min(a,b),min(b,c))
8380 static SelectPatternResult matchMinMaxOfMinMax(CmpInst::Predicate Pred,
8381 Value *CmpLHS, Value *CmpRHS,
8382 Value *TVal, Value *FVal,
8383 unsigned Depth) {
8384 // TODO: Allow FP min/max with nnan/nsz.
8385 assert(CmpInst::isIntPredicate(Pred) && "Expected integer comparison");
8387 Value *A = nullptr, *B = nullptr;
8388 SelectPatternResult L = matchSelectPattern(TVal, A, B, nullptr, Depth + 1);
8389 if (!SelectPatternResult::isMinOrMax(L.Flavor))
8390 return {SPF_UNKNOWN, SPNB_NA, false};
8392 Value *C = nullptr, *D = nullptr;
8393 SelectPatternResult R = matchSelectPattern(FVal, C, D, nullptr, Depth + 1);
8394 if (L.Flavor != R.Flavor)
8395 return {SPF_UNKNOWN, SPNB_NA, false};
8397 // We have something like: x Pred y ? min(a, b) : min(c, d).
8398 // Try to match the compare to the min/max operations of the select operands.
8399 // First, make sure we have the right compare predicate.
8400 switch (L.Flavor) {
8401 case SPF_SMIN:
8402 if (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE) {
8403 Pred = ICmpInst::getSwappedPredicate(Pred);
8404 std::swap(CmpLHS, CmpRHS);
8406 if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE)
8407 break;
8408 return {SPF_UNKNOWN, SPNB_NA, false};
8409 case SPF_SMAX:
8410 if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE) {
8411 Pred = ICmpInst::getSwappedPredicate(Pred);
8412 std::swap(CmpLHS, CmpRHS);
8414 if (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE)
8415 break;
8416 return {SPF_UNKNOWN, SPNB_NA, false};
8417 case SPF_UMIN:
8418 if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE) {
8419 Pred = ICmpInst::getSwappedPredicate(Pred);
8420 std::swap(CmpLHS, CmpRHS);
8422 if (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_ULE)
8423 break;
8424 return {SPF_UNKNOWN, SPNB_NA, false};
8425 case SPF_UMAX:
8426 if (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_ULE) {
8427 Pred = ICmpInst::getSwappedPredicate(Pred);
8428 std::swap(CmpLHS, CmpRHS);
8430 if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE)
8431 break;
8432 return {SPF_UNKNOWN, SPNB_NA, false};
8433 default:
8434 return {SPF_UNKNOWN, SPNB_NA, false};
8437 // If there is a common operand in the already matched min/max and the other
8438 // min/max operands match the compare operands (either directly or inverted),
8439 // then this is min/max of the same flavor.
8441 // a pred c ? m(a, b) : m(c, b) --> m(m(a, b), m(c, b))
8442 // ~c pred ~a ? m(a, b) : m(c, b) --> m(m(a, b), m(c, b))
8443 if (D == B) {
8444 if ((CmpLHS == A && CmpRHS == C) || (match(C, m_Not(m_Specific(CmpLHS))) &&
8445 match(A, m_Not(m_Specific(CmpRHS)))))
8446 return {L.Flavor, SPNB_NA, false};
8448 // a pred d ? m(a, b) : m(b, d) --> m(m(a, b), m(b, d))
8449 // ~d pred ~a ? m(a, b) : m(b, d) --> m(m(a, b), m(b, d))
8450 if (C == B) {
8451 if ((CmpLHS == A && CmpRHS == D) || (match(D, m_Not(m_Specific(CmpLHS))) &&
8452 match(A, m_Not(m_Specific(CmpRHS)))))
8453 return {L.Flavor, SPNB_NA, false};
8455 // b pred c ? m(a, b) : m(c, a) --> m(m(a, b), m(c, a))
8456 // ~c pred ~b ? m(a, b) : m(c, a) --> m(m(a, b), m(c, a))
8457 if (D == A) {
8458 if ((CmpLHS == B && CmpRHS == C) || (match(C, m_Not(m_Specific(CmpLHS))) &&
8459 match(B, m_Not(m_Specific(CmpRHS)))))
8460 return {L.Flavor, SPNB_NA, false};
8462 // b pred d ? m(a, b) : m(a, d) --> m(m(a, b), m(a, d))
8463 // ~d pred ~b ? m(a, b) : m(a, d) --> m(m(a, b), m(a, d))
8464 if (C == A) {
8465 if ((CmpLHS == B && CmpRHS == D) || (match(D, m_Not(m_Specific(CmpLHS))) &&
8466 match(B, m_Not(m_Specific(CmpRHS)))))
8467 return {L.Flavor, SPNB_NA, false};
8470 return {SPF_UNKNOWN, SPNB_NA, false};
8473 /// If the input value is the result of a 'not' op, constant integer, or vector
8474 /// splat of a constant integer, return the bitwise-not source value.
8475 /// TODO: This could be extended to handle non-splat vector integer constants.
8476 static Value *getNotValue(Value *V) {
8477 Value *NotV;
8478 if (match(V, m_Not(m_Value(NotV))))
8479 return NotV;
8481 const APInt *C;
8482 if (match(V, m_APInt(C)))
8483 return ConstantInt::get(V->getType(), ~(*C));
8485 return nullptr;
8488 /// Match non-obvious integer minimum and maximum sequences.
8489 static SelectPatternResult matchMinMax(CmpInst::Predicate Pred,
8490 Value *CmpLHS, Value *CmpRHS,
8491 Value *TrueVal, Value *FalseVal,
8492 Value *&LHS, Value *&RHS,
8493 unsigned Depth) {
8494 // Assume success. If there's no match, callers should not use these anyway.
8495 LHS = TrueVal;
8496 RHS = FalseVal;
8498 SelectPatternResult SPR = matchClamp(Pred, CmpLHS, CmpRHS, TrueVal, FalseVal);
8499 if (SPR.Flavor != SelectPatternFlavor::SPF_UNKNOWN)
8500 return SPR;
8502 SPR = matchMinMaxOfMinMax(Pred, CmpLHS, CmpRHS, TrueVal, FalseVal, Depth);
8503 if (SPR.Flavor != SelectPatternFlavor::SPF_UNKNOWN)
8504 return SPR;
8506 // Look through 'not' ops to find disguised min/max.
8507 // (X > Y) ? ~X : ~Y ==> (~X < ~Y) ? ~X : ~Y ==> MIN(~X, ~Y)
8508 // (X < Y) ? ~X : ~Y ==> (~X > ~Y) ? ~X : ~Y ==> MAX(~X, ~Y)
8509 if (CmpLHS == getNotValue(TrueVal) && CmpRHS == getNotValue(FalseVal)) {
8510 switch (Pred) {
8511 case CmpInst::ICMP_SGT: return {SPF_SMIN, SPNB_NA, false};
8512 case CmpInst::ICMP_SLT: return {SPF_SMAX, SPNB_NA, false};
8513 case CmpInst::ICMP_UGT: return {SPF_UMIN, SPNB_NA, false};
8514 case CmpInst::ICMP_ULT: return {SPF_UMAX, SPNB_NA, false};
8515 default: break;
8519 // (X > Y) ? ~Y : ~X ==> (~X < ~Y) ? ~Y : ~X ==> MAX(~Y, ~X)
8520 // (X < Y) ? ~Y : ~X ==> (~X > ~Y) ? ~Y : ~X ==> MIN(~Y, ~X)
8521 if (CmpLHS == getNotValue(FalseVal) && CmpRHS == getNotValue(TrueVal)) {
8522 switch (Pred) {
8523 case CmpInst::ICMP_SGT: return {SPF_SMAX, SPNB_NA, false};
8524 case CmpInst::ICMP_SLT: return {SPF_SMIN, SPNB_NA, false};
8525 case CmpInst::ICMP_UGT: return {SPF_UMAX, SPNB_NA, false};
8526 case CmpInst::ICMP_ULT: return {SPF_UMIN, SPNB_NA, false};
8527 default: break;
8531 if (Pred != CmpInst::ICMP_SGT && Pred != CmpInst::ICMP_SLT)
8532 return {SPF_UNKNOWN, SPNB_NA, false};
8534 const APInt *C1;
8535 if (!match(CmpRHS, m_APInt(C1)))
8536 return {SPF_UNKNOWN, SPNB_NA, false};
8538 // An unsigned min/max can be written with a signed compare.
8539 const APInt *C2;
8540 if ((CmpLHS == TrueVal && match(FalseVal, m_APInt(C2))) ||
8541 (CmpLHS == FalseVal && match(TrueVal, m_APInt(C2)))) {
8542 // Is the sign bit set?
8543 // (X <s 0) ? X : MAXVAL ==> (X >u MAXVAL) ? X : MAXVAL ==> UMAX
8544 // (X <s 0) ? MAXVAL : X ==> (X >u MAXVAL) ? MAXVAL : X ==> UMIN
8545 if (Pred == CmpInst::ICMP_SLT && C1->isZero() && C2->isMaxSignedValue())
8546 return {CmpLHS == TrueVal ? SPF_UMAX : SPF_UMIN, SPNB_NA, false};
8548 // Is the sign bit clear?
8549 // (X >s -1) ? MINVAL : X ==> (X <u MINVAL) ? MINVAL : X ==> UMAX
8550 // (X >s -1) ? X : MINVAL ==> (X <u MINVAL) ? X : MINVAL ==> UMIN
8551 if (Pred == CmpInst::ICMP_SGT && C1->isAllOnes() && C2->isMinSignedValue())
8552 return {CmpLHS == FalseVal ? SPF_UMAX : SPF_UMIN, SPNB_NA, false};
8555 return {SPF_UNKNOWN, SPNB_NA, false};
8558 bool llvm::isKnownNegation(const Value *X, const Value *Y, bool NeedNSW,
8559 bool AllowPoison) {
8560 assert(X && Y && "Invalid operand");
8562 auto IsNegationOf = [&](const Value *X, const Value *Y) {
8563 if (!match(X, m_Neg(m_Specific(Y))))
8564 return false;
8566 auto *BO = cast<BinaryOperator>(X);
8567 if (NeedNSW && !BO->hasNoSignedWrap())
8568 return false;
8570 auto *Zero = cast<Constant>(BO->getOperand(0));
8571 if (!AllowPoison && !Zero->isNullValue())
8572 return false;
8574 return true;
8577 // X = -Y or Y = -X
8578 if (IsNegationOf(X, Y) || IsNegationOf(Y, X))
8579 return true;
8581 // X = sub (A, B), Y = sub (B, A) || X = sub nsw (A, B), Y = sub nsw (B, A)
8582 Value *A, *B;
8583 return (!NeedNSW && (match(X, m_Sub(m_Value(A), m_Value(B))) &&
8584 match(Y, m_Sub(m_Specific(B), m_Specific(A))))) ||
8585 (NeedNSW && (match(X, m_NSWSub(m_Value(A), m_Value(B))) &&
8586 match(Y, m_NSWSub(m_Specific(B), m_Specific(A)))));
8589 bool llvm::isKnownInversion(const Value *X, const Value *Y) {
8590 // Handle X = icmp pred A, B, Y = icmp pred A, C.
8591 Value *A, *B, *C;
8592 CmpPredicate Pred1, Pred2;
8593 if (!match(X, m_ICmp(Pred1, m_Value(A), m_Value(B))) ||
8594 !match(Y, m_c_ICmp(Pred2, m_Specific(A), m_Value(C))))
8595 return false;
8597 // They must both have samesign flag or not.
8598 if (cast<ICmpInst>(X)->hasSameSign() != cast<ICmpInst>(Y)->hasSameSign())
8599 return false;
8601 if (B == C)
8602 return Pred1 == ICmpInst::getInversePredicate(Pred2);
8604 // Try to infer the relationship from constant ranges.
8605 const APInt *RHSC1, *RHSC2;
8606 if (!match(B, m_APInt(RHSC1)) || !match(C, m_APInt(RHSC2)))
8607 return false;
8609 // Sign bits of two RHSCs should match.
8610 if (cast<ICmpInst>(X)->hasSameSign() &&
8611 RHSC1->isNonNegative() != RHSC2->isNonNegative())
8612 return false;
8614 const auto CR1 = ConstantRange::makeExactICmpRegion(Pred1, *RHSC1);
8615 const auto CR2 = ConstantRange::makeExactICmpRegion(Pred2, *RHSC2);
8617 return CR1.inverse() == CR2;
8620 SelectPatternResult llvm::getSelectPattern(CmpInst::Predicate Pred,
8621 SelectPatternNaNBehavior NaNBehavior,
8622 bool Ordered) {
8623 switch (Pred) {
8624 default:
8625 return {SPF_UNKNOWN, SPNB_NA, false}; // Equality.
8626 case ICmpInst::ICMP_UGT:
8627 case ICmpInst::ICMP_UGE:
8628 return {SPF_UMAX, SPNB_NA, false};
8629 case ICmpInst::ICMP_SGT:
8630 case ICmpInst::ICMP_SGE:
8631 return {SPF_SMAX, SPNB_NA, false};
8632 case ICmpInst::ICMP_ULT:
8633 case ICmpInst::ICMP_ULE:
8634 return {SPF_UMIN, SPNB_NA, false};
8635 case ICmpInst::ICMP_SLT:
8636 case ICmpInst::ICMP_SLE:
8637 return {SPF_SMIN, SPNB_NA, false};
8638 case FCmpInst::FCMP_UGT:
8639 case FCmpInst::FCMP_UGE:
8640 case FCmpInst::FCMP_OGT:
8641 case FCmpInst::FCMP_OGE:
8642 return {SPF_FMAXNUM, NaNBehavior, Ordered};
8643 case FCmpInst::FCMP_ULT:
8644 case FCmpInst::FCMP_ULE:
8645 case FCmpInst::FCMP_OLT:
8646 case FCmpInst::FCMP_OLE:
8647 return {SPF_FMINNUM, NaNBehavior, Ordered};
8651 std::optional<std::pair<CmpPredicate, Constant *>>
8652 llvm::getFlippedStrictnessPredicateAndConstant(CmpPredicate Pred, Constant *C) {
8653 assert(ICmpInst::isRelational(Pred) && ICmpInst::isIntPredicate(Pred) &&
8654 "Only for relational integer predicates.");
8655 if (isa<UndefValue>(C))
8656 return std::nullopt;
8658 Type *Type = C->getType();
8659 bool IsSigned = ICmpInst::isSigned(Pred);
8661 CmpInst::Predicate UnsignedPred = ICmpInst::getUnsignedPredicate(Pred);
8662 bool WillIncrement =
8663 UnsignedPred == ICmpInst::ICMP_ULE || UnsignedPred == ICmpInst::ICMP_UGT;
8665 // Check if the constant operand can be safely incremented/decremented
8666 // without overflowing/underflowing.
8667 auto ConstantIsOk = [WillIncrement, IsSigned](ConstantInt *C) {
8668 return WillIncrement ? !C->isMaxValue(IsSigned) : !C->isMinValue(IsSigned);
8671 Constant *SafeReplacementConstant = nullptr;
8672 if (auto *CI = dyn_cast<ConstantInt>(C)) {
8673 // Bail out if the constant can't be safely incremented/decremented.
8674 if (!ConstantIsOk(CI))
8675 return std::nullopt;
8676 } else if (auto *FVTy = dyn_cast<FixedVectorType>(Type)) {
8677 unsigned NumElts = FVTy->getNumElements();
8678 for (unsigned i = 0; i != NumElts; ++i) {
8679 Constant *Elt = C->getAggregateElement(i);
8680 if (!Elt)
8681 return std::nullopt;
8683 if (isa<UndefValue>(Elt))
8684 continue;
8686 // Bail out if we can't determine if this constant is min/max or if we
8687 // know that this constant is min/max.
8688 auto *CI = dyn_cast<ConstantInt>(Elt);
8689 if (!CI || !ConstantIsOk(CI))
8690 return std::nullopt;
8692 if (!SafeReplacementConstant)
8693 SafeReplacementConstant = CI;
8695 } else if (isa<VectorType>(C->getType())) {
8696 // Handle scalable splat
8697 Value *SplatC = C->getSplatValue();
8698 auto *CI = dyn_cast_or_null<ConstantInt>(SplatC);
8699 // Bail out if the constant can't be safely incremented/decremented.
8700 if (!CI || !ConstantIsOk(CI))
8701 return std::nullopt;
8702 } else {
8703 // ConstantExpr?
8704 return std::nullopt;
8707 // It may not be safe to change a compare predicate in the presence of
8708 // undefined elements, so replace those elements with the first safe constant
8709 // that we found.
8710 // TODO: in case of poison, it is safe; let's replace undefs only.
8711 if (C->containsUndefOrPoisonElement()) {
8712 assert(SafeReplacementConstant && "Replacement constant not set");
8713 C = Constant::replaceUndefsWith(C, SafeReplacementConstant);
8716 CmpInst::Predicate NewPred = CmpInst::getFlippedStrictnessPredicate(Pred);
8718 // Increment or decrement the constant.
8719 Constant *OneOrNegOne = ConstantInt::get(Type, WillIncrement ? 1 : -1, true);
8720 Constant *NewC = ConstantExpr::getAdd(C, OneOrNegOne);
8722 return std::make_pair(NewPred, NewC);
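// Worked example for getFlippedStrictnessPredicateAndConstant (the values are
// illustrative): for Pred = ICMP_SLT and C = i8 10 the result is
// {ICMP_SLE, i8 9}; for Pred = ICMP_UGT and C = i8 7 it is {ICMP_UGE, i8 8}.
// A guarded call sketch that checks the documented preconditions first:
//
// \code
//   static std::optional<std::pair<CmpPredicate, Constant *>>
//   tryFlipStrictness(CmpPredicate Pred, Constant *C) {
//     if (!ICmpInst::isRelational(Pred) || !ICmpInst::isIntPredicate(Pred))
//       return std::nullopt;
//     return getFlippedStrictnessPredicateAndConstant(Pred, C);
//   }
// \endcode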
8725 static SelectPatternResult matchSelectPattern(CmpInst::Predicate Pred,
8726 FastMathFlags FMF,
8727 Value *CmpLHS, Value *CmpRHS,
8728 Value *TrueVal, Value *FalseVal,
8729 Value *&LHS, Value *&RHS,
8730 unsigned Depth) {
8731 bool HasMismatchedZeros = false;
8732 if (CmpInst::isFPPredicate(Pred)) {
8733 // IEEE-754 ignores the sign of 0.0 in comparisons. So if the select has one
8734 // 0.0 operand, set the compare's 0.0 operands to that same value for the
8735 // purpose of identifying min/max. Disregard vector constants with undefined
8736 // elements because those can not be back-propagated for analysis.
8737 Value *OutputZeroVal = nullptr;
8738 if (match(TrueVal, m_AnyZeroFP()) && !match(FalseVal, m_AnyZeroFP()) &&
8739 !cast<Constant>(TrueVal)->containsUndefOrPoisonElement())
8740 OutputZeroVal = TrueVal;
8741 else if (match(FalseVal, m_AnyZeroFP()) && !match(TrueVal, m_AnyZeroFP()) &&
8742 !cast<Constant>(FalseVal)->containsUndefOrPoisonElement())
8743 OutputZeroVal = FalseVal;
8745 if (OutputZeroVal) {
8746 if (match(CmpLHS, m_AnyZeroFP()) && CmpLHS != OutputZeroVal) {
8747 HasMismatchedZeros = true;
8748 CmpLHS = OutputZeroVal;
8750 if (match(CmpRHS, m_AnyZeroFP()) && CmpRHS != OutputZeroVal) {
8751 HasMismatchedZeros = true;
8752 CmpRHS = OutputZeroVal;
8757 LHS = CmpLHS;
8758 RHS = CmpRHS;
8760 // Signed zero may return inconsistent results between implementations.
8761 // (0.0 <= -0.0) ? 0.0 : -0.0 // Returns 0.0
8762 // minNum(0.0, -0.0) // May return -0.0 or 0.0 (IEEE 754-2008 5.3.1)
8763 // Therefore, we behave conservatively and only proceed if at least one of the
8764 // operands is known to not be zero or if we don't care about signed zero.
8765 switch (Pred) {
8766 default: break;
8767 case CmpInst::FCMP_OGT: case CmpInst::FCMP_OLT:
8768 case CmpInst::FCMP_UGT: case CmpInst::FCMP_ULT:
8769 if (!HasMismatchedZeros)
8770 break;
8771 [[fallthrough]];
8772 case CmpInst::FCMP_OGE: case CmpInst::FCMP_OLE:
8773 case CmpInst::FCMP_UGE: case CmpInst::FCMP_ULE:
8774 if (!FMF.noSignedZeros() && !isKnownNonZero(CmpLHS) &&
8775 !isKnownNonZero(CmpRHS))
8776 return {SPF_UNKNOWN, SPNB_NA, false};
8779 SelectPatternNaNBehavior NaNBehavior = SPNB_NA;
8780 bool Ordered = false;
8782 // When given one NaN and one non-NaN input:
8783 // - maxnum/minnum (C99 fmaxf()/fminf()) return the non-NaN input.
8784 // - A simple C99 (a < b ? a : b) construction will return 'b' (as the
8785 // ordered comparison fails), which could be NaN or non-NaN.
8786 // so here we discover exactly what NaN behavior is required/accepted.
8787 if (CmpInst::isFPPredicate(Pred)) {
8788 bool LHSSafe = isKnownNonNaN(CmpLHS, FMF);
8789 bool RHSSafe = isKnownNonNaN(CmpRHS, FMF);
8791 if (LHSSafe && RHSSafe) {
8792 // Both operands are known non-NaN.
8793 NaNBehavior = SPNB_RETURNS_ANY;
8794 } else if (CmpInst::isOrdered(Pred)) {
8795 // An ordered comparison will return false when given a NaN, so it
8796 // returns the RHS.
8797 Ordered = true;
8798 if (LHSSafe)
8799 // LHS is non-NaN, so if RHS is NaN then NaN will be returned.
8800 NaNBehavior = SPNB_RETURNS_NAN;
8801 else if (RHSSafe)
8802 NaNBehavior = SPNB_RETURNS_OTHER;
8803 else
8804 // Completely unsafe.
8805 return {SPF_UNKNOWN, SPNB_NA, false};
8806 } else {
8807 Ordered = false;
8808 // An unordered comparison will return true when given a NaN, so it
8809 // returns the LHS.
8810 if (LHSSafe)
8811 // LHS is non-NaN, so if RHS is NaN then non-NaN will be returned.
8812 NaNBehavior = SPNB_RETURNS_OTHER;
8813 else if (RHSSafe)
8814 NaNBehavior = SPNB_RETURNS_NAN;
8815 else
8816 // Completely unsafe.
8817 return {SPF_UNKNOWN, SPNB_NA, false};
8821 if (TrueVal == CmpRHS && FalseVal == CmpLHS) {
8822 std::swap(CmpLHS, CmpRHS);
8823 Pred = CmpInst::getSwappedPredicate(Pred);
8824 if (NaNBehavior == SPNB_RETURNS_NAN)
8825 NaNBehavior = SPNB_RETURNS_OTHER;
8826 else if (NaNBehavior == SPNB_RETURNS_OTHER)
8827 NaNBehavior = SPNB_RETURNS_NAN;
8828 Ordered = !Ordered;
8831 // ([if]cmp X, Y) ? X : Y
8832 if (TrueVal == CmpLHS && FalseVal == CmpRHS)
8833 return getSelectPattern(Pred, NaNBehavior, Ordered);
8835 if (isKnownNegation(TrueVal, FalseVal)) {
8836 // Sign-extending LHS does not change its sign, so TrueVal/FalseVal can
8837 // match against either LHS or sext(LHS).
8838 auto MaybeSExtCmpLHS =
8839 m_CombineOr(m_Specific(CmpLHS), m_SExt(m_Specific(CmpLHS)));
8840 auto ZeroOrAllOnes = m_CombineOr(m_ZeroInt(), m_AllOnes());
8841 auto ZeroOrOne = m_CombineOr(m_ZeroInt(), m_One());
8842 if (match(TrueVal, MaybeSExtCmpLHS)) {
8843 // Set the return values. If the compare uses the negated value (-X >s 0),
8844 // swap the return values because the negated value is always 'RHS'.
8845 LHS = TrueVal;
8846 RHS = FalseVal;
8847 if (match(CmpLHS, m_Neg(m_Specific(FalseVal))))
8848 std::swap(LHS, RHS);
8850 // (X >s 0) ? X : -X or (X >s -1) ? X : -X --> ABS(X)
8851 // (-X >s 0) ? -X : X or (-X >s -1) ? -X : X --> ABS(X)
8852 if (Pred == ICmpInst::ICMP_SGT && match(CmpRHS, ZeroOrAllOnes))
8853 return {SPF_ABS, SPNB_NA, false};
8855 // (X >=s 0) ? X : -X or (X >=s 1) ? X : -X --> ABS(X)
8856 if (Pred == ICmpInst::ICMP_SGE && match(CmpRHS, ZeroOrOne))
8857 return {SPF_ABS, SPNB_NA, false};
8859 // (X <s 0) ? X : -X or (X <s 1) ? X : -X --> NABS(X)
8860 // (-X <s 0) ? -X : X or (-X <s 1) ? -X : X --> NABS(X)
8861 if (Pred == ICmpInst::ICMP_SLT && match(CmpRHS, ZeroOrOne))
8862 return {SPF_NABS, SPNB_NA, false};
8864 else if (match(FalseVal, MaybeSExtCmpLHS)) {
8865 // Set the return values. If the compare uses the negated value (-X >s 0),
8866 // swap the return values because the negated value is always 'RHS'.
8867 LHS = FalseVal;
8868 RHS = TrueVal;
8869 if (match(CmpLHS, m_Neg(m_Specific(TrueVal))))
8870 std::swap(LHS, RHS);
8872 // (X >s 0) ? -X : X or (X >s -1) ? -X : X --> NABS(X)
8873 // (-X >s 0) ? X : -X or (-X >s -1) ? X : -X --> NABS(X)
8874 if (Pred == ICmpInst::ICMP_SGT && match(CmpRHS, ZeroOrAllOnes))
8875 return {SPF_NABS, SPNB_NA, false};
8877 // (X <s 0) ? -X : X or (X <s 1) ? -X : X --> ABS(X)
8878 // (-X <s 0) ? X : -X or (-X <s 1) ? X : -X --> ABS(X)
8879 if (Pred == ICmpInst::ICMP_SLT && match(CmpRHS, ZeroOrOne))
8880 return {SPF_ABS, SPNB_NA, false};
8884 if (CmpInst::isIntPredicate(Pred))
8885 return matchMinMax(Pred, CmpLHS, CmpRHS, TrueVal, FalseVal, LHS, RHS, Depth);
8887 // According to (IEEE 754-2008 5.3.1), minNum(0.0, -0.0) and similar
8888 // may return either -0.0 or 0.0, so fcmp/select pair has stricter
8889 // semantics than minNum. Be conservative in such case.
8890 if (NaNBehavior != SPNB_RETURNS_ANY ||
8891 (!FMF.noSignedZeros() && !isKnownNonZero(CmpLHS) &&
8892 !isKnownNonZero(CmpRHS)))
8893 return {SPF_UNKNOWN, SPNB_NA, false};
8895 return matchFastFloatClamp(Pred, CmpLHS, CmpRHS, TrueVal, FalseVal, LHS, RHS);
8898 static Value *lookThroughCastConst(CmpInst *CmpI, Type *SrcTy, Constant *C,
8899 Instruction::CastOps *CastOp) {
8900 const DataLayout &DL = CmpI->getDataLayout();
8902 Constant *CastedTo = nullptr;
8903 switch (*CastOp) {
8904 case Instruction::ZExt:
8905 if (CmpI->isUnsigned())
8906 CastedTo = ConstantExpr::getTrunc(C, SrcTy);
8907 break;
8908 case Instruction::SExt:
8909 if (CmpI->isSigned())
8910 CastedTo = ConstantExpr::getTrunc(C, SrcTy, true);
8911 break;
8912 case Instruction::Trunc:
8913 Constant *CmpConst;
8914 if (match(CmpI->getOperand(1), m_Constant(CmpConst)) &&
8915 CmpConst->getType() == SrcTy) {
8916 // Here we have the following case:
8918 // %cond = cmp iN %x, CmpConst
8919 // %tr = trunc iN %x to iK
8920 // %narrowsel = select i1 %cond, iK %t, iK C
8922 // We can always move trunc after select operation:
8924 // %cond = cmp iN %x, CmpConst
8925 // %widesel = select i1 %cond, iN %x, iN CmpConst
8926 // %tr = trunc iN %widesel to iK
8928 // Note that C could be extended in any way because we don't care about
8929 // upper bits after truncation. It can't be the abs pattern, because that
8930 // would look like:
8932 // select i1 %cond, x, -x.
8934 // So only a min/max pattern can be matched. Such a match requires the widened
8935 // C == CmpConst. That is why we set the widened C = CmpConst; the condition
8936 // trunc(CmpConst) == C is checked below.
8937 CastedTo = CmpConst;
8938 } else {
8939 unsigned ExtOp = CmpI->isSigned() ? Instruction::SExt : Instruction::ZExt;
8940 CastedTo = ConstantFoldCastOperand(ExtOp, C, SrcTy, DL);
8942 break;
8943 case Instruction::FPTrunc:
8944 CastedTo = ConstantFoldCastOperand(Instruction::FPExt, C, SrcTy, DL);
8945 break;
8946 case Instruction::FPExt:
8947 CastedTo = ConstantFoldCastOperand(Instruction::FPTrunc, C, SrcTy, DL);
8948 break;
8949 case Instruction::FPToUI:
8950 CastedTo = ConstantFoldCastOperand(Instruction::UIToFP, C, SrcTy, DL);
8951 break;
8952 case Instruction::FPToSI:
8953 CastedTo = ConstantFoldCastOperand(Instruction::SIToFP, C, SrcTy, DL);
8954 break;
8955 case Instruction::UIToFP:
8956 CastedTo = ConstantFoldCastOperand(Instruction::FPToUI, C, SrcTy, DL);
8957 break;
8958 case Instruction::SIToFP:
8959 CastedTo = ConstantFoldCastOperand(Instruction::FPToSI, C, SrcTy, DL);
8960 break;
8961 default:
8962 break;
8965 if (!CastedTo)
8966 return nullptr;
8968 // Make sure the cast doesn't lose any information.
8969 Constant *CastedBack =
8970 ConstantFoldCastOperand(*CastOp, CastedTo, C->getType(), DL);
8971 if (CastedBack && CastedBack != C)
8972 return nullptr;
8974 return CastedTo;
8977 /// Helps to match a select pattern in case of a type mismatch.
8979 /// The function handles the case when the types of the true and false values
8980 /// of a select instruction differ from the type of the cmp instruction's
8981 /// operands because of a cast instruction. It checks whether it is legal to
8982 /// move the cast operation after the "select". If yes, it returns the new
8983 /// second value of the "select" (with the assumption that the cast is moved):
8984 /// 1. As operand of cast instruction when both values of "select" are same cast
8985 /// instructions.
8986 /// 2. As restored constant (by applying reverse cast operation) when the first
8987 /// value of the "select" is a cast operation and the second value is a
8988 /// constant. It is implemented in lookThroughCastConst().
8989 /// 3. As the widened cmp operand, when one operand is a cast instruction and
8990 /// the other is not; the sel(cmp) operands are integers of different widths.
8991 /// NOTE: We return only the new second value because the first value could be
8992 /// accessed as operand of cast instruction.
8993 static Value *lookThroughCast(CmpInst *CmpI, Value *V1, Value *V2,
8994 Instruction::CastOps *CastOp) {
8995 auto *Cast1 = dyn_cast<CastInst>(V1);
8996 if (!Cast1)
8997 return nullptr;
8999 *CastOp = Cast1->getOpcode();
9000 Type *SrcTy = Cast1->getSrcTy();
9001 if (auto *Cast2 = dyn_cast<CastInst>(V2)) {
9002 // If V1 and V2 are both the same cast from the same type, look through V1.
9003 if (*CastOp == Cast2->getOpcode() && SrcTy == Cast2->getSrcTy())
9004 return Cast2->getOperand(0);
9005 return nullptr;
9008 auto *C = dyn_cast<Constant>(V2);
9009 if (C)
9010 return lookThroughCastConst(CmpI, SrcTy, C, CastOp);
9012 Value *CastedTo = nullptr;
9013 if (*CastOp == Instruction::Trunc) {
9014 if (match(CmpI->getOperand(1), m_ZExtOrSExt(m_Specific(V2)))) {
9015 // Here we have the following case:
9016 // %y_ext = sext iK %y to iN
9017 // %cond = cmp iN %x, %y_ext
9018 // %tr = trunc iN %x to iK
9019 // %narrowsel = select i1 %cond, iK %tr, iK %y
9021 // We can always move trunc after select operation:
9022 // %y_ext = sext iK %y to iN
9023 // %cond = cmp iN %x, %y_ext
9024 // %widesel = select i1 %cond, iN %x, iN %y_ext
9025 // %tr = trunc iN %widesel to iK
9026 assert(V2->getType() == Cast1->getType() &&
9027 "V2 and Cast1 should be the same type.");
9028 CastedTo = CmpI->getOperand(1);
9032 return CastedTo;
9034 SelectPatternResult llvm::matchSelectPattern(Value *V, Value *&LHS, Value *&RHS,
9035 Instruction::CastOps *CastOp,
9036 unsigned Depth) {
9037 if (Depth >= MaxAnalysisRecursionDepth)
9038 return {SPF_UNKNOWN, SPNB_NA, false};
9040 SelectInst *SI = dyn_cast<SelectInst>(V);
9041 if (!SI) return {SPF_UNKNOWN, SPNB_NA, false};
9043 CmpInst *CmpI = dyn_cast<CmpInst>(SI->getCondition());
9044 if (!CmpI) return {SPF_UNKNOWN, SPNB_NA, false};
9046 Value *TrueVal = SI->getTrueValue();
9047 Value *FalseVal = SI->getFalseValue();
9049 return llvm::matchDecomposedSelectPattern(CmpI, TrueVal, FalseVal, LHS, RHS,
9050 CastOp, Depth);
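// A minimal usage sketch (the helper name and V are illustrative): detect a
// select that implements a signed maximum and return its operands.
//
// \code
//   static bool matchSMaxSelect(Value *V, Value *&A, Value *&B) {
//     SelectPatternResult SPR =
//         matchSelectPattern(V, A, B, /*CastOp=*/nullptr, /*Depth=*/0);
//     return SPR.Flavor == SPF_SMAX;
//   }
// \endcode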
9053 SelectPatternResult llvm::matchDecomposedSelectPattern(
9054 CmpInst *CmpI, Value *TrueVal, Value *FalseVal, Value *&LHS, Value *&RHS,
9055 Instruction::CastOps *CastOp, unsigned Depth) {
9056 CmpInst::Predicate Pred = CmpI->getPredicate();
9057 Value *CmpLHS = CmpI->getOperand(0);
9058 Value *CmpRHS = CmpI->getOperand(1);
9059 FastMathFlags FMF;
9060 if (isa<FPMathOperator>(CmpI))
9061 FMF = CmpI->getFastMathFlags();
9063 // Bail out early.
9064 if (CmpI->isEquality())
9065 return {SPF_UNKNOWN, SPNB_NA, false};
9067 // Deal with type mismatches.
9068 if (CastOp && CmpLHS->getType() != TrueVal->getType()) {
9069 if (Value *C = lookThroughCast(CmpI, TrueVal, FalseVal, CastOp)) {
9070 // If this is a potential fmin/fmax with a cast to integer, then ignore
9071 // -0.0 because there is no corresponding integer value.
9072 if (*CastOp == Instruction::FPToSI || *CastOp == Instruction::FPToUI)
9073 FMF.setNoSignedZeros();
9074 return ::matchSelectPattern(Pred, FMF, CmpLHS, CmpRHS,
9075 cast<CastInst>(TrueVal)->getOperand(0), C,
9076 LHS, RHS, Depth);
9078 if (Value *C = lookThroughCast(CmpI, FalseVal, TrueVal, CastOp)) {
9079 // If this is a potential fmin/fmax with a cast to integer, then ignore
9080 // -0.0 because there is no corresponding integer value.
9081 if (*CastOp == Instruction::FPToSI || *CastOp == Instruction::FPToUI)
9082 FMF.setNoSignedZeros();
9083 return ::matchSelectPattern(Pred, FMF, CmpLHS, CmpRHS,
9084 C, cast<CastInst>(FalseVal)->getOperand(0),
9085 LHS, RHS, Depth);
9088 return ::matchSelectPattern(Pred, FMF, CmpLHS, CmpRHS, TrueVal, FalseVal,
9089 LHS, RHS, Depth);
9092 CmpInst::Predicate llvm::getMinMaxPred(SelectPatternFlavor SPF, bool Ordered) {
9093 if (SPF == SPF_SMIN) return ICmpInst::ICMP_SLT;
9094 if (SPF == SPF_UMIN) return ICmpInst::ICMP_ULT;
9095 if (SPF == SPF_SMAX) return ICmpInst::ICMP_SGT;
9096 if (SPF == SPF_UMAX) return ICmpInst::ICMP_UGT;
9097 if (SPF == SPF_FMINNUM)
9098 return Ordered ? FCmpInst::FCMP_OLT : FCmpInst::FCMP_ULT;
9099 if (SPF == SPF_FMAXNUM)
9100 return Ordered ? FCmpInst::FCMP_OGT : FCmpInst::FCMP_UGT;
9101 llvm_unreachable("unhandled!");
9104 Intrinsic::ID llvm::getMinMaxIntrinsic(SelectPatternFlavor SPF) {
9105 switch (SPF) {
9106 case SelectPatternFlavor::SPF_UMIN:
9107 return Intrinsic::umin;
9108 case SelectPatternFlavor::SPF_UMAX:
9109 return Intrinsic::umax;
9110 case SelectPatternFlavor::SPF_SMIN:
9111 return Intrinsic::smin;
9112 case SelectPatternFlavor::SPF_SMAX:
9113 return Intrinsic::smax;
9114 default:
9115 llvm_unreachable("Unexpected SPF");
9119 SelectPatternFlavor llvm::getInverseMinMaxFlavor(SelectPatternFlavor SPF) {
9120 if (SPF == SPF_SMIN) return SPF_SMAX;
9121 if (SPF == SPF_UMIN) return SPF_UMAX;
9122 if (SPF == SPF_SMAX) return SPF_SMIN;
9123 if (SPF == SPF_UMAX) return SPF_UMIN;
9124 llvm_unreachable("unhandled!");
9127 Intrinsic::ID llvm::getInverseMinMaxIntrinsic(Intrinsic::ID MinMaxID) {
9128 switch (MinMaxID) {
9129 case Intrinsic::smax: return Intrinsic::smin;
9130 case Intrinsic::smin: return Intrinsic::smax;
9131 case Intrinsic::umax: return Intrinsic::umin;
9132 case Intrinsic::umin: return Intrinsic::umax;
9133 // Please note that the next four intrinsics may produce the same result for
9134 // the original and inverted case even if X != Y, because NaN is handled specially.
9135 case Intrinsic::maximum: return Intrinsic::minimum;
9136 case Intrinsic::minimum: return Intrinsic::maximum;
9137 case Intrinsic::maxnum: return Intrinsic::minnum;
9138 case Intrinsic::minnum: return Intrinsic::maxnum;
9139 default: llvm_unreachable("Unexpected intrinsic");
9143 APInt llvm::getMinMaxLimit(SelectPatternFlavor SPF, unsigned BitWidth) {
9144 switch (SPF) {
9145 case SPF_SMAX: return APInt::getSignedMaxValue(BitWidth);
9146 case SPF_SMIN: return APInt::getSignedMinValue(BitWidth);
9147 case SPF_UMAX: return APInt::getMaxValue(BitWidth);
9148 case SPF_UMIN: return APInt::getMinValue(BitWidth);
9149 default: llvm_unreachable("Unexpected flavor");
9153 std::pair<Intrinsic::ID, bool>
9154 llvm::canConvertToMinOrMaxIntrinsic(ArrayRef<Value *> VL) {
9155 // Check if VL contains select instructions that can be folded into a min/max
9156 // vector intrinsic and return the intrinsic if it is possible.
9157 // TODO: Support floating point min/max.
9158 bool AllCmpSingleUse = true;
9159 SelectPatternResult SelectPattern;
9160 SelectPattern.Flavor = SPF_UNKNOWN;
9161 if (all_of(VL, [&SelectPattern, &AllCmpSingleUse](Value *I) {
9162 Value *LHS, *RHS;
9163 auto CurrentPattern = matchSelectPattern(I, LHS, RHS);
9164 if (!SelectPatternResult::isMinOrMax(CurrentPattern.Flavor))
9165 return false;
9166 if (SelectPattern.Flavor != SPF_UNKNOWN &&
9167 SelectPattern.Flavor != CurrentPattern.Flavor)
9168 return false;
9169 SelectPattern = CurrentPattern;
9170 AllCmpSingleUse &=
9171 match(I, m_Select(m_OneUse(m_Value()), m_Value(), m_Value()));
9172 return true;
9173 })) {
9174 switch (SelectPattern.Flavor) {
9175 case SPF_SMIN:
9176 return {Intrinsic::smin, AllCmpSingleUse};
9177 case SPF_UMIN:
9178 return {Intrinsic::umin, AllCmpSingleUse};
9179 case SPF_SMAX:
9180 return {Intrinsic::smax, AllCmpSingleUse};
9181 case SPF_UMAX:
9182 return {Intrinsic::umax, AllCmpSingleUse};
9183 case SPF_FMAXNUM:
9184 return {Intrinsic::maxnum, AllCmpSingleUse};
9185 case SPF_FMINNUM:
9186 return {Intrinsic::minnum, AllCmpSingleUse};
9187 default:
9188 llvm_unreachable("unexpected select pattern flavor");
9191 return {Intrinsic::not_intrinsic, false};
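// Usage sketch for canConvertToMinOrMaxIntrinsic (a hypothetical caller, e.g.
// a vectorizer looking at one value per lane): if every value in VL is the
// same min/max select flavor, the returned intrinsic ID identifies it, and the
// bool reports whether all feeding compares were single-use.
//
// \code
//   static bool isUniformUMin(ArrayRef<Value *> VL) {
//     auto [IID, AllCmpSingleUse] = canConvertToMinOrMaxIntrinsic(VL);
//     (void)AllCmpSingleUse;
//     return IID == Intrinsic::umin;
//   }
// \endcode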
9194 bool llvm::matchSimpleRecurrence(const PHINode *P, BinaryOperator *&BO,
9195 Value *&Start, Value *&Step) {
9196 // Handle the case of a simple two-predecessor recurrence PHI.
9197 // There's a lot more that could theoretically be done here, but
9198 // this is sufficient to catch some interesting cases.
9199 if (P->getNumIncomingValues() != 2)
9200 return false;
9202 for (unsigned i = 0; i != 2; ++i) {
9203 Value *L = P->getIncomingValue(i);
9204 Value *R = P->getIncomingValue(!i);
9205 auto *LU = dyn_cast<BinaryOperator>(L);
9206 if (!LU)
9207 continue;
9208 unsigned Opcode = LU->getOpcode();
9210 switch (Opcode) {
9211 default:
9212 continue;
9213 // TODO: Expand list -- xor, gep, uadd.sat etc.
9214 case Instruction::LShr:
9215 case Instruction::AShr:
9216 case Instruction::Shl:
9217 case Instruction::Add:
9218 case Instruction::Sub:
9219 case Instruction::UDiv:
9220 case Instruction::URem:
9221 case Instruction::And:
9222 case Instruction::Or:
9223 case Instruction::Mul:
9224 case Instruction::FMul: {
9225 Value *LL = LU->getOperand(0);
9226 Value *LR = LU->getOperand(1);
9227 // Find a recurrence.
9228 if (LL == P)
9229 L = LR;
9230 else if (LR == P)
9231 L = LL;
9232 else
9233 continue; // Check for recurrence with L and R flipped.
9235 break; // Match!
9239 // We have matched a recurrence of the form:
9240 // %iv = [R, %entry], [%iv.next, %backedge]
9241 // %iv.next = binop %iv, L
9242 // OR
9243 // %iv = [R, %entry], [%iv.next, %backedge]
9244 // %iv.next = binop L, %iv
9245 BO = LU;
9246 Start = R;
9247 Step = L;
9248 return true;
9250 return false;
9253 bool llvm::matchSimpleRecurrence(const BinaryOperator *I, PHINode *&P,
9254 Value *&Start, Value *&Step) {
9255 BinaryOperator *BO = nullptr;
9256 P = dyn_cast<PHINode>(I->getOperand(0));
9257 if (!P)
9258 P = dyn_cast<PHINode>(I->getOperand(1));
9259 return P && matchSimpleRecurrence(P, BO, Start, Step) && BO == I;
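// Sketch of a caller of the PHI form above (the names are illustrative):
// detect an induction-style recurrence
//   %iv = phi [ %start, %entry ], [ %iv.next, %latch ]
//   %iv.next = add %iv, %step
// and report whether its step operation is an Add.
//
// \code
//   static bool isAddRecurrence(const PHINode *P, Value *&Start, Value *&Step) {
//     BinaryOperator *BO = nullptr;
//     return matchSimpleRecurrence(P, BO, Start, Step) &&
//            BO->getOpcode() == Instruction::Add;
//   }
// \endcode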
9262 /// Return true if "icmp Pred LHS RHS" is always true.
9263 static bool isTruePredicate(CmpInst::Predicate Pred, const Value *LHS,
9264 const Value *RHS) {
9265 if (ICmpInst::isTrueWhenEqual(Pred) && LHS == RHS)
9266 return true;
9268 switch (Pred) {
9269 default:
9270 return false;
9272 case CmpInst::ICMP_SLE: {
9273 const APInt *C;
9275 // LHS s<= LHS +_{nsw} C if C >= 0
9276 // LHS s<= LHS | C if C >= 0
9277 if (match(RHS, m_NSWAdd(m_Specific(LHS), m_APInt(C))) ||
9278 match(RHS, m_Or(m_Specific(LHS), m_APInt(C))))
9279 return !C->isNegative();
9281 // LHS s<= smax(LHS, V) for any V
9282 if (match(RHS, m_c_SMax(m_Specific(LHS), m_Value())))
9283 return true;
9285 // smin(RHS, V) s<= RHS for any V
9286 if (match(LHS, m_c_SMin(m_Specific(RHS), m_Value())))
9287 return true;
9289 // Match A to (X +_{nsw} CA) and B to (X +_{nsw} CB)
9290 const Value *X;
9291 const APInt *CLHS, *CRHS;
9292 if (match(LHS, m_NSWAddLike(m_Value(X), m_APInt(CLHS))) &&
9293 match(RHS, m_NSWAddLike(m_Specific(X), m_APInt(CRHS))))
9294 return CLHS->sle(*CRHS);
9296 return false;
9299 case CmpInst::ICMP_ULE: {
9300 // LHS u<= LHS +_{nuw} V for any V
9301 if (match(RHS, m_c_Add(m_Specific(LHS), m_Value())) &&
9302 cast<OverflowingBinaryOperator>(RHS)->hasNoUnsignedWrap())
9303 return true;
9305 // LHS u<= LHS | V for any V
9306 if (match(RHS, m_c_Or(m_Specific(LHS), m_Value())))
9307 return true;
9309 // LHS u<= umax(LHS, V) for any V
9310 if (match(RHS, m_c_UMax(m_Specific(LHS), m_Value())))
9311 return true;
9313 // RHS >> V u<= RHS for any V
9314 if (match(LHS, m_LShr(m_Specific(RHS), m_Value())))
9315 return true;
9317 // RHS u/ C_ugt_1 u<= RHS
9318 const APInt *C;
9319 if (match(LHS, m_UDiv(m_Specific(RHS), m_APInt(C))) && C->ugt(1))
9320 return true;
9322 // RHS & V u<= RHS for any V
9323 if (match(LHS, m_c_And(m_Specific(RHS), m_Value())))
9324 return true;
9326 // umin(RHS, V) u<= RHS for any V
9327 if (match(LHS, m_c_UMin(m_Specific(RHS), m_Value())))
9328 return true;
9330 // Match A to (X +_{nuw} CA) and B to (X +_{nuw} CB)
9331 const Value *X;
9332 const APInt *CLHS, *CRHS;
9333 if (match(LHS, m_NUWAddLike(m_Value(X), m_APInt(CLHS))) &&
9334 match(RHS, m_NUWAddLike(m_Specific(X), m_APInt(CRHS))))
9335 return CLHS->ule(*CRHS);
9337 return false;
9342 /// Return true if "icmp Pred BLHS BRHS" is true whenever "icmp Pred
9343 /// ALHS ARHS" is true. Otherwise, return std::nullopt.
9344 static std::optional<bool>
9345 isImpliedCondOperands(CmpInst::Predicate Pred, const Value *ALHS,
9346 const Value *ARHS, const Value *BLHS, const Value *BRHS) {
9347 switch (Pred) {
9348 default:
9349 return std::nullopt;
9351 case CmpInst::ICMP_SLT:
9352 case CmpInst::ICMP_SLE:
9353 if (isTruePredicate(CmpInst::ICMP_SLE, BLHS, ALHS) &&
9354 isTruePredicate(CmpInst::ICMP_SLE, ARHS, BRHS))
9355 return true;
9356 return std::nullopt;
9358 case CmpInst::ICMP_SGT:
9359 case CmpInst::ICMP_SGE:
9360 if (isTruePredicate(CmpInst::ICMP_SLE, ALHS, BLHS) &&
9361 isTruePredicate(CmpInst::ICMP_SLE, BRHS, ARHS))
9362 return true;
9363 return std::nullopt;
9365 case CmpInst::ICMP_ULT:
9366 case CmpInst::ICMP_ULE:
9367 if (isTruePredicate(CmpInst::ICMP_ULE, BLHS, ALHS) &&
9368 isTruePredicate(CmpInst::ICMP_ULE, ARHS, BRHS))
9369 return true;
9370 return std::nullopt;
9372 case CmpInst::ICMP_UGT:
9373 case CmpInst::ICMP_UGE:
9374 if (isTruePredicate(CmpInst::ICMP_ULE, ALHS, BLHS) &&
9375 isTruePredicate(CmpInst::ICMP_ULE, BRHS, ARHS))
9376 return true;
9377 return std::nullopt;
9381 /// Return true if "icmp LPred X, LCR" implies "icmp RPred X, RCR" is true.
9382 /// Return false if "icmp LPred X, LCR" implies "icmp RPred X, RCR" is false.
9383 /// Otherwise, return std::nullopt if we can't infer anything.
9384 static std::optional<bool>
9385 isImpliedCondCommonOperandWithCR(CmpPredicate LPred, const ConstantRange &LCR,
9386 CmpPredicate RPred, const ConstantRange &RCR) {
9387 auto CRImpliesPred = [&](ConstantRange CR,
9388 CmpInst::Predicate Pred) -> std::optional<bool> {
9389 // If all true values for lhs and true for rhs, lhs implies rhs
9390 if (CR.icmp(Pred, RCR))
9391 return true;
9393 // If there is no overlap, lhs implies not rhs
9394 if (CR.icmp(CmpInst::getInversePredicate(Pred), RCR))
9395 return false;
9397 return std::nullopt;
9399 if (auto Res = CRImpliesPred(ConstantRange::makeAllowedICmpRegion(LPred, LCR),
9400 RPred))
9401 return Res;
9402 if (LPred.hasSameSign() ^ RPred.hasSameSign()) {
9403 LPred = LPred.hasSameSign() ? ICmpInst::getFlippedSignednessPredicate(LPred)
9404 : static_cast<CmpInst::Predicate>(LPred);
9405 RPred = RPred.hasSameSign() ? ICmpInst::getFlippedSignednessPredicate(RPred)
9406 : static_cast<CmpInst::Predicate>(RPred);
9407 return CRImpliesPred(ConstantRange::makeAllowedICmpRegion(LPred, LCR),
9408 RPred);
9410 return std::nullopt;
9413 /// Return true if LHS implies RHS (expanded to its components as "R0 RPred R1")
9414 /// is true. Return false if LHS implies RHS is false. Otherwise, return
9415 /// std::nullopt if we can't infer anything.
9416 static std::optional<bool>
9417 isImpliedCondICmps(const ICmpInst *LHS, CmpPredicate RPred, const Value *R0,
9418 const Value *R1, const DataLayout &DL, bool LHSIsTrue) {
9419 Value *L0 = LHS->getOperand(0);
9420 Value *L1 = LHS->getOperand(1);
9422 // The rest of the logic assumes the LHS condition is true. If that's not the
9423 // case, invert the predicate to make it so.
9424 CmpPredicate LPred =
9425 LHSIsTrue ? LHS->getCmpPredicate() : LHS->getInverseCmpPredicate();
9427 // We can have non-canonical operands, so try to normalize any common operand
9428 // to L0/R0.
9429 if (L0 == R1) {
9430 std::swap(R0, R1);
9431 RPred = ICmpInst::getSwappedCmpPredicate(RPred);
9433 if (R0 == L1) {
9434 std::swap(L0, L1);
9435 LPred = ICmpInst::getSwappedCmpPredicate(LPred);
9437 if (L1 == R1) {
9438 // If we have L0 == R0 and L1 == R1, then make L1/R1 the constants.
9439 if (L0 != R0 || match(L0, m_ImmConstant())) {
9440 std::swap(L0, L1);
9441 LPred = ICmpInst::getSwappedCmpPredicate(LPred);
9442 std::swap(R0, R1);
9443 RPred = ICmpInst::getSwappedCmpPredicate(RPred);
9447 // See if we can infer anything if operand-0 matches and we have at least one
9448 // constant.
9449 const APInt *Unused;
9450 if (L0 == R0 && (match(L1, m_APInt(Unused)) || match(R1, m_APInt(Unused)))) {
9451 // Potential TODO: We could also further use the constant range of L0/R0 to
9452 // further constraint the constant ranges. At the moment this leads to
9453 // several regressions related to not transforming `multi_use(A + C0) eq/ne
9454 // C1` (see discussion: D58633).
9455 ConstantRange LCR = computeConstantRange(
9456 L1, ICmpInst::isSigned(LPred), /* UseInstrInfo=*/true, /*AC=*/nullptr,
9457 /*CxtI=*/nullptr, /*DT=*/nullptr, MaxAnalysisRecursionDepth - 1);
9458 ConstantRange RCR = computeConstantRange(
9459 R1, ICmpInst::isSigned(RPred), /* UseInstrInfo=*/true, /*AC=*/nullptr,
9460 /*CxtI=*/nullptr, /*DT=*/nullptr, MaxAnalysisRecursionDepth - 1);
9461 // Even if L1/R1 are not both constant, we can still sometimes deduce
9462 // relationship from a single constant. For example X u> Y implies X != 0.
9463 if (auto R = isImpliedCondCommonOperandWithCR(LPred, LCR, RPred, RCR))
9464 return R;
9465 // If both L1/R1 were exact constant ranges and we didn't get anything
9466 // here, we won't be able to deduce this.
9467 if (match(L1, m_APInt(Unused)) && match(R1, m_APInt(Unused)))
9468 return std::nullopt;
9471 // Can we infer anything when the two compares have matching operands?
9472 if (L0 == R0 && L1 == R1)
9473 return ICmpInst::isImpliedByMatchingCmp(LPred, RPred);
9475 // It only really makes sense in the context of signed comparison for "X - Y
9476 // must be positive if X >= Y and no overflow".
9477 // Take SGT as an example: L0:x > L1:y and C >= 0
9478 // ==> R0:(x -nsw y) < R1:(-C) is false
9479 CmpInst::Predicate SignedLPred = LPred.getPreferredSignedPredicate();
9480 if ((SignedLPred == ICmpInst::ICMP_SGT ||
9481 SignedLPred == ICmpInst::ICMP_SGE) &&
9482 match(R0, m_NSWSub(m_Specific(L0), m_Specific(L1)))) {
9483 if (match(R1, m_NonPositive()) &&
9484 ICmpInst::isImpliedByMatchingCmp(SignedLPred, RPred) == false)
9485 return false;
9488 // Take SLT as an example: L0:x < L1:y and C <= 0
9489 // ==> R0:(x -nsw y) < R1:(-C) is true
9490 if ((SignedLPred == ICmpInst::ICMP_SLT ||
9491 SignedLPred == ICmpInst::ICMP_SLE) &&
9492 match(R0, m_NSWSub(m_Specific(L0), m_Specific(L1)))) {
9493 if (match(R1, m_NonNegative()) &&
9494 ICmpInst::isImpliedByMatchingCmp(SignedLPred, RPred) == true)
9495 return true;
9498 // L0 = R0 = L1 + R1, L0 >=u L1 implies R0 >=u R1, L0 <u L1 implies R0 <u R1
9499 if (L0 == R0 &&
9500 (LPred == ICmpInst::ICMP_ULT || LPred == ICmpInst::ICMP_UGE) &&
9501 (RPred == ICmpInst::ICMP_ULT || RPred == ICmpInst::ICMP_UGE) &&
9502 match(L0, m_c_Add(m_Specific(L1), m_Specific(R1))))
9503 return CmpPredicate::getMatching(LPred, RPred).has_value();
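// For illustration of the check above: `(a + b) u< a` and `(a + b) u< b` are
// equivalent, as both hold exactly when the unsigned addition wraps, so
// matching predicates imply each other and mismatched ones imply the opposite.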
9505 if (auto P = CmpPredicate::getMatching(LPred, RPred))
9506 return isImpliedCondOperands(*P, L0, L1, R0, R1);
9508 return std::nullopt;
9511 /// Return true if LHS implies RHS is true. Return false if LHS implies RHS is
9512 /// false. Otherwise, return std::nullopt if we can't infer anything. We
9513 /// expect the RHS to be an icmp and the LHS to be an 'and', 'or', or a 'select'
9514 /// instruction.
9515 static std::optional<bool>
9516 isImpliedCondAndOr(const Instruction *LHS, CmpPredicate RHSPred,
9517 const Value *RHSOp0, const Value *RHSOp1,
9518 const DataLayout &DL, bool LHSIsTrue, unsigned Depth) {
9519 // The LHS must be an 'or', 'and', or a 'select' instruction.
9520 assert((LHS->getOpcode() == Instruction::And ||
9521 LHS->getOpcode() == Instruction::Or ||
9522 LHS->getOpcode() == Instruction::Select) &&
9523 "Expected LHS to be 'and', 'or', or 'select'.");
9525 assert(Depth <= MaxAnalysisRecursionDepth && "Hit recursion limit");
9527 // If the result of an 'or' is false, then we know both legs of the 'or' are
9528 // false. Similarly, if the result of an 'and' is true, then we know both
9529 // legs of the 'and' are true.
9530 const Value *ALHS, *ARHS;
9531 if ((!LHSIsTrue && match(LHS, m_LogicalOr(m_Value(ALHS), m_Value(ARHS)))) ||
9532 (LHSIsTrue && match(LHS, m_LogicalAnd(m_Value(ALHS), m_Value(ARHS))))) {
9533 // FIXME: Make this non-recursive.
9534 if (std::optional<bool> Implication = isImpliedCondition(
9535 ALHS, RHSPred, RHSOp0, RHSOp1, DL, LHSIsTrue, Depth + 1))
9536 return Implication;
9537 if (std::optional<bool> Implication = isImpliedCondition(
9538 ARHS, RHSPred, RHSOp0, RHSOp1, DL, LHSIsTrue, Depth + 1))
9539 return Implication;
9540 return std::nullopt;
9542 return std::nullopt;
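// Illustration for the decomposition above: if `(x u< 4) && (y u< 4)` is
// known true, then `x u< 10` follows from the first conjunct alone.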
9545 std::optional<bool>
9546 llvm::isImpliedCondition(const Value *LHS, CmpPredicate RHSPred,
9547 const Value *RHSOp0, const Value *RHSOp1,
9548 const DataLayout &DL, bool LHSIsTrue, unsigned Depth) {
9549 // Bail out when we hit the limit.
9550 if (Depth == MaxAnalysisRecursionDepth)
9551 return std::nullopt;
9553 // A mismatch occurs when we compare a scalar cmp to a vector cmp, for
9554 // example.
9555 if (RHSOp0->getType()->isVectorTy() != LHS->getType()->isVectorTy())
9556 return std::nullopt;
9558 assert(LHS->getType()->isIntOrIntVectorTy(1) &&
9559 "Expected integer type only!");
9561 // Match not
9562 if (match(LHS, m_Not(m_Value(LHS))))
9563 LHSIsTrue = !LHSIsTrue;
9565 // Both LHS and RHS are icmps.
9566 const ICmpInst *LHSCmp = dyn_cast<ICmpInst>(LHS);
9567 if (LHSCmp)
9568 return isImpliedCondICmps(LHSCmp, RHSPred, RHSOp0, RHSOp1, DL, LHSIsTrue);
9570 // The LHS should be an 'or', 'and', or a 'select' instruction. We expect
9571 // the RHS to be an icmp.
9572 // FIXME: Add support for and/or/select on the RHS.
9573 if (const Instruction *LHSI = dyn_cast<Instruction>(LHS)) {
9574 if ((LHSI->getOpcode() == Instruction::And ||
9575 LHSI->getOpcode() == Instruction::Or ||
9576 LHSI->getOpcode() == Instruction::Select))
9577 return isImpliedCondAndOr(LHSI, RHSPred, RHSOp0, RHSOp1, DL, LHSIsTrue,
9578 Depth);
9580 return std::nullopt;
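// Usage sketch for the overload above (illustrative only; the values and
// names are hypothetical): given `%c = icmp ult i32 %x, 10` that is known to
// be true, a caller could ask
//   isImpliedCondition(C, ICmpInst::ICMP_ULT, X,
//                      ConstantInt::get(X->getType(), 20), DL,
//                      /*LHSIsTrue=*/true)
// and would get back `true`, since `x u< 10` implies `x u< 20`.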
9583 std::optional<bool> llvm::isImpliedCondition(const Value *LHS, const Value *RHS,
9584 const DataLayout &DL,
9585 bool LHSIsTrue, unsigned Depth) {
9586 // LHS ==> RHS by definition
9587 if (LHS == RHS)
9588 return LHSIsTrue;
9590 // Match not
9591 bool InvertRHS = false;
9592 if (match(RHS, m_Not(m_Value(RHS)))) {
9593 if (LHS == RHS)
9594 return !LHSIsTrue;
9595 InvertRHS = true;
9598 if (const ICmpInst *RHSCmp = dyn_cast<ICmpInst>(RHS)) {
9599 if (auto Implied = isImpliedCondition(
9600 LHS, RHSCmp->getCmpPredicate(), RHSCmp->getOperand(0),
9601 RHSCmp->getOperand(1), DL, LHSIsTrue, Depth))
9602 return InvertRHS ? !*Implied : *Implied;
9603 return std::nullopt;
9606 if (Depth == MaxAnalysisRecursionDepth)
9607 return std::nullopt;
9609 // LHS ==> (RHS1 || RHS2) if LHS ==> RHS1 or LHS ==> RHS2
9610 // LHS ==> !(RHS1 && RHS2) if LHS ==> !RHS1 or LHS ==> !RHS2
9611 const Value *RHS1, *RHS2;
9612 if (match(RHS, m_LogicalOr(m_Value(RHS1), m_Value(RHS2)))) {
9613 if (std::optional<bool> Imp =
9614 isImpliedCondition(LHS, RHS1, DL, LHSIsTrue, Depth + 1))
9615 if (*Imp == true)
9616 return !InvertRHS;
9617 if (std::optional<bool> Imp =
9618 isImpliedCondition(LHS, RHS2, DL, LHSIsTrue, Depth + 1))
9619 if (*Imp == true)
9620 return !InvertRHS;
9622 if (match(RHS, m_LogicalAnd(m_Value(RHS1), m_Value(RHS2)))) {
9623 if (std::optional<bool> Imp =
9624 isImpliedCondition(LHS, RHS1, DL, LHSIsTrue, Depth + 1))
9625 if (*Imp == false)
9626 return InvertRHS;
9627 if (std::optional<bool> Imp =
9628 isImpliedCondition(LHS, RHS2, DL, LHSIsTrue, Depth + 1))
9629 if (*Imp == false)
9630 return InvertRHS;
9633 return std::nullopt;
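// Illustration for the or/and handling above: `x u< 8` implies
// `(x u< 10) || y` is true (via the first disjunct) and implies
// `(x u> 10) && y` is false (via the first conjunct).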
9636 // Returns a pair (Condition, ConditionIsTrue), where Condition is a branch
9637 // condition dominating ContextI, or nullptr if no condition is found.
9638 static std::pair<Value *, bool>
9639 getDomPredecessorCondition(const Instruction *ContextI) {
9640 if (!ContextI || !ContextI->getParent())
9641 return {nullptr, false};
9643 // TODO: This is a poor/cheap way to determine dominance. Should we use a
9644 // dominator tree (e.g., from a SimplifyQuery) instead?
9645 const BasicBlock *ContextBB = ContextI->getParent();
9646 const BasicBlock *PredBB = ContextBB->getSinglePredecessor();
9647 if (!PredBB)
9648 return {nullptr, false};
9650 // We need a conditional branch in the predecessor.
9651 Value *PredCond;
9652 BasicBlock *TrueBB, *FalseBB;
9653 if (!match(PredBB->getTerminator(), m_Br(m_Value(PredCond), TrueBB, FalseBB)))
9654 return {nullptr, false};
9656 // The branch should get simplified. Don't bother simplifying this condition.
9657 if (TrueBB == FalseBB)
9658 return {nullptr, false};
9660 assert((TrueBB == ContextBB || FalseBB == ContextBB) &&
9661 "Predecessor block does not point to successor?");
9663 // Is this condition implied by the predecessor condition?
9664 return {PredCond, TrueBB == ContextBB};
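// For illustration, given
//   pred:
//     %cond = icmp eq i32 %x, 0
//     br i1 %cond, label %bb, label %other
//   bb:            ; ContextI lives here; pred is its only predecessor
// the function above returns {%cond, /*ConditionIsTrue=*/true}.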
9667 std::optional<bool> llvm::isImpliedByDomCondition(const Value *Cond,
9668 const Instruction *ContextI,
9669 const DataLayout &DL) {
9670 assert(Cond->getType()->isIntOrIntVectorTy(1) && "Condition must be bool");
9671 auto PredCond = getDomPredecessorCondition(ContextI);
9672 if (PredCond.first)
9673 return isImpliedCondition(PredCond.first, Cond, DL, PredCond.second);
9674 return std::nullopt;
9677 std::optional<bool> llvm::isImpliedByDomCondition(CmpPredicate Pred,
9678 const Value *LHS,
9679 const Value *RHS,
9680 const Instruction *ContextI,
9681 const DataLayout &DL) {
9682 auto PredCond = getDomPredecessorCondition(ContextI);
9683 if (PredCond.first)
9684 return isImpliedCondition(PredCond.first, Pred, LHS, RHS, DL,
9685 PredCond.second);
9686 return std::nullopt;
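// Illustration for the dominating-condition queries above: if the sole
// predecessor ends in `br i1 (icmp eq i32 %x, 0)` and ContextI sits in the
// true successor, a query for (eq, %x, 0) returns true and one for
// (ne, %x, 0) returns false.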
9689 static void setLimitsForBinOp(const BinaryOperator &BO, APInt &Lower,
9690 APInt &Upper, const InstrInfoQuery &IIQ,
9691 bool PreferSignedRange) {
9692 unsigned Width = Lower.getBitWidth();
9693 const APInt *C;
9694 switch (BO.getOpcode()) {
9695 case Instruction::Add:
9696 if (match(BO.getOperand(1), m_APInt(C)) && !C->isZero()) {
9697 bool HasNSW = IIQ.hasNoSignedWrap(&BO);
9698 bool HasNUW = IIQ.hasNoUnsignedWrap(&BO);
9700 // If the caller expects a signed compare, then try to use a signed range.
9701 // Otherwise if both no-wraps are set, use the unsigned range because it
9702 // is never larger than the signed range. Example:
9703 // "add nuw nsw i8 X, -2" is unsigned [254,255] vs. signed [-128, 125].
9704 if (PreferSignedRange && HasNSW && HasNUW)
9705 HasNUW = false;
9707 if (HasNUW) {
9708 // 'add nuw x, C' produces [C, UINT_MAX].
9709 Lower = *C;
9710 } else if (HasNSW) {
9711 if (C->isNegative()) {
9712 // 'add nsw x, -C' produces [SINT_MIN, SINT_MAX - C].
9713 Lower = APInt::getSignedMinValue(Width);
9714 Upper = APInt::getSignedMaxValue(Width) + *C + 1;
9715 } else {
9716 // 'add nsw x, +C' produces [SINT_MIN + C, SINT_MAX].
9717 Lower = APInt::getSignedMinValue(Width) + *C;
9718 Upper = APInt::getSignedMaxValue(Width) + 1;
9722 break;
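// Illustration for the Add case above (i8): 'add nuw i8 %x, 16' gives
// [16, 255], and 'add nsw i8 %x, -2' gives [-128, 125].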
9724 case Instruction::And:
9725 if (match(BO.getOperand(1), m_APInt(C)))
9726 // 'and x, C' produces [0, C].
9727 Upper = *C + 1;
9728 // X & -X is a power of two or zero, so we can cap the value at the
9729 // maximum power of two.
9730 if (match(BO.getOperand(0), m_Neg(m_Specific(BO.getOperand(1)))) ||
9731 match(BO.getOperand(1), m_Neg(m_Specific(BO.getOperand(0)))))
9732 Upper = APInt::getSignedMinValue(Width) + 1;
9733 break;
9735 case Instruction::Or:
9736 if (match(BO.getOperand(1), m_APInt(C)))
9737 // 'or x, C' produces [C, UINT_MAX].
9738 Lower = *C;
9739 break;
9741 case Instruction::AShr:
9742 if (match(BO.getOperand(1), m_APInt(C)) && C->ult(Width)) {
9743 // 'ashr x, C' produces [INT_MIN >> C, INT_MAX >> C].
9744 Lower = APInt::getSignedMinValue(Width).ashr(*C);
9745 Upper = APInt::getSignedMaxValue(Width).ashr(*C) + 1;
9746 } else if (match(BO.getOperand(0), m_APInt(C))) {
9747 unsigned ShiftAmount = Width - 1;
9748 if (!C->isZero() && IIQ.isExact(&BO))
9749 ShiftAmount = C->countr_zero();
9750 if (C->isNegative()) {
9751 // 'ashr C, x' produces [C, C >> (Width-1)]
9752 Lower = *C;
9753 Upper = C->ashr(ShiftAmount) + 1;
9754 } else {
9755 // 'ashr C, x' produces [C >> (Width-1), C]
9756 Lower = C->ashr(ShiftAmount);
9757 Upper = *C + 1;
9760 break;
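// Illustration for the AShr case above (i8): 'ashr i8 %x, 2' gives [-32, 31],
// and 'ashr i8 -100, %x' (without 'exact') gives [-100, -1].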
9762 case Instruction::LShr:
9763 if (match(BO.getOperand(1), m_APInt(C)) && C->ult(Width)) {
9764 // 'lshr x, C' produces [0, UINT_MAX >> C].
9765 Upper = APInt::getAllOnes(Width).lshr(*C) + 1;
9766 } else if (match(BO.getOperand(0), m_APInt(C))) {
9767 // 'lshr C, x' produces [C >> (Width-1), C].
9768 unsigned ShiftAmount = Width - 1;
9769 if (!C->isZero() && IIQ.isExact(&BO))
9770 ShiftAmount = C->countr_zero();
9771 Lower = C->lshr(ShiftAmount);
9772 Upper = *C + 1;
9774 break;
9776 case Instruction::Shl:
9777 if (match(BO.getOperand(0), m_APInt(C))) {
9778 if (IIQ.hasNoUnsignedWrap(&BO)) {
9779 // 'shl nuw C, x' produces [C, C << CLZ(C)]
9780 Lower = *C;
9781 Upper = Lower.shl(Lower.countl_zero()) + 1;
9782 } else if (BO.hasNoSignedWrap()) { // TODO: What if both nuw+nsw?
9783 if (C->isNegative()) {
9784 // 'shl nsw C, x' produces [C << CLO(C)-1, C]
9785 unsigned ShiftAmount = C->countl_one() - 1;
9786 Lower = C->shl(ShiftAmount);
9787 Upper = *C + 1;
9788 } else {
9789 // 'shl nsw C, x' produces [C, C << CLZ(C)-1]
9790 unsigned ShiftAmount = C->countl_zero() - 1;
9791 Lower = *C;
9792 Upper = C->shl(ShiftAmount) + 1;
9794 } else {
9795 // If lowbit is set, value can never be zero.
9796 if ((*C)[0])
9797 Lower = APInt::getOneBitSet(Width, 0);
9798 // If we are shifting a constant, the largest it can be is when the longest
9799 // sequence of consecutive ones is shifted to the high bits (breaking
9800 // ties in favor of the higher sequence). At the moment we take a liberal
9801 // upper bound on this by just popcounting the constant.
9802 // TODO: There may be a bitwise trick for finding the longest/highest
9803 // consecutive sequence of ones (the naive method is an O(Width) loop).
9804 Upper = APInt::getHighBitsSet(Width, C->popcount()) + 1;
9806 } else if (match(BO.getOperand(1), m_APInt(C)) && C->ult(Width)) {
9807 Upper = APInt::getBitsSetFrom(Width, C->getZExtValue()) + 1;
9809 break;
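// Illustration for the Shl case above (i8): 'shl nuw i8 3, %x' gives [3, 192],
// 'shl nsw i8 3, %x' gives [3, 96], and 'shl i8 %x, 4' gives [0, 240].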
9811 case Instruction::SDiv:
9812 if (match(BO.getOperand(1), m_APInt(C))) {
9813 APInt IntMin = APInt::getSignedMinValue(Width);
9814 APInt IntMax = APInt::getSignedMaxValue(Width);
9815 if (C->isAllOnes()) {
9816 // 'sdiv x, -1' produces [INT_MIN + 1, INT_MAX].
9818 Lower = IntMin + 1;
9819 Upper = IntMax + 1;
9820 } else if (C->countl_zero() < Width - 1) {
9821 // 'sdiv x, C' produces [INT_MIN / C, INT_MAX / C]
9822 // where C != -1 and C != 0 and C != 1
9823 Lower = IntMin.sdiv(*C);
9824 Upper = IntMax.sdiv(*C);
9825 if (Lower.sgt(Upper))
9826 std::swap(Lower, Upper);
9827 Upper = Upper + 1;
9828 assert(Upper != Lower && "Upper part of range has wrapped!");
9830 } else if (match(BO.getOperand(0), m_APInt(C))) {
9831 if (C->isMinSignedValue()) {
9832 // 'sdiv INT_MIN, x' produces [INT_MIN, INT_MIN / -2].
9833 Lower = *C;
9834 Upper = Lower.lshr(1) + 1;
9835 } else {
9836 // 'sdiv C, x' produces [-|C|, |C|].
9837 Upper = C->abs() + 1;
9838 Lower = (-Upper) + 1;
9841 break;
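// Illustration for the SDiv case above (i8): 'sdiv i8 %x, 3' gives [-42, 42],
// and 'sdiv i8 -128, %x' gives [-128, 64].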
9843 case Instruction::UDiv:
9844 if (match(BO.getOperand(1), m_APInt(C)) && !C->isZero()) {
9845 // 'udiv x, C' produces [0, UINT_MAX / C].
9846 Upper = APInt::getMaxValue(Width).udiv(*C) + 1;
9847 } else if (match(BO.getOperand(0), m_APInt(C))) {
9848 // 'udiv C, x' produces [0, C].
9849 Upper = *C + 1;
9851 break;
9853 case Instruction::SRem:
9854 if (match(BO.getOperand(1), m_APInt(C))) {
9855 // 'srem x, C' produces (-|C|, |C|).
9856 Upper = C->abs();
9857 Lower = (-Upper) + 1;
9858 } else if (match(BO.getOperand(0), m_APInt(C))) {
9859 if (C->isNegative()) {
9860 // 'srem -|C|, x' produces [-|C|, 0].
9861 Upper = 1;
9862 Lower = *C;
9863 } else {
9864 // 'srem |C|, x' produces [0, |C|].
9865 Upper = *C + 1;
9868 break;
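// Illustration for the SRem case above (i8): 'srem i8 %x, 5' gives [-4, 4],
// and 'srem i8 -7, %x' gives [-7, 0].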
9870 case Instruction::URem:
9871 if (match(BO.getOperand(1), m_APInt(C)))
9872 // 'urem x, C' produces [0, C).
9873 Upper = *C;
9874 else if (match(BO.getOperand(0), m_APInt(C)))
9875 // 'urem C, x' produces [0, C].
9876 Upper = *C + 1;
9877 break;
9879 default:
9880 break;
9884 static ConstantRange getRangeForIntrinsic(const IntrinsicInst &II,
9885 bool UseInstrInfo) {
9886 unsigned Width = II.getType()->getScalarSizeInBits();
9887 const APInt *C;
9888 switch (II.getIntrinsicID()) {
9889 case Intrinsic::ctlz:
9890 case Intrinsic::cttz: {
9891 APInt Upper(Width, Width);
9892 if (!UseInstrInfo || !match(II.getArgOperand(1), m_One()))
9893 Upper += 1;
9894 // Maximum of set/clear bits is the bit width.
9895 return ConstantRange::getNonEmpty(APInt::getZero(Width), Upper);
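// Illustration for the ctlz/cttz case above (i8): the result range is [0, 8];
// with the is-zero-poison flag set it narrows to [0, 7].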
9897 case Intrinsic::ctpop:
9898 // Maximum of set/clear bits is the bit width.
9899 return ConstantRange::getNonEmpty(APInt::getZero(Width),
9900 APInt(Width, Width) + 1);
9901 case Intrinsic::uadd_sat:
9902 // uadd.sat(x, C) produces [C, UINT_MAX].
9903 if (match(II.getOperand(0), m_APInt(C)) ||
9904 match(II.getOperand(1), m_APInt(C)))
9905 return ConstantRange::getNonEmpty(*C, APInt::getZero(Width));
9906 break;
9907 case Intrinsic::sadd_sat:
9908 if (match(II.getOperand(0), m_APInt(C)) ||
9909 match(II.getOperand(1), m_APInt(C))) {
9910 if (C->isNegative())
9911 // sadd.sat(x, -C) produces [SINT_MIN, SINT_MAX + (-C)].
9912 return ConstantRange::getNonEmpty(APInt::getSignedMinValue(Width),
9913 APInt::getSignedMaxValue(Width) + *C + 1);
9916 // sadd.sat(x, +C) produces [SINT_MIN + C, SINT_MAX].
9917 return ConstantRange::getNonEmpty(APInt::getSignedMinValue(Width) + *C,
9918 APInt::getSignedMaxValue(Width) + 1);
9920 break;
9921 case Intrinsic::usub_sat:
9922 // usub.sat(C, x) produces [0, C].
9923 if (match(II.getOperand(0), m_APInt(C)))
9924 return ConstantRange::getNonEmpty(APInt::getZero(Width), *C + 1);
9926 // usub.sat(x, C) produces [0, UINT_MAX - C].
9927 if (match(II.getOperand(1), m_APInt(C)))
9928 return ConstantRange::getNonEmpty(APInt::getZero(Width),
9929 APInt::getMaxValue(Width) - *C + 1);
9930 break;
9931 case Intrinsic::ssub_sat:
9932 if (match(II.getOperand(0), m_APInt(C))) {
9933 if (C->isNegative())
9934 // ssub.sat(-C, x) produces [SINT_MIN, -SINT_MIN + (-C)].
9935 return ConstantRange::getNonEmpty(APInt::getSignedMinValue(Width),
9936 *C - APInt::getSignedMinValue(Width) + 1);
9939 // ssub.sat(+C, x) produces [-SINT_MAX + C, SINT_MAX].
9940 return ConstantRange::getNonEmpty(*C - APInt::getSignedMaxValue(Width),
9941 APInt::getSignedMaxValue(Width) + 1);
9942 } else if (match(II.getOperand(1), m_APInt(C))) {
9943 if (C->isNegative())
9944 // ssub.sat(x, -C) produces [SINT_MIN - (-C), SINT_MAX]:
9945 return ConstantRange::getNonEmpty(APInt::getSignedMinValue(Width) - *C,
9946 APInt::getSignedMaxValue(Width) + 1);
9948 // ssub.sat(x, +C) produces [SINT_MIN, SINT_MAX - C].
9949 return ConstantRange::getNonEmpty(APInt::getSignedMinValue(Width),
9950 APInt::getSignedMaxValue(Width) - *C + 1);
9953 break;
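// Illustration for the ssub.sat case above (i8): ssub.sat(i8 -10, %x) gives
// [-128, 118], and ssub.sat(%x, i8 10) gives [-128, 117].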
9954 case Intrinsic::umin:
9955 case Intrinsic::umax:
9956 case Intrinsic::smin:
9957 case Intrinsic::smax:
9958 if (!match(II.getOperand(0), m_APInt(C)) &&
9959 !match(II.getOperand(1), m_APInt(C)))
9960 break;
9962 switch (II.getIntrinsicID()) {
9963 case Intrinsic::umin:
9964 return ConstantRange::getNonEmpty(APInt::getZero(Width), *C + 1);
9965 case Intrinsic::umax:
9966 return ConstantRange::getNonEmpty(*C, APInt::getZero(Width));
9967 case Intrinsic::smin:
9968 return ConstantRange::getNonEmpty(APInt::getSignedMinValue(Width),
9969 *C + 1);
9970 case Intrinsic::smax:
9971 return ConstantRange::getNonEmpty(*C,
9972 APInt::getSignedMaxValue(Width) + 1);
9973 default:
9974 llvm_unreachable("Must be min/max intrinsic");
9976 break;
9977 case Intrinsic::abs:
9978 // If abs of SIGNED_MIN is poison, then the result is [0..SIGNED_MAX],
9979 // otherwise it is [0..SIGNED_MIN], as -SIGNED_MIN == SIGNED_MIN.
9980 if (match(II.getOperand(1), m_One()))
9981 return ConstantRange::getNonEmpty(APInt::getZero(Width),
9982 APInt::getSignedMaxValue(Width) + 1);
9984 return ConstantRange::getNonEmpty(APInt::getZero(Width),
9985 APInt::getSignedMinValue(Width) + 1);
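// Illustration for the abs case above (i8): with the int-min-is-poison flag
// the range is [0, 127]; without it, INT_MIN itself is also possible because
// abs(-128) wraps back to -128.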
9986 case Intrinsic::vscale:
9987 if (!II.getParent() || !II.getFunction())
9988 break;
9989 return getVScaleRange(II.getFunction(), Width);
9990 case Intrinsic::scmp:
9991 case Intrinsic::ucmp:
9992 return ConstantRange::getNonEmpty(APInt::getAllOnes(Width),
9993 APInt(Width, 2));
9994 default:
9995 break;
9998 return ConstantRange::getFull(Width);
10001 static ConstantRange getRangeForSelectPattern(const SelectInst &SI,
10002 const InstrInfoQuery &IIQ) {
10003 unsigned BitWidth = SI.getType()->getScalarSizeInBits();
10004 const Value *LHS = nullptr, *RHS = nullptr;
10005 SelectPatternResult R = matchSelectPattern(&SI, LHS, RHS);
10006 if (R.Flavor == SPF_UNKNOWN)
10007 return ConstantRange::getFull(BitWidth);
10009 if (R.Flavor == SelectPatternFlavor::SPF_ABS) {
10010 // If the negation part of the abs (in RHS) has the NSW flag,
10011 // then the result of abs(X) is [0..SIGNED_MAX],
10012 // otherwise it is [0..SIGNED_MIN], as -SIGNED_MIN == SIGNED_MIN.
10013 if (match(RHS, m_Neg(m_Specific(LHS))) &&
10014 IIQ.hasNoSignedWrap(cast<Instruction>(RHS)))
10015 return ConstantRange::getNonEmpty(APInt::getZero(BitWidth),
10016 APInt::getSignedMaxValue(BitWidth) + 1);
10018 return ConstantRange::getNonEmpty(APInt::getZero(BitWidth),
10019 APInt::getSignedMinValue(BitWidth) + 1);
10022 if (R.Flavor == SelectPatternFlavor::SPF_NABS) {
10023 // The result of -abs(X) is <= 0.
10024 return ConstantRange::getNonEmpty(APInt::getSignedMinValue(BitWidth),
10025 APInt(BitWidth, 1));
10028 const APInt *C;
10029 if (!match(LHS, m_APInt(C)) && !match(RHS, m_APInt(C)))
10030 return ConstantRange::getFull(BitWidth);
10032 switch (R.Flavor) {
10033 case SPF_UMIN:
10034 return ConstantRange::getNonEmpty(APInt::getZero(BitWidth), *C + 1);
10035 case SPF_UMAX:
10036 return ConstantRange::getNonEmpty(*C, APInt::getZero(BitWidth));
10037 case SPF_SMIN:
10038 return ConstantRange::getNonEmpty(APInt::getSignedMinValue(BitWidth),
10039 *C + 1);
10040 case SPF_SMAX:
10041 return ConstantRange::getNonEmpty(*C,
10042 APInt::getSignedMaxValue(BitWidth) + 1);
10043 default:
10044 return ConstantRange::getFull(BitWidth);
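// Illustration for the select-pattern ranges above:
// `select (icmp slt i32 %x, 5), i32 %x, i32 5` matches SPF_SMIN with C == 5,
// so its range is [INT_MIN, 5].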
10048 static void setLimitForFPToI(const Instruction *I, APInt &Lower, APInt &Upper) {
10049 // The maximum representable value of a half is 65504. For floats, the
10050 // maximum value is 3.4e38, which requires roughly 129 bits.
10051 unsigned BitWidth = I->getType()->getScalarSizeInBits();
10052 if (!I->getOperand(0)->getType()->getScalarType()->isHalfTy())
10053 return;
10054 if (isa<FPToSIInst>(I) && BitWidth >= 17) {
10055 Lower = APInt(BitWidth, -65504, true);
10056 Upper = APInt(BitWidth, 65505);
10059 if (isa<FPToUIInst>(I) && BitWidth >= 16) {
10060 // For a fptoui the lower limit is left as 0.
10061 Upper = APInt(BitWidth, 65505);
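// Illustration for the limits above: `fptosi half %h to i32` gets
// [-65504, 65504], and `fptoui half %h to i16` gets [0, 65504].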
10065 ConstantRange llvm::computeConstantRange(const Value *V, bool ForSigned,
10066 bool UseInstrInfo, AssumptionCache *AC,
10067 const Instruction *CtxI,
10068 const DominatorTree *DT,
10069 unsigned Depth) {
10070 assert(V->getType()->isIntOrIntVectorTy() && "Expected integer instruction");
10072 if (Depth == MaxAnalysisRecursionDepth)
10073 return ConstantRange::getFull(V->getType()->getScalarSizeInBits());
10075 if (auto *C = dyn_cast<Constant>(V))
10076 return C->toConstantRange();
10078 unsigned BitWidth = V->getType()->getScalarSizeInBits();
10079 InstrInfoQuery IIQ(UseInstrInfo);
10080 ConstantRange CR = ConstantRange::getFull(BitWidth);
10081 if (auto *BO = dyn_cast<BinaryOperator>(V)) {
10082 APInt Lower = APInt(BitWidth, 0);
10083 APInt Upper = APInt(BitWidth, 0);
10084 // TODO: Return ConstantRange.
10085 setLimitsForBinOp(*BO, Lower, Upper, IIQ, ForSigned);
10086 CR = ConstantRange::getNonEmpty(Lower, Upper);
10087 } else if (auto *II = dyn_cast<IntrinsicInst>(V))
10088 CR = getRangeForIntrinsic(*II, UseInstrInfo);
10089 else if (auto *SI = dyn_cast<SelectInst>(V)) {
10090 ConstantRange CRTrue = computeConstantRange(
10091 SI->getTrueValue(), ForSigned, UseInstrInfo, AC, CtxI, DT, Depth + 1);
10092 ConstantRange CRFalse = computeConstantRange(
10093 SI->getFalseValue(), ForSigned, UseInstrInfo, AC, CtxI, DT, Depth + 1);
10094 CR = CRTrue.unionWith(CRFalse);
10095 CR = CR.intersectWith(getRangeForSelectPattern(*SI, IIQ));
10096 } else if (isa<FPToUIInst>(V) || isa<FPToSIInst>(V)) {
10097 APInt Lower = APInt(BitWidth, 0);
10098 APInt Upper = APInt(BitWidth, 0);
10099 // TODO: Return ConstantRange.
10100 setLimitForFPToI(cast<Instruction>(V), Lower, Upper);
10101 CR = ConstantRange::getNonEmpty(Lower, Upper);
10102 } else if (const auto *A = dyn_cast<Argument>(V))
10103 if (std::optional<ConstantRange> Range = A->getRange())
10104 CR = *Range;
10106 if (auto *I = dyn_cast<Instruction>(V)) {
10107 if (auto *Range = IIQ.getMetadata(I, LLVMContext::MD_range))
10108 CR = CR.intersectWith(getConstantRangeFromMetadata(*Range));
10110 if (const auto *CB = dyn_cast<CallBase>(V))
10111 if (std::optional<ConstantRange> Range = CB->getRange())
10112 CR = CR.intersectWith(*Range);
10115 if (CtxI && AC) {
10116 // Try to restrict the range based on information from assumptions.
10117 for (auto &AssumeVH : AC->assumptionsFor(V)) {
10118 if (!AssumeVH)
10119 continue;
10120 CallInst *I = cast<CallInst>(AssumeVH);
10121 assert(I->getParent()->getParent() == CtxI->getParent()->getParent() &&
10122 "Got assumption for the wrong function!");
10123 assert(I->getIntrinsicID() == Intrinsic::assume &&
10124 "must be an assume intrinsic");
10126 if (!isValidAssumeForContext(I, CtxI, DT))
10127 continue;
10128 Value *Arg = I->getArgOperand(0);
10129 ICmpInst *Cmp = dyn_cast<ICmpInst>(Arg);
10130 // Currently we just use information from comparisons.
10131 if (!Cmp || Cmp->getOperand(0) != V)
10132 continue;
10133 // TODO: Set "ForSigned" parameter via Cmp->isSigned()?
10134 ConstantRange RHS =
10135 computeConstantRange(Cmp->getOperand(1), /* ForSigned */ false,
10136 UseInstrInfo, AC, I, DT, Depth + 1);
10137 CR = CR.intersectWith(
10138 ConstantRange::makeAllowedICmpRegion(Cmp->getPredicate(), RHS));
10142 return CR;
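// Usage sketch for computeConstantRange above (illustrative only; the values
// and names are hypothetical): for `%a = add nuw nsw i8 %x, 1`,
//   computeConstantRange(A, /*ForSigned=*/false)
// returns [1, 255] based on the nuw flag, while ForSigned=true prefers the
// signed view and returns [-127, 127].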
10145 static void
10146 addValueAffectedByCondition(Value *V,
10147 function_ref<void(Value *)> InsertAffected) {
10148 assert(V != nullptr);
10149 if (isa<Argument>(V) || isa<GlobalValue>(V)) {
10150 InsertAffected(V);
10151 } else if (auto *I = dyn_cast<Instruction>(V)) {
10152 InsertAffected(V);
10154 // Peek through unary operators to find the source of the condition.
10155 Value *Op;
10156 if (match(I, m_CombineOr(m_PtrToInt(m_Value(Op)), m_Trunc(m_Value(Op))))) {
10157 if (isa<Instruction>(Op) || isa<Argument>(Op))
10158 InsertAffected(Op);
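// Illustration for the helper above: given the value `trunc i64 %x to i32`,
// it reports both the trunc instruction and the underlying %x as affected
// (when %x is itself an instruction or argument).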
10163 void llvm::findValuesAffectedByCondition(
10164 Value *Cond, bool IsAssume, function_ref<void(Value *)> InsertAffected) {
10165 auto AddAffected = [&InsertAffected](Value *V) {
10166 addValueAffectedByCondition(V, InsertAffected);
10169 auto AddCmpOperands = [&AddAffected, IsAssume](Value *LHS, Value *RHS) {
10170 if (IsAssume) {
10171 AddAffected(LHS);
10172 AddAffected(RHS);
10173 } else if (match(RHS, m_Constant()))
10174 AddAffected(LHS);
10177 SmallVector<Value *, 8> Worklist;
10178 SmallPtrSet<Value *, 8> Visited;
10179 Worklist.push_back(Cond);
10180 while (!Worklist.empty()) {
10181 Value *V = Worklist.pop_back_val();
10182 if (!Visited.insert(V).second)
10183 continue;
10185 CmpPredicate Pred;
10186 Value *A, *B, *X;
10188 if (IsAssume) {
10189 AddAffected(V);
10190 if (match(V, m_Not(m_Value(X))))
10191 AddAffected(X);
10194 if (match(V, m_LogicalOp(m_Value(A), m_Value(B)))) {
10195 // assume(A && B) is split to -> assume(A); assume(B);
10196 // assume(!(A || B)) is split to -> assume(!A); assume(!B);
10197 // Finally, assume(A || B) / assume(!(A && B)) generally don't provide
10198 // enough information to be worth handling (intersection of information as
10199 // opposed to union).
10200 if (!IsAssume) {
10201 Worklist.push_back(A);
10202 Worklist.push_back(B);
10204 } else if (match(V, m_ICmp(Pred, m_Value(A), m_Value(B)))) {
10205 AddCmpOperands(A, B);
10207 bool HasRHSC = match(B, m_ConstantInt());
10208 if (ICmpInst::isEquality(Pred)) {
10209 if (HasRHSC) {
10210 Value *Y;
10211 // (X & C) or (X | C) or (X ^ C).
10212 // (X << C) or (X >>_s C) or (X >>_u C).
10213 if (match(A, m_BitwiseLogic(m_Value(X), m_ConstantInt())) ||
10214 match(A, m_Shift(m_Value(X), m_ConstantInt())))
10215 AddAffected(X);
10216 else if (match(A, m_And(m_Value(X), m_Value(Y))) ||
10217 match(A, m_Or(m_Value(X), m_Value(Y)))) {
10218 AddAffected(X);
10219 AddAffected(Y);
10222 } else {
10223 if (HasRHSC) {
10224 // Handle (A + C1) u< C2, which is the canonical form of
10225 // A > C3 && A < C4.
10226 if (match(A, m_AddLike(m_Value(X), m_ConstantInt())))
10227 AddAffected(X);
10229 if (ICmpInst::isUnsigned(Pred)) {
10230 Value *Y;
10231 // X & Y u> C -> X >u C && Y >u C
10232 // X | Y u< C -> X u< C && Y u< C
10233 // X nuw+ Y u< C -> X u< C && Y u< C
10234 if (match(A, m_And(m_Value(X), m_Value(Y))) ||
10235 match(A, m_Or(m_Value(X), m_Value(Y))) ||
10236 match(A, m_NUWAdd(m_Value(X), m_Value(Y)))) {
10237 AddAffected(X);
10238 AddAffected(Y);
10240 // X nuw- Y u> C -> X u> C
10241 if (match(A, m_NUWSub(m_Value(X), m_Value())))
10242 AddAffected(X);
10246 // Handle icmp slt/sgt (bitcast X to int), 0/-1, which is supported
10247 // by computeKnownFPClass().
10248 if (match(A, m_ElementWiseBitCast(m_Value(X)))) {
10249 if (Pred == ICmpInst::ICMP_SLT && match(B, m_Zero()))
10250 InsertAffected(X);
10251 else if (Pred == ICmpInst::ICMP_SGT && match(B, m_AllOnes()))
10252 InsertAffected(X);
10256 if (HasRHSC && match(A, m_Intrinsic<Intrinsic::ctpop>(m_Value(X))))
10257 AddAffected(X);
10258 } else if (match(V, m_FCmp(Pred, m_Value(A), m_Value(B)))) {
10259 AddCmpOperands(A, B);
10261 // fcmp fneg(x), y
10262 // fcmp fabs(x), y
10263 // fcmp fneg(fabs(x)), y
10264 if (match(A, m_FNeg(m_Value(A))))
10265 AddAffected(A);
10266 if (match(A, m_FAbs(m_Value(A))))
10267 AddAffected(A);
10269 } else if (match(V, m_Intrinsic<Intrinsic::is_fpclass>(m_Value(A),
10270 m_Value()))) {
10271 // Handle patterns that computeKnownFPClass() supports.
10272 AddAffected(A);