lib/Transforms/Scalar/CorrelatedValuePropagation.cpp

   1 //===- CorrelatedValuePropagation.cpp - Propagate CFG-derived info --------===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 //
   9 // This file implements the Correlated Value Propagation pass.
  10 //
  11 //===----------------------------------------------------------------------===//
  12
  13 #include "llvm/Transforms/Scalar/CorrelatedValuePropagation.h"
  14 #include "llvm/ADT/DepthFirstIterator.h"
  15 #include "llvm/ADT/Optional.h"
  16 #include "llvm/ADT/SmallVector.h"
  17 #include "llvm/ADT/Statistic.h"
  18 #include "llvm/Analysis/DomTreeUpdater.h"
  19 #include "llvm/Analysis/GlobalsModRef.h"
  20 #include "llvm/Analysis/InstructionSimplify.h"
  21 #include "llvm/Analysis/LazyValueInfo.h"
  22 #include "llvm/IR/Attributes.h"
  23 #include "llvm/IR/BasicBlock.h"
  24 #include "llvm/IR/CFG.h"
  25 #include "llvm/IR/CallSite.h"
  26 #include "llvm/IR/Constant.h"
  27 #include "llvm/IR/ConstantRange.h"
  28 #include "llvm/IR/Constants.h"
  29 #include "llvm/IR/DerivedTypes.h"
  30 #include "llvm/IR/Function.h"
  31 #include "llvm/IR/IRBuilder.h"
  32 #include "llvm/IR/InstrTypes.h"
  33 #include "llvm/IR/Instruction.h"
  34 #include "llvm/IR/Instructions.h"
  35 #include "llvm/IR/IntrinsicInst.h"
  36 #include "llvm/IR/Operator.h"
  37 #include "llvm/IR/PassManager.h"
  38 #include "llvm/IR/Type.h"
  39 #include "llvm/IR/Value.h"
  40 #include "llvm/Pass.h"
  41 #include "llvm/Support/Casting.h"
  42 #include "llvm/Support/CommandLine.h"
  43 #include "llvm/Support/Debug.h"
  44 #include "llvm/Support/raw_ostream.h"
  45 #include "llvm/Transforms/Scalar.h"
  46 #include "llvm/Transforms/Utils/Local.h"
  47 #include <cassert>
  48 #include <utility>
  49
  50 using namespace llvm;
  51
  52 #define DEBUG_TYPE "correlated-value-propagation"
  53
  54 STATISTIC(NumPhis,      "Number of phis propagated");
  55 STATISTIC(NumPhiCommon, "Number of phis deleted via common incoming value");
  56 STATISTIC(NumSelects,   "Number of selects propagated");
  57 STATISTIC(NumMemAccess, "Number of memory access targets propagated");
  58 STATISTIC(NumCmps,      "Number of comparisons propagated");
  59 STATISTIC(NumReturns,   "Number of return values propagated");
  60 STATISTIC(NumDeadCases, "Number of switch cases removed");
  61 STATISTIC(NumSDivs,     "Number of sdiv converted to udiv");
  62 STATISTIC(NumUDivs,     "Number of udivs whose width was decreased");
  63 STATISTIC(NumAShrs,     "Number of ashr converted to lshr");
  64 STATISTIC(NumSRems,     "Number of srem converted to urem");
  65 STATISTIC(NumOverflows, "Number of overflow checks removed");
  66
  67 static cl::opt<bool> DontProcessAdds("cvp-dont-process-adds", cl::init(true));
  68
  69 namespace {
  70
  71   class CorrelatedValuePropagation : public FunctionPass {
  72   public:
  73     static char ID;
  74
  75     CorrelatedValuePropagation(): FunctionPass(ID) {
  76      initializeCorrelatedValuePropagationPass(*PassRegistry::getPassRegistry());
  77     }
  78
  79     bool runOnFunction(Function &F) override;
  80
  81     void getAnalysisUsage(AnalysisUsage &AU) const override {
  82       AU.addRequired<DominatorTreeWrapperPass>();
  83       AU.addRequired<LazyValueInfoWrapperPass>();
  84       AU.addPreserved<GlobalsAAWrapperPass>();
  85       AU.addPreserved<DominatorTreeWrapperPass>();
  86     }
  87   };
  88
  89 } // end anonymous namespace
  90
  91 char CorrelatedValuePropagation::ID = 0;
  92
  93 INITIALIZE_PASS_BEGIN(CorrelatedValuePropagation, "correlated-propagation",
  94                 "Value Propagation", false, false)
  95 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
  96 INITIALIZE_PASS_DEPENDENCY(LazyValueInfoWrapperPass)
  97 INITIALIZE_PASS_END(CorrelatedValuePropagation, "correlated-propagation",
  98                 "Value Propagation", false, false)
  99
 100 // Public interface to the Value Propagation pass
 101 Pass *llvm::createCorrelatedValuePropagationPass() {
 102   return new CorrelatedValuePropagation();
 103 }
 104
 105 static bool processSelect(SelectInst *S, LazyValueInfo *LVI) {
 106   if (S->getType()->isVectorTy()) return false;
 107   if (isa<Constant>(S->getOperand(0))) return false;
 108
 109   Constant *C = LVI->getConstant(S->getCondition(), S->getParent(), S);
 110   if (!C) return false;
 111
 112   ConstantInt *CI = dyn_cast<ConstantInt>(C);
 113   if (!CI) return false;
 114
 115   Value *ReplaceWith = S->getTrueValue();
 116   Value *Other = S->getFalseValue();
 117   if (!CI->isOne()) std::swap(ReplaceWith, Other);
 118   if (ReplaceWith == S) ReplaceWith = UndefValue::get(S->getType());
 119
 120   S->replaceAllUsesWith(ReplaceWith);
 121   S->eraseFromParent();
 122
 123   ++NumSelects;
 124
 125   return true;
 126 }
 127
 128 /// Try to simplify a phi with constant incoming values that match the edge
 129 /// values of a non-constant value on all other edges:
 130 /// bb0:
 131 ///   %isnull = icmp eq i8* %x, null
 132 ///   br i1 %isnull, label %bb2, label %bb1
 133 /// bb1:
 134 ///   br label %bb2
 135 /// bb2:
 136 ///   %r = phi i8* [ %x, %bb1 ], [ null, %bb0 ]
 137 /// -->
 138 ///   %r = %x
 139 static bool simplifyCommonValuePhi(PHINode *P, LazyValueInfo *LVI,
 140                                    DominatorTree *DT) {
 141   // Collect incoming constants and initialize possible common value.
 142   SmallVector<std::pair<Constant *, unsigned>, 4> IncomingConstants;
 143   Value *CommonValue = nullptr;
 144   for (unsigned i = 0, e = P->getNumIncomingValues(); i != e; ++i) {
 145     Value *Incoming = P->getIncomingValue(i);
 146     if (auto *IncomingConstant = dyn_cast<Constant>(Incoming)) {
 147       IncomingConstants.push_back(std::make_pair(IncomingConstant, i));
 148     } else if (!CommonValue) {
 149       // The potential common value is initialized to the first non-constant.
 150       CommonValue = Incoming;
 151     } else if (Incoming != CommonValue) {
 152       // There can be only one non-constant common value.
 153       return false;
 154     }
 155   }
 156
 157   if (!CommonValue || IncomingConstants.empty())
 158     return false;
 159
 160   // The common value must be valid in all incoming blocks.
 161   BasicBlock *ToBB = P->getParent();
 162   if (auto *CommonInst = dyn_cast<Instruction>(CommonValue))
 163     if (!DT->dominates(CommonInst, ToBB))
 164       return false;
 165
 166   // We have a phi with exactly 1 variable incoming value and 1 or more constant
 167   // incoming values. See if all constant incoming values can be mapped back to
 168   // the same incoming variable value.
 169   for (auto &IncomingConstant : IncomingConstants) {
 170     Constant *C = IncomingConstant.first;
 171     BasicBlock *IncomingBB = P->getIncomingBlock(IncomingConstant.second);
 172     if (C != LVI->getConstantOnEdge(CommonValue, IncomingBB, ToBB, P))
 173       return false;
 174   }
 175
 176   // All constant incoming values map to the same variable along the incoming
 177   // edges of the phi. The phi is unnecessary.
 178   P->replaceAllUsesWith(CommonValue);
 179   P->eraseFromParent();
 180   ++NumPhiCommon;
 181   return true;
 182 }
 183
 184 static bool processPHI(PHINode *P, LazyValueInfo *LVI, DominatorTree *DT,
 185                        const SimplifyQuery &SQ) {
 186   bool Changed = false;
 187
 188   BasicBlock *BB = P->getParent();
 189   for (unsigned i = 0, e = P->getNumIncomingValues(); i < e; ++i) {
 190     Value *Incoming = P->getIncomingValue(i);
 191     if (isa<Constant>(Incoming)) continue;
 192
 193     Value *V = LVI->getConstantOnEdge(Incoming, P->getIncomingBlock(i), BB, P);
 194
 195     // Look if the incoming value is a select with a scalar condition for which
 196     // LVI can tells us the value. In that case replace the incoming value with
 197     // the appropriate value of the select. This often allows us to remove the
 198     // select later.
 199     if (!V) {
 200       SelectInst *SI = dyn_cast<SelectInst>(Incoming);
 201       if (!SI) continue;
 202
 203       Value *Condition = SI->getCondition();
 204       if (!Condition->getType()->isVectorTy()) {
 205         if (Constant *C = LVI->getConstantOnEdge(
 206                 Condition, P->getIncomingBlock(i), BB, P)) {
 207           if (C->isOneValue()) {
 208             V = SI->getTrueValue();
 209           } else if (C->isZeroValue()) {
 210             V = SI->getFalseValue();
 211           }
 212           // Once LVI learns to handle vector types, we could also add support
 213           // for vector type constants that are not all zeroes or all ones.
 214         }
 215       }
 216
 217       // Look if the select has a constant but LVI tells us that the incoming
 218       // value can never be that constant. In that case replace the incoming
 219       // value with the other value of the select. This often allows us to
 220       // remove the select later.
 221       if (!V) {
 222         Constant *C = dyn_cast<Constant>(SI->getFalseValue());
 223         if (!C) continue;
 224
 225         if (LVI->getPredicateOnEdge(ICmpInst::ICMP_EQ, SI, C,
 226               P->getIncomingBlock(i), BB, P) !=
 227             LazyValueInfo::False)
 228           continue;
 229         V = SI->getTrueValue();
 230       }
 231
 232       LLVM_DEBUG(dbgs() << "CVP: Threading PHI over " << *SI << '\n');
 233     }
 234
 235     P->setIncomingValue(i, V);
 236     Changed = true;
 237   }
 238
 239   if (Value *V = SimplifyInstruction(P, SQ)) {
 240     P->replaceAllUsesWith(V);
 241     P->eraseFromParent();
 242     Changed = true;
 243   }
 244
 245   if (!Changed)
 246     Changed = simplifyCommonValuePhi(P, LVI, DT);
 247
 248   if (Changed)
 249     ++NumPhis;
 250
 251   return Changed;
 252 }
 253
 254 static bool processMemAccess(Instruction *I, LazyValueInfo *LVI) {
 255   Value *Pointer = nullptr;
 256   if (LoadInst *L = dyn_cast<LoadInst>(I))
 257     Pointer = L->getPointerOperand();
 258   else
 259     Pointer = cast<StoreInst>(I)->getPointerOperand();
 260
 261   if (isa<Constant>(Pointer)) return false;
 262
 263   Constant *C = LVI->getConstant(Pointer, I->getParent(), I);
 264   if (!C) return false;
 265
 266   ++NumMemAccess;
 267   I->replaceUsesOfWith(Pointer, C);
 268   return true;
 269 }
 270
 271 /// See if LazyValueInfo's ability to exploit edge conditions or range
 272 /// information is sufficient to prove this comparison. Even for local
 273 /// conditions, this can sometimes prove conditions instcombine can't by
 274 /// exploiting range information.
 275 static bool processCmp(CmpInst *Cmp, LazyValueInfo *LVI) {
 276   Value *Op0 = Cmp->getOperand(0);
 277   auto *C = dyn_cast<Constant>(Cmp->getOperand(1));
 278   if (!C)
 279     return false;
 280
 281   // As a policy choice, we choose not to waste compile time on anything where
 282   // the comparison is testing local values.  While LVI can sometimes reason
 283   // about such cases, it's not its primary purpose.  We do make sure to do
 284   // the block local query for uses from terminator instructions, but that's
 285   // handled in the code for each terminator.
 286   auto *I = dyn_cast<Instruction>(Op0);
 287   if (I && I->getParent() == Cmp->getParent())
 288     return false;
 289
 290   LazyValueInfo::Tristate Result =
 291       LVI->getPredicateAt(Cmp->getPredicate(), Op0, C, Cmp);
 292   if (Result == LazyValueInfo::Unknown)
 293     return false;
 294
 295   ++NumCmps;
 296   Constant *TorF = ConstantInt::get(Type::getInt1Ty(Cmp->getContext()), Result);
 297   Cmp->replaceAllUsesWith(TorF);
 298   Cmp->eraseFromParent();
 299   return true;
 300 }
 301
 302 /// Simplify a switch instruction by removing cases which can never fire. If the
 303 /// uselessness of a case could be determined locally then constant propagation
 304 /// would already have figured it out. Instead, walk the predecessors and
 305 /// statically evaluate cases based on information available on that edge. Cases
 306 /// that cannot fire no matter what the incoming edge can safely be removed. If
 307 /// a case fires on every incoming edge then the entire switch can be removed
 308 /// and replaced with a branch to the case destination.
 309 static bool processSwitch(SwitchInst *SI, LazyValueInfo *LVI,
 310                           DominatorTree *DT) {
 311   DomTreeUpdater DTU(*DT, DomTreeUpdater::UpdateStrategy::Lazy);
 312   Value *Cond = SI->getCondition();
 313   BasicBlock *BB = SI->getParent();
 314
 315   // If the condition was defined in same block as the switch then LazyValueInfo
 316   // currently won't say anything useful about it, though in theory it could.
 317   if (isa<Instruction>(Cond) && cast<Instruction>(Cond)->getParent() == BB)
 318     return false;
 319
 320   // If the switch is unreachable then trying to improve it is a waste of time.
 321   pred_iterator PB = pred_begin(BB), PE = pred_end(BB);
 322   if (PB == PE) return false;
 323
 324   // Analyse each switch case in turn.
 325   bool Changed = false;
 326   DenseMap<BasicBlock*, int> SuccessorsCount;
 327   for (auto *Succ : successors(BB))
 328     SuccessorsCount[Succ]++;
 329
 330   for (auto CI = SI->case_begin(), CE = SI->case_end(); CI != CE;) {
 331     ConstantInt *Case = CI->getCaseValue();
 332
 333     // Check to see if the switch condition is equal to/not equal to the case
 334     // value on every incoming edge, equal/not equal being the same each time.
 335     LazyValueInfo::Tristate State = LazyValueInfo::Unknown;
 336     for (pred_iterator PI = PB; PI != PE; ++PI) {
 337       // Is the switch condition equal to the case value?
 338       LazyValueInfo::Tristate Value = LVI->getPredicateOnEdge(CmpInst::ICMP_EQ,
 339                                                               Cond, Case, *PI,
 340                                                               BB, SI);
 341       // Give up on this case if nothing is known.
 342       if (Value == LazyValueInfo::Unknown) {
 343         State = LazyValueInfo::Unknown;
 344         break;
 345       }
 346
 347       // If this was the first edge to be visited, record that all other edges
 348       // need to give the same result.
 349       if (PI == PB) {
 350         State = Value;
 351         continue;
 352       }
 353
 354       // If this case is known to fire for some edges and known not to fire for
 355       // others then there is nothing we can do - give up.
 356       if (Value != State) {
 357         State = LazyValueInfo::Unknown;
 358         break;
 359       }
 360     }
 361
 362     if (State == LazyValueInfo::False) {
 363       // This case never fires - remove it.
 364       BasicBlock *Succ = CI->getCaseSuccessor();
 365       Succ->removePredecessor(BB);
 366       CI = SI->removeCase(CI);
 367       CE = SI->case_end();
 368
 369       // The condition can be modified by removePredecessor's PHI simplification
 370       // logic.
 371       Cond = SI->getCondition();
 372
 373       ++NumDeadCases;
 374       Changed = true;
 375       if (--SuccessorsCount[Succ] == 0)
 376         DTU.deleteEdge(BB, Succ);
 377       continue;
 378     }
 379     if (State == LazyValueInfo::True) {
 380       // This case always fires.  Arrange for the switch to be turned into an
 381       // unconditional branch by replacing the switch condition with the case
 382       // value.
 383       SI->setCondition(Case);
 384       NumDeadCases += SI->getNumCases();
 385       Changed = true;
 386       break;
 387     }
 388
 389     // Increment the case iterator since we didn't delete it.
 390     ++CI;
 391   }
 392
 393   if (Changed)
 394     // If the switch has been simplified to the point where it can be replaced
 395     // by a branch then do so now.
 396     ConstantFoldTerminator(BB, /*DeleteDeadConditions = */ false,
 397                            /*TLI = */ nullptr, &DTU);
 398   return Changed;
 399 }
 400
 401 // See if we can prove that the given overflow intrinsic will not overflow.
 402 static bool willNotOverflow(IntrinsicInst *II, LazyValueInfo *LVI) {
 403   using OBO = OverflowingBinaryOperator;
 404   auto NoWrap = [&] (Instruction::BinaryOps BinOp, unsigned NoWrapKind) {
 405     Value *RHS = II->getOperand(1);
 406     ConstantRange RRange = LVI->getConstantRange(RHS, II->getParent(), II);
 407     ConstantRange NWRegion = ConstantRange::makeGuaranteedNoWrapRegion(
 408         BinOp, RRange, NoWrapKind);
 409     // As an optimization, do not compute LRange if we do not need it.
 410     if (NWRegion.isEmptySet())
 411       return false;
 412     Value *LHS = II->getOperand(0);
 413     ConstantRange LRange = LVI->getConstantRange(LHS, II->getParent(), II);
 414     return NWRegion.contains(LRange);
 415   };
 416   switch (II->getIntrinsicID()) {
 417   default:
 418     break;
 419   case Intrinsic::uadd_with_overflow:
 420     return NoWrap(Instruction::Add, OBO::NoUnsignedWrap);
 421   case Intrinsic::sadd_with_overflow:
 422     return NoWrap(Instruction::Add, OBO::NoSignedWrap);
 423   case Intrinsic::usub_with_overflow:
 424     return NoWrap(Instruction::Sub, OBO::NoUnsignedWrap);
 425   case Intrinsic::ssub_with_overflow:
 426     return NoWrap(Instruction::Sub, OBO::NoSignedWrap);
 427   }
 428   return false;
 429 }
 430
 431 static void processOverflowIntrinsic(IntrinsicInst *II) {
 432   IRBuilder<> B(II);
 433   Value *NewOp = nullptr;
 434   switch (II->getIntrinsicID()) {
 435   default:
 436     llvm_unreachable("Unexpected instruction.");
 437   case Intrinsic::uadd_with_overflow:
 438   case Intrinsic::sadd_with_overflow:
 439     NewOp = B.CreateAdd(II->getOperand(0), II->getOperand(1), II->getName());
 440     break;
 441   case Intrinsic::usub_with_overflow:
 442   case Intrinsic::ssub_with_overflow:
 443     NewOp = B.CreateSub(II->getOperand(0), II->getOperand(1), II->getName());
 444     break;
 445   }
 446   ++NumOverflows;
 447   Value *NewI = B.CreateInsertValue(UndefValue::get(II->getType()), NewOp, 0);
 448   NewI = B.CreateInsertValue(NewI, ConstantInt::getFalse(II->getContext()), 1);
 449   II->replaceAllUsesWith(NewI);
 450   II->eraseFromParent();
 451 }
 452
 453 /// Infer nonnull attributes for the arguments at the specified callsite.
 454 static bool processCallSite(CallSite CS, LazyValueInfo *LVI) {
 455   SmallVector<unsigned, 4> ArgNos;
 456   unsigned ArgNo = 0;
 457
 458   if (auto *II = dyn_cast<IntrinsicInst>(CS.getInstruction())) {
 459     if (willNotOverflow(II, LVI)) {
 460       processOverflowIntrinsic(II);
 461       return true;
 462     }
 463   }
 464
 465   // Deopt bundle operands are intended to capture state with minimal
 466   // perturbance of the code otherwise.  If we can find a constant value for
 467   // any such operand and remove a use of the original value, that's
 468   // desireable since it may allow further optimization of that value (e.g. via
 469   // single use rules in instcombine).  Since deopt uses tend to,
 470   // idiomatically, appear along rare conditional paths, it's reasonable likely
 471   // we may have a conditional fact with which LVI can fold.
 472   if (auto DeoptBundle = CS.getOperandBundle(LLVMContext::OB_deopt)) {
 473     bool Progress = false;
 474     for (const Use &ConstU : DeoptBundle->Inputs) {
 475       Use &U = const_cast<Use&>(ConstU);
 476       Value *V = U.get();
 477       if (V->getType()->isVectorTy()) continue;
 478       if (isa<Constant>(V)) continue;
 479
 480       Constant *C = LVI->getConstant(V, CS.getParent(), CS.getInstruction());
 481       if (!C) continue;
 482       U.set(C);
 483       Progress = true;
 484     }
 485     if (Progress)
 486       return true;
 487   }
 488
 489   for (Value *V : CS.args()) {
 490     PointerType *Type = dyn_cast<PointerType>(V->getType());
 491     // Try to mark pointer typed parameters as non-null.  We skip the
 492     // relatively expensive analysis for constants which are obviously either
 493     // null or non-null to start with.
 494     if (Type && !CS.paramHasAttr(ArgNo, Attribute::NonNull) &&
 495         !isa<Constant>(V) &&
 496         LVI->getPredicateAt(ICmpInst::ICMP_EQ, V,
 497                             ConstantPointerNull::get(Type),
 498                             CS.getInstruction()) == LazyValueInfo::False)
 499       ArgNos.push_back(ArgNo);
 500     ArgNo++;
 501   }
 502
 503   assert(ArgNo == CS.arg_size() && "sanity check");
 504
 505   if (ArgNos.empty())
 506     return false;
 507
 508   AttributeList AS = CS.getAttributes();
 509   LLVMContext &Ctx = CS.getInstruction()->getContext();
 510   AS = AS.addParamAttribute(Ctx, ArgNos,
 511                             Attribute::get(Ctx, Attribute::NonNull));
 512   CS.setAttributes(AS);
 513
 514   return true;
 515 }
 516
 517 static bool hasPositiveOperands(BinaryOperator *SDI, LazyValueInfo *LVI) {
 518   Constant *Zero = ConstantInt::get(SDI->getType(), 0);
 519   for (Value *O : SDI->operands()) {
 520     auto Result = LVI->getPredicateAt(ICmpInst::ICMP_SGE, O, Zero, SDI);
 521     if (Result != LazyValueInfo::True)
 522       return false;
 523   }
 524   return true;
 525 }
 526
 527 /// Try to shrink a udiv/urem's width down to the smallest power of two that's
 528 /// sufficient to contain its operands.
 529 static bool processUDivOrURem(BinaryOperator *Instr, LazyValueInfo *LVI) {
 530   assert(Instr->getOpcode() == Instruction::UDiv ||
 531          Instr->getOpcode() == Instruction::URem);
 532   if (Instr->getType()->isVectorTy())
 533     return false;
 534
 535   // Find the smallest power of two bitwidth that's sufficient to hold Instr's
 536   // operands.
 537   auto OrigWidth = Instr->getType()->getIntegerBitWidth();
 538   ConstantRange OperandRange(OrigWidth, /*isFullset=*/false);
 539   for (Value *Operand : Instr->operands()) {
 540     OperandRange = OperandRange.unionWith(
 541         LVI->getConstantRange(Operand, Instr->getParent()));
 542   }
 543   // Don't shrink below 8 bits wide.
 544   unsigned NewWidth = std::max<unsigned>(
 545       PowerOf2Ceil(OperandRange.getUnsignedMax().getActiveBits()), 8);
 546   // NewWidth might be greater than OrigWidth if OrigWidth is not a power of
 547   // two.
 548   if (NewWidth >= OrigWidth)
 549     return false;
 550
 551   ++NumUDivs;
 552   IRBuilder<> B{Instr};
 553   auto *TruncTy = Type::getIntNTy(Instr->getContext(), NewWidth);
 554   auto *LHS = B.CreateTruncOrBitCast(Instr->getOperand(0), TruncTy,
 555                                      Instr->getName() + ".lhs.trunc");
 556   auto *RHS = B.CreateTruncOrBitCast(Instr->getOperand(1), TruncTy,
 557                                      Instr->getName() + ".rhs.trunc");
 558   auto *BO = B.CreateBinOp(Instr->getOpcode(), LHS, RHS, Instr->getName());
 559   auto *Zext = B.CreateZExt(BO, Instr->getType(), Instr->getName() + ".zext");
 560   if (auto *BinOp = dyn_cast<BinaryOperator>(BO))
 561     if (BinOp->getOpcode() == Instruction::UDiv)
 562       BinOp->setIsExact(Instr->isExact());
 563
 564   Instr->replaceAllUsesWith(Zext);
 565   Instr->eraseFromParent();
 566   return true;
 567 }
 568
 569 static bool processSRem(BinaryOperator *SDI, LazyValueInfo *LVI) {
 570   if (SDI->getType()->isVectorTy() || !hasPositiveOperands(SDI, LVI))
 571     return false;
 572
 573   ++NumSRems;
 574   auto *BO = BinaryOperator::CreateURem(SDI->getOperand(0), SDI->getOperand(1),
 575                                         SDI->getName(), SDI);
 576   BO->setDebugLoc(SDI->getDebugLoc());
 577   SDI->replaceAllUsesWith(BO);
 578   SDI->eraseFromParent();
 579
 580   // Try to process our new urem.
 581   processUDivOrURem(BO, LVI);
 582
 583   return true;
 584 }
 585
 586 /// See if LazyValueInfo's ability to exploit edge conditions or range
 587 /// information is sufficient to prove the both operands of this SDiv are
 588 /// positive.  If this is the case, replace the SDiv with a UDiv. Even for local
 589 /// conditions, this can sometimes prove conditions instcombine can't by
 590 /// exploiting range information.
 591 static bool processSDiv(BinaryOperator *SDI, LazyValueInfo *LVI) {
 592   if (SDI->getType()->isVectorTy() || !hasPositiveOperands(SDI, LVI))
 593     return false;
 594
 595   ++NumSDivs;
 596   auto *BO = BinaryOperator::CreateUDiv(SDI->getOperand(0), SDI->getOperand(1),
 597                                         SDI->getName(), SDI);
 598   BO->setDebugLoc(SDI->getDebugLoc());
 599   BO->setIsExact(SDI->isExact());
 600   SDI->replaceAllUsesWith(BO);
 601   SDI->eraseFromParent();
 602
 603   // Try to simplify our new udiv.
 604   processUDivOrURem(BO, LVI);
 605
 606   return true;
 607 }
 608
 609 static bool processAShr(BinaryOperator *SDI, LazyValueInfo *LVI) {
 610   if (SDI->getType()->isVectorTy())
 611     return false;
 612
 613   Constant *Zero = ConstantInt::get(SDI->getType(), 0);
 614   if (LVI->getPredicateAt(ICmpInst::ICMP_SGE, SDI->getOperand(0), Zero, SDI) !=
 615       LazyValueInfo::True)
 616     return false;
 617
 618   ++NumAShrs;
 619   auto *BO = BinaryOperator::CreateLShr(SDI->getOperand(0), SDI->getOperand(1),
 620                                         SDI->getName(), SDI);
 621   BO->setDebugLoc(SDI->getDebugLoc());
 622   BO->setIsExact(SDI->isExact());
 623   SDI->replaceAllUsesWith(BO);
 624   SDI->eraseFromParent();
 625
 626   return true;
 627 }
 628
 629 static bool processAdd(BinaryOperator *AddOp, LazyValueInfo *LVI) {
 630   using OBO = OverflowingBinaryOperator;
 631
 632   if (DontProcessAdds)
 633     return false;
 634
 635   if (AddOp->getType()->isVectorTy())
 636     return false;
 637
 638   bool NSW = AddOp->hasNoSignedWrap();
 639   bool NUW = AddOp->hasNoUnsignedWrap();
 640   if (NSW && NUW)
 641     return false;
 642
 643   BasicBlock *BB = AddOp->getParent();
 644
 645   Value *LHS = AddOp->getOperand(0);
 646   Value *RHS = AddOp->getOperand(1);
 647
 648   ConstantRange LRange = LVI->getConstantRange(LHS, BB, AddOp);
 649
 650   // Initialize RRange only if we need it. If we know that guaranteed no wrap
 651   // range for the given LHS range is empty don't spend time calculating the
 652   // range for the RHS.
 653   Optional<ConstantRange> RRange;
 654   auto LazyRRange = [&] () {
 655       if (!RRange)
 656         RRange = LVI->getConstantRange(RHS, BB, AddOp);
 657       return RRange.getValue();
 658   };
 659
 660   bool Changed = false;
 661   if (!NUW) {
 662     ConstantRange NUWRange = ConstantRange::makeGuaranteedNoWrapRegion(
 663         BinaryOperator::Add, LRange, OBO::NoUnsignedWrap);
 664     if (!NUWRange.isEmptySet()) {
 665       bool NewNUW = NUWRange.contains(LazyRRange());
 666       AddOp->setHasNoUnsignedWrap(NewNUW);
 667       Changed |= NewNUW;
 668     }
 669   }
 670   if (!NSW) {
 671     ConstantRange NSWRange = ConstantRange::makeGuaranteedNoWrapRegion(
 672         BinaryOperator::Add, LRange, OBO::NoSignedWrap);
 673     if (!NSWRange.isEmptySet()) {
 674       bool NewNSW = NSWRange.contains(LazyRRange());
 675       AddOp->setHasNoSignedWrap(NewNSW);
 676       Changed |= NewNSW;
 677     }
 678   }
 679
 680   return Changed;
 681 }
 682
 683 static Constant *getConstantAt(Value *V, Instruction *At, LazyValueInfo *LVI) {
 684   if (Constant *C = LVI->getConstant(V, At->getParent(), At))
 685     return C;
 686
 687   // TODO: The following really should be sunk inside LVI's core algorithm, or
 688   // at least the outer shims around such.
 689   auto *C = dyn_cast<CmpInst>(V);
 690   if (!C) return nullptr;
 691
 692   Value *Op0 = C->getOperand(0);
 693   Constant *Op1 = dyn_cast<Constant>(C->getOperand(1));
 694   if (!Op1) return nullptr;
 695
 696   LazyValueInfo::Tristate Result =
 697     LVI->getPredicateAt(C->getPredicate(), Op0, Op1, At);
 698   if (Result == LazyValueInfo::Unknown)
 699     return nullptr;
 700
 701   return (Result == LazyValueInfo::True) ?
 702     ConstantInt::getTrue(C->getContext()) :
 703     ConstantInt::getFalse(C->getContext());
 704 }
 705
 706 static bool runImpl(Function &F, LazyValueInfo *LVI, DominatorTree *DT,
 707                     const SimplifyQuery &SQ) {
 708   bool FnChanged = false;
 709   // Visiting in a pre-order depth-first traversal causes us to simplify early
 710   // blocks before querying later blocks (which require us to analyze early
 711   // blocks).  Eagerly simplifying shallow blocks means there is strictly less
 712   // work to do for deep blocks.  This also means we don't visit unreachable
 713   // blocks.
 714   for (BasicBlock *BB : depth_first(&F.getEntryBlock())) {
 715     bool BBChanged = false;
 716     for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) {
 717       Instruction *II = &*BI++;
 718       switch (II->getOpcode()) {
 719       case Instruction::Select:
 720         BBChanged |= processSelect(cast<SelectInst>(II), LVI);
 721         break;
 722       case Instruction::PHI:
 723         BBChanged |= processPHI(cast<PHINode>(II), LVI, DT, SQ);
 724         break;
 725       case Instruction::ICmp:
 726       case Instruction::FCmp:
 727         BBChanged |= processCmp(cast<CmpInst>(II), LVI);
 728         break;
 729       case Instruction::Load:
 730       case Instruction::Store:
 731         BBChanged |= processMemAccess(II, LVI);
 732         break;
 733       case Instruction::Call:
 734       case Instruction::Invoke:
 735         BBChanged |= processCallSite(CallSite(II), LVI);
 736         break;
 737       case Instruction::SRem:
 738         BBChanged |= processSRem(cast<BinaryOperator>(II), LVI);
 739         break;
 740       case Instruction::SDiv:
 741         BBChanged |= processSDiv(cast<BinaryOperator>(II), LVI);
 742         break;
 743       case Instruction::UDiv:
 744       case Instruction::URem:
 745         BBChanged |= processUDivOrURem(cast<BinaryOperator>(II), LVI);
 746         break;
 747       case Instruction::AShr:
 748         BBChanged |= processAShr(cast<BinaryOperator>(II), LVI);
 749         break;
 750       case Instruction::Add:
 751         BBChanged |= processAdd(cast<BinaryOperator>(II), LVI);
 752         break;
 753       }
 754     }
 755
 756     Instruction *Term = BB->getTerminator();
 757     switch (Term->getOpcode()) {
 758     case Instruction::Switch:
 759       BBChanged |= processSwitch(cast<SwitchInst>(Term), LVI, DT);
 760       break;
 761     case Instruction::Ret: {
 762       auto *RI = cast<ReturnInst>(Term);
 763       // Try to determine the return value if we can.  This is mainly here to
 764       // simplify the writing of unit tests, but also helps to enable IPO by
 765       // constant folding the return values of callees.
 766       auto *RetVal = RI->getReturnValue();
 767       if (!RetVal) break; // handle "ret void"
 768       if (isa<Constant>(RetVal)) break; // nothing to do
 769       if (auto *C = getConstantAt(RetVal, RI, LVI)) {
 770         ++NumReturns;
 771         RI->replaceUsesOfWith(RetVal, C);
 772         BBChanged = true;
 773       }
 774     }
 775     }
 776
 777     FnChanged |= BBChanged;
 778   }
 779
 780   return FnChanged;
 781 }
 782
 783 bool CorrelatedValuePropagation::runOnFunction(Function &F) {
 784   if (skipFunction(F))
 785     return false;
 786
 787   LazyValueInfo *LVI = &getAnalysis<LazyValueInfoWrapperPass>().getLVI();
 788   DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
 789
 790   return runImpl(F, LVI, DT, getBestSimplifyQuery(*this, F));
 791 }
 792
 793 PreservedAnalyses
 794 CorrelatedValuePropagationPass::run(Function &F, FunctionAnalysisManager &AM) {
 795   LazyValueInfo *LVI = &AM.getResult<LazyValueAnalysis>(F);
 796   DominatorTree *DT = &AM.getResult<DominatorTreeAnalysis>(F);
 797
 798   bool Changed = runImpl(F, LVI, DT, getBestSimplifyQuery(AM, F));
 799
 800   if (!Changed)
 801     return PreservedAnalyses::all();
 802   PreservedAnalyses PA;
 803   PA.preserve<GlobalsAA>();
 804   PA.preserve<DominatorTreeAnalysis>();
 805   return PA;
 806 }