lib/Transforms/Scalar/IndVarSimplify.cpp

   1 //===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 //
   9 // This transformation analyzes and transforms the induction variables (and
  10 // computations derived from them) into simpler forms suitable for subsequent
  11 // analysis and transformation.
  12 //
  13 // If the trip count of a loop is computable, this pass also makes the following
  14 // changes:
  15 //   1. The exit condition for the loop is canonicalized to compare the
  16 //      induction value against the exit value.  This turns loops like:
  17 //        'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
  18 //   2. Any use outside of the loop of an expression derived from the indvar
  19 //      is changed to compute the derived value outside of the loop, eliminating
  20 //      the dependence on the exit value of the induction variable.  If the only
  21 //      purpose of the loop is to compute the exit value of some derived
  22 //      expression, this transformation will make the loop dead.
  23 //
  24 //===----------------------------------------------------------------------===//
  25
  26 #include "llvm/Transforms/Scalar/IndVarSimplify.h"
  27 #include "llvm/ADT/APFloat.h"
  28 #include "llvm/ADT/APInt.h"
  29 #include "llvm/ADT/ArrayRef.h"
  30 #include "llvm/ADT/DenseMap.h"
  31 #include "llvm/ADT/None.h"
  32 #include "llvm/ADT/Optional.h"
  33 #include "llvm/ADT/STLExtras.h"
  34 #include "llvm/ADT/SmallSet.h"
  35 #include "llvm/ADT/SmallPtrSet.h"
  36 #include "llvm/ADT/SmallVector.h"
  37 #include "llvm/ADT/Statistic.h"
  38 #include "llvm/ADT/iterator_range.h"
  39 #include "llvm/Analysis/LoopInfo.h"
  40 #include "llvm/Analysis/LoopPass.h"
  41 #include "llvm/Analysis/ScalarEvolution.h"
  42 #include "llvm/Analysis/ScalarEvolutionExpander.h"
  43 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
  44 #include "llvm/Analysis/TargetLibraryInfo.h"
  45 #include "llvm/Analysis/TargetTransformInfo.h"
  46 #include "llvm/Analysis/ValueTracking.h"
  47 #include "llvm/Transforms/Utils/Local.h"
  48 #include "llvm/IR/BasicBlock.h"
  49 #include "llvm/IR/Constant.h"
  50 #include "llvm/IR/ConstantRange.h"
  51 #include "llvm/IR/Constants.h"
  52 #include "llvm/IR/DataLayout.h"
  53 #include "llvm/IR/DerivedTypes.h"
  54 #include "llvm/IR/Dominators.h"
  55 #include "llvm/IR/Function.h"
  56 #include "llvm/IR/IRBuilder.h"
  57 #include "llvm/IR/InstrTypes.h"
  58 #include "llvm/IR/Instruction.h"
  59 #include "llvm/IR/Instructions.h"
  60 #include "llvm/IR/IntrinsicInst.h"
  61 #include "llvm/IR/Intrinsics.h"
  62 #include "llvm/IR/Module.h"
  63 #include "llvm/IR/Operator.h"
  64 #include "llvm/IR/PassManager.h"
  65 #include "llvm/IR/PatternMatch.h"
  66 #include "llvm/IR/Type.h"
  67 #include "llvm/IR/Use.h"
  68 #include "llvm/IR/User.h"
  69 #include "llvm/IR/Value.h"
  70 #include "llvm/IR/ValueHandle.h"
  71 #include "llvm/Pass.h"
  72 #include "llvm/Support/Casting.h"
  73 #include "llvm/Support/CommandLine.h"
  74 #include "llvm/Support/Compiler.h"
  75 #include "llvm/Support/Debug.h"
  76 #include "llvm/Support/ErrorHandling.h"
  77 #include "llvm/Support/MathExtras.h"
  78 #include "llvm/Support/raw_ostream.h"
  79 #include "llvm/Transforms/Scalar.h"
  80 #include "llvm/Transforms/Scalar/LoopPassManager.h"
  81 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
  82 #include "llvm/Transforms/Utils/LoopUtils.h"
  83 #include "llvm/Transforms/Utils/SimplifyIndVar.h"
  84 #include <cassert>
  85 #include <cstdint>
  86 #include <utility>
  87
  88 using namespace llvm;
  89
  90 #define DEBUG_TYPE "indvars"
  91
  92 STATISTIC(NumWidened     , "Number of indvars widened");
  93 STATISTIC(NumReplaced    , "Number of exit values replaced");
  94 STATISTIC(NumLFTR        , "Number of loop exit tests replaced");
  95 STATISTIC(NumElimExt     , "Number of IV sign/zero extends eliminated");
  96 STATISTIC(NumElimIV      , "Number of congruent IVs eliminated");
  97
  98 // Trip count verification can be enabled by default under NDEBUG if we
  99 // implement a strong expression equivalence checker in SCEV. Until then, we
 100 // use the verify-indvars flag, which may assert in some cases.
 101 static cl::opt<bool> VerifyIndvars(
 102   "verify-indvars", cl::Hidden,
 103   cl::desc("Verify the ScalarEvolution result after running indvars"));
 104
 105 enum ReplaceExitVal { NeverRepl, OnlyCheapRepl, NoHardUse, AlwaysRepl };
 106
 107 static cl::opt<ReplaceExitVal> ReplaceExitValue(
 108     "replexitval", cl::Hidden, cl::init(OnlyCheapRepl),
 109     cl::desc("Choose the strategy to replace exit value in IndVarSimplify"),
 110     cl::values(clEnumValN(NeverRepl, "never", "never replace exit value"),
 111                clEnumValN(OnlyCheapRepl, "cheap",
 112                           "only replace exit value when the cost is cheap"),
 113                clEnumValN(NoHardUse, "noharduse",
 114                           "only replace exit values when loop def likely dead"),
 115                clEnumValN(AlwaysRepl, "always",
 116                           "always replace exit value whenever possible")));
 117
 118 static cl::opt<bool> UsePostIncrementRanges(
 119   "indvars-post-increment-ranges", cl::Hidden,
 120   cl::desc("Use post increment control-dependent ranges in IndVarSimplify"),
 121   cl::init(true));
 122
 123 static cl::opt<bool>
 124 DisableLFTR("disable-lftr", cl::Hidden, cl::init(false),
 125             cl::desc("Disable Linear Function Test Replace optimization"));
 126
 127 static cl::opt<bool>
 128 LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(false),
 129                 cl::desc("Predicate conditions in read only loops"));
 130
 131
 132 namespace {
 133
 134 struct RewritePhi;
 135
 136 class IndVarSimplify {
 137   LoopInfo *LI;
 138   ScalarEvolution *SE;
 139   DominatorTree *DT;
 140   const DataLayout &DL;
 141   TargetLibraryInfo *TLI;
 142   const TargetTransformInfo *TTI;
 143
 144   SmallVector<WeakTrackingVH, 16> DeadInsts;
 145
 146   bool isValidRewrite(Value *FromVal, Value *ToVal);
 147
 148   bool handleFloatingPointIV(Loop *L, PHINode *PH);
 149   bool rewriteNonIntegerIVs(Loop *L);
 150
 151   bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI);
 152   /// Try to eliminate loop exits based on analyzeable exit counts
 153   bool optimizeLoopExits(Loop *L, SCEVExpander &Rewriter);
 154   /// Try to form loop invariant tests for loop exits by changing how many
 155   /// iterations of the loop run when that is unobservable.
 156   bool predicateLoopExits(Loop *L, SCEVExpander &Rewriter);
 157
 158   bool canLoopBeDeleted(Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet);
 159   bool rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter);
 160   bool rewriteFirstIterationLoopExitValues(Loop *L);
 161   bool hasHardUserWithinLoop(const Loop *L, const Instruction *I) const;
 162
 163   bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
 164                                  const SCEV *ExitCount,
 165                                  PHINode *IndVar, SCEVExpander &Rewriter);
 166
 167   bool sinkUnusedInvariants(Loop *L);
 168
 169 public:
 170   IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
 171                  const DataLayout &DL, TargetLibraryInfo *TLI,
 172                  TargetTransformInfo *TTI)
 173       : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI) {}
 174
 175   bool run(Loop *L);
 176 };
 177
 178 } // end anonymous namespace
 179
 180 /// Return true if the SCEV expansion generated by the rewriter can replace the
 181 /// original value. SCEV guarantees that it produces the same value, but the way
 182 /// it is produced may be illegal IR.  Ideally, this function will only be
 183 /// called for verification.
 184 bool IndVarSimplify::isValidRewrite(Value *FromVal, Value *ToVal) {
 185   // If an SCEV expression subsumed multiple pointers, its expansion could
 186   // reassociate the GEP changing the base pointer. This is illegal because the
 187   // final address produced by a GEP chain must be inbounds relative to its
 188   // underlying object. Otherwise basic alias analysis, among other things,
 189   // could fail in a dangerous way. Ultimately, SCEV will be improved to avoid
 190   // producing an expression involving multiple pointers. Until then, we must
 191   // bail out here.
 192   //
 193   // Retrieve the pointer operand of the GEP. Don't use GetUnderlyingObject
 194   // because it understands lcssa phis while SCEV does not.
 195   Value *FromPtr = FromVal;
 196   Value *ToPtr = ToVal;
 197   if (auto *GEP = dyn_cast<GEPOperator>(FromVal)) {
 198     FromPtr = GEP->getPointerOperand();
 199   }
 200   if (auto *GEP = dyn_cast<GEPOperator>(ToVal)) {
 201     ToPtr = GEP->getPointerOperand();
 202   }
 203   if (FromPtr != FromVal || ToPtr != ToVal) {
 204     // Quickly check the common case
 205     if (FromPtr == ToPtr)
 206       return true;
 207
 208     // SCEV may have rewritten an expression that produces the GEP's pointer
 209     // operand. That's ok as long as the pointer operand has the same base
 210     // pointer. Unlike GetUnderlyingObject(), getPointerBase() will find the
 211     // base of a recurrence. This handles the case in which SCEV expansion
 212     // converts a pointer type recurrence into a nonrecurrent pointer base
 213     // indexed by an integer recurrence.
 214
 215     // If the GEP base pointer is a vector of pointers, abort.
 216     if (!FromPtr->getType()->isPointerTy() || !ToPtr->getType()->isPointerTy())
 217       return false;
 218
 219     const SCEV *FromBase = SE->getPointerBase(SE->getSCEV(FromPtr));
 220     const SCEV *ToBase = SE->getPointerBase(SE->getSCEV(ToPtr));
 221     if (FromBase == ToBase)
 222       return true;
 223
 224     LLVM_DEBUG(dbgs() << "INDVARS: GEP rewrite bail out " << *FromBase
 225                       << " != " << *ToBase << "\n");
 226
 227     return false;
 228   }
 229   return true;
 230 }
 231
 232 /// Determine the insertion point for this user. By default, insert immediately
 233 /// before the user. SCEVExpander or LICM will hoist loop invariants out of the
 234 /// loop. For PHI nodes, there may be multiple uses, so compute the nearest
 235 /// common dominator for the incoming blocks. A nullptr can be returned if no
 236 /// viable location is found: it may happen if User is a PHI and Def only comes
 237 /// to this PHI from unreachable blocks.
 238 static Instruction *getInsertPointForUses(Instruction *User, Value *Def,
 239                                           DominatorTree *DT, LoopInfo *LI) {
 240   PHINode *PHI = dyn_cast<PHINode>(User);
 241   if (!PHI)
 242     return User;
 243
 244   Instruction *InsertPt = nullptr;
 245   for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) {
 246     if (PHI->getIncomingValue(i) != Def)
 247       continue;
 248
 249     BasicBlock *InsertBB = PHI->getIncomingBlock(i);
 250
 251     if (!DT->isReachableFromEntry(InsertBB))
 252       continue;
 253
 254     if (!InsertPt) {
 255       InsertPt = InsertBB->getTerminator();
 256       continue;
 257     }
 258     InsertBB = DT->findNearestCommonDominator(InsertPt->getParent(), InsertBB);
 259     InsertPt = InsertBB->getTerminator();
 260   }
 261
 262   // If we have skipped all inputs, it means that Def only comes to Phi from
 263   // unreachable blocks.
 264   if (!InsertPt)
 265     return nullptr;
 266
 267   auto *DefI = dyn_cast<Instruction>(Def);
 268   if (!DefI)
 269     return InsertPt;
 270
 271   assert(DT->dominates(DefI, InsertPt) && "def does not dominate all uses");
 272
 273   auto *L = LI->getLoopFor(DefI->getParent());
 274   assert(!L || L->contains(LI->getLoopFor(InsertPt->getParent())));
 275
 276   for (auto *DTN = (*DT)[InsertPt->getParent()]; DTN; DTN = DTN->getIDom())
 277     if (LI->getLoopFor(DTN->getBlock()) == L)
 278       return DTN->getBlock()->getTerminator();
 279
 280   llvm_unreachable("DefI dominates InsertPt!");
 281 }
 282
 283 //===----------------------------------------------------------------------===//
 284 // rewriteNonIntegerIVs and helpers. Prefer integer IVs.
 285 //===----------------------------------------------------------------------===//
 286
 287 /// Convert APF to an integer, if possible.
 288 static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
 289   bool isExact = false;
 290   // See if we can convert this to an int64_t
 291   uint64_t UIntVal;
 292   if (APF.convertToInteger(makeMutableArrayRef(UIntVal), 64, true,
 293                            APFloat::rmTowardZero, &isExact) != APFloat::opOK ||
 294       !isExact)
 295     return false;
 296   IntVal = UIntVal;
 297   return true;
 298 }
 299
 300 /// If the loop has floating induction variable then insert corresponding
 301 /// integer induction variable if possible.
 302 /// For example,
 303 /// for(double i = 0; i < 10000; ++i)
 304 ///   bar(i)
 305 /// is converted into
 306 /// for(int i = 0; i < 10000; ++i)
 307 ///   bar((double)i);
 308 bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
 309   unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
 310   unsigned BackEdge     = IncomingEdge^1;
 311
 312   // Check incoming value.
 313   auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));
 314
 315   int64_t InitValue;
 316   if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
 317     return false;
 318
 319   // Check IV increment. Reject this PN if increment operation is not
 320   // an add or increment value can not be represented by an integer.
 321   auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
 322   if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return false;
 323
 324   // If this is not an add of the PHI with a constantfp, or if the constant fp
 325   // is not an integer, bail out.
 326   ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
 327   int64_t IncValue;
 328   if (IncValueVal == nullptr || Incr->getOperand(0) != PN ||
 329       !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
 330     return false;
 331
 332   // Check Incr uses. One user is PN and the other user is an exit condition
 333   // used by the conditional terminator.
 334   Value::user_iterator IncrUse = Incr->user_begin();
 335   Instruction *U1 = cast<Instruction>(*IncrUse++);
 336   if (IncrUse == Incr->user_end()) return false;
 337   Instruction *U2 = cast<Instruction>(*IncrUse++);
 338   if (IncrUse != Incr->user_end()) return false;
 339
 340   // Find exit condition, which is an fcmp.  If it doesn't exist, or if it isn't
 341   // only used by a branch, we can't transform it.
 342   FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
 343   if (!Compare)
 344     Compare = dyn_cast<FCmpInst>(U2);
 345   if (!Compare || !Compare->hasOneUse() ||
 346       !isa<BranchInst>(Compare->user_back()))
 347     return false;
 348
 349   BranchInst *TheBr = cast<BranchInst>(Compare->user_back());
 350
 351   // We need to verify that the branch actually controls the iteration count
 352   // of the loop.  If not, the new IV can overflow and no one will notice.
 353   // The branch block must be in the loop and one of the successors must be out
 354   // of the loop.
 355   assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
 356   if (!L->contains(TheBr->getParent()) ||
 357       (L->contains(TheBr->getSuccessor(0)) &&
 358        L->contains(TheBr->getSuccessor(1))))
 359     return false;
 360
 361   // If it isn't a comparison with an integer-as-fp (the exit value), we can't
 362   // transform it.
 363   ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
 364   int64_t ExitValue;
 365   if (ExitValueVal == nullptr ||
 366       !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
 367     return false;
 368
 369   // Find new predicate for integer comparison.
 370   CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
 371   switch (Compare->getPredicate()) {
 372   default: return false;  // Unknown comparison.
 373   case CmpInst::FCMP_OEQ:
 374   case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
 375   case CmpInst::FCMP_ONE:
 376   case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
 377   case CmpInst::FCMP_OGT:
 378   case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
 379   case CmpInst::FCMP_OGE:
 380   case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
 381   case CmpInst::FCMP_OLT:
 382   case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
 383   case CmpInst::FCMP_OLE:
 384   case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
 385   }
 386
 387   // We convert the floating point induction variable to a signed i32 value if
 388   // we can.  This is only safe if the comparison will not overflow in a way
 389   // that won't be trapped by the integer equivalent operations.  Check for this
 390   // now.
 391   // TODO: We could use i64 if it is native and the range requires it.
 392
 393   // The start/stride/exit values must all fit in signed i32.
 394   if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
 395     return false;
 396
 397   // If not actually striding (add x, 0.0), avoid touching the code.
 398   if (IncValue == 0)
 399     return false;
 400
 401   // Positive and negative strides have different safety conditions.
 402   if (IncValue > 0) {
 403     // If we have a positive stride, we require the init to be less than the
 404     // exit value.
 405     if (InitValue >= ExitValue)
 406       return false;
 407
 408     uint32_t Range = uint32_t(ExitValue-InitValue);
 409     // Check for infinite loop, either:
 410     // while (i <= Exit) or until (i > Exit)
 411     if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
 412       if (++Range == 0) return false;  // Range overflows.
 413     }
 414
 415     unsigned Leftover = Range % uint32_t(IncValue);
 416
 417     // If this is an equality comparison, we require that the strided value
 418     // exactly land on the exit value, otherwise the IV condition will wrap
 419     // around and do things the fp IV wouldn't.
 420     if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
 421         Leftover != 0)
 422       return false;
 423
 424     // If the stride would wrap around the i32 before exiting, we can't
 425     // transform the IV.
 426     if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
 427       return false;
 428   } else {
 429     // If we have a negative stride, we require the init to be greater than the
 430     // exit value.
 431     if (InitValue <= ExitValue)
 432       return false;
 433
 434     uint32_t Range = uint32_t(InitValue-ExitValue);
 435     // Check for infinite loop, either:
 436     // while (i >= Exit) or until (i < Exit)
 437     if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
 438       if (++Range == 0) return false;  // Range overflows.
 439     }
 440
 441     unsigned Leftover = Range % uint32_t(-IncValue);
 442
 443     // If this is an equality comparison, we require that the strided value
 444     // exactly land on the exit value, otherwise the IV condition will wrap
 445     // around and do things the fp IV wouldn't.
 446     if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
 447         Leftover != 0)
 448       return false;
 449
 450     // If the stride would wrap around the i32 before exiting, we can't
 451     // transform the IV.
 452     if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
 453       return false;
 454   }
 455
 456   IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());
 457
 458   // Insert new integer induction variable.
 459   PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN);
 460   NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
 461                       PN->getIncomingBlock(IncomingEdge));
 462
 463   Value *NewAdd =
 464     BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue),
 465                               Incr->getName()+".int", Incr);
 466   NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));
 467
 468   ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd,
 469                                       ConstantInt::get(Int32Ty, ExitValue),
 470                                       Compare->getName());
 471
 472   // In the following deletions, PN may become dead and may be deleted.
 473   // Use a WeakTrackingVH to observe whether this happens.
 474   WeakTrackingVH WeakPH = PN;
 475
 476   // Delete the old floating point exit comparison.  The branch starts using the
 477   // new comparison.
 478   NewCompare->takeName(Compare);
 479   Compare->replaceAllUsesWith(NewCompare);
 480   RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI);
 481
 482   // Delete the old floating point increment.
 483   Incr->replaceAllUsesWith(UndefValue::get(Incr->getType()));
 484   RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI);
 485
 486   // If the FP induction variable still has uses, this is because something else
 487   // in the loop uses its value.  In order to canonicalize the induction
 488   // variable, we chose to eliminate the IV and rewrite it in terms of an
 489   // int->fp cast.
 490   //
 491   // We give preference to sitofp over uitofp because it is faster on most
 492   // platforms.
 493   if (WeakPH) {
 494     Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
 495                                  &*PN->getParent()->getFirstInsertionPt());
 496     PN->replaceAllUsesWith(Conv);
 497     RecursivelyDeleteTriviallyDeadInstructions(PN, TLI);
 498   }
 499   return true;
 500 }
 501
 502 bool IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
 503   // First step.  Check to see if there are any floating-point recurrences.
 504   // If there are, change them into integer recurrences, permitting analysis by
 505   // the SCEV routines.
 506   BasicBlock *Header = L->getHeader();
 507
 508   SmallVector<WeakTrackingVH, 8> PHIs;
 509   for (PHINode &PN : Header->phis())
 510     PHIs.push_back(&PN);
 511
 512   bool Changed = false;
 513   for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
 514     if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i]))
 515       Changed |= handleFloatingPointIV(L, PN);
 516
 517   // If the loop previously had floating-point IV, ScalarEvolution
 518   // may not have been able to compute a trip count. Now that we've done some
 519   // re-writing, the trip count may be computable.
 520   if (Changed)
 521     SE->forgetLoop(L);
 522   return Changed;
 523 }
 524
 525 namespace {
 526
 527 // Collect information about PHI nodes which can be transformed in
 528 // rewriteLoopExitValues.
 529 struct RewritePhi {
 530   PHINode *PN;
 531
 532   // Ith incoming value.
 533   unsigned Ith;
 534
 535   // Exit value after expansion.
 536   Value *Val;
 537
 538   // High Cost when expansion.
 539   bool HighCost;
 540
 541   RewritePhi(PHINode *P, unsigned I, Value *V, bool H)
 542       : PN(P), Ith(I), Val(V), HighCost(H) {}
 543 };
 544
 545 } // end anonymous namespace
 546
 547 //===----------------------------------------------------------------------===//
 548 // rewriteLoopExitValues - Optimize IV users outside the loop.
 549 // As a side effect, reduces the amount of IV processing within the loop.
 550 //===----------------------------------------------------------------------===//
 551
 552 bool IndVarSimplify::hasHardUserWithinLoop(const Loop *L, const Instruction *I) const {
 553   SmallPtrSet<const Instruction *, 8> Visited;
 554   SmallVector<const Instruction *, 8> WorkList;
 555   Visited.insert(I);
 556   WorkList.push_back(I);
 557   while (!WorkList.empty()) {
 558     const Instruction *Curr = WorkList.pop_back_val();
 559     // This use is outside the loop, nothing to do.
 560     if (!L->contains(Curr))
 561       continue;
 562     // Do we assume it is a "hard" use which will not be eliminated easily?
 563     if (Curr->mayHaveSideEffects())
 564       return true;
 565     // Otherwise, add all its users to worklist.
 566     for (auto U : Curr->users()) {
 567       auto *UI = cast<Instruction>(U);
 568       if (Visited.insert(UI).second)
 569         WorkList.push_back(UI);
 570     }
 571   }
 572   return false;
 573 }
 574
 575 /// Check to see if this loop has a computable loop-invariant execution count.
 576 /// If so, this means that we can compute the final value of any expressions
 577 /// that are recurrent in the loop, and substitute the exit values from the loop
 578 /// into any instructions outside of the loop that use the final values of the
 579 /// current expressions.
 580 ///
 581 /// This is mostly redundant with the regular IndVarSimplify activities that
 582 /// happen later, except that it's more powerful in some cases, because it's
 583 /// able to brute-force evaluate arbitrary instructions as long as they have
 584 /// constant operands at the beginning of the loop.
 585 bool IndVarSimplify::rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
 586   // Check a pre-condition.
 587   assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
 588          "Indvars did not preserve LCSSA!");
 589
 590   SmallVector<BasicBlock*, 8> ExitBlocks;
 591   L->getUniqueExitBlocks(ExitBlocks);
 592
 593   SmallVector<RewritePhi, 8> RewritePhiSet;
 594   // Find all values that are computed inside the loop, but used outside of it.
 595   // Because of LCSSA, these values will only occur in LCSSA PHI Nodes.  Scan
 596   // the exit blocks of the loop to find them.
 597   for (BasicBlock *ExitBB : ExitBlocks) {
 598     // If there are no PHI nodes in this exit block, then no values defined
 599     // inside the loop are used on this path, skip it.
 600     PHINode *PN = dyn_cast<PHINode>(ExitBB->begin());
 601     if (!PN) continue;
 602
 603     unsigned NumPreds = PN->getNumIncomingValues();
 604
 605     // Iterate over all of the PHI nodes.
 606     BasicBlock::iterator BBI = ExitBB->begin();
 607     while ((PN = dyn_cast<PHINode>(BBI++))) {
 608       if (PN->use_empty())
 609         continue; // dead use, don't replace it
 610
 611       if (!SE->isSCEVable(PN->getType()))
 612         continue;
 613
 614       // It's necessary to tell ScalarEvolution about this explicitly so that
 615       // it can walk the def-use list and forget all SCEVs, as it may not be
 616       // watching the PHI itself. Once the new exit value is in place, there
 617       // may not be a def-use connection between the loop and every instruction
 618       // which got a SCEVAddRecExpr for that loop.
 619       SE->forgetValue(PN);
 620
 621       // Iterate over all of the values in all the PHI nodes.
 622       for (unsigned i = 0; i != NumPreds; ++i) {
 623         // If the value being merged in is not integer or is not defined
 624         // in the loop, skip it.
 625         Value *InVal = PN->getIncomingValue(i);
 626         if (!isa<Instruction>(InVal))
 627           continue;
 628
 629         // If this pred is for a subloop, not L itself, skip it.
 630         if (LI->getLoopFor(PN->getIncomingBlock(i)) != L)
 631           continue; // The Block is in a subloop, skip it.
 632
 633         // Check that InVal is defined in the loop.
 634         Instruction *Inst = cast<Instruction>(InVal);
 635         if (!L->contains(Inst))
 636           continue;
 637
 638         // Okay, this instruction has a user outside of the current loop
 639         // and varies predictably *inside* the loop.  Evaluate the value it
 640         // contains when the loop exits, if possible.  We prefer to start with
 641         // expressions which are true for all exits (so as to maximize
 642         // expression reuse by the SCEVExpander), but resort to per-exit
 643         // evaluation if that fails.
 644         const SCEV *ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop());
 645         if (isa<SCEVCouldNotCompute>(ExitValue) ||
 646             !SE->isLoopInvariant(ExitValue, L) ||
 647             !isSafeToExpand(ExitValue, *SE)) {
 648           // TODO: This should probably be sunk into SCEV in some way; maybe a
 649           // getSCEVForExit(SCEV*, L, ExitingBB)?  It can be generalized for
 650           // most SCEV expressions and other recurrence types (e.g. shift
 651           // recurrences).  Is there existing code we can reuse?
 652           const SCEV *ExitCount = SE->getExitCount(L, PN->getIncomingBlock(i));
 653           if (isa<SCEVCouldNotCompute>(ExitCount))
 654             continue;
 655           if (auto *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Inst)))
 656             if (AddRec->getLoop() == L)
 657               ExitValue = AddRec->evaluateAtIteration(ExitCount, *SE);
 658           if (isa<SCEVCouldNotCompute>(ExitValue) ||
 659               !SE->isLoopInvariant(ExitValue, L) ||
 660               !isSafeToExpand(ExitValue, *SE))
 661             continue;
 662         }
 663
 664         // Computing the value outside of the loop brings no benefit if it is
 665         // definitely used inside the loop in a way which can not be optimized
 666         // away.  Avoid doing so unless we know we have a value which computes
 667         // the ExitValue already.  TODO: This should be merged into SCEV
 668         // expander to leverage its knowledge of existing expressions.
 669         if (ReplaceExitValue != AlwaysRepl &&
 670             !isa<SCEVConstant>(ExitValue) && !isa<SCEVUnknown>(ExitValue) &&
 671             hasHardUserWithinLoop(L, Inst))
 672           continue;
 673
 674         bool HighCost = Rewriter.isHighCostExpansion(ExitValue, L, Inst);
 675         Value *ExitVal = Rewriter.expandCodeFor(ExitValue, PN->getType(), Inst);
 676
 677         LLVM_DEBUG(dbgs() << "INDVARS: RLEV: AfterLoopVal = " << *ExitVal
 678                           << '\n'
 679                           << "  LoopVal = " << *Inst << "\n");
 680
 681         if (!isValidRewrite(Inst, ExitVal)) {
 682           DeadInsts.push_back(ExitVal);
 683           continue;
 684         }
 685
 686 #ifndef NDEBUG
 687         // If we reuse an instruction from a loop which is neither L nor one of
 688         // its containing loops, we end up breaking LCSSA form for this loop by
 689         // creating a new use of its instruction.
 690         if (auto *ExitInsn = dyn_cast<Instruction>(ExitVal))
 691           if (auto *EVL = LI->getLoopFor(ExitInsn->getParent()))
 692             if (EVL != L)
 693               assert(EVL->contains(L) && "LCSSA breach detected!");
 694 #endif
 695
 696         // Collect all the candidate PHINodes to be rewritten.
 697         RewritePhiSet.emplace_back(PN, i, ExitVal, HighCost);
 698       }
 699     }
 700   }
 701
 702   bool LoopCanBeDel = canLoopBeDeleted(L, RewritePhiSet);
 703
 704   bool Changed = false;
 705   // Transformation.
 706   for (const RewritePhi &Phi : RewritePhiSet) {
 707     PHINode *PN = Phi.PN;
 708     Value *ExitVal = Phi.Val;
 709
 710     // Only do the rewrite when the ExitValue can be expanded cheaply.
 711     // If LoopCanBeDel is true, rewrite exit value aggressively.
 712     if (ReplaceExitValue == OnlyCheapRepl && !LoopCanBeDel && Phi.HighCost) {
 713       DeadInsts.push_back(ExitVal);
 714       continue;
 715     }
 716
 717     Changed = true;
 718     ++NumReplaced;
 719     Instruction *Inst = cast<Instruction>(PN->getIncomingValue(Phi.Ith));
 720     PN->setIncomingValue(Phi.Ith, ExitVal);
 721
 722     // If this instruction is dead now, delete it. Don't do it now to avoid
 723     // invalidating iterators.
 724     if (isInstructionTriviallyDead(Inst, TLI))
 725       DeadInsts.push_back(Inst);
 726
 727     // Replace PN with ExitVal if that is legal and does not break LCSSA.
 728     if (PN->getNumIncomingValues() == 1 &&
 729         LI->replacementPreservesLCSSAForm(PN, ExitVal)) {
 730       PN->replaceAllUsesWith(ExitVal);
 731       PN->eraseFromParent();
 732     }
 733   }
 734
 735   // The insertion point instruction may have been deleted; clear it out
 736   // so that the rewriter doesn't trip over it later.
 737   Rewriter.clearInsertPoint();
 738   return Changed;
 739 }
 740
 741 //===---------------------------------------------------------------------===//
 742 // rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know
 743 // they will exit at the first iteration.
 744 //===---------------------------------------------------------------------===//
 745
 746 /// Check to see if this loop has loop invariant conditions which lead to loop
 747 /// exits. If so, we know that if the exit path is taken, it is at the first
 748 /// loop iteration. This lets us predict exit values of PHI nodes that live in
 749 /// loop header.
 750 bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
 751   // Verify the input to the pass is already in LCSSA form.
 752   assert(L->isLCSSAForm(*DT));
 753
 754   SmallVector<BasicBlock *, 8> ExitBlocks;
 755   L->getUniqueExitBlocks(ExitBlocks);
 756
 757   bool MadeAnyChanges = false;
 758   for (auto *ExitBB : ExitBlocks) {
 759     // If there are no more PHI nodes in this exit block, then no more
 760     // values defined inside the loop are used on this path.
 761     for (PHINode &PN : ExitBB->phis()) {
 762       for (unsigned IncomingValIdx = 0, E = PN.getNumIncomingValues();
 763            IncomingValIdx != E; ++IncomingValIdx) {
 764         auto *IncomingBB = PN.getIncomingBlock(IncomingValIdx);
 765
 766         // Can we prove that the exit must run on the first iteration if it
 767         // runs at all?  (i.e. early exits are fine for our purposes, but
 768         // traces which lead to this exit being taken on the 2nd iteration
 769         // aren't.)  Note that this is about whether the exit branch is
 770         // executed, not about whether it is taken.
 771         if (!L->getLoopLatch() ||
 772             !DT->dominates(IncomingBB, L->getLoopLatch()))
 773           continue;
 774
 775         // Get condition that leads to the exit path.
 776         auto *TermInst = IncomingBB->getTerminator();
 777
 778         Value *Cond = nullptr;
 779         if (auto *BI = dyn_cast<BranchInst>(TermInst)) {
 780           // Must be a conditional branch, otherwise the block
 781           // should not be in the loop.
 782           Cond = BI->getCondition();
 783         } else if (auto *SI = dyn_cast<SwitchInst>(TermInst))
 784           Cond = SI->getCondition();
 785         else
 786           continue;
 787
 788         if (!L->isLoopInvariant(Cond))
 789           continue;
 790
 791         auto *ExitVal = dyn_cast<PHINode>(PN.getIncomingValue(IncomingValIdx));
 792
 793         // Only deal with PHIs in the loop header.
 794         if (!ExitVal || ExitVal->getParent() != L->getHeader())
 795           continue;
 796
 797         // If ExitVal is a PHI on the loop header, then we know its
 798         // value along this exit because the exit can only be taken
 799         // on the first iteration.
 800         auto *LoopPreheader = L->getLoopPreheader();
 801         assert(LoopPreheader && "Invalid loop");
 802         int PreheaderIdx = ExitVal->getBasicBlockIndex(LoopPreheader);
 803         if (PreheaderIdx != -1) {
 804           assert(ExitVal->getParent() == L->getHeader() &&
 805                  "ExitVal must be in loop header");
 806           MadeAnyChanges = true;
 807           PN.setIncomingValue(IncomingValIdx,
 808                               ExitVal->getIncomingValue(PreheaderIdx));
 809         }
 810       }
 811     }
 812   }
 813   return MadeAnyChanges;
 814 }
 815
 816 /// Check whether it is possible to delete the loop after rewriting exit
 817 /// value. If it is possible, ignore ReplaceExitValue and do rewriting
 818 /// aggressively.
 819 bool IndVarSimplify::canLoopBeDeleted(
 820     Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet) {
 821   BasicBlock *Preheader = L->getLoopPreheader();
 822   // If there is no preheader, the loop will not be deleted.
 823   if (!Preheader)
 824     return false;
 825
 826   // In LoopDeletion pass Loop can be deleted when ExitingBlocks.size() > 1.
 827   // We obviate multiple ExitingBlocks case for simplicity.
 828   // TODO: If we see testcase with multiple ExitingBlocks can be deleted
 829   // after exit value rewriting, we can enhance the logic here.
 830   SmallVector<BasicBlock *, 4> ExitingBlocks;
 831   L->getExitingBlocks(ExitingBlocks);
 832   SmallVector<BasicBlock *, 8> ExitBlocks;
 833   L->getUniqueExitBlocks(ExitBlocks);
 834   if (ExitBlocks.size() != 1 || ExitingBlocks.size() != 1)
 835     return false;
 836
 837   BasicBlock *ExitBlock = ExitBlocks[0];
 838   BasicBlock::iterator BI = ExitBlock->begin();
 839   while (PHINode *P = dyn_cast<PHINode>(BI)) {
 840     Value *Incoming = P->getIncomingValueForBlock(ExitingBlocks[0]);
 841
 842     // If the Incoming value of P is found in RewritePhiSet, we know it
 843     // could be rewritten to use a loop invariant value in transformation
 844     // phase later. Skip it in the loop invariant check below.
 845     bool found = false;
 846     for (const RewritePhi &Phi : RewritePhiSet) {
 847       unsigned i = Phi.Ith;
 848       if (Phi.PN == P && (Phi.PN)->getIncomingValue(i) == Incoming) {
 849         found = true;
 850         break;
 851       }
 852     }
 853
 854     Instruction *I;
 855     if (!found && (I = dyn_cast<Instruction>(Incoming)))
 856       if (!L->hasLoopInvariantOperands(I))
 857         return false;
 858
 859     ++BI;
 860   }
 861
 862   for (auto *BB : L->blocks())
 863     if (llvm::any_of(*BB, [](Instruction &I) {
 864           return I.mayHaveSideEffects();
 865         }))
 866       return false;
 867
 868   return true;
 869 }
 870
 871 //===----------------------------------------------------------------------===//
 872 //  IV Widening - Extend the width of an IV to cover its widest uses.
 873 //===----------------------------------------------------------------------===//
 874
 875 namespace {
 876
 877 // Collect information about induction variables that are used by sign/zero
 878 // extend operations. This information is recorded by CollectExtend and provides
 879 // the input to WidenIV.
 880 struct WideIVInfo {
 881   PHINode *NarrowIV = nullptr;
 882
 883   // Widest integer type created [sz]ext
 884   Type *WidestNativeType = nullptr;
 885
 886   // Was a sext user seen before a zext?
 887   bool IsSigned = false;
 888 };
 889
 890 } // end anonymous namespace
 891
 892 /// Update information about the induction variable that is extended by this
 893 /// sign or zero extend operation. This is used to determine the final width of
 894 /// the IV before actually widening it.
 895 static void visitIVCast(CastInst *Cast, WideIVInfo &WI, ScalarEvolution *SE,
 896                         const TargetTransformInfo *TTI) {
 897   bool IsSigned = Cast->getOpcode() == Instruction::SExt;
 898   if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
 899     return;
 900
 901   Type *Ty = Cast->getType();
 902   uint64_t Width = SE->getTypeSizeInBits(Ty);
 903   if (!Cast->getModule()->getDataLayout().isLegalInteger(Width))
 904     return;
 905
 906   // Check that `Cast` actually extends the induction variable (we rely on this
 907   // later).  This takes care of cases where `Cast` is extending a truncation of
 908   // the narrow induction variable, and thus can end up being narrower than the
 909   // "narrow" induction variable.
 910   uint64_t NarrowIVWidth = SE->getTypeSizeInBits(WI.NarrowIV->getType());
 911   if (NarrowIVWidth >= Width)
 912     return;
 913
 914   // Cast is either an sext or zext up to this point.
 915   // We should not widen an indvar if arithmetics on the wider indvar are more
 916   // expensive than those on the narrower indvar. We check only the cost of ADD
 917   // because at least an ADD is required to increment the induction variable. We
 918   // could compute more comprehensively the cost of all instructions on the
 919   // induction variable when necessary.
 920   if (TTI &&
 921       TTI->getArithmeticInstrCost(Instruction::Add, Ty) >
 922           TTI->getArithmeticInstrCost(Instruction::Add,
 923                                       Cast->getOperand(0)->getType())) {
 924     return;
 925   }
 926
 927   if (!WI.WidestNativeType) {
 928     WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
 929     WI.IsSigned = IsSigned;
 930     return;
 931   }
 932
 933   // We extend the IV to satisfy the sign of its first user, arbitrarily.
 934   if (WI.IsSigned != IsSigned)
 935     return;
 936
 937   if (Width > SE->getTypeSizeInBits(WI.WidestNativeType))
 938     WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
 939 }
 940
 941 namespace {
 942
 943 /// Record a link in the Narrow IV def-use chain along with the WideIV that
 944 /// computes the same value as the Narrow IV def.  This avoids caching Use*
 945 /// pointers.
 946 struct NarrowIVDefUse {
 947   Instruction *NarrowDef = nullptr;
 948   Instruction *NarrowUse = nullptr;
 949   Instruction *WideDef = nullptr;
 950
 951   // True if the narrow def is never negative.  Tracking this information lets
 952   // us use a sign extension instead of a zero extension or vice versa, when
 953   // profitable and legal.
 954   bool NeverNegative = false;
 955
 956   NarrowIVDefUse(Instruction *ND, Instruction *NU, Instruction *WD,
 957                  bool NeverNegative)
 958       : NarrowDef(ND), NarrowUse(NU), WideDef(WD),
 959         NeverNegative(NeverNegative) {}
 960 };
 961
 962 /// The goal of this transform is to remove sign and zero extends without
 963 /// creating any new induction variables. To do this, it creates a new phi of
 964 /// the wider type and redirects all users, either removing extends or inserting
 965 /// truncs whenever we stop propagating the type.
 966 class WidenIV {
 967   // Parameters
 968   PHINode *OrigPhi;
 969   Type *WideType;
 970
 971   // Context
 972   LoopInfo        *LI;
 973   Loop            *L;
 974   ScalarEvolution *SE;
 975   DominatorTree   *DT;
 976
 977   // Does the module have any calls to the llvm.experimental.guard intrinsic
 978   // at all? If not we can avoid scanning instructions looking for guards.
 979   bool HasGuards;
 980
 981   // Result
 982   PHINode *WidePhi = nullptr;
 983   Instruction *WideInc = nullptr;
 984   const SCEV *WideIncExpr = nullptr;
 985   SmallVectorImpl<WeakTrackingVH> &DeadInsts;
 986
 987   SmallPtrSet<Instruction *,16> Widened;
 988   SmallVector<NarrowIVDefUse, 8> NarrowIVUsers;
 989
 990   enum ExtendKind { ZeroExtended, SignExtended, Unknown };
 991
 992   // A map tracking the kind of extension used to widen each narrow IV
 993   // and narrow IV user.
 994   // Key: pointer to a narrow IV or IV user.
 995   // Value: the kind of extension used to widen this Instruction.
 996   DenseMap<AssertingVH<Instruction>, ExtendKind> ExtendKindMap;
 997
 998   using DefUserPair = std::pair<AssertingVH<Value>, AssertingVH<Instruction>>;
 999
1000   // A map with control-dependent ranges for post increment IV uses. The key is
1001   // a pair of IV def and a use of this def denoting the context. The value is
1002   // a ConstantRange representing possible values of the def at the given
1003   // context.
1004   DenseMap<DefUserPair, ConstantRange> PostIncRangeInfos;
1005
1006   Optional<ConstantRange> getPostIncRangeInfo(Value *Def,
1007                                               Instruction *UseI) {
1008     DefUserPair Key(Def, UseI);
1009     auto It = PostIncRangeInfos.find(Key);
1010     return It == PostIncRangeInfos.end()
1011                ? Optional<ConstantRange>(None)
1012                : Optional<ConstantRange>(It->second);
1013   }
1014
1015   void calculatePostIncRanges(PHINode *OrigPhi);
1016   void calculatePostIncRange(Instruction *NarrowDef, Instruction *NarrowUser);
1017
1018   void updatePostIncRangeInfo(Value *Def, Instruction *UseI, ConstantRange R) {
1019     DefUserPair Key(Def, UseI);
1020     auto It = PostIncRangeInfos.find(Key);
1021     if (It == PostIncRangeInfos.end())
1022       PostIncRangeInfos.insert({Key, R});
1023     else
1024       It->second = R.intersectWith(It->second);
1025   }
1026
1027 public:
1028   WidenIV(const WideIVInfo &WI, LoopInfo *LInfo, ScalarEvolution *SEv,
1029           DominatorTree *DTree, SmallVectorImpl<WeakTrackingVH> &DI,
1030           bool HasGuards)
1031       : OrigPhi(WI.NarrowIV), WideType(WI.WidestNativeType), LI(LInfo),
1032         L(LI->getLoopFor(OrigPhi->getParent())), SE(SEv), DT(DTree),
1033         HasGuards(HasGuards), DeadInsts(DI) {
1034     assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV");
1035     ExtendKindMap[OrigPhi] = WI.IsSigned ? SignExtended : ZeroExtended;
1036   }
1037
1038   PHINode *createWideIV(SCEVExpander &Rewriter);
1039
1040 protected:
1041   Value *createExtendInst(Value *NarrowOper, Type *WideType, bool IsSigned,
1042                           Instruction *Use);
1043
1044   Instruction *cloneIVUser(NarrowIVDefUse DU, const SCEVAddRecExpr *WideAR);
1045   Instruction *cloneArithmeticIVUser(NarrowIVDefUse DU,
1046                                      const SCEVAddRecExpr *WideAR);
1047   Instruction *cloneBitwiseIVUser(NarrowIVDefUse DU);
1048
1049   ExtendKind getExtendKind(Instruction *I);
1050
1051   using WidenedRecTy = std::pair<const SCEVAddRecExpr *, ExtendKind>;
1052
1053   WidenedRecTy getWideRecurrence(NarrowIVDefUse DU);
1054
1055   WidenedRecTy getExtendedOperandRecurrence(NarrowIVDefUse DU);
1056
1057   const SCEV *getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
1058                               unsigned OpCode) const;
1059
1060   Instruction *widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter);
1061
1062   bool widenLoopCompare(NarrowIVDefUse DU);
1063   bool widenWithVariantLoadUse(NarrowIVDefUse DU);
1064   void widenWithVariantLoadUseCodegen(NarrowIVDefUse DU);
1065
1066   void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef);
1067 };
1068
1069 } // end anonymous namespace
1070
1071 Value *WidenIV::createExtendInst(Value *NarrowOper, Type *WideType,
1072                                  bool IsSigned, Instruction *Use) {
1073   // Set the debug location and conservative insertion point.
1074   IRBuilder<> Builder(Use);
1075   // Hoist the insertion point into loop preheaders as far as possible.
1076   for (const Loop *L = LI->getLoopFor(Use->getParent());
1077        L && L->getLoopPreheader() && L->isLoopInvariant(NarrowOper);
1078        L = L->getParentLoop())
1079     Builder.SetInsertPoint(L->getLoopPreheader()->getTerminator());
1080
1081   return IsSigned ? Builder.CreateSExt(NarrowOper, WideType) :
1082                     Builder.CreateZExt(NarrowOper, WideType);
1083 }
1084
1085 /// Instantiate a wide operation to replace a narrow operation. This only needs
1086 /// to handle operations that can evaluation to SCEVAddRec. It can safely return
1087 /// 0 for any operation we decide not to clone.
1088 Instruction *WidenIV::cloneIVUser(NarrowIVDefUse DU,
1089                                   const SCEVAddRecExpr *WideAR) {
1090   unsigned Opcode = DU.NarrowUse->getOpcode();
1091   switch (Opcode) {
1092   default:
1093     return nullptr;
1094   case Instruction::Add:
1095   case Instruction::Mul:
1096   case Instruction::UDiv:
1097   case Instruction::Sub:
1098     return cloneArithmeticIVUser(DU, WideAR);
1099
1100   case Instruction::And:
1101   case Instruction::Or:
1102   case Instruction::Xor:
1103   case Instruction::Shl:
1104   case Instruction::LShr:
1105   case Instruction::AShr:
1106     return cloneBitwiseIVUser(DU);
1107   }
1108 }
1109
1110 Instruction *WidenIV::cloneBitwiseIVUser(NarrowIVDefUse DU) {
1111   Instruction *NarrowUse = DU.NarrowUse;
1112   Instruction *NarrowDef = DU.NarrowDef;
1113   Instruction *WideDef = DU.WideDef;
1114
1115   LLVM_DEBUG(dbgs() << "Cloning bitwise IVUser: " << *NarrowUse << "\n");
1116
1117   // Replace NarrowDef operands with WideDef. Otherwise, we don't know anything
1118   // about the narrow operand yet so must insert a [sz]ext. It is probably loop
1119   // invariant and will be folded or hoisted. If it actually comes from a
1120   // widened IV, it should be removed during a future call to widenIVUse.
1121   bool IsSigned = getExtendKind(NarrowDef) == SignExtended;
1122   Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
1123                    ? WideDef
1124                    : createExtendInst(NarrowUse->getOperand(0), WideType,
1125                                       IsSigned, NarrowUse);
1126   Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
1127                    ? WideDef
1128                    : createExtendInst(NarrowUse->getOperand(1), WideType,
1129                                       IsSigned, NarrowUse);
1130
1131   auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
1132   auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
1133                                         NarrowBO->getName());
1134   IRBuilder<> Builder(NarrowUse);
1135   Builder.Insert(WideBO);
1136   WideBO->copyIRFlags(NarrowBO);
1137   return WideBO;
1138 }
1139
1140 Instruction *WidenIV::cloneArithmeticIVUser(NarrowIVDefUse DU,
1141                                             const SCEVAddRecExpr *WideAR) {
1142   Instruction *NarrowUse = DU.NarrowUse;
1143   Instruction *NarrowDef = DU.NarrowDef;
1144   Instruction *WideDef = DU.WideDef;
1145
1146   LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");
1147
1148   unsigned IVOpIdx = (NarrowUse->getOperand(0) == NarrowDef) ? 0 : 1;
1149
1150   // We're trying to find X such that
1151   //
1152   //  Widen(NarrowDef `op` NonIVNarrowDef) == WideAR == WideDef `op.wide` X
1153   //
1154   // We guess two solutions to X, sext(NonIVNarrowDef) and zext(NonIVNarrowDef),
1155   // and check using SCEV if any of them are correct.
1156
1157   // Returns true if extending NonIVNarrowDef according to `SignExt` is a
1158   // correct solution to X.
1159   auto GuessNonIVOperand = [&](bool SignExt) {
1160     const SCEV *WideLHS;
1161     const SCEV *WideRHS;
1162
1163     auto GetExtend = [this, SignExt](const SCEV *S, Type *Ty) {
1164       if (SignExt)
1165         return SE->getSignExtendExpr(S, Ty);
1166       return SE->getZeroExtendExpr(S, Ty);
1167     };
1168
1169     if (IVOpIdx == 0) {
1170       WideLHS = SE->getSCEV(WideDef);
1171       const SCEV *NarrowRHS = SE->getSCEV(NarrowUse->getOperand(1));
1172       WideRHS = GetExtend(NarrowRHS, WideType);
1173     } else {
1174       const SCEV *NarrowLHS = SE->getSCEV(NarrowUse->getOperand(0));
1175       WideLHS = GetExtend(NarrowLHS, WideType);
1176       WideRHS = SE->getSCEV(WideDef);
1177     }
1178
1179     // WideUse is "WideDef `op.wide` X" as described in the comment.
1180     const SCEV *WideUse = nullptr;
1181
1182     switch (NarrowUse->getOpcode()) {
1183     default:
1184       llvm_unreachable("No other possibility!");
1185
1186     case Instruction::Add:
1187       WideUse = SE->getAddExpr(WideLHS, WideRHS);
1188       break;
1189
1190     case Instruction::Mul:
1191       WideUse = SE->getMulExpr(WideLHS, WideRHS);
1192       break;
1193
1194     case Instruction::UDiv:
1195       WideUse = SE->getUDivExpr(WideLHS, WideRHS);
1196       break;
1197
1198     case Instruction::Sub:
1199       WideUse = SE->getMinusSCEV(WideLHS, WideRHS);
1200       break;
1201     }
1202
1203     return WideUse == WideAR;
1204   };
1205
1206   bool SignExtend = getExtendKind(NarrowDef) == SignExtended;
1207   if (!GuessNonIVOperand(SignExtend)) {
1208     SignExtend = !SignExtend;
1209     if (!GuessNonIVOperand(SignExtend))
1210       return nullptr;
1211   }
1212
1213   Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
1214                    ? WideDef
1215                    : createExtendInst(NarrowUse->getOperand(0), WideType,
1216                                       SignExtend, NarrowUse);
1217   Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
1218                    ? WideDef
1219                    : createExtendInst(NarrowUse->getOperand(1), WideType,
1220                                       SignExtend, NarrowUse);
1221
1222   auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
1223   auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
1224                                         NarrowBO->getName());
1225
1226   IRBuilder<> Builder(NarrowUse);
1227   Builder.Insert(WideBO);
1228   WideBO->copyIRFlags(NarrowBO);
1229   return WideBO;
1230 }
1231
1232 WidenIV::ExtendKind WidenIV::getExtendKind(Instruction *I) {
1233   auto It = ExtendKindMap.find(I);
1234   assert(It != ExtendKindMap.end() && "Instruction not yet extended!");
1235   return It->second;
1236 }
1237
1238 const SCEV *WidenIV::getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
1239                                      unsigned OpCode) const {
1240   if (OpCode == Instruction::Add)
1241     return SE->getAddExpr(LHS, RHS);
1242   if (OpCode == Instruction::Sub)
1243     return SE->getMinusSCEV(LHS, RHS);
1244   if (OpCode == Instruction::Mul)
1245     return SE->getMulExpr(LHS, RHS);
1246
1247   llvm_unreachable("Unsupported opcode.");
1248 }
1249
1250 /// No-wrap operations can transfer sign extension of their result to their
1251 /// operands. Generate the SCEV value for the widened operation without
1252 /// actually modifying the IR yet. If the expression after extending the
1253 /// operands is an AddRec for this loop, return the AddRec and the kind of
1254 /// extension used.
1255 WidenIV::WidenedRecTy WidenIV::getExtendedOperandRecurrence(NarrowIVDefUse DU) {
1256   // Handle the common case of add<nsw/nuw>
1257   const unsigned OpCode = DU.NarrowUse->getOpcode();
1258   // Only Add/Sub/Mul instructions supported yet.
1259   if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
1260       OpCode != Instruction::Mul)
1261     return {nullptr, Unknown};
1262
1263   // One operand (NarrowDef) has already been extended to WideDef. Now determine
1264   // if extending the other will lead to a recurrence.
1265   const unsigned ExtendOperIdx =
1266       DU.NarrowUse->getOperand(0) == DU.NarrowDef ? 1 : 0;
1267   assert(DU.NarrowUse->getOperand(1-ExtendOperIdx) == DU.NarrowDef && "bad DU");
1268
1269   const SCEV *ExtendOperExpr = nullptr;
1270   const OverflowingBinaryOperator *OBO =
1271     cast<OverflowingBinaryOperator>(DU.NarrowUse);
1272   ExtendKind ExtKind = getExtendKind(DU.NarrowDef);
1273   if (ExtKind == SignExtended && OBO->hasNoSignedWrap())
1274     ExtendOperExpr = SE->getSignExtendExpr(
1275       SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
1276   else if(ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap())
1277     ExtendOperExpr = SE->getZeroExtendExpr(
1278       SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
1279   else
1280     return {nullptr, Unknown};
1281
1282   // When creating this SCEV expr, don't apply the current operations NSW or NUW
1283   // flags. This instruction may be guarded by control flow that the no-wrap
1284   // behavior depends on. Non-control-equivalent instructions can be mapped to
1285   // the same SCEV expression, and it would be incorrect to transfer NSW/NUW
1286   // semantics to those operations.
1287   const SCEV *lhs = SE->getSCEV(DU.WideDef);
1288   const SCEV *rhs = ExtendOperExpr;
1289
1290   // Let's swap operands to the initial order for the case of non-commutative
1291   // operations, like SUB. See PR21014.
1292   if (ExtendOperIdx == 0)
1293     std::swap(lhs, rhs);
1294   const SCEVAddRecExpr *AddRec =
1295       dyn_cast<SCEVAddRecExpr>(getSCEVByOpCode(lhs, rhs, OpCode));
1296
1297   if (!AddRec || AddRec->getLoop() != L)
1298     return {nullptr, Unknown};
1299
1300   return {AddRec, ExtKind};
1301 }
1302
1303 /// Is this instruction potentially interesting for further simplification after
1304 /// widening it's type? In other words, can the extend be safely hoisted out of
1305 /// the loop with SCEV reducing the value to a recurrence on the same loop. If
1306 /// so, return the extended recurrence and the kind of extension used. Otherwise
1307 /// return {nullptr, Unknown}.
1308 WidenIV::WidenedRecTy WidenIV::getWideRecurrence(NarrowIVDefUse DU) {
1309   if (!SE->isSCEVable(DU.NarrowUse->getType()))
1310     return {nullptr, Unknown};
1311
1312   const SCEV *NarrowExpr = SE->getSCEV(DU.NarrowUse);
1313   if (SE->getTypeSizeInBits(NarrowExpr->getType()) >=
1314       SE->getTypeSizeInBits(WideType)) {
1315     // NarrowUse implicitly widens its operand. e.g. a gep with a narrow
1316     // index. So don't follow this use.
1317     return {nullptr, Unknown};
1318   }
1319
1320   const SCEV *WideExpr;
1321   ExtendKind ExtKind;
1322   if (DU.NeverNegative) {
1323     WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
1324     if (isa<SCEVAddRecExpr>(WideExpr))
1325       ExtKind = SignExtended;
1326     else {
1327       WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
1328       ExtKind = ZeroExtended;
1329     }
1330   } else if (getExtendKind(DU.NarrowDef) == SignExtended) {
1331     WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
1332     ExtKind = SignExtended;
1333   } else {
1334     WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
1335     ExtKind = ZeroExtended;
1336   }
1337   const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr);
1338   if (!AddRec || AddRec->getLoop() != L)
1339     return {nullptr, Unknown};
1340   return {AddRec, ExtKind};
1341 }
1342
1343 /// This IV user cannot be widened. Replace this use of the original narrow IV
1344 /// with a truncation of the new wide IV to isolate and eliminate the narrow IV.
1345 static void truncateIVUse(NarrowIVDefUse DU, DominatorTree *DT, LoopInfo *LI) {
1346   auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI);
1347   if (!InsertPt)
1348     return;
1349   LLVM_DEBUG(dbgs() << "INDVARS: Truncate IV " << *DU.WideDef << " for user "
1350                     << *DU.NarrowUse << "\n");
1351   IRBuilder<> Builder(InsertPt);
1352   Value *Trunc = Builder.CreateTrunc(DU.WideDef, DU.NarrowDef->getType());
1353   DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc);
1354 }
1355
1356 /// If the narrow use is a compare instruction, then widen the compare
1357 //  (and possibly the other operand).  The extend operation is hoisted into the
1358 // loop preheader as far as possible.
1359 bool WidenIV::widenLoopCompare(NarrowIVDefUse DU) {
1360   ICmpInst *Cmp = dyn_cast<ICmpInst>(DU.NarrowUse);
1361   if (!Cmp)
1362     return false;
1363
1364   // We can legally widen the comparison in the following two cases:
1365   //
1366   //  - The signedness of the IV extension and comparison match
1367   //
1368   //  - The narrow IV is always positive (and thus its sign extension is equal
1369   //    to its zero extension).  For instance, let's say we're zero extending
1370   //    %narrow for the following use
1371   //
1372   //      icmp slt i32 %narrow, %val   ... (A)
1373   //
1374   //    and %narrow is always positive.  Then
1375   //
1376   //      (A) == icmp slt i32 sext(%narrow), sext(%val)
1377   //          == icmp slt i32 zext(%narrow), sext(%val)
1378   bool IsSigned = getExtendKind(DU.NarrowDef) == SignExtended;
1379   if (!(DU.NeverNegative || IsSigned == Cmp->isSigned()))
1380     return false;
1381
1382   Value *Op = Cmp->getOperand(Cmp->getOperand(0) == DU.NarrowDef ? 1 : 0);
1383   unsigned CastWidth = SE->getTypeSizeInBits(Op->getType());
1384   unsigned IVWidth = SE->getTypeSizeInBits(WideType);
1385   assert(CastWidth <= IVWidth && "Unexpected width while widening compare.");
1386
1387   // Widen the compare instruction.
1388   auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI);
1389   if (!InsertPt)
1390     return false;
1391   IRBuilder<> Builder(InsertPt);
1392   DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
1393
1394   // Widen the other operand of the compare, if necessary.
1395   if (CastWidth < IVWidth) {
1396     Value *ExtOp = createExtendInst(Op, WideType, Cmp->isSigned(), Cmp);
1397     DU.NarrowUse->replaceUsesOfWith(Op, ExtOp);
1398   }
1399   return true;
1400 }
1401
1402 /// If the narrow use is an instruction whose two operands are the defining
1403 /// instruction of DU and a load instruction, then we have the following:
1404 /// if the load is hoisted outside the loop, then we do not reach this function
1405 /// as scalar evolution analysis works fine in widenIVUse with variables
1406 /// hoisted outside the loop and efficient code is subsequently generated by
1407 /// not emitting truncate instructions. But when the load is not hoisted
1408 /// (whether due to limitation in alias analysis or due to a true legality),
1409 /// then scalar evolution can not proceed with loop variant values and
1410 /// inefficient code is generated. This function handles the non-hoisted load
1411 /// special case by making the optimization generate the same type of code for
1412 /// hoisted and non-hoisted load (widen use and eliminate sign extend
1413 /// instruction). This special case is important especially when the induction
1414 /// variables are affecting addressing mode in code generation.
1415 bool WidenIV::widenWithVariantLoadUse(NarrowIVDefUse DU) {
1416   Instruction *NarrowUse = DU.NarrowUse;
1417   Instruction *NarrowDef = DU.NarrowDef;
1418   Instruction *WideDef = DU.WideDef;
1419
1420   // Handle the common case of add<nsw/nuw>
1421   const unsigned OpCode = NarrowUse->getOpcode();
1422   // Only Add/Sub/Mul instructions are supported.
1423   if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
1424       OpCode != Instruction::Mul)
1425     return false;
1426
1427   // The operand that is not defined by NarrowDef of DU. Let's call it the
1428   // other operand.
1429   unsigned ExtendOperIdx = DU.NarrowUse->getOperand(0) == NarrowDef ? 1 : 0;
1430   assert(DU.NarrowUse->getOperand(1 - ExtendOperIdx) == DU.NarrowDef &&
1431          "bad DU");
1432
1433   const SCEV *ExtendOperExpr = nullptr;
1434   const OverflowingBinaryOperator *OBO =
1435     cast<OverflowingBinaryOperator>(NarrowUse);
1436   ExtendKind ExtKind = getExtendKind(NarrowDef);
1437   if (ExtKind == SignExtended && OBO->hasNoSignedWrap())
1438     ExtendOperExpr = SE->getSignExtendExpr(
1439       SE->getSCEV(NarrowUse->getOperand(ExtendOperIdx)), WideType);
1440   else if (ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap())
1441     ExtendOperExpr = SE->getZeroExtendExpr(
1442       SE->getSCEV(NarrowUse->getOperand(ExtendOperIdx)), WideType);
1443   else
1444     return false;
1445
1446   // We are interested in the other operand being a load instruction.
1447   // But, we should look into relaxing this restriction later on.
1448   auto *I = dyn_cast<Instruction>(NarrowUse->getOperand(ExtendOperIdx));
1449   if (I && I->getOpcode() != Instruction::Load)
1450     return false;
1451
1452   // Verifying that Defining operand is an AddRec
1453   const SCEV *Op1 = SE->getSCEV(WideDef);
1454   const SCEVAddRecExpr *AddRecOp1 = dyn_cast<SCEVAddRecExpr>(Op1);
1455   if (!AddRecOp1 || AddRecOp1->getLoop() != L)
1456     return false;
1457   // Verifying that other operand is an Extend.
1458   if (ExtKind == SignExtended) {
1459     if (!isa<SCEVSignExtendExpr>(ExtendOperExpr))
1460       return false;
1461   } else {
1462     if (!isa<SCEVZeroExtendExpr>(ExtendOperExpr))
1463       return false;
1464   }
1465
1466   if (ExtKind == SignExtended) {
1467     for (Use &U : NarrowUse->uses()) {
1468       SExtInst *User = dyn_cast<SExtInst>(U.getUser());
1469       if (!User || User->getType() != WideType)
1470         return false;
1471     }
1472   } else { // ExtKind == ZeroExtended
1473     for (Use &U : NarrowUse->uses()) {
1474       ZExtInst *User = dyn_cast<ZExtInst>(U.getUser());
1475       if (!User || User->getType() != WideType)
1476         return false;
1477     }
1478   }
1479
1480   return true;
1481 }
1482
1483 /// Special Case for widening with variant Loads (see
1484 /// WidenIV::widenWithVariantLoadUse). This is the code generation part.
1485 void WidenIV::widenWithVariantLoadUseCodegen(NarrowIVDefUse DU) {
1486   Instruction *NarrowUse = DU.NarrowUse;
1487   Instruction *NarrowDef = DU.NarrowDef;
1488   Instruction *WideDef = DU.WideDef;
1489
1490   ExtendKind ExtKind = getExtendKind(NarrowDef);
1491
1492   LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");
1493
1494   // Generating a widening use instruction.
1495   Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
1496                    ? WideDef
1497                    : createExtendInst(NarrowUse->getOperand(0), WideType,
1498                                       ExtKind, NarrowUse);
1499   Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
1500                    ? WideDef
1501                    : createExtendInst(NarrowUse->getOperand(1), WideType,
1502                                       ExtKind, NarrowUse);
1503
1504   auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
1505   auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
1506                                         NarrowBO->getName());
1507   IRBuilder<> Builder(NarrowUse);
1508   Builder.Insert(WideBO);
1509   WideBO->copyIRFlags(NarrowBO);
1510
1511   if (ExtKind == SignExtended)
1512     ExtendKindMap[NarrowUse] = SignExtended;
1513   else
1514     ExtendKindMap[NarrowUse] = ZeroExtended;
1515
1516   // Update the Use.
1517   if (ExtKind == SignExtended) {
1518     for (Use &U : NarrowUse->uses()) {
1519       SExtInst *User = dyn_cast<SExtInst>(U.getUser());
1520       if (User && User->getType() == WideType) {
1521         LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *User << " replaced by "
1522                           << *WideBO << "\n");
1523         ++NumElimExt;
1524         User->replaceAllUsesWith(WideBO);
1525         DeadInsts.emplace_back(User);
1526       }
1527     }
1528   } else { // ExtKind == ZeroExtended
1529     for (Use &U : NarrowUse->uses()) {
1530       ZExtInst *User = dyn_cast<ZExtInst>(U.getUser());
1531       if (User && User->getType() == WideType) {
1532         LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *User << " replaced by "
1533                           << *WideBO << "\n");
1534         ++NumElimExt;
1535         User->replaceAllUsesWith(WideBO);
1536         DeadInsts.emplace_back(User);
1537       }
1538     }
1539   }
1540 }
1541
1542 /// Determine whether an individual user of the narrow IV can be widened. If so,
1543 /// return the wide clone of the user.
1544 Instruction *WidenIV::widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
1545   assert(ExtendKindMap.count(DU.NarrowDef) &&
1546          "Should already know the kind of extension used to widen NarrowDef");
1547
1548   // Stop traversing the def-use chain at inner-loop phis or post-loop phis.
1549   if (PHINode *UsePhi = dyn_cast<PHINode>(DU.NarrowUse)) {
1550     if (LI->getLoopFor(UsePhi->getParent()) != L) {
1551       // For LCSSA phis, sink the truncate outside the loop.
1552       // After SimplifyCFG most loop exit targets have a single predecessor.
1553       // Otherwise fall back to a truncate within the loop.
1554       if (UsePhi->getNumOperands() != 1)
1555         truncateIVUse(DU, DT, LI);
1556       else {
1557         // Widening the PHI requires us to insert a trunc.  The logical place
1558         // for this trunc is in the same BB as the PHI.  This is not possible if
1559         // the BB is terminated by a catchswitch.
1560         if (isa<CatchSwitchInst>(UsePhi->getParent()->getTerminator()))
1561           return nullptr;
1562
1563         PHINode *WidePhi =
1564           PHINode::Create(DU.WideDef->getType(), 1, UsePhi->getName() + ".wide",
1565                           UsePhi);
1566         WidePhi->addIncoming(DU.WideDef, UsePhi->getIncomingBlock(0));
1567         IRBuilder<> Builder(&*WidePhi->getParent()->getFirstInsertionPt());
1568         Value *Trunc = Builder.CreateTrunc(WidePhi, DU.NarrowDef->getType());
1569         UsePhi->replaceAllUsesWith(Trunc);
1570         DeadInsts.emplace_back(UsePhi);
1571         LLVM_DEBUG(dbgs() << "INDVARS: Widen lcssa phi " << *UsePhi << " to "
1572                           << *WidePhi << "\n");
1573       }
1574       return nullptr;
1575     }
1576   }
1577
1578   // This narrow use can be widened by a sext if it's non-negative or its narrow
1579   // def was widended by a sext. Same for zext.
1580   auto canWidenBySExt = [&]() {
1581     return DU.NeverNegative || getExtendKind(DU.NarrowDef) == SignExtended;
1582   };
1583   auto canWidenByZExt = [&]() {
1584     return DU.NeverNegative || getExtendKind(DU.NarrowDef) == ZeroExtended;
1585   };
1586
1587   // Our raison d'etre! Eliminate sign and zero extension.
1588   if ((isa<SExtInst>(DU.NarrowUse) && canWidenBySExt()) ||
1589       (isa<ZExtInst>(DU.NarrowUse) && canWidenByZExt())) {
1590     Value *NewDef = DU.WideDef;
1591     if (DU.NarrowUse->getType() != WideType) {
1592       unsigned CastWidth = SE->getTypeSizeInBits(DU.NarrowUse->getType());
1593       unsigned IVWidth = SE->getTypeSizeInBits(WideType);
1594       if (CastWidth < IVWidth) {
1595         // The cast isn't as wide as the IV, so insert a Trunc.
1596         IRBuilder<> Builder(DU.NarrowUse);
1597         NewDef = Builder.CreateTrunc(DU.WideDef, DU.NarrowUse->getType());
1598       }
1599       else {
1600         // A wider extend was hidden behind a narrower one. This may induce
1601         // another round of IV widening in which the intermediate IV becomes
1602         // dead. It should be very rare.
1603         LLVM_DEBUG(dbgs() << "INDVARS: New IV " << *WidePhi
1604                           << " not wide enough to subsume " << *DU.NarrowUse
1605                           << "\n");
1606         DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
1607         NewDef = DU.NarrowUse;
1608       }
1609     }
1610     if (NewDef != DU.NarrowUse) {
1611       LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *DU.NarrowUse
1612                         << " replaced by " << *DU.WideDef << "\n");
1613       ++NumElimExt;
1614       DU.NarrowUse->replaceAllUsesWith(NewDef);
1615       DeadInsts.emplace_back(DU.NarrowUse);
1616     }
1617     // Now that the extend is gone, we want to expose it's uses for potential
1618     // further simplification. We don't need to directly inform SimplifyIVUsers
1619     // of the new users, because their parent IV will be processed later as a
1620     // new loop phi. If we preserved IVUsers analysis, we would also want to
1621     // push the uses of WideDef here.
1622
1623     // No further widening is needed. The deceased [sz]ext had done it for us.
1624     return nullptr;
1625   }
1626
1627   // Does this user itself evaluate to a recurrence after widening?
1628   WidenedRecTy WideAddRec = getExtendedOperandRecurrence(DU);
1629   if (!WideAddRec.first)
1630     WideAddRec = getWideRecurrence(DU);
1631
1632   assert((WideAddRec.first == nullptr) == (WideAddRec.second == Unknown));
1633   if (!WideAddRec.first) {
1634     // If use is a loop condition, try to promote the condition instead of
1635     // truncating the IV first.
1636     if (widenLoopCompare(DU))
1637       return nullptr;
1638
1639     // We are here about to generate a truncate instruction that may hurt
1640     // performance because the scalar evolution expression computed earlier
1641     // in WideAddRec.first does not indicate a polynomial induction expression.
1642     // In that case, look at the operands of the use instruction to determine
1643     // if we can still widen the use instead of truncating its operand.
1644     if (widenWithVariantLoadUse(DU)) {
1645       widenWithVariantLoadUseCodegen(DU);
1646       return nullptr;
1647     }
1648
1649     // This user does not evaluate to a recurrence after widening, so don't
1650     // follow it. Instead insert a Trunc to kill off the original use,
1651     // eventually isolating the original narrow IV so it can be removed.
1652     truncateIVUse(DU, DT, LI);
1653     return nullptr;
1654   }
1655   // Assume block terminators cannot evaluate to a recurrence. We can't to
1656   // insert a Trunc after a terminator if there happens to be a critical edge.
1657   assert(DU.NarrowUse != DU.NarrowUse->getParent()->getTerminator() &&
1658          "SCEV is not expected to evaluate a block terminator");
1659
1660   // Reuse the IV increment that SCEVExpander created as long as it dominates
1661   // NarrowUse.
1662   Instruction *WideUse = nullptr;
1663   if (WideAddRec.first == WideIncExpr &&
1664       Rewriter.hoistIVInc(WideInc, DU.NarrowUse))
1665     WideUse = WideInc;
1666   else {
1667     WideUse = cloneIVUser(DU, WideAddRec.first);
1668     if (!WideUse)
1669       return nullptr;
1670   }
1671   // Evaluation of WideAddRec ensured that the narrow expression could be
1672   // extended outside the loop without overflow. This suggests that the wide use
1673   // evaluates to the same expression as the extended narrow use, but doesn't
1674   // absolutely guarantee it. Hence the following failsafe check. In rare cases
1675   // where it fails, we simply throw away the newly created wide use.
1676   if (WideAddRec.first != SE->getSCEV(WideUse)) {
1677     LLVM_DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse << ": "
1678                       << *SE->getSCEV(WideUse) << " != " << *WideAddRec.first
1679                       << "\n");
1680     DeadInsts.emplace_back(WideUse);
1681     return nullptr;
1682   }
1683
1684   // if we reached this point then we are going to replace
1685   // DU.NarrowUse with WideUse. Reattach DbgValue then.
1686   replaceAllDbgUsesWith(*DU.NarrowUse, *WideUse, *WideUse, *DT);
1687
1688   ExtendKindMap[DU.NarrowUse] = WideAddRec.second;
1689   // Returning WideUse pushes it on the worklist.
1690   return WideUse;
1691 }
1692
1693 /// Add eligible users of NarrowDef to NarrowIVUsers.
1694 void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) {
1695   const SCEV *NarrowSCEV = SE->getSCEV(NarrowDef);
1696   bool NonNegativeDef =
1697       SE->isKnownPredicate(ICmpInst::ICMP_SGE, NarrowSCEV,
1698                            SE->getConstant(NarrowSCEV->getType(), 0));
1699   for (User *U : NarrowDef->users()) {
1700     Instruction *NarrowUser = cast<Instruction>(U);
1701
1702     // Handle data flow merges and bizarre phi cycles.
1703     if (!Widened.insert(NarrowUser).second)
1704       continue;
1705
1706     bool NonNegativeUse = false;
1707     if (!NonNegativeDef) {
1708       // We might have a control-dependent range information for this context.
1709       if (auto RangeInfo = getPostIncRangeInfo(NarrowDef, NarrowUser))
1710         NonNegativeUse = RangeInfo->getSignedMin().isNonNegative();
1711     }
1712
1713     NarrowIVUsers.emplace_back(NarrowDef, NarrowUser, WideDef,
1714                                NonNegativeDef || NonNegativeUse);
1715   }
1716 }
1717
1718 /// Process a single induction variable. First use the SCEVExpander to create a
1719 /// wide induction variable that evaluates to the same recurrence as the
1720 /// original narrow IV. Then use a worklist to forward traverse the narrow IV's
1721 /// def-use chain. After widenIVUse has processed all interesting IV users, the
1722 /// narrow IV will be isolated for removal by DeleteDeadPHIs.
1723 ///
1724 /// It would be simpler to delete uses as they are processed, but we must avoid
1725 /// invalidating SCEV expressions.
1726 PHINode *WidenIV::createWideIV(SCEVExpander &Rewriter) {
1727   // Is this phi an induction variable?
1728   const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(OrigPhi));
1729   if (!AddRec)
1730     return nullptr;
1731
1732   // Widen the induction variable expression.
1733   const SCEV *WideIVExpr = getExtendKind(OrigPhi) == SignExtended
1734                                ? SE->getSignExtendExpr(AddRec, WideType)
1735                                : SE->getZeroExtendExpr(AddRec, WideType);
1736
1737   assert(SE->getEffectiveSCEVType(WideIVExpr->getType()) == WideType &&
1738          "Expect the new IV expression to preserve its type");
1739
1740   // Can the IV be extended outside the loop without overflow?
1741   AddRec = dyn_cast<SCEVAddRecExpr>(WideIVExpr);
1742   if (!AddRec || AddRec->getLoop() != L)
1743     return nullptr;
1744
1745   // An AddRec must have loop-invariant operands. Since this AddRec is
1746   // materialized by a loop header phi, the expression cannot have any post-loop
1747   // operands, so they must dominate the loop header.
1748   assert(
1749       SE->properlyDominates(AddRec->getStart(), L->getHeader()) &&
1750       SE->properlyDominates(AddRec->getStepRecurrence(*SE), L->getHeader()) &&
1751       "Loop header phi recurrence inputs do not dominate the loop");
1752
1753   // Iterate over IV uses (including transitive ones) looking for IV increments
1754   // of the form 'add nsw %iv, <const>'. For each increment and each use of
1755   // the increment calculate control-dependent range information basing on
1756   // dominating conditions inside of the loop (e.g. a range check inside of the
1757   // loop). Calculated ranges are stored in PostIncRangeInfos map.
1758   //
1759   // Control-dependent range information is later used to prove that a narrow
1760   // definition is not negative (see pushNarrowIVUsers). It's difficult to do
1761   // this on demand because when pushNarrowIVUsers needs this information some
1762   // of the dominating conditions might be already widened.
1763   if (UsePostIncrementRanges)
1764     calculatePostIncRanges(OrigPhi);
1765
1766   // The rewriter provides a value for the desired IV expression. This may
1767   // either find an existing phi or materialize a new one. Either way, we
1768   // expect a well-formed cyclic phi-with-increments. i.e. any operand not part
1769   // of the phi-SCC dominates the loop entry.
1770   Instruction *InsertPt = &L->getHeader()->front();
1771   WidePhi = cast<PHINode>(Rewriter.expandCodeFor(AddRec, WideType, InsertPt));
1772
1773   // Remembering the WideIV increment generated by SCEVExpander allows
1774   // widenIVUse to reuse it when widening the narrow IV's increment. We don't
1775   // employ a general reuse mechanism because the call above is the only call to
1776   // SCEVExpander. Henceforth, we produce 1-to-1 narrow to wide uses.
1777   if (BasicBlock *LatchBlock = L->getLoopLatch()) {
1778     WideInc =
1779       cast<Instruction>(WidePhi->getIncomingValueForBlock(LatchBlock));
1780     WideIncExpr = SE->getSCEV(WideInc);
1781     // Propagate the debug location associated with the original loop increment
1782     // to the new (widened) increment.
1783     auto *OrigInc =
1784       cast<Instruction>(OrigPhi->getIncomingValueForBlock(LatchBlock));
1785     WideInc->setDebugLoc(OrigInc->getDebugLoc());
1786   }
1787
1788   LLVM_DEBUG(dbgs() << "Wide IV: " << *WidePhi << "\n");
1789   ++NumWidened;
1790
1791   // Traverse the def-use chain using a worklist starting at the original IV.
1792   assert(Widened.empty() && NarrowIVUsers.empty() && "expect initial state" );
1793
1794   Widened.insert(OrigPhi);
1795   pushNarrowIVUsers(OrigPhi, WidePhi);
1796
1797   while (!NarrowIVUsers.empty()) {
1798     NarrowIVDefUse DU = NarrowIVUsers.pop_back_val();
1799
1800     // Process a def-use edge. This may replace the use, so don't hold a
1801     // use_iterator across it.
1802     Instruction *WideUse = widenIVUse(DU, Rewriter);
1803
1804     // Follow all def-use edges from the previous narrow use.
1805     if (WideUse)
1806       pushNarrowIVUsers(DU.NarrowUse, WideUse);
1807
1808     // widenIVUse may have removed the def-use edge.
1809     if (DU.NarrowDef->use_empty())
1810       DeadInsts.emplace_back(DU.NarrowDef);
1811   }
1812
1813   // Attach any debug information to the new PHI.
1814   replaceAllDbgUsesWith(*OrigPhi, *WidePhi, *WidePhi, *DT);
1815
1816   return WidePhi;
1817 }
1818
1819 /// Calculates control-dependent range for the given def at the given context
1820 /// by looking at dominating conditions inside of the loop
1821 void WidenIV::calculatePostIncRange(Instruction *NarrowDef,
1822                                     Instruction *NarrowUser) {
1823   using namespace llvm::PatternMatch;
1824
1825   Value *NarrowDefLHS;
1826   const APInt *NarrowDefRHS;
1827   if (!match(NarrowDef, m_NSWAdd(m_Value(NarrowDefLHS),
1828                                  m_APInt(NarrowDefRHS))) ||
1829       !NarrowDefRHS->isNonNegative())
1830     return;
1831
1832   auto UpdateRangeFromCondition = [&] (Value *Condition,
1833                                        bool TrueDest) {
1834     CmpInst::Predicate Pred;
1835     Value *CmpRHS;
1836     if (!match(Condition, m_ICmp(Pred, m_Specific(NarrowDefLHS),
1837                                  m_Value(CmpRHS))))
1838       return;
1839
1840     CmpInst::Predicate P =
1841             TrueDest ? Pred : CmpInst::getInversePredicate(Pred);
1842
1843     auto CmpRHSRange = SE->getSignedRange(SE->getSCEV(CmpRHS));
1844     auto CmpConstrainedLHSRange =
1845             ConstantRange::makeAllowedICmpRegion(P, CmpRHSRange);
1846     auto NarrowDefRange = CmpConstrainedLHSRange.addWithNoWrap(
1847         *NarrowDefRHS, OverflowingBinaryOperator::NoSignedWrap);
1848
1849     updatePostIncRangeInfo(NarrowDef, NarrowUser, NarrowDefRange);
1850   };
1851
1852   auto UpdateRangeFromGuards = [&](Instruction *Ctx) {
1853     if (!HasGuards)
1854       return;
1855
1856     for (Instruction &I : make_range(Ctx->getIterator().getReverse(),
1857                                      Ctx->getParent()->rend())) {
1858       Value *C = nullptr;
1859       if (match(&I, m_Intrinsic<Intrinsic::experimental_guard>(m_Value(C))))
1860         UpdateRangeFromCondition(C, /*TrueDest=*/true);
1861     }
1862   };
1863
1864   UpdateRangeFromGuards(NarrowUser);
1865
1866   BasicBlock *NarrowUserBB = NarrowUser->getParent();
1867   // If NarrowUserBB is statically unreachable asking dominator queries may
1868   // yield surprising results. (e.g. the block may not have a dom tree node)
1869   if (!DT->isReachableFromEntry(NarrowUserBB))
1870     return;
1871
1872   for (auto *DTB = (*DT)[NarrowUserBB]->getIDom();
1873        L->contains(DTB->getBlock());
1874        DTB = DTB->getIDom()) {
1875     auto *BB = DTB->getBlock();
1876     auto *TI = BB->getTerminator();
1877     UpdateRangeFromGuards(TI);
1878
1879     auto *BI = dyn_cast<BranchInst>(TI);
1880     if (!BI || !BI->isConditional())
1881       continue;
1882
1883     auto *TrueSuccessor = BI->getSuccessor(0);
1884     auto *FalseSuccessor = BI->getSuccessor(1);
1885
1886     auto DominatesNarrowUser = [this, NarrowUser] (BasicBlockEdge BBE) {
1887       return BBE.isSingleEdge() &&
1888              DT->dominates(BBE, NarrowUser->getParent());
1889     };
1890
1891     if (DominatesNarrowUser(BasicBlockEdge(BB, TrueSuccessor)))
1892       UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/true);
1893
1894     if (DominatesNarrowUser(BasicBlockEdge(BB, FalseSuccessor)))
1895       UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/false);
1896   }
1897 }
1898
1899 /// Calculates PostIncRangeInfos map for the given IV
1900 void WidenIV::calculatePostIncRanges(PHINode *OrigPhi) {
1901   SmallPtrSet<Instruction *, 16> Visited;
1902   SmallVector<Instruction *, 6> Worklist;
1903   Worklist.push_back(OrigPhi);
1904   Visited.insert(OrigPhi);
1905
1906   while (!Worklist.empty()) {
1907     Instruction *NarrowDef = Worklist.pop_back_val();
1908
1909     for (Use &U : NarrowDef->uses()) {
1910       auto *NarrowUser = cast<Instruction>(U.getUser());
1911
1912       // Don't go looking outside the current loop.
1913       auto *NarrowUserLoop = (*LI)[NarrowUser->getParent()];
1914       if (!NarrowUserLoop || !L->contains(NarrowUserLoop))
1915         continue;
1916
1917       if (!Visited.insert(NarrowUser).second)
1918         continue;
1919
1920       Worklist.push_back(NarrowUser);
1921
1922       calculatePostIncRange(NarrowDef, NarrowUser);
1923     }
1924   }
1925 }
1926
1927 //===----------------------------------------------------------------------===//
1928 //  Live IV Reduction - Minimize IVs live across the loop.
1929 //===----------------------------------------------------------------------===//
1930
1931 //===----------------------------------------------------------------------===//
1932 //  Simplification of IV users based on SCEV evaluation.
1933 //===----------------------------------------------------------------------===//
1934
1935 namespace {
1936
1937 class IndVarSimplifyVisitor : public IVVisitor {
1938   ScalarEvolution *SE;
1939   const TargetTransformInfo *TTI;
1940   PHINode *IVPhi;
1941
1942 public:
1943   WideIVInfo WI;
1944
1945   IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV,
1946                         const TargetTransformInfo *TTI,
1947                         const DominatorTree *DTree)
1948     : SE(SCEV), TTI(TTI), IVPhi(IV) {
1949     DT = DTree;
1950     WI.NarrowIV = IVPhi;
1951   }
1952
1953   // Implement the interface used by simplifyUsersOfIV.
1954   void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); }
1955 };
1956
1957 } // end anonymous namespace
1958
1959 /// Iteratively perform simplification on a worklist of IV users. Each
1960 /// successive simplification may push more users which may themselves be
1961 /// candidates for simplification.
1962 ///
1963 /// Sign/Zero extend elimination is interleaved with IV simplification.
1964 bool IndVarSimplify::simplifyAndExtend(Loop *L,
1965                                        SCEVExpander &Rewriter,
1966                                        LoopInfo *LI) {
1967   SmallVector<WideIVInfo, 8> WideIVs;
1968
1969   auto *GuardDecl = L->getBlocks()[0]->getModule()->getFunction(
1970           Intrinsic::getName(Intrinsic::experimental_guard));
1971   bool HasGuards = GuardDecl && !GuardDecl->use_empty();
1972
1973   SmallVector<PHINode*, 8> LoopPhis;
1974   for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
1975     LoopPhis.push_back(cast<PHINode>(I));
1976   }
1977   // Each round of simplification iterates through the SimplifyIVUsers worklist
1978   // for all current phis, then determines whether any IVs can be
1979   // widened. Widening adds new phis to LoopPhis, inducing another round of
1980   // simplification on the wide IVs.
1981   bool Changed = false;
1982   while (!LoopPhis.empty()) {
1983     // Evaluate as many IV expressions as possible before widening any IVs. This
1984     // forces SCEV to set no-wrap flags before evaluating sign/zero
1985     // extension. The first time SCEV attempts to normalize sign/zero extension,
1986     // the result becomes final. So for the most predictable results, we delay
1987     // evaluation of sign/zero extend evaluation until needed, and avoid running
1988     // other SCEV based analysis prior to simplifyAndExtend.
1989     do {
1990       PHINode *CurrIV = LoopPhis.pop_back_val();
1991
1992       // Information about sign/zero extensions of CurrIV.
1993       IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT);
1994
1995       Changed |=
1996           simplifyUsersOfIV(CurrIV, SE, DT, LI, DeadInsts, Rewriter, &Visitor);
1997
1998       if (Visitor.WI.WidestNativeType) {
1999         WideIVs.push_back(Visitor.WI);
2000       }
2001     } while(!LoopPhis.empty());
2002
2003     for (; !WideIVs.empty(); WideIVs.pop_back()) {
2004       WidenIV Widener(WideIVs.back(), LI, SE, DT, DeadInsts, HasGuards);
2005       if (PHINode *WidePhi = Widener.createWideIV(Rewriter)) {
2006         Changed = true;
2007         LoopPhis.push_back(WidePhi);
2008       }
2009     }
2010   }
2011   return Changed;
2012 }
2013
2014 //===----------------------------------------------------------------------===//
2015 //  linearFunctionTestReplace and its kin. Rewrite the loop exit condition.
2016 //===----------------------------------------------------------------------===//
2017
2018 /// Given an Value which is hoped to be part of an add recurance in the given
2019 /// loop, return the associated Phi node if so.  Otherwise, return null.  Note
2020 /// that this is less general than SCEVs AddRec checking.
2021 static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L) {
2022   Instruction *IncI = dyn_cast<Instruction>(IncV);
2023   if (!IncI)
2024     return nullptr;
2025
2026   switch (IncI->getOpcode()) {
2027   case Instruction::Add:
2028   case Instruction::Sub:
2029     break;
2030   case Instruction::GetElementPtr:
2031     // An IV counter must preserve its type.
2032     if (IncI->getNumOperands() == 2)
2033       break;
2034     LLVM_FALLTHROUGH;
2035   default:
2036     return nullptr;
2037   }
2038
2039   PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
2040   if (Phi && Phi->getParent() == L->getHeader()) {
2041     if (L->isLoopInvariant(IncI->getOperand(1)))
2042       return Phi;
2043     return nullptr;
2044   }
2045   if (IncI->getOpcode() == Instruction::GetElementPtr)
2046     return nullptr;
2047
2048   // Allow add/sub to be commuted.
2049   Phi = dyn_cast<PHINode>(IncI->getOperand(1));
2050   if (Phi && Phi->getParent() == L->getHeader()) {
2051     if (L->isLoopInvariant(IncI->getOperand(0)))
2052       return Phi;
2053   }
2054   return nullptr;
2055 }
2056
2057 /// Whether the current loop exit test is based on this value.  Currently this
2058 /// is limited to a direct use in the loop condition.
2059 static bool isLoopExitTestBasedOn(Value *V, BasicBlock *ExitingBB) {
2060   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2061   ICmpInst *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
2062   // TODO: Allow non-icmp loop test.
2063   if (!ICmp)
2064     return false;
2065
2066   // TODO: Allow indirect use.
2067   return ICmp->getOperand(0) == V || ICmp->getOperand(1) == V;
2068 }
2069
2070 /// linearFunctionTestReplace policy. Return true unless we can show that the
2071 /// current exit test is already sufficiently canonical.
2072 static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) {
2073   assert(L->getLoopLatch() && "Must be in simplified form");
2074
2075   // Avoid converting a constant or loop invariant test back to a runtime
2076   // test.  This is critical for when SCEV's cached ExitCount is less precise
2077   // than the current IR (such as after we've proven a particular exit is
2078   // actually dead and thus the BE count never reaches our ExitCount.)
2079   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2080   if (L->isLoopInvariant(BI->getCondition()))
2081     return false;
2082
2083   // Do LFTR to simplify the exit condition to an ICMP.
2084   ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
2085   if (!Cond)
2086     return true;
2087
2088   // Do LFTR to simplify the exit ICMP to EQ/NE
2089   ICmpInst::Predicate Pred = Cond->getPredicate();
2090   if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
2091     return true;
2092
2093   // Look for a loop invariant RHS
2094   Value *LHS = Cond->getOperand(0);
2095   Value *RHS = Cond->getOperand(1);
2096   if (!L->isLoopInvariant(RHS)) {
2097     if (!L->isLoopInvariant(LHS))
2098       return true;
2099     std::swap(LHS, RHS);
2100   }
2101   // Look for a simple IV counter LHS
2102   PHINode *Phi = dyn_cast<PHINode>(LHS);
2103   if (!Phi)
2104     Phi = getLoopPhiForCounter(LHS, L);
2105
2106   if (!Phi)
2107     return true;
2108
2109   // Do LFTR if PHI node is defined in the loop, but is *not* a counter.
2110   int Idx = Phi->getBasicBlockIndex(L->getLoopLatch());
2111   if (Idx < 0)
2112     return true;
2113
2114   // Do LFTR if the exit condition's IV is *not* a simple counter.
2115   Value *IncV = Phi->getIncomingValue(Idx);
2116   return Phi != getLoopPhiForCounter(IncV, L);
2117 }
2118
2119 /// Return true if undefined behavior would provable be executed on the path to
2120 /// OnPathTo if Root produced a posion result.  Note that this doesn't say
2121 /// anything about whether OnPathTo is actually executed or whether Root is
2122 /// actually poison.  This can be used to assess whether a new use of Root can
2123 /// be added at a location which is control equivalent with OnPathTo (such as
2124 /// immediately before it) without introducing UB which didn't previously
2125 /// exist.  Note that a false result conveys no information.
2126 static bool mustExecuteUBIfPoisonOnPathTo(Instruction *Root,
2127                                           Instruction *OnPathTo,
2128                                           DominatorTree *DT) {
2129   // Basic approach is to assume Root is poison, propagate poison forward
2130   // through all users we can easily track, and then check whether any of those
2131   // users are provable UB and must execute before out exiting block might
2132   // exit.
2133
2134   // The set of all recursive users we've visited (which are assumed to all be
2135   // poison because of said visit)
2136   SmallSet<const Value *, 16> KnownPoison;
2137   SmallVector<const Instruction*, 16> Worklist;
2138   Worklist.push_back(Root);
2139   while (!Worklist.empty()) {
2140     const Instruction *I = Worklist.pop_back_val();
2141
2142     // If we know this must trigger UB on a path leading our target.
2143     if (mustTriggerUB(I, KnownPoison) && DT->dominates(I, OnPathTo))
2144       return true;
2145
2146     // If we can't analyze propagation through this instruction, just skip it
2147     // and transitive users.  Safe as false is a conservative result.
2148     if (!propagatesFullPoison(I) && I != Root)
2149       continue;
2150
2151     if (KnownPoison.insert(I).second)
2152       for (const User *User : I->users())
2153         Worklist.push_back(cast<Instruction>(User));
2154   }
2155
2156   // Might be non-UB, or might have a path we couldn't prove must execute on
2157   // way to exiting bb.
2158   return false;
2159 }
2160
2161 /// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
2162 /// down to checking that all operands are constant and listing instructions
2163 /// that may hide undef.
2164 static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl<Value*> &Visited,
2165                                unsigned Depth) {
2166   if (isa<Constant>(V))
2167     return !isa<UndefValue>(V);
2168
2169   if (Depth >= 6)
2170     return false;
2171
2172   // Conservatively handle non-constant non-instructions. For example, Arguments
2173   // may be undef.
2174   Instruction *I = dyn_cast<Instruction>(V);
2175   if (!I)
2176     return false;
2177
2178   // Load and return values may be undef.
2179   if(I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I))
2180     return false;
2181
2182   // Optimistically handle other instructions.
2183   for (Value *Op : I->operands()) {
2184     if (!Visited.insert(Op).second)
2185       continue;
2186     if (!hasConcreteDefImpl(Op, Visited, Depth+1))
2187       return false;
2188   }
2189   return true;
2190 }
2191
2192 /// Return true if the given value is concrete. We must prove that undef can
2193 /// never reach it.
2194 ///
2195 /// TODO: If we decide that this is a good approach to checking for undef, we
2196 /// may factor it into a common location.
2197 static bool hasConcreteDef(Value *V) {
2198   SmallPtrSet<Value*, 8> Visited;
2199   Visited.insert(V);
2200   return hasConcreteDefImpl(V, Visited, 0);
2201 }
2202
2203 /// Return true if this IV has any uses other than the (soon to be rewritten)
2204 /// loop exit test.
2205 static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
2206   int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
2207   Value *IncV = Phi->getIncomingValue(LatchIdx);
2208
2209   for (User *U : Phi->users())
2210     if (U != Cond && U != IncV) return false;
2211
2212   for (User *U : IncV->users())
2213     if (U != Cond && U != Phi) return false;
2214   return true;
2215 }
2216
2217 /// Return true if the given phi is a "counter" in L.  A counter is an
2218 /// add recurance (of integer or pointer type) with an arbitrary start, and a
2219 /// step of 1.  Note that L must have exactly one latch.
2220 static bool isLoopCounter(PHINode* Phi, Loop *L,
2221                           ScalarEvolution *SE) {
2222   assert(Phi->getParent() == L->getHeader());
2223   assert(L->getLoopLatch());
2224
2225   if (!SE->isSCEVable(Phi->getType()))
2226     return false;
2227
2228   const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
2229   if (!AR || AR->getLoop() != L || !AR->isAffine())
2230     return false;
2231
2232   const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
2233   if (!Step || !Step->isOne())
2234     return false;
2235
2236   int LatchIdx = Phi->getBasicBlockIndex(L->getLoopLatch());
2237   Value *IncV = Phi->getIncomingValue(LatchIdx);
2238   return (getLoopPhiForCounter(IncV, L) == Phi);
2239 }
2240
2241 /// Search the loop header for a loop counter (anadd rec w/step of one)
2242 /// suitable for use by LFTR.  If multiple counters are available, select the
2243 /// "best" one based profitable heuristics.
2244 ///
2245 /// BECount may be an i8* pointer type. The pointer difference is already
2246 /// valid count without scaling the address stride, so it remains a pointer
2247 /// expression as far as SCEV is concerned.
2248 static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB,
2249                                 const SCEV *BECount,
2250                                 ScalarEvolution *SE, DominatorTree *DT) {
2251   uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());
2252
2253   Value *Cond = cast<BranchInst>(ExitingBB->getTerminator())->getCondition();
2254
2255   // Loop over all of the PHI nodes, looking for a simple counter.
2256   PHINode *BestPhi = nullptr;
2257   const SCEV *BestInit = nullptr;
2258   BasicBlock *LatchBlock = L->getLoopLatch();
2259   assert(LatchBlock && "Must be in simplified form");
2260   const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
2261
2262   for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
2263     PHINode *Phi = cast<PHINode>(I);
2264     if (!isLoopCounter(Phi, L, SE))
2265       continue;
2266
2267     // Avoid comparing an integer IV against a pointer Limit.
2268     if (BECount->getType()->isPointerTy() && !Phi->getType()->isPointerTy())
2269       continue;
2270
2271     const auto *AR = cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
2272
2273     // AR may be a pointer type, while BECount is an integer type.
2274     // AR may be wider than BECount. With eq/ne tests overflow is immaterial.
2275     // AR may not be a narrower type, or we may never exit.
2276     uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
2277     if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth))
2278       continue;
2279
2280     // Avoid reusing a potentially undef value to compute other values that may
2281     // have originally had a concrete definition.
2282     if (!hasConcreteDef(Phi)) {
2283       // We explicitly allow unknown phis as long as they are already used by
2284       // the loop exit test.  This is legal since performing LFTR could not
2285       // increase the number of undef users.
2286       Value *IncPhi = Phi->getIncomingValueForBlock(LatchBlock);
2287       if (!isLoopExitTestBasedOn(Phi, ExitingBB) &&
2288           !isLoopExitTestBasedOn(IncPhi, ExitingBB))
2289         continue;
2290     }
2291
2292     // Avoid introducing undefined behavior due to poison which didn't exist in
2293     // the original program.  (Annoyingly, the rules for poison and undef
2294     // propagation are distinct, so this does NOT cover the undef case above.)
2295     // We have to ensure that we don't introduce UB by introducing a use on an
2296     // iteration where said IV produces poison.  Our strategy here differs for
2297     // pointers and integer IVs.  For integers, we strip and reinfer as needed,
2298     // see code in linearFunctionTestReplace.  For pointers, we restrict
2299     // transforms as there is no good way to reinfer inbounds once lost.
2300     if (!Phi->getType()->isIntegerTy() &&
2301         !mustExecuteUBIfPoisonOnPathTo(Phi, ExitingBB->getTerminator(), DT))
2302       continue;
2303
2304     const SCEV *Init = AR->getStart();
2305
2306     if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) {
2307       // Don't force a live loop counter if another IV can be used.
2308       if (AlmostDeadIV(Phi, LatchBlock, Cond))
2309         continue;
2310
2311       // Prefer to count-from-zero. This is a more "canonical" counter form. It
2312       // also prefers integer to pointer IVs.
2313       if (BestInit->isZero() != Init->isZero()) {
2314         if (BestInit->isZero())
2315           continue;
2316       }
2317       // If two IVs both count from zero or both count from nonzero then the
2318       // narrower is likely a dead phi that has been widened. Use the wider phi
2319       // to allow the other to be eliminated.
2320       else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType()))
2321         continue;
2322     }
2323     BestPhi = Phi;
2324     BestInit = Init;
2325   }
2326   return BestPhi;
2327 }
2328
2329 /// Insert an IR expression which computes the value held by the IV IndVar
2330 /// (which must be an loop counter w/unit stride) after the backedge of loop L
2331 /// is taken ExitCount times.
2332 static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB,
2333                            const SCEV *ExitCount, bool UsePostInc, Loop *L,
2334                            SCEVExpander &Rewriter, ScalarEvolution *SE) {
2335   assert(isLoopCounter(IndVar, L, SE));
2336   const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
2337   const SCEV *IVInit = AR->getStart();
2338
2339   // IVInit may be a pointer while ExitCount is an integer when FindLoopCounter
2340   // finds a valid pointer IV. Sign extend ExitCount in order to materialize a
2341   // GEP. Avoid running SCEVExpander on a new pointer value, instead reusing
2342   // the existing GEPs whenever possible.
2343   if (IndVar->getType()->isPointerTy() &&
2344       !ExitCount->getType()->isPointerTy()) {
2345     // IVOffset will be the new GEP offset that is interpreted by GEP as a
2346     // signed value. ExitCount on the other hand represents the loop trip count,
2347     // which is an unsigned value. FindLoopCounter only allows induction
2348     // variables that have a positive unit stride of one. This means we don't
2349     // have to handle the case of negative offsets (yet) and just need to zero
2350     // extend ExitCount.
2351     Type *OfsTy = SE->getEffectiveSCEVType(IVInit->getType());
2352     const SCEV *IVOffset = SE->getTruncateOrZeroExtend(ExitCount, OfsTy);
2353     if (UsePostInc)
2354       IVOffset = SE->getAddExpr(IVOffset, SE->getOne(OfsTy));
2355
2356     // Expand the code for the iteration count.
2357     assert(SE->isLoopInvariant(IVOffset, L) &&
2358            "Computed iteration count is not loop invariant!");
2359
2360     // We could handle pointer IVs other than i8*, but we need to compensate for
2361     // gep index scaling.
2362     assert(SE->getSizeOfExpr(IntegerType::getInt64Ty(IndVar->getContext()),
2363                              cast<PointerType>(IndVar->getType())
2364                                  ->getElementType())->isOne() &&
2365            "unit stride pointer IV must be i8*");
2366
2367     const SCEV *IVLimit = SE->getAddExpr(IVInit, IVOffset);
2368     BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2369     return Rewriter.expandCodeFor(IVLimit, IndVar->getType(), BI);
2370   } else {
2371     // In any other case, convert both IVInit and ExitCount to integers before
2372     // comparing. This may result in SCEV expansion of pointers, but in practice
2373     // SCEV will fold the pointer arithmetic away as such:
2374     // BECount = (IVEnd - IVInit - 1) => IVLimit = IVInit (postinc).
2375     //
2376     // Valid Cases: (1) both integers is most common; (2) both may be pointers
2377     // for simple memset-style loops.
2378     //
2379     // IVInit integer and ExitCount pointer would only occur if a canonical IV
2380     // were generated on top of case #2, which is not expected.
2381
2382     assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");
2383     // For unit stride, IVCount = Start + ExitCount with 2's complement
2384     // overflow.
2385
2386     // For integer IVs, truncate the IV before computing IVInit + BECount,
2387     // unless we know apriori that the limit must be a constant when evaluated
2388     // in the bitwidth of the IV.  We prefer (potentially) keeping a truncate
2389     // of the IV in the loop over a (potentially) expensive expansion of the
2390     // widened exit count add(zext(add)) expression.
2391     if (SE->getTypeSizeInBits(IVInit->getType())
2392         > SE->getTypeSizeInBits(ExitCount->getType())) {
2393       if (isa<SCEVConstant>(IVInit) && isa<SCEVConstant>(ExitCount))
2394         ExitCount = SE->getZeroExtendExpr(ExitCount, IVInit->getType());
2395       else
2396         IVInit = SE->getTruncateExpr(IVInit, ExitCount->getType());
2397     }
2398
2399     const SCEV *IVLimit = SE->getAddExpr(IVInit, ExitCount);
2400
2401     if (UsePostInc)
2402       IVLimit = SE->getAddExpr(IVLimit, SE->getOne(IVLimit->getType()));
2403
2404     // Expand the code for the iteration count.
2405     assert(SE->isLoopInvariant(IVLimit, L) &&
2406            "Computed iteration count is not loop invariant!");
2407     // Ensure that we generate the same type as IndVar, or a smaller integer
2408     // type. In the presence of null pointer values, we have an integer type
2409     // SCEV expression (IVInit) for a pointer type IV value (IndVar).
2410     Type *LimitTy = ExitCount->getType()->isPointerTy() ?
2411       IndVar->getType() : ExitCount->getType();
2412     BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2413     return Rewriter.expandCodeFor(IVLimit, LimitTy, BI);
2414   }
2415 }
2416
2417 /// This method rewrites the exit condition of the loop to be a canonical !=
2418 /// comparison against the incremented loop induction variable.  This pass is
2419 /// able to rewrite the exit tests of any loop where the SCEV analysis can
2420 /// determine a loop-invariant trip count of the loop, which is actually a much
2421 /// broader range than just linear tests.
2422 bool IndVarSimplify::
2423 linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
2424                           const SCEV *ExitCount,
2425                           PHINode *IndVar, SCEVExpander &Rewriter) {
2426   assert(L->getLoopLatch() && "Loop no longer in simplified form?");
2427   assert(isLoopCounter(IndVar, L, SE));
2428   Instruction * const IncVar =
2429     cast<Instruction>(IndVar->getIncomingValueForBlock(L->getLoopLatch()));
2430
2431   // Initialize CmpIndVar to the preincremented IV.
2432   Value *CmpIndVar = IndVar;
2433   bool UsePostInc = false;
2434
2435   // If the exiting block is the same as the backedge block, we prefer to
2436   // compare against the post-incremented value, otherwise we must compare
2437   // against the preincremented value.
2438   if (ExitingBB == L->getLoopLatch()) {
2439     // For pointer IVs, we chose to not strip inbounds which requires us not
2440     // to add a potentially UB introducing use.  We need to either a) show
2441     // the loop test we're modifying is already in post-inc form, or b) show
2442     // that adding a use must not introduce UB.
2443     bool SafeToPostInc =
2444         IndVar->getType()->isIntegerTy() ||
2445         isLoopExitTestBasedOn(IncVar, ExitingBB) ||
2446         mustExecuteUBIfPoisonOnPathTo(IncVar, ExitingBB->getTerminator(), DT);
2447     if (SafeToPostInc) {
2448       UsePostInc = true;
2449       CmpIndVar = IncVar;
2450     }
2451   }
2452
2453   // It may be necessary to drop nowrap flags on the incrementing instruction
2454   // if either LFTR moves from a pre-inc check to a post-inc check (in which
2455   // case the increment might have previously been poison on the last iteration
2456   // only) or if LFTR switches to a different IV that was previously dynamically
2457   // dead (and as such may be arbitrarily poison). We remove any nowrap flags
2458   // that SCEV didn't infer for the post-inc addrec (even if we use a pre-inc
2459   // check), because the pre-inc addrec flags may be adopted from the original
2460   // instruction, while SCEV has to explicitly prove the post-inc nowrap flags.
2461   // TODO: This handling is inaccurate for one case: If we switch to a
2462   // dynamically dead IV that wraps on the first loop iteration only, which is
2463   // not covered by the post-inc addrec. (If the new IV was not dynamically
2464   // dead, it could not be poison on the first iteration in the first place.)
2465   if (auto *BO = dyn_cast<BinaryOperator>(IncVar)) {
2466     const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IncVar));
2467     if (BO->hasNoUnsignedWrap())
2468       BO->setHasNoUnsignedWrap(AR->hasNoUnsignedWrap());
2469     if (BO->hasNoSignedWrap())
2470       BO->setHasNoSignedWrap(AR->hasNoSignedWrap());
2471   }
2472
2473   Value *ExitCnt = genLoopLimit(
2474       IndVar, ExitingBB, ExitCount, UsePostInc, L, Rewriter, SE);
2475   assert(ExitCnt->getType()->isPointerTy() ==
2476              IndVar->getType()->isPointerTy() &&
2477          "genLoopLimit missed a cast");
2478
2479   // Insert a new icmp_ne or icmp_eq instruction before the branch.
2480   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2481   ICmpInst::Predicate P;
2482   if (L->contains(BI->getSuccessor(0)))
2483     P = ICmpInst::ICMP_NE;
2484   else
2485     P = ICmpInst::ICMP_EQ;
2486
2487   IRBuilder<> Builder(BI);
2488
2489   // The new loop exit condition should reuse the debug location of the
2490   // original loop exit condition.
2491   if (auto *Cond = dyn_cast<Instruction>(BI->getCondition()))
2492     Builder.SetCurrentDebugLocation(Cond->getDebugLoc());
2493
2494   // For integer IVs, if we evaluated the limit in the narrower bitwidth to
2495   // avoid the expensive expansion of the limit expression in the wider type,
2496   // emit a truncate to narrow the IV to the ExitCount type.  This is safe
2497   // since we know (from the exit count bitwidth), that we can't self-wrap in
2498   // the narrower type.
2499   unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType());
2500   unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType());
2501   if (CmpIndVarSize > ExitCntSize) {
2502     assert(!CmpIndVar->getType()->isPointerTy() &&
2503            !ExitCnt->getType()->isPointerTy());
2504
2505     // Before resorting to actually inserting the truncate, use the same
2506     // reasoning as from SimplifyIndvar::eliminateTrunc to see if we can extend
2507     // the other side of the comparison instead.  We still evaluate the limit
2508     // in the narrower bitwidth, we just prefer a zext/sext outside the loop to
2509     // a truncate within in.
2510     bool Extended = false;
2511     const SCEV *IV = SE->getSCEV(CmpIndVar);
2512     const SCEV *TruncatedIV = SE->getTruncateExpr(SE->getSCEV(CmpIndVar),
2513                                                   ExitCnt->getType());
2514     const SCEV *ZExtTrunc =
2515       SE->getZeroExtendExpr(TruncatedIV, CmpIndVar->getType());
2516
2517     if (ZExtTrunc == IV) {
2518       Extended = true;
2519       ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(),
2520                                    "wide.trip.count");
2521     } else {
2522       const SCEV *SExtTrunc =
2523         SE->getSignExtendExpr(TruncatedIV, CmpIndVar->getType());
2524       if (SExtTrunc == IV) {
2525         Extended = true;
2526         ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(),
2527                                      "wide.trip.count");
2528       }
2529     }
2530
2531     if (Extended) {
2532       bool Discard;
2533       L->makeLoopInvariant(ExitCnt, Discard);
2534     } else
2535       CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
2536                                       "lftr.wideiv");
2537   }
2538   LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
2539                     << "      LHS:" << *CmpIndVar << '\n'
2540                     << "       op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "==")
2541                     << "\n"
2542                     << "      RHS:\t" << *ExitCnt << "\n"
2543                     << "ExitCount:\t" << *ExitCount << "\n"
2544                     << "  was: " << *BI->getCondition() << "\n");
2545
2546   Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond");
2547   Value *OrigCond = BI->getCondition();
2548   // It's tempting to use replaceAllUsesWith here to fully replace the old
2549   // comparison, but that's not immediately safe, since users of the old
2550   // comparison may not be dominated by the new comparison. Instead, just
2551   // update the branch to use the new comparison; in the common case this
2552   // will make old comparison dead.
2553   BI->setCondition(Cond);
2554   DeadInsts.push_back(OrigCond);
2555
2556   ++NumLFTR;
2557   return true;
2558 }
2559
2560 //===----------------------------------------------------------------------===//
2561 //  sinkUnusedInvariants. A late subpass to cleanup loop preheaders.
2562 //===----------------------------------------------------------------------===//
2563
2564 /// If there's a single exit block, sink any loop-invariant values that
2565 /// were defined in the preheader but not used inside the loop into the
2566 /// exit block to reduce register pressure in the loop.
2567 bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
2568   BasicBlock *ExitBlock = L->getExitBlock();
2569   if (!ExitBlock) return false;
2570
2571   BasicBlock *Preheader = L->getLoopPreheader();
2572   if (!Preheader) return false;
2573
2574   bool MadeAnyChanges = false;
2575   BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt();
2576   BasicBlock::iterator I(Preheader->getTerminator());
2577   while (I != Preheader->begin()) {
2578     --I;
2579     // New instructions were inserted at the end of the preheader.
2580     if (isa<PHINode>(I))
2581       break;
2582
2583     // Don't move instructions which might have side effects, since the side
2584     // effects need to complete before instructions inside the loop.  Also don't
2585     // move instructions which might read memory, since the loop may modify
2586     // memory. Note that it's okay if the instruction might have undefined
2587     // behavior: LoopSimplify guarantees that the preheader dominates the exit
2588     // block.
2589     if (I->mayHaveSideEffects() || I->mayReadFromMemory())
2590       continue;
2591
2592     // Skip debug info intrinsics.
2593     if (isa<DbgInfoIntrinsic>(I))
2594       continue;
2595
2596     // Skip eh pad instructions.
2597     if (I->isEHPad())
2598       continue;
2599
2600     // Don't sink alloca: we never want to sink static alloca's out of the
2601     // entry block, and correctly sinking dynamic alloca's requires
2602     // checks for stacksave/stackrestore intrinsics.
2603     // FIXME: Refactor this check somehow?
2604     if (isa<AllocaInst>(I))
2605       continue;
2606
2607     // Determine if there is a use in or before the loop (direct or
2608     // otherwise).
2609     bool UsedInLoop = false;
2610     for (Use &U : I->uses()) {
2611       Instruction *User = cast<Instruction>(U.getUser());
2612       BasicBlock *UseBB = User->getParent();
2613       if (PHINode *P = dyn_cast<PHINode>(User)) {
2614         unsigned i =
2615           PHINode::getIncomingValueNumForOperand(U.getOperandNo());
2616         UseBB = P->getIncomingBlock(i);
2617       }
2618       if (UseBB == Preheader || L->contains(UseBB)) {
2619         UsedInLoop = true;
2620         break;
2621       }
2622     }
2623
2624     // If there is, the def must remain in the preheader.
2625     if (UsedInLoop)
2626       continue;
2627
2628     // Otherwise, sink it to the exit block.
2629     Instruction *ToMove = &*I;
2630     bool Done = false;
2631
2632     if (I != Preheader->begin()) {
2633       // Skip debug info intrinsics.
2634       do {
2635         --I;
2636       } while (isa<DbgInfoIntrinsic>(I) && I != Preheader->begin());
2637
2638       if (isa<DbgInfoIntrinsic>(I) && I == Preheader->begin())
2639         Done = true;
2640     } else {
2641       Done = true;
2642     }
2643
2644     MadeAnyChanges = true;
2645     ToMove->moveBefore(*ExitBlock, InsertPt);
2646     if (Done) break;
2647     InsertPt = ToMove->getIterator();
2648   }
2649
2650   return MadeAnyChanges;
2651 }
2652
2653 /// Return a symbolic upper bound for the backedge taken count of the loop.
2654 /// This is more general than getConstantMaxBackedgeTakenCount as it returns
2655 /// an arbitrary expression as opposed to only constants.
2656 /// TODO: Move into the ScalarEvolution class.
2657 static const SCEV* getMaxBackedgeTakenCount(ScalarEvolution &SE,
2658                                             DominatorTree &DT, Loop *L) {
2659   SmallVector<BasicBlock*, 16> ExitingBlocks;
2660   L->getExitingBlocks(ExitingBlocks);
2661
2662   // Form an expression for the maximum exit count possible for this loop. We
2663   // merge the max and exact information to approximate a version of
2664   // getConstantMaxBackedgeTakenCount which isn't restricted to just constants.
2665   SmallVector<const SCEV*, 4> ExitCounts;
2666   const SCEV *MaxConstEC = SE.getConstantMaxBackedgeTakenCount(L);
2667   if (!isa<SCEVCouldNotCompute>(MaxConstEC))
2668     ExitCounts.push_back(MaxConstEC);
2669   for (BasicBlock *ExitingBB : ExitingBlocks) {
2670     const SCEV *ExitCount = SE.getExitCount(L, ExitingBB);
2671     if (!isa<SCEVCouldNotCompute>(ExitCount)) {
2672       assert(DT.dominates(ExitingBB, L->getLoopLatch()) &&
2673              "We should only have known counts for exiting blocks that "
2674              "dominate latch!");
2675       ExitCounts.push_back(ExitCount);
2676     }
2677   }
2678   if (ExitCounts.empty())
2679     return SE.getCouldNotCompute();
2680   return SE.getUMinFromMismatchedTypes(ExitCounts);
2681 }
2682
2683 bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
2684   SmallVector<BasicBlock*, 16> ExitingBlocks;
2685   L->getExitingBlocks(ExitingBlocks);
2686
2687   // Remove all exits which aren't both rewriteable and analyzeable.
2688   auto NewEnd = llvm::remove_if(ExitingBlocks,
2689                                 [&](BasicBlock *ExitingBB) {
2690     // If our exitting block exits multiple loops, we can only rewrite the
2691     // innermost one.  Otherwise, we're changing how many times the innermost
2692     // loop runs before it exits.
2693     if (LI->getLoopFor(ExitingBB) != L)
2694       return true;
2695
2696     // Can't rewrite non-branch yet.
2697     BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
2698     if (!BI)
2699       return true;
2700
2701     // If already constant, nothing to do.
2702     if (isa<Constant>(BI->getCondition()))
2703       return true;
2704
2705     const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
2706     if (isa<SCEVCouldNotCompute>(ExitCount))
2707       return true;
2708     return false;
2709    });
2710   ExitingBlocks.erase(NewEnd, ExitingBlocks.end());
2711
2712   if (ExitingBlocks.empty())
2713     return false;
2714
2715   // Get a symbolic upper bound on the loop backedge taken count.
2716   const SCEV *MaxExitCount = getMaxBackedgeTakenCount(*SE, *DT, L);
2717   if (isa<SCEVCouldNotCompute>(MaxExitCount))
2718     return false;
2719
2720   // Visit our exit blocks in order of dominance.  We know from the fact that
2721   // all exits (left) are analyzeable that the must be a total dominance order
2722   // between them as each must dominate the latch.  The visit order only
2723   // matters for the provably equal case.
2724   llvm::sort(ExitingBlocks,
2725              [&](BasicBlock *A, BasicBlock *B) {
2726                // std::sort sorts in ascending order, so we want the inverse of
2727                // the normal dominance relation.
2728                if (DT->properlyDominates(A, B)) return true;
2729                if (DT->properlyDominates(B, A)) return false;
2730                llvm_unreachable("expected total dominance order!");
2731              });
2732 #ifdef ASSERT
2733   for (unsigned i = 1; i < ExitingBlocks.size(); i++) {
2734     assert(DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]));
2735   }
2736 #endif
2737
2738   auto FoldExit = [&](BasicBlock *ExitingBB, bool IsTaken) {
2739     BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2740     bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
2741     auto *OldCond = BI->getCondition();
2742     auto *NewCond = ConstantInt::get(OldCond->getType(),
2743                                      IsTaken ? ExitIfTrue : !ExitIfTrue);
2744     BI->setCondition(NewCond);
2745     if (OldCond->use_empty())
2746       DeadInsts.push_back(OldCond);
2747   };
2748
2749   bool Changed = false;
2750   SmallSet<const SCEV*, 8> DominatingExitCounts;
2751   for (BasicBlock *ExitingBB : ExitingBlocks) {
2752     const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
2753     assert(!isa<SCEVCouldNotCompute>(ExitCount) && "checked above");
2754
2755     // If we know we'd exit on the first iteration, rewrite the exit to
2756     // reflect this.  This does not imply the loop must exit through this
2757     // exit; there may be an earlier one taken on the first iteration.
2758     // TODO: Given we know the backedge can't be taken, we should go ahead
2759     // and break it.  Or at least, kill all the header phis and simplify.
2760     if (ExitCount->isZero()) {
2761       FoldExit(ExitingBB, true);
2762       Changed = true;
2763       continue;
2764     }
2765
2766     // If we end up with a pointer exit count, bail.  Note that we can end up
2767     // with a pointer exit count for one exiting block, and not for another in
2768     // the same loop.
2769     if (!ExitCount->getType()->isIntegerTy() ||
2770         !MaxExitCount->getType()->isIntegerTy())
2771       continue;
2772
2773     Type *WiderType =
2774       SE->getWiderType(MaxExitCount->getType(), ExitCount->getType());
2775     ExitCount = SE->getNoopOrZeroExtend(ExitCount, WiderType);
2776     MaxExitCount = SE->getNoopOrZeroExtend(MaxExitCount, WiderType);
2777     assert(MaxExitCount->getType() == ExitCount->getType());
2778
2779     // Can we prove that some other exit must be taken strictly before this
2780     // one?
2781     if (SE->isLoopEntryGuardedByCond(L, CmpInst::ICMP_ULT,
2782                                      MaxExitCount, ExitCount)) {
2783       FoldExit(ExitingBB, false);
2784       Changed = true;
2785       continue;
2786     }
2787
2788     // As we run, keep track of which exit counts we've encountered.  If we
2789     // find a duplicate, we've found an exit which would have exited on the
2790     // exiting iteration, but (from the visit order) strictly follows another
2791     // which does the same and is thus dead.
2792     if (!DominatingExitCounts.insert(ExitCount).second) {
2793       FoldExit(ExitingBB, false);
2794       Changed = true;
2795       continue;
2796     }
2797
2798     // TODO: There might be another oppurtunity to leverage SCEV's reasoning
2799     // here.  If we kept track of the min of dominanting exits so far, we could
2800     // discharge exits with EC >= MDEC. This is less powerful than the existing
2801     // transform (since later exits aren't considered), but potentially more
2802     // powerful for any case where SCEV can prove a >=u b, but neither a == b
2803     // or a >u b.  Such a case is not currently known.
2804   }
2805   return Changed;
2806 }
2807
2808 bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
2809   SmallVector<BasicBlock*, 16> ExitingBlocks;
2810   L->getExitingBlocks(ExitingBlocks);
2811
2812   bool Changed = false;
2813
2814   // Finally, see if we can rewrite our exit conditions into a loop invariant
2815   // form.  If we have a read-only loop, and we can tell that we must exit down
2816   // a path which does not need any of the values computed within the loop, we
2817   // can rewrite the loop to exit on the first iteration.  Note that this
2818   // doesn't either a) tell us the loop exits on the first iteration (unless
2819   // *all* exits are predicateable) or b) tell us *which* exit might be taken.
2820   // This transformation looks a lot like a restricted form of dead loop
2821   // elimination, but restricted to read-only loops and without neccesssarily
2822   // needing to kill the loop entirely.
2823   if (!LoopPredication)
2824     return Changed;
2825
2826   if (!SE->hasLoopInvariantBackedgeTakenCount(L))
2827     return Changed;
2828
2829   // Note: ExactBTC is the exact backedge taken count *iff* the loop exits
2830   // through *explicit* control flow.  We have to eliminate the possibility of
2831   // implicit exits (see below) before we know it's truly exact.
2832   const SCEV *ExactBTC = SE->getBackedgeTakenCount(L);
2833   if (isa<SCEVCouldNotCompute>(ExactBTC) ||
2834       !SE->isLoopInvariant(ExactBTC, L) ||
2835       !isSafeToExpand(ExactBTC, *SE))
2836     return Changed;
2837
2838   auto BadExit = [&](BasicBlock *ExitingBB) {
2839     // If our exiting block exits multiple loops, we can only rewrite the
2840     // innermost one.  Otherwise, we're changing how many times the innermost
2841     // loop runs before it exits.
2842     if (LI->getLoopFor(ExitingBB) != L)
2843       return true;
2844
2845     // Can't rewrite non-branch yet.
2846     BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
2847     if (!BI)
2848       return true;
2849
2850     // If already constant, nothing to do.
2851     if (isa<Constant>(BI->getCondition()))
2852       return true;
2853
2854     // If the exit block has phis, we need to be able to compute the values
2855     // within the loop which contains them.  This assumes trivially lcssa phis
2856     // have already been removed; TODO: generalize
2857     BasicBlock *ExitBlock =
2858     BI->getSuccessor(L->contains(BI->getSuccessor(0)) ? 1 : 0);
2859     if (!ExitBlock->phis().empty())
2860       return true;
2861
2862     const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
2863     assert(!isa<SCEVCouldNotCompute>(ExactBTC) && "implied by having exact trip count");
2864     if (!SE->isLoopInvariant(ExitCount, L) ||
2865         !isSafeToExpand(ExitCount, *SE))
2866       return true;
2867
2868     return false;
2869   };
2870
2871   // If we have any exits which can't be predicated themselves, than we can't
2872   // predicate any exit which isn't guaranteed to execute before it.  Consider
2873   // two exits (a) and (b) which would both exit on the same iteration.  If we
2874   // can predicate (b), but not (a), and (a) preceeds (b) along some path, then
2875   // we could convert a loop from exiting through (a) to one exiting through
2876   // (b).  Note that this problem exists only for exits with the same exit
2877   // count, and we could be more aggressive when exit counts are known inequal.
2878   llvm::sort(ExitingBlocks,
2879             [&](BasicBlock *A, BasicBlock *B) {
2880               // std::sort sorts in ascending order, so we want the inverse of
2881               // the normal dominance relation, plus a tie breaker for blocks
2882               // unordered by dominance.
2883               if (DT->properlyDominates(A, B)) return true;
2884               if (DT->properlyDominates(B, A)) return false;
2885               return A->getName() < B->getName();
2886             });
2887   // Check to see if our exit blocks are a total order (i.e. a linear chain of
2888   // exits before the backedge).  If they aren't, reasoning about reachability
2889   // is complicated and we choose not to for now.
2890   for (unsigned i = 1; i < ExitingBlocks.size(); i++)
2891     if (!DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]))
2892       return Changed;
2893
2894   // Given our sorted total order, we know that exit[j] must be evaluated
2895   // after all exit[i] such j > i.
2896   for (unsigned i = 0, e = ExitingBlocks.size(); i < e; i++)
2897     if (BadExit(ExitingBlocks[i])) {
2898       ExitingBlocks.resize(i);
2899       break;
2900     }
2901
2902   if (ExitingBlocks.empty())
2903     return Changed;
2904
2905   // We rely on not being able to reach an exiting block on a later iteration
2906   // then it's statically compute exit count.  The implementaton of
2907   // getExitCount currently has this invariant, but assert it here so that
2908   // breakage is obvious if this ever changes..
2909   assert(llvm::all_of(ExitingBlocks, [&](BasicBlock *ExitingBB) {
2910         return DT->dominates(ExitingBB, L->getLoopLatch());
2911       }));
2912
2913   // At this point, ExitingBlocks consists of only those blocks which are
2914   // predicatable.  Given that, we know we have at least one exit we can
2915   // predicate if the loop is doesn't have side effects and doesn't have any
2916   // implicit exits (because then our exact BTC isn't actually exact).
2917   // @Reviewers - As structured, this is O(I^2) for loop nests.  Any
2918   // suggestions on how to improve this?  I can obviously bail out for outer
2919   // loops, but that seems less than ideal.  MemorySSA can find memory writes,
2920   // is that enough for *all* side effects?
2921   for (BasicBlock *BB : L->blocks())
2922     for (auto &I : *BB)
2923       // TODO:isGuaranteedToTransfer
2924       if (I.mayHaveSideEffects() || I.mayThrow())
2925         return Changed;
2926
2927   // Finally, do the actual predication for all predicatable blocks.  A couple
2928   // of notes here:
2929   // 1) We don't bother to constant fold dominated exits with identical exit
2930   //    counts; that's simply a form of CSE/equality propagation and we leave
2931   //    it for dedicated passes.
2932   // 2) We insert the comparison at the branch.  Hoisting introduces additional
2933   //    legality constraints and we leave that to dedicated logic.  We want to
2934   //    predicate even if we can't insert a loop invariant expression as
2935   //    peeling or unrolling will likely reduce the cost of the otherwise loop
2936   //    varying check.
2937   Rewriter.setInsertPoint(L->getLoopPreheader()->getTerminator());
2938   IRBuilder<> B(L->getLoopPreheader()->getTerminator());
2939   Value *ExactBTCV = nullptr; //lazy generated if needed
2940   for (BasicBlock *ExitingBB : ExitingBlocks) {
2941     const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
2942
2943     auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
2944     Value *NewCond;
2945     if (ExitCount == ExactBTC) {
2946       NewCond = L->contains(BI->getSuccessor(0)) ?
2947         B.getFalse() : B.getTrue();
2948     } else {
2949       Value *ECV = Rewriter.expandCodeFor(ExitCount);
2950       if (!ExactBTCV)
2951         ExactBTCV = Rewriter.expandCodeFor(ExactBTC);
2952       Value *RHS = ExactBTCV;
2953       if (ECV->getType() != RHS->getType()) {
2954         Type *WiderTy = SE->getWiderType(ECV->getType(), RHS->getType());
2955         ECV = B.CreateZExt(ECV, WiderTy);
2956         RHS = B.CreateZExt(RHS, WiderTy);
2957       }
2958       auto Pred = L->contains(BI->getSuccessor(0)) ?
2959         ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
2960       NewCond = B.CreateICmp(Pred, ECV, RHS);
2961     }
2962     Value *OldCond = BI->getCondition();
2963     BI->setCondition(NewCond);
2964     if (OldCond->use_empty())
2965       DeadInsts.push_back(OldCond);
2966     Changed = true;
2967   }
2968
2969   return Changed;
2970 }
2971
2972 //===----------------------------------------------------------------------===//
2973 //  IndVarSimplify driver. Manage several subpasses of IV simplification.
2974 //===----------------------------------------------------------------------===//
2975
2976 bool IndVarSimplify::run(Loop *L) {
2977   // We need (and expect!) the incoming loop to be in LCSSA.
2978   assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
2979          "LCSSA required to run indvars!");
2980   bool Changed = false;
2981
2982   // If LoopSimplify form is not available, stay out of trouble. Some notes:
2983   //  - LSR currently only supports LoopSimplify-form loops. Indvars'
2984   //    canonicalization can be a pessimization without LSR to "clean up"
2985   //    afterwards.
2986   //  - We depend on having a preheader; in particular,
2987   //    Loop::getCanonicalInductionVariable only supports loops with preheaders,
2988   //    and we're in trouble if we can't find the induction variable even when
2989   //    we've manually inserted one.
2990   //  - LFTR relies on having a single backedge.
2991   if (!L->isLoopSimplifyForm())
2992     return false;
2993
2994   // If there are any floating-point recurrences, attempt to
2995   // transform them to use integer recurrences.
2996   Changed |= rewriteNonIntegerIVs(L);
2997
2998 #ifndef NDEBUG
2999   // Used below for a consistency check only
3000   const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
3001 #endif
3002
3003   // Create a rewriter object which we'll use to transform the code with.
3004   SCEVExpander Rewriter(*SE, DL, "indvars");
3005 #ifndef NDEBUG
3006   Rewriter.setDebugType(DEBUG_TYPE);
3007 #endif
3008
3009   // Eliminate redundant IV users.
3010   //
3011   // Simplification works best when run before other consumers of SCEV. We
3012   // attempt to avoid evaluating SCEVs for sign/zero extend operations until
3013   // other expressions involving loop IVs have been evaluated. This helps SCEV
3014   // set no-wrap flags before normalizing sign/zero extension.
3015   Rewriter.disableCanonicalMode();
3016   Changed |= simplifyAndExtend(L, Rewriter, LI);
3017
3018   // Check to see if we can compute the final value of any expressions
3019   // that are recurrent in the loop, and substitute the exit values from the
3020   // loop into any instructions outside of the loop that use the final values
3021   // of the current expressions.
3022   if (ReplaceExitValue != NeverRepl)
3023     Changed |= rewriteLoopExitValues(L, Rewriter);
3024
3025   // Eliminate redundant IV cycles.
3026   NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts);
3027
3028   // Try to eliminate loop exits based on analyzeable exit counts
3029   Changed |= optimizeLoopExits(L, Rewriter);
3030
3031   // Try to form loop invariant tests for loop exits by changing how many
3032   // iterations of the loop run when that is unobservable.
3033   Changed |= predicateLoopExits(L, Rewriter);
3034
3035   // If we have a trip count expression, rewrite the loop's exit condition
3036   // using it.
3037   if (!DisableLFTR) {
3038     SmallVector<BasicBlock*, 16> ExitingBlocks;
3039     L->getExitingBlocks(ExitingBlocks);
3040     for (BasicBlock *ExitingBB : ExitingBlocks) {
3041       // Can't rewrite non-branch yet.
3042       if (!isa<BranchInst>(ExitingBB->getTerminator()))
3043         continue;
3044
3045       // If our exitting block exits multiple loops, we can only rewrite the
3046       // innermost one.  Otherwise, we're changing how many times the innermost
3047       // loop runs before it exits.
3048       if (LI->getLoopFor(ExitingBB) != L)
3049         continue;
3050
3051       if (!needsLFTR(L, ExitingBB))
3052         continue;
3053
3054       const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
3055       if (isa<SCEVCouldNotCompute>(ExitCount))
3056         continue;
3057
3058       // This was handled above, but as we form SCEVs, we can sometimes refine
3059       // existing ones; this allows exit counts to be folded to zero which
3060       // weren't when optimizeLoopExits saw them.  Arguably, we should iterate
3061       // until stable to handle cases like this better.
3062       if (ExitCount->isZero())
3063         continue;
3064
3065       PHINode *IndVar = FindLoopCounter(L, ExitingBB, ExitCount, SE, DT);
3066       if (!IndVar)
3067         continue;
3068
3069       // Avoid high cost expansions.  Note: This heuristic is questionable in
3070       // that our definition of "high cost" is not exactly principled.
3071       if (Rewriter.isHighCostExpansion(ExitCount, L))
3072         continue;
3073
3074       // Check preconditions for proper SCEVExpander operation. SCEV does not
3075       // express SCEVExpander's dependencies, such as LoopSimplify. Instead
3076       // any pass that uses the SCEVExpander must do it. This does not work
3077       // well for loop passes because SCEVExpander makes assumptions about
3078       // all loops, while LoopPassManager only forces the current loop to be
3079       // simplified.
3080       //
3081       // FIXME: SCEV expansion has no way to bail out, so the caller must
3082       // explicitly check any assumptions made by SCEV. Brittle.
3083       const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(ExitCount);
3084       if (!AR || AR->getLoop()->getLoopPreheader())
3085         Changed |= linearFunctionTestReplace(L, ExitingBB,
3086                                              ExitCount, IndVar,
3087                                              Rewriter);
3088     }
3089   }
3090   // Clear the rewriter cache, because values that are in the rewriter's cache
3091   // can be deleted in the loop below, causing the AssertingVH in the cache to
3092   // trigger.
3093   Rewriter.clear();
3094
3095   // Now that we're done iterating through lists, clean up any instructions
3096   // which are now dead.
3097   while (!DeadInsts.empty())
3098     if (Instruction *Inst =
3099             dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val()))
3100       Changed |= RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI);
3101
3102   // The Rewriter may not be used from this point on.
3103
3104   // Loop-invariant instructions in the preheader that aren't used in the
3105   // loop may be sunk below the loop to reduce register pressure.
3106   Changed |= sinkUnusedInvariants(L);
3107
3108   // rewriteFirstIterationLoopExitValues does not rely on the computation of
3109   // trip count and therefore can further simplify exit values in addition to
3110   // rewriteLoopExitValues.
3111   Changed |= rewriteFirstIterationLoopExitValues(L);
3112
3113   // Clean up dead instructions.
3114   Changed |= DeleteDeadPHIs(L->getHeader(), TLI);
3115
3116   // Check a post-condition.
3117   assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
3118          "Indvars did not preserve LCSSA!");
3119
3120   // Verify that LFTR, and any other change have not interfered with SCEV's
3121   // ability to compute trip count.
3122 #ifndef NDEBUG
3123   if (VerifyIndvars && !isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {
3124     SE->forgetLoop(L);
3125     const SCEV *NewBECount = SE->getBackedgeTakenCount(L);
3126     if (SE->getTypeSizeInBits(BackedgeTakenCount->getType()) <
3127         SE->getTypeSizeInBits(NewBECount->getType()))
3128       NewBECount = SE->getTruncateOrNoop(NewBECount,
3129                                          BackedgeTakenCount->getType());
3130     else
3131       BackedgeTakenCount = SE->getTruncateOrNoop(BackedgeTakenCount,
3132                                                  NewBECount->getType());
3133     assert(BackedgeTakenCount == NewBECount && "indvars must preserve SCEV");
3134   }
3135 #endif
3136
3137   return Changed;
3138 }
3139
3140 PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM,
3141                                           LoopStandardAnalysisResults &AR,
3142                                           LPMUpdater &) {
3143   Function *F = L.getHeader()->getParent();
3144   const DataLayout &DL = F->getParent()->getDataLayout();
3145
3146   IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI);
3147   if (!IVS.run(&L))
3148     return PreservedAnalyses::all();
3149
3150   auto PA = getLoopPassPreservedAnalyses();
3151   PA.preserveSet<CFGAnalyses>();
3152   return PA;
3153 }
3154
3155 namespace {
3156
3157 struct IndVarSimplifyLegacyPass : public LoopPass {
3158   static char ID; // Pass identification, replacement for typeid
3159
3160   IndVarSimplifyLegacyPass() : LoopPass(ID) {
3161     initializeIndVarSimplifyLegacyPassPass(*PassRegistry::getPassRegistry());
3162   }
3163
3164   bool runOnLoop(Loop *L, LPPassManager &LPM) override {
3165     if (skipLoop(L))
3166       return false;
3167
3168     auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
3169     auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
3170     auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
3171     auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
3172     auto *TLI = TLIP ? &TLIP->getTLI(*L->getHeader()->getParent()) : nullptr;
3173     auto *TTIP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
3174     auto *TTI = TTIP ? &TTIP->getTTI(*L->getHeader()->getParent()) : nullptr;
3175     const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
3176
3177     IndVarSimplify IVS(LI, SE, DT, DL, TLI, TTI);
3178     return IVS.run(L);
3179   }
3180
3181   void getAnalysisUsage(AnalysisUsage &AU) const override {
3182     AU.setPreservesCFG();
3183     getLoopAnalysisUsage(AU);
3184   }
3185 };
3186
3187 } // end anonymous namespace
3188
3189 char IndVarSimplifyLegacyPass::ID = 0;
3190
3191 INITIALIZE_PASS_BEGIN(IndVarSimplifyLegacyPass, "indvars",
3192                       "Induction Variable Simplification", false, false)
3193 INITIALIZE_PASS_DEPENDENCY(LoopPass)
3194 INITIALIZE_PASS_END(IndVarSimplifyLegacyPass, "indvars",
3195                     "Induction Variable Simplification", false, false)
3196
3197 Pass *llvm::createIndVarSimplifyPass() {
3198   return new IndVarSimplifyLegacyPass();
3199 }