//===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This transformation analyzes and transforms the induction variables (and
// computations derived from them) into simpler forms suitable for subsequent
// analysis and transformation.
//
// If the trip count of a loop is computable, this pass also makes the following
// changes:
//   1. The exit condition for the loop is canonicalized to compare the
//      induction value against the exit value.  This turns loops like:
//        'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
//   2. Any use outside of the loop of an expression derived from the indvar
//      is changed to compute the derived value outside of the loop, eliminating
//      the dependence on the exit value of the induction variable.  If the only
//      purpose of the loop is to compute the exit value of some derived
//      expression, this transformation will make the loop dead.
//
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Scalar/IndVarSimplify.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
#include "llvm/Transforms/Utils/SimplifyIndVar.h"
#include <cassert>
#include <cstdint>
#include <utility>
83 using namespace PatternMatch
;
85 #define DEBUG_TYPE "indvars"
87 STATISTIC(NumWidened
, "Number of indvars widened");
88 STATISTIC(NumReplaced
, "Number of exit values replaced");
89 STATISTIC(NumLFTR
, "Number of loop exit tests replaced");
90 STATISTIC(NumElimExt
, "Number of IV sign/zero extends eliminated");
91 STATISTIC(NumElimIV
, "Number of congruent IVs eliminated");
93 static cl::opt
<ReplaceExitVal
> ReplaceExitValue(
94 "replexitval", cl::Hidden
, cl::init(OnlyCheapRepl
),
95 cl::desc("Choose the strategy to replace exit value in IndVarSimplify"),
97 clEnumValN(NeverRepl
, "never", "never replace exit value"),
98 clEnumValN(OnlyCheapRepl
, "cheap",
99 "only replace exit value when the cost is cheap"),
101 UnusedIndVarInLoop
, "unusedindvarinloop",
102 "only replace exit value when it is an unused "
103 "induction variable in the loop and has cheap replacement cost"),
104 clEnumValN(NoHardUse
, "noharduse",
105 "only replace exit values when loop def likely dead"),
106 clEnumValN(AlwaysRepl
, "always",
107 "always replace exit value whenever possible")));
109 static cl::opt
<bool> UsePostIncrementRanges(
110 "indvars-post-increment-ranges", cl::Hidden
,
111 cl::desc("Use post increment control-dependent ranges in IndVarSimplify"),
115 DisableLFTR("disable-lftr", cl::Hidden
, cl::init(false),
116 cl::desc("Disable Linear Function Test Replace optimization"));
119 LoopPredication("indvars-predicate-loops", cl::Hidden
, cl::init(true),
120 cl::desc("Predicate conditions in read only loops"));
123 AllowIVWidening("indvars-widen-indvars", cl::Hidden
, cl::init(true),
124 cl::desc("Allow widening of indvars to eliminate s/zext"));
128 class IndVarSimplify
{
132 const DataLayout
&DL
;
133 TargetLibraryInfo
*TLI
;
134 const TargetTransformInfo
*TTI
;
135 std::unique_ptr
<MemorySSAUpdater
> MSSAU
;
137 SmallVector
<WeakTrackingVH
, 16> DeadInsts
;
140 bool handleFloatingPointIV(Loop
*L
, PHINode
*PH
);
141 bool rewriteNonIntegerIVs(Loop
*L
);
143 bool simplifyAndExtend(Loop
*L
, SCEVExpander
&Rewriter
, LoopInfo
*LI
);
144 /// Try to improve our exit conditions by converting condition from signed
145 /// to unsigned or rotating computation out of the loop.
146 /// (See inline comment about why this is duplicated from simplifyAndExtend)
147 bool canonicalizeExitCondition(Loop
*L
);
148 /// Try to eliminate loop exits based on analyzeable exit counts
149 bool optimizeLoopExits(Loop
*L
, SCEVExpander
&Rewriter
);
150 /// Try to form loop invariant tests for loop exits by changing how many
151 /// iterations of the loop run when that is unobservable.
152 bool predicateLoopExits(Loop
*L
, SCEVExpander
&Rewriter
);
154 bool rewriteFirstIterationLoopExitValues(Loop
*L
);
156 bool linearFunctionTestReplace(Loop
*L
, BasicBlock
*ExitingBB
,
157 const SCEV
*ExitCount
,
158 PHINode
*IndVar
, SCEVExpander
&Rewriter
);
160 bool sinkUnusedInvariants(Loop
*L
);
163 IndVarSimplify(LoopInfo
*LI
, ScalarEvolution
*SE
, DominatorTree
*DT
,
164 const DataLayout
&DL
, TargetLibraryInfo
*TLI
,
165 TargetTransformInfo
*TTI
, MemorySSA
*MSSA
, bool WidenIndVars
)
166 : LI(LI
), SE(SE
), DT(DT
), DL(DL
), TLI(TLI
), TTI(TTI
),
167 WidenIndVars(WidenIndVars
) {
169 MSSAU
= std::make_unique
<MemorySSAUpdater
>(MSSA
);
175 } // end anonymous namespace
177 //===----------------------------------------------------------------------===//
178 // rewriteNonIntegerIVs and helpers. Prefer integer IVs.
179 //===----------------------------------------------------------------------===//
181 /// Convert APF to an integer, if possible.
182 static bool ConvertToSInt(const APFloat
&APF
, int64_t &IntVal
) {
183 bool isExact
= false;
184 // See if we can convert this to an int64_t
186 if (APF
.convertToInteger(MutableArrayRef(UIntVal
), 64, true,
187 APFloat::rmTowardZero
, &isExact
) != APFloat::opOK
||
194 /// If the loop has floating induction variable then insert corresponding
195 /// integer induction variable if possible.
197 /// for(double i = 0; i < 10000; ++i)
199 /// is converted into
200 /// for(int i = 0; i < 10000; ++i)
202 bool IndVarSimplify::handleFloatingPointIV(Loop
*L
, PHINode
*PN
) {
203 unsigned IncomingEdge
= L
->contains(PN
->getIncomingBlock(0));
204 unsigned BackEdge
= IncomingEdge
^1;
206 // Check incoming value.
207 auto *InitValueVal
= dyn_cast
<ConstantFP
>(PN
->getIncomingValue(IncomingEdge
));
210 if (!InitValueVal
|| !ConvertToSInt(InitValueVal
->getValueAPF(), InitValue
))
213 // Check IV increment. Reject this PN if increment operation is not
214 // an add or increment value can not be represented by an integer.
215 auto *Incr
= dyn_cast
<BinaryOperator
>(PN
->getIncomingValue(BackEdge
));
216 if (Incr
== nullptr || Incr
->getOpcode() != Instruction::FAdd
) return false;
218 // If this is not an add of the PHI with a constantfp, or if the constant fp
219 // is not an integer, bail out.
220 ConstantFP
*IncValueVal
= dyn_cast
<ConstantFP
>(Incr
->getOperand(1));
222 if (IncValueVal
== nullptr || Incr
->getOperand(0) != PN
||
223 !ConvertToSInt(IncValueVal
->getValueAPF(), IncValue
))
226 // Check Incr uses. One user is PN and the other user is an exit condition
227 // used by the conditional terminator.
228 Value::user_iterator IncrUse
= Incr
->user_begin();
229 Instruction
*U1
= cast
<Instruction
>(*IncrUse
++);
230 if (IncrUse
== Incr
->user_end()) return false;
231 Instruction
*U2
= cast
<Instruction
>(*IncrUse
++);
232 if (IncrUse
!= Incr
->user_end()) return false;
234 // Find exit condition, which is an fcmp. If it doesn't exist, or if it isn't
235 // only used by a branch, we can't transform it.
236 FCmpInst
*Compare
= dyn_cast
<FCmpInst
>(U1
);
238 Compare
= dyn_cast
<FCmpInst
>(U2
);
239 if (!Compare
|| !Compare
->hasOneUse() ||
240 !isa
<BranchInst
>(Compare
->user_back()))
243 BranchInst
*TheBr
= cast
<BranchInst
>(Compare
->user_back());
245 // We need to verify that the branch actually controls the iteration count
246 // of the loop. If not, the new IV can overflow and no one will notice.
247 // The branch block must be in the loop and one of the successors must be out
249 assert(TheBr
->isConditional() && "Can't use fcmp if not conditional");
250 if (!L
->contains(TheBr
->getParent()) ||
251 (L
->contains(TheBr
->getSuccessor(0)) &&
252 L
->contains(TheBr
->getSuccessor(1))))
255 // If it isn't a comparison with an integer-as-fp (the exit value), we can't
257 ConstantFP
*ExitValueVal
= dyn_cast
<ConstantFP
>(Compare
->getOperand(1));
259 if (ExitValueVal
== nullptr ||
260 !ConvertToSInt(ExitValueVal
->getValueAPF(), ExitValue
))
263 // Find new predicate for integer comparison.
264 CmpInst::Predicate NewPred
= CmpInst::BAD_ICMP_PREDICATE
;
265 switch (Compare
->getPredicate()) {
266 default: return false; // Unknown comparison.
267 case CmpInst::FCMP_OEQ
:
268 case CmpInst::FCMP_UEQ
: NewPred
= CmpInst::ICMP_EQ
; break;
269 case CmpInst::FCMP_ONE
:
270 case CmpInst::FCMP_UNE
: NewPred
= CmpInst::ICMP_NE
; break;
271 case CmpInst::FCMP_OGT
:
272 case CmpInst::FCMP_UGT
: NewPred
= CmpInst::ICMP_SGT
; break;
273 case CmpInst::FCMP_OGE
:
274 case CmpInst::FCMP_UGE
: NewPred
= CmpInst::ICMP_SGE
; break;
275 case CmpInst::FCMP_OLT
:
276 case CmpInst::FCMP_ULT
: NewPred
= CmpInst::ICMP_SLT
; break;
277 case CmpInst::FCMP_OLE
:
278 case CmpInst::FCMP_ULE
: NewPred
= CmpInst::ICMP_SLE
; break;
281 // We convert the floating point induction variable to a signed i32 value if
282 // we can. This is only safe if the comparison will not overflow in a way
283 // that won't be trapped by the integer equivalent operations. Check for this
285 // TODO: We could use i64 if it is native and the range requires it.
287 // The start/stride/exit values must all fit in signed i32.
288 if (!isInt
<32>(InitValue
) || !isInt
<32>(IncValue
) || !isInt
<32>(ExitValue
))
291 // If not actually striding (add x, 0.0), avoid touching the code.
295 // Positive and negative strides have different safety conditions.
297 // If we have a positive stride, we require the init to be less than the
299 if (InitValue
>= ExitValue
)
302 uint32_t Range
= uint32_t(ExitValue
-InitValue
);
303 // Check for infinite loop, either:
304 // while (i <= Exit) or until (i > Exit)
305 if (NewPred
== CmpInst::ICMP_SLE
|| NewPred
== CmpInst::ICMP_SGT
) {
306 if (++Range
== 0) return false; // Range overflows.
309 unsigned Leftover
= Range
% uint32_t(IncValue
);
311 // If this is an equality comparison, we require that the strided value
312 // exactly land on the exit value, otherwise the IV condition will wrap
313 // around and do things the fp IV wouldn't.
314 if ((NewPred
== CmpInst::ICMP_EQ
|| NewPred
== CmpInst::ICMP_NE
) &&
318 // If the stride would wrap around the i32 before exiting, we can't
320 if (Leftover
!= 0 && int32_t(ExitValue
+IncValue
) < ExitValue
)
323 // If we have a negative stride, we require the init to be greater than the
325 if (InitValue
<= ExitValue
)
328 uint32_t Range
= uint32_t(InitValue
-ExitValue
);
329 // Check for infinite loop, either:
330 // while (i >= Exit) or until (i < Exit)
331 if (NewPred
== CmpInst::ICMP_SGE
|| NewPred
== CmpInst::ICMP_SLT
) {
332 if (++Range
== 0) return false; // Range overflows.
335 unsigned Leftover
= Range
% uint32_t(-IncValue
);
337 // If this is an equality comparison, we require that the strided value
338 // exactly land on the exit value, otherwise the IV condition will wrap
339 // around and do things the fp IV wouldn't.
340 if ((NewPred
== CmpInst::ICMP_EQ
|| NewPred
== CmpInst::ICMP_NE
) &&
344 // If the stride would wrap around the i32 before exiting, we can't
346 if (Leftover
!= 0 && int32_t(ExitValue
+IncValue
) > ExitValue
)
350 IntegerType
*Int32Ty
= Type::getInt32Ty(PN
->getContext());
352 // Insert new integer induction variable.
353 PHINode
*NewPHI
= PHINode::Create(Int32Ty
, 2, PN
->getName()+".int", PN
);
354 NewPHI
->addIncoming(ConstantInt::get(Int32Ty
, InitValue
),
355 PN
->getIncomingBlock(IncomingEdge
));
358 BinaryOperator::CreateAdd(NewPHI
, ConstantInt::get(Int32Ty
, IncValue
),
359 Incr
->getName()+".int", Incr
);
360 NewPHI
->addIncoming(NewAdd
, PN
->getIncomingBlock(BackEdge
));
362 ICmpInst
*NewCompare
= new ICmpInst(TheBr
, NewPred
, NewAdd
,
363 ConstantInt::get(Int32Ty
, ExitValue
),
366 // In the following deletions, PN may become dead and may be deleted.
367 // Use a WeakTrackingVH to observe whether this happens.
368 WeakTrackingVH WeakPH
= PN
;
370 // Delete the old floating point exit comparison. The branch starts using the
372 NewCompare
->takeName(Compare
);
373 Compare
->replaceAllUsesWith(NewCompare
);
374 RecursivelyDeleteTriviallyDeadInstructions(Compare
, TLI
, MSSAU
.get());
376 // Delete the old floating point increment.
377 Incr
->replaceAllUsesWith(PoisonValue::get(Incr
->getType()));
378 RecursivelyDeleteTriviallyDeadInstructions(Incr
, TLI
, MSSAU
.get());
380 // If the FP induction variable still has uses, this is because something else
381 // in the loop uses its value. In order to canonicalize the induction
382 // variable, we chose to eliminate the IV and rewrite it in terms of an
385 // We give preference to sitofp over uitofp because it is faster on most
388 Value
*Conv
= new SIToFPInst(NewPHI
, PN
->getType(), "indvar.conv",
389 &*PN
->getParent()->getFirstInsertionPt());
390 PN
->replaceAllUsesWith(Conv
);
391 RecursivelyDeleteTriviallyDeadInstructions(PN
, TLI
, MSSAU
.get());
396 bool IndVarSimplify::rewriteNonIntegerIVs(Loop
*L
) {
397 // First step. Check to see if there are any floating-point recurrences.
398 // If there are, change them into integer recurrences, permitting analysis by
399 // the SCEV routines.
400 BasicBlock
*Header
= L
->getHeader();
402 SmallVector
<WeakTrackingVH
, 8> PHIs
;
403 for (PHINode
&PN
: Header
->phis())
406 bool Changed
= false;
407 for (WeakTrackingVH
&PHI
: PHIs
)
408 if (PHINode
*PN
= dyn_cast_or_null
<PHINode
>(&*PHI
))
409 Changed
|= handleFloatingPointIV(L
, PN
);
411 // If the loop previously had floating-point IV, ScalarEvolution
412 // may not have been able to compute a trip count. Now that we've done some
413 // re-writing, the trip count may be computable.
419 //===---------------------------------------------------------------------===//
420 // rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know
421 // they will exit at the first iteration.
422 //===---------------------------------------------------------------------===//
424 /// Check to see if this loop has loop invariant conditions which lead to loop
425 /// exits. If so, we know that if the exit path is taken, it is at the first
426 /// loop iteration. This lets us predict exit values of PHI nodes that live in
428 bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop
*L
) {
429 // Verify the input to the pass is already in LCSSA form.
430 assert(L
->isLCSSAForm(*DT
));
432 SmallVector
<BasicBlock
*, 8> ExitBlocks
;
433 L
->getUniqueExitBlocks(ExitBlocks
);
435 bool MadeAnyChanges
= false;
436 for (auto *ExitBB
: ExitBlocks
) {
437 // If there are no more PHI nodes in this exit block, then no more
438 // values defined inside the loop are used on this path.
439 for (PHINode
&PN
: ExitBB
->phis()) {
440 for (unsigned IncomingValIdx
= 0, E
= PN
.getNumIncomingValues();
441 IncomingValIdx
!= E
; ++IncomingValIdx
) {
442 auto *IncomingBB
= PN
.getIncomingBlock(IncomingValIdx
);
444 // Can we prove that the exit must run on the first iteration if it
445 // runs at all? (i.e. early exits are fine for our purposes, but
446 // traces which lead to this exit being taken on the 2nd iteration
447 // aren't.) Note that this is about whether the exit branch is
448 // executed, not about whether it is taken.
449 if (!L
->getLoopLatch() ||
450 !DT
->dominates(IncomingBB
, L
->getLoopLatch()))
453 // Get condition that leads to the exit path.
454 auto *TermInst
= IncomingBB
->getTerminator();
456 Value
*Cond
= nullptr;
457 if (auto *BI
= dyn_cast
<BranchInst
>(TermInst
)) {
458 // Must be a conditional branch, otherwise the block
459 // should not be in the loop.
460 Cond
= BI
->getCondition();
461 } else if (auto *SI
= dyn_cast
<SwitchInst
>(TermInst
))
462 Cond
= SI
->getCondition();
466 if (!L
->isLoopInvariant(Cond
))
469 auto *ExitVal
= dyn_cast
<PHINode
>(PN
.getIncomingValue(IncomingValIdx
));
471 // Only deal with PHIs in the loop header.
472 if (!ExitVal
|| ExitVal
->getParent() != L
->getHeader())
475 // If ExitVal is a PHI on the loop header, then we know its
476 // value along this exit because the exit can only be taken
477 // on the first iteration.
478 auto *LoopPreheader
= L
->getLoopPreheader();
479 assert(LoopPreheader
&& "Invalid loop");
480 int PreheaderIdx
= ExitVal
->getBasicBlockIndex(LoopPreheader
);
481 if (PreheaderIdx
!= -1) {
482 assert(ExitVal
->getParent() == L
->getHeader() &&
483 "ExitVal must be in loop header");
484 MadeAnyChanges
= true;
485 PN
.setIncomingValue(IncomingValIdx
,
486 ExitVal
->getIncomingValue(PreheaderIdx
));
487 SE
->forgetValue(&PN
);
492 return MadeAnyChanges
;
495 //===----------------------------------------------------------------------===//
496 // IV Widening - Extend the width of an IV to cover its widest uses.
497 //===----------------------------------------------------------------------===//
499 /// Update information about the induction variable that is extended by this
500 /// sign or zero extend operation. This is used to determine the final width of
501 /// the IV before actually widening it.
502 static void visitIVCast(CastInst
*Cast
, WideIVInfo
&WI
,
504 const TargetTransformInfo
*TTI
) {
505 bool IsSigned
= Cast
->getOpcode() == Instruction::SExt
;
506 if (!IsSigned
&& Cast
->getOpcode() != Instruction::ZExt
)
509 Type
*Ty
= Cast
->getType();
510 uint64_t Width
= SE
->getTypeSizeInBits(Ty
);
511 if (!Cast
->getModule()->getDataLayout().isLegalInteger(Width
))
514 // Check that `Cast` actually extends the induction variable (we rely on this
515 // later). This takes care of cases where `Cast` is extending a truncation of
516 // the narrow induction variable, and thus can end up being narrower than the
517 // "narrow" induction variable.
518 uint64_t NarrowIVWidth
= SE
->getTypeSizeInBits(WI
.NarrowIV
->getType());
519 if (NarrowIVWidth
>= Width
)
522 // Cast is either an sext or zext up to this point.
523 // We should not widen an indvar if arithmetics on the wider indvar are more
524 // expensive than those on the narrower indvar. We check only the cost of ADD
525 // because at least an ADD is required to increment the induction variable. We
526 // could compute more comprehensively the cost of all instructions on the
527 // induction variable when necessary.
529 TTI
->getArithmeticInstrCost(Instruction::Add
, Ty
) >
530 TTI
->getArithmeticInstrCost(Instruction::Add
,
531 Cast
->getOperand(0)->getType())) {
535 if (!WI
.WidestNativeType
||
536 Width
> SE
->getTypeSizeInBits(WI
.WidestNativeType
)) {
537 WI
.WidestNativeType
= SE
->getEffectiveSCEVType(Ty
);
538 WI
.IsSigned
= IsSigned
;
542 // We extend the IV to satisfy the sign of its user(s), or 'signed'
543 // if there are multiple users with both sign- and zero extensions,
544 // in order not to introduce nondeterministic behaviour based on the
545 // unspecified order of a PHI nodes' users-iterator.
546 WI
.IsSigned
|= IsSigned
;
549 //===----------------------------------------------------------------------===//
550 // Live IV Reduction - Minimize IVs live across the loop.
551 //===----------------------------------------------------------------------===//
553 //===----------------------------------------------------------------------===//
554 // Simplification of IV users based on SCEV evaluation.
555 //===----------------------------------------------------------------------===//
559 class IndVarSimplifyVisitor
: public IVVisitor
{
561 const TargetTransformInfo
*TTI
;
567 IndVarSimplifyVisitor(PHINode
*IV
, ScalarEvolution
*SCEV
,
568 const TargetTransformInfo
*TTI
,
569 const DominatorTree
*DTree
)
570 : SE(SCEV
), TTI(TTI
), IVPhi(IV
) {
575 // Implement the interface used by simplifyUsersOfIV.
576 void visitCast(CastInst
*Cast
) override
{ visitIVCast(Cast
, WI
, SE
, TTI
); }
579 } // end anonymous namespace
581 /// Iteratively perform simplification on a worklist of IV users. Each
582 /// successive simplification may push more users which may themselves be
583 /// candidates for simplification.
585 /// Sign/Zero extend elimination is interleaved with IV simplification.
586 bool IndVarSimplify::simplifyAndExtend(Loop
*L
,
587 SCEVExpander
&Rewriter
,
589 SmallVector
<WideIVInfo
, 8> WideIVs
;
591 auto *GuardDecl
= L
->getBlocks()[0]->getModule()->getFunction(
592 Intrinsic::getName(Intrinsic::experimental_guard
));
593 bool HasGuards
= GuardDecl
&& !GuardDecl
->use_empty();
595 SmallVector
<PHINode
*, 8> LoopPhis
;
596 for (PHINode
&PN
: L
->getHeader()->phis())
597 LoopPhis
.push_back(&PN
);
599 // Each round of simplification iterates through the SimplifyIVUsers worklist
600 // for all current phis, then determines whether any IVs can be
601 // widened. Widening adds new phis to LoopPhis, inducing another round of
602 // simplification on the wide IVs.
603 bool Changed
= false;
604 while (!LoopPhis
.empty()) {
605 // Evaluate as many IV expressions as possible before widening any IVs. This
606 // forces SCEV to set no-wrap flags before evaluating sign/zero
607 // extension. The first time SCEV attempts to normalize sign/zero extension,
608 // the result becomes final. So for the most predictable results, we delay
609 // evaluation of sign/zero extend evaluation until needed, and avoid running
610 // other SCEV based analysis prior to simplifyAndExtend.
612 PHINode
*CurrIV
= LoopPhis
.pop_back_val();
614 // Information about sign/zero extensions of CurrIV.
615 IndVarSimplifyVisitor
Visitor(CurrIV
, SE
, TTI
, DT
);
617 Changed
|= simplifyUsersOfIV(CurrIV
, SE
, DT
, LI
, TTI
, DeadInsts
, Rewriter
,
620 if (Visitor
.WI
.WidestNativeType
) {
621 WideIVs
.push_back(Visitor
.WI
);
623 } while(!LoopPhis
.empty());
625 // Continue if we disallowed widening.
629 for (; !WideIVs
.empty(); WideIVs
.pop_back()) {
632 if (PHINode
*WidePhi
= createWideIV(WideIVs
.back(), LI
, SE
, Rewriter
,
633 DT
, DeadInsts
, ElimExt
, Widened
,
634 HasGuards
, UsePostIncrementRanges
)) {
635 NumElimExt
+= ElimExt
;
636 NumWidened
+= Widened
;
638 LoopPhis
.push_back(WidePhi
);
645 //===----------------------------------------------------------------------===//
646 // linearFunctionTestReplace and its kin. Rewrite the loop exit condition.
647 //===----------------------------------------------------------------------===//
649 /// Given an Value which is hoped to be part of an add recurance in the given
650 /// loop, return the associated Phi node if so. Otherwise, return null. Note
651 /// that this is less general than SCEVs AddRec checking.
652 static PHINode
*getLoopPhiForCounter(Value
*IncV
, Loop
*L
) {
653 Instruction
*IncI
= dyn_cast
<Instruction
>(IncV
);
657 switch (IncI
->getOpcode()) {
658 case Instruction::Add
:
659 case Instruction::Sub
:
661 case Instruction::GetElementPtr
:
662 // An IV counter must preserve its type.
663 if (IncI
->getNumOperands() == 2)
670 PHINode
*Phi
= dyn_cast
<PHINode
>(IncI
->getOperand(0));
671 if (Phi
&& Phi
->getParent() == L
->getHeader()) {
672 if (L
->isLoopInvariant(IncI
->getOperand(1)))
676 if (IncI
->getOpcode() == Instruction::GetElementPtr
)
679 // Allow add/sub to be commuted.
680 Phi
= dyn_cast
<PHINode
>(IncI
->getOperand(1));
681 if (Phi
&& Phi
->getParent() == L
->getHeader()) {
682 if (L
->isLoopInvariant(IncI
->getOperand(0)))
688 /// Whether the current loop exit test is based on this value. Currently this
689 /// is limited to a direct use in the loop condition.
690 static bool isLoopExitTestBasedOn(Value
*V
, BasicBlock
*ExitingBB
) {
691 BranchInst
*BI
= cast
<BranchInst
>(ExitingBB
->getTerminator());
692 ICmpInst
*ICmp
= dyn_cast
<ICmpInst
>(BI
->getCondition());
693 // TODO: Allow non-icmp loop test.
697 // TODO: Allow indirect use.
698 return ICmp
->getOperand(0) == V
|| ICmp
->getOperand(1) == V
;
701 /// linearFunctionTestReplace policy. Return true unless we can show that the
702 /// current exit test is already sufficiently canonical.
703 static bool needsLFTR(Loop
*L
, BasicBlock
*ExitingBB
) {
704 assert(L
->getLoopLatch() && "Must be in simplified form");
706 // Avoid converting a constant or loop invariant test back to a runtime
707 // test. This is critical for when SCEV's cached ExitCount is less precise
708 // than the current IR (such as after we've proven a particular exit is
709 // actually dead and thus the BE count never reaches our ExitCount.)
710 BranchInst
*BI
= cast
<BranchInst
>(ExitingBB
->getTerminator());
711 if (L
->isLoopInvariant(BI
->getCondition()))
714 // Do LFTR to simplify the exit condition to an ICMP.
715 ICmpInst
*Cond
= dyn_cast
<ICmpInst
>(BI
->getCondition());
719 // Do LFTR to simplify the exit ICMP to EQ/NE
720 ICmpInst::Predicate Pred
= Cond
->getPredicate();
721 if (Pred
!= ICmpInst::ICMP_NE
&& Pred
!= ICmpInst::ICMP_EQ
)
724 // Look for a loop invariant RHS
725 Value
*LHS
= Cond
->getOperand(0);
726 Value
*RHS
= Cond
->getOperand(1);
727 if (!L
->isLoopInvariant(RHS
)) {
728 if (!L
->isLoopInvariant(LHS
))
732 // Look for a simple IV counter LHS
733 PHINode
*Phi
= dyn_cast
<PHINode
>(LHS
);
735 Phi
= getLoopPhiForCounter(LHS
, L
);
740 // Do LFTR if PHI node is defined in the loop, but is *not* a counter.
741 int Idx
= Phi
->getBasicBlockIndex(L
->getLoopLatch());
745 // Do LFTR if the exit condition's IV is *not* a simple counter.
746 Value
*IncV
= Phi
->getIncomingValue(Idx
);
747 return Phi
!= getLoopPhiForCounter(IncV
, L
);
750 /// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
751 /// down to checking that all operands are constant and listing instructions
752 /// that may hide undef.
753 static bool hasConcreteDefImpl(Value
*V
, SmallPtrSetImpl
<Value
*> &Visited
,
755 if (isa
<Constant
>(V
))
756 return !isa
<UndefValue
>(V
);
761 // Conservatively handle non-constant non-instructions. For example, Arguments
763 Instruction
*I
= dyn_cast
<Instruction
>(V
);
767 // Load and return values may be undef.
768 if(I
->mayReadFromMemory() || isa
<CallInst
>(I
) || isa
<InvokeInst
>(I
))
771 // Optimistically handle other instructions.
772 for (Value
*Op
: I
->operands()) {
773 if (!Visited
.insert(Op
).second
)
775 if (!hasConcreteDefImpl(Op
, Visited
, Depth
+1))
781 /// Return true if the given value is concrete. We must prove that undef can
784 /// TODO: If we decide that this is a good approach to checking for undef, we
785 /// may factor it into a common location.
786 static bool hasConcreteDef(Value
*V
) {
787 SmallPtrSet
<Value
*, 8> Visited
;
789 return hasConcreteDefImpl(V
, Visited
, 0);
792 /// Return true if the given phi is a "counter" in L. A counter is an
793 /// add recurance (of integer or pointer type) with an arbitrary start, and a
794 /// step of 1. Note that L must have exactly one latch.
795 static bool isLoopCounter(PHINode
* Phi
, Loop
*L
,
796 ScalarEvolution
*SE
) {
797 assert(Phi
->getParent() == L
->getHeader());
798 assert(L
->getLoopLatch());
800 if (!SE
->isSCEVable(Phi
->getType()))
803 const SCEVAddRecExpr
*AR
= dyn_cast
<SCEVAddRecExpr
>(SE
->getSCEV(Phi
));
804 if (!AR
|| AR
->getLoop() != L
|| !AR
->isAffine())
807 const SCEV
*Step
= dyn_cast
<SCEVConstant
>(AR
->getStepRecurrence(*SE
));
808 if (!Step
|| !Step
->isOne())
811 int LatchIdx
= Phi
->getBasicBlockIndex(L
->getLoopLatch());
812 Value
*IncV
= Phi
->getIncomingValue(LatchIdx
);
813 return (getLoopPhiForCounter(IncV
, L
) == Phi
&&
814 isa
<SCEVAddRecExpr
>(SE
->getSCEV(IncV
)));
817 /// Search the loop header for a loop counter (anadd rec w/step of one)
818 /// suitable for use by LFTR. If multiple counters are available, select the
819 /// "best" one based profitable heuristics.
821 /// BECount may be an i8* pointer type. The pointer difference is already
822 /// valid count without scaling the address stride, so it remains a pointer
823 /// expression as far as SCEV is concerned.
824 static PHINode
*FindLoopCounter(Loop
*L
, BasicBlock
*ExitingBB
,
826 ScalarEvolution
*SE
, DominatorTree
*DT
) {
827 uint64_t BCWidth
= SE
->getTypeSizeInBits(BECount
->getType());
829 Value
*Cond
= cast
<BranchInst
>(ExitingBB
->getTerminator())->getCondition();
831 // Loop over all of the PHI nodes, looking for a simple counter.
832 PHINode
*BestPhi
= nullptr;
833 const SCEV
*BestInit
= nullptr;
834 BasicBlock
*LatchBlock
= L
->getLoopLatch();
835 assert(LatchBlock
&& "Must be in simplified form");
836 const DataLayout
&DL
= L
->getHeader()->getModule()->getDataLayout();
838 for (BasicBlock::iterator I
= L
->getHeader()->begin(); isa
<PHINode
>(I
); ++I
) {
839 PHINode
*Phi
= cast
<PHINode
>(I
);
840 if (!isLoopCounter(Phi
, L
, SE
))
843 const auto *AR
= cast
<SCEVAddRecExpr
>(SE
->getSCEV(Phi
));
845 // AR may be a pointer type, while BECount is an integer type.
846 // AR may be wider than BECount. With eq/ne tests overflow is immaterial.
847 // AR may not be a narrower type, or we may never exit.
848 uint64_t PhiWidth
= SE
->getTypeSizeInBits(AR
->getType());
849 if (PhiWidth
< BCWidth
|| !DL
.isLegalInteger(PhiWidth
))
852 // Avoid reusing a potentially undef value to compute other values that may
853 // have originally had a concrete definition.
854 if (!hasConcreteDef(Phi
)) {
855 // We explicitly allow unknown phis as long as they are already used by
856 // the loop exit test. This is legal since performing LFTR could not
857 // increase the number of undef users.
858 Value
*IncPhi
= Phi
->getIncomingValueForBlock(LatchBlock
);
859 if (!isLoopExitTestBasedOn(Phi
, ExitingBB
) &&
860 !isLoopExitTestBasedOn(IncPhi
, ExitingBB
))
864 // Avoid introducing undefined behavior due to poison which didn't exist in
865 // the original program. (Annoyingly, the rules for poison and undef
866 // propagation are distinct, so this does NOT cover the undef case above.)
867 // We have to ensure that we don't introduce UB by introducing a use on an
868 // iteration where said IV produces poison. Our strategy here differs for
869 // pointers and integer IVs. For integers, we strip and reinfer as needed,
870 // see code in linearFunctionTestReplace. For pointers, we restrict
871 // transforms as there is no good way to reinfer inbounds once lost.
872 if (!Phi
->getType()->isIntegerTy() &&
873 !mustExecuteUBIfPoisonOnPathTo(Phi
, ExitingBB
->getTerminator(), DT
))
876 const SCEV
*Init
= AR
->getStart();
878 if (BestPhi
&& !isAlmostDeadIV(BestPhi
, LatchBlock
, Cond
)) {
879 // Don't force a live loop counter if another IV can be used.
880 if (isAlmostDeadIV(Phi
, LatchBlock
, Cond
))
883 // Prefer to count-from-zero. This is a more "canonical" counter form. It
884 // also prefers integer to pointer IVs.
885 if (BestInit
->isZero() != Init
->isZero()) {
886 if (BestInit
->isZero())
889 // If two IVs both count from zero or both count from nonzero then the
890 // narrower is likely a dead phi that has been widened. Use the wider phi
891 // to allow the other to be eliminated.
892 else if (PhiWidth
<= SE
->getTypeSizeInBits(BestPhi
->getType()))
901 /// Insert an IR expression which computes the value held by the IV IndVar
902 /// (which must be an loop counter w/unit stride) after the backedge of loop L
903 /// is taken ExitCount times.
904 static Value
*genLoopLimit(PHINode
*IndVar
, BasicBlock
*ExitingBB
,
905 const SCEV
*ExitCount
, bool UsePostInc
, Loop
*L
,
906 SCEVExpander
&Rewriter
, ScalarEvolution
*SE
) {
907 assert(isLoopCounter(IndVar
, L
, SE
));
908 assert(ExitCount
->getType()->isIntegerTy() && "exit count must be integer");
909 const SCEVAddRecExpr
*AR
= cast
<SCEVAddRecExpr
>(SE
->getSCEV(IndVar
));
910 assert(AR
->getStepRecurrence(*SE
)->isOne() && "only handles unit stride");
912 // For integer IVs, truncate the IV before computing the limit unless we
913 // know apriori that the limit must be a constant when evaluated in the
914 // bitwidth of the IV. We prefer (potentially) keeping a truncate of the
915 // IV in the loop over a (potentially) expensive expansion of the widened
916 // exit count add(zext(add)) expression.
917 if (IndVar
->getType()->isIntegerTy() &&
918 SE
->getTypeSizeInBits(AR
->getType()) >
919 SE
->getTypeSizeInBits(ExitCount
->getType())) {
920 const SCEV
*IVInit
= AR
->getStart();
921 if (!isa
<SCEVConstant
>(IVInit
) || !isa
<SCEVConstant
>(ExitCount
))
922 AR
= cast
<SCEVAddRecExpr
>(SE
->getTruncateExpr(AR
, ExitCount
->getType()));
925 const SCEVAddRecExpr
*ARBase
= UsePostInc
? AR
->getPostIncExpr(*SE
) : AR
;
926 const SCEV
*IVLimit
= ARBase
->evaluateAtIteration(ExitCount
, *SE
);
927 assert(SE
->isLoopInvariant(IVLimit
, L
) &&
928 "Computed iteration count is not loop invariant!");
929 return Rewriter
.expandCodeFor(IVLimit
, ARBase
->getType(),
930 ExitingBB
->getTerminator());
933 /// This method rewrites the exit condition of the loop to be a canonical !=
934 /// comparison against the incremented loop induction variable. This pass is
935 /// able to rewrite the exit tests of any loop where the SCEV analysis can
936 /// determine a loop-invariant trip count of the loop, which is actually a much
937 /// broader range than just linear tests.
938 bool IndVarSimplify::
939 linearFunctionTestReplace(Loop
*L
, BasicBlock
*ExitingBB
,
940 const SCEV
*ExitCount
,
941 PHINode
*IndVar
, SCEVExpander
&Rewriter
) {
942 assert(L
->getLoopLatch() && "Loop no longer in simplified form?");
943 assert(isLoopCounter(IndVar
, L
, SE
));
944 Instruction
* const IncVar
=
945 cast
<Instruction
>(IndVar
->getIncomingValueForBlock(L
->getLoopLatch()));
947 // Initialize CmpIndVar to the preincremented IV.
948 Value
*CmpIndVar
= IndVar
;
949 bool UsePostInc
= false;
951 // If the exiting block is the same as the backedge block, we prefer to
952 // compare against the post-incremented value, otherwise we must compare
953 // against the preincremented value.
954 if (ExitingBB
== L
->getLoopLatch()) {
955 // For pointer IVs, we chose to not strip inbounds which requires us not
956 // to add a potentially UB introducing use. We need to either a) show
957 // the loop test we're modifying is already in post-inc form, or b) show
958 // that adding a use must not introduce UB.
960 IndVar
->getType()->isIntegerTy() ||
961 isLoopExitTestBasedOn(IncVar
, ExitingBB
) ||
962 mustExecuteUBIfPoisonOnPathTo(IncVar
, ExitingBB
->getTerminator(), DT
);
969 // It may be necessary to drop nowrap flags on the incrementing instruction
970 // if either LFTR moves from a pre-inc check to a post-inc check (in which
971 // case the increment might have previously been poison on the last iteration
972 // only) or if LFTR switches to a different IV that was previously dynamically
973 // dead (and as such may be arbitrarily poison). We remove any nowrap flags
974 // that SCEV didn't infer for the post-inc addrec (even if we use a pre-inc
975 // check), because the pre-inc addrec flags may be adopted from the original
976 // instruction, while SCEV has to explicitly prove the post-inc nowrap flags.
977 // TODO: This handling is inaccurate for one case: If we switch to a
978 // dynamically dead IV that wraps on the first loop iteration only, which is
979 // not covered by the post-inc addrec. (If the new IV was not dynamically
980 // dead, it could not be poison on the first iteration in the first place.)
981 if (auto *BO
= dyn_cast
<BinaryOperator
>(IncVar
)) {
982 const SCEVAddRecExpr
*AR
= cast
<SCEVAddRecExpr
>(SE
->getSCEV(IncVar
));
983 if (BO
->hasNoUnsignedWrap())
984 BO
->setHasNoUnsignedWrap(AR
->hasNoUnsignedWrap());
985 if (BO
->hasNoSignedWrap())
986 BO
->setHasNoSignedWrap(AR
->hasNoSignedWrap());
989 Value
*ExitCnt
= genLoopLimit(
990 IndVar
, ExitingBB
, ExitCount
, UsePostInc
, L
, Rewriter
, SE
);
991 assert(ExitCnt
->getType()->isPointerTy() ==
992 IndVar
->getType()->isPointerTy() &&
993 "genLoopLimit missed a cast");
995 // Insert a new icmp_ne or icmp_eq instruction before the branch.
996 BranchInst
*BI
= cast
<BranchInst
>(ExitingBB
->getTerminator());
997 ICmpInst::Predicate P
;
998 if (L
->contains(BI
->getSuccessor(0)))
999 P
= ICmpInst::ICMP_NE
;
1001 P
= ICmpInst::ICMP_EQ
;
1003 IRBuilder
<> Builder(BI
);
1005 // The new loop exit condition should reuse the debug location of the
1006 // original loop exit condition.
1007 if (auto *Cond
= dyn_cast
<Instruction
>(BI
->getCondition()))
1008 Builder
.SetCurrentDebugLocation(Cond
->getDebugLoc());
1010 // For integer IVs, if we evaluated the limit in the narrower bitwidth to
1011 // avoid the expensive expansion of the limit expression in the wider type,
1012 // emit a truncate to narrow the IV to the ExitCount type. This is safe
1013 // since we know (from the exit count bitwidth), that we can't self-wrap in
1014 // the narrower type.
1015 unsigned CmpIndVarSize
= SE
->getTypeSizeInBits(CmpIndVar
->getType());
1016 unsigned ExitCntSize
= SE
->getTypeSizeInBits(ExitCnt
->getType());
1017 if (CmpIndVarSize
> ExitCntSize
) {
1018 assert(!CmpIndVar
->getType()->isPointerTy() &&
1019 !ExitCnt
->getType()->isPointerTy());
1021 // Before resorting to actually inserting the truncate, use the same
1022 // reasoning as from SimplifyIndvar::eliminateTrunc to see if we can extend
1023 // the other side of the comparison instead. We still evaluate the limit
1024 // in the narrower bitwidth, we just prefer a zext/sext outside the loop to
1025 // a truncate within in.
1026 bool Extended
= false;
1027 const SCEV
*IV
= SE
->getSCEV(CmpIndVar
);
1028 const SCEV
*TruncatedIV
= SE
->getTruncateExpr(IV
, ExitCnt
->getType());
1029 const SCEV
*ZExtTrunc
=
1030 SE
->getZeroExtendExpr(TruncatedIV
, CmpIndVar
->getType());
1032 if (ZExtTrunc
== IV
) {
1034 ExitCnt
= Builder
.CreateZExt(ExitCnt
, IndVar
->getType(),
1037 const SCEV
*SExtTrunc
=
1038 SE
->getSignExtendExpr(TruncatedIV
, CmpIndVar
->getType());
1039 if (SExtTrunc
== IV
) {
1041 ExitCnt
= Builder
.CreateSExt(ExitCnt
, IndVar
->getType(),
1048 L
->makeLoopInvariant(ExitCnt
, Discard
);
1050 CmpIndVar
= Builder
.CreateTrunc(CmpIndVar
, ExitCnt
->getType(),
1053 LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
1054 << " LHS:" << *CmpIndVar
<< '\n'
1055 << " op:\t" << (P
== ICmpInst::ICMP_NE
? "!=" : "==")
1057 << " RHS:\t" << *ExitCnt
<< "\n"
1058 << "ExitCount:\t" << *ExitCount
<< "\n"
1059 << " was: " << *BI
->getCondition() << "\n");
1061 Value
*Cond
= Builder
.CreateICmp(P
, CmpIndVar
, ExitCnt
, "exitcond");
1062 Value
*OrigCond
= BI
->getCondition();
1063 // It's tempting to use replaceAllUsesWith here to fully replace the old
1064 // comparison, but that's not immediately safe, since users of the old
1065 // comparison may not be dominated by the new comparison. Instead, just
1066 // update the branch to use the new comparison; in the common case this
1067 // will make old comparison dead.
1068 BI
->setCondition(Cond
);
1069 DeadInsts
.emplace_back(OrigCond
);
1075 //===----------------------------------------------------------------------===//
1076 // sinkUnusedInvariants. A late subpass to cleanup loop preheaders.
1077 //===----------------------------------------------------------------------===//
1079 /// If there's a single exit block, sink any loop-invariant values that
1080 /// were defined in the preheader but not used inside the loop into the
1081 /// exit block to reduce register pressure in the loop.
1082 bool IndVarSimplify::sinkUnusedInvariants(Loop
*L
) {
1083 BasicBlock
*ExitBlock
= L
->getExitBlock();
1084 if (!ExitBlock
) return false;
1086 BasicBlock
*Preheader
= L
->getLoopPreheader();
1087 if (!Preheader
) return false;
1089 bool MadeAnyChanges
= false;
1090 BasicBlock::iterator InsertPt
= ExitBlock
->getFirstInsertionPt();
1091 BasicBlock::iterator
I(Preheader
->getTerminator());
1092 while (I
!= Preheader
->begin()) {
1094 // New instructions were inserted at the end of the preheader.
1095 if (isa
<PHINode
>(I
))
1098 // Don't move instructions which might have side effects, since the side
1099 // effects need to complete before instructions inside the loop. Also don't
1100 // move instructions which might read memory, since the loop may modify
1101 // memory. Note that it's okay if the instruction might have undefined
1102 // behavior: LoopSimplify guarantees that the preheader dominates the exit
1104 if (I
->mayHaveSideEffects() || I
->mayReadFromMemory())
1107 // Skip debug info intrinsics.
1108 if (isa
<DbgInfoIntrinsic
>(I
))
1111 // Skip eh pad instructions.
1115 // Don't sink alloca: we never want to sink static alloca's out of the
1116 // entry block, and correctly sinking dynamic alloca's requires
1117 // checks for stacksave/stackrestore intrinsics.
1118 // FIXME: Refactor this check somehow?
1119 if (isa
<AllocaInst
>(I
))
1122 // Determine if there is a use in or before the loop (direct or
1124 bool UsedInLoop
= false;
1125 for (Use
&U
: I
->uses()) {
1126 Instruction
*User
= cast
<Instruction
>(U
.getUser());
1127 BasicBlock
*UseBB
= User
->getParent();
1128 if (PHINode
*P
= dyn_cast
<PHINode
>(User
)) {
1130 PHINode::getIncomingValueNumForOperand(U
.getOperandNo());
1131 UseBB
= P
->getIncomingBlock(i
);
1133 if (UseBB
== Preheader
|| L
->contains(UseBB
)) {
1139 // If there is, the def must remain in the preheader.
1143 // Otherwise, sink it to the exit block.
1144 Instruction
*ToMove
= &*I
;
1147 if (I
!= Preheader
->begin()) {
1148 // Skip debug info intrinsics.
1151 } while (I
->isDebugOrPseudoInst() && I
!= Preheader
->begin());
1153 if (I
->isDebugOrPseudoInst() && I
== Preheader
->begin())
1159 MadeAnyChanges
= true;
1160 ToMove
->moveBefore(*ExitBlock
, InsertPt
);
1161 SE
->forgetValue(ToMove
);
1163 InsertPt
= ToMove
->getIterator();
1166 return MadeAnyChanges
;
1169 static void replaceExitCond(BranchInst
*BI
, Value
*NewCond
,
1170 SmallVectorImpl
<WeakTrackingVH
> &DeadInsts
) {
1171 auto *OldCond
= BI
->getCondition();
1172 LLVM_DEBUG(dbgs() << "Replacing condition of loop-exiting branch " << *BI
1173 << " with " << *NewCond
<< "\n");
1174 BI
->setCondition(NewCond
);
1175 if (OldCond
->use_empty())
1176 DeadInsts
.emplace_back(OldCond
);
1179 static Constant
*createFoldedExitCond(const Loop
*L
, BasicBlock
*ExitingBB
,
1181 BranchInst
*BI
= cast
<BranchInst
>(ExitingBB
->getTerminator());
1182 bool ExitIfTrue
= !L
->contains(*succ_begin(ExitingBB
));
1183 auto *OldCond
= BI
->getCondition();
1184 return ConstantInt::get(OldCond
->getType(),
1185 IsTaken
? ExitIfTrue
: !ExitIfTrue
);
1188 static void foldExit(const Loop
*L
, BasicBlock
*ExitingBB
, bool IsTaken
,
1189 SmallVectorImpl
<WeakTrackingVH
> &DeadInsts
) {
1190 BranchInst
*BI
= cast
<BranchInst
>(ExitingBB
->getTerminator());
1191 auto *NewCond
= createFoldedExitCond(L
, ExitingBB
, IsTaken
);
1192 replaceExitCond(BI
, NewCond
, DeadInsts
);
1195 static void replaceLoopPHINodesWithPreheaderValues(
1196 LoopInfo
*LI
, Loop
*L
, SmallVectorImpl
<WeakTrackingVH
> &DeadInsts
,
1197 ScalarEvolution
&SE
) {
1198 assert(L
->isLoopSimplifyForm() && "Should only do it in simplify form!");
1199 auto *LoopPreheader
= L
->getLoopPreheader();
1200 auto *LoopHeader
= L
->getHeader();
1201 SmallVector
<Instruction
*> Worklist
;
1202 for (auto &PN
: LoopHeader
->phis()) {
1203 auto *PreheaderIncoming
= PN
.getIncomingValueForBlock(LoopPreheader
);
1204 for (User
*U
: PN
.users())
1205 Worklist
.push_back(cast
<Instruction
>(U
));
1206 SE
.forgetValue(&PN
);
1207 PN
.replaceAllUsesWith(PreheaderIncoming
);
1208 DeadInsts
.emplace_back(&PN
);
1211 // Replacing with the preheader value will often allow IV users to simplify
1212 // (especially if the preheader value is a constant).
1213 SmallPtrSet
<Instruction
*, 16> Visited
;
1214 while (!Worklist
.empty()) {
1215 auto *I
= cast
<Instruction
>(Worklist
.pop_back_val());
1216 if (!Visited
.insert(I
).second
)
1219 // Don't simplify instructions outside the loop.
1220 if (!L
->contains(I
))
1223 Value
*Res
= simplifyInstruction(I
, I
->getModule()->getDataLayout());
1224 if (Res
&& LI
->replacementPreservesLCSSAForm(I
, Res
)) {
1225 for (User
*U
: I
->users())
1226 Worklist
.push_back(cast
<Instruction
>(U
));
1227 I
->replaceAllUsesWith(Res
);
1228 DeadInsts
.emplace_back(I
);
1234 createInvariantCond(const Loop
*L
, BasicBlock
*ExitingBB
,
1235 const ScalarEvolution::LoopInvariantPredicate
&LIP
,
1236 SCEVExpander
&Rewriter
) {
1237 ICmpInst::Predicate InvariantPred
= LIP
.Pred
;
1238 BasicBlock
*Preheader
= L
->getLoopPreheader();
1239 assert(Preheader
&& "Preheader doesn't exist");
1240 Rewriter
.setInsertPoint(Preheader
->getTerminator());
1241 auto *LHSV
= Rewriter
.expandCodeFor(LIP
.LHS
);
1242 auto *RHSV
= Rewriter
.expandCodeFor(LIP
.RHS
);
1243 bool ExitIfTrue
= !L
->contains(*succ_begin(ExitingBB
));
1245 InvariantPred
= ICmpInst::getInversePredicate(InvariantPred
);
1246 IRBuilder
<> Builder(Preheader
->getTerminator());
1247 BranchInst
*BI
= cast
<BranchInst
>(ExitingBB
->getTerminator());
1248 return Builder
.CreateICmp(InvariantPred
, LHSV
, RHSV
,
1249 BI
->getCondition()->getName());
1252 static std::optional
<Value
*>
1253 createReplacement(ICmpInst
*ICmp
, const Loop
*L
, BasicBlock
*ExitingBB
,
1254 const SCEV
*MaxIter
, bool Inverted
, bool SkipLastIter
,
1255 ScalarEvolution
*SE
, SCEVExpander
&Rewriter
) {
1256 ICmpInst::Predicate Pred
= ICmp
->getPredicate();
1257 Value
*LHS
= ICmp
->getOperand(0);
1258 Value
*RHS
= ICmp
->getOperand(1);
1260 // 'LHS pred RHS' should now mean that we stay in loop.
1261 auto *BI
= cast
<BranchInst
>(ExitingBB
->getTerminator());
1263 Pred
= CmpInst::getInversePredicate(Pred
);
1265 const SCEV
*LHSS
= SE
->getSCEVAtScope(LHS
, L
);
1266 const SCEV
*RHSS
= SE
->getSCEVAtScope(RHS
, L
);
1267 // Can we prove it to be trivially true or false?
1268 if (auto EV
= SE
->evaluatePredicateAt(Pred
, LHSS
, RHSS
, BI
))
1269 return createFoldedExitCond(L
, ExitingBB
, /*IsTaken*/ !*EV
);
1271 auto *ARTy
= LHSS
->getType();
1272 auto *MaxIterTy
= MaxIter
->getType();
1273 // If possible, adjust types.
1274 if (SE
->getTypeSizeInBits(ARTy
) > SE
->getTypeSizeInBits(MaxIterTy
))
1275 MaxIter
= SE
->getZeroExtendExpr(MaxIter
, ARTy
);
1276 else if (SE
->getTypeSizeInBits(ARTy
) < SE
->getTypeSizeInBits(MaxIterTy
)) {
1277 const SCEV
*MinusOne
= SE
->getMinusOne(ARTy
);
1278 auto *MaxAllowedIter
= SE
->getZeroExtendExpr(MinusOne
, MaxIterTy
);
1279 if (SE
->isKnownPredicateAt(ICmpInst::ICMP_ULE
, MaxIter
, MaxAllowedIter
, BI
))
1280 MaxIter
= SE
->getTruncateExpr(MaxIter
, ARTy
);
1284 // Semantically skip last iter is "subtract 1, do not bother about unsigned
1285 // wrap". getLoopInvariantExitCondDuringFirstIterations knows how to deal
1286 // with umin in a smart way, but umin(a, b) - 1 will likely not simplify.
1287 // So we manually construct umin(a - 1, b - 1).
1288 SmallVector
<const SCEV
*, 4> Elements
;
1289 if (auto *UMin
= dyn_cast
<SCEVUMinExpr
>(MaxIter
)) {
1290 for (auto *Op
: UMin
->operands())
1291 Elements
.push_back(SE
->getMinusSCEV(Op
, SE
->getOne(Op
->getType())));
1292 MaxIter
= SE
->getUMinFromMismatchedTypes(Elements
);
1294 MaxIter
= SE
->getMinusSCEV(MaxIter
, SE
->getOne(MaxIter
->getType()));
1297 // Check if there is a loop-invariant predicate equivalent to our check.
1298 auto LIP
= SE
->getLoopInvariantExitCondDuringFirstIterations(Pred
, LHSS
, RHSS
,
1301 return std::nullopt
;
1303 // Can we prove it to be trivially true?
1304 if (SE
->isKnownPredicateAt(LIP
->Pred
, LIP
->LHS
, LIP
->RHS
, BI
))
1305 return createFoldedExitCond(L
, ExitingBB
, /*IsTaken*/ false);
1307 return createInvariantCond(L
, ExitingBB
, *LIP
, Rewriter
);
1310 static bool optimizeLoopExitWithUnknownExitCount(
1311 const Loop
*L
, BranchInst
*BI
, BasicBlock
*ExitingBB
, const SCEV
*MaxIter
,
1312 bool SkipLastIter
, ScalarEvolution
*SE
, SCEVExpander
&Rewriter
,
1313 SmallVectorImpl
<WeakTrackingVH
> &DeadInsts
) {
1315 (L
->contains(BI
->getSuccessor(0)) != L
->contains(BI
->getSuccessor(1))) &&
1316 "Not a loop exit!");
1318 // For branch that stays in loop by TRUE condition, go through AND. For branch
1319 // that stays in loop by FALSE condition, go through OR. Both gives the
1320 // similar logic: "stay in loop iff all conditions are true(false)".
1321 bool Inverted
= L
->contains(BI
->getSuccessor(1));
1322 SmallVector
<ICmpInst
*, 4> LeafConditions
;
1323 SmallVector
<Value
*, 4> Worklist
;
1324 SmallPtrSet
<Value
*, 4> Visited
;
1325 Value
*OldCond
= BI
->getCondition();
1326 Visited
.insert(OldCond
);
1327 Worklist
.push_back(OldCond
);
1329 auto GoThrough
= [&](Value
*V
) {
1330 Value
*LHS
= nullptr, *RHS
= nullptr;
1332 if (!match(V
, m_LogicalOr(m_Value(LHS
), m_Value(RHS
))))
1335 if (!match(V
, m_LogicalAnd(m_Value(LHS
), m_Value(RHS
))))
1338 if (Visited
.insert(LHS
).second
)
1339 Worklist
.push_back(LHS
);
1340 if (Visited
.insert(RHS
).second
)
1341 Worklist
.push_back(RHS
);
1346 Value
*Curr
= Worklist
.pop_back_val();
1347 // Go through AND/OR conditions. Collect leaf ICMPs. We only care about
1348 // those with one use, to avoid instruction duplication.
1349 if (Curr
->hasOneUse())
1350 if (!GoThrough(Curr
))
1351 if (auto *ICmp
= dyn_cast
<ICmpInst
>(Curr
))
1352 LeafConditions
.push_back(ICmp
);
1353 } while (!Worklist
.empty());
1355 // If the current basic block has the same exit count as the whole loop, and
1356 // it consists of multiple icmp's, try to collect all icmp's that give exact
1357 // same exit count. For all other icmp's, we could use one less iteration,
1358 // because their value on the last iteration doesn't really matter.
1359 SmallPtrSet
<ICmpInst
*, 4> ICmpsFailingOnLastIter
;
1360 if (!SkipLastIter
&& LeafConditions
.size() > 1 &&
1361 SE
->getExitCount(L
, ExitingBB
,
1362 ScalarEvolution::ExitCountKind::SymbolicMaximum
) ==
1364 for (auto *ICmp
: LeafConditions
) {
1365 auto EL
= SE
->computeExitLimitFromCond(L
, ICmp
, Inverted
,
1366 /*ControlsExit*/ false);
1367 auto *ExitMax
= EL
.SymbolicMaxNotTaken
;
1368 if (isa
<SCEVCouldNotCompute
>(ExitMax
))
1370 // They could be of different types (specifically this happens after
1373 SE
->getWiderType(ExitMax
->getType(), MaxIter
->getType());
1374 auto *WideExitMax
= SE
->getNoopOrZeroExtend(ExitMax
, WiderType
);
1375 auto *WideMaxIter
= SE
->getNoopOrZeroExtend(MaxIter
, WiderType
);
1376 if (WideExitMax
== WideMaxIter
)
1377 ICmpsFailingOnLastIter
.insert(ICmp
);
1380 bool Changed
= false;
1381 for (auto *OldCond
: LeafConditions
) {
1382 // Skip last iteration for this icmp under one of two conditions:
1383 // - We do it for all conditions;
1384 // - There is another ICmp that would fail on last iter, so this one doesn't
1386 bool OptimisticSkipLastIter
= SkipLastIter
;
1387 if (!OptimisticSkipLastIter
) {
1388 if (ICmpsFailingOnLastIter
.size() > 1)
1389 OptimisticSkipLastIter
= true;
1390 else if (ICmpsFailingOnLastIter
.size() == 1)
1391 OptimisticSkipLastIter
= !ICmpsFailingOnLastIter
.count(OldCond
);
1394 createReplacement(OldCond
, L
, ExitingBB
, MaxIter
, Inverted
,
1395 OptimisticSkipLastIter
, SE
, Rewriter
)) {
1397 auto *NewCond
= *Replaced
;
1398 if (auto *NCI
= dyn_cast
<Instruction
>(NewCond
)) {
1399 NCI
->setName(OldCond
->getName() + ".first_iter");
1401 LLVM_DEBUG(dbgs() << "Unknown exit count: Replacing " << *OldCond
1402 << " with " << *NewCond
<< "\n");
1403 assert(OldCond
->hasOneUse() && "Must be!");
1404 OldCond
->replaceAllUsesWith(NewCond
);
1405 DeadInsts
.push_back(OldCond
);
1406 // Make sure we no longer consider this condition as failing on last
1408 ICmpsFailingOnLastIter
.erase(OldCond
);
1414 bool IndVarSimplify::canonicalizeExitCondition(Loop
*L
) {
1415 // Note: This is duplicating a particular part on SimplifyIndVars reasoning.
1416 // We need to duplicate it because given icmp zext(small-iv), C, IVUsers
1417 // never reaches the icmp since the zext doesn't fold to an AddRec unless
1418 // it already has flags. The alternative to this would be to extending the
1419 // set of "interesting" IV users to include the icmp, but doing that
1420 // regresses results in practice by querying SCEVs before trip counts which
1421 // rely on them which results in SCEV caching sub-optimal answers. The
1422 // concern about caching sub-optimal results is why we only query SCEVs of
1423 // the loop invariant RHS here.
1424 SmallVector
<BasicBlock
*, 16> ExitingBlocks
;
1425 L
->getExitingBlocks(ExitingBlocks
);
1426 bool Changed
= false;
1427 for (auto *ExitingBB
: ExitingBlocks
) {
1428 auto *BI
= dyn_cast
<BranchInst
>(ExitingBB
->getTerminator());
1431 assert(BI
->isConditional() && "exit branch must be conditional");
1433 auto *ICmp
= dyn_cast
<ICmpInst
>(BI
->getCondition());
1434 if (!ICmp
|| !ICmp
->hasOneUse())
1437 auto *LHS
= ICmp
->getOperand(0);
1438 auto *RHS
= ICmp
->getOperand(1);
1439 // For the range reasoning, avoid computing SCEVs in the loop to avoid
1440 // poisoning cache with sub-optimal results. For the must-execute case,
1441 // this is a neccessary precondition for correctness.
1442 if (!L
->isLoopInvariant(RHS
)) {
1443 if (!L
->isLoopInvariant(LHS
))
1445 // Same logic applies for the inverse case
1446 std::swap(LHS
, RHS
);
1449 // Match (icmp signed-cond zext, RHS)
1450 Value
*LHSOp
= nullptr;
1451 if (!match(LHS
, m_ZExt(m_Value(LHSOp
))) || !ICmp
->isSigned())
1454 const DataLayout
&DL
= ExitingBB
->getModule()->getDataLayout();
1455 const unsigned InnerBitWidth
= DL
.getTypeSizeInBits(LHSOp
->getType());
1456 const unsigned OuterBitWidth
= DL
.getTypeSizeInBits(RHS
->getType());
1457 auto FullCR
= ConstantRange::getFull(InnerBitWidth
);
1458 FullCR
= FullCR
.zeroExtend(OuterBitWidth
);
1459 auto RHSCR
= SE
->getUnsignedRange(SE
->applyLoopGuards(SE
->getSCEV(RHS
), L
));
1460 if (FullCR
.contains(RHSCR
)) {
1461 // We have now matched icmp signed-cond zext(X), zext(Y'), and can thus
1462 // replace the signed condition with the unsigned version.
1463 ICmp
->setPredicate(ICmp
->getUnsignedPredicate());
1465 // Note: No SCEV invalidation needed. We've changed the predicate, but
1466 // have not changed exit counts, or the values produced by the compare.
1471 // Now that we've canonicalized the condition to match the extend,
1472 // see if we can rotate the extend out of the loop.
1473 for (auto *ExitingBB
: ExitingBlocks
) {
1474 auto *BI
= dyn_cast
<BranchInst
>(ExitingBB
->getTerminator());
1477 assert(BI
->isConditional() && "exit branch must be conditional");
1479 auto *ICmp
= dyn_cast
<ICmpInst
>(BI
->getCondition());
1480 if (!ICmp
|| !ICmp
->hasOneUse() || !ICmp
->isUnsigned())
1483 bool Swapped
= false;
1484 auto *LHS
= ICmp
->getOperand(0);
1485 auto *RHS
= ICmp
->getOperand(1);
1486 if (L
->isLoopInvariant(LHS
) == L
->isLoopInvariant(RHS
))
1487 // Nothing to rotate
1489 if (L
->isLoopInvariant(LHS
)) {
1490 // Same logic applies for the inverse case until we actually pick
1491 // which operand of the compare to update.
1493 std::swap(LHS
, RHS
);
1495 assert(!L
->isLoopInvariant(LHS
) && L
->isLoopInvariant(RHS
));
1497 // Match (icmp unsigned-cond zext, RHS)
1498 // TODO: Extend to handle corresponding sext/signed-cmp case
1499 // TODO: Extend to other invertible functions
1500 Value
*LHSOp
= nullptr;
1501 if (!match(LHS
, m_ZExt(m_Value(LHSOp
))))
1504 // In general, we only rotate if we can do so without increasing the number
1505 // of instructions. The exception is when we have an zext(add-rec). The
1506 // reason for allowing this exception is that we know we need to get rid
1507 // of the zext for SCEV to be able to compute a trip count for said loops;
1508 // we consider the new trip count valuable enough to increase instruction
1510 if (!LHS
->hasOneUse() && !isa
<SCEVAddRecExpr
>(SE
->getSCEV(LHSOp
)))
1513 // Given a icmp unsigned-cond zext(Op) where zext(trunc(RHS)) == RHS
1514 // replace with an icmp of the form icmp unsigned-cond Op, trunc(RHS)
1515 // when zext is loop varying and RHS is loop invariant. This converts
1516 // loop varying work to loop-invariant work.
1517 auto doRotateTransform
= [&]() {
1518 assert(ICmp
->isUnsigned() && "must have proven unsigned already");
1520 CastInst::Create(Instruction::Trunc
, RHS
, LHSOp
->getType(), "",
1521 L
->getLoopPreheader()->getTerminator());
1522 ICmp
->setOperand(Swapped
? 1 : 0, LHSOp
);
1523 ICmp
->setOperand(Swapped
? 0 : 1, NewRHS
);
1524 if (LHS
->use_empty())
1525 DeadInsts
.push_back(LHS
);
1529 const DataLayout
&DL
= ExitingBB
->getModule()->getDataLayout();
1530 const unsigned InnerBitWidth
= DL
.getTypeSizeInBits(LHSOp
->getType());
1531 const unsigned OuterBitWidth
= DL
.getTypeSizeInBits(RHS
->getType());
1532 auto FullCR
= ConstantRange::getFull(InnerBitWidth
);
1533 FullCR
= FullCR
.zeroExtend(OuterBitWidth
);
1534 auto RHSCR
= SE
->getUnsignedRange(SE
->applyLoopGuards(SE
->getSCEV(RHS
), L
));
1535 if (FullCR
.contains(RHSCR
)) {
1536 doRotateTransform();
1538 // Note, we are leaving SCEV in an unfortunately imprecise case here
1539 // as rotation tends to reveal information about trip counts not
1540 // previously visible.
1548 bool IndVarSimplify::optimizeLoopExits(Loop
*L
, SCEVExpander
&Rewriter
) {
1549 SmallVector
<BasicBlock
*, 16> ExitingBlocks
;
1550 L
->getExitingBlocks(ExitingBlocks
);
1552 // Remove all exits which aren't both rewriteable and execute on every
1554 llvm::erase_if(ExitingBlocks
, [&](BasicBlock
*ExitingBB
) {
1555 // If our exitting block exits multiple loops, we can only rewrite the
1556 // innermost one. Otherwise, we're changing how many times the innermost
1557 // loop runs before it exits.
1558 if (LI
->getLoopFor(ExitingBB
) != L
)
1561 // Can't rewrite non-branch yet.
1562 BranchInst
*BI
= dyn_cast
<BranchInst
>(ExitingBB
->getTerminator());
1566 // Likewise, the loop latch must be dominated by the exiting BB.
1567 if (!DT
->dominates(ExitingBB
, L
->getLoopLatch()))
1570 if (auto *CI
= dyn_cast
<ConstantInt
>(BI
->getCondition())) {
1571 // If already constant, nothing to do. However, if this is an
1572 // unconditional exit, we can still replace header phis with their
1574 if (!L
->contains(BI
->getSuccessor(CI
->isNullValue())))
1575 replaceLoopPHINodesWithPreheaderValues(LI
, L
, DeadInsts
, *SE
);
1582 if (ExitingBlocks
.empty())
1585 // Get a symbolic upper bound on the loop backedge taken count.
1586 const SCEV
*MaxBECount
= SE
->getSymbolicMaxBackedgeTakenCount(L
);
1587 if (isa
<SCEVCouldNotCompute
>(MaxBECount
))
1590 // Visit our exit blocks in order of dominance. We know from the fact that
1591 // all exits must dominate the latch, so there is a total dominance order
1593 llvm::sort(ExitingBlocks
, [&](BasicBlock
*A
, BasicBlock
*B
) {
1594 // std::sort sorts in ascending order, so we want the inverse of
1595 // the normal dominance relation.
1596 if (A
== B
) return false;
1597 if (DT
->properlyDominates(A
, B
))
1600 assert(DT
->properlyDominates(B
, A
) &&
1601 "expected total dominance order!");
1606 for (unsigned i
= 1; i
< ExitingBlocks
.size(); i
++) {
1607 assert(DT
->dominates(ExitingBlocks
[i
-1], ExitingBlocks
[i
]));
1611 bool Changed
= false;
1612 bool SkipLastIter
= false;
1613 const SCEV
*CurrMaxExit
= SE
->getCouldNotCompute();
1614 auto UpdateSkipLastIter
= [&](const SCEV
*MaxExitCount
) {
1615 if (SkipLastIter
|| isa
<SCEVCouldNotCompute
>(MaxExitCount
))
1617 if (isa
<SCEVCouldNotCompute
>(CurrMaxExit
))
1618 CurrMaxExit
= MaxExitCount
;
1620 CurrMaxExit
= SE
->getUMinFromMismatchedTypes(CurrMaxExit
, MaxExitCount
);
1621 // If the loop has more than 1 iteration, all further checks will be
1622 // executed 1 iteration less.
1623 if (CurrMaxExit
== MaxBECount
)
1624 SkipLastIter
= true;
1626 SmallSet
<const SCEV
*, 8> DominatingExactExitCounts
;
1627 for (BasicBlock
*ExitingBB
: ExitingBlocks
) {
1628 const SCEV
*ExactExitCount
= SE
->getExitCount(L
, ExitingBB
);
1629 const SCEV
*MaxExitCount
= SE
->getExitCount(
1630 L
, ExitingBB
, ScalarEvolution::ExitCountKind::SymbolicMaximum
);
1631 if (isa
<SCEVCouldNotCompute
>(ExactExitCount
)) {
1632 // Okay, we do not know the exit count here. Can we at least prove that it
1633 // will remain the same within iteration space?
1634 auto *BI
= cast
<BranchInst
>(ExitingBB
->getTerminator());
1635 auto OptimizeCond
= [&](bool SkipLastIter
) {
1636 return optimizeLoopExitWithUnknownExitCount(L
, BI
, ExitingBB
,
1637 MaxBECount
, SkipLastIter
,
1638 SE
, Rewriter
, DeadInsts
);
1641 // TODO: We might have proved that we can skip the last iteration for
1642 // this check. In this case, we only want to check the condition on the
1643 // pre-last iteration (MaxBECount - 1). However, there is a nasty
1646 // for (i = len; i != 0; i--) { ... check (i ult X) ... }
1648 // If we could not prove that len != 0, then we also could not prove that
1649 // (len - 1) is not a UINT_MAX. If we simply query (len - 1), then
1650 // OptimizeCond will likely not prove anything for it, even if it could
1651 // prove the same fact for len.
1653 // As a temporary solution, we query both last and pre-last iterations in
1654 // hope that we will be able to prove triviality for at least one of
1655 // them. We can stop querying MaxBECount for this case once SCEV
1656 // understands that (MaxBECount - 1) will not overflow here.
1657 if (OptimizeCond(false))
1659 else if (SkipLastIter
&& OptimizeCond(true))
1661 UpdateSkipLastIter(MaxExitCount
);
1665 UpdateSkipLastIter(ExactExitCount
);
1667 // If we know we'd exit on the first iteration, rewrite the exit to
1668 // reflect this. This does not imply the loop must exit through this
1669 // exit; there may be an earlier one taken on the first iteration.
1670 // We know that the backedge can't be taken, so we replace all
1671 // the header PHIs with values coming from the preheader.
1672 if (ExactExitCount
->isZero()) {
1673 foldExit(L
, ExitingBB
, true, DeadInsts
);
1674 replaceLoopPHINodesWithPreheaderValues(LI
, L
, DeadInsts
, *SE
);
1679 assert(ExactExitCount
->getType()->isIntegerTy() &&
1680 MaxBECount
->getType()->isIntegerTy() &&
1681 "Exit counts must be integers");
1684 SE
->getWiderType(MaxBECount
->getType(), ExactExitCount
->getType());
1685 ExactExitCount
= SE
->getNoopOrZeroExtend(ExactExitCount
, WiderType
);
1686 MaxBECount
= SE
->getNoopOrZeroExtend(MaxBECount
, WiderType
);
1687 assert(MaxBECount
->getType() == ExactExitCount
->getType());
1689 // Can we prove that some other exit must be taken strictly before this
1691 if (SE
->isLoopEntryGuardedByCond(L
, CmpInst::ICMP_ULT
, MaxBECount
,
1693 foldExit(L
, ExitingBB
, false, DeadInsts
);
1698 // As we run, keep track of which exit counts we've encountered. If we
1699 // find a duplicate, we've found an exit which would have exited on the
1700 // exiting iteration, but (from the visit order) strictly follows another
1701 // which does the same and is thus dead.
1702 if (!DominatingExactExitCounts
.insert(ExactExitCount
).second
) {
1703 foldExit(L
, ExitingBB
, false, DeadInsts
);
1708 // TODO: There might be another oppurtunity to leverage SCEV's reasoning
1709 // here. If we kept track of the min of dominanting exits so far, we could
1710 // discharge exits with EC >= MDEC. This is less powerful than the existing
1711 // transform (since later exits aren't considered), but potentially more
1712 // powerful for any case where SCEV can prove a >=u b, but neither a == b
1713 // or a >u b. Such a case is not currently known.
1718 bool IndVarSimplify::predicateLoopExits(Loop
*L
, SCEVExpander
&Rewriter
) {
1719 SmallVector
<BasicBlock
*, 16> ExitingBlocks
;
1720 L
->getExitingBlocks(ExitingBlocks
);
1722 // Finally, see if we can rewrite our exit conditions into a loop invariant
1723 // form. If we have a read-only loop, and we can tell that we must exit down
1724 // a path which does not need any of the values computed within the loop, we
1725 // can rewrite the loop to exit on the first iteration. Note that this
1726 // doesn't either a) tell us the loop exits on the first iteration (unless
1727 // *all* exits are predicateable) or b) tell us *which* exit might be taken.
1728 // This transformation looks a lot like a restricted form of dead loop
1729 // elimination, but restricted to read-only loops and without neccesssarily
1730 // needing to kill the loop entirely.
1731 if (!LoopPredication
)
1734 // Note: ExactBTC is the exact backedge taken count *iff* the loop exits
1735 // through *explicit* control flow. We have to eliminate the possibility of
1736 // implicit exits (see below) before we know it's truly exact.
1737 const SCEV
*ExactBTC
= SE
->getBackedgeTakenCount(L
);
1738 if (isa
<SCEVCouldNotCompute
>(ExactBTC
) || !Rewriter
.isSafeToExpand(ExactBTC
))
1741 assert(SE
->isLoopInvariant(ExactBTC
, L
) && "BTC must be loop invariant");
1742 assert(ExactBTC
->getType()->isIntegerTy() && "BTC must be integer");
1744 auto BadExit
= [&](BasicBlock
*ExitingBB
) {
1745 // If our exiting block exits multiple loops, we can only rewrite the
1746 // innermost one. Otherwise, we're changing how many times the innermost
1747 // loop runs before it exits.
1748 if (LI
->getLoopFor(ExitingBB
) != L
)
1751 // Can't rewrite non-branch yet.
1752 BranchInst
*BI
= dyn_cast
<BranchInst
>(ExitingBB
->getTerminator());
1756 // If already constant, nothing to do.
1757 if (isa
<Constant
>(BI
->getCondition()))
1760 // If the exit block has phis, we need to be able to compute the values
1761 // within the loop which contains them. This assumes trivially lcssa phis
1762 // have already been removed; TODO: generalize
1763 BasicBlock
*ExitBlock
=
1764 BI
->getSuccessor(L
->contains(BI
->getSuccessor(0)) ? 1 : 0);
1765 if (!ExitBlock
->phis().empty())
1768 const SCEV
*ExitCount
= SE
->getExitCount(L
, ExitingBB
);
1769 if (isa
<SCEVCouldNotCompute
>(ExitCount
) ||
1770 !Rewriter
.isSafeToExpand(ExitCount
))
1773 assert(SE
->isLoopInvariant(ExitCount
, L
) &&
1774 "Exit count must be loop invariant");
1775 assert(ExitCount
->getType()->isIntegerTy() && "Exit count must be integer");
1779 // If we have any exits which can't be predicated themselves, than we can't
1780 // predicate any exit which isn't guaranteed to execute before it. Consider
1781 // two exits (a) and (b) which would both exit on the same iteration. If we
1782 // can predicate (b), but not (a), and (a) preceeds (b) along some path, then
1783 // we could convert a loop from exiting through (a) to one exiting through
1784 // (b). Note that this problem exists only for exits with the same exit
1785 // count, and we could be more aggressive when exit counts are known inequal.
1786 llvm::sort(ExitingBlocks
,
1787 [&](BasicBlock
*A
, BasicBlock
*B
) {
1788 // std::sort sorts in ascending order, so we want the inverse of
1789 // the normal dominance relation, plus a tie breaker for blocks
1790 // unordered by dominance.
1791 if (DT
->properlyDominates(A
, B
)) return true;
1792 if (DT
->properlyDominates(B
, A
)) return false;
1793 return A
->getName() < B
->getName();
1795 // Check to see if our exit blocks are a total order (i.e. a linear chain of
1796 // exits before the backedge). If they aren't, reasoning about reachability
1797 // is complicated and we choose not to for now.
1798 for (unsigned i
= 1; i
< ExitingBlocks
.size(); i
++)
1799 if (!DT
->dominates(ExitingBlocks
[i
-1], ExitingBlocks
[i
]))
1802 // Given our sorted total order, we know that exit[j] must be evaluated
1803 // after all exit[i] such j > i.
1804 for (unsigned i
= 0, e
= ExitingBlocks
.size(); i
< e
; i
++)
1805 if (BadExit(ExitingBlocks
[i
])) {
1806 ExitingBlocks
.resize(i
);
1810 if (ExitingBlocks
.empty())
1813 // We rely on not being able to reach an exiting block on a later iteration
1814 // then it's statically compute exit count. The implementaton of
1815 // getExitCount currently has this invariant, but assert it here so that
1816 // breakage is obvious if this ever changes..
1817 assert(llvm::all_of(ExitingBlocks
, [&](BasicBlock
*ExitingBB
) {
1818 return DT
->dominates(ExitingBB
, L
->getLoopLatch());
1821 // At this point, ExitingBlocks consists of only those blocks which are
1822 // predicatable. Given that, we know we have at least one exit we can
1823 // predicate if the loop is doesn't have side effects and doesn't have any
1824 // implicit exits (because then our exact BTC isn't actually exact).
1825 // @Reviewers - As structured, this is O(I^2) for loop nests. Any
1826 // suggestions on how to improve this? I can obviously bail out for outer
1827 // loops, but that seems less than ideal. MemorySSA can find memory writes,
1828 // is that enough for *all* side effects?
1829 for (BasicBlock
*BB
: L
->blocks())
1831 // TODO:isGuaranteedToTransfer
1832 if (I
.mayHaveSideEffects())
1835 bool Changed
= false;
1836 // Finally, do the actual predication for all predicatable blocks. A couple
1838 // 1) We don't bother to constant fold dominated exits with identical exit
1839 // counts; that's simply a form of CSE/equality propagation and we leave
1840 // it for dedicated passes.
1841 // 2) We insert the comparison at the branch. Hoisting introduces additional
1842 // legality constraints and we leave that to dedicated logic. We want to
1843 // predicate even if we can't insert a loop invariant expression as
1844 // peeling or unrolling will likely reduce the cost of the otherwise loop
1846 Rewriter
.setInsertPoint(L
->getLoopPreheader()->getTerminator());
1847 IRBuilder
<> B(L
->getLoopPreheader()->getTerminator());
1848 Value
*ExactBTCV
= nullptr; // Lazily generated if needed.
1849 for (BasicBlock
*ExitingBB
: ExitingBlocks
) {
1850 const SCEV
*ExitCount
= SE
->getExitCount(L
, ExitingBB
);
1852 auto *BI
= cast
<BranchInst
>(ExitingBB
->getTerminator());
1854 if (ExitCount
== ExactBTC
) {
1855 NewCond
= L
->contains(BI
->getSuccessor(0)) ?
1856 B
.getFalse() : B
.getTrue();
1858 Value
*ECV
= Rewriter
.expandCodeFor(ExitCount
);
1860 ExactBTCV
= Rewriter
.expandCodeFor(ExactBTC
);
1861 Value
*RHS
= ExactBTCV
;
1862 if (ECV
->getType() != RHS
->getType()) {
1863 Type
*WiderTy
= SE
->getWiderType(ECV
->getType(), RHS
->getType());
1864 ECV
= B
.CreateZExt(ECV
, WiderTy
);
1865 RHS
= B
.CreateZExt(RHS
, WiderTy
);
1867 auto Pred
= L
->contains(BI
->getSuccessor(0)) ?
1868 ICmpInst::ICMP_NE
: ICmpInst::ICMP_EQ
;
1869 NewCond
= B
.CreateICmp(Pred
, ECV
, RHS
);
1871 Value
*OldCond
= BI
->getCondition();
1872 BI
->setCondition(NewCond
);
1873 if (OldCond
->use_empty())
1874 DeadInsts
.emplace_back(OldCond
);
1881 //===----------------------------------------------------------------------===//
1882 // IndVarSimplify driver. Manage several subpasses of IV simplification.
1883 //===----------------------------------------------------------------------===//
1885 bool IndVarSimplify::run(Loop
*L
) {
1886 // We need (and expect!) the incoming loop to be in LCSSA.
1887 assert(L
->isRecursivelyLCSSAForm(*DT
, *LI
) &&
1888 "LCSSA required to run indvars!");
1890 // If LoopSimplify form is not available, stay out of trouble. Some notes:
1891 // - LSR currently only supports LoopSimplify-form loops. Indvars'
1892 // canonicalization can be a pessimization without LSR to "clean up"
1894 // - We depend on having a preheader; in particular,
1895 // Loop::getCanonicalInductionVariable only supports loops with preheaders,
1896 // and we're in trouble if we can't find the induction variable even when
1897 // we've manually inserted one.
1898 // - LFTR relies on having a single backedge.
1899 if (!L
->isLoopSimplifyForm())
1902 bool Changed
= false;
1903 // If there are any floating-point recurrences, attempt to
1904 // transform them to use integer recurrences.
1905 Changed
|= rewriteNonIntegerIVs(L
);
1907 // Create a rewriter object which we'll use to transform the code with.
1908 SCEVExpander
Rewriter(*SE
, DL
, "indvars");
1910 Rewriter
.setDebugType(DEBUG_TYPE
);
1913 // Eliminate redundant IV users.
1915 // Simplification works best when run before other consumers of SCEV. We
1916 // attempt to avoid evaluating SCEVs for sign/zero extend operations until
1917 // other expressions involving loop IVs have been evaluated. This helps SCEV
1918 // set no-wrap flags before normalizing sign/zero extension.
1919 Rewriter
.disableCanonicalMode();
1920 Changed
|= simplifyAndExtend(L
, Rewriter
, LI
);
1922 // Check to see if we can compute the final value of any expressions
1923 // that are recurrent in the loop, and substitute the exit values from the
1924 // loop into any instructions outside of the loop that use the final values
1925 // of the current expressions.
1926 if (ReplaceExitValue
!= NeverRepl
) {
1927 if (int Rewrites
= rewriteLoopExitValues(L
, LI
, TLI
, SE
, TTI
, Rewriter
, DT
,
1928 ReplaceExitValue
, DeadInsts
)) {
1929 NumReplaced
+= Rewrites
;
1934 // Eliminate redundant IV cycles.
1935 NumElimIV
+= Rewriter
.replaceCongruentIVs(L
, DT
, DeadInsts
, TTI
);
1937 // Try to convert exit conditions to unsigned and rotate computation
1938 // out of the loop. Note: Handles invalidation internally if needed.
1939 Changed
|= canonicalizeExitCondition(L
);
1941 // Try to eliminate loop exits based on analyzeable exit counts
1942 if (optimizeLoopExits(L
, Rewriter
)) {
1944 // Given we've changed exit counts, notify SCEV
1945 // Some nested loops may share same folded exit basic block,
1946 // thus we need to notify top most loop.
1947 SE
->forgetTopmostLoop(L
);
1950 // Try to form loop invariant tests for loop exits by changing how many
1951 // iterations of the loop run when that is unobservable.
1952 if (predicateLoopExits(L
, Rewriter
)) {
1954 // Given we've changed exit counts, notify SCEV
1958 // If we have a trip count expression, rewrite the loop's exit condition
1961 BasicBlock
*PreHeader
= L
->getLoopPreheader();
1963 SmallVector
<BasicBlock
*, 16> ExitingBlocks
;
1964 L
->getExitingBlocks(ExitingBlocks
);
1965 for (BasicBlock
*ExitingBB
: ExitingBlocks
) {
1966 // Can't rewrite non-branch yet.
1967 if (!isa
<BranchInst
>(ExitingBB
->getTerminator()))
1970 // If our exitting block exits multiple loops, we can only rewrite the
1971 // innermost one. Otherwise, we're changing how many times the innermost
1972 // loop runs before it exits.
1973 if (LI
->getLoopFor(ExitingBB
) != L
)
1976 if (!needsLFTR(L
, ExitingBB
))
1979 const SCEV
*ExitCount
= SE
->getExitCount(L
, ExitingBB
);
1980 if (isa
<SCEVCouldNotCompute
>(ExitCount
))
1983 // This was handled above, but as we form SCEVs, we can sometimes refine
1984 // existing ones; this allows exit counts to be folded to zero which
1985 // weren't when optimizeLoopExits saw them. Arguably, we should iterate
1986 // until stable to handle cases like this better.
1987 if (ExitCount
->isZero())
1990 PHINode
*IndVar
= FindLoopCounter(L
, ExitingBB
, ExitCount
, SE
, DT
);
1994 // Avoid high cost expansions. Note: This heuristic is questionable in
1995 // that our definition of "high cost" is not exactly principled.
1996 if (Rewriter
.isHighCostExpansion(ExitCount
, L
, SCEVCheapExpansionBudget
,
1997 TTI
, PreHeader
->getTerminator()))
2000 if (!Rewriter
.isSafeToExpand(ExitCount
))
2003 Changed
|= linearFunctionTestReplace(L
, ExitingBB
,
2008 // Clear the rewriter cache, because values that are in the rewriter's cache
2009 // can be deleted in the loop below, causing the AssertingVH in the cache to
2013 // Now that we're done iterating through lists, clean up any instructions
2014 // which are now dead.
2015 while (!DeadInsts
.empty()) {
2016 Value
*V
= DeadInsts
.pop_back_val();
2018 if (PHINode
*PHI
= dyn_cast_or_null
<PHINode
>(V
))
2019 Changed
|= RecursivelyDeleteDeadPHINode(PHI
, TLI
, MSSAU
.get());
2020 else if (Instruction
*Inst
= dyn_cast_or_null
<Instruction
>(V
))
2022 RecursivelyDeleteTriviallyDeadInstructions(Inst
, TLI
, MSSAU
.get());
2025 // The Rewriter may not be used from this point on.
2027 // Loop-invariant instructions in the preheader that aren't used in the
2028 // loop may be sunk below the loop to reduce register pressure.
2029 Changed
|= sinkUnusedInvariants(L
);
2031 // rewriteFirstIterationLoopExitValues does not rely on the computation of
2032 // trip count and therefore can further simplify exit values in addition to
2033 // rewriteLoopExitValues.
2034 Changed
|= rewriteFirstIterationLoopExitValues(L
);
2036 // Clean up dead instructions.
2037 Changed
|= DeleteDeadPHIs(L
->getHeader(), TLI
, MSSAU
.get());
2039 // Check a post-condition.
2040 assert(L
->isRecursivelyLCSSAForm(*DT
, *LI
) &&
2041 "Indvars did not preserve LCSSA!");
2042 if (VerifyMemorySSA
&& MSSAU
)
2043 MSSAU
->getMemorySSA()->verifyMemorySSA();
2048 PreservedAnalyses
IndVarSimplifyPass::run(Loop
&L
, LoopAnalysisManager
&AM
,
2049 LoopStandardAnalysisResults
&AR
,
2051 Function
*F
= L
.getHeader()->getParent();
2052 const DataLayout
&DL
= F
->getParent()->getDataLayout();
2054 IndVarSimplify
IVS(&AR
.LI
, &AR
.SE
, &AR
.DT
, DL
, &AR
.TLI
, &AR
.TTI
, AR
.MSSA
,
2055 WidenIndVars
&& AllowIVWidening
);
2057 return PreservedAnalyses::all();
2059 auto PA
= getLoopPassPreservedAnalyses();
2060 PA
.preserveSet
<CFGAnalyses
>();
2062 PA
.preserve
<MemorySSAAnalysis
>();