//===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This transformation analyzes and transforms the induction variables (and
// computations derived from them) into simpler forms suitable for subsequent
// analysis and transformation.
//
// If the trip count of a loop is computable, this pass also makes the following
// changes:
//   1. The exit condition for the loop is canonicalized to compare the
//      induction value against the exit value.  This turns loops like:
//        'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
//   2. Any use outside of the loop of an expression derived from the indvar
//      is changed to compute the derived value outside of the loop, eliminating
//      the dependence on the exit value of the induction variable.  If the only
//      purpose of the loop is to compute the exit value of some derived
//      expression, this transformation will make the loop dead.
//
//===----------------------------------------------------------------------===//
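// For example (illustrative, not a transcript of actual pass output), given a
// loop whose only live-out value is a sum that ScalarEvolution can evaluate at
// the exit:
//
//   int s = 0;
//   for (int i = 0; i != n; ++i) s += 4;   // only 's' is used after the loop
//
// change 2 above computes 's = 4 * n' after the loop, at which point the loop
// has no observable effect and later passes can delete it.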
#include "llvm/Transforms/Scalar/IndVarSimplify.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/SimplifyIndVar.h"
#include <cassert>
#include <cstdint>
#include <utility>
using namespace llvm;

#define DEBUG_TYPE "indvars"

STATISTIC(NumWidened     , "Number of indvars widened");
STATISTIC(NumReplaced    , "Number of exit values replaced");
STATISTIC(NumLFTR        , "Number of loop exit tests replaced");
STATISTIC(NumElimExt     , "Number of IV sign/zero extends eliminated");
STATISTIC(NumElimIV      , "Number of congruent IVs eliminated");

// Trip count verification can be enabled by default under NDEBUG if we
// implement a strong expression equivalence checker in SCEV. Until then, we
// use the verify-indvars flag, which may assert in some cases.
static cl::opt<bool> VerifyIndvars(
    "verify-indvars", cl::Hidden,
    cl::desc("Verify the ScalarEvolution result after running indvars"));

enum ReplaceExitVal { NeverRepl, OnlyCheapRepl, NoHardUse, AlwaysRepl };

static cl::opt<ReplaceExitVal> ReplaceExitValue(
    "replexitval", cl::Hidden, cl::init(OnlyCheapRepl),
    cl::desc("Choose the strategy to replace exit value in IndVarSimplify"),
    cl::values(clEnumValN(NeverRepl, "never", "never replace exit value"),
               clEnumValN(OnlyCheapRepl, "cheap",
                          "only replace exit value when the cost is cheap"),
               clEnumValN(NoHardUse, "noharduse",
                          "only replace exit values when loop def likely dead"),
               clEnumValN(AlwaysRepl, "always",
                          "always replace exit value whenever possible")));

static cl::opt<bool> UsePostIncrementRanges(
    "indvars-post-increment-ranges", cl::Hidden,
    cl::desc("Use post increment control-dependent ranges in IndVarSimplify"),
    cl::init(true));

static cl::opt<bool>
DisableLFTR("disable-lftr", cl::Hidden, cl::init(false),
            cl::desc("Disable Linear Function Test Replace optimization"));
namespace {

struct RewritePhi;

class IndVarSimplify {
  LoopInfo *LI;
  ScalarEvolution *SE;
  DominatorTree *DT;
  const DataLayout &DL;
  TargetLibraryInfo *TLI;
  const TargetTransformInfo *TTI;

  SmallVector<WeakTrackingVH, 16> DeadInsts;

  bool isValidRewrite(Value *FromVal, Value *ToVal);

  bool handleFloatingPointIV(Loop *L, PHINode *PH);
  bool rewriteNonIntegerIVs(Loop *L);

  bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI);
  bool optimizeLoopExits(Loop *L);

  bool canLoopBeDeleted(Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet);
  bool rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter);
  bool rewriteFirstIterationLoopExitValues(Loop *L);
  bool hasHardUserWithinLoop(const Loop *L, const Instruction *I) const;

  bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
                                 const SCEV *ExitCount,
                                 PHINode *IndVar, SCEVExpander &Rewriter);

  bool sinkUnusedInvariants(Loop *L);

public:
  IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
                 const DataLayout &DL, TargetLibraryInfo *TLI,
                 TargetTransformInfo *TTI)
      : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI) {}

  bool run(Loop *L);
};

} // end anonymous namespace
/// Return true if the SCEV expansion generated by the rewriter can replace the
/// original value. SCEV guarantees that it produces the same value, but the way
/// it is produced may be illegal IR.  Ideally, this function will only be
/// called for verification.
bool IndVarSimplify::isValidRewrite(Value *FromVal, Value *ToVal) {
  // If an SCEV expression subsumed multiple pointers, its expansion could
  // reassociate the GEP changing the base pointer. This is illegal because the
  // final address produced by a GEP chain must be inbounds relative to its
  // underlying object. Otherwise basic alias analysis, among other things,
  // could fail in a dangerous way. Ultimately, SCEV will be improved to avoid
  // producing an expression involving multiple pointers. Until then, we must
  // bail out here.
  //
  // Retrieve the pointer operand of the GEP. Don't use GetUnderlyingObject
  // because it understands lcssa phis while SCEV does not.
  Value *FromPtr = FromVal;
  Value *ToPtr = ToVal;
  if (auto *GEP = dyn_cast<GEPOperator>(FromVal)) {
    FromPtr = GEP->getPointerOperand();
  }
  if (auto *GEP = dyn_cast<GEPOperator>(ToVal)) {
    ToPtr = GEP->getPointerOperand();
  }
  if (FromPtr != FromVal || ToPtr != ToVal) {
    // Quickly check the common case
    if (FromPtr == ToPtr)
      return true;

    // SCEV may have rewritten an expression that produces the GEP's pointer
    // operand. That's ok as long as the pointer operand has the same base
    // pointer. Unlike GetUnderlyingObject(), getPointerBase() will find the
    // base of a recurrence. This handles the case in which SCEV expansion
    // converts a pointer type recurrence into a nonrecurrent pointer base
    // indexed by an integer recurrence.
    //
    // If the GEP base pointer is a vector of pointers, abort.
    if (!FromPtr->getType()->isPointerTy() || !ToPtr->getType()->isPointerTy())
      return false;

    const SCEV *FromBase = SE->getPointerBase(SE->getSCEV(FromPtr));
    const SCEV *ToBase = SE->getPointerBase(SE->getSCEV(ToPtr));
    if (FromBase == ToBase)
      return true;

    LLVM_DEBUG(dbgs() << "INDVARS: GEP rewrite bail out " << *FromBase
                      << " != " << *ToBase << "\n");

    return false;
  }
  return true;
}
/// Determine the insertion point for this user. By default, insert immediately
/// before the user. SCEVExpander or LICM will hoist loop invariants out of the
/// loop. For PHI nodes, there may be multiple uses, so compute the nearest
/// common dominator for the incoming blocks. A nullptr can be returned if no
/// viable location is found: it may happen if User is a PHI and Def only comes
/// to this PHI from unreachable blocks.
static Instruction *getInsertPointForUses(Instruction *User, Value *Def,
                                          DominatorTree *DT, LoopInfo *LI) {
  PHINode *PHI = dyn_cast<PHINode>(User);
  if (!PHI)
    return User;

  Instruction *InsertPt = nullptr;
  for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) {
    if (PHI->getIncomingValue(i) != Def)
      continue;

    BasicBlock *InsertBB = PHI->getIncomingBlock(i);

    if (!DT->isReachableFromEntry(InsertBB))
      continue;

    if (!InsertPt) {
      InsertPt = InsertBB->getTerminator();
      continue;
    }
    InsertBB = DT->findNearestCommonDominator(InsertPt->getParent(), InsertBB);
    InsertPt = InsertBB->getTerminator();
  }

  // If we have skipped all inputs, it means that Def only comes to Phi from
  // unreachable blocks.
  if (!InsertPt)
    return nullptr;

  auto *DefI = dyn_cast<Instruction>(Def);
  if (!DefI)
    return InsertPt;

  assert(DT->dominates(DefI, InsertPt) && "def does not dominate all uses");

  auto *L = LI->getLoopFor(DefI->getParent());
  assert(!L || L->contains(LI->getLoopFor(InsertPt->getParent())));

  for (auto *DTN = (*DT)[InsertPt->getParent()]; DTN; DTN = DTN->getIDom())
    if (LI->getLoopFor(DTN->getBlock()) == L)
      return DTN->getBlock()->getTerminator();

  llvm_unreachable("DefI dominates InsertPt!");
}
//===----------------------------------------------------------------------===//
// rewriteNonIntegerIVs and helpers. Prefer integer IVs.
//===----------------------------------------------------------------------===//

/// Convert APF to an integer, if possible.
static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
  bool isExact = false;
  // See if we can convert this to an int64_t
  uint64_t UIntVal;
  if (APF.convertToInteger(makeMutableArrayRef(UIntVal), 64, true,
                           APFloat::rmTowardZero, &isExact) != APFloat::opOK ||
      !isExact)
    return false;
  IntVal = UIntVal;
  return true;
}
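// For example, an APFloat holding 42.0 converts exactly and yields
// IntVal == 42, while 42.5 (inexact under rmTowardZero) or a value outside the
// signed 64-bit range fails the opOK/isExact test above and is rejected.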
/// If the loop has a floating-point induction variable, then insert a
/// corresponding integer induction variable if possible.
/// For example,
/// for(double i = 0; i < 10000; ++i)
///   bar(i)
/// is converted into
/// for(int i = 0; i < 10000; ++i)
///   bar((double)i);
bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
  unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
  unsigned BackEdge     = IncomingEdge^1;

  // Check incoming value.
  auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));

  int64_t InitValue;
  if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
    return false;

  // Check the IV increment.  Reject this PN if the increment operation is not
  // an add or the increment value cannot be represented by an integer.
  auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
  if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return false;

  // If this is not an add of the PHI with a constantfp, or if the constant fp
  // is not an integer, bail out.
  ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
  int64_t IncValue;
  if (IncValueVal == nullptr || Incr->getOperand(0) != PN ||
      !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
    return false;

  // Check Incr uses. One user is PN and the other user is an exit condition
  // used by the conditional terminator.
  Value::user_iterator IncrUse = Incr->user_begin();
  Instruction *U1 = cast<Instruction>(*IncrUse++);
  if (IncrUse == Incr->user_end()) return false;
  Instruction *U2 = cast<Instruction>(*IncrUse++);
  if (IncrUse != Incr->user_end()) return false;

  // Find the exit condition, which is an fcmp.  If it doesn't exist, or if it
  // isn't only used by a branch, we can't transform it.
  FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
  if (!Compare)
    Compare = dyn_cast<FCmpInst>(U2);
  if (!Compare || !Compare->hasOneUse() ||
      !isa<BranchInst>(Compare->user_back()))
    return false;

  BranchInst *TheBr = cast<BranchInst>(Compare->user_back());

  // We need to verify that the branch actually controls the iteration count
  // of the loop.  If not, the new IV can overflow and no one will notice.
  // The branch block must be in the loop and one of the successors must be out
  // of the loop.
  assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
  if (!L->contains(TheBr->getParent()) ||
      (L->contains(TheBr->getSuccessor(0)) &&
       L->contains(TheBr->getSuccessor(1))))
    return false;

  // If it isn't a comparison with an integer-as-fp (the exit value), we can't
  // transform it.
  ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
  int64_t ExitValue;
  if (ExitValueVal == nullptr ||
      !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
    return false;

  // Find the new predicate for the integer comparison.
  CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
  switch (Compare->getPredicate()) {
  default: return false;  // Unknown comparison.
  case CmpInst::FCMP_OEQ:
  case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
  case CmpInst::FCMP_ONE:
  case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
  case CmpInst::FCMP_OGT:
  case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
  case CmpInst::FCMP_OGE:
  case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
  case CmpInst::FCMP_OLT:
  case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
  case CmpInst::FCMP_OLE:
  case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
  }

  // We convert the floating point induction variable to a signed i32 value if
  // we can.  This is only safe if the comparison will not overflow in a way
  // that won't be trapped by the integer equivalent operations.  Check for this
  // now.
  // TODO: We could use i64 if it is native and the range requires it.

  // The start/stride/exit values must all fit in signed i32.
  if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
    return false;

  // If not actually striding (add x, 0.0), avoid touching the code.
  if (IncValue == 0)
    return false;

  // Positive and negative strides have different safety conditions.
  if (IncValue > 0) {
    // If we have a positive stride, we require the init to be less than the
    // exit value.
    if (InitValue >= ExitValue)
      return false;

    uint32_t Range = uint32_t(ExitValue-InitValue);
    // Check for infinite loop, either:
    // while (i <= Exit) or until (i > Exit)
    if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
      if (++Range == 0) return false;  // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return false;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
      return false;
  } else {
    // If we have a negative stride, we require the init to be greater than the
    // exit value.
    if (InitValue <= ExitValue)
      return false;

    uint32_t Range = uint32_t(InitValue-ExitValue);
    // Check for infinite loop, either:
    // while (i >= Exit) or until (i < Exit)
    if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
      if (++Range == 0) return false;  // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(-IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return false;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
      return false;
  }

  IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());

  // Insert new integer induction variable.
  PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN);
  NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
                      PN->getIncomingBlock(IncomingEdge));

  Value *NewAdd =
    BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue),
                              Incr->getName()+".int", Incr);
  NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));

  ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd,
                                      ConstantInt::get(Int32Ty, ExitValue),
                                      Compare->getName());

  // In the following deletions, PN may become dead and may be deleted.
  // Use a WeakTrackingVH to observe whether this happens.
  WeakTrackingVH WeakPH = PN;

  // Delete the old floating point exit comparison.  The branch starts using the
  // new comparison.
  NewCompare->takeName(Compare);
  Compare->replaceAllUsesWith(NewCompare);
  RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI);

  // Delete the old floating point increment.
  Incr->replaceAllUsesWith(UndefValue::get(Incr->getType()));
  RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI);

  // If the FP induction variable still has uses, this is because something else
  // in the loop uses its value.  In order to canonicalize the induction
  // variable, we chose to eliminate the IV and rewrite it in terms of an
  // int->fp cast.
  //
  // We give preference to sitofp over uitofp because it is faster on most
  // platforms.
  if (WeakPH) {
    Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
                                 &*PN->getParent()->getFirstInsertionPt());
    PN->replaceAllUsesWith(Conv);
    RecursivelyDeleteTriviallyDeadInstructions(PN, TLI);
  }
  return true;
}
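// Illustrative IR for the rewrite above (block and value names hypothetical):
//
//   %iv = phi double [ 0.000000e+00, %entry ], [ %inc, %loop ]
//   %inc = fadd double %iv, 1.000000e+00
//   %cmp = fcmp olt double %inc, 1.000000e+04
//
// becomes
//
//   %iv.int = phi i32 [ 0, %entry ], [ %inc.int, %loop ]
//   %inc.int = add i32 %iv.int, 1
//   %cmp = icmp slt i32 %inc.int, 10000
//
// with any remaining floating-point uses of the old IV fed by a sitofp of the
// new integer IV.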
bool IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
  // First step.  Check to see if there are any floating-point recurrences.
  // If there are, change them into integer recurrences, permitting analysis by
  // the SCEV routines.
  BasicBlock *Header = L->getHeader();

  SmallVector<WeakTrackingVH, 8> PHIs;
  for (PHINode &PN : Header->phis())
    PHIs.push_back(&PN);

  bool Changed = false;
  for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
    if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i]))
      Changed |= handleFloatingPointIV(L, PN);

  // If the loop previously had a floating-point IV, ScalarEvolution
  // may not have been able to compute a trip count. Now that we've done some
  // rewriting, the trip count may be computable.
  if (Changed)
    SE->forgetLoop(L);
  return Changed;
}
namespace {

// Collect information about PHI nodes which can be transformed in
// rewriteLoopExitValues.
struct RewritePhi {
  PHINode *PN;

  // Ith incoming value.
  unsigned Ith;

  // Exit value after expansion.
  Value *Val;

  // True if expanding the exit value is high cost.
  bool HighCost;

  RewritePhi(PHINode *P, unsigned I, Value *V, bool H)
      : PN(P), Ith(I), Val(V), HighCost(H) {}
};

} // end anonymous namespace
//===----------------------------------------------------------------------===//
// rewriteLoopExitValues - Optimize IV users outside the loop.
// As a side effect, reduces the amount of IV processing within the loop.
//===----------------------------------------------------------------------===//

bool IndVarSimplify::hasHardUserWithinLoop(const Loop *L, const Instruction *I) const {
  SmallPtrSet<const Instruction *, 8> Visited;
  SmallVector<const Instruction *, 8> WorkList;
  Visited.insert(I);
  WorkList.push_back(I);
  while (!WorkList.empty()) {
    const Instruction *Curr = WorkList.pop_back_val();
    // This use is outside the loop, nothing to do.
    if (!L->contains(Curr))
      continue;
    // Do we assume it is a "hard" use which will not be eliminated easily?
    if (Curr->mayHaveSideEffects())
      return true;
    // Otherwise, add all its users to worklist.
    for (auto U : Curr->users()) {
      auto *UI = cast<Instruction>(U);
      if (Visited.insert(UI).second)
        WorkList.push_back(UI);
    }
  }
  return false;
}
/// Check to see if this loop has a computable loop-invariant execution count.
/// If so, this means that we can compute the final value of any expressions
/// that are recurrent in the loop, and substitute the exit values from the loop
/// into any instructions outside of the loop that use the final values of the
/// current expressions.
///
/// This is mostly redundant with the regular IndVarSimplify activities that
/// happen later, except that it's more powerful in some cases, because it's
/// able to brute-force evaluate arbitrary instructions as long as they have
/// constant operands at the beginning of the loop.
bool IndVarSimplify::rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
  // Check a pre-condition.
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
         "Indvars did not preserve LCSSA!");

  SmallVector<BasicBlock*, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);

  SmallVector<RewritePhi, 8> RewritePhiSet;
  // Find all values that are computed inside the loop, but used outside of it.
  // Because of LCSSA, these values will only occur in LCSSA PHI Nodes.  Scan
  // the exit blocks of the loop to find them.
  for (BasicBlock *ExitBB : ExitBlocks) {
    // If there are no PHI nodes in this exit block, then no values defined
    // inside the loop are used on this path, skip it.
    PHINode *PN = dyn_cast<PHINode>(ExitBB->begin());
    if (!PN) continue;

    unsigned NumPreds = PN->getNumIncomingValues();

    // Iterate over all of the PHI nodes.
    BasicBlock::iterator BBI = ExitBB->begin();
    while ((PN = dyn_cast<PHINode>(BBI++))) {
      if (PN->use_empty())
        continue; // dead use, don't replace it

      if (!SE->isSCEVable(PN->getType()))
        continue;

      // It's necessary to tell ScalarEvolution about this explicitly so that
      // it can walk the def-use list and forget all SCEVs, as it may not be
      // watching the PHI itself. Once the new exit value is in place, there
      // may not be a def-use connection between the loop and every instruction
      // which got a SCEVAddRecExpr for that loop.
      SE->forgetValue(PN);

      // Iterate over all of the values in all the PHI nodes.
      for (unsigned i = 0; i != NumPreds; ++i) {
        // If the value being merged in is not an instruction or is not defined
        // in the loop, skip it.
        Value *InVal = PN->getIncomingValue(i);
        if (!isa<Instruction>(InVal))
          continue;

        // If this pred is for a subloop, not L itself, skip it.
        if (LI->getLoopFor(PN->getIncomingBlock(i)) != L)
          continue; // The Block is in a subloop, skip it.

        // Check that InVal is defined in the loop.
        Instruction *Inst = cast<Instruction>(InVal);
        if (!L->contains(Inst))
          continue;

        // Okay, this instruction has a user outside of the current loop
        // and varies predictably *inside* the loop.  Evaluate the value it
        // contains when the loop exits, if possible.  We prefer to start with
        // expressions which are true for all exits (so as to maximize
        // expression reuse by the SCEVExpander), but resort to per-exit
        // evaluation if that fails.
        const SCEV *ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop());
        if (isa<SCEVCouldNotCompute>(ExitValue) ||
            !SE->isLoopInvariant(ExitValue, L) ||
            !isSafeToExpand(ExitValue, *SE)) {
          // TODO: This should probably be sunk into SCEV in some way; maybe a
          // getSCEVForExit(SCEV*, L, ExitingBB)?  It can be generalized for
          // most SCEV expressions and other recurrence types (e.g. shift
          // recurrences).  Is there existing code we can reuse?
          const SCEV *ExitCount = SE->getExitCount(L, PN->getIncomingBlock(i));
          if (isa<SCEVCouldNotCompute>(ExitCount))
            continue;
          if (auto *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Inst)))
            if (AddRec->getLoop() == L)
              ExitValue = AddRec->evaluateAtIteration(ExitCount, *SE);
          if (isa<SCEVCouldNotCompute>(ExitValue) ||
              !SE->isLoopInvariant(ExitValue, L) ||
              !isSafeToExpand(ExitValue, *SE))
            continue;
        }

        // Computing the value outside of the loop brings no benefit if it is
        // definitely used inside the loop in a way which can not be optimized
        // away. Avoid doing so unless we know we have a value which computes
        // the ExitValue already. TODO: This should be merged into the SCEV
        // expander to leverage its knowledge of existing expressions.
        if (ReplaceExitValue != AlwaysRepl &&
            !isa<SCEVConstant>(ExitValue) && !isa<SCEVUnknown>(ExitValue) &&
            hasHardUserWithinLoop(L, Inst))
          continue;

        bool HighCost = Rewriter.isHighCostExpansion(ExitValue, L, Inst);
        Value *ExitVal = Rewriter.expandCodeFor(ExitValue, PN->getType(), Inst);

        LLVM_DEBUG(dbgs() << "INDVARS: RLEV: AfterLoopVal = " << *ExitVal
                          << '\n'
                          << "  LoopVal = " << *Inst << "\n");

        if (!isValidRewrite(Inst, ExitVal)) {
          DeadInsts.push_back(ExitVal);
          continue;
        }

#ifndef NDEBUG
        // If we reuse an instruction from a loop which is neither L nor one of
        // its containing loops, we end up breaking LCSSA form for this loop by
        // creating a new use of its instruction.
        if (auto *ExitInsn = dyn_cast<Instruction>(ExitVal))
          if (auto *EVL = LI->getLoopFor(ExitInsn->getParent()))
            if (EVL != L)
              assert(EVL->contains(L) && "LCSSA breach detected!");
#endif

        // Collect all the candidate PHINodes to be rewritten.
        RewritePhiSet.emplace_back(PN, i, ExitVal, HighCost);
      }
    }
  }

  bool LoopCanBeDel = canLoopBeDeleted(L, RewritePhiSet);

  bool Changed = false;
  // Transformation.
  for (const RewritePhi &Phi : RewritePhiSet) {
    PHINode *PN = Phi.PN;
    Value *ExitVal = Phi.Val;

    // Only do the rewrite when the ExitValue can be expanded cheaply.
    // If LoopCanBeDel is true, rewrite exit value aggressively.
    if (ReplaceExitValue == OnlyCheapRepl && !LoopCanBeDel && Phi.HighCost) {
      DeadInsts.push_back(ExitVal);
      continue;
    }

    Changed = true;
    ++NumReplaced;
    Instruction *Inst = cast<Instruction>(PN->getIncomingValue(Phi.Ith));
    PN->setIncomingValue(Phi.Ith, ExitVal);

    // If this instruction is now dead, queue it for deletion; deleting it
    // immediately would invalidate iterators.
    if (isInstructionTriviallyDead(Inst, TLI))
      DeadInsts.push_back(Inst);

    // Replace PN with ExitVal if that is legal and does not break LCSSA.
    if (PN->getNumIncomingValues() == 1 &&
        LI->replacementPreservesLCSSAForm(PN, ExitVal)) {
      PN->replaceAllUsesWith(ExitVal);
      PN->eraseFromParent();
    }
  }

  // The insertion point instruction may have been deleted; clear it out
  // so that the rewriter doesn't trip over it later.
  Rewriter.clearInsertPoint();
  return Changed;
}
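// Illustrative effect of the rewrite above (names hypothetical): for a loop
// with a computable trip count %n, an LCSSA phi such as
//
//   exit:
//     %s.lcssa = phi i32 [ %s.next, %loop ]
//
// can have its incoming value replaced by a loop-invariant expansion like
// '%s.exit = mul i32 4, %n' emitted outside the loop, severing the
// out-of-loop dependence on %s.next.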
//===---------------------------------------------------------------------===//
// rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know
// they will exit at the first iteration.
//===---------------------------------------------------------------------===//

/// Check to see if this loop has loop-invariant conditions which lead to loop
/// exits. If so, we know that if the exit path is taken, it is at the first
/// loop iteration. This lets us predict exit values of PHI nodes that live in
/// the loop header.
bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
  // Verify the input to the pass is already in LCSSA form.
  assert(L->isLCSSAForm(*DT));

  SmallVector<BasicBlock *, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);

  bool MadeAnyChanges = false;
  for (auto *ExitBB : ExitBlocks) {
    // If there are no PHI nodes in this exit block, then no values defined
    // inside the loop are used on this path.
    for (PHINode &PN : ExitBB->phis()) {
      for (unsigned IncomingValIdx = 0, E = PN.getNumIncomingValues();
           IncomingValIdx != E; ++IncomingValIdx) {
        auto *IncomingBB = PN.getIncomingBlock(IncomingValIdx);

        // Can we prove that the exit must run on the first iteration if it
        // runs at all?  (i.e. early exits are fine for our purposes, but
        // traces which lead to this exit being taken on the 2nd iteration
        // aren't.)  Note that this is about whether the exit branch is
        // executed, not about whether it is taken.
        if (!L->getLoopLatch() ||
            !DT->dominates(IncomingBB, L->getLoopLatch()))
          continue;

        // Get the condition that leads to the exit path.
        auto *TermInst = IncomingBB->getTerminator();

        Value *Cond = nullptr;
        if (auto *BI = dyn_cast<BranchInst>(TermInst)) {
          // Must be a conditional branch, otherwise the block
          // should not be in the loop.
          Cond = BI->getCondition();
        } else if (auto *SI = dyn_cast<SwitchInst>(TermInst))
          Cond = SI->getCondition();
        else
          continue;

        if (!L->isLoopInvariant(Cond))
          continue;

        auto *ExitVal = dyn_cast<PHINode>(PN.getIncomingValue(IncomingValIdx));

        // Only deal with PHIs in the loop header.
        if (!ExitVal || ExitVal->getParent() != L->getHeader())
          continue;

        // If ExitVal is a PHI on the loop header, then we know its
        // value along this exit because the exit can only be taken
        // on the first iteration.
        auto *LoopPreheader = L->getLoopPreheader();
        assert(LoopPreheader && "Invalid loop");
        int PreheaderIdx = ExitVal->getBasicBlockIndex(LoopPreheader);
        if (PreheaderIdx != -1) {
          assert(ExitVal->getParent() == L->getHeader() &&
                 "ExitVal must be in loop header");
          MadeAnyChanges = true;
          PN.setIncomingValue(IncomingValIdx,
                              ExitVal->getIncomingValue(PreheaderIdx));
        }
      }
    }
  }
  return MadeAnyChanges;
}
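// Illustrative case for the routine above (names hypothetical): if a
// loop-invariant condition %c controls an exit branch
// 'br i1 %c, label %exit, label %body' that executes on every iteration, then
// %exit, if reached at all, is reached on iteration one; an exit phi reading a
// header phi %h can therefore read %h's incoming value from the preheader
// instead.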
/// Check whether it is possible to delete the loop after rewriting the exit
/// value. If it is possible, ignore ReplaceExitValue and do the rewriting
/// aggressively.
bool IndVarSimplify::canLoopBeDeleted(
    Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet) {
  BasicBlock *Preheader = L->getLoopPreheader();
  // If there is no preheader, the loop will not be deleted.
  if (!Preheader)
    return false;

  // In the LoopDeletion pass, a loop can be deleted even when
  // ExitingBlocks.size() > 1. We conservatively skip the multiple-ExitingBlocks
  // case here for simplicity.
  // TODO: If we see a testcase where a loop with multiple ExitingBlocks can be
  // deleted after exit value rewriting, we can enhance the logic here.
  SmallVector<BasicBlock *, 4> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);
  SmallVector<BasicBlock *, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);
  if (ExitBlocks.size() != 1 || ExitingBlocks.size() != 1)
    return false;

  BasicBlock *ExitBlock = ExitBlocks[0];
  BasicBlock::iterator BI = ExitBlock->begin();
  while (PHINode *P = dyn_cast<PHINode>(BI)) {
    Value *Incoming = P->getIncomingValueForBlock(ExitingBlocks[0]);

    // If the Incoming value of P is found in RewritePhiSet, we know it
    // could be rewritten to use a loop invariant value in the transformation
    // phase later. Skip it in the loop invariant check below.
    bool found = false;
    for (const RewritePhi &Phi : RewritePhiSet) {
      unsigned i = Phi.Ith;
      if (Phi.PN == P && (Phi.PN)->getIncomingValue(i) == Incoming) {
        found = true;
        break;
      }
    }

    Instruction *I;
    if (!found && (I = dyn_cast<Instruction>(Incoming)))
      if (!L->hasLoopInvariantOperands(I))
        return false;

    ++BI;
  }

  for (auto *BB : L->blocks())
    if (llvm::any_of(*BB, [](Instruction &I) {
          return I.mayHaveSideEffects();
        }))
      return false;

  return true;
}
//===----------------------------------------------------------------------===//
// IV Widening - Extend the width of an IV to cover its widest uses.
//===----------------------------------------------------------------------===//

namespace {

// Collect information about induction variables that are used by sign/zero
// extend operations. This information is recorded by CollectExtend and provides
// the input to WidenIV.
struct WideIVInfo {
  PHINode *NarrowIV = nullptr;

  // Widest integer type created by a [sz]ext.
  Type *WidestNativeType = nullptr;

  // Was a sext user seen before a zext?
  bool IsSigned = false;
};

} // end anonymous namespace
/// Update information about the induction variable that is extended by this
/// sign or zero extend operation. This is used to determine the final width of
/// the IV before actually widening it.
static void visitIVCast(CastInst *Cast, WideIVInfo &WI, ScalarEvolution *SE,
                        const TargetTransformInfo *TTI) {
  bool IsSigned = Cast->getOpcode() == Instruction::SExt;
  if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
    return;

  Type *Ty = Cast->getType();
  uint64_t Width = SE->getTypeSizeInBits(Ty);
  if (!Cast->getModule()->getDataLayout().isLegalInteger(Width))
    return;

  // Check that `Cast` actually extends the induction variable (we rely on this
  // later).  This takes care of cases where `Cast` is extending a truncation of
  // the narrow induction variable, and thus can end up being narrower than the
  // "narrow" induction variable.
  uint64_t NarrowIVWidth = SE->getTypeSizeInBits(WI.NarrowIV->getType());
  if (NarrowIVWidth >= Width)
    return;

  // Cast is either an sext or zext up to this point.
  // We should not widen an indvar if arithmetic on the wider indvar is more
  // expensive than it is on the narrower indvar. We check only the cost of ADD
  // because at least an ADD is required to increment the induction variable. We
  // could compute more comprehensively the cost of all instructions on the
  // induction variable when necessary.
  if (TTI &&
      TTI->getArithmeticInstrCost(Instruction::Add, Ty) >
          TTI->getArithmeticInstrCost(Instruction::Add,
                                      Cast->getOperand(0)->getType())) {
    return;
  }

  if (!WI.WidestNativeType) {
    WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
    WI.IsSigned = IsSigned;
    return;
  }

  // We extend the IV to satisfy the sign of its first user, arbitrarily.
  if (WI.IsSigned != IsSigned)
    return;

  if (Width > SE->getTypeSizeInBits(WI.WidestNativeType))
    WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
}
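// For example (illustrative): if a narrow i32 IV is first seen under
// 'sext i32 %iv to i64' on a target where i64 is legal, WidestNativeType
// becomes i64 with IsSigned = true; a later 'zext i32 %iv to i64' user is then
// ignored by the sign check above, and a 'sext ... to i128' would already have
// been rejected by the isLegalInteger check.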
namespace {

/// Record a link in the Narrow IV def-use chain along with the WideIV that
/// computes the same value as the Narrow IV def. This avoids caching Use*
/// pointers.
struct NarrowIVDefUse {
  Instruction *NarrowDef = nullptr;
  Instruction *NarrowUse = nullptr;
  Instruction *WideDef = nullptr;

  // True if the narrow def is never negative.  Tracking this information lets
  // us use a sign extension instead of a zero extension or vice versa, when
  // profitable and legal.
  bool NeverNegative = false;

  NarrowIVDefUse(Instruction *ND, Instruction *NU, Instruction *WD,
                 bool NeverNegative)
      : NarrowDef(ND), NarrowUse(NU), WideDef(WD),
        NeverNegative(NeverNegative) {}
};

/// The goal of this transform is to remove sign and zero extends without
/// creating any new induction variables. To do this, it creates a new phi of
/// the wider type and redirects all users, either removing extends or inserting
/// truncs whenever we stop propagating the type.
class WidenIV {
  // Parameters
  PHINode *OrigPhi;
  Type *WideType;

  // Context
  LoopInfo        *LI;
  Loop            *L;
  ScalarEvolution *SE;
  DominatorTree   *DT;

  // Does the module have any calls to the llvm.experimental.guard intrinsic
  // at all? If not we can avoid scanning instructions looking for guards.
  bool HasGuards;

  // Result
  PHINode *WidePhi = nullptr;
  Instruction *WideInc = nullptr;
  const SCEV *WideIncExpr = nullptr;
  SmallVectorImpl<WeakTrackingVH> &DeadInsts;

  SmallPtrSet<Instruction *,16> Widened;
  SmallVector<NarrowIVDefUse, 8> NarrowIVUsers;

  enum ExtendKind { ZeroExtended, SignExtended, Unknown };

  // A map tracking the kind of extension used to widen each narrow IV
  // and narrow IV user.
  // Key: pointer to a narrow IV or IV user.
  // Value: the kind of extension used to widen this Instruction.
  DenseMap<AssertingVH<Instruction>, ExtendKind> ExtendKindMap;

  using DefUserPair = std::pair<AssertingVH<Value>, AssertingVH<Instruction>>;

  // A map with control-dependent ranges for post increment IV uses. The key is
  // a pair of IV def and a use of this def denoting the context. The value is
  // a ConstantRange representing possible values of the def at the given
  // context.
  DenseMap<DefUserPair, ConstantRange> PostIncRangeInfos;

  Optional<ConstantRange> getPostIncRangeInfo(Value *Def,
                                              Instruction *UseI) {
    DefUserPair Key(Def, UseI);
    auto It = PostIncRangeInfos.find(Key);
    return It == PostIncRangeInfos.end()
               ? Optional<ConstantRange>(None)
               : Optional<ConstantRange>(It->second);
  }

  void calculatePostIncRanges(PHINode *OrigPhi);
  void calculatePostIncRange(Instruction *NarrowDef, Instruction *NarrowUser);

  void updatePostIncRangeInfo(Value *Def, Instruction *UseI, ConstantRange R) {
    DefUserPair Key(Def, UseI);
    auto It = PostIncRangeInfos.find(Key);
    if (It == PostIncRangeInfos.end())
      PostIncRangeInfos.insert({Key, R});
    else
      It->second = R.intersectWith(It->second);
  }

public:
  WidenIV(const WideIVInfo &WI, LoopInfo *LInfo, ScalarEvolution *SEv,
          DominatorTree *DTree, SmallVectorImpl<WeakTrackingVH> &DI,
          bool HasGuards)
      : OrigPhi(WI.NarrowIV), WideType(WI.WidestNativeType), LI(LInfo),
        L(LI->getLoopFor(OrigPhi->getParent())), SE(SEv), DT(DTree),
        HasGuards(HasGuards), DeadInsts(DI) {
    assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV");
    ExtendKindMap[OrigPhi] = WI.IsSigned ? SignExtended : ZeroExtended;
  }

  PHINode *createWideIV(SCEVExpander &Rewriter);

protected:
  Value *createExtendInst(Value *NarrowOper, Type *WideType, bool IsSigned,
                          Instruction *Use);

  Instruction *cloneIVUser(NarrowIVDefUse DU, const SCEVAddRecExpr *WideAR);
  Instruction *cloneArithmeticIVUser(NarrowIVDefUse DU,
                                     const SCEVAddRecExpr *WideAR);
  Instruction *cloneBitwiseIVUser(NarrowIVDefUse DU);

  ExtendKind getExtendKind(Instruction *I);

  using WidenedRecTy = std::pair<const SCEVAddRecExpr *, ExtendKind>;

  WidenedRecTy getWideRecurrence(NarrowIVDefUse DU);

  WidenedRecTy getExtendedOperandRecurrence(NarrowIVDefUse DU);

  const SCEV *getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
                              unsigned OpCode) const;

  Instruction *widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter);

  bool widenLoopCompare(NarrowIVDefUse DU);
  bool widenWithVariantLoadUse(NarrowIVDefUse DU);
  void widenWithVariantLoadUseCodegen(NarrowIVDefUse DU);

  void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef);
};

} // end anonymous namespace
Value *WidenIV::createExtendInst(Value *NarrowOper, Type *WideType,
                                 bool IsSigned, Instruction *Use) {
  // Set the debug location and conservative insertion point.
  IRBuilder<> Builder(Use);
  // Hoist the insertion point into loop preheaders as far as possible.
  for (const Loop *L = LI->getLoopFor(Use->getParent());
       L && L->getLoopPreheader() && L->isLoopInvariant(NarrowOper);
       L = L->getParentLoop())
    Builder.SetInsertPoint(L->getLoopPreheader()->getTerminator());

  return IsSigned ? Builder.CreateSExt(NarrowOper, WideType) :
                    Builder.CreateZExt(NarrowOper, WideType);
}
/// Instantiate a wide operation to replace a narrow operation. This only needs
/// to handle operations that can evaluate to SCEVAddRec. It can safely return
/// 0 for any operation we decide not to clone.
Instruction *WidenIV::cloneIVUser(NarrowIVDefUse DU,
                                  const SCEVAddRecExpr *WideAR) {
  unsigned Opcode = DU.NarrowUse->getOpcode();
  switch (Opcode) {
  default:
    return nullptr;
  case Instruction::Add:
  case Instruction::Mul:
  case Instruction::UDiv:
  case Instruction::Sub:
    return cloneArithmeticIVUser(DU, WideAR);

  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
    return cloneBitwiseIVUser(DU);
  }
}
Instruction *WidenIV::cloneBitwiseIVUser(NarrowIVDefUse DU) {
  Instruction *NarrowUse = DU.NarrowUse;
  Instruction *NarrowDef = DU.NarrowDef;
  Instruction *WideDef = DU.WideDef;

  LLVM_DEBUG(dbgs() << "Cloning bitwise IVUser: " << *NarrowUse << "\n");

  // Replace NarrowDef operands with WideDef. Otherwise, we don't know anything
  // about the narrow operand yet so must insert a [sz]ext. It is probably loop
  // invariant and will be folded or hoisted. If it actually comes from a
  // widened IV, it should be removed during a future call to widenIVUse.
  bool IsSigned = getExtendKind(NarrowDef) == SignExtended;
  Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(0), WideType,
                                      IsSigned, NarrowUse);
  Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(1), WideType,
                                      IsSigned, NarrowUse);

  auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
  auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
                                        NarrowBO->getName());
  IRBuilder<> Builder(NarrowUse);
  Builder.Insert(WideBO);
  WideBO->copyIRFlags(NarrowBO);
  return WideBO;
}
Instruction *WidenIV::cloneArithmeticIVUser(NarrowIVDefUse DU,
                                            const SCEVAddRecExpr *WideAR) {
  Instruction *NarrowUse = DU.NarrowUse;
  Instruction *NarrowDef = DU.NarrowDef;
  Instruction *WideDef = DU.WideDef;

  LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");

  unsigned IVOpIdx = (NarrowUse->getOperand(0) == NarrowDef) ? 0 : 1;

  // We're trying to find X such that
  //
  //  Widen(NarrowDef `op` NonIVNarrowDef) == WideAR == WideDef `op.wide` X
  //
  // We guess two solutions to X, sext(NonIVNarrowDef) and zext(NonIVNarrowDef),
  // and check using SCEV if any of them are correct.

  // Returns true if extending NonIVNarrowDef according to `SignExt` is a
  // correct solution to X.
  auto GuessNonIVOperand = [&](bool SignExt) {
    const SCEV *WideLHS;
    const SCEV *WideRHS;

    auto GetExtend = [this, SignExt](const SCEV *S, Type *Ty) {
      if (SignExt)
        return SE->getSignExtendExpr(S, Ty);
      return SE->getZeroExtendExpr(S, Ty);
    };

    if (IVOpIdx == 0) {
      WideLHS = SE->getSCEV(WideDef);
      const SCEV *NarrowRHS = SE->getSCEV(NarrowUse->getOperand(1));
      WideRHS = GetExtend(NarrowRHS, WideType);
    } else {
      const SCEV *NarrowLHS = SE->getSCEV(NarrowUse->getOperand(0));
      WideLHS = GetExtend(NarrowLHS, WideType);
      WideRHS = SE->getSCEV(WideDef);
    }

    // WideUse is "WideDef `op.wide` X" as described in the comment.
    const SCEV *WideUse = nullptr;

    switch (NarrowUse->getOpcode()) {
    default:
      llvm_unreachable("No other possibility!");

    case Instruction::Add:
      WideUse = SE->getAddExpr(WideLHS, WideRHS);
      break;

    case Instruction::Mul:
      WideUse = SE->getMulExpr(WideLHS, WideRHS);
      break;

    case Instruction::UDiv:
      WideUse = SE->getUDivExpr(WideLHS, WideRHS);
      break;

    case Instruction::Sub:
      WideUse = SE->getMinusSCEV(WideLHS, WideRHS);
      break;
    }

    return WideUse == WideAR;
  };

  bool SignExtend = getExtendKind(NarrowDef) == SignExtended;
  if (!GuessNonIVOperand(SignExtend)) {
    SignExtend = !SignExtend;
    if (!GuessNonIVOperand(SignExtend))
      return nullptr;
  }

  Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(0), WideType,
                                      SignExtend, NarrowUse);
  Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(1), WideType,
                                      SignExtend, NarrowUse);

  auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
  auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
                                        NarrowBO->getName());

  IRBuilder<> Builder(NarrowUse);
  Builder.Insert(WideBO);
  WideBO->copyIRFlags(NarrowBO);
  return WideBO;
}
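// For example (illustrative): to widen 'i32 %use = sub i32 %x, %narrow.iv' to
// i64, the code above needs an X with widen(%x - %iv) == X - %iv.wide; it
// guesses X = sext(%x) and then X = zext(%x), keeping whichever guess SCEV
// proves equal to the known widened recurrence WideAR.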
WidenIV::ExtendKind WidenIV::getExtendKind(Instruction *I) {
  auto It = ExtendKindMap.find(I);
  assert(It != ExtendKindMap.end() && "Instruction not yet extended!");
  return It->second;
}
const SCEV *WidenIV::getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
                                     unsigned OpCode) const {
  if (OpCode == Instruction::Add)
    return SE->getAddExpr(LHS, RHS);
  if (OpCode == Instruction::Sub)
    return SE->getMinusSCEV(LHS, RHS);
  if (OpCode == Instruction::Mul)
    return SE->getMulExpr(LHS, RHS);

  llvm_unreachable("Unsupported opcode.");
}
/// No-wrap operations can transfer sign extension of their result to their
/// operands. Generate the SCEV value for the widened operation without
/// actually modifying the IR yet. If the expression after extending the
/// operands is an AddRec for this loop, return the AddRec and the kind of
/// extension used.
WidenIV::WidenedRecTy WidenIV::getExtendedOperandRecurrence(NarrowIVDefUse DU) {
  // Handle the common case of add<nsw/nuw>
  const unsigned OpCode = DU.NarrowUse->getOpcode();
  // Only Add/Sub/Mul instructions are supported yet.
  if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
      OpCode != Instruction::Mul)
    return {nullptr, Unknown};

  // One operand (NarrowDef) has already been extended to WideDef. Now determine
  // if extending the other will lead to a recurrence.
  const unsigned ExtendOperIdx =
      DU.NarrowUse->getOperand(0) == DU.NarrowDef ? 1 : 0;
  assert(DU.NarrowUse->getOperand(1-ExtendOperIdx) == DU.NarrowDef && "bad DU");

  const SCEV *ExtendOperExpr = nullptr;
  const OverflowingBinaryOperator *OBO =
      cast<OverflowingBinaryOperator>(DU.NarrowUse);
  ExtendKind ExtKind = getExtendKind(DU.NarrowDef);
  if (ExtKind == SignExtended && OBO->hasNoSignedWrap())
    ExtendOperExpr = SE->getSignExtendExpr(
        SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
  else if (ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap())
    ExtendOperExpr = SE->getZeroExtendExpr(
        SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
  else
    return {nullptr, Unknown};

  // When creating this SCEV expr, don't apply the current operation's NSW or
  // NUW flags. This instruction may be guarded by control flow that the
  // no-wrap behavior depends on. Non-control-equivalent instructions can be
  // mapped to the same SCEV expression, and it would be incorrect to transfer
  // NSW/NUW semantics to those operations.
  const SCEV *lhs = SE->getSCEV(DU.WideDef);
  const SCEV *rhs = ExtendOperExpr;

  // Let's swap operands to the initial order for the case of non-commutative
  // operations, like SUB. See PR21014.
  if (ExtendOperIdx == 0)
    std::swap(lhs, rhs);
  const SCEVAddRecExpr *AddRec =
      dyn_cast<SCEVAddRecExpr>(getSCEVByOpCode(lhs, rhs, OpCode));

  if (!AddRec || AddRec->getLoop() != L)
    return {nullptr, Unknown};

  return {AddRec, ExtKind};
}
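// The algebra exploited above, in SCEV terms: for an add carrying the nsw
// flag,
//   sext(%iv +nsw %step) == sext(%iv) + sext(%step)
// so extending only the non-IV operand and reusing the already-widened IV
// yields an AddRec on the same loop without re-deriving the whole expression
// (and likewise for nuw with zext).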
/// Is this instruction potentially interesting for further simplification
/// after widening its type? In other words, can the extend be safely hoisted
/// out of the loop, with SCEV reducing the value to a recurrence on the same
/// loop? If so, return the extended recurrence and the kind of extension used.
/// Otherwise return {nullptr, Unknown}.
WidenIV::WidenedRecTy WidenIV::getWideRecurrence(NarrowIVDefUse DU) {
  if (!SE->isSCEVable(DU.NarrowUse->getType()))
    return {nullptr, Unknown};

  const SCEV *NarrowExpr = SE->getSCEV(DU.NarrowUse);
  if (SE->getTypeSizeInBits(NarrowExpr->getType()) >=
      SE->getTypeSizeInBits(WideType)) {
    // NarrowUse implicitly widens its operand. e.g. a gep with a narrow
    // index. So don't follow this use.
    return {nullptr, Unknown};
  }

  const SCEV *WideExpr;
  ExtendKind ExtKind;
  if (DU.NeverNegative) {
    WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
    if (isa<SCEVAddRecExpr>(WideExpr))
      ExtKind = SignExtended;
    else {
      WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
      ExtKind = ZeroExtended;
    }
  } else if (getExtendKind(DU.NarrowDef) == SignExtended) {
    WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
    ExtKind = SignExtended;
  } else {
    WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
    ExtKind = ZeroExtended;
  }
  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr);
  if (!AddRec || AddRec->getLoop() != L)
    return {nullptr, Unknown};
  return {AddRec, ExtKind};
}
/// This IV user cannot be widened. Replace this use of the original narrow IV
/// with a truncation of the new wide IV to isolate and eliminate the narrow IV.
static void truncateIVUse(NarrowIVDefUse DU, DominatorTree *DT, LoopInfo *LI) {
  auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI);
  if (!InsertPt)
    return;
  LLVM_DEBUG(dbgs() << "INDVARS: Truncate IV " << *DU.WideDef << " for user "
                    << *DU.NarrowUse << "\n");
  IRBuilder<> Builder(InsertPt);
  Value *Trunc = Builder.CreateTrunc(DU.WideDef, DU.NarrowDef->getType());
  DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc);
}
/// If the narrow use is a compare instruction, then widen the compare
/// (and possibly the other operand). The extend operation is hoisted into the
/// loop preheader as far as possible.
bool WidenIV::widenLoopCompare(NarrowIVDefUse DU) {
  ICmpInst *Cmp = dyn_cast<ICmpInst>(DU.NarrowUse);
  if (!Cmp)
    return false;

  // We can legally widen the comparison in the following two cases:
  //
  //  - The signedness of the IV extension and comparison match
  //
  //  - The narrow IV is always positive (and thus its sign extension is equal
  //    to its zero extension).  For instance, let's say we're zero extending
  //    %narrow for the following use
  //
  //      icmp slt i32 %narrow, %val   ... (A)
  //
  //    and %narrow is always positive.  Then
  //
  //      (A) == icmp slt i32 sext(%narrow), sext(%val)
  //          == icmp slt i32 zext(%narrow), sext(%val)
  bool IsSigned = getExtendKind(DU.NarrowDef) == SignExtended;
  if (!(DU.NeverNegative || IsSigned == Cmp->isSigned()))
    return false;

  Value *Op = Cmp->getOperand(Cmp->getOperand(0) == DU.NarrowDef ? 1 : 0);
  unsigned CastWidth = SE->getTypeSizeInBits(Op->getType());
  unsigned IVWidth = SE->getTypeSizeInBits(WideType);
  assert(CastWidth <= IVWidth && "Unexpected width while widening compare.");

  // Widen the compare instruction.
  auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI);
  if (!InsertPt)
    return false;
  IRBuilder<> Builder(InsertPt);
  DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);

  // Widen the other operand of the compare, if necessary.
  if (CastWidth < IVWidth) {
    Value *ExtOp = createExtendInst(Op, WideType, Cmp->isSigned(), Cmp);
    DU.NarrowUse->replaceUsesOfWith(Op, ExtOp);
  }
  return true;
}
/// If the narrow use is an instruction whose two operands are the defining
/// instruction of DU and a load instruction, then we have the following:
/// if the load is hoisted outside the loop, then we do not reach this function
/// as scalar evolution analysis works fine in widenIVUse with variables
/// hoisted outside the loop, and efficient code is subsequently generated by
/// not emitting truncate instructions. But when the load is not hoisted
/// (whether due to a limitation in alias analysis or due to a true legality
/// constraint), then scalar evolution can not proceed with loop variant values
/// and inefficient code is generated. This function handles the non-hoisted
/// load special case by making the optimization generate the same type of code
/// for hoisted and non-hoisted loads (widen the use and eliminate the sign
/// extend instruction). This special case is important especially when the
/// induction variables are affecting the addressing mode in code generation.
bool WidenIV::widenWithVariantLoadUse(NarrowIVDefUse DU) {
  Instruction *NarrowUse = DU.NarrowUse;
  Instruction *NarrowDef = DU.NarrowDef;
  Instruction *WideDef = DU.WideDef;

  // Handle the common case of add<nsw/nuw>
  const unsigned OpCode = NarrowUse->getOpcode();
  // Only Add/Sub/Mul instructions are supported.
  if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
      OpCode != Instruction::Mul)
    return false;

  // The operand that is not defined by NarrowDef of DU. Let's call it the
  // other operand.
  unsigned ExtendOperIdx = DU.NarrowUse->getOperand(0) == NarrowDef ? 1 : 0;
  assert(DU.NarrowUse->getOperand(1 - ExtendOperIdx) == DU.NarrowDef &&
         "bad DU");

  const SCEV *ExtendOperExpr = nullptr;
  const OverflowingBinaryOperator *OBO =
      cast<OverflowingBinaryOperator>(NarrowUse);
  ExtendKind ExtKind = getExtendKind(NarrowDef);
  if (ExtKind == SignExtended && OBO->hasNoSignedWrap())
    ExtendOperExpr = SE->getSignExtendExpr(
        SE->getSCEV(NarrowUse->getOperand(ExtendOperIdx)), WideType);
  else if (ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap())
    ExtendOperExpr = SE->getZeroExtendExpr(
        SE->getSCEV(NarrowUse->getOperand(ExtendOperIdx)), WideType);
  else
    return false;

  // We are interested in the other operand being a load instruction.
  // But, we should look into relaxing this restriction later on.
  auto *I = dyn_cast<Instruction>(NarrowUse->getOperand(ExtendOperIdx));
  if (I && I->getOpcode() != Instruction::Load)
    return false;

  // Verifying that the defining operand is an AddRec.
  const SCEV *Op1 = SE->getSCEV(WideDef);
  const SCEVAddRecExpr *AddRecOp1 = dyn_cast<SCEVAddRecExpr>(Op1);
  if (!AddRecOp1 || AddRecOp1->getLoop() != L)
    return false;
  // Verifying that the other operand is an Extend.
  if (ExtKind == SignExtended) {
    if (!isa<SCEVSignExtendExpr>(ExtendOperExpr))
      return false;
  } else {
    if (!isa<SCEVZeroExtendExpr>(ExtendOperExpr))
      return false;
  }

  if (ExtKind == SignExtended) {
    for (Use &U : NarrowUse->uses()) {
      SExtInst *User = dyn_cast<SExtInst>(U.getUser());
      if (!User || User->getType() != WideType)
        return false;
    }
  } else { // ExtKind == ZeroExtended
    for (Use &U : NarrowUse->uses()) {
      ZExtInst *User = dyn_cast<ZExtInst>(U.getUser());
      if (!User || User->getType() != WideType)
        return false;
    }
  }

  return true;
}
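// Illustrative shape of the pattern accepted above (names hypothetical):
//
//   %val = load i32, i32* %p                 ; loop-variant load
//   %add = add nsw i32 %narrow.iv, %val
//   %ext = sext i32 %add to i64              ; feeds an addressing computation
//
// Because every user of %add is a sext to the wide type, the add can be done
// directly in i64 and the sext removed by widenWithVariantLoadUseCodegen.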

/// Special case for widening with variant loads (see
/// WidenIV::widenWithVariantLoadUse). This is the code generation part.
void WidenIV::widenWithVariantLoadUseCodegen(NarrowIVDefUse DU) {
  Instruction *NarrowUse = DU.NarrowUse;
  Instruction *NarrowDef = DU.NarrowDef;
  Instruction *WideDef = DU.WideDef;

  ExtendKind ExtKind = getExtendKind(NarrowDef);

  LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");

  // Generate the widened use instruction.
  Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(0), WideType,
                                      ExtKind, NarrowUse);
  Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(1), WideType,
                                      ExtKind, NarrowUse);

  auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
  auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
                                        NarrowBO->getName());
  IRBuilder<> Builder(NarrowUse);
  Builder.Insert(WideBO);
  WideBO->copyIRFlags(NarrowBO);

  if (ExtKind == SignExtended)
    ExtendKindMap[NarrowUse] = SignExtended;
  else
    ExtendKindMap[NarrowUse] = ZeroExtended;

  // Update the uses.
  if (ExtKind == SignExtended) {
    for (Use &U : NarrowUse->uses()) {
      SExtInst *User = dyn_cast<SExtInst>(U.getUser());
      if (User && User->getType() == WideType) {
        LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *User << " replaced by "
                          << *WideBO << "\n");
        ++NumElimExt;
        User->replaceAllUsesWith(WideBO);
        DeadInsts.emplace_back(User);
      }
    }
  } else { // ExtKind == ZeroExtended
    for (Use &U : NarrowUse->uses()) {
      ZExtInst *User = dyn_cast<ZExtInst>(U.getUser());
      if (User && User->getType() == WideType) {
        LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *User << " replaced by "
                          << *WideBO << "\n");
        ++NumElimExt;
        User->replaceAllUsesWith(WideBO);
        DeadInsts.emplace_back(User);
      }
    }
  }
}
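
// Editorial sketch of the rewrite performed above (illustrative IR, not from
// the original source):
//
//   before:  %add  = add nsw i32 %iv, %load
//            %sext = sext i32 %add to i64
//   after:   %load.wide = sext i32 %load to i64
//            %add.wide  = add nsw i64 %iv.wide, %load.wide
//
// Every i64 sext user of %add is then replaced by %add.wide and queued in
// DeadInsts.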

/// Determine whether an individual user of the narrow IV can be widened. If
/// so, return the wide clone of the user.
Instruction *WidenIV::widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
  assert(ExtendKindMap.count(DU.NarrowDef) &&
         "Should already know the kind of extension used to widen NarrowDef");

  // Stop traversing the def-use chain at inner-loop phis or post-loop phis.
  if (PHINode *UsePhi = dyn_cast<PHINode>(DU.NarrowUse)) {
    if (LI->getLoopFor(UsePhi->getParent()) != L) {
      // For LCSSA phis, sink the truncate outside the loop.
      // After SimplifyCFG most loop exit targets have a single predecessor.
      // Otherwise fall back to a truncate within the loop.
      if (UsePhi->getNumOperands() != 1)
        truncateIVUse(DU, DT, LI);
      else {
        // Widening the PHI requires us to insert a trunc. The logical place
        // for this trunc is in the same BB as the PHI. This is not possible if
        // the BB is terminated by a catchswitch.
        if (isa<CatchSwitchInst>(UsePhi->getParent()->getTerminator()))
          return nullptr;

        PHINode *WidePhi =
            PHINode::Create(DU.WideDef->getType(), 1,
                            UsePhi->getName() + ".wide", UsePhi);
        WidePhi->addIncoming(DU.WideDef, UsePhi->getIncomingBlock(0));
        IRBuilder<> Builder(&*WidePhi->getParent()->getFirstInsertionPt());
        Value *Trunc = Builder.CreateTrunc(WidePhi, DU.NarrowDef->getType());
        UsePhi->replaceAllUsesWith(Trunc);
        DeadInsts.emplace_back(UsePhi);
        LLVM_DEBUG(dbgs() << "INDVARS: Widen lcssa phi " << *UsePhi << " to "
                          << *WidePhi << "\n");
      }
      return nullptr;
    }
  }

  // This narrow use can be widened by a sext if it's non-negative or its
  // narrow def was widened by a sext. Same for zext.
  auto canWidenBySExt = [&]() {
    return DU.NeverNegative || getExtendKind(DU.NarrowDef) == SignExtended;
  };
  auto canWidenByZExt = [&]() {
    return DU.NeverNegative || getExtendKind(DU.NarrowDef) == ZeroExtended;
  };

  // Our raison d'etre! Eliminate sign and zero extension.
  if ((isa<SExtInst>(DU.NarrowUse) && canWidenBySExt()) ||
      (isa<ZExtInst>(DU.NarrowUse) && canWidenByZExt())) {
    Value *NewDef = DU.WideDef;
    if (DU.NarrowUse->getType() != WideType) {
      unsigned CastWidth = SE->getTypeSizeInBits(DU.NarrowUse->getType());
      unsigned IVWidth = SE->getTypeSizeInBits(WideType);
      if (CastWidth < IVWidth) {
        // The cast isn't as wide as the IV, so insert a Trunc.
        IRBuilder<> Builder(DU.NarrowUse);
        NewDef = Builder.CreateTrunc(DU.WideDef, DU.NarrowUse->getType());
      }
      else {
        // A wider extend was hidden behind a narrower one. This may induce
        // another round of IV widening in which the intermediate IV becomes
        // dead. It should be very rare.
        LLVM_DEBUG(dbgs() << "INDVARS: New IV " << *WidePhi
                          << " not wide enough to subsume " << *DU.NarrowUse
                          << "\n");
        DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
        NewDef = DU.NarrowUse;
      }
    }
    if (NewDef != DU.NarrowUse) {
      LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *DU.NarrowUse
                        << " replaced by " << *DU.WideDef << "\n");
      ++NumElimExt;
      DU.NarrowUse->replaceAllUsesWith(NewDef);
      DeadInsts.emplace_back(DU.NarrowUse);
    }
    // Now that the extend is gone, we want to expose its uses for potential
    // further simplification. We don't need to directly inform SimplifyIVUsers
    // of the new users, because their parent IV will be processed later as a
    // new loop phi. If we preserved IVUsers analysis, we would also want to
    // push the uses of WideDef here.

    // No further widening is needed. The deceased [sz]ext had done it for us.
    return nullptr;
  }

  // Does this user itself evaluate to a recurrence after widening?
  WidenedRecTy WideAddRec = getExtendedOperandRecurrence(DU);
  if (!WideAddRec.first)
    WideAddRec = getWideRecurrence(DU);

  assert((WideAddRec.first == nullptr) == (WideAddRec.second == Unknown));
  if (!WideAddRec.first) {
    // If the use is a loop condition, try to promote the condition instead of
    // truncating the IV first.
    if (widenLoopCompare(DU))
      return nullptr;

    // At this point we are about to generate a truncate instruction that may
    // hurt performance because the scalar evolution expression computed
    // earlier in WideAddRec.first does not indicate a polynomial induction
    // expression. In that case, look at the operands of the use instruction
    // to determine if we can still widen the use instead of truncating its
    // operand.
    if (widenWithVariantLoadUse(DU)) {
      widenWithVariantLoadUseCodegen(DU);
      return nullptr;
    }

    // This user does not evaluate to a recurrence after widening, so don't
    // follow it. Instead insert a Trunc to kill off the original use,
    // eventually isolating the original narrow IV so it can be removed.
    truncateIVUse(DU, DT, LI);
    return nullptr;
  }
  // Assume block terminators cannot evaluate to a recurrence. We can't insert
  // a Trunc after a terminator if there happens to be a critical edge.
  assert(DU.NarrowUse != DU.NarrowUse->getParent()->getTerminator() &&
         "SCEV is not expected to evaluate a block terminator");

  // Reuse the IV increment that SCEVExpander created as long as it dominates
  // NarrowUse.
  Instruction *WideUse = nullptr;
  if (WideAddRec.first == WideIncExpr &&
      Rewriter.hoistIVInc(WideInc, DU.NarrowUse))
    WideUse = WideInc;
  else {
    WideUse = cloneIVUser(DU, WideAddRec.first);
    if (!WideUse)
      return nullptr;
  }
  // Evaluation of WideAddRec ensured that the narrow expression could be
  // extended outside the loop without overflow. This suggests that the wide use
  // evaluates to the same expression as the extended narrow use, but doesn't
  // absolutely guarantee it. Hence the following failsafe check. In rare cases
  // where it fails, we simply throw away the newly created wide use.
  if (WideAddRec.first != SE->getSCEV(WideUse)) {
    LLVM_DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse << ": "
                      << *SE->getSCEV(WideUse) << " != " << *WideAddRec.first
                      << "\n");
    DeadInsts.emplace_back(WideUse);
    return nullptr;
  }

  ExtendKindMap[DU.NarrowUse] = WideAddRec.second;
  // Returning WideUse pushes it on the worklist.
  return WideUse;
}
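
// Editorial sketch (illustrative IR, not from the original source) of the
// common case handled above. With a 32-bit IV widened to 64 bits,
//
//   %iv  = phi i32 [ 0, %ph ], [ %iv.next, %latch ]
//   %ext = sext i32 %iv to i64
//   %gep = getelementptr inbounds i64, i64* %p, i64 %ext
//
// becomes a direct use of the wide IV, and %ext is deleted:
//
//   %iv.wide = phi i64 [ 0, %ph ], [ %iv.wide.next, %latch ]
//   %gep     = getelementptr inbounds i64, i64* %p, i64 %iv.wide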

/// Add eligible users of NarrowDef to NarrowIVUsers.
void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) {
  const SCEV *NarrowSCEV = SE->getSCEV(NarrowDef);
  bool NonNegativeDef =
      SE->isKnownPredicate(ICmpInst::ICMP_SGE, NarrowSCEV,
                           SE->getConstant(NarrowSCEV->getType(), 0));
  for (User *U : NarrowDef->users()) {
    Instruction *NarrowUser = cast<Instruction>(U);

    // Handle data flow merges and bizarre phi cycles.
    if (!Widened.insert(NarrowUser).second)
      continue;

    bool NonNegativeUse = false;
    if (!NonNegativeDef) {
      // We might have control-dependent range information for this context.
      if (auto RangeInfo = getPostIncRangeInfo(NarrowDef, NarrowUser))
        NonNegativeUse = RangeInfo->getSignedMin().isNonNegative();
    }

    NarrowIVUsers.emplace_back(NarrowDef, NarrowUser, WideDef,
                               NonNegativeDef || NonNegativeUse);
  }
}
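
// Editorial note (illustrative, not from the original source): the
// non-negative flag computed above lets one extension kind subsume the
// other. For a def %d known non-negative,
//
//   %s = sext i32 %d to i64   ; identical to 'zext i32 %d to i64'
//
// so widenIVUse may eliminate this sext even against a zext-widened IV.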

/// Process a single induction variable. First use the SCEVExpander to create a
/// wide induction variable that evaluates to the same recurrence as the
/// original narrow IV. Then use a worklist to forward traverse the narrow IV's
/// def-use chain. After widenIVUse has processed all interesting IV users, the
/// narrow IV will be isolated for removal by DeleteDeadPHIs.
///
/// It would be simpler to delete uses as they are processed, but we must avoid
/// invalidating SCEV expressions.
PHINode *WidenIV::createWideIV(SCEVExpander &Rewriter) {
  // Is this phi an induction variable?
  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(OrigPhi));
  if (!AddRec)
    return nullptr;

  // Widen the induction variable expression.
  const SCEV *WideIVExpr = getExtendKind(OrigPhi) == SignExtended
                               ? SE->getSignExtendExpr(AddRec, WideType)
                               : SE->getZeroExtendExpr(AddRec, WideType);

  assert(SE->getEffectiveSCEVType(WideIVExpr->getType()) == WideType &&
         "Expect the new IV expression to preserve its type");

  // Can the IV be extended outside the loop without overflow?
  AddRec = dyn_cast<SCEVAddRecExpr>(WideIVExpr);
  if (!AddRec || AddRec->getLoop() != L)
    return nullptr;

  // An AddRec must have loop-invariant operands. Since this AddRec is
  // materialized by a loop header phi, the expression cannot have any post-loop
  // operands, so they must dominate the loop header.
  assert(
      SE->properlyDominates(AddRec->getStart(), L->getHeader()) &&
      SE->properlyDominates(AddRec->getStepRecurrence(*SE), L->getHeader()) &&
      "Loop header phi recurrence inputs do not dominate the loop");

  // Iterate over IV uses (including transitive ones) looking for IV increments
  // of the form 'add nsw %iv, <const>'. For each increment and each use of
  // the increment calculate control-dependent range information based on
  // dominating conditions inside of the loop (e.g. a range check inside of the
  // loop). Calculated ranges are stored in the PostIncRangeInfos map.
  //
  // Control-dependent range information is later used to prove that a narrow
  // definition is not negative (see pushNarrowIVUsers). It's difficult to do
  // this on demand because when pushNarrowIVUsers needs this information some
  // of the dominating conditions might already have been widened.
  if (UsePostIncrementRanges)
    calculatePostIncRanges(OrigPhi);

  // The rewriter provides a value for the desired IV expression. This may
  // either find an existing phi or materialize a new one. Either way, we
  // expect a well-formed cyclic phi-with-increments. i.e. any operand not part
  // of the phi-SCC dominates the loop entry.
  Instruction *InsertPt = &L->getHeader()->front();
  WidePhi = cast<PHINode>(Rewriter.expandCodeFor(AddRec, WideType, InsertPt));

  // Remembering the WideIV increment generated by SCEVExpander allows
  // widenIVUse to reuse it when widening the narrow IV's increment. We don't
  // employ a general reuse mechanism because the call above is the only call to
  // SCEVExpander. Henceforth, we produce 1-to-1 narrow to wide uses.
  if (BasicBlock *LatchBlock = L->getLoopLatch()) {
    WideInc =
        cast<Instruction>(WidePhi->getIncomingValueForBlock(LatchBlock));
    WideIncExpr = SE->getSCEV(WideInc);
    // Propagate the debug location associated with the original loop increment
    // to the new (widened) increment.
    auto *OrigInc =
        cast<Instruction>(OrigPhi->getIncomingValueForBlock(LatchBlock));
    WideInc->setDebugLoc(OrigInc->getDebugLoc());
  }

  LLVM_DEBUG(dbgs() << "Wide IV: " << *WidePhi << "\n");
  ++NumWidened;

  // Traverse the def-use chain using a worklist starting at the original IV.
  assert(Widened.empty() && NarrowIVUsers.empty() && "expect initial state");

  Widened.insert(OrigPhi);
  pushNarrowIVUsers(OrigPhi, WidePhi);

  while (!NarrowIVUsers.empty()) {
    NarrowIVDefUse DU = NarrowIVUsers.pop_back_val();

    // Process a def-use edge. This may replace the use, so don't hold a
    // use_iterator across it.
    Instruction *WideUse = widenIVUse(DU, Rewriter);

    // Follow all def-use edges from the previous narrow use.
    if (WideUse)
      pushNarrowIVUsers(DU.NarrowUse, WideUse);

    // widenIVUse may have removed the def-use edge.
    if (DU.NarrowDef->use_empty())
      DeadInsts.emplace_back(DU.NarrowDef);
  }

  // Attach any debug information to the new PHI. Since OrigPhi and WidePhi
  // evaluate the same recurrence, we can just copy the debug info over.
  SmallVector<DbgValueInst *, 1> DbgValues;
  llvm::findDbgValues(DbgValues, OrigPhi);
  auto *MDPhi = MetadataAsValue::get(WidePhi->getContext(),
                                     ValueAsMetadata::get(WidePhi));
  for (auto &DbgValue : DbgValues)
    DbgValue->setOperand(0, MDPhi);
  return WidePhi;
}
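
// Editorial sketch of the overall effect (illustrative IR, not from the
// original source). For a narrow IV
//
//   %iv      = phi i32 [ 0, %ph ], [ %iv.next, %latch ]
//   %iv.next = add nsw i32 %iv, 1
//
// createWideIV materializes the equivalent wide recurrence
//
//   %iv.wide      = phi i64 [ 0, %ph ], [ %iv.wide.next, %latch ]
//   %iv.wide.next = add nsw i64 %iv.wide, 1
//
// and the worklist loop then rewrites users of %iv and %iv.next until the
// narrow phi is isolated and can be deleted.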

/// Calculate the control-dependent range for the given def at the given
/// context by looking at dominating conditions inside of the loop.
void WidenIV::calculatePostIncRange(Instruction *NarrowDef,
                                    Instruction *NarrowUser) {
  using namespace llvm::PatternMatch;

  Value *NarrowDefLHS;
  const APInt *NarrowDefRHS;
  if (!match(NarrowDef, m_NSWAdd(m_Value(NarrowDefLHS),
                                 m_APInt(NarrowDefRHS))) ||
      !NarrowDefRHS->isNonNegative())
    return;

  auto UpdateRangeFromCondition = [&] (Value *Condition,
                                       bool TrueDest) {
    CmpInst::Predicate Pred;
    Value *CmpRHS;
    if (!match(Condition, m_ICmp(Pred, m_Specific(NarrowDefLHS),
                                 m_Value(CmpRHS))))
      return;

    CmpInst::Predicate P =
        TrueDest ? Pred : CmpInst::getInversePredicate(Pred);

    auto CmpRHSRange = SE->getSignedRange(SE->getSCEV(CmpRHS));
    auto CmpConstrainedLHSRange =
        ConstantRange::makeAllowedICmpRegion(P, CmpRHSRange);
    auto NarrowDefRange =
        CmpConstrainedLHSRange.addWithNoSignedWrap(*NarrowDefRHS);

    updatePostIncRangeInfo(NarrowDef, NarrowUser, NarrowDefRange);
  };

  auto UpdateRangeFromGuards = [&](Instruction *Ctx) {
    if (!HasGuards)
      return;

    for (Instruction &I : make_range(Ctx->getIterator().getReverse(),
                                     Ctx->getParent()->rend())) {
      Value *C = nullptr;
      if (match(&I, m_Intrinsic<Intrinsic::experimental_guard>(m_Value(C))))
        UpdateRangeFromCondition(C, /*TrueDest=*/true);
    }
  };

  UpdateRangeFromGuards(NarrowUser);

  BasicBlock *NarrowUserBB = NarrowUser->getParent();
  // If NarrowUserBB is statically unreachable, asking dominator queries may
  // yield surprising results (e.g. the block may not have a dom tree node).
  if (!DT->isReachableFromEntry(NarrowUserBB))
    return;

  for (auto *DTB = (*DT)[NarrowUserBB]->getIDom();
       L->contains(DTB->getBlock());
       DTB = DTB->getIDom()) {
    auto *BB = DTB->getBlock();
    auto *TI = BB->getTerminator();
    UpdateRangeFromGuards(TI);

    auto *BI = dyn_cast<BranchInst>(TI);
    if (!BI || !BI->isConditional())
      continue;

    auto *TrueSuccessor = BI->getSuccessor(0);
    auto *FalseSuccessor = BI->getSuccessor(1);

    auto DominatesNarrowUser = [this, NarrowUser] (BasicBlockEdge BBE) {
      return BBE.isSingleEdge() &&
             DT->dominates(BBE, NarrowUser->getParent());
    };

    if (DominatesNarrowUser(BasicBlockEdge(BB, TrueSuccessor)))
      UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/true);

    if (DominatesNarrowUser(BasicBlockEdge(BB, FalseSuccessor)))
      UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/false);
  }
}
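
// Editorial worked example (illustrative, not from the original source):
// given a post-increment def and a dominating range check,
//
//   %d = add nsw i32 %x, 7       ; NarrowDefLHS = %x, NarrowDefRHS = 7
//   %c = icmp slt i32 %x, 100    ; true edge dominates the narrow user
//
// the allowed region for %x on the true edge is [INT_MIN, 100), so the range
// recorded for %d at that user is [INT_MIN + 7, 107) via
// addWithNoSignedWrap(7).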

/// Calculates the PostIncRangeInfos map for the given IV.
void WidenIV::calculatePostIncRanges(PHINode *OrigPhi) {
  SmallPtrSet<Instruction *, 16> Visited;
  SmallVector<Instruction *, 6> Worklist;
  Worklist.push_back(OrigPhi);
  Visited.insert(OrigPhi);

  while (!Worklist.empty()) {
    Instruction *NarrowDef = Worklist.pop_back_val();

    for (Use &U : NarrowDef->uses()) {
      auto *NarrowUser = cast<Instruction>(U.getUser());

      // Don't go looking outside the current loop.
      auto *NarrowUserLoop = (*LI)[NarrowUser->getParent()];
      if (!NarrowUserLoop || !L->contains(NarrowUserLoop))
        continue;

      if (!Visited.insert(NarrowUser).second)
        continue;

      Worklist.push_back(NarrowUser);

      calculatePostIncRange(NarrowDef, NarrowUser);
    }
  }
}

//===----------------------------------------------------------------------===//
//  Live IV Reduction - Minimize IVs live across the loop.
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
//  Simplification of IV users based on SCEV evaluation.
//===----------------------------------------------------------------------===//

namespace {

class IndVarSimplifyVisitor : public IVVisitor {
  ScalarEvolution *SE;
  const TargetTransformInfo *TTI;
  PHINode *IVPhi;

public:
  WideIVInfo WI;

  IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV,
                        const TargetTransformInfo *TTI,
                        const DominatorTree *DTree)
      : SE(SCEV), TTI(TTI), IVPhi(IV) {
    DT = DTree;
    WI.NarrowIV = IVPhi;
  }

  // Implement the interface used by simplifyUsersOfIV.
  void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); }
};

} // end anonymous namespace

/// Iteratively perform simplification on a worklist of IV users. Each
/// successive simplification may push more users which may themselves be
/// candidates for simplification.
///
/// Sign/Zero extend elimination is interleaved with IV simplification.
bool IndVarSimplify::simplifyAndExtend(Loop *L,
                                       SCEVExpander &Rewriter,
                                       LoopInfo *LI) {
  SmallVector<WideIVInfo, 8> WideIVs;

  auto *GuardDecl = L->getBlocks()[0]->getModule()->getFunction(
      Intrinsic::getName(Intrinsic::experimental_guard));
  bool HasGuards = GuardDecl && !GuardDecl->use_empty();

  SmallVector<PHINode*, 8> LoopPhis;
  for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
    LoopPhis.push_back(cast<PHINode>(I));
  }
  // Each round of simplification iterates through the SimplifyIVUsers worklist
  // for all current phis, then determines whether any IVs can be
  // widened. Widening adds new phis to LoopPhis, inducing another round of
  // simplification on the wide IVs.
  bool Changed = false;
  while (!LoopPhis.empty()) {
    // Evaluate as many IV expressions as possible before widening any IVs. This
    // forces SCEV to set no-wrap flags before evaluating sign/zero
    // extension. The first time SCEV attempts to normalize sign/zero extension,
    // the result becomes final. So for the most predictable results, we delay
    // evaluation of sign/zero extension until needed, and avoid running other
    // SCEV based analysis prior to simplifyAndExtend.
    do {
      PHINode *CurrIV = LoopPhis.pop_back_val();

      // Information about sign/zero extensions of CurrIV.
      IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT);

      Changed |=
          simplifyUsersOfIV(CurrIV, SE, DT, LI, DeadInsts, Rewriter, &Visitor);

      if (Visitor.WI.WidestNativeType) {
        WideIVs.push_back(Visitor.WI);
      }
    } while (!LoopPhis.empty());

    for (; !WideIVs.empty(); WideIVs.pop_back()) {
      WidenIV Widener(WideIVs.back(), LI, SE, DT, DeadInsts, HasGuards);
      if (PHINode *WidePhi = Widener.createWideIV(Rewriter)) {
        Changed = true;
        LoopPhis.push_back(WidePhi);
      }
    }
  }
  return Changed;
}

//===----------------------------------------------------------------------===//
//  linearFunctionTestReplace and its kin. Rewrite the loop exit condition.
//===----------------------------------------------------------------------===//

/// Given a Value which is hoped to be part of an add recurrence in the given
/// loop, return the associated Phi node if so. Otherwise, return null. Note
/// that this is less general than SCEV's AddRec checking.
static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L) {
  Instruction *IncI = dyn_cast<Instruction>(IncV);
  if (!IncI)
    return nullptr;

  switch (IncI->getOpcode()) {
  case Instruction::Add:
  case Instruction::Sub:
    break;
  case Instruction::GetElementPtr:
    // An IV counter must preserve its type.
    if (IncI->getNumOperands() == 2)
      break;
    LLVM_FALLTHROUGH;
  default:
    return nullptr;
  }

  PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
  if (Phi && Phi->getParent() == L->getHeader()) {
    if (L->isLoopInvariant(IncI->getOperand(1)))
      return Phi;
    return nullptr;
  }
  if (IncI->getOpcode() == Instruction::GetElementPtr)
    return nullptr;

  // Allow add/sub to be commuted.
  Phi = dyn_cast<PHINode>(IncI->getOperand(1));
  if (Phi && Phi->getParent() == L->getHeader()) {
    if (L->isLoopInvariant(IncI->getOperand(0)))
      return Phi;
  }
  return nullptr;
}
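
// Editorial example (illustrative IR, not from the original source): both
// operand orders of an add/sub increment are recognized,
//
//   %iv.next = add i64 %iv, %step   ; phi in operand 0
//   %iv.next = add i64 %step, %iv   ; commuted form, also accepted
//
// whereas a GEP increment must keep the phi in operand 0 and have exactly
// one (loop-invariant) index so that the counter preserves its type.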

/// Whether the current loop exit test is based on this value. Currently this
/// is limited to a direct use in the loop condition.
static bool isLoopExitTestBasedOn(Value *V, BasicBlock *ExitingBB) {
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  ICmpInst *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
  // TODO: Allow non-icmp loop test.
  if (!ICmp)
    return false;

  // TODO: Allow indirect use.
  return ICmp->getOperand(0) == V || ICmp->getOperand(1) == V;
}

/// linearFunctionTestReplace policy. Return true unless we can show that the
/// current exit test is already sufficiently canonical.
static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) {
  assert(L->getLoopLatch() && "Must be in simplified form");

  // Avoid converting a constant or loop invariant test back to a runtime
  // test. This is critical for when SCEV's cached ExitCount is less precise
  // than the current IR (such as after we've proven a particular exit is
  // actually dead and thus the BE count never reaches our ExitCount.)
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  if (L->isLoopInvariant(BI->getCondition()))
    return false;

  // Do LFTR to simplify the exit condition to an ICMP.
  ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
  if (!Cond)
    return true;

  // Do LFTR to simplify the exit ICMP to EQ/NE.
  ICmpInst::Predicate Pred = Cond->getPredicate();
  if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
    return true;

  // Look for a loop invariant RHS.
  Value *LHS = Cond->getOperand(0);
  Value *RHS = Cond->getOperand(1);
  if (!L->isLoopInvariant(RHS)) {
    if (!L->isLoopInvariant(LHS))
      return true;
    std::swap(LHS, RHS);
  }
  // Look for a simple IV counter LHS.
  PHINode *Phi = dyn_cast<PHINode>(LHS);
  if (!Phi)
    Phi = getLoopPhiForCounter(LHS, L);

  if (!Phi)
    return true;

  // Do LFTR if PHI node is defined in the loop, but is *not* a counter.
  int Idx = Phi->getBasicBlockIndex(L->getLoopLatch());
  if (Idx < 0)
    return true;

  // Do LFTR if the exit condition's IV is *not* a simple counter.
  Value *IncV = Phi->getIncomingValue(Idx);
  return Phi != getLoopPhiForCounter(IncV, L);
}
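
// Editorial example (illustrative IR, not from the original source): a test
// that is already canonical and therefore left alone,
//
//   %iv.next  = add nuw nsw i64 %iv, 1
//   %exitcond = icmp ne i64 %iv.next, %n   ; EQ/NE against invariant RHS
//
// while a test such as 'icmp slt i64 %iv, %n' still needs LFTR.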

/// Return true if undefined behavior would provably be executed on the path to
/// OnPathTo if Root produced a poison result. Note that this doesn't say
/// anything about whether OnPathTo is actually executed or whether Root is
/// actually poison. This can be used to assess whether a new use of Root can
/// be added at a location which is control equivalent with OnPathTo (such as
/// immediately before it) without introducing UB which didn't previously
/// exist. Note that a false result conveys no information.
static bool mustExecuteUBIfPoisonOnPathTo(Instruction *Root,
                                          Instruction *OnPathTo,
                                          DominatorTree *DT) {
  // The basic approach is to assume Root is poison, propagate poison forward
  // through all users we can easily track, and then check whether any of those
  // users are provably UB and must execute before our exiting block might
  // exit.

  // The set of all recursive users we've visited (which are assumed to all be
  // poison because of said visit).
  SmallSet<const Value *, 16> KnownPoison;
  SmallVector<const Instruction*, 16> Worklist;
  Worklist.push_back(Root);
  while (!Worklist.empty()) {
    const Instruction *I = Worklist.pop_back_val();

    // If we know this must trigger UB on a path leading to our target.
    if (mustTriggerUB(I, KnownPoison) && DT->dominates(I, OnPathTo))
      return true;

    // If we can't analyze propagation through this instruction, just skip it
    // and transitive users. Safe as false is a conservative result.
    if (!propagatesFullPoison(I) && I != Root)
      continue;

    if (KnownPoison.insert(I).second)
      for (const User *User : I->users())
        Worklist.push_back(cast<Instruction>(User));
  }

  // Might be non-UB, or might have a path we couldn't prove must execute on
  // the way to the exiting bb.
  return false;
}
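
// Editorial example (illustrative IR, not from the original source): if %iv
// were poison, then in
//
//   %gep = getelementptr inbounds i8, i8* %p, i64 %iv  ; propagates poison
//   %v   = load i8, i8* %gep                           ; UB if %gep is poison
//
// the load must trigger UB, so when it dominates the exiting block's
// terminator the function returns true: adding a new use of %iv there cannot
// introduce UB that the program did not already have.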

/// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
/// down to checking that all operands are constant and listing instructions
/// that may hide undef.
static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl<Value*> &Visited,
                               unsigned Depth) {
  if (isa<Constant>(V))
    return !isa<UndefValue>(V);

  if (Depth >= 6)
    return false;

  // Conservatively handle non-constant non-instructions. For example, Arguments
  // may be undef.
  Instruction *I = dyn_cast<Instruction>(V);
  if (!I)
    return false;

  // Load and return values may be undef.
  if (I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I))
    return false;

  // Optimistically handle other instructions.
  for (Value *Op : I->operands()) {
    if (!Visited.insert(Op).second)
      continue;
    if (!hasConcreteDefImpl(Op, Visited, Depth+1))
      return false;
  }
  return true;
}

/// Return true if the given value is concrete. We must prove that undef can
/// never reach it.
///
/// TODO: If we decide that this is a good approach to checking for undef, we
/// may factor it into a common location.
static bool hasConcreteDef(Value *V) {
  SmallPtrSet<Value*, 8> Visited;
  Visited.insert(V);
  return hasConcreteDefImpl(V, Visited, 0);
}

/// Return true if this IV has no uses other than the (soon to be rewritten)
/// loop exit test.
static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
  int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
  Value *IncV = Phi->getIncomingValue(LatchIdx);

  for (User *U : Phi->users())
    if (U != Cond && U != IncV) return false;

  for (User *U : IncV->users())
    if (U != Cond && U != Phi) return false;
  return true;
}

/// Return true if the given phi is a "counter" in L. A counter is an add
/// recurrence (of integer or pointer type) with an arbitrary start, and a
/// step of 1. Note that L must have exactly one latch.
static bool isLoopCounter(PHINode* Phi, Loop *L,
                          ScalarEvolution *SE) {
  assert(Phi->getParent() == L->getHeader());
  assert(L->getLoopLatch());

  if (!SE->isSCEVable(Phi->getType()))
    return false;

  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
  if (!AR || AR->getLoop() != L || !AR->isAffine())
    return false;

  const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
  if (!Step || !Step->isOne())
    return false;

  int LatchIdx = Phi->getBasicBlockIndex(L->getLoopLatch());
  Value *IncV = Phi->getIncomingValue(LatchIdx);
  return (getLoopPhiForCounter(IncV, L) == Phi);
}

/// Search the loop header for a loop counter (an add rec w/step of one)
/// suitable for use by LFTR. If multiple counters are available, select the
/// "best" one based on profitability heuristics.
///
/// BECount may be an i8* pointer type. The pointer difference is already a
/// valid count without scaling the address stride, so it remains a pointer
/// expression as far as SCEV is concerned.
static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB,
                                const SCEV *BECount,
                                ScalarEvolution *SE, DominatorTree *DT) {
  uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());

  Value *Cond = cast<BranchInst>(ExitingBB->getTerminator())->getCondition();

  // Loop over all of the PHI nodes, looking for a simple counter.
  PHINode *BestPhi = nullptr;
  const SCEV *BestInit = nullptr;
  BasicBlock *LatchBlock = L->getLoopLatch();
  assert(LatchBlock && "Must be in simplified form");
  const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();

  for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
    PHINode *Phi = cast<PHINode>(I);
    if (!isLoopCounter(Phi, L, SE))
      continue;

    // Avoid comparing an integer IV against a pointer Limit.
    if (BECount->getType()->isPointerTy() && !Phi->getType()->isPointerTy())
      continue;

    const auto *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));

    // AR may be a pointer type, while BECount is an integer type.
    // AR may be wider than BECount. With eq/ne tests overflow is immaterial.
    // AR may not be a narrower type, or we may never exit.
    uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
    if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth))
      continue;

    // Avoid reusing a potentially undef value to compute other values that may
    // have originally had a concrete definition.
    if (!hasConcreteDef(Phi)) {
      // We explicitly allow unknown phis as long as they are already used by
      // the loop exit test. This is legal since performing LFTR could not
      // increase the number of undef users.
      Value *IncPhi = Phi->getIncomingValueForBlock(LatchBlock);
      if (!isLoopExitTestBasedOn(Phi, ExitingBB) &&
          !isLoopExitTestBasedOn(IncPhi, ExitingBB))
        continue;
    }

    // Avoid introducing undefined behavior due to poison which didn't exist in
    // the original program. (Annoyingly, the rules for poison and undef
    // propagation are distinct, so this does NOT cover the undef case above.)
    // We have to ensure that we don't introduce UB by introducing a use on an
    // iteration where said IV produces poison. Our strategy here differs for
    // pointers and integer IVs. For integers, we strip and reinfer as needed,
    // see code in linearFunctionTestReplace. For pointers, we restrict
    // transforms as there is no good way to reinfer inbounds once lost.
    if (!Phi->getType()->isIntegerTy() &&
        !mustExecuteUBIfPoisonOnPathTo(Phi, ExitingBB->getTerminator(), DT))
      continue;

    const SCEV *Init = AR->getStart();

    if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) {
      // Don't force a live loop counter if another IV can be used.
      if (AlmostDeadIV(Phi, LatchBlock, Cond))
        continue;

      // Prefer to count-from-zero. This is a more "canonical" counter form. It
      // also prefers integer to pointer IVs.
      if (BestInit->isZero() != Init->isZero()) {
        if (BestInit->isZero())
          continue;
      }
      // If two IVs both count from zero or both count from nonzero then the
      // narrower is likely a dead phi that has been widened. Use the wider phi
      // to allow the other to be eliminated.
      else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType()))
        continue;
    }
    BestPhi = Phi;
    BestInit = Init;
  }
  return BestPhi;
}

/// Insert an IR expression which computes the value held by the IV IndVar
/// (which must be a loop counter w/unit stride) after the backedge of loop L
/// is taken ExitCount times.
static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB,
                           const SCEV *ExitCount, bool UsePostInc, Loop *L,
                           SCEVExpander &Rewriter, ScalarEvolution *SE) {
  assert(isLoopCounter(IndVar, L, SE));
  const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
  const SCEV *IVInit = AR->getStart();

  // IVInit may be a pointer while ExitCount is an integer when FindLoopCounter
  // finds a valid pointer IV. Sign extend ExitCount in order to materialize a
  // GEP. Avoid running SCEVExpander on a new pointer value, instead reusing
  // the existing GEPs whenever possible.
  if (IndVar->getType()->isPointerTy() &&
      !ExitCount->getType()->isPointerTy()) {
    // IVOffset will be the new GEP offset that is interpreted by GEP as a
    // signed value. ExitCount on the other hand represents the loop trip count,
    // which is an unsigned value. FindLoopCounter only allows induction
    // variables that have a positive unit stride of one. This means we don't
    // have to handle the case of negative offsets (yet) and just need to zero
    // extend ExitCount.
    Type *OfsTy = SE->getEffectiveSCEVType(IVInit->getType());
    const SCEV *IVOffset = SE->getTruncateOrZeroExtend(ExitCount, OfsTy);
    if (UsePostInc)
      IVOffset = SE->getAddExpr(IVOffset, SE->getOne(OfsTy));

    // Expand the code for the iteration count.
    assert(SE->isLoopInvariant(IVOffset, L) &&
           "Computed iteration count is not loop invariant!");

    // We could handle pointer IVs other than i8*, but we need to compensate for
    // gep index scaling.
    assert(SE->getSizeOfExpr(IntegerType::getInt64Ty(IndVar->getContext()),
                             cast<PointerType>(IndVar->getType())
                                 ->getElementType())->isOne() &&
           "unit stride pointer IV must be i8*");

    const SCEV *IVLimit = SE->getAddExpr(IVInit, IVOffset);
    BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
    return Rewriter.expandCodeFor(IVLimit, IndVar->getType(), BI);
  } else {
    // In any other case, convert both IVInit and ExitCount to integers before
    // comparing. This may result in SCEV expansion of pointers, but in practice
    // SCEV will fold the pointer arithmetic away as such:
    // BECount = (IVEnd - IVInit - 1) => IVLimit = IVInit (postinc).
    //
    // Valid Cases: (1) both integers is most common; (2) both may be pointers
    // for simple memset-style loops.
    //
    // IVInit integer and ExitCount pointer would only occur if a canonical IV
    // were generated on top of case #2, which is not expected.

    assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");
    // For unit stride, IVCount = Start + ExitCount with 2's complement
    // overflow.
    //
    // For integer IVs, truncate the IV before computing IVInit + BECount,
    // unless we know a priori that the limit must be a constant when evaluated
    // in the bitwidth of the IV. We prefer (potentially) keeping a truncate
    // of the IV in the loop over a (potentially) expensive expansion of the
    // widened exit count add(zext(add)) expression.
    if (SE->getTypeSizeInBits(IVInit->getType())
        > SE->getTypeSizeInBits(ExitCount->getType())) {
      if (isa<SCEVConstant>(IVInit) && isa<SCEVConstant>(ExitCount))
        ExitCount = SE->getZeroExtendExpr(ExitCount, IVInit->getType());
      else
        IVInit = SE->getTruncateExpr(IVInit, ExitCount->getType());
    }

    const SCEV *IVLimit = SE->getAddExpr(IVInit, ExitCount);

    if (UsePostInc)
      IVLimit = SE->getAddExpr(IVLimit, SE->getOne(IVLimit->getType()));

    // Expand the code for the iteration count.
    assert(SE->isLoopInvariant(IVLimit, L) &&
           "Computed iteration count is not loop invariant!");
    // Ensure that we generate the same type as IndVar, or a smaller integer
    // type. In the presence of null pointer values, we have an integer type
    // SCEV expression (IVInit) for a pointer type IV value (IndVar).
    Type *LimitTy = ExitCount->getType()->isPointerTy() ?
        IndVar->getType() : ExitCount->getType();
    BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
    return Rewriter.expandCodeFor(IVLimit, LimitTy, BI);
  }
}
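
// Editorial worked example (not from the original source): for a unit-stride
// counter starting at Start whose exit's backedge-taken count is EC,
//
//   pre-increment test:  limit = Start + EC      (compared against %iv)
//   post-increment test: limit = Start + EC + 1  (compared against %iv.next)
//
// e.g. Start = 7 and EC = 24 yields 'icmp ne i32 %iv.next, 32' for the
// post-increment form.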

/// This method rewrites the exit condition of the loop to be a canonical !=
/// comparison against the incremented loop induction variable. This pass is
/// able to rewrite the exit tests of any loop where the SCEV analysis can
/// determine a loop-invariant trip count of the loop, which is actually a much
/// broader range than just linear tests.
bool IndVarSimplify::
linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
                          const SCEV *ExitCount,
                          PHINode *IndVar, SCEVExpander &Rewriter) {
  assert(L->getLoopLatch() && "Loop no longer in simplified form?");
  assert(isLoopCounter(IndVar, L, SE));
  Instruction * const IncVar =
      cast<Instruction>(IndVar->getIncomingValueForBlock(L->getLoopLatch()));

  // Initialize CmpIndVar to the preincremented IV.
  Value *CmpIndVar = IndVar;
  bool UsePostInc = false;

  // If the exiting block is the same as the backedge block, we prefer to
  // compare against the post-incremented value, otherwise we must compare
  // against the preincremented value.
  if (ExitingBB == L->getLoopLatch()) {
    // For pointer IVs, we choose not to strip inbounds, which requires us not
    // to add a potentially UB introducing use. We need to either a) show
    // the loop test we're modifying is already in post-inc form, or b) show
    // that adding a use must not introduce UB.
    bool SafeToPostInc =
        IndVar->getType()->isIntegerTy() ||
        isLoopExitTestBasedOn(IncVar, ExitingBB) ||
        mustExecuteUBIfPoisonOnPathTo(IncVar, ExitingBB->getTerminator(), DT);
    if (SafeToPostInc) {
      UsePostInc = true;
      CmpIndVar = IncVar;
    }
  }

  // It may be necessary to drop nowrap flags on the incrementing instruction
  // if either LFTR moves from a pre-inc check to a post-inc check (in which
  // case the increment might have previously been poison on the last iteration
  // only) or if LFTR switches to a different IV that was previously dynamically
  // dead (and as such may be arbitrarily poison). We remove any nowrap flags
  // that SCEV didn't infer for the post-inc addrec (even if we use a pre-inc
  // check), because the pre-inc addrec flags may be adopted from the original
  // instruction, while SCEV has to explicitly prove the post-inc nowrap flags.
  // TODO: This handling is inaccurate for one case: If we switch to a
  // dynamically dead IV that wraps on the first loop iteration only, which is
  // not covered by the post-inc addrec. (If the new IV was not dynamically
  // dead, it could not be poison on the first iteration in the first place.)
  if (auto *BO = dyn_cast<BinaryOperator>(IncVar)) {
    const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IncVar));
    if (BO->hasNoUnsignedWrap())
      BO->setHasNoUnsignedWrap(AR->hasNoUnsignedWrap());
    if (BO->hasNoSignedWrap())
      BO->setHasNoSignedWrap(AR->hasNoSignedWrap());
  }

  Value *ExitCnt = genLoopLimit(
      IndVar, ExitingBB, ExitCount, UsePostInc, L, Rewriter, SE);
  assert(ExitCnt->getType()->isPointerTy() ==
             IndVar->getType()->isPointerTy() &&
         "genLoopLimit missed a cast");

  // Insert a new icmp_ne or icmp_eq instruction before the branch.
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  ICmpInst::Predicate P;
  if (L->contains(BI->getSuccessor(0)))
    P = ICmpInst::ICMP_NE;
  else
    P = ICmpInst::ICMP_EQ;

  IRBuilder<> Builder(BI);

  // The new loop exit condition should reuse the debug location of the
  // original loop exit condition.
  if (auto *Cond = dyn_cast<Instruction>(BI->getCondition()))
    Builder.SetCurrentDebugLocation(Cond->getDebugLoc());

  // For integer IVs, if we evaluated the limit in the narrower bitwidth to
  // avoid the expensive expansion of the limit expression in the wider type,
  // emit a truncate to narrow the IV to the ExitCount type. This is safe
  // since we know (from the exit count bitwidth), that we can't self-wrap in
  // the narrower type.
  unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType());
  unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType());
  if (CmpIndVarSize > ExitCntSize) {
    assert(!CmpIndVar->getType()->isPointerTy() &&
           !ExitCnt->getType()->isPointerTy());

    // Before resorting to actually inserting the truncate, use the same
    // reasoning as from SimplifyIndvar::eliminateTrunc to see if we can extend
    // the other side of the comparison instead. We still evaluate the limit
    // in the narrower bitwidth, we just prefer a zext/sext outside the loop to
    // a truncate within it.
    bool Extended = false;
    const SCEV *IV = SE->getSCEV(CmpIndVar);
    const SCEV *TruncatedIV = SE->getTruncateExpr(SE->getSCEV(CmpIndVar),
                                                  ExitCnt->getType());
    const SCEV *ZExtTrunc =
        SE->getZeroExtendExpr(TruncatedIV, CmpIndVar->getType());

    if (ZExtTrunc == IV) {
      Extended = true;
      ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(),
                                   "wide.trip.count");
    } else {
      const SCEV *SExtTrunc =
          SE->getSignExtendExpr(TruncatedIV, CmpIndVar->getType());
      if (SExtTrunc == IV) {
        Extended = true;
        ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(),
                                     "wide.trip.count");
      }
    }

    if (Extended) {
      bool Discard;
      L->makeLoopInvariant(ExitCnt, Discard);
    } else
      CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
                                      "lftr.wideiv");
  }
  LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
                    << "      LHS:" << *CmpIndVar << '\n'
                    << "       op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "==")
                    << "\n"
                    << "      RHS:\t" << *ExitCnt << "\n"
                    << "ExitCount:\t" << *ExitCount << "\n"
                    << "  was: " << *BI->getCondition() << "\n");

  Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond");
  Value *OrigCond = BI->getCondition();
  // It's tempting to use replaceAllUsesWith here to fully replace the old
  // comparison, but that's not immediately safe, since users of the old
  // comparison may not be dominated by the new comparison. Instead, just
  // update the branch to use the new comparison; in the common case this
  // will make the old comparison dead.
  BI->setCondition(Cond);
  DeadInsts.push_back(OrigCond);

  ++NumLFTR;
  return true;
}
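
// Editorial before/after sketch (illustrative IR, not from the original
// source) of the rewrite above:
//
//   before:  %cmp = icmp slt i32 %iv, %n
//            br i1 %cmp, label %loop, label %exit
//   after:   %exitcond = icmp ne i32 %iv.next, %limit
//            br i1 %exitcond, label %loop, label %exit
//
// Only the branch is updated; the old comparison is merely queued in
// DeadInsts, since its other users may not be dominated by the new one.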

//===----------------------------------------------------------------------===//
//  sinkUnusedInvariants. A late subpass to clean up loop preheaders.
//===----------------------------------------------------------------------===//

/// If there's a single exit block, sink any loop-invariant values that
/// were defined in the preheader but not used inside the loop into the
/// exit block to reduce register pressure in the loop.
bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
  BasicBlock *ExitBlock = L->getExitBlock();
  if (!ExitBlock) return false;

  BasicBlock *Preheader = L->getLoopPreheader();
  if (!Preheader) return false;

  bool MadeAnyChanges = false;
  BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt();
  BasicBlock::iterator I(Preheader->getTerminator());
  while (I != Preheader->begin()) {
    --I;
    // New instructions were inserted at the end of the preheader.
    if (isa<PHINode>(I))
      break;

    // Don't move instructions which might have side effects, since the side
    // effects need to complete before instructions inside the loop. Also don't
    // move instructions which might read memory, since the loop may modify
    // memory. Note that it's okay if the instruction might have undefined
    // behavior: LoopSimplify guarantees that the preheader dominates the exit
    // block.
    if (I->mayHaveSideEffects() || I->mayReadFromMemory())
      continue;

    // Skip debug info intrinsics.
    if (isa<DbgInfoIntrinsic>(I))
      continue;

    // Skip eh pad instructions.
    if (I->isEHPad())
      continue;

    // Don't sink allocas: we never want to sink static allocas out of the
    // entry block, and correctly sinking dynamic allocas requires
    // checks for stacksave/stackrestore intrinsics.
    // FIXME: Refactor this check somehow?
    if (isa<AllocaInst>(I))
      continue;

    // Determine if there is a use in or before the loop (direct or
    // otherwise).
    bool UsedInLoop = false;
    for (Use &U : I->uses()) {
      Instruction *User = cast<Instruction>(U.getUser());
      BasicBlock *UseBB = User->getParent();
      if (PHINode *P = dyn_cast<PHINode>(User)) {
        unsigned i =
            PHINode::getIncomingValueNumForOperand(U.getOperandNo());
        UseBB = P->getIncomingBlock(i);
      }
      if (UseBB == Preheader || L->contains(UseBB)) {
        UsedInLoop = true;
        break;
      }
    }

    // If there is, the def must remain in the preheader.
    if (UsedInLoop)
      continue;

    // Otherwise, sink it to the exit block.
    Instruction *ToMove = &*I;
    bool Done = false;

    if (I != Preheader->begin()) {
      // Skip debug info intrinsics.
      do {
        --I;
      } while (isa<DbgInfoIntrinsic>(I) && I != Preheader->begin());

      if (isa<DbgInfoIntrinsic>(I) && I == Preheader->begin())
        Done = true;
    } else {
      Done = true;
    }

    MadeAnyChanges = true;
    ToMove->moveBefore(*ExitBlock, InsertPt);
    if (Done) break;
    InsertPt = ToMove->getIterator();
  }

  return MadeAnyChanges;
}
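
// Editorial example (illustrative, not from the original source): a
// preheader computation with no uses inside the loop,
//
//   preheader:  %inv = udiv i64 %a, %b   ; only used after the loop
//   ...
//   exit:       call void @use(i64 %inv)
//
// is moved into the exit block, shortening the live range of %inv across
// the entire loop.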

bool IndVarSimplify::optimizeLoopExits(Loop *L) {
  SmallVector<BasicBlock*, 16> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);

  // Form an expression for the maximum exit count possible for this loop. We
  // merge the max and exact information to approximate a version of
  // getConstantMaxBackedgeTakenCount which isn't restricted to just constants.
  // TODO: factor this out as a version of getConstantMaxBackedgeTakenCount
  // which isn't guaranteed to return a constant.
  SmallVector<const SCEV*, 4> ExitCounts;
  const SCEV *MaxConstEC = SE->getConstantMaxBackedgeTakenCount(L);
  if (!isa<SCEVCouldNotCompute>(MaxConstEC))
    ExitCounts.push_back(MaxConstEC);
  for (BasicBlock *ExitingBB : ExitingBlocks) {
    const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
    if (!isa<SCEVCouldNotCompute>(ExitCount)) {
      assert(DT->dominates(ExitingBB, L->getLoopLatch()) &&
             "We should only have known counts for exiting blocks that "
             "dominate latch!");
      ExitCounts.push_back(ExitCount);
    }
  }
  if (ExitCounts.empty())
    return false;
  const SCEV *MaxExitCount = SE->getUMinFromMismatchedTypes(ExitCounts);

  bool Changed = false;
  for (BasicBlock *ExitingBB : ExitingBlocks) {
    // If our exiting block exits multiple loops, we can only rewrite the
    // innermost one. Otherwise, we're changing how many times the innermost
    // loop runs before it exits.
    if (LI->getLoopFor(ExitingBB) != L)
      continue;

    // Can't rewrite non-branch yet.
    BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
    if (!BI)
      continue;

    // If already constant, nothing to do.
    if (isa<Constant>(BI->getCondition()))
      continue;

    const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
    if (isa<SCEVCouldNotCompute>(ExitCount))
      continue;

    // If we know we'd exit on the first iteration, rewrite the exit to
    // reflect this. This does not imply the loop must exit through this
    // exit; there may be an earlier one taken on the first iteration.
    // TODO: Given we know the backedge can't be taken, we should go ahead
    // and break it. Or at least, kill all the header phis and simplify.
    if (ExitCount->isZero()) {
      bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
      auto *OldCond = BI->getCondition();
      auto *NewCond = ExitIfTrue ? ConstantInt::getTrue(OldCond->getType()) :
          ConstantInt::getFalse(OldCond->getType());
      BI->setCondition(NewCond);
      if (OldCond->use_empty())
        DeadInsts.push_back(OldCond);
      Changed = true;
      continue;
    }

    // If we end up with a pointer exit count, bail. Note that we can end up
    // with a pointer exit count for one exiting block, and not for another in
    // the same loop.
    if (!ExitCount->getType()->isIntegerTy() ||
        !MaxExitCount->getType()->isIntegerTy())
      continue;

    Type *WiderType =
        SE->getWiderType(MaxExitCount->getType(), ExitCount->getType());
    ExitCount = SE->getNoopOrZeroExtend(ExitCount, WiderType);
    MaxExitCount = SE->getNoopOrZeroExtend(MaxExitCount, WiderType);
    assert(MaxExitCount->getType() == ExitCount->getType());

    // Can we prove that some other exit must be taken strictly before this
    // one? TODO: handle cases where ule is known, and equality is covered
    // by a dominating exit.
    if (SE->isLoopEntryGuardedByCond(L, CmpInst::ICMP_ULT,
                                     MaxExitCount, ExitCount)) {
      bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
      auto *OldCond = BI->getCondition();
      auto *NewCond = ExitIfTrue ? ConstantInt::getFalse(OldCond->getType()) :
          ConstantInt::getTrue(OldCond->getType());
      BI->setCondition(NewCond);
      if (OldCond->use_empty())
        DeadInsts.push_back(OldCond);
      Changed = true;
      continue;
    }

    // TODO: If we can prove that the exiting iteration is equal to the exit
    // count for this exit and that no previous exit opportunities exist within
    // the loop, then we can discharge all other exits. (May fall out of
    // previous TODO.)

    // TODO: If we can't prove any relation between our exit count and the
    // loop's exit count, but taking this exit doesn't require actually running
    // the loop (i.e. no side effects, no computed values used in exit), then
    // we can replace the exit test with a loop invariant test which exits on
    // the first iteration.
  }
  return Changed;
}
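
// Editorial example (illustrative, not from the original source): with two
// exits where SCEV proves the first always fires strictly earlier,
//
//   exiting1: %c1 = icmp eq i64 %iv, 10    ; exit count 10
//   exiting2: %c2 = icmp eq i64 %iv, 100   ; exit count 100
//
// MaxExitCount is 10, and since 10 u< 100 the second exit can never be
// taken, so %c2 is folded to a constant that keeps control in the loop.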

//===----------------------------------------------------------------------===//
//  IndVarSimplify driver. Manage several subpasses of IV simplification.
//===----------------------------------------------------------------------===//

bool IndVarSimplify::run(Loop *L) {
  // We need (and expect!) the incoming loop to be in LCSSA.
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
         "LCSSA required to run indvars!");
  bool Changed = false;

  // If LoopSimplify form is not available, stay out of trouble. Some notes:
  //  - LSR currently only supports LoopSimplify-form loops. Indvars'
  //    canonicalization can be a pessimization without LSR to "clean up"
  //    afterwards.
  //  - We depend on having a preheader; in particular,
  //    Loop::getCanonicalInductionVariable only supports loops with preheaders,
  //    and we're in trouble if we can't find the induction variable even when
  //    we've manually inserted one.
  //  - LFTR relies on having a single backedge.
  if (!L->isLoopSimplifyForm())
    return false;

  // If there are any floating-point recurrences, attempt to
  // transform them to use integer recurrences.
  Changed |= rewriteNonIntegerIVs(L);

#ifndef NDEBUG
  // Used below for a consistency check only.
  const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
#endif

  // Create a rewriter object which we'll use to transform the code with.
  SCEVExpander Rewriter(*SE, DL, "indvars");
#ifndef NDEBUG
  Rewriter.setDebugType(DEBUG_TYPE);
#endif

  // Eliminate redundant IV users.
  //
  // Simplification works best when run before other consumers of SCEV. We
  // attempt to avoid evaluating SCEVs for sign/zero extend operations until
  // other expressions involving loop IVs have been evaluated. This helps SCEV
  // set no-wrap flags before normalizing sign/zero extension.
  Rewriter.disableCanonicalMode();
  Changed |= simplifyAndExtend(L, Rewriter, LI);

  // Check to see if we can compute the final value of any expressions
  // that are recurrent in the loop, and substitute the exit values from the
  // loop into any instructions outside of the loop that use the final values
  // of the current expressions.
  if (ReplaceExitValue != NeverRepl)
    Changed |= rewriteLoopExitValues(L, Rewriter);

  // Eliminate redundant IV cycles.
  NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts);

  Changed |= optimizeLoopExits(L);

  // If we have a trip count expression, rewrite the loop's exit condition
  // using it.
  if (!DisableLFTR) {
    SmallVector<BasicBlock*, 16> ExitingBlocks;
    L->getExitingBlocks(ExitingBlocks);
    for (BasicBlock *ExitingBB : ExitingBlocks) {
      // Can't rewrite non-branch yet.
      if (!isa<BranchInst>(ExitingBB->getTerminator()))
        continue;

      // If our exiting block exits multiple loops, we can only rewrite the
      // innermost one. Otherwise, we're changing how many times the innermost
      // loop runs before it exits.
      if (LI->getLoopFor(ExitingBB) != L)
        continue;

      if (!needsLFTR(L, ExitingBB))
        continue;

      const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
      if (isa<SCEVCouldNotCompute>(ExitCount))
        continue;

      // This was handled above, but as we form SCEVs, we can sometimes refine
      // existing ones; this allows exit counts to be folded to zero which
      // weren't when optimizeLoopExits saw them. Arguably, we should iterate
      // until stable to handle cases like this better.
      if (ExitCount->isZero())
        continue;

      PHINode *IndVar = FindLoopCounter(L, ExitingBB, ExitCount, SE, DT);
      if (!IndVar)
        continue;

      // Avoid high cost expansions. Note: This heuristic is questionable in
      // that our definition of "high cost" is not exactly principled.
      if (Rewriter.isHighCostExpansion(ExitCount, L))
        continue;

      // Check preconditions for proper SCEVExpander operation. SCEV does not
      // express SCEVExpander's dependencies, such as LoopSimplify. Instead
      // any pass that uses the SCEVExpander must do it. This does not work
      // well for loop passes because SCEVExpander makes assumptions about
      // all loops, while LoopPassManager only forces the current loop to be
      // simplified.
      //
      // FIXME: SCEV expansion has no way to bail out, so the caller must
      // explicitly check any assumptions made by SCEV. Brittle.
      const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(ExitCount);
      if (!AR || AR->getLoop()->getLoopPreheader())
        Changed |= linearFunctionTestReplace(L, ExitingBB,
                                             ExitCount, IndVar,
                                             Rewriter);
    }
  }
  // Clear the rewriter cache, because values that are in the rewriter's cache
  // can be deleted in the loop below, causing the AssertingVH in the cache to
  // trigger.
  Rewriter.clear();

  // Now that we're done iterating through lists, clean up any instructions
  // which are now dead.
  while (!DeadInsts.empty())
    if (Instruction *Inst =
            dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val()))
      Changed |= RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI);

  // The Rewriter may not be used from this point on.

  // Loop-invariant instructions in the preheader that aren't used in the
  // loop may be sunk below the loop to reduce register pressure.
  Changed |= sinkUnusedInvariants(L);

  // rewriteFirstIterationLoopExitValues does not rely on the computation of
  // trip count and therefore can further simplify exit values in addition to
  // rewriteLoopExitValues.
  Changed |= rewriteFirstIterationLoopExitValues(L);

  // Clean up dead instructions.
  Changed |= DeleteDeadPHIs(L->getHeader(), TLI);

  // Check a post-condition.
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
         "Indvars did not preserve LCSSA!");

  // Verify that LFTR and any other changes have not interfered with SCEV's
  // ability to compute trip count.
#ifndef NDEBUG
  if (VerifyIndvars && !isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {
    SE->forgetLoop(L);
    const SCEV *NewBECount = SE->getBackedgeTakenCount(L);
    if (SE->getTypeSizeInBits(BackedgeTakenCount->getType()) <
        SE->getTypeSizeInBits(NewBECount->getType()))
      NewBECount = SE->getTruncateOrNoop(NewBECount,
                                         BackedgeTakenCount->getType());
    else
      BackedgeTakenCount = SE->getTruncateOrNoop(BackedgeTakenCount,
                                                 NewBECount->getType());
    assert(BackedgeTakenCount == NewBECount && "indvars must preserve SCEV");
  }
#endif

  return Changed;
}

PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM,
                                          LoopStandardAnalysisResults &AR,
                                          LPMUpdater &) {
  Function *F = L.getHeader()->getParent();
  const DataLayout &DL = F->getParent()->getDataLayout();

  IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI);
  if (!IVS.run(&L))
    return PreservedAnalyses::all();

  auto PA = getLoopPassPreservedAnalyses();
  PA.preserveSet<CFGAnalyses>();
  return PA;
}

namespace {

struct IndVarSimplifyLegacyPass : public LoopPass {
  static char ID; // Pass identification, replacement for typeid

  IndVarSimplifyLegacyPass() : LoopPass(ID) {
    initializeIndVarSimplifyLegacyPassPass(*PassRegistry::getPassRegistry());
  }

  bool runOnLoop(Loop *L, LPPassManager &LPM) override {
    if (skipLoop(L))
      return false;

    auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
    auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
    auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
    auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
    auto *TLI = TLIP ? &TLIP->getTLI() : nullptr;
    auto *TTIP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
    auto *TTI = TTIP ? &TTIP->getTTI(*L->getHeader()->getParent()) : nullptr;
    const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();

    IndVarSimplify IVS(LI, SE, DT, DL, TLI, TTI);
    return IVS.run(L);
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    getLoopAnalysisUsage(AU);
  }
};

} // end anonymous namespace

char IndVarSimplifyLegacyPass::ID = 0;

INITIALIZE_PASS_BEGIN(IndVarSimplifyLegacyPass, "indvars",
                      "Induction Variable Simplification", false, false)
INITIALIZE_PASS_DEPENDENCY(LoopPass)
INITIALIZE_PASS_END(IndVarSimplifyLegacyPass, "indvars",
                    "Induction Variable Simplification", false, false)

Pass *llvm::createIndVarSimplifyPass() {
  return new IndVarSimplifyLegacyPass();
}