1 //===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This transformation analyzes and transforms the induction variables (and
10 // computations derived from them) into simpler forms suitable for subsequent
11 // analysis and transformation.
13 // If the trip count of a loop is computable, this pass also makes the following
14 // changes:
15 // 1. The exit condition for the loop is canonicalized to compare the
16 // induction value against the exit value. This turns loops like:
17 // 'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
18 // 2. Any use outside of the loop of an expression derived from the indvar
19 // is changed to compute the derived value outside of the loop, eliminating
20 // the dependence on the exit value of the induction variable. If the only
21 // purpose of the loop is to compute the exit value of some derived
22 // expression, this transformation will make the loop dead.
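//
//    As an illustration of point 2 (an added example, not exhaustive): given
//      for (i = 0; i != n; ++i) { sum += a[i]; }
//      use(i);
//    the use of 'i' after the loop can be rewritten to use 'n' directly,
//    removing the dependence on the exit value of the induction variable.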
24 //===----------------------------------------------------------------------===//
26 #include "llvm/Transforms/Scalar/IndVarSimplify.h"
27 #include "llvm/ADT/APFloat.h"
28 #include "llvm/ADT/APInt.h"
29 #include "llvm/ADT/ArrayRef.h"
30 #include "llvm/ADT/DenseMap.h"
31 #include "llvm/ADT/None.h"
32 #include "llvm/ADT/Optional.h"
33 #include "llvm/ADT/STLExtras.h"
34 #include "llvm/ADT/SmallSet.h"
35 #include "llvm/ADT/SmallPtrSet.h"
36 #include "llvm/ADT/SmallVector.h"
37 #include "llvm/ADT/Statistic.h"
38 #include "llvm/ADT/iterator_range.h"
39 #include "llvm/Analysis/LoopInfo.h"
40 #include "llvm/Analysis/LoopPass.h"
41 #include "llvm/Analysis/ScalarEvolution.h"
42 #include "llvm/Analysis/ScalarEvolutionExpander.h"
43 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
44 #include "llvm/Analysis/TargetLibraryInfo.h"
45 #include "llvm/Analysis/TargetTransformInfo.h"
46 #include "llvm/Analysis/ValueTracking.h"
47 #include "llvm/Transforms/Utils/Local.h"
48 #include "llvm/IR/BasicBlock.h"
49 #include "llvm/IR/Constant.h"
50 #include "llvm/IR/ConstantRange.h"
51 #include "llvm/IR/Constants.h"
52 #include "llvm/IR/DataLayout.h"
53 #include "llvm/IR/DerivedTypes.h"
54 #include "llvm/IR/Dominators.h"
55 #include "llvm/IR/Function.h"
56 #include "llvm/IR/IRBuilder.h"
57 #include "llvm/IR/InstrTypes.h"
58 #include "llvm/IR/Instruction.h"
59 #include "llvm/IR/Instructions.h"
60 #include "llvm/IR/IntrinsicInst.h"
61 #include "llvm/IR/Intrinsics.h"
62 #include "llvm/IR/Module.h"
63 #include "llvm/IR/Operator.h"
64 #include "llvm/IR/PassManager.h"
65 #include "llvm/IR/PatternMatch.h"
66 #include "llvm/IR/Type.h"
67 #include "llvm/IR/Use.h"
68 #include "llvm/IR/User.h"
69 #include "llvm/IR/Value.h"
70 #include "llvm/IR/ValueHandle.h"
71 #include "llvm/Pass.h"
72 #include "llvm/Support/Casting.h"
73 #include "llvm/Support/CommandLine.h"
74 #include "llvm/Support/Compiler.h"
75 #include "llvm/Support/Debug.h"
76 #include "llvm/Support/ErrorHandling.h"
77 #include "llvm/Support/MathExtras.h"
78 #include "llvm/Support/raw_ostream.h"
79 #include "llvm/Transforms/Scalar.h"
80 #include "llvm/Transforms/Scalar/LoopPassManager.h"
81 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
82 #include "llvm/Transforms/Utils/LoopUtils.h"
83 #include "llvm/Transforms/Utils/SimplifyIndVar.h"
84 #include <cassert>
85 #include <cstdint>
86 #include <utility>
88 using namespace llvm;
90 #define DEBUG_TYPE "indvars"
92 STATISTIC(NumWidened , "Number of indvars widened");
93 STATISTIC(NumReplaced , "Number of exit values replaced");
94 STATISTIC(NumLFTR , "Number of loop exit tests replaced");
95 STATISTIC(NumElimExt , "Number of IV sign/zero extends eliminated");
96 STATISTIC(NumElimIV , "Number of congruent IVs eliminated");
98 // Trip count verification can be enabled by default under NDEBUG if we
99 // implement a strong expression equivalence checker in SCEV. Until then, we
100 // use the verify-indvars flag, which may assert in some cases.
101 static cl::opt<bool> VerifyIndvars(
102 "verify-indvars", cl::Hidden,
103 cl::desc("Verify the ScalarEvolution result after running indvars"));
105 enum ReplaceExitVal { NeverRepl, OnlyCheapRepl, NoHardUse, AlwaysRepl };
107 static cl::opt<ReplaceExitVal> ReplaceExitValue(
108 "replexitval", cl::Hidden, cl::init(OnlyCheapRepl),
109 cl::desc("Choose the strategy to replace exit value in IndVarSimplify"),
110 cl::values(clEnumValN(NeverRepl, "never", "never replace exit value"),
111 clEnumValN(OnlyCheapRepl, "cheap",
112 "only replace exit value when the cost is cheap"),
113 clEnumValN(NoHardUse, "noharduse",
114 "only replace exit values when loop def likely dead"),
115 clEnumValN(AlwaysRepl, "always",
116 "always replace exit value whenever possible")));
118 static cl::opt<bool> UsePostIncrementRanges(
119 "indvars-post-increment-ranges", cl::Hidden,
120 cl::desc("Use post increment control-dependent ranges in IndVarSimplify"),
121 cl::init(true));
123 static cl::opt<bool>
124 DisableLFTR("disable-lftr", cl::Hidden, cl::init(false),
125 cl::desc("Disable Linear Function Test Replace optimization"));
127 static cl::opt<bool>
128 LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(false),
129 cl::desc("Predicate conditions in read only loops"));
132 namespace {
134 struct RewritePhi;
136 class IndVarSimplify {
137 LoopInfo *LI;
138 ScalarEvolution *SE;
139 DominatorTree *DT;
140 const DataLayout &DL;
141 TargetLibraryInfo *TLI;
142 const TargetTransformInfo *TTI;
144 SmallVector<WeakTrackingVH, 16> DeadInsts;
146 bool isValidRewrite(Value *FromVal, Value *ToVal);
148 bool handleFloatingPointIV(Loop *L, PHINode *PH);
149 bool rewriteNonIntegerIVs(Loop *L);
151 bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI);
152   /// Try to eliminate loop exits based on analyzable exit counts.
153 bool optimizeLoopExits(Loop *L, SCEVExpander &Rewriter);
154 /// Try to form loop invariant tests for loop exits by changing how many
155 /// iterations of the loop run when that is unobservable.
156 bool predicateLoopExits(Loop *L, SCEVExpander &Rewriter);
158 bool canLoopBeDeleted(Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet);
159 bool rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter);
160 bool rewriteFirstIterationLoopExitValues(Loop *L);
161 bool hasHardUserWithinLoop(const Loop *L, const Instruction *I) const;
163 bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
164 const SCEV *ExitCount,
165 PHINode *IndVar, SCEVExpander &Rewriter);
167 bool sinkUnusedInvariants(Loop *L);
169 public:
170 IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
171 const DataLayout &DL, TargetLibraryInfo *TLI,
172 TargetTransformInfo *TTI)
173 : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI) {}
175 bool run(Loop *L);
178 } // end anonymous namespace
180 /// Return true if the SCEV expansion generated by the rewriter can replace the
181 /// original value. SCEV guarantees that it produces the same value, but the way
182 /// it is produced may be illegal IR. Ideally, this function will only be
183 /// called for verification.
184 bool IndVarSimplify::isValidRewrite(Value *FromVal, Value *ToVal) {
185 // If an SCEV expression subsumed multiple pointers, its expansion could
186 // reassociate the GEP changing the base pointer. This is illegal because the
187 // final address produced by a GEP chain must be inbounds relative to its
188 // underlying object. Otherwise basic alias analysis, among other things,
189 // could fail in a dangerous way. Ultimately, SCEV will be improved to avoid
190 // producing an expression involving multiple pointers. Until then, we must
191 // bail out here.
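//
// Illustrative example (assumed IR, simplified): if FromVal is
//   %from = getelementptr inbounds i32, i32* %base1, i64 %i
// and the expansion ToVal ends up as
//   %to = getelementptr i32, i32* %base2, i64 %j
// with a different underlying base, the rewrite is rejected below because
// SE->getPointerBase() differs for the two pointer operands.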
193 // Retrieve the pointer operand of the GEP. Don't use GetUnderlyingObject
194 // because it understands lcssa phis while SCEV does not.
195 Value *FromPtr = FromVal;
196 Value *ToPtr = ToVal;
197 if (auto *GEP = dyn_cast<GEPOperator>(FromVal)) {
198 FromPtr = GEP->getPointerOperand();
200 if (auto *GEP = dyn_cast<GEPOperator>(ToVal)) {
201 ToPtr = GEP->getPointerOperand();
203 if (FromPtr != FromVal || ToPtr != ToVal) {
204 // Quickly check the common case
205 if (FromPtr == ToPtr)
206 return true;
208 // SCEV may have rewritten an expression that produces the GEP's pointer
209 // operand. That's ok as long as the pointer operand has the same base
210 // pointer. Unlike GetUnderlyingObject(), getPointerBase() will find the
211 // base of a recurrence. This handles the case in which SCEV expansion
212 // converts a pointer type recurrence into a nonrecurrent pointer base
213 // indexed by an integer recurrence.
215 // If the GEP base pointer is a vector of pointers, abort.
216 if (!FromPtr->getType()->isPointerTy() || !ToPtr->getType()->isPointerTy())
217 return false;
219 const SCEV *FromBase = SE->getPointerBase(SE->getSCEV(FromPtr));
220 const SCEV *ToBase = SE->getPointerBase(SE->getSCEV(ToPtr));
221 if (FromBase == ToBase)
222 return true;
224 LLVM_DEBUG(dbgs() << "INDVARS: GEP rewrite bail out " << *FromBase
225 << " != " << *ToBase << "\n");
227 return false;
229 return true;
232 /// Determine the insertion point for this user. By default, insert immediately
233 /// before the user. SCEVExpander or LICM will hoist loop invariants out of the
234 /// loop. For PHI nodes, there may be multiple uses, so compute the nearest
235 /// common dominator for the incoming blocks. A nullptr can be returned if no
236 /// viable location is found: it may happen if User is a PHI and Def only comes
237 /// to this PHI from unreachable blocks.
238 static Instruction *getInsertPointForUses(Instruction *User, Value *Def,
239 DominatorTree *DT, LoopInfo *LI) {
240 PHINode *PHI = dyn_cast<PHINode>(User);
241 if (!PHI)
242 return User;
244 Instruction *InsertPt = nullptr;
245 for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) {
246 if (PHI->getIncomingValue(i) != Def)
247 continue;
249 BasicBlock *InsertBB = PHI->getIncomingBlock(i);
251 if (!DT->isReachableFromEntry(InsertBB))
252 continue;
254 if (!InsertPt) {
255 InsertPt = InsertBB->getTerminator();
256 continue;
258 InsertBB = DT->findNearestCommonDominator(InsertPt->getParent(), InsertBB);
259 InsertPt = InsertBB->getTerminator();
262 // If we have skipped all inputs, it means that Def only comes to Phi from
263 // unreachable blocks.
264 if (!InsertPt)
265 return nullptr;
267 auto *DefI = dyn_cast<Instruction>(Def);
268 if (!DefI)
269 return InsertPt;
271 assert(DT->dominates(DefI, InsertPt) && "def does not dominate all uses");
273 auto *L = LI->getLoopFor(DefI->getParent());
274 assert(!L || L->contains(LI->getLoopFor(InsertPt->getParent())));
276 for (auto *DTN = (*DT)[InsertPt->getParent()]; DTN; DTN = DTN->getIDom())
277 if (LI->getLoopFor(DTN->getBlock()) == L)
278 return DTN->getBlock()->getTerminator();
280 llvm_unreachable("DefI dominates InsertPt!");
283 //===----------------------------------------------------------------------===//
284 // rewriteNonIntegerIVs and helpers. Prefer integer IVs.
285 //===----------------------------------------------------------------------===//
287 /// Convert APF to an integer, if possible.
288 static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
289 bool isExact = false;
290 // See if we can convert this to an int64_t
291 uint64_t UIntVal;
292 if (APF.convertToInteger(makeMutableArrayRef(UIntVal), 64, true,
293 APFloat::rmTowardZero, &isExact) != APFloat::opOK ||
294 !isExact)
295 return false;
296 IntVal = UIntVal;
297 return true;
300 /// If the loop has a floating-point induction variable, then insert a
301 /// corresponding integer induction variable if possible.
302 /// For example,
303 /// for(double i = 0; i < 10000; ++i)
304 /// bar(i)
305 /// is converted into
306 /// for(int i = 0; i < 10000; ++i)
307 /// bar((double)i);
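/// At the IR level this roughly corresponds to rewriting (illustrative,
/// simplified names):
///   %iv.fp  = phi double [ 0.0, %preheader ], [ %inc.fp, %backedge ]
///   %inc.fp = fadd double %iv.fp, 1.0
///   %cmp    = fcmp olt double %inc.fp, 1.0e4
/// into an integer recurrence
///   %iv.int  = phi i32 [ 0, %preheader ], [ %inc.int, %backedge ]
///   %inc.int = add i32 %iv.int, 1
///   %cmp     = icmp slt i32 %inc.int, 10000
/// plus a sitofp back to double if the FP value is still used in the loop.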
308 bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
309 unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
310 unsigned BackEdge = IncomingEdge^1;
312 // Check incoming value.
313 auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));
315 int64_t InitValue;
316 if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
317 return false;
319   // Check the IV increment. Reject this PN if the increment operation is not
320   // an add or if the increment value cannot be represented as an integer.
321 auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
322 if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return false;
324 // If this is not an add of the PHI with a constantfp, or if the constant fp
325 // is not an integer, bail out.
326 ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
327 int64_t IncValue;
328 if (IncValueVal == nullptr || Incr->getOperand(0) != PN ||
329 !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
330 return false;
332 // Check Incr uses. One user is PN and the other user is an exit condition
333 // used by the conditional terminator.
334 Value::user_iterator IncrUse = Incr->user_begin();
335 Instruction *U1 = cast<Instruction>(*IncrUse++);
336 if (IncrUse == Incr->user_end()) return false;
337 Instruction *U2 = cast<Instruction>(*IncrUse++);
338 if (IncrUse != Incr->user_end()) return false;
340 // Find exit condition, which is an fcmp. If it doesn't exist, or if it isn't
341 // only used by a branch, we can't transform it.
342 FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
343 if (!Compare)
344 Compare = dyn_cast<FCmpInst>(U2);
345 if (!Compare || !Compare->hasOneUse() ||
346 !isa<BranchInst>(Compare->user_back()))
347 return false;
349 BranchInst *TheBr = cast<BranchInst>(Compare->user_back());
351 // We need to verify that the branch actually controls the iteration count
352 // of the loop. If not, the new IV can overflow and no one will notice.
353 // The branch block must be in the loop and one of the successors must be out
354 // of the loop.
355 assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
356 if (!L->contains(TheBr->getParent()) ||
357 (L->contains(TheBr->getSuccessor(0)) &&
358 L->contains(TheBr->getSuccessor(1))))
359 return false;
361 // If it isn't a comparison with an integer-as-fp (the exit value), we can't
362 // transform it.
363 ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
364 int64_t ExitValue;
365 if (ExitValueVal == nullptr ||
366 !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
367 return false;
369 // Find new predicate for integer comparison.
370 CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
371 switch (Compare->getPredicate()) {
372 default: return false; // Unknown comparison.
373 case CmpInst::FCMP_OEQ:
374 case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
375 case CmpInst::FCMP_ONE:
376 case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
377 case CmpInst::FCMP_OGT:
378 case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
379 case CmpInst::FCMP_OGE:
380 case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
381 case CmpInst::FCMP_OLT:
382 case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
383 case CmpInst::FCMP_OLE:
384 case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
387 // We convert the floating point induction variable to a signed i32 value if
388 // we can. This is only safe if the comparison will not overflow in a way
389 // that won't be trapped by the integer equivalent operations. Check for this
390 // now.
391 // TODO: We could use i64 if it is native and the range requires it.
393 // The start/stride/exit values must all fit in signed i32.
394 if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
395 return false;
397 // If not actually striding (add x, 0.0), avoid touching the code.
398 if (IncValue == 0)
399 return false;
401 // Positive and negative strides have different safety conditions.
402 if (IncValue > 0) {
403 // If we have a positive stride, we require the init to be less than the
404 // exit value.
405 if (InitValue >= ExitValue)
406 return false;
408 uint32_t Range = uint32_t(ExitValue-InitValue);
409 // Check for infinite loop, either:
410 // while (i <= Exit) or until (i > Exit)
411 if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
412 if (++Range == 0) return false; // Range overflows.
415 unsigned Leftover = Range % uint32_t(IncValue);
417 // If this is an equality comparison, we require that the strided value
418 // exactly land on the exit value, otherwise the IV condition will wrap
419 // around and do things the fp IV wouldn't.
420 if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
421 Leftover != 0)
422 return false;
424 // If the stride would wrap around the i32 before exiting, we can't
425 // transform the IV.
426 if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
427 return false;
428 } else {
429 // If we have a negative stride, we require the init to be greater than the
430 // exit value.
431 if (InitValue <= ExitValue)
432 return false;
434 uint32_t Range = uint32_t(InitValue-ExitValue);
435 // Check for infinite loop, either:
436 // while (i >= Exit) or until (i < Exit)
437 if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
438 if (++Range == 0) return false; // Range overflows.
441 unsigned Leftover = Range % uint32_t(-IncValue);
443 // If this is an equality comparison, we require that the strided value
444 // exactly land on the exit value, otherwise the IV condition will wrap
445 // around and do things the fp IV wouldn't.
446 if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
447 Leftover != 0)
448 return false;
450 // If the stride would wrap around the i32 before exiting, we can't
451 // transform the IV.
452 if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
453 return false;
456 IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());
458 // Insert new integer induction variable.
459 PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN);
460 NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
461 PN->getIncomingBlock(IncomingEdge));
463 Value *NewAdd =
464 BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue),
465 Incr->getName()+".int", Incr);
466 NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));
468 ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd,
469 ConstantInt::get(Int32Ty, ExitValue),
470 Compare->getName());
472 // In the following deletions, PN may become dead and may be deleted.
473 // Use a WeakTrackingVH to observe whether this happens.
474 WeakTrackingVH WeakPH = PN;
476 // Delete the old floating point exit comparison. The branch starts using the
477 // new comparison.
478 NewCompare->takeName(Compare);
479 Compare->replaceAllUsesWith(NewCompare);
480 RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI);
482 // Delete the old floating point increment.
483 Incr->replaceAllUsesWith(UndefValue::get(Incr->getType()));
484 RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI);
486 // If the FP induction variable still has uses, this is because something else
487 // in the loop uses its value. In order to canonicalize the induction
488 // variable, we chose to eliminate the IV and rewrite it in terms of an
489 // int->fp cast.
491 // We give preference to sitofp over uitofp because it is faster on most
492 // platforms.
493 if (WeakPH) {
494 Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
495 &*PN->getParent()->getFirstInsertionPt());
496 PN->replaceAllUsesWith(Conv);
497 RecursivelyDeleteTriviallyDeadInstructions(PN, TLI);
499 return true;
502 bool IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
503 // First step. Check to see if there are any floating-point recurrences.
504 // If there are, change them into integer recurrences, permitting analysis by
505 // the SCEV routines.
506 BasicBlock *Header = L->getHeader();
508 SmallVector<WeakTrackingVH, 8> PHIs;
509 for (PHINode &PN : Header->phis())
510 PHIs.push_back(&PN);
512 bool Changed = false;
513 for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
514 if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i]))
515 Changed |= handleFloatingPointIV(L, PN);
517   // If the loop previously had a floating-point IV, ScalarEvolution
518 // may not have been able to compute a trip count. Now that we've done some
519 // re-writing, the trip count may be computable.
520 if (Changed)
521 SE->forgetLoop(L);
522 return Changed;
525 namespace {
527 // Collect information about PHI nodes which can be transformed in
528 // rewriteLoopExitValues.
529 struct RewritePhi {
530 PHINode *PN;
532 // Ith incoming value.
533 unsigned Ith;
535 // Exit value after expansion.
536 Value *Val;
539   // True if expanding the exit value is expected to be high cost.
539 bool HighCost;
541 RewritePhi(PHINode *P, unsigned I, Value *V, bool H)
542 : PN(P), Ith(I), Val(V), HighCost(H) {}
545 } // end anonymous namespace
547 //===----------------------------------------------------------------------===//
548 // rewriteLoopExitValues - Optimize IV users outside the loop.
549 // As a side effect, reduces the amount of IV processing within the loop.
550 //===----------------------------------------------------------------------===//
552 bool IndVarSimplify::hasHardUserWithinLoop(const Loop *L, const Instruction *I) const {
553 SmallPtrSet<const Instruction *, 8> Visited;
554 SmallVector<const Instruction *, 8> WorkList;
555 Visited.insert(I);
556 WorkList.push_back(I);
557 while (!WorkList.empty()) {
558 const Instruction *Curr = WorkList.pop_back_val();
559 // This use is outside the loop, nothing to do.
560 if (!L->contains(Curr))
561 continue;
562 // Do we assume it is a "hard" use which will not be eliminated easily?
563 if (Curr->mayHaveSideEffects())
564 return true;
565 // Otherwise, add all its users to worklist.
566 for (auto U : Curr->users()) {
567 auto *UI = cast<Instruction>(U);
568 if (Visited.insert(UI).second)
569 WorkList.push_back(UI);
572 return false;
575 /// Check to see if this loop has a computable loop-invariant execution count.
576 /// If so, this means that we can compute the final value of any expressions
577 /// that are recurrent in the loop, and substitute the exit values from the loop
578 /// into any instructions outside of the loop that use the final values of the
579 /// current expressions.
581 /// This is mostly redundant with the regular IndVarSimplify activities that
582 /// happen later, except that it's more powerful in some cases, because it's
583 /// able to brute-force evaluate arbitrary instructions as long as they have
584 /// constant operands at the beginning of the loop.
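///
/// For example (illustrative): in
///   for (i = 0, s = 1; i != 16; ++i) s *= 3;
///   use(s);
/// the trip count is the constant 16, so the use of 's' after the loop can be
/// replaced by the brute-force-evaluated value 3^16, after which the loop
/// itself may become dead.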
585 bool IndVarSimplify::rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
586 // Check a pre-condition.
587 assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
588 "Indvars did not preserve LCSSA!");
590 SmallVector<BasicBlock*, 8> ExitBlocks;
591 L->getUniqueExitBlocks(ExitBlocks);
593 SmallVector<RewritePhi, 8> RewritePhiSet;
594 // Find all values that are computed inside the loop, but used outside of it.
595 // Because of LCSSA, these values will only occur in LCSSA PHI Nodes. Scan
596 // the exit blocks of the loop to find them.
597 for (BasicBlock *ExitBB : ExitBlocks) {
598 // If there are no PHI nodes in this exit block, then no values defined
599 // inside the loop are used on this path, skip it.
600 PHINode *PN = dyn_cast<PHINode>(ExitBB->begin());
601 if (!PN) continue;
603 unsigned NumPreds = PN->getNumIncomingValues();
605 // Iterate over all of the PHI nodes.
606 BasicBlock::iterator BBI = ExitBB->begin();
607 while ((PN = dyn_cast<PHINode>(BBI++))) {
608 if (PN->use_empty())
609 continue; // dead use, don't replace it
611 if (!SE->isSCEVable(PN->getType()))
612 continue;
614 // It's necessary to tell ScalarEvolution about this explicitly so that
615 // it can walk the def-use list and forget all SCEVs, as it may not be
616 // watching the PHI itself. Once the new exit value is in place, there
617 // may not be a def-use connection between the loop and every instruction
618 // which got a SCEVAddRecExpr for that loop.
619 SE->forgetValue(PN);
621 // Iterate over all of the values in all the PHI nodes.
622 for (unsigned i = 0; i != NumPreds; ++i) {
623 // If the value being merged in is not integer or is not defined
624 // in the loop, skip it.
625 Value *InVal = PN->getIncomingValue(i);
626 if (!isa<Instruction>(InVal))
627 continue;
629 // If this pred is for a subloop, not L itself, skip it.
630 if (LI->getLoopFor(PN->getIncomingBlock(i)) != L)
631 continue; // The Block is in a subloop, skip it.
633 // Check that InVal is defined in the loop.
634 Instruction *Inst = cast<Instruction>(InVal);
635 if (!L->contains(Inst))
636 continue;
638 // Okay, this instruction has a user outside of the current loop
639 // and varies predictably *inside* the loop. Evaluate the value it
640 // contains when the loop exits, if possible. We prefer to start with
641 // expressions which are true for all exits (so as to maximize
642 // expression reuse by the SCEVExpander), but resort to per-exit
643 // evaluation if that fails.
644 const SCEV *ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop());
645 if (isa<SCEVCouldNotCompute>(ExitValue) ||
646 !SE->isLoopInvariant(ExitValue, L) ||
647 !isSafeToExpand(ExitValue, *SE)) {
648 // TODO: This should probably be sunk into SCEV in some way; maybe a
649 // getSCEVForExit(SCEV*, L, ExitingBB)? It can be generalized for
650 // most SCEV expressions and other recurrence types (e.g. shift
651 // recurrences). Is there existing code we can reuse?
652 const SCEV *ExitCount = SE->getExitCount(L, PN->getIncomingBlock(i));
653 if (isa<SCEVCouldNotCompute>(ExitCount))
654 continue;
655 if (auto *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Inst)))
656 if (AddRec->getLoop() == L)
657 ExitValue = AddRec->evaluateAtIteration(ExitCount, *SE);
658 if (isa<SCEVCouldNotCompute>(ExitValue) ||
659 !SE->isLoopInvariant(ExitValue, L) ||
660 !isSafeToExpand(ExitValue, *SE))
661 continue;
664 // Computing the value outside of the loop brings no benefit if it is
665         // definitely used inside the loop in a way which cannot be optimized
666 // away. Avoid doing so unless we know we have a value which computes
667 // the ExitValue already. TODO: This should be merged into SCEV
668 // expander to leverage its knowledge of existing expressions.
669 if (ReplaceExitValue != AlwaysRepl &&
670 !isa<SCEVConstant>(ExitValue) && !isa<SCEVUnknown>(ExitValue) &&
671 hasHardUserWithinLoop(L, Inst))
672 continue;
674 bool HighCost = Rewriter.isHighCostExpansion(ExitValue, L, Inst);
675 Value *ExitVal = Rewriter.expandCodeFor(ExitValue, PN->getType(), Inst);
677 LLVM_DEBUG(dbgs() << "INDVARS: RLEV: AfterLoopVal = " << *ExitVal
678 << '\n'
679 << " LoopVal = " << *Inst << "\n");
681 if (!isValidRewrite(Inst, ExitVal)) {
682 DeadInsts.push_back(ExitVal);
683 continue;
686 #ifndef NDEBUG
687 // If we reuse an instruction from a loop which is neither L nor one of
688 // its containing loops, we end up breaking LCSSA form for this loop by
689 // creating a new use of its instruction.
690 if (auto *ExitInsn = dyn_cast<Instruction>(ExitVal))
691 if (auto *EVL = LI->getLoopFor(ExitInsn->getParent()))
692 if (EVL != L)
693 assert(EVL->contains(L) && "LCSSA breach detected!");
694 #endif
696 // Collect all the candidate PHINodes to be rewritten.
697 RewritePhiSet.emplace_back(PN, i, ExitVal, HighCost);
702 bool LoopCanBeDel = canLoopBeDeleted(L, RewritePhiSet);
704 bool Changed = false;
705 // Transformation.
706 for (const RewritePhi &Phi : RewritePhiSet) {
707 PHINode *PN = Phi.PN;
708 Value *ExitVal = Phi.Val;
710 // Only do the rewrite when the ExitValue can be expanded cheaply.
711 // If LoopCanBeDel is true, rewrite exit value aggressively.
712 if (ReplaceExitValue == OnlyCheapRepl && !LoopCanBeDel && Phi.HighCost) {
713 DeadInsts.push_back(ExitVal);
714 continue;
717 Changed = true;
718 ++NumReplaced;
719 Instruction *Inst = cast<Instruction>(PN->getIncomingValue(Phi.Ith));
720 PN->setIncomingValue(Phi.Ith, ExitVal);
722 // If this instruction is dead now, delete it. Don't do it now to avoid
723 // invalidating iterators.
724 if (isInstructionTriviallyDead(Inst, TLI))
725 DeadInsts.push_back(Inst);
727 // Replace PN with ExitVal if that is legal and does not break LCSSA.
728 if (PN->getNumIncomingValues() == 1 &&
729 LI->replacementPreservesLCSSAForm(PN, ExitVal)) {
730 PN->replaceAllUsesWith(ExitVal);
731 PN->eraseFromParent();
735 // The insertion point instruction may have been deleted; clear it out
736 // so that the rewriter doesn't trip over it later.
737 Rewriter.clearInsertPoint();
738 return Changed;
741 //===---------------------------------------------------------------------===//
742 // rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know
743 // they will exit at the first iteration.
744 //===---------------------------------------------------------------------===//
746 /// Check to see if this loop has loop invariant conditions which lead to loop
747 /// exits. If so, we know that if the exit path is taken, it is at the first
748 /// loop iteration. This lets us predict exit values of PHI nodes that live in
749 /// loop header.
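///
/// For example (illustrative): if the header contains
///   %p = phi i32 [ %init, %preheader ], [ %next, %latch ]
/// and a loop-invariant condition guards an exit whose branch executes on
/// every iteration that runs, then whenever that exit is taken it is taken on
/// the first iteration, so the exit block's LCSSA phi can use %init directly.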
750 bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
751 // Verify the input to the pass is already in LCSSA form.
752 assert(L->isLCSSAForm(*DT));
754 SmallVector<BasicBlock *, 8> ExitBlocks;
755 L->getUniqueExitBlocks(ExitBlocks);
757 bool MadeAnyChanges = false;
758 for (auto *ExitBB : ExitBlocks) {
759 // If there are no more PHI nodes in this exit block, then no more
760 // values defined inside the loop are used on this path.
761 for (PHINode &PN : ExitBB->phis()) {
762 for (unsigned IncomingValIdx = 0, E = PN.getNumIncomingValues();
763 IncomingValIdx != E; ++IncomingValIdx) {
764 auto *IncomingBB = PN.getIncomingBlock(IncomingValIdx);
766 // Can we prove that the exit must run on the first iteration if it
767 // runs at all? (i.e. early exits are fine for our purposes, but
768 // traces which lead to this exit being taken on the 2nd iteration
769 // aren't.) Note that this is about whether the exit branch is
770 // executed, not about whether it is taken.
771 if (!L->getLoopLatch() ||
772 !DT->dominates(IncomingBB, L->getLoopLatch()))
773 continue;
775 // Get condition that leads to the exit path.
776 auto *TermInst = IncomingBB->getTerminator();
778 Value *Cond = nullptr;
779 if (auto *BI = dyn_cast<BranchInst>(TermInst)) {
780 // Must be a conditional branch, otherwise the block
781 // should not be in the loop.
782 Cond = BI->getCondition();
783 } else if (auto *SI = dyn_cast<SwitchInst>(TermInst))
784 Cond = SI->getCondition();
785 else
786 continue;
788 if (!L->isLoopInvariant(Cond))
789 continue;
791 auto *ExitVal = dyn_cast<PHINode>(PN.getIncomingValue(IncomingValIdx));
793 // Only deal with PHIs in the loop header.
794 if (!ExitVal || ExitVal->getParent() != L->getHeader())
795 continue;
797 // If ExitVal is a PHI on the loop header, then we know its
798 // value along this exit because the exit can only be taken
799 // on the first iteration.
800 auto *LoopPreheader = L->getLoopPreheader();
801 assert(LoopPreheader && "Invalid loop");
802 int PreheaderIdx = ExitVal->getBasicBlockIndex(LoopPreheader);
803 if (PreheaderIdx != -1) {
804 assert(ExitVal->getParent() == L->getHeader() &&
805 "ExitVal must be in loop header");
806 MadeAnyChanges = true;
807 PN.setIncomingValue(IncomingValIdx,
808 ExitVal->getIncomingValue(PreheaderIdx));
813 return MadeAnyChanges;
816 /// Check whether it is possible to delete the loop after rewriting exit
817 /// value. If it is possible, ignore ReplaceExitValue and do rewriting
818 /// aggressively.
819 bool IndVarSimplify::canLoopBeDeleted(
820 Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet) {
821 BasicBlock *Preheader = L->getLoopPreheader();
822 // If there is no preheader, the loop will not be deleted.
823 if (!Preheader)
824 return false;
826 // In LoopDeletion pass Loop can be deleted when ExitingBlocks.size() > 1.
827 // We obviate multiple ExitingBlocks case for simplicity.
828 // TODO: If we see testcase with multiple ExitingBlocks can be deleted
829 // after exit value rewriting, we can enhance the logic here.
830 SmallVector<BasicBlock *, 4> ExitingBlocks;
831 L->getExitingBlocks(ExitingBlocks);
832 SmallVector<BasicBlock *, 8> ExitBlocks;
833 L->getUniqueExitBlocks(ExitBlocks);
834 if (ExitBlocks.size() != 1 || ExitingBlocks.size() != 1)
835 return false;
837 BasicBlock *ExitBlock = ExitBlocks[0];
838 BasicBlock::iterator BI = ExitBlock->begin();
839 while (PHINode *P = dyn_cast<PHINode>(BI)) {
840 Value *Incoming = P->getIncomingValueForBlock(ExitingBlocks[0]);
842 // If the Incoming value of P is found in RewritePhiSet, we know it
843 // could be rewritten to use a loop invariant value in transformation
844 // phase later. Skip it in the loop invariant check below.
845 bool found = false;
846 for (const RewritePhi &Phi : RewritePhiSet) {
847 unsigned i = Phi.Ith;
848 if (Phi.PN == P && (Phi.PN)->getIncomingValue(i) == Incoming) {
849 found = true;
850 break;
854 Instruction *I;
855 if (!found && (I = dyn_cast<Instruction>(Incoming)))
856 if (!L->hasLoopInvariantOperands(I))
857 return false;
859 ++BI;
862 for (auto *BB : L->blocks())
863 if (llvm::any_of(*BB, [](Instruction &I) {
864 return I.mayHaveSideEffects();
866 return false;
868 return true;
871 //===----------------------------------------------------------------------===//
872 // IV Widening - Extend the width of an IV to cover its widest uses.
873 //===----------------------------------------------------------------------===//
875 namespace {
877 // Collect information about induction variables that are used by sign/zero
878 // extend operations. This information is recorded by visitIVCast and provides
879 // the input to WidenIV.
880 struct WideIVInfo {
881 PHINode *NarrowIV = nullptr;
883   // Widest integer type created by a [sz]ext.
884 Type *WidestNativeType = nullptr;
886 // Was a sext user seen before a zext?
887 bool IsSigned = false;
890 } // end anonymous namespace
892 /// Update information about the induction variable that is extended by this
893 /// sign or zero extend operation. This is used to determine the final width of
894 /// the IV before actually widening it.
895 static void visitIVCast(CastInst *Cast, WideIVInfo &WI, ScalarEvolution *SE,
896 const TargetTransformInfo *TTI) {
897 bool IsSigned = Cast->getOpcode() == Instruction::SExt;
898 if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
899 return;
901 Type *Ty = Cast->getType();
902 uint64_t Width = SE->getTypeSizeInBits(Ty);
903 if (!Cast->getModule()->getDataLayout().isLegalInteger(Width))
904 return;
906 // Check that `Cast` actually extends the induction variable (we rely on this
907 // later). This takes care of cases where `Cast` is extending a truncation of
908 // the narrow induction variable, and thus can end up being narrower than the
909 // "narrow" induction variable.
910 uint64_t NarrowIVWidth = SE->getTypeSizeInBits(WI.NarrowIV->getType());
911 if (NarrowIVWidth >= Width)
912 return;
914 // Cast is either an sext or zext up to this point.
915   // We should not widen an indvar if arithmetic on the wider indvar is more
916   // expensive than on the narrower indvar. We check only the cost of ADD
917 // because at least an ADD is required to increment the induction variable. We
918 // could compute more comprehensively the cost of all instructions on the
919 // induction variable when necessary.
920 if (TTI &&
921 TTI->getArithmeticInstrCost(Instruction::Add, Ty) >
922 TTI->getArithmeticInstrCost(Instruction::Add,
923 Cast->getOperand(0)->getType())) {
924 return;
927 if (!WI.WidestNativeType) {
928 WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
929 WI.IsSigned = IsSigned;
930 return;
933 // We extend the IV to satisfy the sign of its first user, arbitrarily.
934 if (WI.IsSigned != IsSigned)
935 return;
937 if (Width > SE->getTypeSizeInBits(WI.WidestNativeType))
938 WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
941 namespace {
943 /// Record a link in the Narrow IV def-use chain along with the WideIV that
944 /// computes the same value as the Narrow IV def. This avoids caching Use*
945 /// pointers.
946 struct NarrowIVDefUse {
947 Instruction *NarrowDef = nullptr;
948 Instruction *NarrowUse = nullptr;
949 Instruction *WideDef = nullptr;
951 // True if the narrow def is never negative. Tracking this information lets
952 // us use a sign extension instead of a zero extension or vice versa, when
953 // profitable and legal.
954 bool NeverNegative = false;
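  // For example (illustrative): if the narrow def is known non-negative, then
  //   zext i32 %narrow to i64 == sext i32 %narrow to i64
  // so a sign-extended wide IV can also serve users that expect a zero
  // extension, and vice versa.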
956 NarrowIVDefUse(Instruction *ND, Instruction *NU, Instruction *WD,
957 bool NeverNegative)
958 : NarrowDef(ND), NarrowUse(NU), WideDef(WD),
959 NeverNegative(NeverNegative) {}
962 /// The goal of this transform is to remove sign and zero extends without
963 /// creating any new induction variables. To do this, it creates a new phi of
964 /// the wider type and redirects all users, either removing extends or inserting
965 /// truncs whenever we stop propagating the type.
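///
/// For example (illustrative, assuming i64 is legal on the target):
///   %iv      = phi i32 [ 0, %ph ], [ %iv.next, %latch ]
///   %iv.next = add nsw i32 %iv, 1
///   %ext     = sext i32 %iv.next to i64
/// becomes a single wide recurrence
///   %iv.wide      = phi i64 [ 0, %ph ], [ %iv.wide.next, %latch ]
///   %iv.wide.next = add nsw i64 %iv.wide, 1
/// the sext is deleted, and any remaining i32 users are fed by a trunc of the
/// wide IV.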
966 class WidenIV {
967 // Parameters
968 PHINode *OrigPhi;
969 Type *WideType;
971 // Context
972 LoopInfo *LI;
973 Loop *L;
974 ScalarEvolution *SE;
975 DominatorTree *DT;
977 // Does the module have any calls to the llvm.experimental.guard intrinsic
978 // at all? If not we can avoid scanning instructions looking for guards.
979 bool HasGuards;
981 // Result
982 PHINode *WidePhi = nullptr;
983 Instruction *WideInc = nullptr;
984 const SCEV *WideIncExpr = nullptr;
985 SmallVectorImpl<WeakTrackingVH> &DeadInsts;
987 SmallPtrSet<Instruction *,16> Widened;
988 SmallVector<NarrowIVDefUse, 8> NarrowIVUsers;
990 enum ExtendKind { ZeroExtended, SignExtended, Unknown };
992 // A map tracking the kind of extension used to widen each narrow IV
993 // and narrow IV user.
994 // Key: pointer to a narrow IV or IV user.
995 // Value: the kind of extension used to widen this Instruction.
996 DenseMap<AssertingVH<Instruction>, ExtendKind> ExtendKindMap;
998 using DefUserPair = std::pair<AssertingVH<Value>, AssertingVH<Instruction>>;
1000 // A map with control-dependent ranges for post increment IV uses. The key is
1001 // a pair of IV def and a use of this def denoting the context. The value is
1002 // a ConstantRange representing possible values of the def at the given
1003 // context.
1004 DenseMap<DefUserPair, ConstantRange> PostIncRangeInfos;
1006 Optional<ConstantRange> getPostIncRangeInfo(Value *Def,
1007 Instruction *UseI) {
1008 DefUserPair Key(Def, UseI);
1009 auto It = PostIncRangeInfos.find(Key);
1010 return It == PostIncRangeInfos.end()
1011 ? Optional<ConstantRange>(None)
1012 : Optional<ConstantRange>(It->second);
1015 void calculatePostIncRanges(PHINode *OrigPhi);
1016 void calculatePostIncRange(Instruction *NarrowDef, Instruction *NarrowUser);
1018 void updatePostIncRangeInfo(Value *Def, Instruction *UseI, ConstantRange R) {
1019 DefUserPair Key(Def, UseI);
1020 auto It = PostIncRangeInfos.find(Key);
1021 if (It == PostIncRangeInfos.end())
1022 PostIncRangeInfos.insert({Key, R});
1023 else
1024 It->second = R.intersectWith(It->second);
1027 public:
1028 WidenIV(const WideIVInfo &WI, LoopInfo *LInfo, ScalarEvolution *SEv,
1029 DominatorTree *DTree, SmallVectorImpl<WeakTrackingVH> &DI,
1030 bool HasGuards)
1031 : OrigPhi(WI.NarrowIV), WideType(WI.WidestNativeType), LI(LInfo),
1032 L(LI->getLoopFor(OrigPhi->getParent())), SE(SEv), DT(DTree),
1033 HasGuards(HasGuards), DeadInsts(DI) {
1034 assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV");
1035 ExtendKindMap[OrigPhi] = WI.IsSigned ? SignExtended : ZeroExtended;
1038 PHINode *createWideIV(SCEVExpander &Rewriter);
1040 protected:
1041 Value *createExtendInst(Value *NarrowOper, Type *WideType, bool IsSigned,
1042 Instruction *Use);
1044 Instruction *cloneIVUser(NarrowIVDefUse DU, const SCEVAddRecExpr *WideAR);
1045 Instruction *cloneArithmeticIVUser(NarrowIVDefUse DU,
1046 const SCEVAddRecExpr *WideAR);
1047 Instruction *cloneBitwiseIVUser(NarrowIVDefUse DU);
1049 ExtendKind getExtendKind(Instruction *I);
1051 using WidenedRecTy = std::pair<const SCEVAddRecExpr *, ExtendKind>;
1053 WidenedRecTy getWideRecurrence(NarrowIVDefUse DU);
1055 WidenedRecTy getExtendedOperandRecurrence(NarrowIVDefUse DU);
1057 const SCEV *getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
1058 unsigned OpCode) const;
1060 Instruction *widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter);
1062 bool widenLoopCompare(NarrowIVDefUse DU);
1063 bool widenWithVariantLoadUse(NarrowIVDefUse DU);
1064 void widenWithVariantLoadUseCodegen(NarrowIVDefUse DU);
1066 void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef);
1069 } // end anonymous namespace
1071 Value *WidenIV::createExtendInst(Value *NarrowOper, Type *WideType,
1072 bool IsSigned, Instruction *Use) {
1073 // Set the debug location and conservative insertion point.
1074 IRBuilder<> Builder(Use);
1075 // Hoist the insertion point into loop preheaders as far as possible.
1076 for (const Loop *L = LI->getLoopFor(Use->getParent());
1077 L && L->getLoopPreheader() && L->isLoopInvariant(NarrowOper);
1078 L = L->getParentLoop())
1079 Builder.SetInsertPoint(L->getLoopPreheader()->getTerminator());
1081 return IsSigned ? Builder.CreateSExt(NarrowOper, WideType) :
1082 Builder.CreateZExt(NarrowOper, WideType);
1085 /// Instantiate a wide operation to replace a narrow operation. This only needs
1086 /// to handle operations that can evaluate to a SCEVAddRec. It can safely return
1087 /// nullptr for any operation we decide not to clone.
1088 Instruction *WidenIV::cloneIVUser(NarrowIVDefUse DU,
1089 const SCEVAddRecExpr *WideAR) {
1090 unsigned Opcode = DU.NarrowUse->getOpcode();
1091 switch (Opcode) {
1092 default:
1093 return nullptr;
1094 case Instruction::Add:
1095 case Instruction::Mul:
1096 case Instruction::UDiv:
1097 case Instruction::Sub:
1098 return cloneArithmeticIVUser(DU, WideAR);
1100 case Instruction::And:
1101 case Instruction::Or:
1102 case Instruction::Xor:
1103 case Instruction::Shl:
1104 case Instruction::LShr:
1105 case Instruction::AShr:
1106 return cloneBitwiseIVUser(DU);
1110 Instruction *WidenIV::cloneBitwiseIVUser(NarrowIVDefUse DU) {
1111 Instruction *NarrowUse = DU.NarrowUse;
1112 Instruction *NarrowDef = DU.NarrowDef;
1113 Instruction *WideDef = DU.WideDef;
1115 LLVM_DEBUG(dbgs() << "Cloning bitwise IVUser: " << *NarrowUse << "\n");
1117 // Replace NarrowDef operands with WideDef. Otherwise, we don't know anything
1118 // about the narrow operand yet so must insert a [sz]ext. It is probably loop
1119 // invariant and will be folded or hoisted. If it actually comes from a
1120 // widened IV, it should be removed during a future call to widenIVUse.
1121 bool IsSigned = getExtendKind(NarrowDef) == SignExtended;
1122 Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
1123 ? WideDef
1124 : createExtendInst(NarrowUse->getOperand(0), WideType,
1125 IsSigned, NarrowUse);
1126 Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
1127 ? WideDef
1128 : createExtendInst(NarrowUse->getOperand(1), WideType,
1129 IsSigned, NarrowUse);
1131 auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
1132 auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
1133 NarrowBO->getName());
1134 IRBuilder<> Builder(NarrowUse);
1135 Builder.Insert(WideBO);
1136 WideBO->copyIRFlags(NarrowBO);
1137 return WideBO;
1140 Instruction *WidenIV::cloneArithmeticIVUser(NarrowIVDefUse DU,
1141 const SCEVAddRecExpr *WideAR) {
1142 Instruction *NarrowUse = DU.NarrowUse;
1143 Instruction *NarrowDef = DU.NarrowDef;
1144 Instruction *WideDef = DU.WideDef;
1146 LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");
1148 unsigned IVOpIdx = (NarrowUse->getOperand(0) == NarrowDef) ? 0 : 1;
1150 // We're trying to find X such that
1152 // Widen(NarrowDef `op` NonIVNarrowDef) == WideAR == WideDef `op.wide` X
1154 // We guess two solutions to X, sext(NonIVNarrowDef) and zext(NonIVNarrowDef),
1155 // and check using SCEV if any of them are correct.
1157 // Returns true if extending NonIVNarrowDef according to `SignExt` is a
1158 // correct solution to X.
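  //
  // For example (illustrative): for 'add i32 %narrowdef, %n', we test whether
  // sext(%n) or zext(%n) makes WideDef + X equal, as a SCEV, to the wide
  // recurrence WideAR computed for the narrow use, and keep whichever guess
  // (if either) succeeds.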
1159 auto GuessNonIVOperand = [&](bool SignExt) {
1160 const SCEV *WideLHS;
1161 const SCEV *WideRHS;
1163 auto GetExtend = [this, SignExt](const SCEV *S, Type *Ty) {
1164 if (SignExt)
1165 return SE->getSignExtendExpr(S, Ty);
1166 return SE->getZeroExtendExpr(S, Ty);
1169 if (IVOpIdx == 0) {
1170 WideLHS = SE->getSCEV(WideDef);
1171 const SCEV *NarrowRHS = SE->getSCEV(NarrowUse->getOperand(1));
1172 WideRHS = GetExtend(NarrowRHS, WideType);
1173 } else {
1174 const SCEV *NarrowLHS = SE->getSCEV(NarrowUse->getOperand(0));
1175 WideLHS = GetExtend(NarrowLHS, WideType);
1176 WideRHS = SE->getSCEV(WideDef);
1179 // WideUse is "WideDef `op.wide` X" as described in the comment.
1180 const SCEV *WideUse = nullptr;
1182 switch (NarrowUse->getOpcode()) {
1183 default:
1184 llvm_unreachable("No other possibility!");
1186 case Instruction::Add:
1187 WideUse = SE->getAddExpr(WideLHS, WideRHS);
1188 break;
1190 case Instruction::Mul:
1191 WideUse = SE->getMulExpr(WideLHS, WideRHS);
1192 break;
1194 case Instruction::UDiv:
1195 WideUse = SE->getUDivExpr(WideLHS, WideRHS);
1196 break;
1198 case Instruction::Sub:
1199 WideUse = SE->getMinusSCEV(WideLHS, WideRHS);
1200 break;
1203 return WideUse == WideAR;
1206 bool SignExtend = getExtendKind(NarrowDef) == SignExtended;
1207 if (!GuessNonIVOperand(SignExtend)) {
1208 SignExtend = !SignExtend;
1209 if (!GuessNonIVOperand(SignExtend))
1210 return nullptr;
1213 Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
1214 ? WideDef
1215 : createExtendInst(NarrowUse->getOperand(0), WideType,
1216 SignExtend, NarrowUse);
1217 Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
1218 ? WideDef
1219 : createExtendInst(NarrowUse->getOperand(1), WideType,
1220 SignExtend, NarrowUse);
1222 auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
1223 auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
1224 NarrowBO->getName());
1226 IRBuilder<> Builder(NarrowUse);
1227 Builder.Insert(WideBO);
1228 WideBO->copyIRFlags(NarrowBO);
1229 return WideBO;
1232 WidenIV::ExtendKind WidenIV::getExtendKind(Instruction *I) {
1233 auto It = ExtendKindMap.find(I);
1234 assert(It != ExtendKindMap.end() && "Instruction not yet extended!");
1235 return It->second;
1238 const SCEV *WidenIV::getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
1239 unsigned OpCode) const {
1240 if (OpCode == Instruction::Add)
1241 return SE->getAddExpr(LHS, RHS);
1242 if (OpCode == Instruction::Sub)
1243 return SE->getMinusSCEV(LHS, RHS);
1244 if (OpCode == Instruction::Mul)
1245 return SE->getMulExpr(LHS, RHS);
1247 llvm_unreachable("Unsupported opcode.");
1250 /// No-wrap operations can transfer sign extension of their result to their
1251 /// operands. Generate the SCEV value for the widened operation without
1252 /// actually modifying the IR yet. If the expression after extending the
1253 /// operands is an AddRec for this loop, return the AddRec and the kind of
1254 /// extension used.
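///
/// For example (illustrative): for 'add nsw i32 %narrowdef, %b', nsw gives
///   sext(%narrowdef + %b) == sext(%narrowdef) + sext(%b)
/// so sign-extending the non-IV operand %b still yields an AddRec of the wide
/// type for this loop.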
1255 WidenIV::WidenedRecTy WidenIV::getExtendedOperandRecurrence(NarrowIVDefUse DU) {
1256 // Handle the common case of add<nsw/nuw>
1257 const unsigned OpCode = DU.NarrowUse->getOpcode();
1258   // Only Add/Sub/Mul instructions are supported so far.
1259 if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
1260 OpCode != Instruction::Mul)
1261 return {nullptr, Unknown};
1263 // One operand (NarrowDef) has already been extended to WideDef. Now determine
1264 // if extending the other will lead to a recurrence.
1265 const unsigned ExtendOperIdx =
1266 DU.NarrowUse->getOperand(0) == DU.NarrowDef ? 1 : 0;
1267 assert(DU.NarrowUse->getOperand(1-ExtendOperIdx) == DU.NarrowDef && "bad DU");
1269 const SCEV *ExtendOperExpr = nullptr;
1270 const OverflowingBinaryOperator *OBO =
1271 cast<OverflowingBinaryOperator>(DU.NarrowUse);
1272 ExtendKind ExtKind = getExtendKind(DU.NarrowDef);
1273 if (ExtKind == SignExtended && OBO->hasNoSignedWrap())
1274 ExtendOperExpr = SE->getSignExtendExpr(
1275 SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
1276 else if(ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap())
1277 ExtendOperExpr = SE->getZeroExtendExpr(
1278 SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
1279 else
1280 return {nullptr, Unknown};
1282 // When creating this SCEV expr, don't apply the current operations NSW or NUW
1283 // flags. This instruction may be guarded by control flow that the no-wrap
1284 // behavior depends on. Non-control-equivalent instructions can be mapped to
1285 // the same SCEV expression, and it would be incorrect to transfer NSW/NUW
1286 // semantics to those operations.
1287 const SCEV *lhs = SE->getSCEV(DU.WideDef);
1288 const SCEV *rhs = ExtendOperExpr;
1290 // Let's swap operands to the initial order for the case of non-commutative
1291 // operations, like SUB. See PR21014.
1292 if (ExtendOperIdx == 0)
1293 std::swap(lhs, rhs);
1294 const SCEVAddRecExpr *AddRec =
1295 dyn_cast<SCEVAddRecExpr>(getSCEVByOpCode(lhs, rhs, OpCode));
1297 if (!AddRec || AddRec->getLoop() != L)
1298 return {nullptr, Unknown};
1300 return {AddRec, ExtKind};
1303 /// Is this instruction potentially interesting for further simplification after
1304 /// widening its type? In other words, can the extend be safely hoisted out of
1305 /// the loop, with SCEV reducing the value to a recurrence on the same loop? If
1306 /// so, return the extended recurrence and the kind of extension used. Otherwise
1307 /// return {nullptr, Unknown}.
1308 WidenIV::WidenedRecTy WidenIV::getWideRecurrence(NarrowIVDefUse DU) {
1309 if (!SE->isSCEVable(DU.NarrowUse->getType()))
1310 return {nullptr, Unknown};
1312 const SCEV *NarrowExpr = SE->getSCEV(DU.NarrowUse);
1313 if (SE->getTypeSizeInBits(NarrowExpr->getType()) >=
1314 SE->getTypeSizeInBits(WideType)) {
1315 // NarrowUse implicitly widens its operand. e.g. a gep with a narrow
1316 // index. So don't follow this use.
1317 return {nullptr, Unknown};
1320 const SCEV *WideExpr;
1321 ExtendKind ExtKind;
1322 if (DU.NeverNegative) {
1323 WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
1324 if (isa<SCEVAddRecExpr>(WideExpr))
1325 ExtKind = SignExtended;
1326 else {
1327 WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
1328 ExtKind = ZeroExtended;
1330 } else if (getExtendKind(DU.NarrowDef) == SignExtended) {
1331 WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
1332 ExtKind = SignExtended;
1333 } else {
1334 WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
1335 ExtKind = ZeroExtended;
1337 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr);
1338 if (!AddRec || AddRec->getLoop() != L)
1339 return {nullptr, Unknown};
1340 return {AddRec, ExtKind};
1343 /// This IV user cannot be widened. Replace this use of the original narrow IV
1344 /// with a truncation of the new wide IV to isolate and eliminate the narrow IV.
1345 static void truncateIVUse(NarrowIVDefUse DU, DominatorTree *DT, LoopInfo *LI) {
1346 auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI);
1347 if (!InsertPt)
1348 return;
1349 LLVM_DEBUG(dbgs() << "INDVARS: Truncate IV " << *DU.WideDef << " for user "
1350 << *DU.NarrowUse << "\n");
1351 IRBuilder<> Builder(InsertPt);
1352 Value *Trunc = Builder.CreateTrunc(DU.WideDef, DU.NarrowDef->getType());
1353 DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc);
1356 /// If the narrow use is a compare instruction, then widen the compare
1357 /// (and possibly the other operand). The extend operation is hoisted into the
1358 /// loop preheader as far as possible.
1359 bool WidenIV::widenLoopCompare(NarrowIVDefUse DU) {
1360 ICmpInst *Cmp = dyn_cast<ICmpInst>(DU.NarrowUse);
1361 if (!Cmp)
1362 return false;
1364 // We can legally widen the comparison in the following two cases:
1366 // - The signedness of the IV extension and comparison match
1368 // - The narrow IV is always positive (and thus its sign extension is equal
1369 // to its zero extension). For instance, let's say we're zero extending
1370 // %narrow for the following use
1372 // icmp slt i32 %narrow, %val ... (A)
1374 // and %narrow is always positive. Then
1376 // (A) == icmp slt i32 sext(%narrow), sext(%val)
1377 // == icmp slt i32 zext(%narrow), sext(%val)
1378 bool IsSigned = getExtendKind(DU.NarrowDef) == SignExtended;
1379 if (!(DU.NeverNegative || IsSigned == Cmp->isSigned()))
1380 return false;
1382 Value *Op = Cmp->getOperand(Cmp->getOperand(0) == DU.NarrowDef ? 1 : 0);
1383 unsigned CastWidth = SE->getTypeSizeInBits(Op->getType());
1384 unsigned IVWidth = SE->getTypeSizeInBits(WideType);
1385 assert(CastWidth <= IVWidth && "Unexpected width while widening compare.");
1387 // Widen the compare instruction.
1388 auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI);
1389 if (!InsertPt)
1390 return false;
1391 IRBuilder<> Builder(InsertPt);
1392 DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
1394 // Widen the other operand of the compare, if necessary.
1395 if (CastWidth < IVWidth) {
1396 Value *ExtOp = createExtendInst(Op, WideType, Cmp->isSigned(), Cmp);
1397 DU.NarrowUse->replaceUsesOfWith(Op, ExtOp);
1399 return true;
1402 /// If the narrow use is an instruction whose two operands are the defining
1403 /// instruction of DU and a load instruction, then we have the following:
1404 /// when the load is hoisted outside the loop, we never reach this function,
1405 /// because scalar evolution handles the loop-invariant operand in widenIVUse
1406 /// and efficient code is generated without emitting truncate instructions.
1407 /// But when the load is not hoisted (whether due to a limitation of alias
1408 /// analysis or to a true legality constraint), scalar evolution cannot
1409 /// proceed with a loop-variant value and inefficient code is generated.
1410 /// This function handles the non-hoisted-load special case by making the
1411 /// optimization generate the same kind of code as for a hoisted load
1412 /// (widen the use and eliminate the sign/zero extend instruction). This is
1413 /// especially important when the induction variables affect the addressing
1414 /// mode in code generation.
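///
/// For example (illustrative):
///   %val = load i32, i32* %p        ; not hoisted out of the loop
///   %add = add nsw i32 %iv, %val
///   %ext = sext i32 %add to i64     ; feeds address computation
/// Widening %iv lets %add be recreated directly as an i64 operation, so the
/// sext (and a compensating trunc) disappear and the result can feed the
/// addressing mode directly.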
1415 bool WidenIV::widenWithVariantLoadUse(NarrowIVDefUse DU) {
1416 Instruction *NarrowUse = DU.NarrowUse;
1417 Instruction *NarrowDef = DU.NarrowDef;
1418 Instruction *WideDef = DU.WideDef;
1420 // Handle the common case of add<nsw/nuw>
1421 const unsigned OpCode = NarrowUse->getOpcode();
1422 // Only Add/Sub/Mul instructions are supported.
1423 if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
1424 OpCode != Instruction::Mul)
1425 return false;
1427 // The operand that is not defined by NarrowDef of DU. Let's call it the
1428 // other operand.
1429 unsigned ExtendOperIdx = DU.NarrowUse->getOperand(0) == NarrowDef ? 1 : 0;
1430 assert(DU.NarrowUse->getOperand(1 - ExtendOperIdx) == DU.NarrowDef &&
1431 "bad DU");
1433 const SCEV *ExtendOperExpr = nullptr;
1434 const OverflowingBinaryOperator *OBO =
1435 cast<OverflowingBinaryOperator>(NarrowUse);
1436 ExtendKind ExtKind = getExtendKind(NarrowDef);
1437 if (ExtKind == SignExtended && OBO->hasNoSignedWrap())
1438 ExtendOperExpr = SE->getSignExtendExpr(
1439 SE->getSCEV(NarrowUse->getOperand(ExtendOperIdx)), WideType);
1440 else if (ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap())
1441 ExtendOperExpr = SE->getZeroExtendExpr(
1442 SE->getSCEV(NarrowUse->getOperand(ExtendOperIdx)), WideType);
1443 else
1444 return false;
1446 // We are interested in the other operand being a load instruction.
1447 // But, we should look into relaxing this restriction later on.
1448 auto *I = dyn_cast<Instruction>(NarrowUse->getOperand(ExtendOperIdx));
1449 if (I && I->getOpcode() != Instruction::Load)
1450 return false;
1452   // Verify that the defining operand is an AddRec.
1453 const SCEV *Op1 = SE->getSCEV(WideDef);
1454 const SCEVAddRecExpr *AddRecOp1 = dyn_cast<SCEVAddRecExpr>(Op1);
1455 if (!AddRecOp1 || AddRecOp1->getLoop() != L)
1456 return false;
1457   // Verify that the other operand is an extend.
1458 if (ExtKind == SignExtended) {
1459 if (!isa<SCEVSignExtendExpr>(ExtendOperExpr))
1460 return false;
1461 } else {
1462 if (!isa<SCEVZeroExtendExpr>(ExtendOperExpr))
1463 return false;
1466 if (ExtKind == SignExtended) {
1467 for (Use &U : NarrowUse->uses()) {
1468 SExtInst *User = dyn_cast<SExtInst>(U.getUser());
1469 if (!User || User->getType() != WideType)
1470 return false;
1472 } else { // ExtKind == ZeroExtended
1473 for (Use &U : NarrowUse->uses()) {
1474 ZExtInst *User = dyn_cast<ZExtInst>(U.getUser());
1475 if (!User || User->getType() != WideType)
1476 return false;
1480 return true;
1483 /// Special Case for widening with variant Loads (see
1484 /// WidenIV::widenWithVariantLoadUse). This is the code generation part.
1485 void WidenIV::widenWithVariantLoadUseCodegen(NarrowIVDefUse DU) {
1486 Instruction *NarrowUse = DU.NarrowUse;
1487 Instruction *NarrowDef = DU.NarrowDef;
1488 Instruction *WideDef = DU.WideDef;
1490 ExtendKind ExtKind = getExtendKind(NarrowDef);
1492 LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");
1494 // Generate the widened use instruction.
1495 Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
1496 ? WideDef
1497 : createExtendInst(NarrowUse->getOperand(0), WideType,
1498 ExtKind, NarrowUse);
1499 Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
1500 ? WideDef
1501 : createExtendInst(NarrowUse->getOperand(1), WideType,
1502 ExtKind, NarrowUse);
1504 auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
1505 auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
1506 NarrowBO->getName());
1507 IRBuilder<> Builder(NarrowUse);
1508 Builder.Insert(WideBO);
1509 WideBO->copyIRFlags(NarrowBO);
1511 if (ExtKind == SignExtended)
1512 ExtendKindMap[NarrowUse] = SignExtended;
1513 else
1514 ExtendKindMap[NarrowUse] = ZeroExtended;
1516 // Update the Use.
1517 if (ExtKind == SignExtended) {
1518 for (Use &U : NarrowUse->uses()) {
1519 SExtInst *User = dyn_cast<SExtInst>(U.getUser());
1520 if (User && User->getType() == WideType) {
1521 LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *User << " replaced by "
1522 << *WideBO << "\n");
1523 ++NumElimExt;
1524 User->replaceAllUsesWith(WideBO);
1525 DeadInsts.emplace_back(User);
1528 } else { // ExtKind == ZeroExtended
1529 for (Use &U : NarrowUse->uses()) {
1530 ZExtInst *User = dyn_cast<ZExtInst>(U.getUser());
1531 if (User && User->getType() == WideType) {
1532 LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *User << " replaced by "
1533 << *WideBO << "\n");
1534 ++NumElimExt;
1535 User->replaceAllUsesWith(WideBO);
1536 DeadInsts.emplace_back(User);
1542 /// Determine whether an individual user of the narrow IV can be widened. If so,
1543 /// return the wide clone of the user.
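/// A minimal illustrative example (hypothetical names, not from the original
/// comment): once %iv has been widened to %iv.wide, a use
///   %s = sext i32 %iv to i64
/// is replaced outright by %iv.wide, while an arithmetic use such as
///   %a = add nsw i32 %iv, 1
/// is cloned as 'add nsw i64 %iv.wide, 1' if it still evaluates to an add
/// recurrence after widening; otherwise a trunc of %iv.wide is inserted.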
1544 Instruction *WidenIV::widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
1545 assert(ExtendKindMap.count(DU.NarrowDef) &&
1546 "Should already know the kind of extension used to widen NarrowDef");
1548 // Stop traversing the def-use chain at inner-loop phis or post-loop phis.
1549 if (PHINode *UsePhi = dyn_cast<PHINode>(DU.NarrowUse)) {
1550 if (LI->getLoopFor(UsePhi->getParent()) != L) {
1551 // For LCSSA phis, sink the truncate outside the loop.
1552 // After SimplifyCFG most loop exit targets have a single predecessor.
1553 // Otherwise fall back to a truncate within the loop.
1554 if (UsePhi->getNumOperands() != 1)
1555 truncateIVUse(DU, DT, LI);
1556 else {
1557 // Widening the PHI requires us to insert a trunc. The logical place
1558 // for this trunc is in the same BB as the PHI. This is not possible if
1559 // the BB is terminated by a catchswitch.
1560 if (isa<CatchSwitchInst>(UsePhi->getParent()->getTerminator()))
1561 return nullptr;
1563 PHINode *WidePhi =
1564 PHINode::Create(DU.WideDef->getType(), 1, UsePhi->getName() + ".wide",
1565 UsePhi);
1566 WidePhi->addIncoming(DU.WideDef, UsePhi->getIncomingBlock(0));
1567 IRBuilder<> Builder(&*WidePhi->getParent()->getFirstInsertionPt());
1568 Value *Trunc = Builder.CreateTrunc(WidePhi, DU.NarrowDef->getType());
1569 UsePhi->replaceAllUsesWith(Trunc);
1570 DeadInsts.emplace_back(UsePhi);
1571 LLVM_DEBUG(dbgs() << "INDVARS: Widen lcssa phi " << *UsePhi << " to "
1572 << *WidePhi << "\n");
1574 return nullptr;
1578 // This narrow use can be widened by a sext if it's non-negative or its narrow
1579 // def was widened by a sext. Same for zext.
1580 auto canWidenBySExt = [&]() {
1581 return DU.NeverNegative || getExtendKind(DU.NarrowDef) == SignExtended;
1583 auto canWidenByZExt = [&]() {
1584 return DU.NeverNegative || getExtendKind(DU.NarrowDef) == ZeroExtended;
1587 // Our raison d'etre! Eliminate sign and zero extension.
1588 if ((isa<SExtInst>(DU.NarrowUse) && canWidenBySExt()) ||
1589 (isa<ZExtInst>(DU.NarrowUse) && canWidenByZExt())) {
1590 Value *NewDef = DU.WideDef;
1591 if (DU.NarrowUse->getType() != WideType) {
1592 unsigned CastWidth = SE->getTypeSizeInBits(DU.NarrowUse->getType());
1593 unsigned IVWidth = SE->getTypeSizeInBits(WideType);
1594 if (CastWidth < IVWidth) {
1595 // The cast isn't as wide as the IV, so insert a Trunc.
1596 IRBuilder<> Builder(DU.NarrowUse);
1597 NewDef = Builder.CreateTrunc(DU.WideDef, DU.NarrowUse->getType());
1599 else {
1600 // A wider extend was hidden behind a narrower one. This may induce
1601 // another round of IV widening in which the intermediate IV becomes
1602 // dead. It should be very rare.
1603 LLVM_DEBUG(dbgs() << "INDVARS: New IV " << *WidePhi
1604 << " not wide enough to subsume " << *DU.NarrowUse
1605 << "\n");
1606 DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
1607 NewDef = DU.NarrowUse;
1610 if (NewDef != DU.NarrowUse) {
1611 LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *DU.NarrowUse
1612 << " replaced by " << *DU.WideDef << "\n");
1613 ++NumElimExt;
1614 DU.NarrowUse->replaceAllUsesWith(NewDef);
1615 DeadInsts.emplace_back(DU.NarrowUse);
1617 // Now that the extend is gone, we want to expose its uses for potential
1618 // further simplification. We don't need to directly inform SimplifyIVUsers
1619 // of the new users, because their parent IV will be processed later as a
1620 // new loop phi. If we preserved IVUsers analysis, we would also want to
1621 // push the uses of WideDef here.
1623 // No further widening is needed. The deceased [sz]ext had done it for us.
1624 return nullptr;
1627 // Does this user itself evaluate to a recurrence after widening?
1628 WidenedRecTy WideAddRec = getExtendedOperandRecurrence(DU);
1629 if (!WideAddRec.first)
1630 WideAddRec = getWideRecurrence(DU);
1632 assert((WideAddRec.first == nullptr) == (WideAddRec.second == Unknown));
1633 if (!WideAddRec.first) {
1634 // If the use is a loop condition, try to promote the condition instead of
1635 // truncating the IV first.
1636 if (widenLoopCompare(DU))
1637 return nullptr;
1639 // We are here about to generate a truncate instruction that may hurt
1640 // performance because the scalar evolution expression computed earlier
1641 // in WideAddRec.first does not indicate a polynomial induction expression.
1642 // In that case, look at the operands of the use instruction to determine
1643 // if we can still widen the use instead of truncating its operand.
1644 if (widenWithVariantLoadUse(DU)) {
1645 widenWithVariantLoadUseCodegen(DU);
1646 return nullptr;
1649 // This user does not evaluate to a recurrence after widening, so don't
1650 // follow it. Instead insert a Trunc to kill off the original use,
1651 // eventually isolating the original narrow IV so it can be removed.
1652 truncateIVUse(DU, DT, LI);
1653 return nullptr;
1655 // Assume block terminators cannot evaluate to a recurrence. We can't
1656 // insert a Trunc after a terminator if there happens to be a critical edge.
1657 assert(DU.NarrowUse != DU.NarrowUse->getParent()->getTerminator() &&
1658 "SCEV is not expected to evaluate a block terminator");
1660 // Reuse the IV increment that SCEVExpander created as long as it dominates
1661 // NarrowUse.
1662 Instruction *WideUse = nullptr;
1663 if (WideAddRec.first == WideIncExpr &&
1664 Rewriter.hoistIVInc(WideInc, DU.NarrowUse))
1665 WideUse = WideInc;
1666 else {
1667 WideUse = cloneIVUser(DU, WideAddRec.first);
1668 if (!WideUse)
1669 return nullptr;
1671 // Evaluation of WideAddRec ensured that the narrow expression could be
1672 // extended outside the loop without overflow. This suggests that the wide use
1673 // evaluates to the same expression as the extended narrow use, but doesn't
1674 // absolutely guarantee it. Hence the following failsafe check. In rare cases
1675 // where it fails, we simply throw away the newly created wide use.
1676 if (WideAddRec.first != SE->getSCEV(WideUse)) {
1677 LLVM_DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse << ": "
1678 << *SE->getSCEV(WideUse) << " != " << *WideAddRec.first
1679 << "\n");
1680 DeadInsts.emplace_back(WideUse);
1681 return nullptr;
1684 // If we reach this point, then we are going to replace
1685 // DU.NarrowUse with WideUse. Reattach DbgValue then.
1686 replaceAllDbgUsesWith(*DU.NarrowUse, *WideUse, *WideUse, *DT);
1688 ExtendKindMap[DU.NarrowUse] = WideAddRec.second;
1689 // Returning WideUse pushes it on the worklist.
1690 return WideUse;
1693 /// Add eligible users of NarrowDef to NarrowIVUsers.
1694 void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) {
1695 const SCEV *NarrowSCEV = SE->getSCEV(NarrowDef);
1696 bool NonNegativeDef =
1697 SE->isKnownPredicate(ICmpInst::ICMP_SGE, NarrowSCEV,
1698 SE->getConstant(NarrowSCEV->getType(), 0));
1699 for (User *U : NarrowDef->users()) {
1700 Instruction *NarrowUser = cast<Instruction>(U);
1702 // Handle data flow merges and bizarre phi cycles.
1703 if (!Widened.insert(NarrowUser).second)
1704 continue;
1706 bool NonNegativeUse = false;
1707 if (!NonNegativeDef) {
1708 // We might have control-dependent range information for this context.
1709 if (auto RangeInfo = getPostIncRangeInfo(NarrowDef, NarrowUser))
1710 NonNegativeUse = RangeInfo->getSignedMin().isNonNegative();
1713 NarrowIVUsers.emplace_back(NarrowDef, NarrowUser, WideDef,
1714 NonNegativeDef || NonNegativeUse);
1718 /// Process a single induction variable. First use the SCEVExpander to create a
1719 /// wide induction variable that evaluates to the same recurrence as the
1720 /// original narrow IV. Then use a worklist to forward traverse the narrow IV's
1721 /// def-use chain. After widenIVUse has processed all interesting IV users, the
1722 /// narrow IV will be isolated for removal by DeleteDeadPHIs.
1724 /// It would be simpler to delete uses as they are processed, but we must avoid
1725 /// invalidating SCEV expressions.
1726 PHINode *WidenIV::createWideIV(SCEVExpander &Rewriter) {
1727 // Is this phi an induction variable?
1728 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(OrigPhi));
1729 if (!AddRec)
1730 return nullptr;
1732 // Widen the induction variable expression.
1733 const SCEV *WideIVExpr = getExtendKind(OrigPhi) == SignExtended
1734 ? SE->getSignExtendExpr(AddRec, WideType)
1735 : SE->getZeroExtendExpr(AddRec, WideType);
1737 assert(SE->getEffectiveSCEVType(WideIVExpr->getType()) == WideType &&
1738 "Expect the new IV expression to preserve its type");
1740 // Can the IV be extended outside the loop without overflow?
1741 AddRec = dyn_cast<SCEVAddRecExpr>(WideIVExpr);
1742 if (!AddRec || AddRec->getLoop() != L)
1743 return nullptr;
1745 // An AddRec must have loop-invariant operands. Since this AddRec is
1746 // materialized by a loop header phi, the expression cannot have any post-loop
1747 // operands, so they must dominate the loop header.
1748 assert(
1749 SE->properlyDominates(AddRec->getStart(), L->getHeader()) &&
1750 SE->properlyDominates(AddRec->getStepRecurrence(*SE), L->getHeader()) &&
1751 "Loop header phi recurrence inputs do not dominate the loop");
1753 // Iterate over IV uses (including transitive ones) looking for IV increments
1754 // of the form 'add nsw %iv, <const>'. For each increment and each use of
1755 // the increment, calculate control-dependent range information based on
1756 // dominating conditions inside of the loop (e.g. a range check inside of the
1757 // loop). Calculated ranges are stored in the PostIncRangeInfos map.
1759 // Control-dependent range information is later used to prove that a narrow
1760 // definition is not negative (see pushNarrowIVUsers). It's difficult to do
1761 // this on demand because when pushNarrowIVUsers needs this information some
1762 // of the dominating conditions might be already widened.
1763 if (UsePostIncrementRanges)
1764 calculatePostIncRanges(OrigPhi);
1766 // The rewriter provides a value for the desired IV expression. This may
1767 // either find an existing phi or materialize a new one. Either way, we
1768 // expect a well-formed cyclic phi-with-increments. i.e. any operand not part
1769 // of the phi-SCC dominates the loop entry.
1770 Instruction *InsertPt = &L->getHeader()->front();
1771 WidePhi = cast<PHINode>(Rewriter.expandCodeFor(AddRec, WideType, InsertPt));
1773 // Remembering the WideIV increment generated by SCEVExpander allows
1774 // widenIVUse to reuse it when widening the narrow IV's increment. We don't
1775 // employ a general reuse mechanism because the call above is the only call to
1776 // SCEVExpander. Henceforth, we produce 1-to-1 narrow to wide uses.
1777 if (BasicBlock *LatchBlock = L->getLoopLatch()) {
1778 WideInc =
1779 cast<Instruction>(WidePhi->getIncomingValueForBlock(LatchBlock));
1780 WideIncExpr = SE->getSCEV(WideInc);
1781 // Propagate the debug location associated with the original loop increment
1782 // to the new (widened) increment.
1783 auto *OrigInc =
1784 cast<Instruction>(OrigPhi->getIncomingValueForBlock(LatchBlock));
1785 WideInc->setDebugLoc(OrigInc->getDebugLoc());
1788 LLVM_DEBUG(dbgs() << "Wide IV: " << *WidePhi << "\n");
1789 ++NumWidened;
1791 // Traverse the def-use chain using a worklist starting at the original IV.
1792 assert(Widened.empty() && NarrowIVUsers.empty() && "expect initial state");
1794 Widened.insert(OrigPhi);
1795 pushNarrowIVUsers(OrigPhi, WidePhi);
1797 while (!NarrowIVUsers.empty()) {
1798 NarrowIVDefUse DU = NarrowIVUsers.pop_back_val();
1800 // Process a def-use edge. This may replace the use, so don't hold a
1801 // use_iterator across it.
1802 Instruction *WideUse = widenIVUse(DU, Rewriter);
1804 // Follow all def-use edges from the previous narrow use.
1805 if (WideUse)
1806 pushNarrowIVUsers(DU.NarrowUse, WideUse);
1808 // widenIVUse may have removed the def-use edge.
1809 if (DU.NarrowDef->use_empty())
1810 DeadInsts.emplace_back(DU.NarrowDef);
1813 // Attach any debug information to the new PHI.
1814 replaceAllDbgUsesWith(*OrigPhi, *WidePhi, *WidePhi, *DT);
1816 return WidePhi;
1819 /// Calculates the control-dependent range for the given def in the given
1820 /// context by looking at dominating conditions inside of the loop.
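/// For instance (an illustrative sketch, not from the original comment): with
/// NarrowDef '%i.next = add nsw i32 %i, 1' and a dominating branch on
/// '%cmp = icmp slt i32 %i, %n', the true edge constrains %i to values
/// signed-less-than %n, so %i.next is bounded by the no-signed-wrap sum of
/// that region and 1; the resulting range is recorded for this (def, user)
/// pair.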
1821 void WidenIV::calculatePostIncRange(Instruction *NarrowDef,
1822 Instruction *NarrowUser) {
1823 using namespace llvm::PatternMatch;
1825 Value *NarrowDefLHS;
1826 const APInt *NarrowDefRHS;
1827 if (!match(NarrowDef, m_NSWAdd(m_Value(NarrowDefLHS),
1828 m_APInt(NarrowDefRHS))) ||
1829 !NarrowDefRHS->isNonNegative())
1830 return;
1832 auto UpdateRangeFromCondition = [&] (Value *Condition,
1833 bool TrueDest) {
1834 CmpInst::Predicate Pred;
1835 Value *CmpRHS;
1836 if (!match(Condition, m_ICmp(Pred, m_Specific(NarrowDefLHS),
1837 m_Value(CmpRHS))))
1838 return;
1840 CmpInst::Predicate P =
1841 TrueDest ? Pred : CmpInst::getInversePredicate(Pred);
1843 auto CmpRHSRange = SE->getSignedRange(SE->getSCEV(CmpRHS));
1844 auto CmpConstrainedLHSRange =
1845 ConstantRange::makeAllowedICmpRegion(P, CmpRHSRange);
1846 auto NarrowDefRange = CmpConstrainedLHSRange.addWithNoWrap(
1847 *NarrowDefRHS, OverflowingBinaryOperator::NoSignedWrap);
1849 updatePostIncRangeInfo(NarrowDef, NarrowUser, NarrowDefRange);
1852 auto UpdateRangeFromGuards = [&](Instruction *Ctx) {
1853 if (!HasGuards)
1854 return;
1856 for (Instruction &I : make_range(Ctx->getIterator().getReverse(),
1857 Ctx->getParent()->rend())) {
1858 Value *C = nullptr;
1859 if (match(&I, m_Intrinsic<Intrinsic::experimental_guard>(m_Value(C))))
1860 UpdateRangeFromCondition(C, /*TrueDest=*/true);
1864 UpdateRangeFromGuards(NarrowUser);
1866 BasicBlock *NarrowUserBB = NarrowUser->getParent();
1867 // If NarrowUserBB is statically unreachable, asking dominator queries may
1868 // yield surprising results (e.g. the block may not have a dom tree node).
1869 if (!DT->isReachableFromEntry(NarrowUserBB))
1870 return;
1872 for (auto *DTB = (*DT)[NarrowUserBB]->getIDom();
1873 L->contains(DTB->getBlock());
1874 DTB = DTB->getIDom()) {
1875 auto *BB = DTB->getBlock();
1876 auto *TI = BB->getTerminator();
1877 UpdateRangeFromGuards(TI);
1879 auto *BI = dyn_cast<BranchInst>(TI);
1880 if (!BI || !BI->isConditional())
1881 continue;
1883 auto *TrueSuccessor = BI->getSuccessor(0);
1884 auto *FalseSuccessor = BI->getSuccessor(1);
1886 auto DominatesNarrowUser = [this, NarrowUser] (BasicBlockEdge BBE) {
1887 return BBE.isSingleEdge() &&
1888 DT->dominates(BBE, NarrowUser->getParent());
1891 if (DominatesNarrowUser(BasicBlockEdge(BB, TrueSuccessor)))
1892 UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/true);
1894 if (DominatesNarrowUser(BasicBlockEdge(BB, FalseSuccessor)))
1895 UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/false);
1899 /// Calculates the PostIncRangeInfos map for the given IV.
1900 void WidenIV::calculatePostIncRanges(PHINode *OrigPhi) {
1901 SmallPtrSet<Instruction *, 16> Visited;
1902 SmallVector<Instruction *, 6> Worklist;
1903 Worklist.push_back(OrigPhi);
1904 Visited.insert(OrigPhi);
1906 while (!Worklist.empty()) {
1907 Instruction *NarrowDef = Worklist.pop_back_val();
1909 for (Use &U : NarrowDef->uses()) {
1910 auto *NarrowUser = cast<Instruction>(U.getUser());
1912 // Don't go looking outside the current loop.
1913 auto *NarrowUserLoop = (*LI)[NarrowUser->getParent()];
1914 if (!NarrowUserLoop || !L->contains(NarrowUserLoop))
1915 continue;
1917 if (!Visited.insert(NarrowUser).second)
1918 continue;
1920 Worklist.push_back(NarrowUser);
1922 calculatePostIncRange(NarrowDef, NarrowUser);
1927 //===----------------------------------------------------------------------===//
1928 // Live IV Reduction - Minimize IVs live across the loop.
1929 //===----------------------------------------------------------------------===//
1931 //===----------------------------------------------------------------------===//
1932 // Simplification of IV users based on SCEV evaluation.
1933 //===----------------------------------------------------------------------===//
1935 namespace {
1937 class IndVarSimplifyVisitor : public IVVisitor {
1938 ScalarEvolution *SE;
1939 const TargetTransformInfo *TTI;
1940 PHINode *IVPhi;
1942 public:
1943 WideIVInfo WI;
1945 IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV,
1946 const TargetTransformInfo *TTI,
1947 const DominatorTree *DTree)
1948 : SE(SCEV), TTI(TTI), IVPhi(IV) {
1949 DT = DTree;
1950 WI.NarrowIV = IVPhi;
1953 // Implement the interface used by simplifyUsersOfIV.
1954 void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); }
1957 } // end anonymous namespace
1959 /// Iteratively perform simplification on a worklist of IV users. Each
1960 /// successive simplification may push more users which may themselves be
1961 /// candidates for simplification.
1963 /// Sign/Zero extend elimination is interleaved with IV simplification.
1964 bool IndVarSimplify::simplifyAndExtend(Loop *L,
1965 SCEVExpander &Rewriter,
1966 LoopInfo *LI) {
1967 SmallVector<WideIVInfo, 8> WideIVs;
1969 auto *GuardDecl = L->getBlocks()[0]->getModule()->getFunction(
1970 Intrinsic::getName(Intrinsic::experimental_guard));
1971 bool HasGuards = GuardDecl && !GuardDecl->use_empty();
1973 SmallVector<PHINode*, 8> LoopPhis;
1974 for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
1975 LoopPhis.push_back(cast<PHINode>(I));
1977 // Each round of simplification iterates through the SimplifyIVUsers worklist
1978 // for all current phis, then determines whether any IVs can be
1979 // widened. Widening adds new phis to LoopPhis, inducing another round of
1980 // simplification on the wide IVs.
1981 bool Changed = false;
1982 while (!LoopPhis.empty()) {
1983 // Evaluate as many IV expressions as possible before widening any IVs. This
1984 // forces SCEV to set no-wrap flags before evaluating sign/zero
1985 // extension. The first time SCEV attempts to normalize sign/zero extension,
1986 // the result becomes final. So for the most predictable results, we delay
1987 // evaluation of sign/zero extension until needed, and avoid running
1988 // other SCEV based analysis prior to simplifyAndExtend.
1989 do {
1990 PHINode *CurrIV = LoopPhis.pop_back_val();
1992 // Information about sign/zero extensions of CurrIV.
1993 IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT);
1995 Changed |=
1996 simplifyUsersOfIV(CurrIV, SE, DT, LI, DeadInsts, Rewriter, &Visitor);
1998 if (Visitor.WI.WidestNativeType) {
1999 WideIVs.push_back(Visitor.WI);
2001 } while(!LoopPhis.empty());
2003 for (; !WideIVs.empty(); WideIVs.pop_back()) {
2004 WidenIV Widener(WideIVs.back(), LI, SE, DT, DeadInsts, HasGuards);
2005 if (PHINode *WidePhi = Widener.createWideIV(Rewriter)) {
2006 Changed = true;
2007 LoopPhis.push_back(WidePhi);
2011 return Changed;
2014 //===----------------------------------------------------------------------===//
2015 // linearFunctionTestReplace and its kin. Rewrite the loop exit condition.
2016 //===----------------------------------------------------------------------===//
2018 /// Given a Value which is hoped to be part of an add recurrence in the given
2019 /// loop, return the associated Phi node if so. Otherwise, return null. Note
2020 /// that this is less general than SCEV's AddRec checking.
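/// A small illustrative example (hypothetical names): for the increment
/// '%iv.next = add i32 %iv, %step', this returns %iv when %iv is a phi in the
/// loop header and %step is loop invariant; add/sub operands may be commuted,
/// but GEP operands may not.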
2021 static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L) {
2022 Instruction *IncI = dyn_cast<Instruction>(IncV);
2023 if (!IncI)
2024 return nullptr;
2026 switch (IncI->getOpcode()) {
2027 case Instruction::Add:
2028 case Instruction::Sub:
2029 break;
2030 case Instruction::GetElementPtr:
2031 // An IV counter must preserve its type.
2032 if (IncI->getNumOperands() == 2)
2033 break;
2034 LLVM_FALLTHROUGH;
2035 default:
2036 return nullptr;
2039 PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
2040 if (Phi && Phi->getParent() == L->getHeader()) {
2041 if (L->isLoopInvariant(IncI->getOperand(1)))
2042 return Phi;
2043 return nullptr;
2045 if (IncI->getOpcode() == Instruction::GetElementPtr)
2046 return nullptr;
2048 // Allow add/sub to be commuted.
2049 Phi = dyn_cast<PHINode>(IncI->getOperand(1));
2050 if (Phi && Phi->getParent() == L->getHeader()) {
2051 if (L->isLoopInvariant(IncI->getOperand(0)))
2052 return Phi;
2054 return nullptr;
2057 /// Whether the current loop exit test is based on this value. Currently this
2058 /// is limited to a direct use in the loop condition.
2059 static bool isLoopExitTestBasedOn(Value *V, BasicBlock *ExitingBB) {
2060 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2061 ICmpInst *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
2062 // TODO: Allow non-icmp loop test.
2063 if (!ICmp)
2064 return false;
2066 // TODO: Allow indirect use.
2067 return ICmp->getOperand(0) == V || ICmp->getOperand(1) == V;
2070 /// linearFunctionTestReplace policy. Return true unless we can show that the
2071 /// current exit test is already sufficiently canonical.
2072 static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) {
2073 assert(L->getLoopLatch() && "Must be in simplified form");
2075 // Avoid converting a constant or loop invariant test back to a runtime
2076 // test. This is critical for when SCEV's cached ExitCount is less precise
2077 // than the current IR (such as after we've proven a particular exit is
2078 // actually dead and thus the BE count never reaches our ExitCount.)
2079 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2080 if (L->isLoopInvariant(BI->getCondition()))
2081 return false;
2083 // Do LFTR to simplify the exit condition to an ICMP.
2084 ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
2085 if (!Cond)
2086 return true;
2088 // Do LFTR to simplify the exit ICMP to EQ/NE
2089 ICmpInst::Predicate Pred = Cond->getPredicate();
2090 if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
2091 return true;
2093 // Look for a loop invariant RHS
2094 Value *LHS = Cond->getOperand(0);
2095 Value *RHS = Cond->getOperand(1);
2096 if (!L->isLoopInvariant(RHS)) {
2097 if (!L->isLoopInvariant(LHS))
2098 return true;
2099 std::swap(LHS, RHS);
2101 // Look for a simple IV counter LHS
2102 PHINode *Phi = dyn_cast<PHINode>(LHS);
2103 if (!Phi)
2104 Phi = getLoopPhiForCounter(LHS, L);
2106 if (!Phi)
2107 return true;
2109 // Do LFTR if PHI node is defined in the loop, but is *not* a counter.
2110 int Idx = Phi->getBasicBlockIndex(L->getLoopLatch());
2111 if (Idx < 0)
2112 return true;
2114 // Do LFTR if the exit condition's IV is *not* a simple counter.
2115 Value *IncV = Phi->getIncomingValue(Idx);
2116 return Phi != getLoopPhiForCounter(IncV, L);
2119 /// Return true if undefined behavior would provably be executed on the path
2120 /// to OnPathTo if Root produced a poison result. Note that this doesn't say
2121 /// anything about whether OnPathTo is actually executed or whether Root is
2122 /// actually poison. This can be used to assess whether a new use of Root can
2123 /// be added at a location which is control equivalent with OnPathTo (such as
2124 /// immediately before it) without introducing UB which didn't previously
2125 /// exist. Note that a false result conveys no information.
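/// An illustrative sketch (hypothetical IR, not from the original comment): if
/// Root feeds a getelementptr whose result is loaded at a point dominating
/// OnPathTo, then poison at Root would make that load UB, so adding a new use
/// of Root near OnPathTo cannot introduce UB that was not already present.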
2126 static bool mustExecuteUBIfPoisonOnPathTo(Instruction *Root,
2127 Instruction *OnPathTo,
2128 DominatorTree *DT) {
2129 // The basic approach is to assume Root is poison, propagate poison forward
2130 // through all users we can easily track, and then check whether any of those
2131 // users are provably UB and must execute before our exiting block might
2132 // exit.
2134 // The set of all recursive users we've visited (which are assumed to all be
2135 // poison because of said visit)
2136 SmallSet<const Value *, 16> KnownPoison;
2137 SmallVector<const Instruction*, 16> Worklist;
2138 Worklist.push_back(Root);
2139 while (!Worklist.empty()) {
2140 const Instruction *I = Worklist.pop_back_val();
2142 // If we know this must trigger UB on a path leading to our target.
2143 if (mustTriggerUB(I, KnownPoison) && DT->dominates(I, OnPathTo))
2144 return true;
2146 // If we can't analyze propagation through this instruction, just skip it
2147 // and transitive users. Safe as false is a conservative result.
2148 if (!propagatesFullPoison(I) && I != Root)
2149 continue;
2151 if (KnownPoison.insert(I).second)
2152 for (const User *User : I->users())
2153 Worklist.push_back(cast<Instruction>(User));
2156 // Might be non-UB, or might have a path we couldn't prove must execute on
2157 // the way to the exiting bb.
2158 return false;
2161 /// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
2162 /// down to checking that all operands are constant and listing instructions
2163 /// that may hide undef.
2164 static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl<Value*> &Visited,
2165 unsigned Depth) {
2166 if (isa<Constant>(V))
2167 return !isa<UndefValue>(V);
2169 if (Depth >= 6)
2170 return false;
2172 // Conservatively handle non-constant non-instructions. For example, Arguments
2173 // may be undef.
2174 Instruction *I = dyn_cast<Instruction>(V);
2175 if (!I)
2176 return false;
2178 // Load and return values may be undef.
2179 if (I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I))
2180 return false;
2182 // Optimistically handle other instructions.
2183 for (Value *Op : I->operands()) {
2184 if (!Visited.insert(Op).second)
2185 continue;
2186 if (!hasConcreteDefImpl(Op, Visited, Depth+1))
2187 return false;
2189 return true;
2192 /// Return true if the given value is concrete. We must prove that undef can
2193 /// never reach it.
2195 /// TODO: If we decide that this is a good approach to checking for undef, we
2196 /// may factor it into a common location.
2197 static bool hasConcreteDef(Value *V) {
2198 SmallPtrSet<Value*, 8> Visited;
2199 Visited.insert(V);
2200 return hasConcreteDefImpl(V, Visited, 0);
2203 /// Return true if this IV has no uses other than the (soon to be rewritten)
2204 /// loop exit test.
2205 static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
2206 int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
2207 Value *IncV = Phi->getIncomingValue(LatchIdx);
2209 for (User *U : Phi->users())
2210 if (U != Cond && U != IncV) return false;
2212 for (User *U : IncV->users())
2213 if (U != Cond && U != Phi) return false;
2214 return true;
2217 /// Return true if the given phi is a "counter" in L. A counter is an
2218 /// add recurrence (of integer or pointer type) with an arbitrary start, and a
2219 /// step of 1. Note that L must have exactly one latch.
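/// For example (illustrative, hypothetical names): the header phi in
///   %i = phi i32 [ 0, %preheader ], [ %i.next, %latch ]
///   %i.next = add i32 %i, 1
/// is a counter, whereas a phi stepped by 2 or by a loop-variant amount is not.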
2220 static bool isLoopCounter(PHINode* Phi, Loop *L,
2221 ScalarEvolution *SE) {
2222 assert(Phi->getParent() == L->getHeader());
2223 assert(L->getLoopLatch());
2225 if (!SE->isSCEVable(Phi->getType()))
2226 return false;
2228 const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
2229 if (!AR || AR->getLoop() != L || !AR->isAffine())
2230 return false;
2232 const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
2233 if (!Step || !Step->isOne())
2234 return false;
2236 int LatchIdx = Phi->getBasicBlockIndex(L->getLoopLatch());
2237 Value *IncV = Phi->getIncomingValue(LatchIdx);
2238 return (getLoopPhiForCounter(IncV, L) == Phi);
2241 /// Search the loop header for a loop counter (an add rec w/ step of one)
2242 /// suitable for use by LFTR. If multiple counters are available, select the
2243 /// "best" one based on profitability heuristics.
2245 /// BECount may be an i8* pointer type. The pointer difference is already a
2246 /// valid count without scaling the address stride, so it remains a pointer
2247 /// expression as far as SCEV is concerned.
2248 static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB,
2249 const SCEV *BECount,
2250 ScalarEvolution *SE, DominatorTree *DT) {
2251 uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());
2253 Value *Cond = cast<BranchInst>(ExitingBB->getTerminator())->getCondition();
2255 // Loop over all of the PHI nodes, looking for a simple counter.
2256 PHINode *BestPhi = nullptr;
2257 const SCEV *BestInit = nullptr;
2258 BasicBlock *LatchBlock = L->getLoopLatch();
2259 assert(LatchBlock && "Must be in simplified form");
2260 const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
2262 for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
2263 PHINode *Phi = cast<PHINode>(I);
2264 if (!isLoopCounter(Phi, L, SE))
2265 continue;
2267 // Avoid comparing an integer IV against a pointer Limit.
2268 if (BECount->getType()->isPointerTy() && !Phi->getType()->isPointerTy())
2269 continue;
2271 const auto *AR = cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
2273 // AR may be a pointer type, while BECount is an integer type.
2274 // AR may be wider than BECount. With eq/ne tests overflow is immaterial.
2275 // AR may not be a narrower type, or we may never exit.
2276 uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
2277 if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth))
2278 continue;
2280 // Avoid reusing a potentially undef value to compute other values that may
2281 // have originally had a concrete definition.
2282 if (!hasConcreteDef(Phi)) {
2283 // We explicitly allow unknown phis as long as they are already used by
2284 // the loop exit test. This is legal since performing LFTR could not
2285 // increase the number of undef users.
2286 Value *IncPhi = Phi->getIncomingValueForBlock(LatchBlock);
2287 if (!isLoopExitTestBasedOn(Phi, ExitingBB) &&
2288 !isLoopExitTestBasedOn(IncPhi, ExitingBB))
2289 continue;
2292 // Avoid introducing undefined behavior due to poison which didn't exist in
2293 // the original program. (Annoyingly, the rules for poison and undef
2294 // propagation are distinct, so this does NOT cover the undef case above.)
2295 // We have to ensure that we don't introduce UB by introducing a use on an
2296 // iteration where said IV produces poison. Our strategy here differs for
2297 // pointers and integer IVs. For integers, we strip and reinfer as needed,
2298 // see code in linearFunctionTestReplace. For pointers, we restrict
2299 // transforms as there is no good way to reinfer inbounds once lost.
2300 if (!Phi->getType()->isIntegerTy() &&
2301 !mustExecuteUBIfPoisonOnPathTo(Phi, ExitingBB->getTerminator(), DT))
2302 continue;
2304 const SCEV *Init = AR->getStart();
2306 if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) {
2307 // Don't force a live loop counter if another IV can be used.
2308 if (AlmostDeadIV(Phi, LatchBlock, Cond))
2309 continue;
2311 // Prefer to count-from-zero. This is a more "canonical" counter form. It
2312 // also prefers integer to pointer IVs.
2313 if (BestInit->isZero() != Init->isZero()) {
2314 if (BestInit->isZero())
2315 continue;
2317 // If two IVs both count from zero or both count from nonzero then the
2318 // narrower is likely a dead phi that has been widened. Use the wider phi
2319 // to allow the other to be eliminated.
2320 else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType()))
2321 continue;
2323 BestPhi = Phi;
2324 BestInit = Init;
2326 return BestPhi;
2329 /// Insert an IR expression which computes the value held by the IV IndVar
2330 /// (which must be a loop counter w/ unit stride) after the backedge of loop L
2331 /// is taken ExitCount times.
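/// As a rough illustrative sketch (not from the original comment): for a unit
/// stride counter starting at %base, the pre-increment limit is %base +
/// ExitCount and the post-increment limit is %base + ExitCount + 1, modulo the
/// pointer/integer type adjustments handled below.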
2332 static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB,
2333 const SCEV *ExitCount, bool UsePostInc, Loop *L,
2334 SCEVExpander &Rewriter, ScalarEvolution *SE) {
2335 assert(isLoopCounter(IndVar, L, SE));
2336 const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
2337 const SCEV *IVInit = AR->getStart();
2339 // IVInit may be a pointer while ExitCount is an integer when FindLoopCounter
2340 // finds a valid pointer IV. Zero extend ExitCount in order to materialize a
2341 // GEP. Avoid running SCEVExpander on a new pointer value, instead reusing
2342 // the existing GEPs whenever possible.
2343 if (IndVar->getType()->isPointerTy() &&
2344 !ExitCount->getType()->isPointerTy()) {
2345 // IVOffset will be the new GEP offset that is interpreted by GEP as a
2346 // signed value. ExitCount on the other hand represents the loop trip count,
2347 // which is an unsigned value. FindLoopCounter only allows induction
2348 // variables that have a positive unit stride of one. This means we don't
2349 // have to handle the case of negative offsets (yet) and just need to zero
2350 // extend ExitCount.
2351 Type *OfsTy = SE->getEffectiveSCEVType(IVInit->getType());
2352 const SCEV *IVOffset = SE->getTruncateOrZeroExtend(ExitCount, OfsTy);
2353 if (UsePostInc)
2354 IVOffset = SE->getAddExpr(IVOffset, SE->getOne(OfsTy));
2356 // Expand the code for the iteration count.
2357 assert(SE->isLoopInvariant(IVOffset, L) &&
2358 "Computed iteration count is not loop invariant!");
2360 // We could handle pointer IVs other than i8*, but we need to compensate for
2361 // gep index scaling.
2362 assert(SE->getSizeOfExpr(IntegerType::getInt64Ty(IndVar->getContext()),
2363 cast<PointerType>(IndVar->getType())
2364 ->getElementType())->isOne() &&
2365 "unit stride pointer IV must be i8*");
2367 const SCEV *IVLimit = SE->getAddExpr(IVInit, IVOffset);
2368 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2369 return Rewriter.expandCodeFor(IVLimit, IndVar->getType(), BI);
2370 } else {
2371 // In any other case, convert both IVInit and ExitCount to integers before
2372 // comparing. This may result in SCEV expansion of pointers, but in practice
2373 // SCEV will fold the pointer arithmetic away as such:
2374 // BECount = (IVEnd - IVInit - 1) => IVLimit = IVInit (postinc).
2376 // Valid Cases: (1) both integers is most common; (2) both may be pointers
2377 // for simple memset-style loops.
2379 // IVInit integer and ExitCount pointer would only occur if a canonical IV
2380 // were generated on top of case #2, which is not expected.
2382 assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");
2383 // For unit stride, IVCount = Start + ExitCount with 2's complement
2384 // overflow.
2386 // For integer IVs, truncate the IV before computing IVInit + BECount,
2387 // unless we know a priori that the limit must be a constant when evaluated
2388 // in the bitwidth of the IV. We prefer (potentially) keeping a truncate
2389 // of the IV in the loop over a (potentially) expensive expansion of the
2390 // widened exit count add(zext(add)) expression.
2391 if (SE->getTypeSizeInBits(IVInit->getType())
2392 > SE->getTypeSizeInBits(ExitCount->getType())) {
2393 if (isa<SCEVConstant>(IVInit) && isa<SCEVConstant>(ExitCount))
2394 ExitCount = SE->getZeroExtendExpr(ExitCount, IVInit->getType());
2395 else
2396 IVInit = SE->getTruncateExpr(IVInit, ExitCount->getType());
2399 const SCEV *IVLimit = SE->getAddExpr(IVInit, ExitCount);
2401 if (UsePostInc)
2402 IVLimit = SE->getAddExpr(IVLimit, SE->getOne(IVLimit->getType()));
2404 // Expand the code for the iteration count.
2405 assert(SE->isLoopInvariant(IVLimit, L) &&
2406 "Computed iteration count is not loop invariant!");
2407 // Ensure that we generate the same type as IndVar, or a smaller integer
2408 // type. In the presence of null pointer values, we have an integer type
2409 // SCEV expression (IVInit) for a pointer type IV value (IndVar).
2410 Type *LimitTy = ExitCount->getType()->isPointerTy() ?
2411 IndVar->getType() : ExitCount->getType();
2412 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2413 return Rewriter.expandCodeFor(IVLimit, LimitTy, BI);
2417 /// This method rewrites the exit condition of the loop to be a canonical !=
2418 /// comparison against the incremented loop induction variable. This pass is
2419 /// able to rewrite the exit tests of any loop where the SCEV analysis can
2420 /// determine a loop-invariant trip count of the loop, which is actually a much
2421 /// broader range than just linear tests.
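/// As a rough illustrative sketch (hypothetical IR, not from the original
/// comment): an exit test like 'icmp slt i32 %i.next, %n' is replaced by
/// 'icmp ne i32 %i.next, %exit.cnt', where %exit.cnt is the expanded value
/// the chosen counter holds after ExitCount backedges (see genLoopLimit).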
2422 bool IndVarSimplify::
2423 linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
2424 const SCEV *ExitCount,
2425 PHINode *IndVar, SCEVExpander &Rewriter) {
2426 assert(L->getLoopLatch() && "Loop no longer in simplified form?");
2427 assert(isLoopCounter(IndVar, L, SE));
2428 Instruction * const IncVar =
2429 cast<Instruction>(IndVar->getIncomingValueForBlock(L->getLoopLatch()));
2431 // Initialize CmpIndVar to the preincremented IV.
2432 Value *CmpIndVar = IndVar;
2433 bool UsePostInc = false;
2435 // If the exiting block is the same as the backedge block, we prefer to
2436 // compare against the post-incremented value, otherwise we must compare
2437 // against the preincremented value.
2438 if (ExitingBB == L->getLoopLatch()) {
2439 // For pointer IVs, we chose to not strip inbounds which requires us not
2440 // to add a potentially UB introducing use. We need to either a) show
2441 // the loop test we're modifying is already in post-inc form, or b) show
2442 // that adding a use must not introduce UB.
2443 bool SafeToPostInc =
2444 IndVar->getType()->isIntegerTy() ||
2445 isLoopExitTestBasedOn(IncVar, ExitingBB) ||
2446 mustExecuteUBIfPoisonOnPathTo(IncVar, ExitingBB->getTerminator(), DT);
2447 if (SafeToPostInc) {
2448 UsePostInc = true;
2449 CmpIndVar = IncVar;
2453 // It may be necessary to drop nowrap flags on the incrementing instruction
2454 // if either LFTR moves from a pre-inc check to a post-inc check (in which
2455 // case the increment might have previously been poison on the last iteration
2456 // only) or if LFTR switches to a different IV that was previously dynamically
2457 // dead (and as such may be arbitrarily poison). We remove any nowrap flags
2458 // that SCEV didn't infer for the post-inc addrec (even if we use a pre-inc
2459 // check), because the pre-inc addrec flags may be adopted from the original
2460 // instruction, while SCEV has to explicitly prove the post-inc nowrap flags.
2461 // TODO: This handling is inaccurate for one case: If we switch to a
2462 // dynamically dead IV that wraps on the first loop iteration only, which is
2463 // not covered by the post-inc addrec. (If the new IV was not dynamically
2464 // dead, it could not be poison on the first iteration in the first place.)
2465 if (auto *BO = dyn_cast<BinaryOperator>(IncVar)) {
2466 const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IncVar));
2467 if (BO->hasNoUnsignedWrap())
2468 BO->setHasNoUnsignedWrap(AR->hasNoUnsignedWrap());
2469 if (BO->hasNoSignedWrap())
2470 BO->setHasNoSignedWrap(AR->hasNoSignedWrap());
2473 Value *ExitCnt = genLoopLimit(
2474 IndVar, ExitingBB, ExitCount, UsePostInc, L, Rewriter, SE);
2475 assert(ExitCnt->getType()->isPointerTy() ==
2476 IndVar->getType()->isPointerTy() &&
2477 "genLoopLimit missed a cast");
2479 // Insert a new icmp_ne or icmp_eq instruction before the branch.
2480 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2481 ICmpInst::Predicate P;
2482 if (L->contains(BI->getSuccessor(0)))
2483 P = ICmpInst::ICMP_NE;
2484 else
2485 P = ICmpInst::ICMP_EQ;
2487 IRBuilder<> Builder(BI);
2489 // The new loop exit condition should reuse the debug location of the
2490 // original loop exit condition.
2491 if (auto *Cond = dyn_cast<Instruction>(BI->getCondition()))
2492 Builder.SetCurrentDebugLocation(Cond->getDebugLoc());
2494 // For integer IVs, if we evaluated the limit in the narrower bitwidth to
2495 // avoid the expensive expansion of the limit expression in the wider type,
2496 // emit a truncate to narrow the IV to the ExitCount type. This is safe
2497 // since we know (from the exit count bitwidth), that we can't self-wrap in
2498 // the narrower type.
2499 unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType());
2500 unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType());
2501 if (CmpIndVarSize > ExitCntSize) {
2502 assert(!CmpIndVar->getType()->isPointerTy() &&
2503 !ExitCnt->getType()->isPointerTy());
2505 // Before resorting to actually inserting the truncate, use the same
2506 // reasoning as from SimplifyIndvar::eliminateTrunc to see if we can extend
2507 // the other side of the comparison instead. We still evaluate the limit
2508 // in the narrower bitwidth, we just prefer a zext/sext outside the loop to
2509 // a truncate within the loop.
2510 bool Extended = false;
2511 const SCEV *IV = SE->getSCEV(CmpIndVar);
2512 const SCEV *TruncatedIV = SE->getTruncateExpr(SE->getSCEV(CmpIndVar),
2513 ExitCnt->getType());
2514 const SCEV *ZExtTrunc =
2515 SE->getZeroExtendExpr(TruncatedIV, CmpIndVar->getType());
2517 if (ZExtTrunc == IV) {
2518 Extended = true;
2519 ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(),
2520 "wide.trip.count");
2521 } else {
2522 const SCEV *SExtTrunc =
2523 SE->getSignExtendExpr(TruncatedIV, CmpIndVar->getType());
2524 if (SExtTrunc == IV) {
2525 Extended = true;
2526 ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(),
2527 "wide.trip.count");
2531 if (Extended) {
2532 bool Discard;
2533 L->makeLoopInvariant(ExitCnt, Discard);
2534 } else
2535 CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
2536 "lftr.wideiv");
2538 LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
2539 << " LHS:" << *CmpIndVar << '\n'
2540 << " op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "==")
2541 << "\n"
2542 << " RHS:\t" << *ExitCnt << "\n"
2543 << "ExitCount:\t" << *ExitCount << "\n"
2544 << " was: " << *BI->getCondition() << "\n");
2546 Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond");
2547 Value *OrigCond = BI->getCondition();
2548 // It's tempting to use replaceAllUsesWith here to fully replace the old
2549 // comparison, but that's not immediately safe, since users of the old
2550 // comparison may not be dominated by the new comparison. Instead, just
2551 // update the branch to use the new comparison; in the common case this
2552 // will make the old comparison dead.
2553 BI->setCondition(Cond);
2554 DeadInsts.push_back(OrigCond);
2556 ++NumLFTR;
2557 return true;
2560 //===----------------------------------------------------------------------===//
2561 // sinkUnusedInvariants. A late subpass to cleanup loop preheaders.
2562 //===----------------------------------------------------------------------===//
2564 /// If there's a single exit block, sink any loop-invariant values that
2565 /// were defined in the preheader but not used inside the loop into the
2566 /// exit block to reduce register pressure in the loop.
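/// A minimal illustrative example (hypothetical names): a computation such as
/// '%inv = mul i32 %a, %b' defined in the preheader but used only by
/// instructions in the unique exit block is moved into that exit block,
/// shortening its live range across the loop body.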
2567 bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
2568 BasicBlock *ExitBlock = L->getExitBlock();
2569 if (!ExitBlock) return false;
2571 BasicBlock *Preheader = L->getLoopPreheader();
2572 if (!Preheader) return false;
2574 bool MadeAnyChanges = false;
2575 BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt();
2576 BasicBlock::iterator I(Preheader->getTerminator());
2577 while (I != Preheader->begin()) {
2578 --I;
2579 // New instructions were inserted at the end of the preheader.
2580 if (isa<PHINode>(I))
2581 break;
2583 // Don't move instructions which might have side effects, since the side
2584 // effects need to complete before instructions inside the loop. Also don't
2585 // move instructions which might read memory, since the loop may modify
2586 // memory. Note that it's okay if the instruction might have undefined
2587 // behavior: LoopSimplify guarantees that the preheader dominates the exit
2588 // block.
2589 if (I->mayHaveSideEffects() || I->mayReadFromMemory())
2590 continue;
2592 // Skip debug info intrinsics.
2593 if (isa<DbgInfoIntrinsic>(I))
2594 continue;
2596 // Skip eh pad instructions.
2597 if (I->isEHPad())
2598 continue;
2600 // Don't sink alloca: we never want to sink static allocas out of the
2601 // entry block, and correctly sinking dynamic allocas requires
2602 // checks for stacksave/stackrestore intrinsics.
2603 // FIXME: Refactor this check somehow?
2604 if (isa<AllocaInst>(I))
2605 continue;
2607 // Determine if there is a use in or before the loop (direct or
2608 // otherwise).
2609 bool UsedInLoop = false;
2610 for (Use &U : I->uses()) {
2611 Instruction *User = cast<Instruction>(U.getUser());
2612 BasicBlock *UseBB = User->getParent();
2613 if (PHINode *P = dyn_cast<PHINode>(User)) {
2614 unsigned i =
2615 PHINode::getIncomingValueNumForOperand(U.getOperandNo());
2616 UseBB = P->getIncomingBlock(i);
2618 if (UseBB == Preheader || L->contains(UseBB)) {
2619 UsedInLoop = true;
2620 break;
2624 // If there is, the def must remain in the preheader.
2625 if (UsedInLoop)
2626 continue;
2628 // Otherwise, sink it to the exit block.
2629 Instruction *ToMove = &*I;
2630 bool Done = false;
2632 if (I != Preheader->begin()) {
2633 // Skip debug info intrinsics.
2634 do {
2635 --I;
2636 } while (isa<DbgInfoIntrinsic>(I) && I != Preheader->begin());
2638 if (isa<DbgInfoIntrinsic>(I) && I == Preheader->begin())
2639 Done = true;
2640 } else {
2641 Done = true;
2644 MadeAnyChanges = true;
2645 ToMove->moveBefore(*ExitBlock, InsertPt);
2646 if (Done) break;
2647 InsertPt = ToMove->getIterator();
2650 return MadeAnyChanges;
2653 /// Return a symbolic upper bound for the backedge taken count of the loop.
2654 /// This is more general than getConstantMaxBackedgeTakenCount as it returns
2655 /// an arbitrary expression as opposed to only constants.
2656 /// TODO: Move into the ScalarEvolution class.
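/// For example (illustrative): for a loop with two analyzable exits whose
/// counts are %n and %m, this returns the SCEV umin of %n, %m and the
/// constant maximum backedge-taken count, when the latter is computable.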
2657 static const SCEV* getMaxBackedgeTakenCount(ScalarEvolution &SE,
2658 DominatorTree &DT, Loop *L) {
2659 SmallVector<BasicBlock*, 16> ExitingBlocks;
2660 L->getExitingBlocks(ExitingBlocks);
2662 // Form an expression for the maximum exit count possible for this loop. We
2663 // merge the max and exact information to approximate a version of
2664 // getConstantMaxBackedgeTakenCount which isn't restricted to just constants.
2665 SmallVector<const SCEV*, 4> ExitCounts;
2666 const SCEV *MaxConstEC = SE.getConstantMaxBackedgeTakenCount(L);
2667 if (!isa<SCEVCouldNotCompute>(MaxConstEC))
2668 ExitCounts.push_back(MaxConstEC);
2669 for (BasicBlock *ExitingBB : ExitingBlocks) {
2670 const SCEV *ExitCount = SE.getExitCount(L, ExitingBB);
2671 if (!isa<SCEVCouldNotCompute>(ExitCount)) {
2672 assert(DT.dominates(ExitingBB, L->getLoopLatch()) &&
2673 "We should only have known counts for exiting blocks that "
2674 "dominate latch!");
2675 ExitCounts.push_back(ExitCount);
2678 if (ExitCounts.empty())
2679 return SE.getCouldNotCompute();
2680 return SE.getUMinFromMismatchedTypes(ExitCounts);
2683 bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
2684 SmallVector<BasicBlock*, 16> ExitingBlocks;
2685 L->getExitingBlocks(ExitingBlocks);
2687 // Remove all exits which aren't both rewritable and analyzable.
2688 auto NewEnd = llvm::remove_if(ExitingBlocks,
2689 [&](BasicBlock *ExitingBB) {
2690 // If our exiting block exits multiple loops, we can only rewrite the
2691 // innermost one. Otherwise, we're changing how many times the innermost
2692 // loop runs before it exits.
2693 if (LI->getLoopFor(ExitingBB) != L)
2694 return true;
2696 // Can't rewrite non-branch yet.
2697 BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
2698 if (!BI)
2699 return true;
2701 // If already constant, nothing to do.
2702 if (isa<Constant>(BI->getCondition()))
2703 return true;
2705 const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
2706 if (isa<SCEVCouldNotCompute>(ExitCount))
2707 return true;
2708 return false;
2710 ExitingBlocks.erase(NewEnd, ExitingBlocks.end());
2712 if (ExitingBlocks.empty())
2713 return false;
2715 // Get a symbolic upper bound on the loop backedge taken count.
2716 const SCEV *MaxExitCount = getMaxBackedgeTakenCount(*SE, *DT, L);
2717 if (isa<SCEVCouldNotCompute>(MaxExitCount))
2718 return false;
2720 // Visit our exit blocks in order of dominance. We know from the fact that
2721 // all remaining exits are analyzable that there must be a total dominance
2722 // order between them, as each must dominate the latch. The visit order only
2723 // matters for the provably equal case.
2724 llvm::sort(ExitingBlocks,
2725 [&](BasicBlock *A, BasicBlock *B) {
2726 // std::sort sorts in ascending order, so we want the inverse of
2727 // the normal dominance relation.
2728 if (DT->properlyDominates(A, B)) return true;
2729 if (DT->properlyDominates(B, A)) return false;
2730 llvm_unreachable("expected total dominance order!");
2732 #ifndef NDEBUG
2733 for (unsigned i = 1; i < ExitingBlocks.size(); i++) {
2734 assert(DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]));
2736 #endif
2738 auto FoldExit = [&](BasicBlock *ExitingBB, bool IsTaken) {
2739 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2740 bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
2741 auto *OldCond = BI->getCondition();
2742 auto *NewCond = ConstantInt::get(OldCond->getType(),
2743 IsTaken ? ExitIfTrue : !ExitIfTrue);
2744 BI->setCondition(NewCond);
2745 if (OldCond->use_empty())
2746 DeadInsts.push_back(OldCond);
2749 bool Changed = false;
2750 SmallSet<const SCEV*, 8> DominatingExitCounts;
2751 for (BasicBlock *ExitingBB : ExitingBlocks) {
2752 const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
2753 assert(!isa<SCEVCouldNotCompute>(ExitCount) && "checked above");
2755 // If we know we'd exit on the first iteration, rewrite the exit to
2756 // reflect this. This does not imply the loop must exit through this
2757 // exit; there may be an earlier one taken on the first iteration.
2758 // TODO: Given we know the backedge can't be taken, we should go ahead
2759 // and break it. Or at least, kill all the header phis and simplify.
2760 if (ExitCount->isZero()) {
2761 FoldExit(ExitingBB, true);
2762 Changed = true;
2763 continue;
2766 // If we end up with a pointer exit count, bail. Note that we can end up
2767 // with a pointer exit count for one exiting block, and not for another in
2768 // the same loop.
2769 if (!ExitCount->getType()->isIntegerTy() ||
2770 !MaxExitCount->getType()->isIntegerTy())
2771 continue;
2773 Type *WiderType =
2774 SE->getWiderType(MaxExitCount->getType(), ExitCount->getType());
2775 ExitCount = SE->getNoopOrZeroExtend(ExitCount, WiderType);
2776 MaxExitCount = SE->getNoopOrZeroExtend(MaxExitCount, WiderType);
2777 assert(MaxExitCount->getType() == ExitCount->getType());
2779 // Can we prove that some other exit must be taken strictly before this
2780 // one?
2781 if (SE->isLoopEntryGuardedByCond(L, CmpInst::ICMP_ULT,
2782 MaxExitCount, ExitCount)) {
2783 FoldExit(ExitingBB, false);
2784 Changed = true;
2785 continue;
2788 // As we run, keep track of which exit counts we've encountered. If we
2789 // find a duplicate, we've found an exit which would have exited on the
2790 // exiting iteration, but (from the visit order) strictly follows another
2791 // which does the same and is thus dead.
2792 if (!DominatingExitCounts.insert(ExitCount).second) {
2793 FoldExit(ExitingBB, false);
2794 Changed = true;
2795 continue;
2798 // TODO: There might be another opportunity to leverage SCEV's reasoning
2799 // here. If we kept track of the min of dominating exits so far, we could
2800 // discharge exits with EC >= MDEC. This is less powerful than the existing
2801 // transform (since later exits aren't considered), but potentially more
2802 // powerful for any case where SCEV can prove a >=u b, but neither a == b
2803 // nor a >u b. Such a case is not currently known.
2805 return Changed;
2808 bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
2809 SmallVector<BasicBlock*, 16> ExitingBlocks;
2810 L->getExitingBlocks(ExitingBlocks);
2812 bool Changed = false;
2814 // Finally, see if we can rewrite our exit conditions into a loop invariant
2815 // form. If we have a read-only loop, and we can tell that we must exit down
2816 // a path which does not need any of the values computed within the loop, we
2817 // can rewrite the loop to exit on the first iteration. Note that this
2818 // neither a) tells us the loop exits on the first iteration (unless
2819 // *all* exits are predicatable) nor b) tells us *which* exit might be taken.
2820 // This transformation looks a lot like a restricted form of dead loop
2821 // elimination, but restricted to read-only loops and without necessarily
2822 // needing to kill the loop entirely.
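// An illustrative sketch (hypothetical, not from the original comment): in a
// read-only loop with two computable exits, say 'i == %n' and 'i == %m', each
// exit branch can be rewritten into a loop-invariant comparison between that
// exit's count and the exact backedge-taken count (folding to a constant when
// the two are equal), as done at the bottom of this function.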
2823 if (!LoopPredication)
2824 return Changed;
2826 if (!SE->hasLoopInvariantBackedgeTakenCount(L))
2827 return Changed;
2829 // Note: ExactBTC is the exact backedge taken count *iff* the loop exits
2830 // through *explicit* control flow. We have to eliminate the possibility of
2831 // implicit exits (see below) before we know it's truly exact.
2832 const SCEV *ExactBTC = SE->getBackedgeTakenCount(L);
2833 if (isa<SCEVCouldNotCompute>(ExactBTC) ||
2834 !SE->isLoopInvariant(ExactBTC, L) ||
2835 !isSafeToExpand(ExactBTC, *SE))
2836 return Changed;
2838 auto BadExit = [&](BasicBlock *ExitingBB) {
2839 // If our exiting block exits multiple loops, we can only rewrite the
2840 // innermost one. Otherwise, we're changing how many times the innermost
2841 // loop runs before it exits.
2842 if (LI->getLoopFor(ExitingBB) != L)
2843 return true;
2845 // Can't rewrite non-branch yet.
2846 BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
2847 if (!BI)
2848 return true;
2850 // If already constant, nothing to do.
2851 if (isa<Constant>(BI->getCondition()))
2852 return true;
2854 // If the exit block has phis, we need to be able to compute the values
2855 // within the loop which contains them. This assumes trivially lcssa phis
2856 // have already been removed; TODO: generalize
2857 BasicBlock *ExitBlock =
2858 BI->getSuccessor(L->contains(BI->getSuccessor(0)) ? 1 : 0);
2859 if (!ExitBlock->phis().empty())
2860 return true;
2862 const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
2863 assert(!isa<SCEVCouldNotCompute>(ExactBTC) && "implied by having exact trip count");
2864 if (!SE->isLoopInvariant(ExitCount, L) ||
2865 !isSafeToExpand(ExitCount, *SE))
2866 return true;
2868 return false;
2871 // If we have any exits which can't be predicated themselves, then we can't
2872 // predicate any exit which isn't guaranteed to execute before it. Consider
2873 // two exits (a) and (b) which would both exit on the same iteration. If we
2874 // can predicate (b), but not (a), and (a) precedes (b) along some path, then
2875 // we could convert a loop from exiting through (a) to one exiting through
2876 // (b). Note that this problem exists only for exits with the same exit
2877 // count, and we could be more aggressive when exit counts are known unequal.
2878 llvm::sort(ExitingBlocks,
2879 [&](BasicBlock *A, BasicBlock *B) {
2880 // std::sort sorts in ascending order, so we want the inverse of
2881 // the normal dominance relation, plus a tie breaker for blocks
2882 // unordered by dominance.
2883 if (DT->properlyDominates(A, B)) return true;
2884 if (DT->properlyDominates(B, A)) return false;
2885 return A->getName() < B->getName();
2886 });
2887 // Check to see if our exiting blocks form a total order (i.e. a linear chain
2888 // of exits before the backedge). If they don't, reasoning about reachability
2889 // is complicated and we choose not to for now.
2890 for (unsigned i = 1; i < ExitingBlocks.size(); i++)
2891 if (!DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]))
2892 return Changed;
2894 // Given our sorted total order, we know that exit[j] must be evaluated
2895 // after all exit[i] such that i < j.
2896 for (unsigned i = 0, e = ExitingBlocks.size(); i < e; i++)
2897 if (BadExit(ExitingBlocks[i])) {
2898 ExitingBlocks.resize(i);
2899 break;
2900 }
2902 if (ExitingBlocks.empty())
2903 return Changed;
2905 // We rely on not being able to reach an exiting block on a later iteration
2906 // than its statically computed exit count.  The implementation of
2907 // getExitCount currently guarantees this invariant, but we assert it here so
2908 // that breakage is obvious if this ever changes.
2909 assert(llvm::all_of(ExitingBlocks, [&](BasicBlock *ExitingBB) {
2910 return DT->dominates(ExitingBB, L->getLoopLatch());
2911 }));
2913 // At this point, ExitingBlocks consists of only those blocks which are
2914 // predicatable. Given that, we know we have at least one exit we can
2915 // predicate if the loop doesn't have side effects and doesn't have any
2916 // implicit exits (because then our exact BTC isn't actually exact).
2917 // @Reviewers - As structured, this is O(I^2) for loop nests. Any
2918 // suggestions on how to improve this? I can obviously bail out for outer
2919 // loops, but that seems less than ideal. MemorySSA can find memory writes,
2920 // is that enough for *all* side effects?
2921 for (BasicBlock *BB : L->blocks())
2922 for (auto &I : *BB)
2923 // TODO:isGuaranteedToTransfer
2924 if (I.mayHaveSideEffects() || I.mayThrow())
2925 return Changed;
2927 // Finally, do the actual predication for all predicatable blocks. A couple
2928 // of notes here:
2929 // 1) We don't bother to constant fold dominated exits with identical exit
2930 // counts; that's simply a form of CSE/equality propagation and we leave
2931 // it for dedicated passes.
2932 // 2) We insert the comparison at the branch. Hoisting introduces additional
2933 // legality constraints and we leave that to dedicated logic. We want to
2934 // predicate even if we can't insert a loop invariant expression as
2935 // peeling or unrolling will likely reduce the cost of the otherwise loop
2936 // varying check.
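// Concretely (a sketch of the emitted comparison; value names are
// illustrative):
//   %ec   = <expansion of this exit's ExitCount, in the preheader>
//   %btc  = <expansion of ExactBTC, in the preheader>
//   %cond = icmp ne iN %ec, %btc   ; "ne" when successor 0 stays in the loop
// with both operands zero-extended to a common width first if their types
// differ; an exit whose count is literally ExactBTC just gets a constant
// condition.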
2937 Rewriter.setInsertPoint(L->getLoopPreheader()->getTerminator());
2938 IRBuilder<> B(L->getLoopPreheader()->getTerminator());
2939 Value *ExactBTCV = nullptr; // Lazily generated if needed.
2940 for (BasicBlock *ExitingBB : ExitingBlocks) {
2941 const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
2943 auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
2944 Value *NewCond;
2945 if (ExitCount == ExactBTC) {
2946 NewCond = L->contains(BI->getSuccessor(0)) ?
2947 B.getFalse() : B.getTrue();
2948 } else {
2949 Value *ECV = Rewriter.expandCodeFor(ExitCount);
2950 if (!ExactBTCV)
2951 ExactBTCV = Rewriter.expandCodeFor(ExactBTC);
2952 Value *RHS = ExactBTCV;
2953 if (ECV->getType() != RHS->getType()) {
2954 Type *WiderTy = SE->getWiderType(ECV->getType(), RHS->getType());
2955 ECV = B.CreateZExt(ECV, WiderTy);
2956 RHS = B.CreateZExt(RHS, WiderTy);
2957 }
2958 auto Pred = L->contains(BI->getSuccessor(0)) ?
2959 ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
2960 NewCond = B.CreateICmp(Pred, ECV, RHS);
2961 }
2962 Value *OldCond = BI->getCondition();
2963 BI->setCondition(NewCond);
2964 if (OldCond->use_empty())
2965 DeadInsts.push_back(OldCond);
2966 Changed = true;
2967 }
2969 return Changed;
2970 }
2972 //===----------------------------------------------------------------------===//
2973 // IndVarSimplify driver. Manage several subpasses of IV simplification.
2974 //===----------------------------------------------------------------------===//
2976 bool IndVarSimplify::run(Loop *L) {
2977 // We need (and expect!) the incoming loop to be in LCSSA.
2978 assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
2979 "LCSSA required to run indvars!");
2980 bool Changed = false;
2982 // If LoopSimplify form is not available, stay out of trouble. Some notes:
2983 // - LSR currently only supports LoopSimplify-form loops. Indvars'
2984 // canonicalization can be a pessimization without LSR to "clean up"
2985 // afterwards.
2986 // - We depend on having a preheader; in particular,
2987 // Loop::getCanonicalInductionVariable only supports loops with preheaders,
2988 // and we're in trouble if we can't find the induction variable even when
2989 // we've manually inserted one.
2990 // - LFTR relies on having a single backedge.
2991 if (!L->isLoopSimplifyForm())
2992 return false;
2994 // If there are any floating-point recurrences, attempt to
2995 // transform them to use integer recurrences.
2996 Changed |= rewriteNonIntegerIVs(L);
2998 #ifndef NDEBUG
2999 // Used below for a consistency check only
3000 const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
3001 #endif
3003 // Create a rewriter object which we'll use to transform the code with.
3004 SCEVExpander Rewriter(*SE, DL, "indvars");
3005 #ifndef NDEBUG
3006 Rewriter.setDebugType(DEBUG_TYPE);
3007 #endif
3009 // Eliminate redundant IV users.
3011 // Simplification works best when run before other consumers of SCEV. We
3012 // attempt to avoid evaluating SCEVs for sign/zero extend operations until
3013 // other expressions involving loop IVs have been evaluated. This helps SCEV
3014 // set no-wrap flags before normalizing sign/zero extension.
3015 Rewriter.disableCanonicalMode();
3016 Changed |= simplifyAndExtend(L, Rewriter, LI);
3018 // Check to see if we can compute the final value of any expressions
3019 // that are recurrent in the loop, and substitute the exit values from the
3020 // loop into any instructions outside of the loop that use the final values
3021 // of the current expressions.
3022 if (ReplaceExitValue != NeverRepl)
3023 Changed |= rewriteLoopExitValues(L, Rewriter);
3025 // Eliminate redundant IV cycles.
3026 NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts);
3028 // Try to eliminate loop exits based on analyzable exit counts.
3029 Changed |= optimizeLoopExits(L, Rewriter);
3031 // Try to form loop invariant tests for loop exits by changing how many
3032 // iterations of the loop run when that is unobservable.
3033 Changed |= predicateLoopExits(L, Rewriter);
3035 // If we have a trip count expression, rewrite the loop's exit condition
3036 // using it.
3037 if (!DisableLFTR) {
3038 SmallVector<BasicBlock*, 16> ExitingBlocks;
3039 L->getExitingBlocks(ExitingBlocks);
3040 for (BasicBlock *ExitingBB : ExitingBlocks) {
3041 // Can't rewrite non-branch yet.
3042 if (!isa<BranchInst>(ExitingBB->getTerminator()))
3043 continue;
3045 // If our exiting block exits multiple loops, we can only rewrite the
3046 // innermost one. Otherwise, we're changing how many times the innermost
3047 // loop runs before it exits.
3048 if (LI->getLoopFor(ExitingBB) != L)
3049 continue;
3051 if (!needsLFTR(L, ExitingBB))
3052 continue;
3054 const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
3055 if (isa<SCEVCouldNotCompute>(ExitCount))
3056 continue;
3058 // This was handled above, but as we form SCEVs, we can sometimes refine
3059 // existing ones; this allows exit counts to be folded to zero which
3060 // weren't when optimizeLoopExits saw them. Arguably, we should iterate
3061 // until stable to handle cases like this better.
3062 if (ExitCount->isZero())
3063 continue;
3065 PHINode *IndVar = FindLoopCounter(L, ExitingBB, ExitCount, SE, DT);
3066 if (!IndVar)
3067 continue;
3069 // Avoid high cost expansions. Note: This heuristic is questionable in
3070 // that our definition of "high cost" is not exactly principled.
3071 if (Rewriter.isHighCostExpansion(ExitCount, L))
3072 continue;
3074 // Check preconditions for proper SCEVExpander operation. SCEV does not
3075 // express SCEVExpander's dependencies, such as LoopSimplify. Instead
3076 // any pass that uses the SCEVExpander must do it. This does not work
3077 // well for loop passes because SCEVExpander makes assumptions about
3078 // all loops, while LoopPassManager only forces the current loop to be
3079 // simplified.
3081 // FIXME: SCEV expansion has no way to bail out, so the caller must
3082 // explicitly check any assumptions made by SCEV. Brittle.
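// In practice that means the guard below (a descriptive note, not an extra
// requirement): only call into the expander when ExitCount is not an add
// recurrence, or when the add recurrence's loop has a preheader for the
// expander to hoist into.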
3083 const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(ExitCount);
3084 if (!AR || AR->getLoop()->getLoopPreheader())
3085 Changed |= linearFunctionTestReplace(L, ExitingBB,
3086 ExitCount, IndVar,
3087 Rewriter);
3088 }
3089 }
3090 // Clear the rewriter cache, because values that are in the rewriter's cache
3091 // can be deleted in the loop below, causing the AssertingVH in the cache to
3092 // trigger.
3093 Rewriter.clear();
3095 // Now that we're done iterating through lists, clean up any instructions
3096 // which are now dead.
3097 while (!DeadInsts.empty())
3098 if (Instruction *Inst =
3099 dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val()))
3100 Changed |= RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI);
3102 // The Rewriter may not be used from this point on.
3104 // Loop-invariant instructions in the preheader that aren't used in the
3105 // loop may be sunk below the loop to reduce register pressure.
3106 Changed |= sinkUnusedInvariants(L);
3108 // rewriteFirstIterationLoopExitValues does not rely on the computation of
3109 // trip count and therefore can further simplify exit values in addition to
3110 // rewriteLoopExitValues.
3111 Changed |= rewriteFirstIterationLoopExitValues(L);
3113 // Clean up dead instructions.
3114 Changed |= DeleteDeadPHIs(L->getHeader(), TLI);
3116 // Check a post-condition.
3117 assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
3118 "Indvars did not preserve LCSSA!");
3120 // Verify that LFTR and any other changes have not interfered with SCEV's
3121 // ability to compute trip count.
3122 #ifndef NDEBUG
3123 if (VerifyIndvars && !isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {
3124 SE->forgetLoop(L);
3125 const SCEV *NewBECount = SE->getBackedgeTakenCount(L);
3126 if (SE->getTypeSizeInBits(BackedgeTakenCount->getType()) <
3127 SE->getTypeSizeInBits(NewBECount->getType()))
3128 NewBECount = SE->getTruncateOrNoop(NewBECount,
3129 BackedgeTakenCount->getType());
3130 else
3131 BackedgeTakenCount = SE->getTruncateOrNoop(BackedgeTakenCount,
3132 NewBECount->getType());
3133 assert(BackedgeTakenCount == NewBECount && "indvars must preserve SCEV");
3134 }
3135 #endif
3137 return Changed;
3138 }
3140 PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM,
3141 LoopStandardAnalysisResults &AR,
3142 LPMUpdater &) {
3143 Function *F = L.getHeader()->getParent();
3144 const DataLayout &DL = F->getParent()->getDataLayout();
3146 IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI);
3147 if (!IVS.run(&L))
3148 return PreservedAnalyses::all();
3150 auto PA = getLoopPassPreservedAnalyses();
3151 PA.preserveSet<CFGAnalyses>();
3152 return PA;
3153 }
3155 namespace {
3157 struct IndVarSimplifyLegacyPass : public LoopPass {
3158 static char ID; // Pass identification, replacement for typeid
3160 IndVarSimplifyLegacyPass() : LoopPass(ID) {
3161 initializeIndVarSimplifyLegacyPassPass(*PassRegistry::getPassRegistry());
3162 }
3164 bool runOnLoop(Loop *L, LPPassManager &LPM) override {
3165 if (skipLoop(L))
3166 return false;
3168 auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
3169 auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
3170 auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
3171 auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
3172 auto *TLI = TLIP ? &TLIP->getTLI(*L->getHeader()->getParent()) : nullptr;
3173 auto *TTIP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
3174 auto *TTI = TTIP ? &TTIP->getTTI(*L->getHeader()->getParent()) : nullptr;
3175 const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
3177 IndVarSimplify IVS(LI, SE, DT, DL, TLI, TTI);
3178 return IVS.run(L);
3179 }
3181 void getAnalysisUsage(AnalysisUsage &AU) const override {
3182 AU.setPreservesCFG();
3183 getLoopAnalysisUsage(AU);
3184 }
3185 };
3187 } // end anonymous namespace
3189 char IndVarSimplifyLegacyPass::ID = 0;
3191 INITIALIZE_PASS_BEGIN(IndVarSimplifyLegacyPass, "indvars",
3192 "Induction Variable Simplification", false, false)
3193 INITIALIZE_PASS_DEPENDENCY(LoopPass)
3194 INITIALIZE_PASS_END(IndVarSimplifyLegacyPass, "indvars",
3195 "Induction Variable Simplification", false, false)
3197 Pass *llvm::createIndVarSimplifyPass() {
3198 return new IndVarSimplifyLegacyPass();
3199 }