//===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This transformation analyzes and transforms the induction variables (and
// computations derived from them) into simpler forms suitable for subsequent
// analysis and transformation.
//
// If the trip count of a loop is computable, this pass also makes the following
// changes:
//   1. The exit condition for the loop is canonicalized to compare the
//      induction value against the exit value.  This turns loops like:
//        'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
//   2. Any use outside of the loop of an expression derived from the indvar
//      is changed to compute the derived value outside of the loop, eliminating
//      the dependence on the exit value of the induction variable.  If the only
//      purpose of the loop is to compute the exit value of some derived
//      expression, this transformation will make the loop dead.
//
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Scalar/IndVarSimplify.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
#include "llvm/Transforms/Utils/SimplifyIndVar.h"
#include <cassert>
#include <cstdint>
#include <utility>
83 using namespace PatternMatch
;
85 #define DEBUG_TYPE "indvars"
87 STATISTIC(NumWidened
, "Number of indvars widened");
88 STATISTIC(NumReplaced
, "Number of exit values replaced");
89 STATISTIC(NumLFTR
, "Number of loop exit tests replaced");
90 STATISTIC(NumElimExt
, "Number of IV sign/zero extends eliminated");
91 STATISTIC(NumElimIV
, "Number of congruent IVs eliminated");
93 static cl::opt
<ReplaceExitVal
> ReplaceExitValue(
94 "replexitval", cl::Hidden
, cl::init(OnlyCheapRepl
),
95 cl::desc("Choose the strategy to replace exit value in IndVarSimplify"),
97 clEnumValN(NeverRepl
, "never", "never replace exit value"),
98 clEnumValN(OnlyCheapRepl
, "cheap",
99 "only replace exit value when the cost is cheap"),
101 UnusedIndVarInLoop
, "unusedindvarinloop",
102 "only replace exit value when it is an unused "
103 "induction variable in the loop and has cheap replacement cost"),
104 clEnumValN(NoHardUse
, "noharduse",
105 "only replace exit values when loop def likely dead"),
106 clEnumValN(AlwaysRepl
, "always",
107 "always replace exit value whenever possible")));
109 static cl::opt
<bool> UsePostIncrementRanges(
110 "indvars-post-increment-ranges", cl::Hidden
,
111 cl::desc("Use post increment control-dependent ranges in IndVarSimplify"),
115 DisableLFTR("disable-lftr", cl::Hidden
, cl::init(false),
116 cl::desc("Disable Linear Function Test Replace optimization"));
119 LoopPredication("indvars-predicate-loops", cl::Hidden
, cl::init(true),
120 cl::desc("Predicate conditions in read only loops"));
123 AllowIVWidening("indvars-widen-indvars", cl::Hidden
, cl::init(true),
124 cl::desc("Allow widening of indvars to eliminate s/zext"));
128 class IndVarSimplify
{
132 const DataLayout
&DL
;
133 TargetLibraryInfo
*TLI
;
134 const TargetTransformInfo
*TTI
;
135 std::unique_ptr
<MemorySSAUpdater
> MSSAU
;
137 SmallVector
<WeakTrackingVH
, 16> DeadInsts
;
140 bool handleFloatingPointIV(Loop
*L
, PHINode
*PH
);
141 bool rewriteNonIntegerIVs(Loop
*L
);
143 bool simplifyAndExtend(Loop
*L
, SCEVExpander
&Rewriter
, LoopInfo
*LI
);
144 /// Try to improve our exit conditions by converting condition from signed
145 /// to unsigned or rotating computation out of the loop.
146 /// (See inline comment about why this is duplicated from simplifyAndExtend)
147 bool canonicalizeExitCondition(Loop
*L
);
148 /// Try to eliminate loop exits based on analyzeable exit counts
149 bool optimizeLoopExits(Loop
*L
, SCEVExpander
&Rewriter
);
150 /// Try to form loop invariant tests for loop exits by changing how many
151 /// iterations of the loop run when that is unobservable.
152 bool predicateLoopExits(Loop
*L
, SCEVExpander
&Rewriter
);
154 bool rewriteFirstIterationLoopExitValues(Loop
*L
);
156 bool linearFunctionTestReplace(Loop
*L
, BasicBlock
*ExitingBB
,
157 const SCEV
*ExitCount
,
158 PHINode
*IndVar
, SCEVExpander
&Rewriter
);
160 bool sinkUnusedInvariants(Loop
*L
);
163 IndVarSimplify(LoopInfo
*LI
, ScalarEvolution
*SE
, DominatorTree
*DT
,
164 const DataLayout
&DL
, TargetLibraryInfo
*TLI
,
165 TargetTransformInfo
*TTI
, MemorySSA
*MSSA
, bool WidenIndVars
)
166 : LI(LI
), SE(SE
), DT(DT
), DL(DL
), TLI(TLI
), TTI(TTI
),
167 WidenIndVars(WidenIndVars
) {
169 MSSAU
= std::make_unique
<MemorySSAUpdater
>(MSSA
);
175 } // end anonymous namespace
177 //===----------------------------------------------------------------------===//
178 // rewriteNonIntegerIVs and helpers. Prefer integer IVs.
179 //===----------------------------------------------------------------------===//
181 /// Convert APF to an integer, if possible.
182 static bool ConvertToSInt(const APFloat
&APF
, int64_t &IntVal
) {
183 bool isExact
= false;
184 // See if we can convert this to an int64_t
186 if (APF
.convertToInteger(MutableArrayRef(UIntVal
), 64, true,
187 APFloat::rmTowardZero
, &isExact
) != APFloat::opOK
||
194 /// If the loop has floating induction variable then insert corresponding
195 /// integer induction variable if possible.
197 /// for(double i = 0; i < 10000; ++i)
199 /// is converted into
200 /// for(int i = 0; i < 10000; ++i)
202 bool IndVarSimplify::handleFloatingPointIV(Loop
*L
, PHINode
*PN
) {
203 unsigned IncomingEdge
= L
->contains(PN
->getIncomingBlock(0));
204 unsigned BackEdge
= IncomingEdge
^1;
206 // Check incoming value.
207 auto *InitValueVal
= dyn_cast
<ConstantFP
>(PN
->getIncomingValue(IncomingEdge
));
210 if (!InitValueVal
|| !ConvertToSInt(InitValueVal
->getValueAPF(), InitValue
))
213 // Check IV increment. Reject this PN if increment operation is not
214 // an add or increment value can not be represented by an integer.
215 auto *Incr
= dyn_cast
<BinaryOperator
>(PN
->getIncomingValue(BackEdge
));
216 if (Incr
== nullptr || Incr
->getOpcode() != Instruction::FAdd
) return false;
218 // If this is not an add of the PHI with a constantfp, or if the constant fp
219 // is not an integer, bail out.
220 ConstantFP
*IncValueVal
= dyn_cast
<ConstantFP
>(Incr
->getOperand(1));
222 if (IncValueVal
== nullptr || Incr
->getOperand(0) != PN
||
223 !ConvertToSInt(IncValueVal
->getValueAPF(), IncValue
))
226 // Check Incr uses. One user is PN and the other user is an exit condition
227 // used by the conditional terminator.
228 Value::user_iterator IncrUse
= Incr
->user_begin();
229 Instruction
*U1
= cast
<Instruction
>(*IncrUse
++);
230 if (IncrUse
== Incr
->user_end()) return false;
231 Instruction
*U2
= cast
<Instruction
>(*IncrUse
++);
232 if (IncrUse
!= Incr
->user_end()) return false;
234 // Find exit condition, which is an fcmp. If it doesn't exist, or if it isn't
235 // only used by a branch, we can't transform it.
236 FCmpInst
*Compare
= dyn_cast
<FCmpInst
>(U1
);
238 Compare
= dyn_cast
<FCmpInst
>(U2
);
239 if (!Compare
|| !Compare
->hasOneUse() ||
240 !isa
<BranchInst
>(Compare
->user_back()))
243 BranchInst
*TheBr
= cast
<BranchInst
>(Compare
->user_back());
245 // We need to verify that the branch actually controls the iteration count
246 // of the loop. If not, the new IV can overflow and no one will notice.
247 // The branch block must be in the loop and one of the successors must be out
249 assert(TheBr
->isConditional() && "Can't use fcmp if not conditional");
250 if (!L
->contains(TheBr
->getParent()) ||
251 (L
->contains(TheBr
->getSuccessor(0)) &&
252 L
->contains(TheBr
->getSuccessor(1))))
255 // If it isn't a comparison with an integer-as-fp (the exit value), we can't
257 ConstantFP
*ExitValueVal
= dyn_cast
<ConstantFP
>(Compare
->getOperand(1));
259 if (ExitValueVal
== nullptr ||
260 !ConvertToSInt(ExitValueVal
->getValueAPF(), ExitValue
))
263 // Find new predicate for integer comparison.
264 CmpInst::Predicate NewPred
= CmpInst::BAD_ICMP_PREDICATE
;
265 switch (Compare
->getPredicate()) {
266 default: return false; // Unknown comparison.
267 case CmpInst::FCMP_OEQ
:
268 case CmpInst::FCMP_UEQ
: NewPred
= CmpInst::ICMP_EQ
; break;
269 case CmpInst::FCMP_ONE
:
270 case CmpInst::FCMP_UNE
: NewPred
= CmpInst::ICMP_NE
; break;
271 case CmpInst::FCMP_OGT
:
272 case CmpInst::FCMP_UGT
: NewPred
= CmpInst::ICMP_SGT
; break;
273 case CmpInst::FCMP_OGE
:
274 case CmpInst::FCMP_UGE
: NewPred
= CmpInst::ICMP_SGE
; break;
275 case CmpInst::FCMP_OLT
:
276 case CmpInst::FCMP_ULT
: NewPred
= CmpInst::ICMP_SLT
; break;
277 case CmpInst::FCMP_OLE
:
278 case CmpInst::FCMP_ULE
: NewPred
= CmpInst::ICMP_SLE
; break;
281 // We convert the floating point induction variable to a signed i32 value if
282 // we can. This is only safe if the comparison will not overflow in a way
283 // that won't be trapped by the integer equivalent operations. Check for this
285 // TODO: We could use i64 if it is native and the range requires it.
287 // The start/stride/exit values must all fit in signed i32.
288 if (!isInt
<32>(InitValue
) || !isInt
<32>(IncValue
) || !isInt
<32>(ExitValue
))
291 // If not actually striding (add x, 0.0), avoid touching the code.
295 // Positive and negative strides have different safety conditions.
297 // If we have a positive stride, we require the init to be less than the
299 if (InitValue
>= ExitValue
)
302 uint32_t Range
= uint32_t(ExitValue
-InitValue
);
303 // Check for infinite loop, either:
304 // while (i <= Exit) or until (i > Exit)
305 if (NewPred
== CmpInst::ICMP_SLE
|| NewPred
== CmpInst::ICMP_SGT
) {
306 if (++Range
== 0) return false; // Range overflows.
309 unsigned Leftover
= Range
% uint32_t(IncValue
);
311 // If this is an equality comparison, we require that the strided value
312 // exactly land on the exit value, otherwise the IV condition will wrap
313 // around and do things the fp IV wouldn't.
314 if ((NewPred
== CmpInst::ICMP_EQ
|| NewPred
== CmpInst::ICMP_NE
) &&
318 // If the stride would wrap around the i32 before exiting, we can't
320 if (Leftover
!= 0 && int32_t(ExitValue
+IncValue
) < ExitValue
)
323 // If we have a negative stride, we require the init to be greater than the
325 if (InitValue
<= ExitValue
)
328 uint32_t Range
= uint32_t(InitValue
-ExitValue
);
329 // Check for infinite loop, either:
330 // while (i >= Exit) or until (i < Exit)
331 if (NewPred
== CmpInst::ICMP_SGE
|| NewPred
== CmpInst::ICMP_SLT
) {
332 if (++Range
== 0) return false; // Range overflows.
335 unsigned Leftover
= Range
% uint32_t(-IncValue
);
337 // If this is an equality comparison, we require that the strided value
338 // exactly land on the exit value, otherwise the IV condition will wrap
339 // around and do things the fp IV wouldn't.
340 if ((NewPred
== CmpInst::ICMP_EQ
|| NewPred
== CmpInst::ICMP_NE
) &&
344 // If the stride would wrap around the i32 before exiting, we can't
346 if (Leftover
!= 0 && int32_t(ExitValue
+IncValue
) > ExitValue
)
350 IntegerType
*Int32Ty
= Type::getInt32Ty(PN
->getContext());
352 // Insert new integer induction variable.
353 PHINode
*NewPHI
= PHINode::Create(Int32Ty
, 2, PN
->getName()+".int", PN
);
354 NewPHI
->addIncoming(ConstantInt::get(Int32Ty
, InitValue
),
355 PN
->getIncomingBlock(IncomingEdge
));
358 BinaryOperator::CreateAdd(NewPHI
, ConstantInt::get(Int32Ty
, IncValue
),
359 Incr
->getName()+".int", Incr
);
360 NewPHI
->addIncoming(NewAdd
, PN
->getIncomingBlock(BackEdge
));
362 ICmpInst
*NewCompare
= new ICmpInst(TheBr
, NewPred
, NewAdd
,
363 ConstantInt::get(Int32Ty
, ExitValue
),
366 // In the following deletions, PN may become dead and may be deleted.
367 // Use a WeakTrackingVH to observe whether this happens.
368 WeakTrackingVH WeakPH
= PN
;
370 // Delete the old floating point exit comparison. The branch starts using the
372 NewCompare
->takeName(Compare
);
373 Compare
->replaceAllUsesWith(NewCompare
);
374 RecursivelyDeleteTriviallyDeadInstructions(Compare
, TLI
, MSSAU
.get());
376 // Delete the old floating point increment.
377 Incr
->replaceAllUsesWith(PoisonValue::get(Incr
->getType()));
378 RecursivelyDeleteTriviallyDeadInstructions(Incr
, TLI
, MSSAU
.get());
380 // If the FP induction variable still has uses, this is because something else
381 // in the loop uses its value. In order to canonicalize the induction
382 // variable, we chose to eliminate the IV and rewrite it in terms of an
385 // We give preference to sitofp over uitofp because it is faster on most
388 Value
*Conv
= new SIToFPInst(NewPHI
, PN
->getType(), "indvar.conv",
389 &*PN
->getParent()->getFirstInsertionPt());
390 PN
->replaceAllUsesWith(Conv
);
391 RecursivelyDeleteTriviallyDeadInstructions(PN
, TLI
, MSSAU
.get());
396 bool IndVarSimplify::rewriteNonIntegerIVs(Loop
*L
) {
397 // First step. Check to see if there are any floating-point recurrences.
398 // If there are, change them into integer recurrences, permitting analysis by
399 // the SCEV routines.
400 BasicBlock
*Header
= L
->getHeader();
402 SmallVector
<WeakTrackingVH
, 8> PHIs
;
403 for (PHINode
&PN
: Header
->phis())
406 bool Changed
= false;
407 for (WeakTrackingVH
&PHI
: PHIs
)
408 if (PHINode
*PN
= dyn_cast_or_null
<PHINode
>(&*PHI
))
409 Changed
|= handleFloatingPointIV(L
, PN
);
411 // If the loop previously had floating-point IV, ScalarEvolution
412 // may not have been able to compute a trip count. Now that we've done some
413 // re-writing, the trip count may be computable.
419 //===---------------------------------------------------------------------===//
420 // rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know
421 // they will exit at the first iteration.
422 //===---------------------------------------------------------------------===//
424 /// Check to see if this loop has loop invariant conditions which lead to loop
425 /// exits. If so, we know that if the exit path is taken, it is at the first
426 /// loop iteration. This lets us predict exit values of PHI nodes that live in
428 bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop
*L
) {
429 // Verify the input to the pass is already in LCSSA form.
430 assert(L
->isLCSSAForm(*DT
));
432 SmallVector
<BasicBlock
*, 8> ExitBlocks
;
433 L
->getUniqueExitBlocks(ExitBlocks
);
435 bool MadeAnyChanges
= false;
436 for (auto *ExitBB
: ExitBlocks
) {
437 // If there are no more PHI nodes in this exit block, then no more
438 // values defined inside the loop are used on this path.
439 for (PHINode
&PN
: ExitBB
->phis()) {
440 for (unsigned IncomingValIdx
= 0, E
= PN
.getNumIncomingValues();
441 IncomingValIdx
!= E
; ++IncomingValIdx
) {
442 auto *IncomingBB
= PN
.getIncomingBlock(IncomingValIdx
);
444 // Can we prove that the exit must run on the first iteration if it
445 // runs at all? (i.e. early exits are fine for our purposes, but
446 // traces which lead to this exit being taken on the 2nd iteration
447 // aren't.) Note that this is about whether the exit branch is
448 // executed, not about whether it is taken.
449 if (!L
->getLoopLatch() ||
450 !DT
->dominates(IncomingBB
, L
->getLoopLatch()))
453 // Get condition that leads to the exit path.
454 auto *TermInst
= IncomingBB
->getTerminator();
456 Value
*Cond
= nullptr;
457 if (auto *BI
= dyn_cast
<BranchInst
>(TermInst
)) {
458 // Must be a conditional branch, otherwise the block
459 // should not be in the loop.
460 Cond
= BI
->getCondition();
461 } else if (auto *SI
= dyn_cast
<SwitchInst
>(TermInst
))
462 Cond
= SI
->getCondition();
466 if (!L
->isLoopInvariant(Cond
))
469 auto *ExitVal
= dyn_cast
<PHINode
>(PN
.getIncomingValue(IncomingValIdx
));
471 // Only deal with PHIs in the loop header.
472 if (!ExitVal
|| ExitVal
->getParent() != L
->getHeader())
475 // If ExitVal is a PHI on the loop header, then we know its
476 // value along this exit because the exit can only be taken
477 // on the first iteration.
478 auto *LoopPreheader
= L
->getLoopPreheader();
479 assert(LoopPreheader
&& "Invalid loop");
480 int PreheaderIdx
= ExitVal
->getBasicBlockIndex(LoopPreheader
);
481 if (PreheaderIdx
!= -1) {
482 assert(ExitVal
->getParent() == L
->getHeader() &&
483 "ExitVal must be in loop header");
484 MadeAnyChanges
= true;
485 PN
.setIncomingValue(IncomingValIdx
,
486 ExitVal
->getIncomingValue(PreheaderIdx
));
487 SE
->forgetValue(&PN
);
492 return MadeAnyChanges
;
495 //===----------------------------------------------------------------------===//
496 // IV Widening - Extend the width of an IV to cover its widest uses.
497 //===----------------------------------------------------------------------===//
499 /// Update information about the induction variable that is extended by this
500 /// sign or zero extend operation. This is used to determine the final width of
501 /// the IV before actually widening it.
502 static void visitIVCast(CastInst
*Cast
, WideIVInfo
&WI
,
504 const TargetTransformInfo
*TTI
) {
505 bool IsSigned
= Cast
->getOpcode() == Instruction::SExt
;
506 if (!IsSigned
&& Cast
->getOpcode() != Instruction::ZExt
)
509 Type
*Ty
= Cast
->getType();
510 uint64_t Width
= SE
->getTypeSizeInBits(Ty
);
511 if (!Cast
->getModule()->getDataLayout().isLegalInteger(Width
))
514 // Check that `Cast` actually extends the induction variable (we rely on this
515 // later). This takes care of cases where `Cast` is extending a truncation of
516 // the narrow induction variable, and thus can end up being narrower than the
517 // "narrow" induction variable.
518 uint64_t NarrowIVWidth
= SE
->getTypeSizeInBits(WI
.NarrowIV
->getType());
519 if (NarrowIVWidth
>= Width
)
522 // Cast is either an sext or zext up to this point.
523 // We should not widen an indvar if arithmetics on the wider indvar are more
524 // expensive than those on the narrower indvar. We check only the cost of ADD
525 // because at least an ADD is required to increment the induction variable. We
526 // could compute more comprehensively the cost of all instructions on the
527 // induction variable when necessary.
529 TTI
->getArithmeticInstrCost(Instruction::Add
, Ty
) >
530 TTI
->getArithmeticInstrCost(Instruction::Add
,
531 Cast
->getOperand(0)->getType())) {
535 if (!WI
.WidestNativeType
||
536 Width
> SE
->getTypeSizeInBits(WI
.WidestNativeType
)) {
537 WI
.WidestNativeType
= SE
->getEffectiveSCEVType(Ty
);
538 WI
.IsSigned
= IsSigned
;
542 // We extend the IV to satisfy the sign of its user(s), or 'signed'
543 // if there are multiple users with both sign- and zero extensions,
544 // in order not to introduce nondeterministic behaviour based on the
545 // unspecified order of a PHI nodes' users-iterator.
546 WI
.IsSigned
|= IsSigned
;
549 //===----------------------------------------------------------------------===//
550 // Live IV Reduction - Minimize IVs live across the loop.
551 //===----------------------------------------------------------------------===//
553 //===----------------------------------------------------------------------===//
554 // Simplification of IV users based on SCEV evaluation.
555 //===----------------------------------------------------------------------===//
559 class IndVarSimplifyVisitor
: public IVVisitor
{
561 const TargetTransformInfo
*TTI
;
567 IndVarSimplifyVisitor(PHINode
*IV
, ScalarEvolution
*SCEV
,
568 const TargetTransformInfo
*TTI
,
569 const DominatorTree
*DTree
)
570 : SE(SCEV
), TTI(TTI
), IVPhi(IV
) {
575 // Implement the interface used by simplifyUsersOfIV.
576 void visitCast(CastInst
*Cast
) override
{ visitIVCast(Cast
, WI
, SE
, TTI
); }
579 } // end anonymous namespace
581 /// Iteratively perform simplification on a worklist of IV users. Each
582 /// successive simplification may push more users which may themselves be
583 /// candidates for simplification.
585 /// Sign/Zero extend elimination is interleaved with IV simplification.
586 bool IndVarSimplify::simplifyAndExtend(Loop
*L
,
587 SCEVExpander
&Rewriter
,
589 SmallVector
<WideIVInfo
, 8> WideIVs
;
591 auto *GuardDecl
= L
->getBlocks()[0]->getModule()->getFunction(
592 Intrinsic::getName(Intrinsic::experimental_guard
));
593 bool HasGuards
= GuardDecl
&& !GuardDecl
->use_empty();
595 SmallVector
<PHINode
*, 8> LoopPhis
;
596 for (PHINode
&PN
: L
->getHeader()->phis())
597 LoopPhis
.push_back(&PN
);
599 // Each round of simplification iterates through the SimplifyIVUsers worklist
600 // for all current phis, then determines whether any IVs can be
601 // widened. Widening adds new phis to LoopPhis, inducing another round of
602 // simplification on the wide IVs.
603 bool Changed
= false;
604 while (!LoopPhis
.empty()) {
605 // Evaluate as many IV expressions as possible before widening any IVs. This
606 // forces SCEV to set no-wrap flags before evaluating sign/zero
607 // extension. The first time SCEV attempts to normalize sign/zero extension,
608 // the result becomes final. So for the most predictable results, we delay
609 // evaluation of sign/zero extend evaluation until needed, and avoid running
610 // other SCEV based analysis prior to simplifyAndExtend.
612 PHINode
*CurrIV
= LoopPhis
.pop_back_val();
614 // Information about sign/zero extensions of CurrIV.
615 IndVarSimplifyVisitor
Visitor(CurrIV
, SE
, TTI
, DT
);
617 Changed
|= simplifyUsersOfIV(CurrIV
, SE
, DT
, LI
, TTI
, DeadInsts
, Rewriter
,
620 if (Visitor
.WI
.WidestNativeType
) {
621 WideIVs
.push_back(Visitor
.WI
);
623 } while(!LoopPhis
.empty());
625 // Continue if we disallowed widening.
629 for (; !WideIVs
.empty(); WideIVs
.pop_back()) {
632 if (PHINode
*WidePhi
= createWideIV(WideIVs
.back(), LI
, SE
, Rewriter
,
633 DT
, DeadInsts
, ElimExt
, Widened
,
634 HasGuards
, UsePostIncrementRanges
)) {
635 NumElimExt
+= ElimExt
;
636 NumWidened
+= Widened
;
638 LoopPhis
.push_back(WidePhi
);
645 //===----------------------------------------------------------------------===//
646 // linearFunctionTestReplace and its kin. Rewrite the loop exit condition.
647 //===----------------------------------------------------------------------===//
649 /// Given an Value which is hoped to be part of an add recurance in the given
650 /// loop, return the associated Phi node if so. Otherwise, return null. Note
651 /// that this is less general than SCEVs AddRec checking.
652 static PHINode
*getLoopPhiForCounter(Value
*IncV
, Loop
*L
) {
653 Instruction
*IncI
= dyn_cast
<Instruction
>(IncV
);
657 switch (IncI
->getOpcode()) {
658 case Instruction::Add
:
659 case Instruction::Sub
:
661 case Instruction::GetElementPtr
:
662 // An IV counter must preserve its type.
663 if (IncI
->getNumOperands() == 2)
670 PHINode
*Phi
= dyn_cast
<PHINode
>(IncI
->getOperand(0));
671 if (Phi
&& Phi
->getParent() == L
->getHeader()) {
672 if (L
->isLoopInvariant(IncI
->getOperand(1)))
676 if (IncI
->getOpcode() == Instruction::GetElementPtr
)
679 // Allow add/sub to be commuted.
680 Phi
= dyn_cast
<PHINode
>(IncI
->getOperand(1));
681 if (Phi
&& Phi
->getParent() == L
->getHeader()) {
682 if (L
->isLoopInvariant(IncI
->getOperand(0)))
688 /// Whether the current loop exit test is based on this value. Currently this
689 /// is limited to a direct use in the loop condition.
690 static bool isLoopExitTestBasedOn(Value
*V
, BasicBlock
*ExitingBB
) {
691 BranchInst
*BI
= cast
<BranchInst
>(ExitingBB
->getTerminator());
692 ICmpInst
*ICmp
= dyn_cast
<ICmpInst
>(BI
->getCondition());
693 // TODO: Allow non-icmp loop test.
697 // TODO: Allow indirect use.
698 return ICmp
->getOperand(0) == V
|| ICmp
->getOperand(1) == V
;
701 /// linearFunctionTestReplace policy. Return true unless we can show that the
702 /// current exit test is already sufficiently canonical.
703 static bool needsLFTR(Loop
*L
, BasicBlock
*ExitingBB
) {
704 assert(L
->getLoopLatch() && "Must be in simplified form");
706 // Avoid converting a constant or loop invariant test back to a runtime
707 // test. This is critical for when SCEV's cached ExitCount is less precise
708 // than the current IR (such as after we've proven a particular exit is
709 // actually dead and thus the BE count never reaches our ExitCount.)
710 BranchInst
*BI
= cast
<BranchInst
>(ExitingBB
->getTerminator());
711 if (L
->isLoopInvariant(BI
->getCondition()))
714 // Do LFTR to simplify the exit condition to an ICMP.
715 ICmpInst
*Cond
= dyn_cast
<ICmpInst
>(BI
->getCondition());
719 // Do LFTR to simplify the exit ICMP to EQ/NE
720 ICmpInst::Predicate Pred
= Cond
->getPredicate();
721 if (Pred
!= ICmpInst::ICMP_NE
&& Pred
!= ICmpInst::ICMP_EQ
)
724 // Look for a loop invariant RHS
725 Value
*LHS
= Cond
->getOperand(0);
726 Value
*RHS
= Cond
->getOperand(1);
727 if (!L
->isLoopInvariant(RHS
)) {
728 if (!L
->isLoopInvariant(LHS
))
732 // Look for a simple IV counter LHS
733 PHINode
*Phi
= dyn_cast
<PHINode
>(LHS
);
735 Phi
= getLoopPhiForCounter(LHS
, L
);
740 // Do LFTR if PHI node is defined in the loop, but is *not* a counter.
741 int Idx
= Phi
->getBasicBlockIndex(L
->getLoopLatch());
745 // Do LFTR if the exit condition's IV is *not* a simple counter.
746 Value
*IncV
= Phi
->getIncomingValue(Idx
);
747 return Phi
!= getLoopPhiForCounter(IncV
, L
);
750 /// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
751 /// down to checking that all operands are constant and listing instructions
752 /// that may hide undef.
753 static bool hasConcreteDefImpl(Value
*V
, SmallPtrSetImpl
<Value
*> &Visited
,
755 if (isa
<Constant
>(V
))
756 return !isa
<UndefValue
>(V
);
761 // Conservatively handle non-constant non-instructions. For example, Arguments
763 Instruction
*I
= dyn_cast
<Instruction
>(V
);
767 // Load and return values may be undef.
768 if(I
->mayReadFromMemory() || isa
<CallInst
>(I
) || isa
<InvokeInst
>(I
))
771 // Optimistically handle other instructions.
772 for (Value
*Op
: I
->operands()) {
773 if (!Visited
.insert(Op
).second
)
775 if (!hasConcreteDefImpl(Op
, Visited
, Depth
+1))
781 /// Return true if the given value is concrete. We must prove that undef can
784 /// TODO: If we decide that this is a good approach to checking for undef, we
785 /// may factor it into a common location.
786 static bool hasConcreteDef(Value
*V
) {
787 SmallPtrSet
<Value
*, 8> Visited
;
789 return hasConcreteDefImpl(V
, Visited
, 0);
792 /// Return true if the given phi is a "counter" in L. A counter is an
793 /// add recurance (of integer or pointer type) with an arbitrary start, and a
794 /// step of 1. Note that L must have exactly one latch.
795 static bool isLoopCounter(PHINode
* Phi
, Loop
*L
,
796 ScalarEvolution
*SE
) {
797 assert(Phi
->getParent() == L
->getHeader());
798 assert(L
->getLoopLatch());
800 if (!SE
->isSCEVable(Phi
->getType()))
803 const SCEVAddRecExpr
*AR
= dyn_cast
<SCEVAddRecExpr
>(SE
->getSCEV(Phi
));
804 if (!AR
|| AR
->getLoop() != L
|| !AR
->isAffine())
807 const SCEV
*Step
= dyn_cast
<SCEVConstant
>(AR
->getStepRecurrence(*SE
));
808 if (!Step
|| !Step
->isOne())
811 int LatchIdx
= Phi
->getBasicBlockIndex(L
->getLoopLatch());
812 Value
*IncV
= Phi
->getIncomingValue(LatchIdx
);
813 return (getLoopPhiForCounter(IncV
, L
) == Phi
&&
814 isa
<SCEVAddRecExpr
>(SE
->getSCEV(IncV
)));
817 /// Search the loop header for a loop counter (anadd rec w/step of one)
818 /// suitable for use by LFTR. If multiple counters are available, select the
819 /// "best" one based profitable heuristics.
821 /// BECount may be an i8* pointer type. The pointer difference is already
822 /// valid count without scaling the address stride, so it remains a pointer
823 /// expression as far as SCEV is concerned.
824 static PHINode
*FindLoopCounter(Loop
*L
, BasicBlock
*ExitingBB
,
826 ScalarEvolution
*SE
, DominatorTree
*DT
) {
827 uint64_t BCWidth
= SE
->getTypeSizeInBits(BECount
->getType());
829 Value
*Cond
= cast
<BranchInst
>(ExitingBB
->getTerminator())->getCondition();
831 // Loop over all of the PHI nodes, looking for a simple counter.
832 PHINode
*BestPhi
= nullptr;
833 const SCEV
*BestInit
= nullptr;
834 BasicBlock
*LatchBlock
= L
->getLoopLatch();
835 assert(LatchBlock
&& "Must be in simplified form");
836 const DataLayout
&DL
= L
->getHeader()->getModule()->getDataLayout();
838 for (BasicBlock::iterator I
= L
->getHeader()->begin(); isa
<PHINode
>(I
); ++I
) {
839 PHINode
*Phi
= cast
<PHINode
>(I
);
840 if (!isLoopCounter(Phi
, L
, SE
))
843 const auto *AR
= cast
<SCEVAddRecExpr
>(SE
->getSCEV(Phi
));
845 // AR may be a pointer type, while BECount is an integer type.
846 // AR may be wider than BECount. With eq/ne tests overflow is immaterial.
847 // AR may not be a narrower type, or we may never exit.
848 uint64_t PhiWidth
= SE
->getTypeSizeInBits(AR
->getType());
849 if (PhiWidth
< BCWidth
|| !DL
.isLegalInteger(PhiWidth
))
852 // Avoid reusing a potentially undef value to compute other values that may
853 // have originally had a concrete definition.
854 if (!hasConcreteDef(Phi
)) {
855 // We explicitly allow unknown phis as long as they are already used by
856 // the loop exit test. This is legal since performing LFTR could not
857 // increase the number of undef users.
858 Value
*IncPhi
= Phi
->getIncomingValueForBlock(LatchBlock
);
859 if (!isLoopExitTestBasedOn(Phi
, ExitingBB
) &&
860 !isLoopExitTestBasedOn(IncPhi
, ExitingBB
))
864 // Avoid introducing undefined behavior due to poison which didn't exist in
865 // the original program. (Annoyingly, the rules for poison and undef
866 // propagation are distinct, so this does NOT cover the undef case above.)
867 // We have to ensure that we don't introduce UB by introducing a use on an
868 // iteration where said IV produces poison. Our strategy here differs for
869 // pointers and integer IVs. For integers, we strip and reinfer as needed,
870 // see code in linearFunctionTestReplace. For pointers, we restrict
871 // transforms as there is no good way to reinfer inbounds once lost.
872 if (!Phi
->getType()->isIntegerTy() &&
873 !mustExecuteUBIfPoisonOnPathTo(Phi
, ExitingBB
->getTerminator(), DT
))
876 const SCEV
*Init
= AR
->getStart();
878 if (BestPhi
&& !isAlmostDeadIV(BestPhi
, LatchBlock
, Cond
)) {
879 // Don't force a live loop counter if another IV can be used.
880 if (isAlmostDeadIV(Phi
, LatchBlock
, Cond
))
883 // Prefer to count-from-zero. This is a more "canonical" counter form. It
884 // also prefers integer to pointer IVs.
885 if (BestInit
->isZero() != Init
->isZero()) {
886 if (BestInit
->isZero())
889 // If two IVs both count from zero or both count from nonzero then the
890 // narrower is likely a dead phi that has been widened. Use the wider phi
891 // to allow the other to be eliminated.
892 else if (PhiWidth
<= SE
->getTypeSizeInBits(BestPhi
->getType()))
901 /// Insert an IR expression which computes the value held by the IV IndVar
902 /// (which must be an loop counter w/unit stride) after the backedge of loop L
903 /// is taken ExitCount times.
904 static Value
*genLoopLimit(PHINode
*IndVar
, BasicBlock
*ExitingBB
,
905 const SCEV
*ExitCount
, bool UsePostInc
, Loop
*L
,
906 SCEVExpander
&Rewriter
, ScalarEvolution
*SE
) {
907 assert(isLoopCounter(IndVar
, L
, SE
));
908 assert(ExitCount
->getType()->isIntegerTy() && "exit count must be integer");
909 const SCEVAddRecExpr
*AR
= cast
<SCEVAddRecExpr
>(SE
->getSCEV(IndVar
));
910 assert(AR
->getStepRecurrence(*SE
)->isOne() && "only handles unit stride");
912 // For integer IVs, truncate the IV before computing the limit unless we
913 // know apriori that the limit must be a constant when evaluated in the
914 // bitwidth of the IV. We prefer (potentially) keeping a truncate of the
915 // IV in the loop over a (potentially) expensive expansion of the widened
916 // exit count add(zext(add)) expression.
917 if (IndVar
->getType()->isIntegerTy() &&
918 SE
->getTypeSizeInBits(AR
->getType()) >
919 SE
->getTypeSizeInBits(ExitCount
->getType())) {
920 const SCEV
*IVInit
= AR
->getStart();
921 if (!isa
<SCEVConstant
>(IVInit
) || !isa
<SCEVConstant
>(ExitCount
))
922 AR
= cast
<SCEVAddRecExpr
>(SE
->getTruncateExpr(AR
, ExitCount
->getType()));
925 const SCEVAddRecExpr
*ARBase
= UsePostInc
? AR
->getPostIncExpr(*SE
) : AR
;
926 const SCEV
*IVLimit
= ARBase
->evaluateAtIteration(ExitCount
, *SE
);
927 assert(SE
->isLoopInvariant(IVLimit
, L
) &&
928 "Computed iteration count is not loop invariant!");
929 return Rewriter
.expandCodeFor(IVLimit
, ARBase
->getType(),
930 ExitingBB
->getTerminator());
933 /// This method rewrites the exit condition of the loop to be a canonical !=
934 /// comparison against the incremented loop induction variable. This pass is
935 /// able to rewrite the exit tests of any loop where the SCEV analysis can
936 /// determine a loop-invariant trip count of the loop, which is actually a much
937 /// broader range than just linear tests.
938 bool IndVarSimplify::
939 linearFunctionTestReplace(Loop
*L
, BasicBlock
*ExitingBB
,
940 const SCEV
*ExitCount
,
941 PHINode
*IndVar
, SCEVExpander
&Rewriter
) {
942 assert(L
->getLoopLatch() && "Loop no longer in simplified form?");
943 assert(isLoopCounter(IndVar
, L
, SE
));
944 Instruction
* const IncVar
=
945 cast
<Instruction
>(IndVar
->getIncomingValueForBlock(L
->getLoopLatch()));
947 // Initialize CmpIndVar to the preincremented IV.
948 Value
*CmpIndVar
= IndVar
;
949 bool UsePostInc
= false;
951 // If the exiting block is the same as the backedge block, we prefer to
952 // compare against the post-incremented value, otherwise we must compare
953 // against the preincremented value.
954 if (ExitingBB
== L
->getLoopLatch()) {
955 // For pointer IVs, we chose to not strip inbounds which requires us not
956 // to add a potentially UB introducing use. We need to either a) show
957 // the loop test we're modifying is already in post-inc form, or b) show
958 // that adding a use must not introduce UB.
960 IndVar
->getType()->isIntegerTy() ||
961 isLoopExitTestBasedOn(IncVar
, ExitingBB
) ||
962 mustExecuteUBIfPoisonOnPathTo(IncVar
, ExitingBB
->getTerminator(), DT
);
969 // It may be necessary to drop nowrap flags on the incrementing instruction
970 // if either LFTR moves from a pre-inc check to a post-inc check (in which
971 // case the increment might have previously been poison on the last iteration
972 // only) or if LFTR switches to a different IV that was previously dynamically
973 // dead (and as such may be arbitrarily poison). We remove any nowrap flags
974 // that SCEV didn't infer for the post-inc addrec (even if we use a pre-inc
975 // check), because the pre-inc addrec flags may be adopted from the original
976 // instruction, while SCEV has to explicitly prove the post-inc nowrap flags.
977 // TODO: This handling is inaccurate for one case: If we switch to a
978 // dynamically dead IV that wraps on the first loop iteration only, which is
979 // not covered by the post-inc addrec. (If the new IV was not dynamically
980 // dead, it could not be poison on the first iteration in the first place.)
981 if (auto *BO
= dyn_cast
<BinaryOperator
>(IncVar
)) {
982 const SCEVAddRecExpr
*AR
= cast
<SCEVAddRecExpr
>(SE
->getSCEV(IncVar
));
983 if (BO
->hasNoUnsignedWrap())
984 BO
->setHasNoUnsignedWrap(AR
->hasNoUnsignedWrap());
985 if (BO
->hasNoSignedWrap())
986 BO
->setHasNoSignedWrap(AR
->hasNoSignedWrap());
989 Value
*ExitCnt
= genLoopLimit(
990 IndVar
, ExitingBB
, ExitCount
, UsePostInc
, L
, Rewriter
, SE
);
991 assert(ExitCnt
->getType()->isPointerTy() ==
992 IndVar
->getType()->isPointerTy() &&
993 "genLoopLimit missed a cast");
995 // Insert a new icmp_ne or icmp_eq instruction before the branch.
996 BranchInst
*BI
= cast
<BranchInst
>(ExitingBB
->getTerminator());
997 ICmpInst::Predicate P
;
998 if (L
->contains(BI
->getSuccessor(0)))
999 P
= ICmpInst::ICMP_NE
;
1001 P
= ICmpInst::ICMP_EQ
;
1003 IRBuilder
<> Builder(BI
);
1005 // The new loop exit condition should reuse the debug location of the
1006 // original loop exit condition.
1007 if (auto *Cond
= dyn_cast
<Instruction
>(BI
->getCondition()))
1008 Builder
.SetCurrentDebugLocation(Cond
->getDebugLoc());
1010 // For integer IVs, if we evaluated the limit in the narrower bitwidth to
1011 // avoid the expensive expansion of the limit expression in the wider type,
1012 // emit a truncate to narrow the IV to the ExitCount type. This is safe
1013 // since we know (from the exit count bitwidth), that we can't self-wrap in
1014 // the narrower type.
1015 unsigned CmpIndVarSize
= SE
->getTypeSizeInBits(CmpIndVar
->getType());
1016 unsigned ExitCntSize
= SE
->getTypeSizeInBits(ExitCnt
->getType());
1017 if (CmpIndVarSize
> ExitCntSize
) {
1018 assert(!CmpIndVar
->getType()->isPointerTy() &&
1019 !ExitCnt
->getType()->isPointerTy());
1021 // Before resorting to actually inserting the truncate, use the same
1022 // reasoning as from SimplifyIndvar::eliminateTrunc to see if we can extend
1023 // the other side of the comparison instead. We still evaluate the limit
1024 // in the narrower bitwidth, we just prefer a zext/sext outside the loop to
1025 // a truncate within in.
1026 bool Extended
= false;
1027 const SCEV
*IV
= SE
->getSCEV(CmpIndVar
);
1028 const SCEV
*TruncatedIV
= SE
->getTruncateExpr(IV
, ExitCnt
->getType());
1029 const SCEV
*ZExtTrunc
=
1030 SE
->getZeroExtendExpr(TruncatedIV
, CmpIndVar
->getType());
1032 if (ZExtTrunc
== IV
) {
1034 ExitCnt
= Builder
.CreateZExt(ExitCnt
, IndVar
->getType(),
1037 const SCEV
*SExtTrunc
=
1038 SE
->getSignExtendExpr(TruncatedIV
, CmpIndVar
->getType());
1039 if (SExtTrunc
== IV
) {
1041 ExitCnt
= Builder
.CreateSExt(ExitCnt
, IndVar
->getType(),
1048 L
->makeLoopInvariant(ExitCnt
, Discard
);
1050 CmpIndVar
= Builder
.CreateTrunc(CmpIndVar
, ExitCnt
->getType(),
1053 LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
1054 << " LHS:" << *CmpIndVar
<< '\n'
1055 << " op:\t" << (P
== ICmpInst::ICMP_NE
? "!=" : "==")
1057 << " RHS:\t" << *ExitCnt
<< "\n"
1058 << "ExitCount:\t" << *ExitCount
<< "\n"
1059 << " was: " << *BI
->getCondition() << "\n");
1061 Value
*Cond
= Builder
.CreateICmp(P
, CmpIndVar
, ExitCnt
, "exitcond");
1062 Value
*OrigCond
= BI
->getCondition();
1063 // It's tempting to use replaceAllUsesWith here to fully replace the old
1064 // comparison, but that's not immediately safe, since users of the old
1065 // comparison may not be dominated by the new comparison. Instead, just
1066 // update the branch to use the new comparison; in the common case this
1067 // will make old comparison dead.
1068 BI
->setCondition(Cond
);
1069 DeadInsts
.emplace_back(OrigCond
);
1075 //===----------------------------------------------------------------------===//
1076 // sinkUnusedInvariants. A late subpass to cleanup loop preheaders.
1077 //===----------------------------------------------------------------------===//
1079 /// If there's a single exit block, sink any loop-invariant values that
1080 /// were defined in the preheader but not used inside the loop into the
1081 /// exit block to reduce register pressure in the loop.
1082 bool IndVarSimplify::sinkUnusedInvariants(Loop
*L
) {
1083 BasicBlock
*ExitBlock
= L
->getExitBlock();
1084 if (!ExitBlock
) return false;
1086 BasicBlock
*Preheader
= L
->getLoopPreheader();
1087 if (!Preheader
) return false;
1089 bool MadeAnyChanges
= false;
1090 BasicBlock::iterator InsertPt
= ExitBlock
->getFirstInsertionPt();
1091 BasicBlock::iterator
I(Preheader
->getTerminator());
1092 while (I
!= Preheader
->begin()) {
1094 // New instructions were inserted at the end of the preheader.
1095 if (isa
<PHINode
>(I
))
1098 // Don't move instructions which might have side effects, since the side
1099 // effects need to complete before instructions inside the loop. Also don't
1100 // move instructions which might read memory, since the loop may modify
1101 // memory. Note that it's okay if the instruction might have undefined
1102 // behavior: LoopSimplify guarantees that the preheader dominates the exit
1104 if (I
->mayHaveSideEffects() || I
->mayReadFromMemory())
1107 // Skip debug info intrinsics.
1108 if (isa
<DbgInfoIntrinsic
>(I
))
1111 // Skip eh pad instructions.
1115 // Don't sink alloca: we never want to sink static alloca's out of the
1116 // entry block, and correctly sinking dynamic alloca's requires
1117 // checks for stacksave/stackrestore intrinsics.
1118 // FIXME: Refactor this check somehow?
1119 if (isa
<AllocaInst
>(I
))
1122 // Determine if there is a use in or before the loop (direct or
1124 bool UsedInLoop
= false;
1125 for (Use
&U
: I
->uses()) {
1126 Instruction
*User
= cast
<Instruction
>(U
.getUser());
1127 BasicBlock
*UseBB
= User
->getParent();
1128 if (PHINode
*P
= dyn_cast
<PHINode
>(User
)) {
1130 PHINode::getIncomingValueNumForOperand(U
.getOperandNo());
1131 UseBB
= P
->getIncomingBlock(i
);
1133 if (UseBB
== Preheader
|| L
->contains(UseBB
)) {
1139 // If there is, the def must remain in the preheader.
1143 // Otherwise, sink it to the exit block.
1144 Instruction
*ToMove
= &*I
;
1147 if (I
!= Preheader
->begin()) {
1148 // Skip debug info intrinsics.
1151 } while (I
->isDebugOrPseudoInst() && I
!= Preheader
->begin());
1153 if (I
->isDebugOrPseudoInst() && I
== Preheader
->begin())
1159 MadeAnyChanges
= true;
1160 ToMove
->moveBefore(*ExitBlock
, InsertPt
);
1161 SE
->forgetValue(ToMove
);
1163 InsertPt
= ToMove
->getIterator();
1166 return MadeAnyChanges
;
1169 static void replaceExitCond(BranchInst
*BI
, Value
*NewCond
,
1170 SmallVectorImpl
<WeakTrackingVH
> &DeadInsts
) {
1171 auto *OldCond
= BI
->getCondition();
1172 LLVM_DEBUG(dbgs() << "Replacing condition of loop-exiting branch " << *BI
1173 << " with " << *NewCond
<< "\n");
1174 BI
->setCondition(NewCond
);
1175 if (OldCond
->use_empty())
1176 DeadInsts
.emplace_back(OldCond
);
1179 static Constant
*createFoldedExitCond(const Loop
*L
, BasicBlock
*ExitingBB
,
1181 BranchInst
*BI
= cast
<BranchInst
>(ExitingBB
->getTerminator());
1182 bool ExitIfTrue
= !L
->contains(*succ_begin(ExitingBB
));
1183 auto *OldCond
= BI
->getCondition();
1184 return ConstantInt::get(OldCond
->getType(),
1185 IsTaken
? ExitIfTrue
: !ExitIfTrue
);
1188 static void foldExit(const Loop
*L
, BasicBlock
*ExitingBB
, bool IsTaken
,
1189 SmallVectorImpl
<WeakTrackingVH
> &DeadInsts
) {
1190 BranchInst
*BI
= cast
<BranchInst
>(ExitingBB
->getTerminator());
1191 auto *NewCond
= createFoldedExitCond(L
, ExitingBB
, IsTaken
);
1192 replaceExitCond(BI
, NewCond
, DeadInsts
);
1195 static void replaceLoopPHINodesWithPreheaderValues(
1196 LoopInfo
*LI
, Loop
*L
, SmallVectorImpl
<WeakTrackingVH
> &DeadInsts
,
1197 ScalarEvolution
&SE
) {
1198 assert(L
->isLoopSimplifyForm() && "Should only do it in simplify form!");
1199 auto *LoopPreheader
= L
->getLoopPreheader();
1200 auto *LoopHeader
= L
->getHeader();
1201 SmallVector
<Instruction
*> Worklist
;
1202 for (auto &PN
: LoopHeader
->phis()) {
1203 auto *PreheaderIncoming
= PN
.getIncomingValueForBlock(LoopPreheader
);
1204 for (User
*U
: PN
.users())
1205 Worklist
.push_back(cast
<Instruction
>(U
));
1206 SE
.forgetValue(&PN
);
1207 PN
.replaceAllUsesWith(PreheaderIncoming
);
1208 DeadInsts
.emplace_back(&PN
);
1211 // Replacing with the preheader value will often allow IV users to simplify
1212 // (especially if the preheader value is a constant).
1213 SmallPtrSet
<Instruction
*, 16> Visited
;
1214 while (!Worklist
.empty()) {
1215 auto *I
= cast
<Instruction
>(Worklist
.pop_back_val());
1216 if (!Visited
.insert(I
).second
)
1219 // Don't simplify instructions outside the loop.
1220 if (!L
->contains(I
))
1223 Value
*Res
= simplifyInstruction(I
, I
->getModule()->getDataLayout());
1224 if (Res
&& LI
->replacementPreservesLCSSAForm(I
, Res
)) {
1225 for (User
*U
: I
->users())
1226 Worklist
.push_back(cast
<Instruction
>(U
));
1227 I
->replaceAllUsesWith(Res
);
1228 DeadInsts
.emplace_back(I
);
1234 createInvariantCond(const Loop
*L
, BasicBlock
*ExitingBB
,
1235 const ScalarEvolution::LoopInvariantPredicate
&LIP
,
1236 SCEVExpander
&Rewriter
) {
1237 ICmpInst::Predicate InvariantPred
= LIP
.Pred
;
1238 BasicBlock
*Preheader
= L
->getLoopPreheader();
1239 assert(Preheader
&& "Preheader doesn't exist");
1240 Rewriter
.setInsertPoint(Preheader
->getTerminator());
1241 auto *LHSV
= Rewriter
.expandCodeFor(LIP
.LHS
);
1242 auto *RHSV
= Rewriter
.expandCodeFor(LIP
.RHS
);
1243 bool ExitIfTrue
= !L
->contains(*succ_begin(ExitingBB
));
1245 InvariantPred
= ICmpInst::getInversePredicate(InvariantPred
);
1246 IRBuilder
<> Builder(Preheader
->getTerminator());
1247 BranchInst
*BI
= cast
<BranchInst
>(ExitingBB
->getTerminator());
1248 return Builder
.CreateICmp(InvariantPred
, LHSV
, RHSV
,
1249 BI
->getCondition()->getName());
1252 static std::optional
<Value
*>
1253 createReplacement(ICmpInst
*ICmp
, const Loop
*L
, BasicBlock
*ExitingBB
,
1254 const SCEV
*MaxIter
, bool Inverted
, bool SkipLastIter
,
1255 ScalarEvolution
*SE
, SCEVExpander
&Rewriter
) {
1256 ICmpInst::Predicate Pred
= ICmp
->getPredicate();
1257 Value
*LHS
= ICmp
->getOperand(0);
1258 Value
*RHS
= ICmp
->getOperand(1);
1260 // 'LHS pred RHS' should now mean that we stay in loop.
1261 auto *BI
= cast
<BranchInst
>(ExitingBB
->getTerminator());
1263 Pred
= CmpInst::getInversePredicate(Pred
);
1265 const SCEV
*LHSS
= SE
->getSCEVAtScope(LHS
, L
);
1266 const SCEV
*RHSS
= SE
->getSCEVAtScope(RHS
, L
);
1267 // Can we prove it to be trivially true or false?
1268 if (auto EV
= SE
->evaluatePredicateAt(Pred
, LHSS
, RHSS
, BI
))
1269 return createFoldedExitCond(L
, ExitingBB
, /*IsTaken*/ !*EV
);
1271 auto *ARTy
= LHSS
->getType();
1272 auto *MaxIterTy
= MaxIter
->getType();
1273 // If possible, adjust types.
1274 if (SE
->getTypeSizeInBits(ARTy
) > SE
->getTypeSizeInBits(MaxIterTy
))
1275 MaxIter
= SE
->getZeroExtendExpr(MaxIter
, ARTy
);
1276 else if (SE
->getTypeSizeInBits(ARTy
) < SE
->getTypeSizeInBits(MaxIterTy
)) {
1277 const SCEV
*MinusOne
= SE
->getMinusOne(ARTy
);
1278 auto *MaxAllowedIter
= SE
->getZeroExtendExpr(MinusOne
, MaxIterTy
);
1279 if (SE
->isKnownPredicateAt(ICmpInst::ICMP_ULE
, MaxIter
, MaxAllowedIter
, BI
))
1280 MaxIter
= SE
->getTruncateExpr(MaxIter
, ARTy
);
1284 // Semantically skip last iter is "subtract 1, do not bother about unsigned
1285 // wrap". getLoopInvariantExitCondDuringFirstIterations knows how to deal
1286 // with umin in a smart way, but umin(a, b) - 1 will likely not simplify.
1287 // So we manually construct umin(a - 1, b - 1).
1288 SmallVector
<const SCEV
*, 4> Elements
;
1289 if (auto *UMin
= dyn_cast
<SCEVUMinExpr
>(MaxIter
)) {
1290 for (auto *Op
: UMin
->operands())
1291 Elements
.push_back(SE
->getMinusSCEV(Op
, SE
->getOne(Op
->getType())));
1292 MaxIter
= SE
->getUMinFromMismatchedTypes(Elements
);
1294 MaxIter
= SE
->getMinusSCEV(MaxIter
, SE
->getOne(MaxIter
->getType()));
1297 // Check if there is a loop-invariant predicate equivalent to our check.
1298 auto LIP
= SE
->getLoopInvariantExitCondDuringFirstIterations(Pred
, LHSS
, RHSS
,
1301 return std::nullopt
;
1303 // Can we prove it to be trivially true?
1304 if (SE
->isKnownPredicateAt(LIP
->Pred
, LIP
->LHS
, LIP
->RHS
, BI
))
1305 return createFoldedExitCond(L
, ExitingBB
, /*IsTaken*/ false);
1307 return createInvariantCond(L
, ExitingBB
, *LIP
, Rewriter
);
1310 static bool optimizeLoopExitWithUnknownExitCount(
1311 const Loop
*L
, BranchInst
*BI
, BasicBlock
*ExitingBB
, const SCEV
*MaxIter
,
1312 bool SkipLastIter
, ScalarEvolution
*SE
, SCEVExpander
&Rewriter
,
1313 SmallVectorImpl
<WeakTrackingVH
> &DeadInsts
) {
1315 (L
->contains(BI
->getSuccessor(0)) != L
->contains(BI
->getSuccessor(1))) &&
1316 "Not a loop exit!");
1318 // For branch that stays in loop by TRUE condition, go through AND. For branch
1319 // that stays in loop by FALSE condition, go through OR. Both gives the
1320 // similar logic: "stay in loop iff all conditions are true(false)".
1321 bool Inverted
= L
->contains(BI
->getSuccessor(1));
1322 SmallVector
<ICmpInst
*, 4> LeafConditions
;
1323 SmallVector
<Value
*, 4> Worklist
;
1324 SmallPtrSet
<Value
*, 4> Visited
;
1325 Value
*OldCond
= BI
->getCondition();
1326 Visited
.insert(OldCond
);
1327 Worklist
.push_back(OldCond
);
1329 auto GoThrough
= [&](Value
*V
) {
1330 Value
*LHS
= nullptr, *RHS
= nullptr;
1332 if (!match(V
, m_LogicalOr(m_Value(LHS
), m_Value(RHS
))))
1335 if (!match(V
, m_LogicalAnd(m_Value(LHS
), m_Value(RHS
))))
1338 if (Visited
.insert(LHS
).second
)
1339 Worklist
.push_back(LHS
);
1340 if (Visited
.insert(RHS
).second
)
1341 Worklist
.push_back(RHS
);
1346 Value
*Curr
= Worklist
.pop_back_val();
1347 // Go through AND/OR conditions. Collect leaf ICMPs. We only care about
1348 // those with one use, to avoid instruction duplication.
1349 if (Curr
->hasOneUse())
1350 if (!GoThrough(Curr
))
1351 if (auto *ICmp
= dyn_cast
<ICmpInst
>(Curr
))
1352 LeafConditions
.push_back(ICmp
);
1353 } while (!Worklist
.empty());
1355 // If the current basic block has the same exit count as the whole loop, and
1356 // it consists of multiple icmp's, try to collect all icmp's that give exact
1357 // same exit count. For all other icmp's, we could use one less iteration,
1358 // because their value on the last iteration doesn't really matter.
1359 SmallPtrSet
<ICmpInst
*, 4> ICmpsFailingOnLastIter
;
1360 if (!SkipLastIter
&& LeafConditions
.size() > 1 &&
1361 SE
->getExitCount(L
, ExitingBB
,
1362 ScalarEvolution::ExitCountKind::SymbolicMaximum
) ==
1364 for (auto *ICmp
: LeafConditions
) {
1365 auto EL
= SE
->computeExitLimitFromCond(L
, ICmp
, Inverted
,
1366 /*ControlsExit*/ false);
1367 auto *ExitMax
= EL
.SymbolicMaxNotTaken
;
1368 if (isa
<SCEVCouldNotCompute
>(ExitMax
))
1370 // They could be of different types (specifically this happens after
1373 SE
->getWiderType(ExitMax
->getType(), MaxIter
->getType());
1374 auto *WideExitMax
= SE
->getNoopOrZeroExtend(ExitMax
, WiderType
);
1375 auto *WideMaxIter
= SE
->getNoopOrZeroExtend(MaxIter
, WiderType
);
1376 if (WideExitMax
== WideMaxIter
)
1377 ICmpsFailingOnLastIter
.insert(ICmp
);
1380 bool Changed
= false;
1381 for (auto *OldCond
: LeafConditions
) {
1382 // Skip last iteration for this icmp under one of two conditions:
1383 // - We do it for all conditions;
1384 // - There is another ICmp that would fail on last iter, so this one doesn't
1386 bool OptimisticSkipLastIter
= SkipLastIter
;
1387 if (!OptimisticSkipLastIter
) {
1388 if (ICmpsFailingOnLastIter
.size() > 1)
1389 OptimisticSkipLastIter
= true;
1390 else if (ICmpsFailingOnLastIter
.size() == 1)
1391 OptimisticSkipLastIter
= !ICmpsFailingOnLastIter
.count(OldCond
);
1394 createReplacement(OldCond
, L
, ExitingBB
, MaxIter
, Inverted
,
1395 OptimisticSkipLastIter
, SE
, Rewriter
)) {
1397 auto *NewCond
= *Replaced
;
1398 if (auto *NCI
= dyn_cast
<Instruction
>(NewCond
)) {
1399 NCI
->setName(OldCond
->getName() + ".first_iter");
1401 LLVM_DEBUG(dbgs() << "Unknown exit count: Replacing " << *OldCond
1402 << " with " << *NewCond
<< "\n");
1403 assert(OldCond
->hasOneUse() && "Must be!");
1404 OldCond
->replaceAllUsesWith(NewCond
);
1405 DeadInsts
.push_back(OldCond
);
1406 // Make sure we no longer consider this condition as failing on last
1408 ICmpsFailingOnLastIter
.erase(OldCond
);
1414 bool IndVarSimplify::canonicalizeExitCondition(Loop
*L
) {
1415 // Note: This is duplicating a particular part on SimplifyIndVars reasoning.
1416 // We need to duplicate it because given icmp zext(small-iv), C, IVUsers
1417 // never reaches the icmp since the zext doesn't fold to an AddRec unless
1418 // it already has flags. The alternative to this would be to extending the
1419 // set of "interesting" IV users to include the icmp, but doing that
1420 // regresses results in practice by querying SCEVs before trip counts which
1421 // rely on them which results in SCEV caching sub-optimal answers. The
1422 // concern about caching sub-optimal results is why we only query SCEVs of
1423 // the loop invariant RHS here.
1424 SmallVector
<BasicBlock
*, 16> ExitingBlocks
;
1425 L
->getExitingBlocks(ExitingBlocks
);
1426 bool Changed
= false;
1427 for (auto *ExitingBB
: ExitingBlocks
) {
1428 auto *BI
= dyn_cast
<BranchInst
>(ExitingBB
->getTerminator());
1431 assert(BI
->isConditional() && "exit branch must be conditional");
1433 auto *ICmp
= dyn_cast
<ICmpInst
>(BI
->getCondition());
1434 if (!ICmp
|| !ICmp
->hasOneUse())
1437 auto *LHS
= ICmp
->getOperand(0);
1438 auto *RHS
= ICmp
->getOperand(1);
1439 // For the range reasoning, avoid computing SCEVs in the loop to avoid
1440 // poisoning cache with sub-optimal results. For the must-execute case,
1441 // this is a neccessary precondition for correctness.
1442 if (!L
->isLoopInvariant(RHS
)) {
1443 if (!L
->isLoopInvariant(LHS
))
1445 // Same logic applies for the inverse case
1446 std::swap(LHS
, RHS
);
1449 // Match (icmp signed-cond zext, RHS)
1450 Value
*LHSOp
= nullptr;
1451 if (!match(LHS
, m_ZExt(m_Value(LHSOp
))) || !ICmp
->isSigned())
1454 const DataLayout
&DL
= ExitingBB
->getModule()->getDataLayout();
1455 const unsigned InnerBitWidth
= DL
.getTypeSizeInBits(LHSOp
->getType());
1456 const unsigned OuterBitWidth
= DL
.getTypeSizeInBits(RHS
->getType());
1457 auto FullCR
= ConstantRange::getFull(InnerBitWidth
);
1458 FullCR
= FullCR
.zeroExtend(OuterBitWidth
);
1459 auto RHSCR
= SE
->getUnsignedRange(SE
->applyLoopGuards(SE
->getSCEV(RHS
), L
));
1460 if (FullCR
.contains(RHSCR
)) {
1461 // We have now matched icmp signed-cond zext(X), zext(Y'), and can thus
1462 // replace the signed condition with the unsigned version.
1463 ICmp
->setPredicate(ICmp
->getUnsignedPredicate());
1465 // Note: No SCEV invalidation needed. We've changed the predicate, but
1466 // have not changed exit counts, or the values produced by the compare.
1471 // Now that we've canonicalized the condition to match the extend,
1472 // see if we can rotate the extend out of the loop.
1473 for (auto *ExitingBB
: ExitingBlocks
) {
1474 auto *BI
= dyn_cast
<BranchInst
>(ExitingBB
->getTerminator());
1477 assert(BI
->isConditional() && "exit branch must be conditional");
1479 auto *ICmp
= dyn_cast
<ICmpInst
>(BI
->getCondition());
1480 if (!ICmp
|| !ICmp
->hasOneUse() || !ICmp
->isUnsigned())
1483 bool Swapped
= false;
1484 auto *LHS
= ICmp
->getOperand(0);
1485 auto *RHS
= ICmp
->getOperand(1);
1486 if (L
->isLoopInvariant(LHS
) == L
->isLoopInvariant(RHS
))
1487 // Nothing to rotate
1489 if (L
->isLoopInvariant(LHS
)) {
1490 // Same logic applies for the inverse case until we actually pick
1491 // which operand of the compare to update.
1493 std::swap(LHS
, RHS
);
1495 assert(!L
->isLoopInvariant(LHS
) && L
->isLoopInvariant(RHS
));
1497 // Match (icmp unsigned-cond zext, RHS)
1498 // TODO: Extend to handle corresponding sext/signed-cmp case
1499 // TODO: Extend to other invertible functions
1500 Value
*LHSOp
= nullptr;
1501 if (!match(LHS
, m_ZExt(m_Value(LHSOp
))))
1504 // In general, we only rotate if we can do so without increasing the number
1505 // of instructions. The exception is when we have an zext(add-rec). The
1506 // reason for allowing this exception is that we know we need to get rid
1507 // of the zext for SCEV to be able to compute a trip count for said loops;
1508 // we consider the new trip count valuable enough to increase instruction
1510 if (!LHS
->hasOneUse() && !isa
<SCEVAddRecExpr
>(SE
->getSCEV(LHSOp
)))
1513 // Given a icmp unsigned-cond zext(Op) where zext(trunc(RHS)) == RHS
1514 // replace with an icmp of the form icmp unsigned-cond Op, trunc(RHS)
1515 // when zext is loop varying and RHS is loop invariant. This converts
1516 // loop varying work to loop-invariant work.
1517 auto doRotateTransform
= [&]() {
1518 assert(ICmp
->isUnsigned() && "must have proven unsigned already");
1520 CastInst::Create(Instruction::Trunc
, RHS
, LHSOp
->getType(), "",
1521 L
->getLoopPreheader()->getTerminator());
1522 ICmp
->setOperand(Swapped
? 1 : 0, LHSOp
);
1523 ICmp
->setOperand(Swapped
? 0 : 1, NewRHS
);
1524 if (LHS
->use_empty())
1525 DeadInsts
.push_back(LHS
);
1529 const DataLayout
&DL
= ExitingBB
->getModule()->getDataLayout();
1530 const unsigned InnerBitWidth
= DL
.getTypeSizeInBits(LHSOp
->getType());
1531 const unsigned OuterBitWidth
= DL
.getTypeSizeInBits(RHS
->getType());
1532 auto FullCR
= ConstantRange::getFull(InnerBitWidth
);
1533 FullCR
= FullCR
.zeroExtend(OuterBitWidth
);
1534 auto RHSCR
= SE
->getUnsignedRange(SE
->applyLoopGuards(SE
->getSCEV(RHS
), L
));
1535 if (FullCR
.contains(RHSCR
)) {
1536 doRotateTransform();
1538 // Note, we are leaving SCEV in an unfortunately imprecise case here
1539 // as rotation tends to reveal information about trip counts not
1540 // previously visible.
1548 bool IndVarSimplify::optimizeLoopExits(Loop
*L
, SCEVExpander
&Rewriter
) {
1549 SmallVector
<BasicBlock
*, 16> ExitingBlocks
;
1550 L
->getExitingBlocks(ExitingBlocks
);
1552 // Remove all exits which aren't both rewriteable and execute on every
1554 llvm::erase_if(ExitingBlocks
, [&](BasicBlock
*ExitingBB
) {
1555 // If our exitting block exits multiple loops, we can only rewrite the
1556 // innermost one. Otherwise, we're changing how many times the innermost
1557 // loop runs before it exits.
1558 if (LI
->getLoopFor(ExitingBB
) != L
)
1561 // Can't rewrite non-branch yet.
1562 BranchInst
*BI
= dyn_cast
<BranchInst
>(ExitingBB
->getTerminator());
1566 // Likewise, the loop latch must be dominated by the exiting BB.
1567 if (!DT
->dominates(ExitingBB
, L
->getLoopLatch()))
1570 if (auto *CI
= dyn_cast
<ConstantInt
>(BI
->getCondition())) {
1571 // If already constant, nothing to do. However, if this is an
1572 // unconditional exit, we can still replace header phis with their
1574 if (!L
->contains(BI
->getSuccessor(CI
->isNullValue())))
1575 replaceLoopPHINodesWithPreheaderValues(LI
, L
, DeadInsts
, *SE
);
1582 if (ExitingBlocks
.empty())
1585 // Get a symbolic upper bound on the loop backedge taken count.
1586 const SCEV
*MaxBECount
= SE
->getSymbolicMaxBackedgeTakenCount(L
);
1587 if (isa
<SCEVCouldNotCompute
>(MaxBECount
))
1590 // Visit our exit blocks in order of dominance. We know from the fact that
1591 // all exits must dominate the latch, so there is a total dominance order
1593 llvm::sort(ExitingBlocks
, [&](BasicBlock
*A
, BasicBlock
*B
) {
1594 // std::sort sorts in ascending order, so we want the inverse of
1595 // the normal dominance relation.
1596 if (A
== B
) return false;
1597 if (DT
->properlyDominates(A
, B
))
1600 assert(DT
->properlyDominates(B
, A
) &&
1601 "expected total dominance order!");
1606 for (unsigned i
= 1; i
< ExitingBlocks
.size(); i
++) {
1607 assert(DT
->dominates(ExitingBlocks
[i
-1], ExitingBlocks
[i
]));
1611 bool Changed
= false;
1612 bool SkipLastIter
= false;
1613 const SCEV
*CurrMaxExit
= SE
->getCouldNotCompute();
1614 auto UpdateSkipLastIter
= [&](const SCEV
*MaxExitCount
) {
1615 if (SkipLastIter
|| isa
<SCEVCouldNotCompute
>(MaxExitCount
))
1617 if (isa
<SCEVCouldNotCompute
>(CurrMaxExit
))
1618 CurrMaxExit
= MaxExitCount
;
1620 CurrMaxExit
= SE
->getUMinFromMismatchedTypes(CurrMaxExit
, MaxExitCount
);
1621 // If the loop has more than 1 iteration, all further checks will be
1622 // executed 1 iteration less.
1623 if (CurrMaxExit
== MaxBECount
)
1624 SkipLastIter
= true;
1626 SmallSet
<const SCEV
*, 8> DominatingExactExitCounts
;
1627 for (BasicBlock
*ExitingBB
: ExitingBlocks
) {
1628 const SCEV
*ExactExitCount
= SE
->getExitCount(L
, ExitingBB
);
1629 const SCEV
*MaxExitCount
= SE
->getExitCount(
1630 L
, ExitingBB
, ScalarEvolution::ExitCountKind::SymbolicMaximum
);
1631 if (isa
<SCEVCouldNotCompute
>(ExactExitCount
)) {
1632 // Okay, we do not know the exit count here. Can we at least prove that it
1633 // will remain the same within iteration space?
1634 auto *BI
= cast
<BranchInst
>(ExitingBB
->getTerminator());
1635 auto OptimizeCond
= [&](bool SkipLastIter
) {
1636 return optimizeLoopExitWithUnknownExitCount(L
, BI
, ExitingBB
,
1637 MaxBECount
, SkipLastIter
,
1638 SE
, Rewriter
, DeadInsts
);
1641 // TODO: We might have proved that we can skip the last iteration for
1642 // this check. In this case, we only want to check the condition on the
1643 // pre-last iteration (MaxBECount - 1). However, there is a nasty
1646 // for (i = len; i != 0; i--) { ... check (i ult X) ... }
1648 // If we could not prove that len != 0, then we also could not prove that
1649 // (len - 1) is not a UINT_MAX. If we simply query (len - 1), then
1650 // OptimizeCond will likely not prove anything for it, even if it could
1651 // prove the same fact for len.
1653 // As a temporary solution, we query both last and pre-last iterations in
1654 // hope that we will be able to prove triviality for at least one of
1655 // them. We can stop querying MaxBECount for this case once SCEV
1656 // understands that (MaxBECount - 1) will not overflow here.
1657 if (OptimizeCond(false))
1659 else if (SkipLastIter
&& OptimizeCond(true))
1661 UpdateSkipLastIter(MaxExitCount
);
1665 UpdateSkipLastIter(ExactExitCount
);
1667 // If we know we'd exit on the first iteration, rewrite the exit to
1668 // reflect this. This does not imply the loop must exit through this
1669 // exit; there may be an earlier one taken on the first iteration.
1670 // We know that the backedge can't be taken, so we replace all
1671 // the header PHIs with values coming from the preheader.
1672 if (ExactExitCount
->isZero()) {
1673 foldExit(L
, ExitingBB
, true, DeadInsts
);
1674 replaceLoopPHINodesWithPreheaderValues(LI
, L
, DeadInsts
, *SE
);
1679 assert(ExactExitCount
->getType()->isIntegerTy() &&
1680 MaxBECount
->getType()->isIntegerTy() &&
1681 "Exit counts must be integers");
1684 SE
->getWiderType(MaxBECount
->getType(), ExactExitCount
->getType());
1685 ExactExitCount
= SE
->getNoopOrZeroExtend(ExactExitCount
, WiderType
);
1686 MaxBECount
= SE
->getNoopOrZeroExtend(MaxBECount
, WiderType
);
1687 assert(MaxBECount
->getType() == ExactExitCount
->getType());
1689 // Can we prove that some other exit must be taken strictly before this
1691 if (SE
->isLoopEntryGuardedByCond(L
, CmpInst::ICMP_ULT
, MaxBECount
,
1693 foldExit(L
, ExitingBB
, false, DeadInsts
);
1698 // As we run, keep track of which exit counts we've encountered. If we
1699 // find a duplicate, we've found an exit which would have exited on the
1700 // exiting iteration, but (from the visit order) strictly follows another
1701 // which does the same and is thus dead.
1702 if (!DominatingExactExitCounts
.insert(ExactExitCount
).second
) {
1703 foldExit(L
, ExitingBB
, false, DeadInsts
);
1708 // TODO: There might be another oppurtunity to leverage SCEV's reasoning
1709 // here. If we kept track of the min of dominanting exits so far, we could
1710 // discharge exits with EC >= MDEC. This is less powerful than the existing
1711 // transform (since later exits aren't considered), but potentially more
1712 // powerful for any case where SCEV can prove a >=u b, but neither a == b
1713 // or a >u b. Such a case is not currently known.
1718 bool IndVarSimplify::predicateLoopExits(Loop
*L
, SCEVExpander
&Rewriter
) {
1719 SmallVector
<BasicBlock
*, 16> ExitingBlocks
;
1720 L
->getExitingBlocks(ExitingBlocks
);
1722 // Finally, see if we can rewrite our exit conditions into a loop invariant
1723 // form. If we have a read-only loop, and we can tell that we must exit down
1724 // a path which does not need any of the values computed within the loop, we
1725 // can rewrite the loop to exit on the first iteration. Note that this
1726 // doesn't either a) tell us the loop exits on the first iteration (unless
1727 // *all* exits are predicateable) or b) tell us *which* exit might be taken.
1728 // This transformation looks a lot like a restricted form of dead loop
1729 // elimination, but restricted to read-only loops and without neccesssarily
1730 // needing to kill the loop entirely.
1731 if (!LoopPredication
)
1734 // Note: ExactBTC is the exact backedge taken count *iff* the loop exits
1735 // through *explicit* control flow. We have to eliminate the possibility of
1736 // implicit exits (see below) before we know it's truly exact.
1737 const SCEV
*ExactBTC
= SE
->getBackedgeTakenCount(L
);
1738 if (isa
<SCEVCouldNotCompute
>(ExactBTC
) || !Rewriter
.isSafeToExpand(ExactBTC
))
1741 assert(SE
->isLoopInvariant(ExactBTC
, L
) && "BTC must be loop invariant");
1742 assert(ExactBTC
->getType()->isIntegerTy() && "BTC must be integer");
1744 auto BadExit
= [&](BasicBlock
*ExitingBB
) {
1745 // If our exiting block exits multiple loops, we can only rewrite the
1746 // innermost one. Otherwise, we're changing how many times the innermost
1747 // loop runs before it exits.
1748 if (LI
->getLoopFor(ExitingBB
) != L
)
1751 // Can't rewrite non-branch yet.
1752 BranchInst
*BI
= dyn_cast
<BranchInst
>(ExitingBB
->getTerminator());
1756 // If already constant, nothing to do.
1757 if (isa
<Constant
>(BI
->getCondition()))
1760 // If the exit block has phis, we need to be able to compute the values
1761 // within the loop which contains them. This assumes trivially lcssa phis
1762 // have already been removed; TODO: generalize
1763 BasicBlock
*ExitBlock
=
1764 BI
->getSuccessor(L
->contains(BI
->getSuccessor(0)) ? 1 : 0);
1765 if (!ExitBlock
->phis().empty())
1768 const SCEV
*ExitCount
= SE
->getExitCount(L
, ExitingBB
);
1769 if (isa
<SCEVCouldNotCompute
>(ExitCount
) ||
1770 !Rewriter
.isSafeToExpand(ExitCount
))
1773 assert(SE
->isLoopInvariant(ExitCount
, L
) &&
1774 "Exit count must be loop invariant");
1775 assert(ExitCount
->getType()->isIntegerTy() && "Exit count must be integer");
1779 // If we have any exits which can't be predicated themselves, than we can't
1780 // predicate any exit which isn't guaranteed to execute before it. Consider
1781 // two exits (a) and (b) which would both exit on the same iteration. If we
1782 // can predicate (b), but not (a), and (a) preceeds (b) along some path, then
1783 // we could convert a loop from exiting through (a) to one exiting through
1784 // (b). Note that this problem exists only for exits with the same exit
1785 // count, and we could be more aggressive when exit counts are known inequal.
1786 llvm::sort(ExitingBlocks
,
1787 [&](BasicBlock
*A
, BasicBlock
*B
) {
1788 // std::sort sorts in ascending order, so we want the inverse of
1789 // the normal dominance relation, plus a tie breaker for blocks
1790 // unordered by dominance.
1791 if (DT
->properlyDominates(A
, B
)) return true;
1792 if (DT
->properlyDominates(B
, A
)) return false;
1793 return A
->getName() < B
->getName();
1795 // Check to see if our exit blocks are a total order (i.e. a linear chain of
1796 // exits before the backedge). If they aren't, reasoning about reachability
1797 // is complicated and we choose not to for now.
1798 for (unsigned i
= 1; i
< ExitingBlocks
.size(); i
++)
1799 if (!DT
->dominates(ExitingBlocks
[i
-1], ExitingBlocks
[i
]))
1802 // Given our sorted total order, we know that exit[j] must be evaluated
1803 // after all exit[i] such j > i.
1804 for (unsigned i
= 0, e
= ExitingBlocks
.size(); i
< e
; i
++)
1805 if (BadExit(ExitingBlocks
[i
])) {
1806 ExitingBlocks
.resize(i
);
1810 if (ExitingBlocks
.empty())
1813 // We rely on not being able to reach an exiting block on a later iteration
1814 // then it's statically compute exit count. The implementaton of
1815 // getExitCount currently has this invariant, but assert it here so that
1816 // breakage is obvious if this ever changes..
1817 assert(llvm::all_of(ExitingBlocks
, [&](BasicBlock
*ExitingBB
) {
1818 return DT
->dominates(ExitingBB
, L
->getLoopLatch());
1821 // At this point, ExitingBlocks consists of only those blocks which are
1822 // predicatable. Given that, we know we have at least one exit we can
1823 // predicate if the loop is doesn't have side effects and doesn't have any
1824 // implicit exits (because then our exact BTC isn't actually exact).
1825 // @Reviewers - As structured, this is O(I^2) for loop nests. Any
1826 // suggestions on how to improve this? I can obviously bail out for outer
1827 // loops, but that seems less than ideal. MemorySSA can find memory writes,
1828 // is that enough for *all* side effects?
1829 for (BasicBlock
*BB
: L
->blocks())
1831 // TODO:isGuaranteedToTransfer
1832 if (I
.mayHaveSideEffects())
1835 bool Changed
= false;
1836 // Finally, do the actual predication for all predicatable blocks. A couple
1838 // 1) We don't bother to constant fold dominated exits with identical exit
1839 // counts; that's simply a form of CSE/equality propagation and we leave
1840 // it for dedicated passes.
1841 // 2) We insert the comparison at the branch. Hoisting introduces additional
1842 // legality constraints and we leave that to dedicated logic. We want to
1843 // predicate even if we can't insert a loop invariant expression as
1844 // peeling or unrolling will likely reduce the cost of the otherwise loop
1846 Rewriter
.setInsertPoint(L
->getLoopPreheader()->getTerminator());
1847 IRBuilder
<> B(L
->getLoopPreheader()->getTerminator());
1848 Value
*ExactBTCV
= nullptr; // Lazily generated if needed.
1849 for (BasicBlock
*ExitingBB
: ExitingBlocks
) {
1850 const SCEV
*ExitCount
= SE
->getExitCount(L
, ExitingBB
);
1852 auto *BI
= cast
<BranchInst
>(ExitingBB
->getTerminator());
1854 if (ExitCount
== ExactBTC
) {
1855 NewCond
= L
->contains(BI
->getSuccessor(0)) ?
1856 B
.getFalse() : B
.getTrue();
1858 Value
*ECV
= Rewriter
.expandCodeFor(ExitCount
);
1860 ExactBTCV
= Rewriter
.expandCodeFor(ExactBTC
);
1861 Value
*RHS
= ExactBTCV
;
1862 if (ECV
->getType() != RHS
->getType()) {
1863 Type
*WiderTy
= SE
->getWiderType(ECV
->getType(), RHS
->getType());
1864 ECV
= B
.CreateZExt(ECV
, WiderTy
);
1865 RHS
= B
.CreateZExt(RHS
, WiderTy
);
1867 auto Pred
= L
->contains(BI
->getSuccessor(0)) ?
1868 ICmpInst::ICMP_NE
: ICmpInst::ICMP_EQ
;
1869 NewCond
= B
.CreateICmp(Pred
, ECV
, RHS
);
1871 Value
*OldCond
= BI
->getCondition();
1872 BI
->setCondition(NewCond
);
1873 if (OldCond
->use_empty())
1874 DeadInsts
.emplace_back(OldCond
);
1881 //===----------------------------------------------------------------------===//
1882 // IndVarSimplify driver. Manage several subpasses of IV simplification.
1883 //===----------------------------------------------------------------------===//
1885 bool IndVarSimplify::run(Loop
*L
) {
1886 // We need (and expect!) the incoming loop to be in LCSSA.
1887 assert(L
->isRecursivelyLCSSAForm(*DT
, *LI
) &&
1888 "LCSSA required to run indvars!");
1890 // If LoopSimplify form is not available, stay out of trouble. Some notes:
1891 // - LSR currently only supports LoopSimplify-form loops. Indvars'
1892 // canonicalization can be a pessimization without LSR to "clean up"
1894 // - We depend on having a preheader; in particular,
1895 // Loop::getCanonicalInductionVariable only supports loops with preheaders,
1896 // and we're in trouble if we can't find the induction variable even when
1897 // we've manually inserted one.
1898 // - LFTR relies on having a single backedge.
1899 if (!L
->isLoopSimplifyForm())
1902 bool Changed
= false;
1903 // If there are any floating-point recurrences, attempt to
1904 // transform them to use integer recurrences.
1905 Changed
|= rewriteNonIntegerIVs(L
);
1907 // Create a rewriter object which we'll use to transform the code with.
1908 SCEVExpander
Rewriter(*SE
, DL
, "indvars");
1910 Rewriter
.setDebugType(DEBUG_TYPE
);
1913 // Eliminate redundant IV users.
1915 // Simplification works best when run before other consumers of SCEV. We
1916 // attempt to avoid evaluating SCEVs for sign/zero extend operations until
1917 // other expressions involving loop IVs have been evaluated. This helps SCEV
1918 // set no-wrap flags before normalizing sign/zero extension.
1919 Rewriter
.disableCanonicalMode();
1920 Changed
|= simplifyAndExtend(L
, Rewriter
, LI
);
1922 // Check to see if we can compute the final value of any expressions
1923 // that are recurrent in the loop, and substitute the exit values from the
1924 // loop into any instructions outside of the loop that use the final values
1925 // of the current expressions.
1926 if (ReplaceExitValue
!= NeverRepl
) {
1927 if (int Rewrites
= rewriteLoopExitValues(L
, LI
, TLI
, SE
, TTI
, Rewriter
, DT
,
1928 ReplaceExitValue
, DeadInsts
)) {
1929 NumReplaced
+= Rewrites
;
1934 // Eliminate redundant IV cycles.
1935 NumElimIV
+= Rewriter
.replaceCongruentIVs(L
, DT
, DeadInsts
, TTI
);
1937 // Try to convert exit conditions to unsigned and rotate computation
1938 // out of the loop. Note: Handles invalidation internally if needed.
1939 Changed
|= canonicalizeExitCondition(L
);
1941 // Try to eliminate loop exits based on analyzeable exit counts
1942 if (optimizeLoopExits(L
, Rewriter
)) {
1944 // Given we've changed exit counts, notify SCEV
1945 // Some nested loops may share same folded exit basic block,
1946 // thus we need to notify top most loop.
1947 SE
->forgetTopmostLoop(L
);
1950 // Try to form loop invariant tests for loop exits by changing how many
1951 // iterations of the loop run when that is unobservable.
1952 if (predicateLoopExits(L
, Rewriter
)) {
1954 // Given we've changed exit counts, notify SCEV
1958 // If we have a trip count expression, rewrite the loop's exit condition
1961 BasicBlock
*PreHeader
= L
->getLoopPreheader();
1963 SmallVector
<BasicBlock
*, 16> ExitingBlocks
;
1964 L
->getExitingBlocks(ExitingBlocks
);
1965 for (BasicBlock
*ExitingBB
: ExitingBlocks
) {
1966 // Can't rewrite non-branch yet.
1967 if (!isa
<BranchInst
>(ExitingBB
->getTerminator()))
1970 // If our exitting block exits multiple loops, we can only rewrite the
1971 // innermost one. Otherwise, we're changing how many times the innermost
1972 // loop runs before it exits.
1973 if (LI
->getLoopFor(ExitingBB
) != L
)
1976 if (!needsLFTR(L
, ExitingBB
))
1979 const SCEV
*ExitCount
= SE
->getExitCount(L
, ExitingBB
);
1980 if (isa
<SCEVCouldNotCompute
>(ExitCount
))
1983 // This was handled above, but as we form SCEVs, we can sometimes refine
1984 // existing ones; this allows exit counts to be folded to zero which
1985 // weren't when optimizeLoopExits saw them. Arguably, we should iterate
1986 // until stable to handle cases like this better.
1987 if (ExitCount
->isZero())
1990 PHINode
*IndVar
= FindLoopCounter(L
, ExitingBB
, ExitCount
, SE
, DT
);
1994 // Avoid high cost expansions. Note: This heuristic is questionable in
1995 // that our definition of "high cost" is not exactly principled.
1996 if (Rewriter
.isHighCostExpansion(ExitCount
, L
, SCEVCheapExpansionBudget
,
1997 TTI
, PreHeader
->getTerminator()))
2000 if (!Rewriter
.isSafeToExpand(ExitCount
))
2003 Changed
|= linearFunctionTestReplace(L
, ExitingBB
,
2008 // Clear the rewriter cache, because values that are in the rewriter's cache
2009 // can be deleted in the loop below, causing the AssertingVH in the cache to
2013 // Now that we're done iterating through lists, clean up any instructions
2014 // which are now dead.
2015 while (!DeadInsts
.empty()) {
2016 Value
*V
= DeadInsts
.pop_back_val();
2018 if (PHINode
*PHI
= dyn_cast_or_null
<PHINode
>(V
))
2019 Changed
|= RecursivelyDeleteDeadPHINode(PHI
, TLI
, MSSAU
.get());
2020 else if (Instruction
*Inst
= dyn_cast_or_null
<Instruction
>(V
))
2022 RecursivelyDeleteTriviallyDeadInstructions(Inst
, TLI
, MSSAU
.get());
2025 // The Rewriter may not be used from this point on.
2027 // Loop-invariant instructions in the preheader that aren't used in the
2028 // loop may be sunk below the loop to reduce register pressure.
2029 Changed
|= sinkUnusedInvariants(L
);
2031 // rewriteFirstIterationLoopExitValues does not rely on the computation of
2032 // trip count and therefore can further simplify exit values in addition to
2033 // rewriteLoopExitValues.
2034 Changed
|= rewriteFirstIterationLoopExitValues(L
);
2036 // Clean up dead instructions.
2037 Changed
|= DeleteDeadPHIs(L
->getHeader(), TLI
, MSSAU
.get());
2039 // Check a post-condition.
2040 assert(L
->isRecursivelyLCSSAForm(*DT
, *LI
) &&
2041 "Indvars did not preserve LCSSA!");
2042 if (VerifyMemorySSA
&& MSSAU
)
2043 MSSAU
->getMemorySSA()->verifyMemorySSA();
2048 PreservedAnalyses
IndVarSimplifyPass::run(Loop
&L
, LoopAnalysisManager
&AM
,
2049 LoopStandardAnalysisResults
&AR
,
2051 Function
*F
= L
.getHeader()->getParent();
2052 const DataLayout
&DL
= F
->getParent()->getDataLayout();
2054 IndVarSimplify
IVS(&AR
.LI
, &AR
.SE
, &AR
.DT
, DL
, &AR
.TLI
, &AR
.TTI
, AR
.MSSA
,
2055 WidenIndVars
&& AllowIVWidening
);
2057 return PreservedAnalyses::all();
2059 auto PA
= getLoopPassPreservedAnalyses();
2060 PA
.preserveSet
<CFGAnalyses
>();
2062 PA
.preserve
<MemorySSAAnalysis
>();