//===- InductiveRangeCheckElimination.cpp - -------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// The InductiveRangeCheckElimination pass splits a loop's iteration space into
// three disjoint ranges.  It does that in a way such that the middle loop
// provably does not need range checks.  As an example, it will convert
//
//   len = < known positive >
//   for (i = 0; i < n; i++) {
//     if (0 <= i && i < len) {
//       do_something();
//     } else {
//       throw_out_of_bounds();
//     }
//   }
//
// to
//
//   len = < known positive >
//   limit = smin(n, len)
//   // no first segment
//   for (i = 0; i < limit; i++) {
//     if (0 <= i && i < len) { // this check is fully redundant
//       do_something();
//     } else {
//       throw_out_of_bounds();
//     }
//   }
//   for (i = limit; i < n; i++) {
//     if (0 <= i && i < len) {
//       do_something();
//     } else {
//       throw_out_of_bounds();
//     }
//   }
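//
// When the lower end of the range check is not already implied by the loop's
// starting value, a symmetric "pre" loop is emitted before the main loop as
// well, so in general the result is a pre loop, a range-check-free main loop,
// and a post loop (the example above happens to need no first segment).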
//
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Scalar/InductiveRangeCheckElimination.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/PriorityWorklist.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/LoopSimplify.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
#include <algorithm>
#include <cassert>
#include <iterator>
#include <limits>
#include <utility>
#include <vector>

using namespace llvm;
using namespace llvm::PatternMatch;

static cl::opt<unsigned> LoopSizeCutoff("irce-loop-size-cutoff", cl::Hidden,
                                        cl::init(64));

static cl::opt<bool> PrintChangedLoops("irce-print-changed-loops", cl::Hidden,
                                       cl::init(false));

static cl::opt<bool> PrintRangeChecks("irce-print-range-checks", cl::Hidden,
                                      cl::init(false));

static cl::opt<bool> SkipProfitabilityChecks("irce-skip-profitability-checks",
                                             cl::Hidden, cl::init(false));

static cl::opt<unsigned> MinRuntimeIterations("irce-min-runtime-iterations",
                                              cl::Hidden, cl::init(10));

static cl::opt<bool> AllowUnsignedLatchCondition("irce-allow-unsigned-latch",
                                                 cl::Hidden, cl::init(true));

static cl::opt<bool> AllowNarrowLatchCondition(
    "irce-allow-narrow-latch", cl::Hidden, cl::init(true),
    cl::desc("If set to true, IRCE may eliminate wide range checks in loops "
             "with narrow latch condition."));

static const char *ClonedLoopTag = "irce.loop.clone";

#define DEBUG_TYPE "irce"

namespace {

/// An inductive range check is a conditional branch in a loop with
///
///  1. a very cold successor (i.e. the branch jumps to that successor very
///     rarely)
///
///  and
///
///  2. a condition that is provably true for some contiguous range of values
///     taken by the containing loop's induction variable.
///
class InductiveRangeCheck {

  const SCEV *Begin = nullptr;
  const SCEV *Step = nullptr;
  const SCEV *End = nullptr;
  Use *CheckUse = nullptr;

  static bool parseRangeCheckICmp(Loop *L, ICmpInst *ICI, ScalarEvolution &SE,
                                  Value *&Index, Value *&Length,
                                  bool &IsSigned);

  static void
  extractRangeChecksFromCond(Loop *L, ScalarEvolution &SE, Use &ConditionUse,
                             SmallVectorImpl<InductiveRangeCheck> &Checks,
                             SmallPtrSetImpl<Value *> &Visited);

public:
  const SCEV *getBegin() const { return Begin; }
  const SCEV *getStep() const { return Step; }
  const SCEV *getEnd() const { return End; }

  void print(raw_ostream &OS) const {
    OS << "InductiveRangeCheck:\n";
    OS << "  Begin: ";
    Begin->print(OS);
    OS << "  Step: ";
    Step->print(OS);
    OS << "  End: ";
    End->print(OS);
    OS << "\n  CheckUse: ";
    getCheckUse()->getUser()->print(OS);
    OS << " Operand: " << getCheckUse()->getOperandNo() << "\n";
  }

  Use *getCheckUse() const { return CheckUse; }

  /// Represents a signed integer range [Range.getBegin(), Range.getEnd()).  If
  /// R.getEnd() <= R.getBegin(), then R denotes the empty range.
  class Range {
    const SCEV *Begin;
    const SCEV *End;

  public:
    Range(const SCEV *Begin, const SCEV *End) : Begin(Begin), End(End) {
      assert(Begin->getType() == End->getType() && "ill-typed range!");
    }

    Type *getType() const { return Begin->getType(); }
    const SCEV *getBegin() const { return Begin; }
    const SCEV *getEnd() const { return End; }

    bool isEmpty(ScalarEvolution &SE, bool IsSigned) const {
      if (IsSigned)
        return SE.isKnownPredicate(ICmpInst::ICMP_SGE, Begin, End);
      else
        return SE.isKnownPredicate(ICmpInst::ICMP_UGE, Begin, End);
    }
  };

  /// This is the value the condition of the branch needs to evaluate to for the
  /// branch to take the hot successor (see (1) above).
  bool getPassingDirection() { return true; }

  /// Computes a range for the induction variable (IndVar) in which the range
  /// check is redundant and can be constant-folded away.  The induction
  /// variable is not required to be the canonical {0,+,1} induction variable.
  Optional<Range> computeSafeIterationSpace(ScalarEvolution &SE,
                                            const SCEVAddRecExpr *IndVar,
                                            bool IsLatchSigned) const;

  /// Parse out a set of inductive range checks from \p BI and append them to
  /// \p Checks.
  ///
  /// NB! There may be conditions feeding into \p BI that aren't inductive range
  /// checks, and hence don't end up in \p Checks.
  static void
  extractRangeChecksFromBranch(BranchInst *BI, Loop *L, ScalarEvolution &SE,
                               BranchProbabilityInfo *BPI,
                               SmallVectorImpl<InductiveRangeCheck> &Checks);
};

struct LoopStructure;

class InductiveRangeCheckElimination {
  ScalarEvolution &SE;
  BranchProbabilityInfo *BPI;
  DominatorTree &DT;
  LoopInfo &LI;

  using GetBFIFunc =
      llvm::Optional<llvm::function_ref<llvm::BlockFrequencyInfo &()> >;
  GetBFIFunc GetBFI;

  // Returns true if it is profitable to do a transform based on an estimate of
  // the number of iterations.
  bool isProfitableToTransform(const Loop &L, LoopStructure &LS);

public:
  InductiveRangeCheckElimination(ScalarEvolution &SE,
                                 BranchProbabilityInfo *BPI, DominatorTree &DT,
                                 LoopInfo &LI, GetBFIFunc GetBFI = None)
      : SE(SE), BPI(BPI), DT(DT), LI(LI), GetBFI(GetBFI) {}

  bool run(Loop *L, function_ref<void(Loop *, bool)> LPMAddNewLoop);
};

class IRCELegacyPass : public FunctionPass {
public:
  static char ID;

  IRCELegacyPass() : FunctionPass(ID) {
    initializeIRCELegacyPassPass(*PassRegistry::getPassRegistry());
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<BranchProbabilityInfoWrapperPass>();
    AU.addRequired<DominatorTreeWrapperPass>();
    AU.addPreserved<DominatorTreeWrapperPass>();
    AU.addRequired<LoopInfoWrapperPass>();
    AU.addPreserved<LoopInfoWrapperPass>();
    AU.addRequired<ScalarEvolutionWrapperPass>();
    AU.addPreserved<ScalarEvolutionWrapperPass>();
  }

  bool runOnFunction(Function &F) override;
};

} // end anonymous namespace

char IRCELegacyPass::ID = 0;

INITIALIZE_PASS_BEGIN(IRCELegacyPass, "irce",
                      "Inductive range check elimination", false, false)
INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_END(IRCELegacyPass, "irce", "Inductive range check elimination",
                    false, false)

/// Parse a single ICmp instruction, `ICI`, into a range check.  If `ICI` cannot
/// be interpreted as a range check, return false and set `Index` and `Length`
/// to `nullptr`.  Otherwise set `Index` to the value being range checked, and
/// set `Length` to the upper limit against which `Index` is being range
/// checked.
bool
InductiveRangeCheck::parseRangeCheckICmp(Loop *L, ICmpInst *ICI,
                                         ScalarEvolution &SE, Value *&Index,
                                         Value *&Length, bool &IsSigned) {
  auto IsLoopInvariant = [&SE, L](Value *V) {
    return SE.isLoopInvariant(SE.getSCEV(V), L);
  };

  ICmpInst::Predicate Pred = ICI->getPredicate();
  Value *LHS = ICI->getOperand(0);
  Value *RHS = ICI->getOperand(1);

  switch (Pred) {
  default:
    return false;

  case ICmpInst::ICMP_SLE:
    std::swap(LHS, RHS);
    LLVM_FALLTHROUGH;
  case ICmpInst::ICMP_SGE:
    IsSigned = true;
    if (match(RHS, m_ConstantInt<0>())) {
      Index = LHS;
      return true; // Lower.
    }
    return false;

  case ICmpInst::ICMP_SLT:
    std::swap(LHS, RHS);
    LLVM_FALLTHROUGH;
  case ICmpInst::ICMP_SGT:
    IsSigned = true;
    if (match(RHS, m_ConstantInt<-1>())) {
      Index = LHS;
      return true; // Lower.
    }

    if (IsLoopInvariant(LHS)) {
      Index = RHS;
      Length = LHS;
      return true; // Upper.
    }
    return false;

  case ICmpInst::ICMP_ULT:
    std::swap(LHS, RHS);
    LLVM_FALLTHROUGH;
  case ICmpInst::ICMP_UGT:
    IsSigned = false;
    if (IsLoopInvariant(LHS)) {
      Index = RHS;
      Length = LHS;
      return true; // Both lower and upper.
    }
    return false;
  }

  llvm_unreachable("default clause returns!");
}

void InductiveRangeCheck::extractRangeChecksFromCond(
    Loop *L, ScalarEvolution &SE, Use &ConditionUse,
    SmallVectorImpl<InductiveRangeCheck> &Checks,
    SmallPtrSetImpl<Value *> &Visited) {
  Value *Condition = ConditionUse.get();
  if (!Visited.insert(Condition).second)
    return;

  // TODO: Do the same for OR, XOR, NOT etc?
  if (match(Condition, m_LogicalAnd(m_Value(), m_Value()))) {
    extractRangeChecksFromCond(L, SE, cast<User>(Condition)->getOperandUse(0),
                               Checks, Visited);
    extractRangeChecksFromCond(L, SE, cast<User>(Condition)->getOperandUse(1),
                               Checks, Visited);
    return;
  }

  ICmpInst *ICI = dyn_cast<ICmpInst>(Condition);
  if (!ICI)
    return;

  Value *Length = nullptr, *Index;
  bool IsSigned;
  if (!parseRangeCheckICmp(L, ICI, SE, Index, Length, IsSigned))
    return;

  const auto *IndexAddRec = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(Index));
  bool IsAffineIndex =
      IndexAddRec && (IndexAddRec->getLoop() == L) && IndexAddRec->isAffine();

  if (!IsAffineIndex)
    return;

  const SCEV *End = nullptr;
  // We strengthen "0 <= I" to "0 <= I < INT_SMAX" and "I < L" to "0 <= I < L".
  // We can potentially do much better here.
  if (Length)
    End = SE.getSCEV(Length);
  else {
    // So far we can only reach this point for a signed range check. This may
    // change in the future. In that case we will need to pick the unsigned max
    // for an unsigned range check.
    unsigned BitWidth = cast<IntegerType>(IndexAddRec->getType())->getBitWidth();
    const SCEV *SIntMax = SE.getConstant(APInt::getSignedMaxValue(BitWidth));
    End = SIntMax;
  }

  InductiveRangeCheck IRC;
  IRC.End = End;
  IRC.Begin = IndexAddRec->getStart();
  IRC.Step = IndexAddRec->getStepRecurrence(SE);
  IRC.CheckUse = &ConditionUse;
  Checks.push_back(IRC);
}

void InductiveRangeCheck::extractRangeChecksFromBranch(
    BranchInst *BI, Loop *L, ScalarEvolution &SE, BranchProbabilityInfo *BPI,
    SmallVectorImpl<InductiveRangeCheck> &Checks) {
  if (BI->isUnconditional() || BI->getParent() == L->getLoopLatch())
    return;

  BranchProbability LikelyTaken(15, 16);

  if (!SkipProfitabilityChecks && BPI &&
      BPI->getEdgeProbability(BI->getParent(), (unsigned)0) < LikelyTaken)
    return;

  SmallPtrSet<Value *, 8> Visited;
  InductiveRangeCheck::extractRangeChecksFromCond(L, SE, BI->getOperandUse(0),
                                                  Checks, Visited);
}

// Add metadata to the loop L to disable loop optimizations. Callers need to
// confirm that optimizing loop L is not beneficial.
static void DisableAllLoopOptsOnLoop(Loop &L) {
  // We do not care about any existing loopID related metadata for L, since we
  // are setting all loop metadata to false.
  LLVMContext &Context = L.getHeader()->getContext();
  // Reserve first location for self reference to the LoopID metadata node.
  MDNode *Dummy = MDNode::get(Context, {});
  MDNode *DisableUnroll = MDNode::get(
      Context, {MDString::get(Context, "llvm.loop.unroll.disable")});
  Metadata *FalseVal =
      ConstantAsMetadata::get(ConstantInt::get(Type::getInt1Ty(Context), 0));
  MDNode *DisableVectorize = MDNode::get(
      Context,
      {MDString::get(Context, "llvm.loop.vectorize.enable"), FalseVal});
  MDNode *DisableLICMVersioning = MDNode::get(
      Context, {MDString::get(Context, "llvm.loop.licm_versioning.disable")});
  MDNode *DisableDistribution = MDNode::get(
      Context,
      {MDString::get(Context, "llvm.loop.distribute.enable"), FalseVal});
  MDNode *NewLoopID =
      MDNode::get(Context, {Dummy, DisableUnroll, DisableVectorize,
                            DisableLICMVersioning, DisableDistribution});
  // Set operand 0 to refer to the loop id itself.
  NewLoopID->replaceOperandWith(0, NewLoopID);
  L.setLoopID(NewLoopID);
}
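
// Illustrative sketch of the resulting loop metadata (the exact textual form
// depends on the IR printer and metadata numbering):
//
//   br ... !llvm.loop !0
//   !0 = distinct !{!0, !1, !2, !3, !4}
//   !1 = !{!"llvm.loop.unroll.disable"}
//   !2 = !{!"llvm.loop.vectorize.enable", i1 false}
//   !3 = !{!"llvm.loop.licm_versioning.disable"}
//   !4 = !{!"llvm.loop.distribute.enable", i1 false}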

namespace {

// Keeps track of the structure of a loop.  This is similar to llvm::Loop,
// except that it is more lightweight and can track the state of a loop through
// changing and potentially invalid IR.  This structure also formalizes the
// kinds of loops we can deal with -- ones that have a single latch that is also
// an exiting block *and* have a canonical induction variable.
struct LoopStructure {
  const char *Tag = "";

  BasicBlock *Header = nullptr;
  BasicBlock *Latch = nullptr;

  // `Latch's terminator instruction is `LatchBr', and its `LatchBrExitIdx'th
  // successor is `LatchExit', the exit block of the loop.
  BranchInst *LatchBr = nullptr;
  BasicBlock *LatchExit = nullptr;
  unsigned LatchBrExitIdx = std::numeric_limits<unsigned>::max();

  // The loop represented by this instance of LoopStructure is semantically
  // equivalent to:
  //
  // intN_ty inc = IndVarIncreasing ? 1 : -1;
  // pred_ty predicate = IndVarIncreasing ? ICMP_SLT : ICMP_SGT;
  //
  // for (intN_ty iv = IndVarStart; predicate(iv, LoopExitAt); iv = IndVarBase)
  //   ... body ...

  Value *IndVarBase = nullptr;
  Value *IndVarStart = nullptr;
  Value *IndVarStep = nullptr;
  Value *LoopExitAt = nullptr;
  bool IndVarIncreasing = false;
  bool IsSignedPredicate = true;

  LoopStructure() = default;

  template <typename M> LoopStructure map(M Map) const {
    LoopStructure Result;
    Result.Tag = Tag;
    Result.Header = cast<BasicBlock>(Map(Header));
    Result.Latch = cast<BasicBlock>(Map(Latch));
    Result.LatchBr = cast<BranchInst>(Map(LatchBr));
    Result.LatchExit = cast<BasicBlock>(Map(LatchExit));
    Result.LatchBrExitIdx = LatchBrExitIdx;
    Result.IndVarBase = Map(IndVarBase);
    Result.IndVarStart = Map(IndVarStart);
    Result.IndVarStep = Map(IndVarStep);
    Result.LoopExitAt = Map(LoopExitAt);
    Result.IndVarIncreasing = IndVarIncreasing;
    Result.IsSignedPredicate = IsSignedPredicate;
    return Result;
  }

  static Optional<LoopStructure> parseLoopStructure(ScalarEvolution &, Loop &,
                                                    const char *&);
};

/// This class is used to constrain loops to run within a given iteration space.
/// The algorithm this class implements is given a Loop and a range [Begin,
/// End).  The algorithm then tries to break a "main loop" out of the loop it
/// is given in a way that the "main loop" runs with the induction variable in
/// a subset of [Begin, End).  The algorithm emits appropriate pre and post
/// loops to run any remaining iterations.  The pre loop runs any iterations in
/// which the induction variable is < Begin, and the post loop runs any
/// iterations in which the induction variable is >= End.
class LoopConstrainer {
  // The representation of a clone of the original loop we started out with.
  struct ClonedLoop {
    // The cloned blocks.
    std::vector<BasicBlock *> Blocks;

    // `Map` maps values in the clonee into values in the cloned version.
    ValueToValueMapTy Map;

    // An instance of `LoopStructure` for the cloned loop.
    LoopStructure Structure;
  };

  // Result of rewriting the range of a loop.  See changeIterationSpaceEnd for
  // more details on what these fields mean.
  struct RewrittenRangeInfo {
    BasicBlock *PseudoExit = nullptr;
    BasicBlock *ExitSelector = nullptr;
    std::vector<PHINode *> PHIValuesAtPseudoExit;
    PHINode *IndVarEnd = nullptr;

    RewrittenRangeInfo() = default;
  };

  // Calculated subranges we restrict the iteration space of the main loop to.
  // See the implementation of `calculateSubRanges' for more details on how
  // these fields are computed.  `LowLimit` is None if there is no restriction
  // on the low end of the restricted iteration space of the main loop.
  // `HighLimit` is None if there is no restriction on the high end of the
  // restricted iteration space of the main loop.
  struct SubRanges {
    Optional<const SCEV *> LowLimit;
    Optional<const SCEV *> HighLimit;
  };

  // Compute a safe set of limits for the main loop to run in -- effectively the
  // intersection of `Range' and the iteration space of the original loop.
  // Return None if unable to compute the set of subranges.
  Optional<SubRanges> calculateSubRanges(bool IsSignedPredicate) const;

  // Clone `OriginalLoop' and return the result in CLResult.  The IR after
  // running `cloneLoop' is well formed except for the PHI nodes in CLResult --
  // the PHI nodes say that there is an incoming edge from `OriginalPreheader`
  // but there is no such edge.
  void cloneLoop(ClonedLoop &CLResult, const char *Tag) const;

  // Create the appropriate loop structure needed to describe a cloned copy of
  // `Original`.  The clone is described by `VM`.
  Loop *createClonedLoopStructure(Loop *Original, Loop *Parent,
                                  ValueToValueMapTy &VM, bool IsSubloop);

  // Rewrite the iteration space of the loop denoted by (LS, Preheader).  The
  // iteration space of the rewritten loop ends at ExitLoopAt.  The start of the
  // iteration space is not changed.  `ExitLoopAt' is assumed to be slt
  // `OriginalHeaderCount'.
  //
  // If there are iterations left to execute, control is made to jump to
  // `ContinuationBlock', otherwise they take the normal loop exit.  The
  // returned `RewrittenRangeInfo' object is populated as follows:
  //
  //  .PseudoExit is a basic block that unconditionally branches to
  //  `ContinuationBlock'.
  //
  //  .ExitSelector is a basic block that decides, on exit from the loop,
  //  whether to branch to the "true" exit or to `PseudoExit'.
  //
  //  .PHIValuesAtPseudoExit are PHINodes in `PseudoExit' that compute the value
  //  for each PHINode in the loop header on taking the pseudo exit.
  //
  // After changeIterationSpaceEnd, `Preheader' is no longer a legitimate
  // preheader because it is made to branch to the loop header only
  // conditionally.
  RewrittenRangeInfo
  changeIterationSpaceEnd(const LoopStructure &LS, BasicBlock *Preheader,
                          Value *ExitSubloopAt,
                          BasicBlock *ContinuationBlock) const;

  // The loop denoted by `LS' has `OldPreheader' as its preheader.  This
  // function creates a new preheader for `LS' and returns it.
  BasicBlock *createPreheader(const LoopStructure &LS, BasicBlock *OldPreheader,
                              const char *Tag) const;

  // `ContinuationBlockAndPreheader' was the continuation block for some call to
  // `changeIterationSpaceEnd' and is the preheader to the loop denoted by `LS'.
  // This function rewrites the PHI nodes in `LS.Header' to start with the
  // correct value.
  void rewriteIncomingValuesForPHIs(
      LoopStructure &LS, BasicBlock *ContinuationBlockAndPreheader,
      const LoopConstrainer::RewrittenRangeInfo &RRI) const;

  // Even though we do not preserve any passes at this time, we at least need to
  // keep the parent loop structure consistent.  The `LPPassManager' seems to
  // verify this after running a loop pass.  This function adds the list of
  // blocks denoted by BBs to this loop's parent loop if required.
  void addToParentLoopIfNeeded(ArrayRef<BasicBlock *> BBs);

  // Some global state.
  Function &F;
  LLVMContext &Ctx;
  ScalarEvolution &SE;
  DominatorTree &DT;
  LoopInfo &LI;
  function_ref<void(Loop *, bool)> LPMAddNewLoop;

  // Information about the original loop we started out with.
  Loop &OriginalLoop;

  const SCEV *LatchTakenCount = nullptr;
  BasicBlock *OriginalPreheader = nullptr;

  // The preheader of the main loop.  This may or may not be different from
  // `OriginalPreheader'.
  BasicBlock *MainLoopPreheader = nullptr;

  // The range we need to run the main loop in.
  InductiveRangeCheck::Range Range;

  // The structure of the main loop (see comment at the beginning of this class
  // for a definition).
  LoopStructure MainLoopStructure;

public:
  LoopConstrainer(Loop &L, LoopInfo &LI,
                  function_ref<void(Loop *, bool)> LPMAddNewLoop,
                  const LoopStructure &LS, ScalarEvolution &SE,
                  DominatorTree &DT, InductiveRangeCheck::Range R)
      : F(*L.getHeader()->getParent()), Ctx(L.getHeader()->getContext()),
        SE(SE), DT(DT), LI(LI), LPMAddNewLoop(LPMAddNewLoop), OriginalLoop(L),
        Range(R), MainLoopStructure(LS) {}

  // Entry point for the algorithm.  Returns true on success.
  bool run();
};

} // end anonymous namespace

/// Given a loop with a decreasing induction variable, is it possible to
/// safely calculate the bounds of a new loop using the given Predicate.
static bool isSafeDecreasingBound(const SCEV *Start,
                                  const SCEV *BoundSCEV, const SCEV *Step,
                                  ICmpInst::Predicate Pred,
                                  unsigned LatchBrExitIdx,
                                  Loop *L, ScalarEvolution &SE) {
  if (Pred != ICmpInst::ICMP_SLT && Pred != ICmpInst::ICMP_SGT &&
      Pred != ICmpInst::ICMP_ULT && Pred != ICmpInst::ICMP_UGT)
    return false;

  if (!SE.isAvailableAtLoopEntry(BoundSCEV, L))
    return false;

  assert(SE.isKnownNegative(Step) && "expecting negative step");

  LLVM_DEBUG(dbgs() << "irce: isSafeDecreasingBound with:\n");
  LLVM_DEBUG(dbgs() << "irce: Start: " << *Start << "\n");
  LLVM_DEBUG(dbgs() << "irce: Step: " << *Step << "\n");
  LLVM_DEBUG(dbgs() << "irce: BoundSCEV: " << *BoundSCEV << "\n");
  LLVM_DEBUG(dbgs() << "irce: Pred: " << ICmpInst::getPredicateName(Pred)
                    << "\n");
  LLVM_DEBUG(dbgs() << "irce: LatchExitBrIdx: " << LatchBrExitIdx << "\n");

  bool IsSigned = ICmpInst::isSigned(Pred);
  // The predicate that we need to check that the induction variable lies
  // within bounds.
  ICmpInst::Predicate BoundPred =
      IsSigned ? CmpInst::ICMP_SGT : CmpInst::ICMP_UGT;

  if (LatchBrExitIdx == 1)
    return SE.isLoopEntryGuardedByCond(L, BoundPred, Start, BoundSCEV);

  assert(LatchBrExitIdx == 0 &&
         "LatchBrExitIdx should be either 0 or 1");

  const SCEV *StepPlusOne = SE.getAddExpr(Step, SE.getOne(Step->getType()));
  unsigned BitWidth = cast<IntegerType>(BoundSCEV->getType())->getBitWidth();
  APInt Min = IsSigned ? APInt::getSignedMinValue(BitWidth) :
    APInt::getMinValue(BitWidth);
  const SCEV *Limit = SE.getMinusSCEV(SE.getConstant(Min), StepPlusOne);

  const SCEV *MinusOne =
    SE.getMinusSCEV(BoundSCEV, SE.getOne(BoundSCEV->getType()));

  return SE.isLoopEntryGuardedByCond(L, BoundPred, Start, MinusOne) &&
         SE.isLoopEntryGuardedByCond(L, BoundPred, BoundSCEV, Limit);
}

/// Given a loop with an increasing induction variable, is it possible to
/// safely calculate the bounds of a new loop using the given Predicate.
static bool isSafeIncreasingBound(const SCEV *Start,
                                  const SCEV *BoundSCEV, const SCEV *Step,
                                  ICmpInst::Predicate Pred,
                                  unsigned LatchBrExitIdx,
                                  Loop *L, ScalarEvolution &SE) {
  if (Pred != ICmpInst::ICMP_SLT && Pred != ICmpInst::ICMP_SGT &&
      Pred != ICmpInst::ICMP_ULT && Pred != ICmpInst::ICMP_UGT)
    return false;

  if (!SE.isAvailableAtLoopEntry(BoundSCEV, L))
    return false;

  LLVM_DEBUG(dbgs() << "irce: isSafeIncreasingBound with:\n");
  LLVM_DEBUG(dbgs() << "irce: Start: " << *Start << "\n");
  LLVM_DEBUG(dbgs() << "irce: Step: " << *Step << "\n");
  LLVM_DEBUG(dbgs() << "irce: BoundSCEV: " << *BoundSCEV << "\n");
  LLVM_DEBUG(dbgs() << "irce: Pred: " << ICmpInst::getPredicateName(Pred)
                    << "\n");
  LLVM_DEBUG(dbgs() << "irce: LatchExitBrIdx: " << LatchBrExitIdx << "\n");

  bool IsSigned = ICmpInst::isSigned(Pred);
  // The predicate that we need to check that the induction variable lies
  // within bounds.
  ICmpInst::Predicate BoundPred =
      IsSigned ? CmpInst::ICMP_SLT : CmpInst::ICMP_ULT;

  if (LatchBrExitIdx == 1)
    return SE.isLoopEntryGuardedByCond(L, BoundPred, Start, BoundSCEV);

  assert(LatchBrExitIdx == 0 && "LatchBrExitIdx should be 0 or 1");

  const SCEV *StepMinusOne =
    SE.getMinusSCEV(Step, SE.getOne(Step->getType()));
  unsigned BitWidth = cast<IntegerType>(BoundSCEV->getType())->getBitWidth();
  APInt Max = IsSigned ? APInt::getSignedMaxValue(BitWidth) :
    APInt::getMaxValue(BitWidth);
  const SCEV *Limit = SE.getMinusSCEV(SE.getConstant(Max), StepMinusOne);

  return (SE.isLoopEntryGuardedByCond(L, BoundPred, Start,
                                      SE.getAddExpr(BoundSCEV, Step)) &&
          SE.isLoopEntryGuardedByCond(L, BoundPred, BoundSCEV, Limit));
}
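
// Illustrative example (not from the original source): with an 8-bit signed
// bound and Step == 2, Limit = 127 - (2 - 1) = 126, so proving
// "BoundSCEV <s Limit" guarantees that the "BoundSCEV + Step" used in the
// first check above cannot sign-overflow.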

Optional<LoopStructure>
LoopStructure::parseLoopStructure(ScalarEvolution &SE, Loop &L,
                                  const char *&FailureReason) {
  if (!L.isLoopSimplifyForm()) {
    FailureReason = "loop not in LoopSimplify form";
    return None;
  }

  BasicBlock *Latch = L.getLoopLatch();
  assert(Latch && "Simplified loops only have one latch!");

  if (Latch->getTerminator()->getMetadata(ClonedLoopTag)) {
    FailureReason = "loop has already been cloned";
    return None;
  }

  if (!L.isLoopExiting(Latch)) {
    FailureReason = "no loop latch";
    return None;
  }

  BasicBlock *Header = L.getHeader();
  BasicBlock *Preheader = L.getLoopPreheader();
  if (!Preheader) {
    FailureReason = "no preheader";
    return None;
  }

  BranchInst *LatchBr = dyn_cast<BranchInst>(Latch->getTerminator());
  if (!LatchBr || LatchBr->isUnconditional()) {
    FailureReason = "latch terminator not conditional branch";
    return None;
  }

  unsigned LatchBrExitIdx = LatchBr->getSuccessor(0) == Header ? 1 : 0;

  ICmpInst *ICI = dyn_cast<ICmpInst>(LatchBr->getCondition());
  if (!ICI || !isa<IntegerType>(ICI->getOperand(0)->getType())) {
    FailureReason = "latch terminator branch not conditional on integral icmp";
    return None;
  }

  const SCEV *LatchCount = SE.getExitCount(&L, Latch);
  if (isa<SCEVCouldNotCompute>(LatchCount)) {
    FailureReason = "could not compute latch count";
    return None;
  }

  ICmpInst::Predicate Pred = ICI->getPredicate();
  Value *LeftValue = ICI->getOperand(0);
  const SCEV *LeftSCEV = SE.getSCEV(LeftValue);
  IntegerType *IndVarTy = cast<IntegerType>(LeftValue->getType());

  Value *RightValue = ICI->getOperand(1);
  const SCEV *RightSCEV = SE.getSCEV(RightValue);

  // We canonicalize `ICI` such that `LeftSCEV` is an add recurrence.
  if (!isa<SCEVAddRecExpr>(LeftSCEV)) {
    if (isa<SCEVAddRecExpr>(RightSCEV)) {
      std::swap(LeftSCEV, RightSCEV);
      std::swap(LeftValue, RightValue);
      Pred = ICmpInst::getSwappedPredicate(Pred);
    } else {
      FailureReason = "no add recurrences in the icmp";
      return None;
    }
  }

  auto HasNoSignedWrap = [&](const SCEVAddRecExpr *AR) {
    if (AR->getNoWrapFlags(SCEV::FlagNSW))
      return true;

    IntegerType *Ty = cast<IntegerType>(AR->getType());
    IntegerType *WideTy =
        IntegerType::get(Ty->getContext(), Ty->getBitWidth() * 2);

    const SCEVAddRecExpr *ExtendAfterOp =
        dyn_cast<SCEVAddRecExpr>(SE.getSignExtendExpr(AR, WideTy));
    if (ExtendAfterOp) {
      const SCEV *ExtendedStart = SE.getSignExtendExpr(AR->getStart(), WideTy);
      const SCEV *ExtendedStep =
          SE.getSignExtendExpr(AR->getStepRecurrence(SE), WideTy);

      bool NoSignedWrap = ExtendAfterOp->getStart() == ExtendedStart &&
                          ExtendAfterOp->getStepRecurrence(SE) == ExtendedStep;

      if (NoSignedWrap)
        return true;
    }

    // We may have proved this when computing the sign extension above.
    return AR->getNoWrapFlags(SCEV::FlagNSW) != SCEV::FlagAnyWrap;
  };

  // `ICI` is interpreted as taking the backedge if the *next* value of the
  // induction variable satisfies some constraint.

  const SCEVAddRecExpr *IndVarBase = cast<SCEVAddRecExpr>(LeftSCEV);
  if (!IndVarBase->isAffine()) {
    FailureReason = "LHS in icmp not induction variable";
    return None;
  }
  const SCEV *StepRec = IndVarBase->getStepRecurrence(SE);
  if (!isa<SCEVConstant>(StepRec)) {
    FailureReason = "LHS in icmp not induction variable";
    return None;
  }
  ConstantInt *StepCI = cast<SCEVConstant>(StepRec)->getValue();

  if (ICI->isEquality() && !HasNoSignedWrap(IndVarBase)) {
    FailureReason = "LHS in icmp needs nsw for equality predicates";
    return None;
  }

  assert(!StepCI->isZero() && "Zero step?");
  bool IsIncreasing = !StepCI->isNegative();
  bool IsSignedPredicate;
  const SCEV *StartNext = IndVarBase->getStart();
  const SCEV *Addend = SE.getNegativeSCEV(IndVarBase->getStepRecurrence(SE));
  const SCEV *IndVarStart = SE.getAddExpr(StartNext, Addend);
  const SCEV *Step = SE.getSCEV(StepCI);

  const SCEV *FixedRightSCEV = nullptr;

  // If RightValue resides within the loop (but is still loop invariant),
  // regenerate it in the preheader.
  if (auto *I = dyn_cast<Instruction>(RightValue))
    if (L.contains(I->getParent()))
      FixedRightSCEV = RightSCEV;

  if (IsIncreasing) {
    bool DecreasedRightValueByOne = false;
    if (StepCI->isOne()) {
      // Try to turn eq/ne predicates to those we can work with.
      if (Pred == ICmpInst::ICMP_NE && LatchBrExitIdx == 1)
        // while (++i != len) {         while (++i < len) {
        //   ...                 --->     ...
        // }                            }
        // If both parts are known non-negative, it is profitable to use
        // unsigned comparison in increasing loop. This allows us to make the
        // comparison check against "RightSCEV + 1" more optimistic.
        if (isKnownNonNegativeInLoop(IndVarStart, &L, SE) &&
            isKnownNonNegativeInLoop(RightSCEV, &L, SE))
          Pred = ICmpInst::ICMP_ULT;
        else
          Pred = ICmpInst::ICMP_SLT;
      else if (Pred == ICmpInst::ICMP_EQ && LatchBrExitIdx == 0) {
        // while (true) {               while (true) {
        //   if (++i == len)     --->     if (++i > len - 1)
        //     break;                       break;
        //   ...                          ...
        // }                            }
        if (IndVarBase->getNoWrapFlags(SCEV::FlagNUW) &&
            cannotBeMinInLoop(RightSCEV, &L, SE, /*Signed*/false)) {
          Pred = ICmpInst::ICMP_UGT;
          RightSCEV = SE.getMinusSCEV(RightSCEV,
                                      SE.getOne(RightSCEV->getType()));
          DecreasedRightValueByOne = true;
        } else if (cannotBeMinInLoop(RightSCEV, &L, SE, /*Signed*/true)) {
          Pred = ICmpInst::ICMP_SGT;
          RightSCEV = SE.getMinusSCEV(RightSCEV,
                                      SE.getOne(RightSCEV->getType()));
          DecreasedRightValueByOne = true;
        }
      }
    }

    bool LTPred = (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_ULT);
    bool GTPred = (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_UGT);
    bool FoundExpectedPred =
        (LTPred && LatchBrExitIdx == 1) || (GTPred && LatchBrExitIdx == 0);

    if (!FoundExpectedPred) {
      FailureReason = "expected icmp slt semantically, found something else";
      return None;
    }

    IsSignedPredicate = ICmpInst::isSigned(Pred);
    if (!IsSignedPredicate && !AllowUnsignedLatchCondition) {
      FailureReason = "unsigned latch conditions are explicitly prohibited";
      return None;
    }

    if (!isSafeIncreasingBound(IndVarStart, RightSCEV, Step, Pred,
                               LatchBrExitIdx, &L, SE)) {
      FailureReason = "Unsafe loop bounds";
      return None;
    }
    if (LatchBrExitIdx == 0) {
      // We need to increase the right value unless we have already decreased
      // it virtually when we replaced EQ with SGT.
      if (!DecreasedRightValueByOne)
        FixedRightSCEV =
            SE.getAddExpr(RightSCEV, SE.getOne(RightSCEV->getType()));
    } else {
      assert(!DecreasedRightValueByOne &&
             "Right value can be decreased only for LatchBrExitIdx == 0!");
    }
  } else {
    bool IncreasedRightValueByOne = false;
    if (StepCI->isMinusOne()) {
      // Try to turn eq/ne predicates to those we can work with.
      if (Pred == ICmpInst::ICMP_NE && LatchBrExitIdx == 1)
        // while (--i != len) {         while (--i > len) {
        //   ...                 --->     ...
        // }                            }
        // We intentionally don't turn the predicate into UGT even if we know
        // that both operands are non-negative, because it will only pessimize
        // our check against "RightSCEV - 1".
        Pred = ICmpInst::ICMP_SGT;
      else if (Pred == ICmpInst::ICMP_EQ && LatchBrExitIdx == 0) {
        // while (true) {               while (true) {
        //   if (--i == len)     --->     if (--i < len + 1)
        //     break;                       break;
        //   ...                          ...
        // }                            }
        if (IndVarBase->getNoWrapFlags(SCEV::FlagNUW) &&
            cannotBeMaxInLoop(RightSCEV, &L, SE, /* Signed */ false)) {
          Pred = ICmpInst::ICMP_ULT;
          RightSCEV = SE.getAddExpr(RightSCEV, SE.getOne(RightSCEV->getType()));
          IncreasedRightValueByOne = true;
        } else if (cannotBeMaxInLoop(RightSCEV, &L, SE, /* Signed */ true)) {
          Pred = ICmpInst::ICMP_SLT;
          RightSCEV = SE.getAddExpr(RightSCEV, SE.getOne(RightSCEV->getType()));
          IncreasedRightValueByOne = true;
        }
      }
    }

    bool LTPred = (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_ULT);
    bool GTPred = (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_UGT);

    bool FoundExpectedPred =
        (GTPred && LatchBrExitIdx == 1) || (LTPred && LatchBrExitIdx == 0);

    if (!FoundExpectedPred) {
      FailureReason = "expected icmp sgt semantically, found something else";
      return None;
    }

    IsSignedPredicate =
        Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SGT;

    if (!IsSignedPredicate && !AllowUnsignedLatchCondition) {
      FailureReason = "unsigned latch conditions are explicitly prohibited";
      return None;
    }

    if (!isSafeDecreasingBound(IndVarStart, RightSCEV, Step, Pred,
                               LatchBrExitIdx, &L, SE)) {
      FailureReason = "Unsafe bounds";
      return None;
    }

    if (LatchBrExitIdx == 0) {
      // We need to decrease the right value unless we have already increased
      // it virtually when we replaced EQ with SLT.
      if (!IncreasedRightValueByOne)
        FixedRightSCEV =
            SE.getMinusSCEV(RightSCEV, SE.getOne(RightSCEV->getType()));
    } else {
      assert(!IncreasedRightValueByOne &&
             "Right value can be increased only for LatchBrExitIdx == 0!");
    }
  }

  BasicBlock *LatchExit = LatchBr->getSuccessor(LatchBrExitIdx);

  assert(SE.getLoopDisposition(LatchCount, &L) ==
             ScalarEvolution::LoopInvariant &&
         "loop variant exit count doesn't make sense!");

  assert(!L.contains(LatchExit) && "expected an exit block!");
  const DataLayout &DL = Preheader->getModule()->getDataLayout();
  SCEVExpander Expander(SE, DL, "irce");
  Instruction *Ins = Preheader->getTerminator();

  if (FixedRightSCEV)
    RightValue =
        Expander.expandCodeFor(FixedRightSCEV, FixedRightSCEV->getType(), Ins);

  Value *IndVarStartV = Expander.expandCodeFor(IndVarStart, IndVarTy, Ins);
  IndVarStartV->setName("indvar.start");

  LoopStructure Result;

  Result.Tag = "main";
  Result.Header = Header;
  Result.Latch = Latch;
  Result.LatchBr = LatchBr;
  Result.LatchExit = LatchExit;
  Result.LatchBrExitIdx = LatchBrExitIdx;
  Result.IndVarStart = IndVarStartV;
  Result.IndVarStep = StepCI;
  Result.IndVarBase = LeftValue;
  Result.IndVarIncreasing = IsIncreasing;
  Result.LoopExitAt = RightValue;
  Result.IsSignedPredicate = IsSignedPredicate;

  FailureReason = nullptr;

  return Result;
}

/// If the type of \p S matches with \p Ty, return \p S. Otherwise, return
/// signed or unsigned extension of \p S to type \p Ty.
static const SCEV *NoopOrExtend(const SCEV *S, Type *Ty, ScalarEvolution &SE,
                                bool Signed) {
  return Signed ? SE.getNoopOrSignExtend(S, Ty) : SE.getNoopOrZeroExtend(S, Ty);
}

Optional<LoopConstrainer::SubRanges>
LoopConstrainer::calculateSubRanges(bool IsSignedPredicate) const {
  IntegerType *Ty = cast<IntegerType>(LatchTakenCount->getType());

  auto *RTy = cast<IntegerType>(Range.getType());

  // We only support wide range checks and narrow latches.
  if (!AllowNarrowLatchCondition && RTy != Ty)
    return None;
  if (RTy->getBitWidth() < Ty->getBitWidth())
    return None;

  LoopConstrainer::SubRanges Result;

  // I think we can be more aggressive here and make this nuw / nsw if the
  // addition that feeds into the icmp for the latch's terminating branch is nuw
  // / nsw.  In any case, a wrapping 2's complement addition is safe.
  const SCEV *Start = NoopOrExtend(SE.getSCEV(MainLoopStructure.IndVarStart),
                                   RTy, SE, IsSignedPredicate);
  const SCEV *End = NoopOrExtend(SE.getSCEV(MainLoopStructure.LoopExitAt), RTy,
                                 SE, IsSignedPredicate);

  bool Increasing = MainLoopStructure.IndVarIncreasing;

  // We compute `Smallest` and `Greatest` such that [Smallest, Greatest), or
  // [Smallest, GreatestSeen] is the range of values the induction variable
  // takes.

  const SCEV *Smallest = nullptr, *Greatest = nullptr, *GreatestSeen = nullptr;

  const SCEV *One = SE.getOne(RTy);
  if (Increasing) {
    Smallest = Start;
    Greatest = End;
    // No overflow, because the range [Smallest, GreatestSeen] is not empty.
    GreatestSeen = SE.getMinusSCEV(End, One);
  } else {
    // These two computations may sign-overflow.  Here is why that is okay:
    //
    // We know that the induction variable does not sign-overflow on any
    // iteration except the last one, and it starts at `Start` and ends at
    // `End`, decrementing by one every time.
    //
    //  * if `Smallest` sign-overflows we know `End` is `INT_SMAX`. Since the
    //    induction variable is decreasing we know that the smallest value
    //    the loop body is actually executed with is `INT_SMIN` == `Smallest`.
    //
    //  * if `Greatest` sign-overflows, we know it can only be `INT_SMIN`.  In
    //    that case, `Clamp` will always return `Smallest` and
    //    [`Result.LowLimit`, `Result.HighLimit`) = [`Smallest`, `Smallest`)
    //    will be an empty range.  Returning an empty range is always safe.
    //
    Smallest = SE.getAddExpr(End, One);
    Greatest = SE.getAddExpr(Start, One);
    GreatestSeen = Start;
  }
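
  // Illustrative example (not from the original source): for a decreasing
  // loop that starts at Start = 10 and exits once the IV reaches End = 0,
  // the values the body actually sees are 10, 9, ..., 1, i.e.
  // Smallest = End + 1 = 1 and GreatestSeen = Start = 10.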

  auto Clamp = [this, Smallest, Greatest, IsSignedPredicate](const SCEV *S) {
    return IsSignedPredicate
               ? SE.getSMaxExpr(Smallest, SE.getSMinExpr(Greatest, S))
               : SE.getUMaxExpr(Smallest, SE.getUMinExpr(Greatest, S));
  };

  // In some cases we can prove that we don't need a pre or post loop.
  ICmpInst::Predicate PredLE =
      IsSignedPredicate ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
  ICmpInst::Predicate PredLT =
      IsSignedPredicate ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;

  bool ProvablyNoPreloop =
      SE.isKnownPredicate(PredLE, Range.getBegin(), Smallest);
  if (!ProvablyNoPreloop)
    Result.LowLimit = Clamp(Range.getBegin());

  bool ProvablyNoPostLoop =
      SE.isKnownPredicate(PredLT, GreatestSeen, Range.getEnd());
  if (!ProvablyNoPostLoop)
    Result.HighLimit = Clamp(Range.getEnd());

  return Result;
}
&Result
,
1145 const char *Tag
) const {
1146 for (BasicBlock
*BB
: OriginalLoop
.getBlocks()) {
1147 BasicBlock
*Clone
= CloneBasicBlock(BB
, Result
.Map
, Twine(".") + Tag
, &F
);
1148 Result
.Blocks
.push_back(Clone
);
1149 Result
.Map
[BB
] = Clone
;
1152 auto GetClonedValue
= [&Result
](Value
*V
) {
1153 assert(V
&& "null values not in domain!");
1154 auto It
= Result
.Map
.find(V
);
1155 if (It
== Result
.Map
.end())
1157 return static_cast<Value
*>(It
->second
);
1161 cast
<BasicBlock
>(GetClonedValue(OriginalLoop
.getLoopLatch()));
1162 ClonedLatch
->getTerminator()->setMetadata(ClonedLoopTag
,
1163 MDNode::get(Ctx
, {}));
1165 Result
.Structure
= MainLoopStructure
.map(GetClonedValue
);
1166 Result
.Structure
.Tag
= Tag
;
1168 for (unsigned i
= 0, e
= Result
.Blocks
.size(); i
!= e
; ++i
) {
1169 BasicBlock
*ClonedBB
= Result
.Blocks
[i
];
1170 BasicBlock
*OriginalBB
= OriginalLoop
.getBlocks()[i
];
1172 assert(Result
.Map
[OriginalBB
] == ClonedBB
&& "invariant!");
1174 for (Instruction
&I
: *ClonedBB
)
1175 RemapInstruction(&I
, Result
.Map
,
1176 RF_NoModuleLevelChanges
| RF_IgnoreMissingLocals
);
1178 // Exit blocks will now have one more predecessor and their PHI nodes need
1179 // to be edited to reflect that. No phi nodes need to be introduced because
1180 // the loop is in LCSSA.
1182 for (auto *SBB
: successors(OriginalBB
)) {
1183 if (OriginalLoop
.contains(SBB
))
1184 continue; // not an exit block
1186 for (PHINode
&PN
: SBB
->phis()) {
1187 Value
*OldIncoming
= PN
.getIncomingValueForBlock(OriginalBB
);
1188 PN
.addIncoming(GetClonedValue(OldIncoming
), ClonedBB
);

LoopConstrainer::RewrittenRangeInfo LoopConstrainer::changeIterationSpaceEnd(
    const LoopStructure &LS, BasicBlock *Preheader, Value *ExitSubloopAt,
    BasicBlock *ContinuationBlock) const {
  // We start with a loop with a single latch:
  //
  //    +--------------------+
  //    |                    |
  //    |     preheader      |
  //    |                    |
  //    +--------+-----------+
  //             |      ----------------\
  //             |     /                |
  //    +--------v----v------+          |
  //    |                    |          |
  //    |      header        |          |
  //    |                    |          |
  //    +--------------------+          |
  //                                    |
  //            .....                   |
  //                                    |
  //    +--------------------+          |
  //    |                    |          |
  //    |       latch        >----------/
  //    |                    |
  //    +-------v------------+
  //            |
  //            |
  //            |   +--------------------+
  //            |   |                    |
  //            +--->   original exit    |
  //                |                    |
  //                +--------------------+
  //
  // We change the control flow to look like
  //
  //
  //    +--------------------+
  //    |                    |
  //    |     preheader      >-------------------------+
  //    |                    |                         |
  //    +--------v-----------+                         |
  //             |    /-------------+                  |
  //             |   /              |                  |
  //    +--------v--v--------+      |                  |
  //    |                    |      |                  |
  //    |      header        |      |   +--------+     |
  //    |                    |      |   |        |     |
  //    +--------------------+      |   |  +-----v-----v-----------+
  //                                |   |  |                       |
  //                                |   |  |     .pseudo.exit      |
  //                                |   |  |                       |
  //                                |   |  +-----------v-----------+
  //                                |   |              |
  //            .....               |   |              |
  //                                |   |     +--------v-------------+
  //    +--------------------+      |   |     |                      |
  //    |                    |      |   |     |  ContinuationBlock   |
  //    |       latch        >------+   |     |                      |
  //    |                    |          |     +----------------------+
  //    +---------v----------+          |
  //              |                     |
  //              |                     |
  //              |     +---------------^-----+
  //              |     |                     |
  //              +----->    .exit.selector   |
  //                    |                     |
  //                    +----------v----------+
  //                               |
  //     +--------------------+    |
  //     |                    |    |
  //     |   original exit    <----+
  //     |                    |
  //     +--------------------+

  RewrittenRangeInfo RRI;

  BasicBlock *BBInsertLocation = LS.Latch->getNextNode();
  RRI.ExitSelector = BasicBlock::Create(Ctx, Twine(LS.Tag) + ".exit.selector",
                                        &F, BBInsertLocation);
  RRI.PseudoExit = BasicBlock::Create(Ctx, Twine(LS.Tag) + ".pseudo.exit", &F,
                                      BBInsertLocation);

  BranchInst *PreheaderJump = cast<BranchInst>(Preheader->getTerminator());
  bool Increasing = LS.IndVarIncreasing;
  bool IsSignedPredicate = LS.IsSignedPredicate;

  IRBuilder<> B(PreheaderJump);
  auto *RangeTy = Range.getBegin()->getType();
  auto NoopOrExt = [&](Value *V) {
    if (V->getType() == RangeTy)
      return V;
    return IsSignedPredicate ? B.CreateSExt(V, RangeTy, "wide." + V->getName())
                             : B.CreateZExt(V, RangeTy, "wide." + V->getName());
  };

  // EnterLoopCond - is it okay to start executing this `LS'?
  Value *EnterLoopCond = nullptr;
  auto Pred =
      Increasing ? (IsSignedPredicate ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT)
                 : (IsSignedPredicate ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT);
  Value *IndVarStart = NoopOrExt(LS.IndVarStart);
  EnterLoopCond = B.CreateICmp(Pred, IndVarStart, ExitSubloopAt);

  B.CreateCondBr(EnterLoopCond, LS.Header, RRI.PseudoExit);
  PreheaderJump->eraseFromParent();

  LS.LatchBr->setSuccessor(LS.LatchBrExitIdx, RRI.ExitSelector);
  B.SetInsertPoint(LS.LatchBr);
  Value *IndVarBase = NoopOrExt(LS.IndVarBase);
  Value *TakeBackedgeLoopCond = B.CreateICmp(Pred, IndVarBase, ExitSubloopAt);

  Value *CondForBranch = LS.LatchBrExitIdx == 1
                             ? TakeBackedgeLoopCond
                             : B.CreateNot(TakeBackedgeLoopCond);

  LS.LatchBr->setCondition(CondForBranch);

  B.SetInsertPoint(RRI.ExitSelector);

  // IterationsLeft - are there any more iterations left, given the original
  // upper bound on the induction variable?  If not, we branch to the "real"
  // exit.
  Value *LoopExitAt = NoopOrExt(LS.LoopExitAt);
  Value *IterationsLeft = B.CreateICmp(Pred, IndVarBase, LoopExitAt);
  B.CreateCondBr(IterationsLeft, RRI.PseudoExit, LS.LatchExit);

  BranchInst *BranchToContinuation =
      BranchInst::Create(ContinuationBlock, RRI.PseudoExit);

  // We emit PHI nodes into `RRI.PseudoExit' that compute the "latest" value of
  // each of the PHI nodes in the loop header.  This feeds into the initial
  // value of the same PHI nodes if/when we continue execution.
  for (PHINode &PN : LS.Header->phis()) {
    PHINode *NewPHI = PHINode::Create(PN.getType(), 2, PN.getName() + ".copy",
                                      BranchToContinuation);

    NewPHI->addIncoming(PN.getIncomingValueForBlock(Preheader), Preheader);
    NewPHI->addIncoming(PN.getIncomingValueForBlock(LS.Latch),
                        RRI.ExitSelector);
    RRI.PHIValuesAtPseudoExit.push_back(NewPHI);
  }

  RRI.IndVarEnd = PHINode::Create(IndVarBase->getType(), 2, "indvar.end",
                                  BranchToContinuation);
  RRI.IndVarEnd->addIncoming(IndVarStart, Preheader);
  RRI.IndVarEnd->addIncoming(IndVarBase, RRI.ExitSelector);

  // The latch exit now has a branch from `RRI.ExitSelector' instead of
  // `LS.Latch'.  The PHI nodes need to be updated to reflect that.
  LS.LatchExit->replacePhiUsesWith(LS.Latch, RRI.ExitSelector);

  return RRI;
}

void LoopConstrainer::rewriteIncomingValuesForPHIs(
    LoopStructure &LS, BasicBlock *ContinuationBlock,
    const LoopConstrainer::RewrittenRangeInfo &RRI) const {
  unsigned PHIIndex = 0;
  for (PHINode &PN : LS.Header->phis())
    PN.setIncomingValueForBlock(ContinuationBlock,
                                RRI.PHIValuesAtPseudoExit[PHIIndex++]);

  LS.IndVarStart = RRI.IndVarEnd;
}

BasicBlock *LoopConstrainer::createPreheader(const LoopStructure &LS,
                                             BasicBlock *OldPreheader,
                                             const char *Tag) const {
  BasicBlock *Preheader = BasicBlock::Create(Ctx, Tag, &F, LS.Header);
  BranchInst::Create(LS.Header, Preheader);

  LS.Header->replacePhiUsesWith(OldPreheader, Preheader);

  return Preheader;
}

void LoopConstrainer::addToParentLoopIfNeeded(ArrayRef<BasicBlock *> BBs) {
  Loop *ParentLoop = OriginalLoop.getParentLoop();
  if (!ParentLoop)
    return;

  for (BasicBlock *BB : BBs)
    ParentLoop->addBasicBlockToLoop(BB, LI);
}

Loop *LoopConstrainer::createClonedLoopStructure(Loop *Original, Loop *Parent,
                                                 ValueToValueMapTy &VM,
                                                 bool IsSubloop) {
  Loop &New = *LI.AllocateLoop();
  if (Parent)
    Parent->addChildLoop(&New);
  else
    LI.addTopLevelLoop(&New);
  LPMAddNewLoop(&New, IsSubloop);

  // Add all of the blocks in Original to the new loop.
  for (auto *BB : Original->blocks())
    if (LI.getLoopFor(BB) == Original)
      New.addBasicBlockToLoop(cast<BasicBlock>(VM[BB]), LI);

  // Add all of the subloops to the new loop.
  for (Loop *SubLoop : *Original)
    createClonedLoopStructure(SubLoop, &New, VM, /* IsSubloop */ true);

  return &New;
}

bool LoopConstrainer::run() {
  BasicBlock *Preheader = nullptr;
  LatchTakenCount = SE.getExitCount(&OriginalLoop, MainLoopStructure.Latch);
  Preheader = OriginalLoop.getLoopPreheader();
  assert(!isa<SCEVCouldNotCompute>(LatchTakenCount) && Preheader != nullptr &&
         "preconditions!");

  OriginalPreheader = Preheader;
  MainLoopPreheader = Preheader;

  bool IsSignedPredicate = MainLoopStructure.IsSignedPredicate;
  Optional<SubRanges> MaybeSR = calculateSubRanges(IsSignedPredicate);
  if (!MaybeSR.hasValue()) {
    LLVM_DEBUG(dbgs() << "irce: could not compute subranges\n");
    return false;
  }

  SubRanges SR = MaybeSR.getValue();
  bool Increasing = MainLoopStructure.IndVarIncreasing;
  IntegerType *IVTy =
      cast<IntegerType>(Range.getBegin()->getType());

  SCEVExpander Expander(SE, F.getParent()->getDataLayout(), "irce");
  Instruction *InsertPt = OriginalPreheader->getTerminator();

  // It would have been better to make `PreLoop' and `PostLoop'
  // `Optional<ClonedLoop>'s, but `ValueToValueMapTy' does not have a copy
  // constructor.
  ClonedLoop PreLoop, PostLoop;
  bool NeedsPreLoop =
      Increasing ? SR.LowLimit.hasValue() : SR.HighLimit.hasValue();
  bool NeedsPostLoop =
      Increasing ? SR.HighLimit.hasValue() : SR.LowLimit.hasValue();

  Value *ExitPreLoopAt = nullptr;
  Value *ExitMainLoopAt = nullptr;
  const SCEVConstant *MinusOneS =
      cast<SCEVConstant>(SE.getConstant(IVTy, -1, true /* isSigned */));

  if (NeedsPreLoop) {
    const SCEV *ExitPreLoopAtSCEV = nullptr;

    if (Increasing)
      ExitPreLoopAtSCEV = *SR.LowLimit;
    else if (cannotBeMinInLoop(*SR.HighLimit, &OriginalLoop, SE,
                               IsSignedPredicate))
      ExitPreLoopAtSCEV = SE.getAddExpr(*SR.HighLimit, MinusOneS);
    else {
      LLVM_DEBUG(dbgs() << "irce: could not prove no-overflow when computing "
                        << "preloop exit limit.  HighLimit = "
                        << *(*SR.HighLimit) << "\n");
      return false;
    }

    if (!isSafeToExpandAt(ExitPreLoopAtSCEV, InsertPt, SE)) {
      LLVM_DEBUG(dbgs() << "irce: could not prove that it is safe to expand the"
                        << " preloop exit limit " << *ExitPreLoopAtSCEV
                        << " at block " << InsertPt->getParent()->getName()
                        << "\n");
      return false;
    }

    ExitPreLoopAt = Expander.expandCodeFor(ExitPreLoopAtSCEV, IVTy, InsertPt);
    ExitPreLoopAt->setName("exit.preloop.at");
  }

  if (NeedsPostLoop) {
    const SCEV *ExitMainLoopAtSCEV = nullptr;

    if (Increasing)
      ExitMainLoopAtSCEV = *SR.HighLimit;
    else if (cannotBeMinInLoop(*SR.LowLimit, &OriginalLoop, SE,
                               IsSignedPredicate))
      ExitMainLoopAtSCEV = SE.getAddExpr(*SR.LowLimit, MinusOneS);
    else {
      LLVM_DEBUG(dbgs() << "irce: could not prove no-overflow when computing "
                        << "mainloop exit limit.  LowLimit = "
                        << *(*SR.LowLimit) << "\n");
      return false;
    }

    if (!isSafeToExpandAt(ExitMainLoopAtSCEV, InsertPt, SE)) {
      LLVM_DEBUG(dbgs() << "irce: could not prove that it is safe to expand the"
                        << " main loop exit limit " << *ExitMainLoopAtSCEV
                        << " at block " << InsertPt->getParent()->getName()
                        << "\n");
      return false;
    }

    ExitMainLoopAt = Expander.expandCodeFor(ExitMainLoopAtSCEV, IVTy, InsertPt);
    ExitMainLoopAt->setName("exit.mainloop.at");
  }

  // We clone these ahead of time so that we don't have to deal with changing
  // and temporarily invalid IR as we transform the loops.
  if (NeedsPreLoop)
    cloneLoop(PreLoop, "preloop");
  if (NeedsPostLoop)
    cloneLoop(PostLoop, "postloop");

  RewrittenRangeInfo PreLoopRRI;

  if (NeedsPreLoop) {
    Preheader->getTerminator()->replaceUsesOfWith(MainLoopStructure.Header,
                                                  PreLoop.Structure.Header);

    MainLoopPreheader =
        createPreheader(MainLoopStructure, Preheader, "mainloop");
    PreLoopRRI = changeIterationSpaceEnd(PreLoop.Structure, Preheader,
                                         ExitPreLoopAt, MainLoopPreheader);
    rewriteIncomingValuesForPHIs(MainLoopStructure, MainLoopPreheader,
                                 PreLoopRRI);
  }

  BasicBlock *PostLoopPreheader = nullptr;
  RewrittenRangeInfo PostLoopRRI;

  if (NeedsPostLoop) {
    PostLoopPreheader =
        createPreheader(PostLoop.Structure, Preheader, "postloop");
    PostLoopRRI = changeIterationSpaceEnd(MainLoopStructure, MainLoopPreheader,
                                          ExitMainLoopAt, PostLoopPreheader);
    rewriteIncomingValuesForPHIs(PostLoop.Structure, PostLoopPreheader,
                                 PostLoopRRI);
  }

  BasicBlock *NewMainLoopPreheader =
      MainLoopPreheader != Preheader ? MainLoopPreheader : nullptr;
  BasicBlock *NewBlocks[] = {PostLoopPreheader,        PreLoopRRI.PseudoExit,
                             PreLoopRRI.ExitSelector,  PostLoopRRI.PseudoExit,
                             PostLoopRRI.ExitSelector, NewMainLoopPreheader};

  // Some of the above may be nullptr, filter them out before passing to
  // addToParentLoopIfNeeded.
  auto NewBlocksEnd =
      std::remove(std::begin(NewBlocks), std::end(NewBlocks), nullptr);

  addToParentLoopIfNeeded(makeArrayRef(std::begin(NewBlocks), NewBlocksEnd));
  addToParentLoopIfNeeded(PreLoop.Blocks);
  addToParentLoopIfNeeded(PostLoop.Blocks);

  // We need to first add all the pre and post loop blocks into the loop
  // structures (as part of createClonedLoopStructure), and then update the
  // LCSSA form and LoopSimplifyForm. This is necessary for correctly updating
  // LI when LoopSimplifyForm is generated.
  Loop *PreL = nullptr, *PostL = nullptr;
  if (!PreLoop.Blocks.empty()) {
    PreL = createClonedLoopStructure(&OriginalLoop,
                                     OriginalLoop.getParentLoop(), PreLoop.Map,
                                     /* IsSubLoop */ false);
  }

  if (!PostLoop.Blocks.empty()) {
    PostL =
        createClonedLoopStructure(&OriginalLoop, OriginalLoop.getParentLoop(),
                                  PostLoop.Map, /* IsSubLoop */ false);
  }

  // This function canonicalizes the loop into Loop-Simplify and LCSSA forms.
  auto CanonicalizeLoop = [&] (Loop *L, bool IsOriginalLoop) {
    formLCSSARecursively(*L, DT, &LI, &SE);
    simplifyLoop(L, &DT, &LI, &SE, nullptr, nullptr, true);
    // Pre/post loops are slow paths, we do not need to perform any loop
    // optimizations on them.
    if (!IsOriginalLoop)
      DisableAllLoopOptsOnLoop(*L);
  };
  if (PreL)
    CanonicalizeLoop(PreL, false);
  if (PostL)
    CanonicalizeLoop(PostL, false);
  CanonicalizeLoop(&OriginalLoop, true);

  return true;
}

/// Computes and returns a range of values for the induction variable (IndVar)
/// in which the range check can be safely elided.  If it cannot compute such a
/// range, returns None.
Optional<InductiveRangeCheck::Range>
InductiveRangeCheck::computeSafeIterationSpace(
    ScalarEvolution &SE, const SCEVAddRecExpr *IndVar,
    bool IsLatchSigned) const {
  // We can deal with the case when the types of the latch check and the range
  // check don't match, as long as the latch check is the narrower one.
  auto *IVType = cast<IntegerType>(IndVar->getType());
  auto *RCType = cast<IntegerType>(getBegin()->getType());
  if (IVType->getBitWidth() > RCType->getBitWidth())
    return None;
  // IndVar is of the form "A + B * I" (where "I" is the canonical induction
  // variable, that may or may not exist as a real llvm::Value in the loop) and
  // this inductive range check is a range check on the "C + D * I" ("C" is
  // getBegin() and "D" is getStep()).  We rewrite the value being range
  // checked to "M + N * IndVar" where "N" = "D * B^(-1)" and "M" = "C - NA".
  //
  // The actual inequalities we solve are of the form
  //
  //   0 <= M + 1 * IndVar < L given L >= 0  (i.e. N == 1)
  //
  // Here L stands for the upper limit of the safe iteration space.
  // The inequality is satisfied by (0 - M) <= IndVar < (L - M). To avoid
  // overflows when calculating (0 - M) and (L - M) we, depending on the type
  // of the IV's iteration space, limit the calculations by the borders of the
  // iteration space.  For example, if IndVar is unsigned, (0 - M) overflows
  // for any M > 0.  If we figured out that "anything greater than (-M) is
  // safe", we strengthen this to "everything greater than 0 is safe",
  // assuming that values between -M and 0 just do not exist in the unsigned
  // iteration space, and we don't want to deal with overflown values.
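
  // Illustrative example (not from the original source): if the latch IV is
  // {0,+,1} and the range check is on the value "i + 10" (so A = 0, C = 10,
  // D = B = 1), then M = C - A = 10 and, with L = len, the safe iteration
  // space before clamping is [0 - 10, len - 10) = [-10, len - 10); the
  // clamping below keeps only the representable part of that range.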

  if (!IndVar->isAffine())
    return None;

  const SCEV *A = NoopOrExtend(IndVar->getStart(), RCType, SE, IsLatchSigned);
  const SCEVConstant *B = dyn_cast<SCEVConstant>(
      NoopOrExtend(IndVar->getStepRecurrence(SE), RCType, SE, IsLatchSigned));
  if (!B)
    return None;
  assert(!B->isZero() && "Recurrence with zero step?");

  const SCEV *C = getBegin();
  const SCEVConstant *D = dyn_cast<SCEVConstant>(getStep());
  if (D != B)
    return None;

  assert(!D->getValue()->isZero() && "Recurrence with zero step?");
  unsigned BitWidth = RCType->getBitWidth();
  const SCEV *SIntMax = SE.getConstant(APInt::getSignedMaxValue(BitWidth));
  // Subtract Y from X so that it does not go through the border of the IV
  // iteration space. Mathematically, it is equivalent to:
  //
  //    ClampedSubtract(X, Y) = min(max(X - Y, INT_MIN), INT_MAX).  [1]
  //
  // In [1], 'X - Y' is a mathematical subtraction (the result is not bounded
  // to any width of bit grid). But after we take min/max, the result is
  // guaranteed to be within [INT_MIN, INT_MAX].
  //
  // In [1], INT_MAX and INT_MIN are respectively the signed and unsigned
  // max/min values, depending on the type of latch condition that defines the
  // IV iteration space.
  auto ClampedSubtract = [&](const SCEV *X, const SCEV *Y) {
    // FIXME: The current implementation assumes that X is in [0, SINT_MAX].
    // This is required to ensure that SINT_MAX - X does not overflow signed
    // and that X - Y does not overflow unsigned if Y is negative. Can we lift
    // this restriction and make it work for negative X as well?
    if (IsLatchSigned) {
      // X is a number from the signed range, Y is interpreted as signed.
      // Even if Y is SINT_MAX, (X - Y) does not reach SINT_MIN. So the only
      // thing we should care about is that we didn't cross SINT_MAX.
      // So, if Y is positive, we subtract Y safely.
      //   Rule 1: Y > 0 ---> Y.
      // If 0 <= -Y <= (SINT_MAX - X), we subtract Y safely.
      //   Rule 2: Y >=s (X - SINT_MAX) ---> Y.
      // If 0 <= (SINT_MAX - X) < -Y, we can only subtract (X - SINT_MAX).
      //   Rule 3: Y <s (X - SINT_MAX) ---> (X - SINT_MAX).
      // It gives us smax(Y, X - SINT_MAX) to subtract in all cases.
      const SCEV *XMinusSIntMax = SE.getMinusSCEV(X, SIntMax);
      return SE.getMinusSCEV(X, SE.getSMaxExpr(Y, XMinusSIntMax),
                             SCEV::FlagNSW);
    } else
      // X is a number from the unsigned range, Y is interpreted as signed.
      // Even if Y is SINT_MIN, (X - Y) does not reach UINT_MAX. So the only
      // thing we should care about is that we didn't cross zero.
      // So, if Y is negative, we subtract Y safely.
      //   Rule 1: Y <s 0 ---> Y.
      // If 0 <= Y <= X, we subtract Y safely.
      //   Rule 2: Y <=s X ---> Y.
      // If 0 <= X < Y, we should stop at 0 and can only subtract X.
      //   Rule 3: Y >s X ---> X.
      // It gives us smin(X, Y) to subtract in all cases.
      return SE.getMinusSCEV(X, SE.getSMinExpr(X, Y), SCEV::FlagNUW);
  };
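  // Worked example for ClampedSubtract above (illustrative values, added
  // comment): with 8-bit signed arithmetic, ClampedSubtract(100, -100)
  // computes 100 - smax(-100, 100 - 127) = 100 - (-27) = 127 (SINT_MAX)
  // instead of the mathematically exact 200, which would have wrapped.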
  const SCEV *M = SE.getMinusSCEV(C, A);
  const SCEV *Zero = SE.getZero(M->getType());
  // This function returns a SCEV equal to 1 if X is non-negative, 0 otherwise.
  auto SCEVCheckNonNegative = [&](const SCEV *X) {
    const Loop *L = IndVar->getLoop();
    const SCEV *One = SE.getOne(X->getType());
    // Can we trivially prove that X is a non-negative or negative value?
    if (isKnownNonNegativeInLoop(X, L, SE))
      return One;
    else if (isKnownNegativeInLoop(X, L, SE))
      return Zero;
    // If not, we will have to figure it out during the execution.
    // The function smax(smin(X, 0), -1) + 1 equals 1 if X >= 0 and 0 if X < 0.
    const SCEV *NegOne = SE.getNegativeSCEV(One);
    return SE.getAddExpr(SE.getSMaxExpr(SE.getSMinExpr(X, Zero), NegOne), One);
  };
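  // Worked evaluation of the expression above (added comment): for X = 7,
  // smax(smin(7, 0), -1) + 1 = smax(0, -1) + 1 = 0 + 1 = 1; for X = -7,
  // smax(smin(-7, 0), -1) + 1 = smax(-7, -1) + 1 = -1 + 1 = 0.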
  // FIXME: The current implementation of ClampedSubtract implicitly assumes
  // that X is non-negative (in the sense of a signed value). We need to
  // re-implement this function in a way that it will correctly handle negative
  // X as well. We use it twice: for X = 0 everything is fine, but for
  // X = getEnd() we can end up with a negative X and produce wrong results. So
  // currently we ensure that if getEnd() is negative then both ends of the
  // safe range are zero.
  // Note that this may pessimize elimination of unsigned range checks against
  // negative values.
  const SCEV *REnd = getEnd();
  const SCEV *EndIsNonNegative = SCEVCheckNonNegative(REnd);

  const SCEV *Begin = SE.getMulExpr(ClampedSubtract(Zero, M), EndIsNonNegative);
  const SCEV *End = SE.getMulExpr(ClampedSubtract(REnd, M), EndIsNonNegative);

  return InductiveRangeCheck::Range(Begin, End);
}
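// Intersects the (possibly absent) accumulated safe range R1 with R2 using
// signed min/max of the endpoints. Returns R2 if R1 is None, and None if R2
// is empty, the types differ, or the resulting intersection is empty.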
static Optional<InductiveRangeCheck::Range>
IntersectSignedRange(ScalarEvolution &SE,
                     const Optional<InductiveRangeCheck::Range> &R1,
                     const InductiveRangeCheck::Range &R2) {
  if (R2.isEmpty(SE, /* IsSigned */ true))
    return None;
  if (!R1.hasValue())
    return R2;
  auto &R1Value = R1.getValue();
  // We never return empty ranges from this function, and R1 is supposed to be
  // a result of intersection. Thus, R1 is never empty.
  assert(!R1Value.isEmpty(SE, /* IsSigned */ true) &&
         "We should never have empty R1!");

  // TODO: we could widen the smaller range and have this work; but for now we
  // bail out to keep things simple.
  if (R1Value.getType() != R2.getType())
    return None;

  const SCEV *NewBegin = SE.getSMaxExpr(R1Value.getBegin(), R2.getBegin());
  const SCEV *NewEnd = SE.getSMinExpr(R1Value.getEnd(), R2.getEnd());

  // If the resulting range is empty, just return None.
  auto Ret = InductiveRangeCheck::Range(NewBegin, NewEnd);
  if (Ret.isEmpty(SE, /* IsSigned */ true))
    return None;
  return Ret;
}
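// Same as IntersectSignedRange, but the endpoints are combined with unsigned
// min/max and emptiness is checked under an unsigned interpretation.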
static Optional<InductiveRangeCheck::Range>
IntersectUnsignedRange(ScalarEvolution &SE,
                       const Optional<InductiveRangeCheck::Range> &R1,
                       const InductiveRangeCheck::Range &R2) {
  if (R2.isEmpty(SE, /* IsSigned */ false))
    return None;
  if (!R1.hasValue())
    return R2;
  auto &R1Value = R1.getValue();
  // We never return empty ranges from this function, and R1 is supposed to be
  // a result of intersection. Thus, R1 is never empty.
  assert(!R1Value.isEmpty(SE, /* IsSigned */ false) &&
         "We should never have empty R1!");

  // TODO: we could widen the smaller range and have this work; but for now we
  // bail out to keep things simple.
  if (R1Value.getType() != R2.getType())
    return None;

  const SCEV *NewBegin = SE.getUMaxExpr(R1Value.getBegin(), R2.getBegin());
  const SCEV *NewEnd = SE.getUMinExpr(R1Value.getEnd(), R2.getEnd());

  // If the resulting range is empty, just return None.
  auto Ret = InductiveRangeCheck::Range(NewBegin, NewEnd);
  if (Ret.isEmpty(SE, /* IsSigned */ false))
    return None;
  return Ret;
}
PreservedAnalyses IRCEPass::run(Function &F, FunctionAnalysisManager &AM) {
  auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
  auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
  auto &BPI = AM.getResult<BranchProbabilityAnalysis>(F);
  LoopInfo &LI = AM.getResult<LoopAnalysis>(F);

  // Get the BFI analysis result on demand. Please note that modification of
  // the CFG invalidates this analysis, and we should handle that.
  auto getBFI = [&F, &AM]() -> BlockFrequencyInfo & {
    return AM.getResult<BlockFrequencyAnalysis>(F);
  };
  InductiveRangeCheckElimination IRCE(SE, &BPI, DT, LI, { getBFI });

  bool Changed = false;
  {
    bool CFGChanged = false;
    for (const auto &L : LI) {
      CFGChanged |= simplifyLoop(L, &DT, &LI, &SE, nullptr, nullptr,
                                 /*PreserveLCSSA=*/false);
      Changed |= formLCSSARecursively(*L, DT, &LI, &SE);
    }
    Changed |= CFGChanged;

    if (CFGChanged && !SkipProfitabilityChecks) {
      PreservedAnalyses PA = PreservedAnalyses::all();
      PA.abandon<BlockFrequencyAnalysis>();
      AM.invalidate(F, PA);
    }
  }

  SmallPriorityWorklist<Loop *, 4> Worklist;
  appendLoopsToWorklist(LI, Worklist);
  auto LPMAddNewLoop = [&Worklist](Loop *NL, bool IsSubloop) {
    if (!IsSubloop)
      appendLoopsToWorklist(*NL, Worklist);
  };

  while (!Worklist.empty()) {
    Loop *L = Worklist.pop_back_val();
    if (IRCE.run(L, LPMAddNewLoop)) {
      Changed = true;
      if (!SkipProfitabilityChecks) {
        PreservedAnalyses PA = PreservedAnalyses::all();
        PA.abandon<BlockFrequencyAnalysis>();
        AM.invalidate(F, PA);
      }
    }
  }

  if (!Changed)
    return PreservedAnalyses::all();
  return getLoopPassPreservedAnalyses();
}
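// Legacy pass manager driver. It performs the same loop-simplify/LCSSA
// preparation and worklist processing as IRCEPass::run above, but constructs
// InductiveRangeCheckElimination without the on-demand BFI callback.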
bool IRCELegacyPass::runOnFunction(Function &F) {
  if (skipFunction(F))
    return false;

  ScalarEvolution &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
  BranchProbabilityInfo &BPI =
      getAnalysis<BranchProbabilityInfoWrapperPass>().getBPI();
  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
  InductiveRangeCheckElimination IRCE(SE, &BPI, DT, LI);

  bool Changed = false;

  for (const auto &L : LI) {
    Changed |= simplifyLoop(L, &DT, &LI, &SE, nullptr, nullptr,
                            /*PreserveLCSSA=*/false);
    Changed |= formLCSSARecursively(*L, DT, &LI, &SE);
  }

  SmallPriorityWorklist<Loop *, 4> Worklist;
  appendLoopsToWorklist(LI, Worklist);
  auto LPMAddNewLoop = [&](Loop *NL, bool IsSubloop) {
    if (!IsSubloop)
      appendLoopsToWorklist(*NL, Worklist);
  };

  while (!Worklist.empty()) {
    Loop *L = Worklist.pop_back_val();
    Changed |= IRCE.run(L, LPMAddNewLoop);
  }

  return Changed;
}
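// Profitability heuristic: give up on loops that are not expected to run for
// at least MinRuntimeIterations iterations per entry, estimated from block
// frequencies when BFI is available and from the latch exit probability
// otherwise.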
bool
InductiveRangeCheckElimination::isProfitableToTransform(const Loop &L,
                                                        LoopStructure &LS) {
  if (SkipProfitabilityChecks)
    return true;
  if (GetBFI.hasValue()) {
    BlockFrequencyInfo &BFI = (*GetBFI)();
    uint64_t hFreq = BFI.getBlockFreq(LS.Header).getFrequency();
    uint64_t phFreq = BFI.getBlockFreq(L.getLoopPreheader()).getFrequency();
    if (phFreq != 0 && hFreq != 0 && (hFreq / phFreq < MinRuntimeIterations)) {
      LLVM_DEBUG(dbgs() << "irce: could not prove profitability: "
                        << "the estimated number of iterations based on "
                           "frequency info is " << (hFreq / phFreq) << "\n";);
      return false;
    }
    return true;
  }

  if (!BPI)
    return true;
  BranchProbability ExitProbability =
      BPI->getEdgeProbability(LS.Latch, LS.LatchBrExitIdx);
  if (ExitProbability > BranchProbability(1, MinRuntimeIterations)) {
    LLVM_DEBUG(dbgs() << "irce: could not prove profitability: "
                      << "the exit probability is too big " << ExitProbability
                      << "\n";);
    return false;
  }
  return true;
}
bool InductiveRangeCheckElimination::run(
    Loop *L, function_ref<void(Loop *, bool)> LPMAddNewLoop) {
  if (L->getBlocks().size() >= LoopSizeCutoff) {
    LLVM_DEBUG(dbgs() << "irce: giving up constraining loop, too large\n");
    return false;
  }

  BasicBlock *Preheader = L->getLoopPreheader();
  if (!Preheader) {
    LLVM_DEBUG(dbgs() << "irce: loop has no preheader, leaving\n");
    return false;
  }

  LLVMContext &Context = Preheader->getContext();
  SmallVector<InductiveRangeCheck, 16> RangeChecks;

  for (auto BBI : L->getBlocks())
    if (BranchInst *TBI = dyn_cast<BranchInst>(BBI->getTerminator()))
      InductiveRangeCheck::extractRangeChecksFromBranch(TBI, L, SE, BPI,
                                                        RangeChecks);

  if (RangeChecks.empty())
    return false;

  auto PrintRecognizedRangeChecks = [&](raw_ostream &OS) {
    OS << "irce: looking at loop "; L->print(OS);
    OS << "irce: loop has " << RangeChecks.size()
       << " inductive range checks: \n";
    for (InductiveRangeCheck &IRC : RangeChecks)
      IRC.print(OS);
  };

  LLVM_DEBUG(PrintRecognizedRangeChecks(dbgs()));

  if (PrintRangeChecks)
    PrintRecognizedRangeChecks(errs());

  const char *FailureReason = nullptr;
  Optional<LoopStructure> MaybeLoopStructure =
      LoopStructure::parseLoopStructure(SE, *L, FailureReason);
  if (!MaybeLoopStructure.hasValue()) {
    LLVM_DEBUG(dbgs() << "irce: could not parse loop structure: "
                      << FailureReason << "\n";);
    return false;
  }
  LoopStructure LS = MaybeLoopStructure.getValue();
  if (!isProfitableToTransform(*L, LS))
    return false;
  const SCEVAddRecExpr *IndVar =
      cast<SCEVAddRecExpr>(SE.getMinusSCEV(SE.getSCEV(LS.IndVarBase),
                                           SE.getSCEV(LS.IndVarStep)));

  Optional<InductiveRangeCheck::Range> SafeIterRange;
  Instruction *ExprInsertPt = Preheader->getTerminator();

  SmallVector<InductiveRangeCheck, 4> RangeChecksToEliminate;
  // Based on the type of latch predicate, we interpret the IV iteration range
  // as a signed or unsigned range. We use different min/max functions (signed
  // or unsigned) when intersecting this range with the safe iteration ranges
  // implied by the range checks.
  auto IntersectRange =
      LS.IsSignedPredicate ? IntersectSignedRange : IntersectUnsignedRange;

  IRBuilder<> B(ExprInsertPt);
  for (InductiveRangeCheck &IRC : RangeChecks) {
    auto Result = IRC.computeSafeIterationSpace(SE, IndVar,
                                                LS.IsSignedPredicate);
    if (Result.hasValue()) {
      auto MaybeSafeIterRange =
          IntersectRange(SE, SafeIterRange, Result.getValue());
      if (MaybeSafeIterRange.hasValue()) {
        assert(
            !MaybeSafeIterRange.getValue().isEmpty(SE, LS.IsSignedPredicate) &&
            "We should never return empty ranges!");
        RangeChecksToEliminate.push_back(IRC);
        SafeIterRange = MaybeSafeIterRange.getValue();
      }
    }
  }

  if (!SafeIterRange.hasValue())
    return false;

  LoopConstrainer LC(*L, LI, LPMAddNewLoop, LS, SE, DT,
                     SafeIterRange.getValue());
  bool Changed = LC.run();

  if (Changed) {
    auto PrintConstrainedLoopInfo = [L]() {
      dbgs() << "irce: in function ";
      dbgs() << L->getHeader()->getParent()->getName() << ": ";
      dbgs() << "constrained ";
      L->print(dbgs());
    };

    LLVM_DEBUG(PrintConstrainedLoopInfo());

    if (PrintChangedLoops)
      PrintConstrainedLoopInfo();

    // Optimize away the now-redundant range checks.

    for (InductiveRangeCheck &IRC : RangeChecksToEliminate) {
      ConstantInt *FoldedRangeCheck = IRC.getPassingDirection()
                                          ? ConstantInt::getTrue(Context)
                                          : ConstantInt::getFalse(Context);
      IRC.getCheckUse()->set(FoldedRangeCheck);
    }
  }

  return Changed;
}
Pass *llvm::createInductiveRangeCheckEliminationPass() {
  return new IRCELegacyPass();
}