1 //===- InductiveRangeCheckElimination.cpp - -------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // The InductiveRangeCheckElimination pass splits a loop's iteration space into
10 // three disjoint ranges. It does that in a way such that the loop running in
11 // the middle loop provably does not need range checks. As an example, it will
14 // len = < known positive >
15 // for (i = 0; i < n; i++) {
16 // if (0 <= i && i < len) {
19 // throw_out_of_bounds();
25 // len = < known positive >
26 // limit = smin(n, len)
27 // // no first segment
28 // for (i = 0; i < limit; i++) {
29 // if (0 <= i && i < len) { // this check is fully redundant
32 // throw_out_of_bounds();
35 // for (i = limit; i < n; i++) {
36 // if (0 <= i && i < len) {
39 // throw_out_of_bounds();
43 //===----------------------------------------------------------------------===//
45 #include "llvm/Transforms/Scalar/InductiveRangeCheckElimination.h"
46 #include "llvm/ADT/APInt.h"
47 #include "llvm/ADT/ArrayRef.h"
48 #include "llvm/ADT/None.h"
49 #include "llvm/ADT/Optional.h"
50 #include "llvm/ADT/SmallPtrSet.h"
51 #include "llvm/ADT/SmallVector.h"
52 #include "llvm/ADT/StringRef.h"
53 #include "llvm/ADT/Twine.h"
54 #include "llvm/Analysis/BranchProbabilityInfo.h"
55 #include "llvm/Analysis/LoopAnalysisManager.h"
56 #include "llvm/Analysis/LoopInfo.h"
57 #include "llvm/Analysis/LoopPass.h"
58 #include "llvm/Analysis/ScalarEvolution.h"
59 #include "llvm/Analysis/ScalarEvolutionExpander.h"
60 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
61 #include "llvm/IR/BasicBlock.h"
62 #include "llvm/IR/CFG.h"
63 #include "llvm/IR/Constants.h"
64 #include "llvm/IR/DerivedTypes.h"
65 #include "llvm/IR/Dominators.h"
66 #include "llvm/IR/Function.h"
67 #include "llvm/IR/IRBuilder.h"
68 #include "llvm/IR/InstrTypes.h"
69 #include "llvm/IR/Instructions.h"
70 #include "llvm/IR/Metadata.h"
71 #include "llvm/IR/Module.h"
72 #include "llvm/IR/PatternMatch.h"
73 #include "llvm/IR/Type.h"
74 #include "llvm/IR/Use.h"
75 #include "llvm/IR/User.h"
76 #include "llvm/IR/Value.h"
77 #include "llvm/Pass.h"
78 #include "llvm/Support/BranchProbability.h"
79 #include "llvm/Support/Casting.h"
80 #include "llvm/Support/CommandLine.h"
81 #include "llvm/Support/Compiler.h"
82 #include "llvm/Support/Debug.h"
83 #include "llvm/Support/ErrorHandling.h"
84 #include "llvm/Support/raw_ostream.h"
85 #include "llvm/Transforms/Scalar.h"
86 #include "llvm/Transforms/Utils/Cloning.h"
87 #include "llvm/Transforms/Utils/LoopSimplify.h"
88 #include "llvm/Transforms/Utils/LoopUtils.h"
89 #include "llvm/Transforms/Utils/ValueMapper.h"
98 using namespace llvm::PatternMatch
;
100 static cl::opt
<unsigned> LoopSizeCutoff("irce-loop-size-cutoff", cl::Hidden
,
103 static cl::opt
<bool> PrintChangedLoops("irce-print-changed-loops", cl::Hidden
,
106 static cl::opt
<bool> PrintRangeChecks("irce-print-range-checks", cl::Hidden
,
109 static cl::opt
<int> MaxExitProbReciprocal("irce-max-exit-prob-reciprocal",
110 cl::Hidden
, cl::init(10));
112 static cl::opt
<bool> SkipProfitabilityChecks("irce-skip-profitability-checks",
113 cl::Hidden
, cl::init(false));
115 static cl::opt
<bool> AllowUnsignedLatchCondition("irce-allow-unsigned-latch",
116 cl::Hidden
, cl::init(true));
118 static cl::opt
<bool> AllowNarrowLatchCondition(
119 "irce-allow-narrow-latch", cl::Hidden
, cl::init(true),
120 cl::desc("If set to true, IRCE may eliminate wide range checks in loops "
121 "with narrow latch condition."));
123 static const char *ClonedLoopTag
= "irce.loop.clone";
125 #define DEBUG_TYPE "irce"
129 /// An inductive range check is conditional branch in a loop with
131 /// 1. a very cold successor (i.e. the branch jumps to that successor very
136 /// 2. a condition that is provably true for some contiguous range of values
137 /// taken by the containing loop's induction variable.
139 class InductiveRangeCheck
{
141 const SCEV
*Begin
= nullptr;
142 const SCEV
*Step
= nullptr;
143 const SCEV
*End
= nullptr;
144 Use
*CheckUse
= nullptr;
145 bool IsSigned
= true;
147 static bool parseRangeCheckICmp(Loop
*L
, ICmpInst
*ICI
, ScalarEvolution
&SE
,
148 Value
*&Index
, Value
*&Length
,
152 extractRangeChecksFromCond(Loop
*L
, ScalarEvolution
&SE
, Use
&ConditionUse
,
153 SmallVectorImpl
<InductiveRangeCheck
> &Checks
,
154 SmallPtrSetImpl
<Value
*> &Visited
);
157 const SCEV
*getBegin() const { return Begin
; }
158 const SCEV
*getStep() const { return Step
; }
159 const SCEV
*getEnd() const { return End
; }
160 bool isSigned() const { return IsSigned
; }
162 void print(raw_ostream
&OS
) const {
163 OS
<< "InductiveRangeCheck:\n";
170 OS
<< "\n CheckUse: ";
171 getCheckUse()->getUser()->print(OS
);
172 OS
<< " Operand: " << getCheckUse()->getOperandNo() << "\n";
180 Use
*getCheckUse() const { return CheckUse
; }
182 /// Represents an signed integer range [Range.getBegin(), Range.getEnd()). If
183 /// R.getEnd() le R.getBegin(), then R denotes the empty range.
190 Range(const SCEV
*Begin
, const SCEV
*End
) : Begin(Begin
), End(End
) {
191 assert(Begin
->getType() == End
->getType() && "ill-typed range!");
194 Type
*getType() const { return Begin
->getType(); }
195 const SCEV
*getBegin() const { return Begin
; }
196 const SCEV
*getEnd() const { return End
; }
197 bool isEmpty(ScalarEvolution
&SE
, bool IsSigned
) const {
201 return SE
.isKnownPredicate(ICmpInst::ICMP_SGE
, Begin
, End
);
203 return SE
.isKnownPredicate(ICmpInst::ICMP_UGE
, Begin
, End
);
207 /// This is the value the condition of the branch needs to evaluate to for the
208 /// branch to take the hot successor (see (1) above).
209 bool getPassingDirection() { return true; }
211 /// Computes a range for the induction variable (IndVar) in which the range
212 /// check is redundant and can be constant-folded away. The induction
213 /// variable is not required to be the canonical {0,+,1} induction variable.
214 Optional
<Range
> computeSafeIterationSpace(ScalarEvolution
&SE
,
215 const SCEVAddRecExpr
*IndVar
,
216 bool IsLatchSigned
) const;
218 /// Parse out a set of inductive range checks from \p BI and append them to \p
221 /// NB! There may be conditions feeding into \p BI that aren't inductive range
222 /// checks, and hence don't end up in \p Checks.
224 extractRangeChecksFromBranch(BranchInst
*BI
, Loop
*L
, ScalarEvolution
&SE
,
225 BranchProbabilityInfo
*BPI
,
226 SmallVectorImpl
<InductiveRangeCheck
> &Checks
);
229 class InductiveRangeCheckElimination
{
231 BranchProbabilityInfo
*BPI
;
236 InductiveRangeCheckElimination(ScalarEvolution
&SE
,
237 BranchProbabilityInfo
*BPI
, DominatorTree
&DT
,
239 : SE(SE
), BPI(BPI
), DT(DT
), LI(LI
) {}
241 bool run(Loop
*L
, function_ref
<void(Loop
*, bool)> LPMAddNewLoop
);
244 class IRCELegacyPass
: public LoopPass
{
248 IRCELegacyPass() : LoopPass(ID
) {
249 initializeIRCELegacyPassPass(*PassRegistry::getPassRegistry());
252 void getAnalysisUsage(AnalysisUsage
&AU
) const override
{
253 AU
.addRequired
<BranchProbabilityInfoWrapperPass
>();
254 getLoopAnalysisUsage(AU
);
257 bool runOnLoop(Loop
*L
, LPPassManager
&LPM
) override
;
260 } // end anonymous namespace
262 char IRCELegacyPass::ID
= 0;
264 INITIALIZE_PASS_BEGIN(IRCELegacyPass
, "irce",
265 "Inductive range check elimination", false, false)
266 INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass
)
267 INITIALIZE_PASS_DEPENDENCY(LoopPass
)
268 INITIALIZE_PASS_END(IRCELegacyPass
, "irce", "Inductive range check elimination",
271 /// Parse a single ICmp instruction, `ICI`, into a range check. If `ICI` cannot
272 /// be interpreted as a range check, return false and set `Index` and `Length`
273 /// to `nullptr`. Otherwise set `Index` to the value being range checked, and
274 /// set `Length` to the upper limit `Index` is being range checked.
276 InductiveRangeCheck::parseRangeCheckICmp(Loop
*L
, ICmpInst
*ICI
,
277 ScalarEvolution
&SE
, Value
*&Index
,
278 Value
*&Length
, bool &IsSigned
) {
279 auto IsLoopInvariant
= [&SE
, L
](Value
*V
) {
280 return SE
.isLoopInvariant(SE
.getSCEV(V
), L
);
283 ICmpInst::Predicate Pred
= ICI
->getPredicate();
284 Value
*LHS
= ICI
->getOperand(0);
285 Value
*RHS
= ICI
->getOperand(1);
291 case ICmpInst::ICMP_SLE
:
294 case ICmpInst::ICMP_SGE
:
296 if (match(RHS
, m_ConstantInt
<0>())) {
298 return true; // Lower.
302 case ICmpInst::ICMP_SLT
:
305 case ICmpInst::ICMP_SGT
:
307 if (match(RHS
, m_ConstantInt
<-1>())) {
309 return true; // Lower.
312 if (IsLoopInvariant(LHS
)) {
315 return true; // Upper.
319 case ICmpInst::ICMP_ULT
:
322 case ICmpInst::ICMP_UGT
:
324 if (IsLoopInvariant(LHS
)) {
327 return true; // Both lower and upper.
332 llvm_unreachable("default clause returns!");
335 void InductiveRangeCheck::extractRangeChecksFromCond(
336 Loop
*L
, ScalarEvolution
&SE
, Use
&ConditionUse
,
337 SmallVectorImpl
<InductiveRangeCheck
> &Checks
,
338 SmallPtrSetImpl
<Value
*> &Visited
) {
339 Value
*Condition
= ConditionUse
.get();
340 if (!Visited
.insert(Condition
).second
)
343 // TODO: Do the same for OR, XOR, NOT etc?
344 if (match(Condition
, m_And(m_Value(), m_Value()))) {
345 extractRangeChecksFromCond(L
, SE
, cast
<User
>(Condition
)->getOperandUse(0),
347 extractRangeChecksFromCond(L
, SE
, cast
<User
>(Condition
)->getOperandUse(1),
352 ICmpInst
*ICI
= dyn_cast
<ICmpInst
>(Condition
);
356 Value
*Length
= nullptr, *Index
;
358 if (!parseRangeCheckICmp(L
, ICI
, SE
, Index
, Length
, IsSigned
))
361 const auto *IndexAddRec
= dyn_cast
<SCEVAddRecExpr
>(SE
.getSCEV(Index
));
363 IndexAddRec
&& (IndexAddRec
->getLoop() == L
) && IndexAddRec
->isAffine();
368 const SCEV
*End
= nullptr;
369 // We strengthen "0 <= I" to "0 <= I < INT_SMAX" and "I < L" to "0 <= I < L".
370 // We can potentially do much better here.
372 End
= SE
.getSCEV(Length
);
374 // So far we can only reach this point for Signed range check. This may
375 // change in future. In this case we will need to pick Unsigned max for the
376 // unsigned range check.
377 unsigned BitWidth
= cast
<IntegerType
>(IndexAddRec
->getType())->getBitWidth();
378 const SCEV
*SIntMax
= SE
.getConstant(APInt::getSignedMaxValue(BitWidth
));
382 InductiveRangeCheck IRC
;
384 IRC
.Begin
= IndexAddRec
->getStart();
385 IRC
.Step
= IndexAddRec
->getStepRecurrence(SE
);
386 IRC
.CheckUse
= &ConditionUse
;
387 IRC
.IsSigned
= IsSigned
;
388 Checks
.push_back(IRC
);
391 void InductiveRangeCheck::extractRangeChecksFromBranch(
392 BranchInst
*BI
, Loop
*L
, ScalarEvolution
&SE
, BranchProbabilityInfo
*BPI
,
393 SmallVectorImpl
<InductiveRangeCheck
> &Checks
) {
394 if (BI
->isUnconditional() || BI
->getParent() == L
->getLoopLatch())
397 BranchProbability
LikelyTaken(15, 16);
399 if (!SkipProfitabilityChecks
&& BPI
&&
400 BPI
->getEdgeProbability(BI
->getParent(), (unsigned)0) < LikelyTaken
)
403 SmallPtrSet
<Value
*, 8> Visited
;
404 InductiveRangeCheck::extractRangeChecksFromCond(L
, SE
, BI
->getOperandUse(0),
408 // Add metadata to the loop L to disable loop optimizations. Callers need to
409 // confirm that optimizing loop L is not beneficial.
410 static void DisableAllLoopOptsOnLoop(Loop
&L
) {
411 // We do not care about any existing loopID related metadata for L, since we
412 // are setting all loop metadata to false.
413 LLVMContext
&Context
= L
.getHeader()->getContext();
414 // Reserve first location for self reference to the LoopID metadata node.
415 MDNode
*Dummy
= MDNode::get(Context
, {});
416 MDNode
*DisableUnroll
= MDNode::get(
417 Context
, {MDString::get(Context
, "llvm.loop.unroll.disable")});
419 ConstantAsMetadata::get(ConstantInt::get(Type::getInt1Ty(Context
), 0));
420 MDNode
*DisableVectorize
= MDNode::get(
422 {MDString::get(Context
, "llvm.loop.vectorize.enable"), FalseVal
});
423 MDNode
*DisableLICMVersioning
= MDNode::get(
424 Context
, {MDString::get(Context
, "llvm.loop.licm_versioning.disable")});
425 MDNode
*DisableDistribution
= MDNode::get(
427 {MDString::get(Context
, "llvm.loop.distribute.enable"), FalseVal
});
429 MDNode::get(Context
, {Dummy
, DisableUnroll
, DisableVectorize
,
430 DisableLICMVersioning
, DisableDistribution
});
431 // Set operand 0 to refer to the loop id itself.
432 NewLoopID
->replaceOperandWith(0, NewLoopID
);
433 L
.setLoopID(NewLoopID
);
438 // Keeps track of the structure of a loop. This is similar to llvm::Loop,
439 // except that it is more lightweight and can track the state of a loop through
440 // changing and potentially invalid IR. This structure also formalizes the
441 // kinds of loops we can deal with -- ones that have a single latch that is also
442 // an exiting block *and* have a canonical induction variable.
443 struct LoopStructure
{
444 const char *Tag
= "";
446 BasicBlock
*Header
= nullptr;
447 BasicBlock
*Latch
= nullptr;
449 // `Latch's terminator instruction is `LatchBr', and it's `LatchBrExitIdx'th
450 // successor is `LatchExit', the exit block of the loop.
451 BranchInst
*LatchBr
= nullptr;
452 BasicBlock
*LatchExit
= nullptr;
453 unsigned LatchBrExitIdx
= std::numeric_limits
<unsigned>::max();
455 // The loop represented by this instance of LoopStructure is semantically
458 // intN_ty inc = IndVarIncreasing ? 1 : -1;
459 // pred_ty predicate = IndVarIncreasing ? ICMP_SLT : ICMP_SGT;
461 // for (intN_ty iv = IndVarStart; predicate(iv, LoopExitAt); iv = IndVarBase)
464 Value
*IndVarBase
= nullptr;
465 Value
*IndVarStart
= nullptr;
466 Value
*IndVarStep
= nullptr;
467 Value
*LoopExitAt
= nullptr;
468 bool IndVarIncreasing
= false;
469 bool IsSignedPredicate
= true;
471 LoopStructure() = default;
473 template <typename M
> LoopStructure
map(M Map
) const {
474 LoopStructure Result
;
476 Result
.Header
= cast
<BasicBlock
>(Map(Header
));
477 Result
.Latch
= cast
<BasicBlock
>(Map(Latch
));
478 Result
.LatchBr
= cast
<BranchInst
>(Map(LatchBr
));
479 Result
.LatchExit
= cast
<BasicBlock
>(Map(LatchExit
));
480 Result
.LatchBrExitIdx
= LatchBrExitIdx
;
481 Result
.IndVarBase
= Map(IndVarBase
);
482 Result
.IndVarStart
= Map(IndVarStart
);
483 Result
.IndVarStep
= Map(IndVarStep
);
484 Result
.LoopExitAt
= Map(LoopExitAt
);
485 Result
.IndVarIncreasing
= IndVarIncreasing
;
486 Result
.IsSignedPredicate
= IsSignedPredicate
;
490 static Optional
<LoopStructure
> parseLoopStructure(ScalarEvolution
&,
491 BranchProbabilityInfo
*BPI
,
492 Loop
&, const char *&);
495 /// This class is used to constrain loops to run within a given iteration space.
496 /// The algorithm this class implements is given a Loop and a range [Begin,
497 /// End). The algorithm then tries to break out a "main loop" out of the loop
498 /// it is given in a way that the "main loop" runs with the induction variable
499 /// in a subset of [Begin, End). The algorithm emits appropriate pre and post
500 /// loops to run any remaining iterations. The pre loop runs any iterations in
501 /// which the induction variable is < Begin, and the post loop runs any
502 /// iterations in which the induction variable is >= End.
503 class LoopConstrainer
{
504 // The representation of a clone of the original loop we started out with.
507 std::vector
<BasicBlock
*> Blocks
;
509 // `Map` maps values in the clonee into values in the cloned version
510 ValueToValueMapTy Map
;
512 // An instance of `LoopStructure` for the cloned loop
513 LoopStructure Structure
;
516 // Result of rewriting the range of a loop. See changeIterationSpaceEnd for
517 // more details on what these fields mean.
518 struct RewrittenRangeInfo
{
519 BasicBlock
*PseudoExit
= nullptr;
520 BasicBlock
*ExitSelector
= nullptr;
521 std::vector
<PHINode
*> PHIValuesAtPseudoExit
;
522 PHINode
*IndVarEnd
= nullptr;
524 RewrittenRangeInfo() = default;
527 // Calculated subranges we restrict the iteration space of the main loop to.
528 // See the implementation of `calculateSubRanges' for more details on how
529 // these fields are computed. `LowLimit` is None if there is no restriction
530 // on low end of the restricted iteration space of the main loop. `HighLimit`
531 // is None if there is no restriction on high end of the restricted iteration
532 // space of the main loop.
535 Optional
<const SCEV
*> LowLimit
;
536 Optional
<const SCEV
*> HighLimit
;
539 // A utility function that does a `replaceUsesOfWith' on the incoming block
540 // set of a `PHINode' -- replaces instances of `Block' in the `PHINode's
541 // incoming block list with `ReplaceBy'.
542 static void replacePHIBlock(PHINode
*PN
, BasicBlock
*Block
,
543 BasicBlock
*ReplaceBy
);
545 // Compute a safe set of limits for the main loop to run in -- effectively the
546 // intersection of `Range' and the iteration space of the original loop.
547 // Return None if unable to compute the set of subranges.
548 Optional
<SubRanges
> calculateSubRanges(bool IsSignedPredicate
) const;
550 // Clone `OriginalLoop' and return the result in CLResult. The IR after
551 // running `cloneLoop' is well formed except for the PHI nodes in CLResult --
552 // the PHI nodes say that there is an incoming edge from `OriginalPreheader`
553 // but there is no such edge.
554 void cloneLoop(ClonedLoop
&CLResult
, const char *Tag
) const;
556 // Create the appropriate loop structure needed to describe a cloned copy of
557 // `Original`. The clone is described by `VM`.
558 Loop
*createClonedLoopStructure(Loop
*Original
, Loop
*Parent
,
559 ValueToValueMapTy
&VM
, bool IsSubloop
);
561 // Rewrite the iteration space of the loop denoted by (LS, Preheader). The
562 // iteration space of the rewritten loop ends at ExitLoopAt. The start of the
563 // iteration space is not changed. `ExitLoopAt' is assumed to be slt
564 // `OriginalHeaderCount'.
566 // If there are iterations left to execute, control is made to jump to
567 // `ContinuationBlock', otherwise they take the normal loop exit. The
568 // returned `RewrittenRangeInfo' object is populated as follows:
570 // .PseudoExit is a basic block that unconditionally branches to
571 // `ContinuationBlock'.
573 // .ExitSelector is a basic block that decides, on exit from the loop,
574 // whether to branch to the "true" exit or to `PseudoExit'.
576 // .PHIValuesAtPseudoExit are PHINodes in `PseudoExit' that compute the value
577 // for each PHINode in the loop header on taking the pseudo exit.
579 // After changeIterationSpaceEnd, `Preheader' is no longer a legitimate
580 // preheader because it is made to branch to the loop header only
583 changeIterationSpaceEnd(const LoopStructure
&LS
, BasicBlock
*Preheader
,
585 BasicBlock
*ContinuationBlock
) const;
587 // The loop denoted by `LS' has `OldPreheader' as its preheader. This
588 // function creates a new preheader for `LS' and returns it.
589 BasicBlock
*createPreheader(const LoopStructure
&LS
, BasicBlock
*OldPreheader
,
590 const char *Tag
) const;
592 // `ContinuationBlockAndPreheader' was the continuation block for some call to
593 // `changeIterationSpaceEnd' and is the preheader to the loop denoted by `LS'.
594 // This function rewrites the PHI nodes in `LS.Header' to start with the
596 void rewriteIncomingValuesForPHIs(
597 LoopStructure
&LS
, BasicBlock
*ContinuationBlockAndPreheader
,
598 const LoopConstrainer::RewrittenRangeInfo
&RRI
) const;
600 // Even though we do not preserve any passes at this time, we at least need to
601 // keep the parent loop structure consistent. The `LPPassManager' seems to
602 // verify this after running a loop pass. This function adds the list of
603 // blocks denoted by BBs to this loops parent loop if required.
604 void addToParentLoopIfNeeded(ArrayRef
<BasicBlock
*> BBs
);
606 // Some global state.
612 function_ref
<void(Loop
*, bool)> LPMAddNewLoop
;
614 // Information about the original loop we started out with.
617 const SCEV
*LatchTakenCount
= nullptr;
618 BasicBlock
*OriginalPreheader
= nullptr;
620 // The preheader of the main loop. This may or may not be different from
621 // `OriginalPreheader'.
622 BasicBlock
*MainLoopPreheader
= nullptr;
624 // The range we need to run the main loop in.
625 InductiveRangeCheck::Range Range
;
627 // The structure of the main loop (see comment at the beginning of this class
629 LoopStructure MainLoopStructure
;
632 LoopConstrainer(Loop
&L
, LoopInfo
&LI
,
633 function_ref
<void(Loop
*, bool)> LPMAddNewLoop
,
634 const LoopStructure
&LS
, ScalarEvolution
&SE
,
635 DominatorTree
&DT
, InductiveRangeCheck::Range R
)
636 : F(*L
.getHeader()->getParent()), Ctx(L
.getHeader()->getContext()),
637 SE(SE
), DT(DT
), LI(LI
), LPMAddNewLoop(LPMAddNewLoop
), OriginalLoop(L
),
638 Range(R
), MainLoopStructure(LS
) {}
640 // Entry point for the algorithm. Returns true on success.
644 } // end anonymous namespace
646 void LoopConstrainer::replacePHIBlock(PHINode
*PN
, BasicBlock
*Block
,
647 BasicBlock
*ReplaceBy
) {
648 for (unsigned i
= 0, e
= PN
->getNumIncomingValues(); i
!= e
; ++i
)
649 if (PN
->getIncomingBlock(i
) == Block
)
650 PN
->setIncomingBlock(i
, ReplaceBy
);
653 /// Given a loop with an deccreasing induction variable, is it possible to
654 /// safely calculate the bounds of a new loop using the given Predicate.
655 static bool isSafeDecreasingBound(const SCEV
*Start
,
656 const SCEV
*BoundSCEV
, const SCEV
*Step
,
657 ICmpInst::Predicate Pred
,
658 unsigned LatchBrExitIdx
,
659 Loop
*L
, ScalarEvolution
&SE
) {
660 if (Pred
!= ICmpInst::ICMP_SLT
&& Pred
!= ICmpInst::ICMP_SGT
&&
661 Pred
!= ICmpInst::ICMP_ULT
&& Pred
!= ICmpInst::ICMP_UGT
)
664 if (!SE
.isAvailableAtLoopEntry(BoundSCEV
, L
))
667 assert(SE
.isKnownNegative(Step
) && "expecting negative step");
669 LLVM_DEBUG(dbgs() << "irce: isSafeDecreasingBound with:\n");
670 LLVM_DEBUG(dbgs() << "irce: Start: " << *Start
<< "\n");
671 LLVM_DEBUG(dbgs() << "irce: Step: " << *Step
<< "\n");
672 LLVM_DEBUG(dbgs() << "irce: BoundSCEV: " << *BoundSCEV
<< "\n");
673 LLVM_DEBUG(dbgs() << "irce: Pred: " << ICmpInst::getPredicateName(Pred
)
675 LLVM_DEBUG(dbgs() << "irce: LatchExitBrIdx: " << LatchBrExitIdx
<< "\n");
677 bool IsSigned
= ICmpInst::isSigned(Pred
);
678 // The predicate that we need to check that the induction variable lies
680 ICmpInst::Predicate BoundPred
=
681 IsSigned
? CmpInst::ICMP_SGT
: CmpInst::ICMP_UGT
;
683 if (LatchBrExitIdx
== 1)
684 return SE
.isLoopEntryGuardedByCond(L
, BoundPred
, Start
, BoundSCEV
);
686 assert(LatchBrExitIdx
== 0 &&
687 "LatchBrExitIdx should be either 0 or 1");
689 const SCEV
*StepPlusOne
= SE
.getAddExpr(Step
, SE
.getOne(Step
->getType()));
690 unsigned BitWidth
= cast
<IntegerType
>(BoundSCEV
->getType())->getBitWidth();
691 APInt Min
= IsSigned
? APInt::getSignedMinValue(BitWidth
) :
692 APInt::getMinValue(BitWidth
);
693 const SCEV
*Limit
= SE
.getMinusSCEV(SE
.getConstant(Min
), StepPlusOne
);
695 const SCEV
*MinusOne
=
696 SE
.getMinusSCEV(BoundSCEV
, SE
.getOne(BoundSCEV
->getType()));
698 return SE
.isLoopEntryGuardedByCond(L
, BoundPred
, Start
, MinusOne
) &&
699 SE
.isLoopEntryGuardedByCond(L
, BoundPred
, BoundSCEV
, Limit
);
703 /// Given a loop with an increasing induction variable, is it possible to
704 /// safely calculate the bounds of a new loop using the given Predicate.
705 static bool isSafeIncreasingBound(const SCEV
*Start
,
706 const SCEV
*BoundSCEV
, const SCEV
*Step
,
707 ICmpInst::Predicate Pred
,
708 unsigned LatchBrExitIdx
,
709 Loop
*L
, ScalarEvolution
&SE
) {
710 if (Pred
!= ICmpInst::ICMP_SLT
&& Pred
!= ICmpInst::ICMP_SGT
&&
711 Pred
!= ICmpInst::ICMP_ULT
&& Pred
!= ICmpInst::ICMP_UGT
)
714 if (!SE
.isAvailableAtLoopEntry(BoundSCEV
, L
))
717 LLVM_DEBUG(dbgs() << "irce: isSafeIncreasingBound with:\n");
718 LLVM_DEBUG(dbgs() << "irce: Start: " << *Start
<< "\n");
719 LLVM_DEBUG(dbgs() << "irce: Step: " << *Step
<< "\n");
720 LLVM_DEBUG(dbgs() << "irce: BoundSCEV: " << *BoundSCEV
<< "\n");
721 LLVM_DEBUG(dbgs() << "irce: Pred: " << ICmpInst::getPredicateName(Pred
)
723 LLVM_DEBUG(dbgs() << "irce: LatchExitBrIdx: " << LatchBrExitIdx
<< "\n");
725 bool IsSigned
= ICmpInst::isSigned(Pred
);
726 // The predicate that we need to check that the induction variable lies
728 ICmpInst::Predicate BoundPred
=
729 IsSigned
? CmpInst::ICMP_SLT
: CmpInst::ICMP_ULT
;
731 if (LatchBrExitIdx
== 1)
732 return SE
.isLoopEntryGuardedByCond(L
, BoundPred
, Start
, BoundSCEV
);
734 assert(LatchBrExitIdx
== 0 && "LatchBrExitIdx should be 0 or 1");
736 const SCEV
*StepMinusOne
=
737 SE
.getMinusSCEV(Step
, SE
.getOne(Step
->getType()));
738 unsigned BitWidth
= cast
<IntegerType
>(BoundSCEV
->getType())->getBitWidth();
739 APInt Max
= IsSigned
? APInt::getSignedMaxValue(BitWidth
) :
740 APInt::getMaxValue(BitWidth
);
741 const SCEV
*Limit
= SE
.getMinusSCEV(SE
.getConstant(Max
), StepMinusOne
);
743 return (SE
.isLoopEntryGuardedByCond(L
, BoundPred
, Start
,
744 SE
.getAddExpr(BoundSCEV
, Step
)) &&
745 SE
.isLoopEntryGuardedByCond(L
, BoundPred
, BoundSCEV
, Limit
));
748 Optional
<LoopStructure
>
749 LoopStructure::parseLoopStructure(ScalarEvolution
&SE
,
750 BranchProbabilityInfo
*BPI
, Loop
&L
,
751 const char *&FailureReason
) {
752 if (!L
.isLoopSimplifyForm()) {
753 FailureReason
= "loop not in LoopSimplify form";
757 BasicBlock
*Latch
= L
.getLoopLatch();
758 assert(Latch
&& "Simplified loops only have one latch!");
760 if (Latch
->getTerminator()->getMetadata(ClonedLoopTag
)) {
761 FailureReason
= "loop has already been cloned";
765 if (!L
.isLoopExiting(Latch
)) {
766 FailureReason
= "no loop latch";
770 BasicBlock
*Header
= L
.getHeader();
771 BasicBlock
*Preheader
= L
.getLoopPreheader();
773 FailureReason
= "no preheader";
777 BranchInst
*LatchBr
= dyn_cast
<BranchInst
>(Latch
->getTerminator());
778 if (!LatchBr
|| LatchBr
->isUnconditional()) {
779 FailureReason
= "latch terminator not conditional branch";
783 unsigned LatchBrExitIdx
= LatchBr
->getSuccessor(0) == Header
? 1 : 0;
785 BranchProbability ExitProbability
=
786 BPI
? BPI
->getEdgeProbability(LatchBr
->getParent(), LatchBrExitIdx
)
787 : BranchProbability::getZero();
789 if (!SkipProfitabilityChecks
&&
790 ExitProbability
> BranchProbability(1, MaxExitProbReciprocal
)) {
791 FailureReason
= "short running loop, not profitable";
795 ICmpInst
*ICI
= dyn_cast
<ICmpInst
>(LatchBr
->getCondition());
796 if (!ICI
|| !isa
<IntegerType
>(ICI
->getOperand(0)->getType())) {
797 FailureReason
= "latch terminator branch not conditional on integral icmp";
801 const SCEV
*LatchCount
= SE
.getExitCount(&L
, Latch
);
802 if (isa
<SCEVCouldNotCompute
>(LatchCount
)) {
803 FailureReason
= "could not compute latch count";
807 ICmpInst::Predicate Pred
= ICI
->getPredicate();
808 Value
*LeftValue
= ICI
->getOperand(0);
809 const SCEV
*LeftSCEV
= SE
.getSCEV(LeftValue
);
810 IntegerType
*IndVarTy
= cast
<IntegerType
>(LeftValue
->getType());
812 Value
*RightValue
= ICI
->getOperand(1);
813 const SCEV
*RightSCEV
= SE
.getSCEV(RightValue
);
815 // We canonicalize `ICI` such that `LeftSCEV` is an add recurrence.
816 if (!isa
<SCEVAddRecExpr
>(LeftSCEV
)) {
817 if (isa
<SCEVAddRecExpr
>(RightSCEV
)) {
818 std::swap(LeftSCEV
, RightSCEV
);
819 std::swap(LeftValue
, RightValue
);
820 Pred
= ICmpInst::getSwappedPredicate(Pred
);
822 FailureReason
= "no add recurrences in the icmp";
827 auto HasNoSignedWrap
= [&](const SCEVAddRecExpr
*AR
) {
828 if (AR
->getNoWrapFlags(SCEV::FlagNSW
))
831 IntegerType
*Ty
= cast
<IntegerType
>(AR
->getType());
832 IntegerType
*WideTy
=
833 IntegerType::get(Ty
->getContext(), Ty
->getBitWidth() * 2);
835 const SCEVAddRecExpr
*ExtendAfterOp
=
836 dyn_cast
<SCEVAddRecExpr
>(SE
.getSignExtendExpr(AR
, WideTy
));
838 const SCEV
*ExtendedStart
= SE
.getSignExtendExpr(AR
->getStart(), WideTy
);
839 const SCEV
*ExtendedStep
=
840 SE
.getSignExtendExpr(AR
->getStepRecurrence(SE
), WideTy
);
842 bool NoSignedWrap
= ExtendAfterOp
->getStart() == ExtendedStart
&&
843 ExtendAfterOp
->getStepRecurrence(SE
) == ExtendedStep
;
849 // We may have proved this when computing the sign extension above.
850 return AR
->getNoWrapFlags(SCEV::FlagNSW
) != SCEV::FlagAnyWrap
;
853 // `ICI` is interpreted as taking the backedge if the *next* value of the
854 // induction variable satisfies some constraint.
856 const SCEVAddRecExpr
*IndVarBase
= cast
<SCEVAddRecExpr
>(LeftSCEV
);
857 if (!IndVarBase
->isAffine()) {
858 FailureReason
= "LHS in icmp not induction variable";
861 const SCEV
* StepRec
= IndVarBase
->getStepRecurrence(SE
);
862 if (!isa
<SCEVConstant
>(StepRec
)) {
863 FailureReason
= "LHS in icmp not induction variable";
866 ConstantInt
*StepCI
= cast
<SCEVConstant
>(StepRec
)->getValue();
868 if (ICI
->isEquality() && !HasNoSignedWrap(IndVarBase
)) {
869 FailureReason
= "LHS in icmp needs nsw for equality predicates";
873 assert(!StepCI
->isZero() && "Zero step?");
874 bool IsIncreasing
= !StepCI
->isNegative();
875 bool IsSignedPredicate
= ICmpInst::isSigned(Pred
);
876 const SCEV
*StartNext
= IndVarBase
->getStart();
877 const SCEV
*Addend
= SE
.getNegativeSCEV(IndVarBase
->getStepRecurrence(SE
));
878 const SCEV
*IndVarStart
= SE
.getAddExpr(StartNext
, Addend
);
879 const SCEV
*Step
= SE
.getSCEV(StepCI
);
881 ConstantInt
*One
= ConstantInt::get(IndVarTy
, 1);
883 bool DecreasedRightValueByOne
= false;
884 if (StepCI
->isOne()) {
885 // Try to turn eq/ne predicates to those we can work with.
886 if (Pred
== ICmpInst::ICMP_NE
&& LatchBrExitIdx
== 1)
887 // while (++i != len) { while (++i < len) {
890 // If both parts are known non-negative, it is profitable to use
891 // unsigned comparison in increasing loop. This allows us to make the
892 // comparison check against "RightSCEV + 1" more optimistic.
893 if (isKnownNonNegativeInLoop(IndVarStart
, &L
, SE
) &&
894 isKnownNonNegativeInLoop(RightSCEV
, &L
, SE
))
895 Pred
= ICmpInst::ICMP_ULT
;
897 Pred
= ICmpInst::ICMP_SLT
;
898 else if (Pred
== ICmpInst::ICMP_EQ
&& LatchBrExitIdx
== 0) {
899 // while (true) { while (true) {
900 // if (++i == len) ---> if (++i > len - 1)
904 if (IndVarBase
->getNoWrapFlags(SCEV::FlagNUW
) &&
905 cannotBeMinInLoop(RightSCEV
, &L
, SE
, /*Signed*/false)) {
906 Pred
= ICmpInst::ICMP_UGT
;
907 RightSCEV
= SE
.getMinusSCEV(RightSCEV
,
908 SE
.getOne(RightSCEV
->getType()));
909 DecreasedRightValueByOne
= true;
910 } else if (cannotBeMinInLoop(RightSCEV
, &L
, SE
, /*Signed*/true)) {
911 Pred
= ICmpInst::ICMP_SGT
;
912 RightSCEV
= SE
.getMinusSCEV(RightSCEV
,
913 SE
.getOne(RightSCEV
->getType()));
914 DecreasedRightValueByOne
= true;
919 bool LTPred
= (Pred
== ICmpInst::ICMP_SLT
|| Pred
== ICmpInst::ICMP_ULT
);
920 bool GTPred
= (Pred
== ICmpInst::ICMP_SGT
|| Pred
== ICmpInst::ICMP_UGT
);
921 bool FoundExpectedPred
=
922 (LTPred
&& LatchBrExitIdx
== 1) || (GTPred
&& LatchBrExitIdx
== 0);
924 if (!FoundExpectedPred
) {
925 FailureReason
= "expected icmp slt semantically, found something else";
929 IsSignedPredicate
= ICmpInst::isSigned(Pred
);
930 if (!IsSignedPredicate
&& !AllowUnsignedLatchCondition
) {
931 FailureReason
= "unsigned latch conditions are explicitly prohibited";
935 if (!isSafeIncreasingBound(IndVarStart
, RightSCEV
, Step
, Pred
,
936 LatchBrExitIdx
, &L
, SE
)) {
937 FailureReason
= "Unsafe loop bounds";
940 if (LatchBrExitIdx
== 0) {
941 // We need to increase the right value unless we have already decreased
942 // it virtually when we replaced EQ with SGT.
943 if (!DecreasedRightValueByOne
) {
944 IRBuilder
<> B(Preheader
->getTerminator());
945 RightValue
= B
.CreateAdd(RightValue
, One
);
948 assert(!DecreasedRightValueByOne
&&
949 "Right value can be decreased only for LatchBrExitIdx == 0!");
952 bool IncreasedRightValueByOne
= false;
953 if (StepCI
->isMinusOne()) {
954 // Try to turn eq/ne predicates to those we can work with.
955 if (Pred
== ICmpInst::ICMP_NE
&& LatchBrExitIdx
== 1)
956 // while (--i != len) { while (--i > len) {
959 // We intentionally don't turn the predicate into UGT even if we know
960 // that both operands are non-negative, because it will only pessimize
961 // our check against "RightSCEV - 1".
962 Pred
= ICmpInst::ICMP_SGT
;
963 else if (Pred
== ICmpInst::ICMP_EQ
&& LatchBrExitIdx
== 0) {
964 // while (true) { while (true) {
965 // if (--i == len) ---> if (--i < len + 1)
969 if (IndVarBase
->getNoWrapFlags(SCEV::FlagNUW
) &&
970 cannotBeMaxInLoop(RightSCEV
, &L
, SE
, /* Signed */ false)) {
971 Pred
= ICmpInst::ICMP_ULT
;
972 RightSCEV
= SE
.getAddExpr(RightSCEV
, SE
.getOne(RightSCEV
->getType()));
973 IncreasedRightValueByOne
= true;
974 } else if (cannotBeMaxInLoop(RightSCEV
, &L
, SE
, /* Signed */ true)) {
975 Pred
= ICmpInst::ICMP_SLT
;
976 RightSCEV
= SE
.getAddExpr(RightSCEV
, SE
.getOne(RightSCEV
->getType()));
977 IncreasedRightValueByOne
= true;
982 bool LTPred
= (Pred
== ICmpInst::ICMP_SLT
|| Pred
== ICmpInst::ICMP_ULT
);
983 bool GTPred
= (Pred
== ICmpInst::ICMP_SGT
|| Pred
== ICmpInst::ICMP_UGT
);
985 bool FoundExpectedPred
=
986 (GTPred
&& LatchBrExitIdx
== 1) || (LTPred
&& LatchBrExitIdx
== 0);
988 if (!FoundExpectedPred
) {
989 FailureReason
= "expected icmp sgt semantically, found something else";
994 Pred
== ICmpInst::ICMP_SLT
|| Pred
== ICmpInst::ICMP_SGT
;
996 if (!IsSignedPredicate
&& !AllowUnsignedLatchCondition
) {
997 FailureReason
= "unsigned latch conditions are explicitly prohibited";
1001 if (!isSafeDecreasingBound(IndVarStart
, RightSCEV
, Step
, Pred
,
1002 LatchBrExitIdx
, &L
, SE
)) {
1003 FailureReason
= "Unsafe bounds";
1007 if (LatchBrExitIdx
== 0) {
1008 // We need to decrease the right value unless we have already increased
1009 // it virtually when we replaced EQ with SLT.
1010 if (!IncreasedRightValueByOne
) {
1011 IRBuilder
<> B(Preheader
->getTerminator());
1012 RightValue
= B
.CreateSub(RightValue
, One
);
1015 assert(!IncreasedRightValueByOne
&&
1016 "Right value can be increased only for LatchBrExitIdx == 0!");
1019 BasicBlock
*LatchExit
= LatchBr
->getSuccessor(LatchBrExitIdx
);
1021 assert(SE
.getLoopDisposition(LatchCount
, &L
) ==
1022 ScalarEvolution::LoopInvariant
&&
1023 "loop variant exit count doesn't make sense!");
1025 assert(!L
.contains(LatchExit
) && "expected an exit block!");
1026 const DataLayout
&DL
= Preheader
->getModule()->getDataLayout();
1027 Value
*IndVarStartV
=
1028 SCEVExpander(SE
, DL
, "irce")
1029 .expandCodeFor(IndVarStart
, IndVarTy
, Preheader
->getTerminator());
1030 IndVarStartV
->setName("indvar.start");
1032 LoopStructure Result
;
1034 Result
.Tag
= "main";
1035 Result
.Header
= Header
;
1036 Result
.Latch
= Latch
;
1037 Result
.LatchBr
= LatchBr
;
1038 Result
.LatchExit
= LatchExit
;
1039 Result
.LatchBrExitIdx
= LatchBrExitIdx
;
1040 Result
.IndVarStart
= IndVarStartV
;
1041 Result
.IndVarStep
= StepCI
;
1042 Result
.IndVarBase
= LeftValue
;
1043 Result
.IndVarIncreasing
= IsIncreasing
;
1044 Result
.LoopExitAt
= RightValue
;
1045 Result
.IsSignedPredicate
= IsSignedPredicate
;
1047 FailureReason
= nullptr;
1052 /// If the type of \p S matches with \p Ty, return \p S. Otherwise, return
1053 /// signed or unsigned extension of \p S to type \p Ty.
1054 static const SCEV
*NoopOrExtend(const SCEV
*S
, Type
*Ty
, ScalarEvolution
&SE
,
1056 return Signed
? SE
.getNoopOrSignExtend(S
, Ty
) : SE
.getNoopOrZeroExtend(S
, Ty
);
1059 Optional
<LoopConstrainer::SubRanges
>
1060 LoopConstrainer::calculateSubRanges(bool IsSignedPredicate
) const {
1061 IntegerType
*Ty
= cast
<IntegerType
>(LatchTakenCount
->getType());
1063 auto *RTy
= cast
<IntegerType
>(Range
.getType());
1065 // We only support wide range checks and narrow latches.
1066 if (!AllowNarrowLatchCondition
&& RTy
!= Ty
)
1068 if (RTy
->getBitWidth() < Ty
->getBitWidth())
1071 LoopConstrainer::SubRanges Result
;
1073 // I think we can be more aggressive here and make this nuw / nsw if the
1074 // addition that feeds into the icmp for the latch's terminating branch is nuw
1075 // / nsw. In any case, a wrapping 2's complement addition is safe.
1076 const SCEV
*Start
= NoopOrExtend(SE
.getSCEV(MainLoopStructure
.IndVarStart
),
1077 RTy
, SE
, IsSignedPredicate
);
1078 const SCEV
*End
= NoopOrExtend(SE
.getSCEV(MainLoopStructure
.LoopExitAt
), RTy
,
1079 SE
, IsSignedPredicate
);
1081 bool Increasing
= MainLoopStructure
.IndVarIncreasing
;
1083 // We compute `Smallest` and `Greatest` such that [Smallest, Greatest), or
1084 // [Smallest, GreatestSeen] is the range of values the induction variable
1087 const SCEV
*Smallest
= nullptr, *Greatest
= nullptr, *GreatestSeen
= nullptr;
1089 const SCEV
*One
= SE
.getOne(RTy
);
1093 // No overflow, because the range [Smallest, GreatestSeen] is not empty.
1094 GreatestSeen
= SE
.getMinusSCEV(End
, One
);
1096 // These two computations may sign-overflow. Here is why that is okay:
1098 // We know that the induction variable does not sign-overflow on any
1099 // iteration except the last one, and it starts at `Start` and ends at
1100 // `End`, decrementing by one every time.
1102 // * if `Smallest` sign-overflows we know `End` is `INT_SMAX`. Since the
1103 // induction variable is decreasing we know that that the smallest value
1104 // the loop body is actually executed with is `INT_SMIN` == `Smallest`.
1106 // * if `Greatest` sign-overflows, we know it can only be `INT_SMIN`. In
1107 // that case, `Clamp` will always return `Smallest` and
1108 // [`Result.LowLimit`, `Result.HighLimit`) = [`Smallest`, `Smallest`)
1109 // will be an empty range. Returning an empty range is always safe.
1111 Smallest
= SE
.getAddExpr(End
, One
);
1112 Greatest
= SE
.getAddExpr(Start
, One
);
1113 GreatestSeen
= Start
;
1116 auto Clamp
= [this, Smallest
, Greatest
, IsSignedPredicate
](const SCEV
*S
) {
1117 return IsSignedPredicate
1118 ? SE
.getSMaxExpr(Smallest
, SE
.getSMinExpr(Greatest
, S
))
1119 : SE
.getUMaxExpr(Smallest
, SE
.getUMinExpr(Greatest
, S
));
1122 // In some cases we can prove that we don't need a pre or post loop.
1123 ICmpInst::Predicate PredLE
=
1124 IsSignedPredicate
? ICmpInst::ICMP_SLE
: ICmpInst::ICMP_ULE
;
1125 ICmpInst::Predicate PredLT
=
1126 IsSignedPredicate
? ICmpInst::ICMP_SLT
: ICmpInst::ICMP_ULT
;
1128 bool ProvablyNoPreloop
=
1129 SE
.isKnownPredicate(PredLE
, Range
.getBegin(), Smallest
);
1130 if (!ProvablyNoPreloop
)
1131 Result
.LowLimit
= Clamp(Range
.getBegin());
1133 bool ProvablyNoPostLoop
=
1134 SE
.isKnownPredicate(PredLT
, GreatestSeen
, Range
.getEnd());
1135 if (!ProvablyNoPostLoop
)
1136 Result
.HighLimit
= Clamp(Range
.getEnd());
1141 void LoopConstrainer::cloneLoop(LoopConstrainer::ClonedLoop
&Result
,
1142 const char *Tag
) const {
1143 for (BasicBlock
*BB
: OriginalLoop
.getBlocks()) {
1144 BasicBlock
*Clone
= CloneBasicBlock(BB
, Result
.Map
, Twine(".") + Tag
, &F
);
1145 Result
.Blocks
.push_back(Clone
);
1146 Result
.Map
[BB
] = Clone
;
1149 auto GetClonedValue
= [&Result
](Value
*V
) {
1150 assert(V
&& "null values not in domain!");
1151 auto It
= Result
.Map
.find(V
);
1152 if (It
== Result
.Map
.end())
1154 return static_cast<Value
*>(It
->second
);
1158 cast
<BasicBlock
>(GetClonedValue(OriginalLoop
.getLoopLatch()));
1159 ClonedLatch
->getTerminator()->setMetadata(ClonedLoopTag
,
1160 MDNode::get(Ctx
, {}));
1162 Result
.Structure
= MainLoopStructure
.map(GetClonedValue
);
1163 Result
.Structure
.Tag
= Tag
;
1165 for (unsigned i
= 0, e
= Result
.Blocks
.size(); i
!= e
; ++i
) {
1166 BasicBlock
*ClonedBB
= Result
.Blocks
[i
];
1167 BasicBlock
*OriginalBB
= OriginalLoop
.getBlocks()[i
];
1169 assert(Result
.Map
[OriginalBB
] == ClonedBB
&& "invariant!");
1171 for (Instruction
&I
: *ClonedBB
)
1172 RemapInstruction(&I
, Result
.Map
,
1173 RF_NoModuleLevelChanges
| RF_IgnoreMissingLocals
);
1175 // Exit blocks will now have one more predecessor and their PHI nodes need
1176 // to be edited to reflect that. No phi nodes need to be introduced because
1177 // the loop is in LCSSA.
1179 for (auto *SBB
: successors(OriginalBB
)) {
1180 if (OriginalLoop
.contains(SBB
))
1181 continue; // not an exit block
1183 for (PHINode
&PN
: SBB
->phis()) {
1184 Value
*OldIncoming
= PN
.getIncomingValueForBlock(OriginalBB
);
1185 PN
.addIncoming(GetClonedValue(OldIncoming
), ClonedBB
);
1191 LoopConstrainer::RewrittenRangeInfo
LoopConstrainer::changeIterationSpaceEnd(
1192 const LoopStructure
&LS
, BasicBlock
*Preheader
, Value
*ExitSubloopAt
,
1193 BasicBlock
*ContinuationBlock
) const {
1194 // We start with a loop with a single latch:
1196 // +--------------------+
1200 // +--------+-----------+
1201 // | ----------------\
1203 // +--------v----v------+ |
1207 // +--------------------+ |
1211 // +--------------------+ |
1213 // | latch >----------/
1215 // +-------v------------+
1218 // | +--------------------+
1220 // +---> original exit |
1222 // +--------------------+
1224 // We change the control flow to look like
1227 // +--------------------+
1229 // | preheader >-------------------------+
1231 // +--------v-----------+ |
1232 // | /-------------+ |
1234 // +--------v--v--------+ | |
1236 // | header | | +--------+ |
1238 // +--------------------+ | | +-----v-----v-----------+
1240 // | | | .pseudo.exit |
1242 // | | +-----------v-----------+
1245 // | | +--------v-------------+
1246 // +--------------------+ | | | |
1247 // | | | | | ContinuationBlock |
1248 // | latch >------+ | | |
1249 // | | | +----------------------+
1250 // +---------v----------+ |
1253 // | +---------------^-----+
1255 // +-----> .exit.selector |
1257 // +----------v----------+
1259 // +--------------------+ |
1261 // | original exit <----+
1263 // +--------------------+
1265 RewrittenRangeInfo RRI
;
1267 BasicBlock
*BBInsertLocation
= LS
.Latch
->getNextNode();
1268 RRI
.ExitSelector
= BasicBlock::Create(Ctx
, Twine(LS
.Tag
) + ".exit.selector",
1269 &F
, BBInsertLocation
);
1270 RRI
.PseudoExit
= BasicBlock::Create(Ctx
, Twine(LS
.Tag
) + ".pseudo.exit", &F
,
1273 BranchInst
*PreheaderJump
= cast
<BranchInst
>(Preheader
->getTerminator());
1274 bool Increasing
= LS
.IndVarIncreasing
;
1275 bool IsSignedPredicate
= LS
.IsSignedPredicate
;
1277 IRBuilder
<> B(PreheaderJump
);
1278 auto *RangeTy
= Range
.getBegin()->getType();
1279 auto NoopOrExt
= [&](Value
*V
) {
1280 if (V
->getType() == RangeTy
)
1282 return IsSignedPredicate
? B
.CreateSExt(V
, RangeTy
, "wide." + V
->getName())
1283 : B
.CreateZExt(V
, RangeTy
, "wide." + V
->getName());
1286 // EnterLoopCond - is it okay to start executing this `LS'?
1287 Value
*EnterLoopCond
= nullptr;
1290 ? (IsSignedPredicate
? ICmpInst::ICMP_SLT
: ICmpInst::ICMP_ULT
)
1291 : (IsSignedPredicate
? ICmpInst::ICMP_SGT
: ICmpInst::ICMP_UGT
);
1292 Value
*IndVarStart
= NoopOrExt(LS
.IndVarStart
);
1293 EnterLoopCond
= B
.CreateICmp(Pred
, IndVarStart
, ExitSubloopAt
);
1295 B
.CreateCondBr(EnterLoopCond
, LS
.Header
, RRI
.PseudoExit
);
1296 PreheaderJump
->eraseFromParent();
1298 LS
.LatchBr
->setSuccessor(LS
.LatchBrExitIdx
, RRI
.ExitSelector
);
1299 B
.SetInsertPoint(LS
.LatchBr
);
1300 Value
*IndVarBase
= NoopOrExt(LS
.IndVarBase
);
1301 Value
*TakeBackedgeLoopCond
= B
.CreateICmp(Pred
, IndVarBase
, ExitSubloopAt
);
1303 Value
*CondForBranch
= LS
.LatchBrExitIdx
== 1
1304 ? TakeBackedgeLoopCond
1305 : B
.CreateNot(TakeBackedgeLoopCond
);
1307 LS
.LatchBr
->setCondition(CondForBranch
);
1309 B
.SetInsertPoint(RRI
.ExitSelector
);
1311 // IterationsLeft - are there any more iterations left, given the original
1312 // upper bound on the induction variable? If not, we branch to the "real"
1314 Value
*LoopExitAt
= NoopOrExt(LS
.LoopExitAt
);
1315 Value
*IterationsLeft
= B
.CreateICmp(Pred
, IndVarBase
, LoopExitAt
);
1316 B
.CreateCondBr(IterationsLeft
, RRI
.PseudoExit
, LS
.LatchExit
);
1318 BranchInst
*BranchToContinuation
=
1319 BranchInst::Create(ContinuationBlock
, RRI
.PseudoExit
);
1321 // We emit PHI nodes into `RRI.PseudoExit' that compute the "latest" value of
1322 // each of the PHI nodes in the loop header. This feeds into the initial
1323 // value of the same PHI nodes if/when we continue execution.
1324 for (PHINode
&PN
: LS
.Header
->phis()) {
1325 PHINode
*NewPHI
= PHINode::Create(PN
.getType(), 2, PN
.getName() + ".copy",
1326 BranchToContinuation
);
1328 NewPHI
->addIncoming(PN
.getIncomingValueForBlock(Preheader
), Preheader
);
1329 NewPHI
->addIncoming(PN
.getIncomingValueForBlock(LS
.Latch
),
1331 RRI
.PHIValuesAtPseudoExit
.push_back(NewPHI
);
1334 RRI
.IndVarEnd
= PHINode::Create(IndVarBase
->getType(), 2, "indvar.end",
1335 BranchToContinuation
);
1336 RRI
.IndVarEnd
->addIncoming(IndVarStart
, Preheader
);
1337 RRI
.IndVarEnd
->addIncoming(IndVarBase
, RRI
.ExitSelector
);
1339 // The latch exit now has a branch from `RRI.ExitSelector' instead of
1340 // `LS.Latch'. The PHI nodes need to be updated to reflect that.
1341 for (PHINode
&PN
: LS
.LatchExit
->phis())
1342 replacePHIBlock(&PN
, LS
.Latch
, RRI
.ExitSelector
);
1347 void LoopConstrainer::rewriteIncomingValuesForPHIs(
1348 LoopStructure
&LS
, BasicBlock
*ContinuationBlock
,
1349 const LoopConstrainer::RewrittenRangeInfo
&RRI
) const {
1350 unsigned PHIIndex
= 0;
1351 for (PHINode
&PN
: LS
.Header
->phis())
1352 for (unsigned i
= 0, e
= PN
.getNumIncomingValues(); i
< e
; ++i
)
1353 if (PN
.getIncomingBlock(i
) == ContinuationBlock
)
1354 PN
.setIncomingValue(i
, RRI
.PHIValuesAtPseudoExit
[PHIIndex
++]);
1356 LS
.IndVarStart
= RRI
.IndVarEnd
;
1359 BasicBlock
*LoopConstrainer::createPreheader(const LoopStructure
&LS
,
1360 BasicBlock
*OldPreheader
,
1361 const char *Tag
) const {
1362 BasicBlock
*Preheader
= BasicBlock::Create(Ctx
, Tag
, &F
, LS
.Header
);
1363 BranchInst::Create(LS
.Header
, Preheader
);
1365 for (PHINode
&PN
: LS
.Header
->phis())
1366 for (unsigned i
= 0, e
= PN
.getNumIncomingValues(); i
< e
; ++i
)
1367 replacePHIBlock(&PN
, OldPreheader
, Preheader
);
1372 void LoopConstrainer::addToParentLoopIfNeeded(ArrayRef
<BasicBlock
*> BBs
) {
1373 Loop
*ParentLoop
= OriginalLoop
.getParentLoop();
1377 for (BasicBlock
*BB
: BBs
)
1378 ParentLoop
->addBasicBlockToLoop(BB
, LI
);
1381 Loop
*LoopConstrainer::createClonedLoopStructure(Loop
*Original
, Loop
*Parent
,
1382 ValueToValueMapTy
&VM
,
1384 Loop
&New
= *LI
.AllocateLoop();
1386 Parent
->addChildLoop(&New
);
1388 LI
.addTopLevelLoop(&New
);
1389 LPMAddNewLoop(&New
, IsSubloop
);
1391 // Add all of the blocks in Original to the new loop.
1392 for (auto *BB
: Original
->blocks())
1393 if (LI
.getLoopFor(BB
) == Original
)
1394 New
.addBasicBlockToLoop(cast
<BasicBlock
>(VM
[BB
]), LI
);
1396 // Add all of the subloops to the new loop.
1397 for (Loop
*SubLoop
: *Original
)
1398 createClonedLoopStructure(SubLoop
, &New
, VM
, /* IsSubloop */ true);
1403 bool LoopConstrainer::run() {
1404 BasicBlock
*Preheader
= nullptr;
1405 LatchTakenCount
= SE
.getExitCount(&OriginalLoop
, MainLoopStructure
.Latch
);
1406 Preheader
= OriginalLoop
.getLoopPreheader();
1407 assert(!isa
<SCEVCouldNotCompute
>(LatchTakenCount
) && Preheader
!= nullptr &&
1410 OriginalPreheader
= Preheader
;
1411 MainLoopPreheader
= Preheader
;
1413 bool IsSignedPredicate
= MainLoopStructure
.IsSignedPredicate
;
1414 Optional
<SubRanges
> MaybeSR
= calculateSubRanges(IsSignedPredicate
);
1415 if (!MaybeSR
.hasValue()) {
1416 LLVM_DEBUG(dbgs() << "irce: could not compute subranges\n");
1420 SubRanges SR
= MaybeSR
.getValue();
1421 bool Increasing
= MainLoopStructure
.IndVarIncreasing
;
1423 cast
<IntegerType
>(Range
.getBegin()->getType());
1425 SCEVExpander
Expander(SE
, F
.getParent()->getDataLayout(), "irce");
1426 Instruction
*InsertPt
= OriginalPreheader
->getTerminator();
1428 // It would have been better to make `PreLoop' and `PostLoop'
1429 // `Optional<ClonedLoop>'s, but `ValueToValueMapTy' does not have a copy
1431 ClonedLoop PreLoop
, PostLoop
;
1433 Increasing
? SR
.LowLimit
.hasValue() : SR
.HighLimit
.hasValue();
1434 bool NeedsPostLoop
=
1435 Increasing
? SR
.HighLimit
.hasValue() : SR
.LowLimit
.hasValue();
1437 Value
*ExitPreLoopAt
= nullptr;
1438 Value
*ExitMainLoopAt
= nullptr;
1439 const SCEVConstant
*MinusOneS
=
1440 cast
<SCEVConstant
>(SE
.getConstant(IVTy
, -1, true /* isSigned */));
1443 const SCEV
*ExitPreLoopAtSCEV
= nullptr;
1446 ExitPreLoopAtSCEV
= *SR
.LowLimit
;
1447 else if (cannotBeMinInLoop(*SR
.HighLimit
, &OriginalLoop
, SE
,
1449 ExitPreLoopAtSCEV
= SE
.getAddExpr(*SR
.HighLimit
, MinusOneS
);
1451 LLVM_DEBUG(dbgs() << "irce: could not prove no-overflow when computing "
1452 << "preloop exit limit. HighLimit = "
1453 << *(*SR
.HighLimit
) << "\n");
1457 if (!isSafeToExpandAt(ExitPreLoopAtSCEV
, InsertPt
, SE
)) {
1458 LLVM_DEBUG(dbgs() << "irce: could not prove that it is safe to expand the"
1459 << " preloop exit limit " << *ExitPreLoopAtSCEV
1460 << " at block " << InsertPt
->getParent()->getName()
1465 ExitPreLoopAt
= Expander
.expandCodeFor(ExitPreLoopAtSCEV
, IVTy
, InsertPt
);
1466 ExitPreLoopAt
->setName("exit.preloop.at");
1469 if (NeedsPostLoop
) {
1470 const SCEV
*ExitMainLoopAtSCEV
= nullptr;
1473 ExitMainLoopAtSCEV
= *SR
.HighLimit
;
1474 else if (cannotBeMinInLoop(*SR
.LowLimit
, &OriginalLoop
, SE
,
1476 ExitMainLoopAtSCEV
= SE
.getAddExpr(*SR
.LowLimit
, MinusOneS
);
1478 LLVM_DEBUG(dbgs() << "irce: could not prove no-overflow when computing "
1479 << "mainloop exit limit. LowLimit = "
1480 << *(*SR
.LowLimit
) << "\n");
1484 if (!isSafeToExpandAt(ExitMainLoopAtSCEV
, InsertPt
, SE
)) {
1485 LLVM_DEBUG(dbgs() << "irce: could not prove that it is safe to expand the"
1486 << " main loop exit limit " << *ExitMainLoopAtSCEV
1487 << " at block " << InsertPt
->getParent()->getName()
1492 ExitMainLoopAt
= Expander
.expandCodeFor(ExitMainLoopAtSCEV
, IVTy
, InsertPt
);
1493 ExitMainLoopAt
->setName("exit.mainloop.at");
1496 // We clone these ahead of time so that we don't have to deal with changing
1497 // and temporarily invalid IR as we transform the loops.
1499 cloneLoop(PreLoop
, "preloop");
1501 cloneLoop(PostLoop
, "postloop");
1503 RewrittenRangeInfo PreLoopRRI
;
1506 Preheader
->getTerminator()->replaceUsesOfWith(MainLoopStructure
.Header
,
1507 PreLoop
.Structure
.Header
);
1510 createPreheader(MainLoopStructure
, Preheader
, "mainloop");
1511 PreLoopRRI
= changeIterationSpaceEnd(PreLoop
.Structure
, Preheader
,
1512 ExitPreLoopAt
, MainLoopPreheader
);
1513 rewriteIncomingValuesForPHIs(MainLoopStructure
, MainLoopPreheader
,
1517 BasicBlock
*PostLoopPreheader
= nullptr;
1518 RewrittenRangeInfo PostLoopRRI
;
1520 if (NeedsPostLoop
) {
1522 createPreheader(PostLoop
.Structure
, Preheader
, "postloop");
1523 PostLoopRRI
= changeIterationSpaceEnd(MainLoopStructure
, MainLoopPreheader
,
1524 ExitMainLoopAt
, PostLoopPreheader
);
1525 rewriteIncomingValuesForPHIs(PostLoop
.Structure
, PostLoopPreheader
,
1529 BasicBlock
*NewMainLoopPreheader
=
1530 MainLoopPreheader
!= Preheader
? MainLoopPreheader
: nullptr;
1531 BasicBlock
*NewBlocks
[] = {PostLoopPreheader
, PreLoopRRI
.PseudoExit
,
1532 PreLoopRRI
.ExitSelector
, PostLoopRRI
.PseudoExit
,
1533 PostLoopRRI
.ExitSelector
, NewMainLoopPreheader
};
1535 // Some of the above may be nullptr, filter them out before passing to
1536 // addToParentLoopIfNeeded.
1538 std::remove(std::begin(NewBlocks
), std::end(NewBlocks
), nullptr);
1540 addToParentLoopIfNeeded(makeArrayRef(std::begin(NewBlocks
), NewBlocksEnd
));
1544 // We need to first add all the pre and post loop blocks into the loop
1545 // structures (as part of createClonedLoopStructure), and then update the
1546 // LCSSA form and LoopSimplifyForm. This is necessary for correctly updating
1547 // LI when LoopSimplifyForm is generated.
1548 Loop
*PreL
= nullptr, *PostL
= nullptr;
1549 if (!PreLoop
.Blocks
.empty()) {
1550 PreL
= createClonedLoopStructure(&OriginalLoop
,
1551 OriginalLoop
.getParentLoop(), PreLoop
.Map
,
1552 /* IsSubLoop */ false);
1555 if (!PostLoop
.Blocks
.empty()) {
1557 createClonedLoopStructure(&OriginalLoop
, OriginalLoop
.getParentLoop(),
1558 PostLoop
.Map
, /* IsSubLoop */ false);
1561 // This function canonicalizes the loop into Loop-Simplify and LCSSA forms.
1562 auto CanonicalizeLoop
= [&] (Loop
*L
, bool IsOriginalLoop
) {
1563 formLCSSARecursively(*L
, DT
, &LI
, &SE
);
1564 simplifyLoop(L
, &DT
, &LI
, &SE
, nullptr, true);
1565 // Pre/post loops are slow paths, we do not need to perform any loop
1566 // optimizations on them.
1567 if (!IsOriginalLoop
)
1568 DisableAllLoopOptsOnLoop(*L
);
1571 CanonicalizeLoop(PreL
, false);
1573 CanonicalizeLoop(PostL
, false);
1574 CanonicalizeLoop(&OriginalLoop
, true);
1579 /// Computes and returns a range of values for the induction variable (IndVar)
1580 /// in which the range check can be safely elided. If it cannot compute such a
1581 /// range, returns None.
1582 Optional
<InductiveRangeCheck::Range
>
1583 InductiveRangeCheck::computeSafeIterationSpace(
1584 ScalarEvolution
&SE
, const SCEVAddRecExpr
*IndVar
,
1585 bool IsLatchSigned
) const {
1586 // We can deal when types of latch check and range checks don't match in case
1587 // if latch check is more narrow.
1588 auto *IVType
= cast
<IntegerType
>(IndVar
->getType());
1589 auto *RCType
= cast
<IntegerType
>(getBegin()->getType());
1590 if (IVType
->getBitWidth() > RCType
->getBitWidth())
1592 // IndVar is of the form "A + B * I" (where "I" is the canonical induction
1593 // variable, that may or may not exist as a real llvm::Value in the loop) and
1594 // this inductive range check is a range check on the "C + D * I" ("C" is
1595 // getBegin() and "D" is getStep()). We rewrite the value being range
1596 // checked to "M + N * IndVar" where "N" = "D * B^(-1)" and "M" = "C - NA".
1598 // The actual inequalities we solve are of the form
1600 // 0 <= M + 1 * IndVar < L given L >= 0 (i.e. N == 1)
1602 // Here L stands for upper limit of the safe iteration space.
1603 // The inequality is satisfied by (0 - M) <= IndVar < (L - M). To avoid
1604 // overflows when calculating (0 - M) and (L - M) we, depending on type of
1605 // IV's iteration space, limit the calculations by borders of the iteration
1606 // space. For example, if IndVar is unsigned, (0 - M) overflows for any M > 0.
1607 // If we figured out that "anything greater than (-M) is safe", we strengthen
1608 // this to "everything greater than 0 is safe", assuming that values between
1609 // -M and 0 just do not exist in unsigned iteration space, and we don't want
1610 // to deal with overflown values.
1612 if (!IndVar
->isAffine())
1615 const SCEV
*A
= NoopOrExtend(IndVar
->getStart(), RCType
, SE
, IsLatchSigned
);
1616 const SCEVConstant
*B
= dyn_cast
<SCEVConstant
>(
1617 NoopOrExtend(IndVar
->getStepRecurrence(SE
), RCType
, SE
, IsLatchSigned
));
1620 assert(!B
->isZero() && "Recurrence with zero step?");
1622 const SCEV
*C
= getBegin();
1623 const SCEVConstant
*D
= dyn_cast
<SCEVConstant
>(getStep());
1627 assert(!D
->getValue()->isZero() && "Recurrence with zero step?");
1628 unsigned BitWidth
= RCType
->getBitWidth();
1629 const SCEV
*SIntMax
= SE
.getConstant(APInt::getSignedMaxValue(BitWidth
));
1631 // Subtract Y from X so that it does not go through border of the IV
1632 // iteration space. Mathematically, it is equivalent to:
1634 // ClampedSubtract(X, Y) = min(max(X - Y, INT_MIN), INT_MAX). [1]
1636 // In [1], 'X - Y' is a mathematical subtraction (result is not bounded to
1637 // any width of bit grid). But after we take min/max, the result is
1638 // guaranteed to be within [INT_MIN, INT_MAX].
1640 // In [1], INT_MAX and INT_MIN are respectively signed and unsigned max/min
1641 // values, depending on type of latch condition that defines IV iteration
1643 auto ClampedSubtract
= [&](const SCEV
*X
, const SCEV
*Y
) {
1644 // FIXME: The current implementation assumes that X is in [0, SINT_MAX].
1645 // This is required to ensure that SINT_MAX - X does not overflow signed and
1646 // that X - Y does not overflow unsigned if Y is negative. Can we lift this
1647 // restriction and make it work for negative X either?
1648 if (IsLatchSigned
) {
1649 // X is a number from signed range, Y is interpreted as signed.
1650 // Even if Y is SINT_MAX, (X - Y) does not reach SINT_MIN. So the only
1651 // thing we should care about is that we didn't cross SINT_MAX.
1652 // So, if Y is positive, we subtract Y safely.
1653 // Rule 1: Y > 0 ---> Y.
1654 // If 0 <= -Y <= (SINT_MAX - X), we subtract Y safely.
1655 // Rule 2: Y >=s (X - SINT_MAX) ---> Y.
1656 // If 0 <= (SINT_MAX - X) < -Y, we can only subtract (X - SINT_MAX).
1657 // Rule 3: Y <s (X - SINT_MAX) ---> (X - SINT_MAX).
1658 // It gives us smax(Y, X - SINT_MAX) to subtract in all cases.
1659 const SCEV
*XMinusSIntMax
= SE
.getMinusSCEV(X
, SIntMax
);
1660 return SE
.getMinusSCEV(X
, SE
.getSMaxExpr(Y
, XMinusSIntMax
),
1663 // X is a number from unsigned range, Y is interpreted as signed.
1664 // Even if Y is SINT_MIN, (X - Y) does not reach UINT_MAX. So the only
1665 // thing we should care about is that we didn't cross zero.
1666 // So, if Y is negative, we subtract Y safely.
1667 // Rule 1: Y <s 0 ---> Y.
1668 // If 0 <= Y <= X, we subtract Y safely.
1669 // Rule 2: Y <=s X ---> Y.
1670 // If 0 <= X < Y, we should stop at 0 and can only subtract X.
1671 // Rule 3: Y >s X ---> X.
1672 // It gives us smin(X, Y) to subtract in all cases.
1673 return SE
.getMinusSCEV(X
, SE
.getSMinExpr(X
, Y
), SCEV::FlagNUW
);
1675 const SCEV
*M
= SE
.getMinusSCEV(C
, A
);
1676 const SCEV
*Zero
= SE
.getZero(M
->getType());
1678 // This function returns SCEV equal to 1 if X is non-negative 0 otherwise.
1679 auto SCEVCheckNonNegative
= [&](const SCEV
*X
) {
1680 const Loop
*L
= IndVar
->getLoop();
1681 const SCEV
*One
= SE
.getOne(X
->getType());
1682 // Can we trivially prove that X is a non-negative or negative value?
1683 if (isKnownNonNegativeInLoop(X
, L
, SE
))
1685 else if (isKnownNegativeInLoop(X
, L
, SE
))
1687 // If not, we will have to figure it out during the execution.
1688 // Function smax(smin(X, 0), -1) + 1 equals to 1 if X >= 0 and 0 if X < 0.
1689 const SCEV
*NegOne
= SE
.getNegativeSCEV(One
);
1690 return SE
.getAddExpr(SE
.getSMaxExpr(SE
.getSMinExpr(X
, Zero
), NegOne
), One
);
1692 // FIXME: Current implementation of ClampedSubtract implicitly assumes that
1693 // X is non-negative (in sense of a signed value). We need to re-implement
1694 // this function in a way that it will correctly handle negative X as well.
1695 // We use it twice: for X = 0 everything is fine, but for X = getEnd() we can
1696 // end up with a negative X and produce wrong results. So currently we ensure
1697 // that if getEnd() is negative then both ends of the safe range are zero.
1698 // Note that this may pessimize elimination of unsigned range checks against
1700 const SCEV
*REnd
= getEnd();
1701 const SCEV
*EndIsNonNegative
= SCEVCheckNonNegative(REnd
);
1703 const SCEV
*Begin
= SE
.getMulExpr(ClampedSubtract(Zero
, M
), EndIsNonNegative
);
1704 const SCEV
*End
= SE
.getMulExpr(ClampedSubtract(REnd
, M
), EndIsNonNegative
);
1705 return InductiveRangeCheck::Range(Begin
, End
);
1708 static Optional
<InductiveRangeCheck::Range
>
1709 IntersectSignedRange(ScalarEvolution
&SE
,
1710 const Optional
<InductiveRangeCheck::Range
> &R1
,
1711 const InductiveRangeCheck::Range
&R2
) {
1712 if (R2
.isEmpty(SE
, /* IsSigned */ true))
1716 auto &R1Value
= R1
.getValue();
1717 // We never return empty ranges from this function, and R1 is supposed to be
1718 // a result of intersection. Thus, R1 is never empty.
1719 assert(!R1Value
.isEmpty(SE
, /* IsSigned */ true) &&
1720 "We should never have empty R1!");
1722 // TODO: we could widen the smaller range and have this work; but for now we
1723 // bail out to keep things simple.
1724 if (R1Value
.getType() != R2
.getType())
1727 const SCEV
*NewBegin
= SE
.getSMaxExpr(R1Value
.getBegin(), R2
.getBegin());
1728 const SCEV
*NewEnd
= SE
.getSMinExpr(R1Value
.getEnd(), R2
.getEnd());
1730 // If the resulting range is empty, just return None.
1731 auto Ret
= InductiveRangeCheck::Range(NewBegin
, NewEnd
);
1732 if (Ret
.isEmpty(SE
, /* IsSigned */ true))
1737 static Optional
<InductiveRangeCheck::Range
>
1738 IntersectUnsignedRange(ScalarEvolution
&SE
,
1739 const Optional
<InductiveRangeCheck::Range
> &R1
,
1740 const InductiveRangeCheck::Range
&R2
) {
1741 if (R2
.isEmpty(SE
, /* IsSigned */ false))
1745 auto &R1Value
= R1
.getValue();
1746 // We never return empty ranges from this function, and R1 is supposed to be
1747 // a result of intersection. Thus, R1 is never empty.
1748 assert(!R1Value
.isEmpty(SE
, /* IsSigned */ false) &&
1749 "We should never have empty R1!");
1751 // TODO: we could widen the smaller range and have this work; but for now we
1752 // bail out to keep things simple.
1753 if (R1Value
.getType() != R2
.getType())
1756 const SCEV
*NewBegin
= SE
.getUMaxExpr(R1Value
.getBegin(), R2
.getBegin());
1757 const SCEV
*NewEnd
= SE
.getUMinExpr(R1Value
.getEnd(), R2
.getEnd());
1759 // If the resulting range is empty, just return None.
1760 auto Ret
= InductiveRangeCheck::Range(NewBegin
, NewEnd
);
1761 if (Ret
.isEmpty(SE
, /* IsSigned */ false))
1766 PreservedAnalyses
IRCEPass::run(Loop
&L
, LoopAnalysisManager
&AM
,
1767 LoopStandardAnalysisResults
&AR
,
1769 Function
*F
= L
.getHeader()->getParent();
1771 AM
.getResult
<FunctionAnalysisManagerLoopProxy
>(L
, AR
).getManager();
1772 auto *BPI
= FAM
.getCachedResult
<BranchProbabilityAnalysis
>(*F
);
1773 InductiveRangeCheckElimination
IRCE(AR
.SE
, BPI
, AR
.DT
, AR
.LI
);
1774 auto LPMAddNewLoop
= [&U
](Loop
*NL
, bool IsSubloop
) {
1776 U
.addSiblingLoops(NL
);
1778 bool Changed
= IRCE
.run(&L
, LPMAddNewLoop
);
1780 return PreservedAnalyses::all();
1782 return getLoopPassPreservedAnalyses();
1785 bool IRCELegacyPass::runOnLoop(Loop
*L
, LPPassManager
&LPM
) {
1789 ScalarEvolution
&SE
= getAnalysis
<ScalarEvolutionWrapperPass
>().getSE();
1790 BranchProbabilityInfo
&BPI
=
1791 getAnalysis
<BranchProbabilityInfoWrapperPass
>().getBPI();
1792 auto &DT
= getAnalysis
<DominatorTreeWrapperPass
>().getDomTree();
1793 auto &LI
= getAnalysis
<LoopInfoWrapperPass
>().getLoopInfo();
1794 InductiveRangeCheckElimination
IRCE(SE
, &BPI
, DT
, LI
);
1795 auto LPMAddNewLoop
= [&LPM
](Loop
*NL
, bool /* IsSubLoop */) {
1798 return IRCE
.run(L
, LPMAddNewLoop
);
1801 bool InductiveRangeCheckElimination::run(
1802 Loop
*L
, function_ref
<void(Loop
*, bool)> LPMAddNewLoop
) {
1803 if (L
->getBlocks().size() >= LoopSizeCutoff
) {
1804 LLVM_DEBUG(dbgs() << "irce: giving up constraining loop, too large\n");
1808 BasicBlock
*Preheader
= L
->getLoopPreheader();
1810 LLVM_DEBUG(dbgs() << "irce: loop has no preheader, leaving\n");
1814 LLVMContext
&Context
= Preheader
->getContext();
1815 SmallVector
<InductiveRangeCheck
, 16> RangeChecks
;
1817 for (auto BBI
: L
->getBlocks())
1818 if (BranchInst
*TBI
= dyn_cast
<BranchInst
>(BBI
->getTerminator()))
1819 InductiveRangeCheck::extractRangeChecksFromBranch(TBI
, L
, SE
, BPI
,
1822 if (RangeChecks
.empty())
1825 auto PrintRecognizedRangeChecks
= [&](raw_ostream
&OS
) {
1826 OS
<< "irce: looking at loop "; L
->print(OS
);
1827 OS
<< "irce: loop has " << RangeChecks
.size()
1828 << " inductive range checks: \n";
1829 for (InductiveRangeCheck
&IRC
: RangeChecks
)
1833 LLVM_DEBUG(PrintRecognizedRangeChecks(dbgs()));
1835 if (PrintRangeChecks
)
1836 PrintRecognizedRangeChecks(errs());
1838 const char *FailureReason
= nullptr;
1839 Optional
<LoopStructure
> MaybeLoopStructure
=
1840 LoopStructure::parseLoopStructure(SE
, BPI
, *L
, FailureReason
);
1841 if (!MaybeLoopStructure
.hasValue()) {
1842 LLVM_DEBUG(dbgs() << "irce: could not parse loop structure: "
1843 << FailureReason
<< "\n";);
1846 LoopStructure LS
= MaybeLoopStructure
.getValue();
1847 const SCEVAddRecExpr
*IndVar
=
1848 cast
<SCEVAddRecExpr
>(SE
.getMinusSCEV(SE
.getSCEV(LS
.IndVarBase
), SE
.getSCEV(LS
.IndVarStep
)));
1850 Optional
<InductiveRangeCheck::Range
> SafeIterRange
;
1851 Instruction
*ExprInsertPt
= Preheader
->getTerminator();
1853 SmallVector
<InductiveRangeCheck
, 4> RangeChecksToEliminate
;
1854 // Basing on the type of latch predicate, we interpret the IV iteration range
1855 // as signed or unsigned range. We use different min/max functions (signed or
1856 // unsigned) when intersecting this range with safe iteration ranges implied
1858 auto IntersectRange
=
1859 LS
.IsSignedPredicate
? IntersectSignedRange
: IntersectUnsignedRange
;
1861 IRBuilder
<> B(ExprInsertPt
);
1862 for (InductiveRangeCheck
&IRC
: RangeChecks
) {
1863 auto Result
= IRC
.computeSafeIterationSpace(SE
, IndVar
,
1864 LS
.IsSignedPredicate
);
1865 if (Result
.hasValue()) {
1866 auto MaybeSafeIterRange
=
1867 IntersectRange(SE
, SafeIterRange
, Result
.getValue());
1868 if (MaybeSafeIterRange
.hasValue()) {
1870 !MaybeSafeIterRange
.getValue().isEmpty(SE
, LS
.IsSignedPredicate
) &&
1871 "We should never return empty ranges!");
1872 RangeChecksToEliminate
.push_back(IRC
);
1873 SafeIterRange
= MaybeSafeIterRange
.getValue();
1878 if (!SafeIterRange
.hasValue())
1881 LoopConstrainer
LC(*L
, LI
, LPMAddNewLoop
, LS
, SE
, DT
,
1882 SafeIterRange
.getValue());
1883 bool Changed
= LC
.run();
1886 auto PrintConstrainedLoopInfo
= [L
]() {
1887 dbgs() << "irce: in function ";
1888 dbgs() << L
->getHeader()->getParent()->getName() << ": ";
1889 dbgs() << "constrained ";
1893 LLVM_DEBUG(PrintConstrainedLoopInfo());
1895 if (PrintChangedLoops
)
1896 PrintConstrainedLoopInfo();
1898 // Optimize away the now-redundant range checks.
1900 for (InductiveRangeCheck
&IRC
: RangeChecksToEliminate
) {
1901 ConstantInt
*FoldedRangeCheck
= IRC
.getPassingDirection()
1902 ? ConstantInt::getTrue(Context
)
1903 : ConstantInt::getFalse(Context
);
1904 IRC
.getCheckUse()->set(FoldedRangeCheck
);
1911 Pass
*llvm::createInductiveRangeCheckEliminationPass() {
1912 return new IRCELegacyPass();