1 //===- GuardWidening.cpp - ---- Guard widening ----------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file implements the guard widening pass. The semantics of the
10 // @llvm.experimental.guard intrinsic lets LLVM transform it so that it fails
11 // more often that it did before the transform. This optimization is called
12 // "widening" and can be used hoist and common runtime checks in situations like
15 // %cmp0 = 7 u< Length
16 // call @llvm.experimental.guard(i1 %cmp0) [ "deopt"(...) ]
17 // call @unknown_side_effects()
18 // %cmp1 = 9 u< Length
19 // call @llvm.experimental.guard(i1 %cmp1) [ "deopt"(...) ]
24 // %cmp0 = 9 u< Length
25 // call @llvm.experimental.guard(i1 %cmp0) [ "deopt"(...) ]
26 // call @unknown_side_effects()
29 // If %cmp0 is false, @llvm.experimental.guard will "deoptimize" back to a
30 // generic implementation of the same function, which will have the correct
31 // semantics from that point onward. It is always _legal_ to deoptimize (so
32 // replacing %cmp0 with false is "correct"), though it may not always be
33 // profitable to do so.
35 // NB! This pass is a work in progress. It hasn't been tuned to be "production
36 // ready" yet. It is known to have quadriatic running time and will not scale
37 // to large numbers of guards
39 //===----------------------------------------------------------------------===//
41 #include "llvm/Transforms/Scalar/GuardWidening.h"
43 #include "llvm/ADT/DenseMap.h"
44 #include "llvm/ADT/DepthFirstIterator.h"
45 #include "llvm/ADT/Statistic.h"
46 #include "llvm/Analysis/BranchProbabilityInfo.h"
47 #include "llvm/Analysis/GuardUtils.h"
48 #include "llvm/Analysis/LoopInfo.h"
49 #include "llvm/Analysis/LoopPass.h"
50 #include "llvm/Analysis/PostDominators.h"
51 #include "llvm/Analysis/ValueTracking.h"
52 #include "llvm/IR/ConstantRange.h"
53 #include "llvm/IR/Dominators.h"
54 #include "llvm/IR/IntrinsicInst.h"
55 #include "llvm/IR/PatternMatch.h"
56 #include "llvm/Pass.h"
57 #include "llvm/Support/Debug.h"
58 #include "llvm/Support/KnownBits.h"
59 #include "llvm/Transforms/Scalar.h"
60 #include "llvm/Transforms/Utils/LoopUtils.h"
64 #define DEBUG_TYPE "guard-widening"
66 STATISTIC(GuardsEliminated
, "Number of eliminated guards");
67 STATISTIC(CondBranchEliminated
, "Number of eliminated conditional branches");
69 static cl::opt
<bool> WidenFrequentBranches(
70 "guard-widening-widen-frequent-branches", cl::Hidden
,
71 cl::desc("Widen conditions of explicit branches into dominating guards in "
72 "case if their taken frequency exceeds threshold set by "
73 "guard-widening-frequent-branch-threshold option"),
76 static cl::opt
<unsigned> FrequentBranchThreshold(
77 "guard-widening-frequent-branch-threshold", cl::Hidden
,
78 cl::desc("When WidenFrequentBranches is set to true, this option is used "
79 "to determine which branches are frequently taken. The criteria "
80 "that a branch is taken more often than "
81 "((FrequentBranchThreshold - 1) / FrequentBranchThreshold), then "
82 "it is considered frequently taken"),
86 WidenBranchGuards("guard-widening-widen-branch-guards", cl::Hidden
,
87 cl::desc("Whether or not we should widen guards "
88 "expressed as branches by widenable conditions"),
93 // Get the condition of \p I. It can either be a guard or a conditional branch.
94 static Value
*getCondition(Instruction
*I
) {
95 if (IntrinsicInst
*GI
= dyn_cast
<IntrinsicInst
>(I
)) {
96 assert(GI
->getIntrinsicID() == Intrinsic::experimental_guard
&&
97 "Bad guard intrinsic?");
98 return GI
->getArgOperand(0);
100 if (isGuardAsWidenableBranch(I
)) {
101 auto *Cond
= cast
<BranchInst
>(I
)->getCondition();
102 return cast
<BinaryOperator
>(Cond
)->getOperand(0);
104 return cast
<BranchInst
>(I
)->getCondition();
107 // Set the condition for \p I to \p NewCond. \p I can either be a guard or a
108 // conditional branch.
109 static void setCondition(Instruction
*I
, Value
*NewCond
) {
110 if (IntrinsicInst
*GI
= dyn_cast
<IntrinsicInst
>(I
)) {
111 assert(GI
->getIntrinsicID() == Intrinsic::experimental_guard
&&
112 "Bad guard intrinsic?");
113 GI
->setArgOperand(0, NewCond
);
116 cast
<BranchInst
>(I
)->setCondition(NewCond
);
119 // Eliminates the guard instruction properly.
120 static void eliminateGuard(Instruction
*GuardInst
) {
121 GuardInst
->eraseFromParent();
125 class GuardWideningImpl
{
127 PostDominatorTree
*PDT
;
129 BranchProbabilityInfo
*BPI
;
131 /// Together, these describe the region of interest. This might be all of
132 /// the blocks within a function, or only a given loop's blocks and preheader.
134 std::function
<bool(BasicBlock
*)> BlockFilter
;
136 /// The set of guards and conditional branches whose conditions have been
137 /// widened into dominating guards.
138 SmallVector
<Instruction
*, 16> EliminatedGuardsAndBranches
;
140 /// The set of guards which have been widened to include conditions to other
142 DenseSet
<Instruction
*> WidenedGuards
;
144 /// Try to eliminate instruction \p Instr by widening it into an earlier
145 /// dominating guard. \p DFSI is the DFS iterator on the dominator tree that
146 /// is currently visiting the block containing \p Guard, and \p GuardsPerBlock
147 /// maps BasicBlocks to the set of guards seen in that block.
148 bool eliminateInstrViaWidening(
149 Instruction
*Instr
, const df_iterator
<DomTreeNode
*> &DFSI
,
150 const DenseMap
<BasicBlock
*, SmallVector
<Instruction
*, 8>> &
151 GuardsPerBlock
, bool InvertCondition
= false);
153 /// Used to keep track of which widening potential is more effective.
156 WS_IllegalOrNegative
,
158 /// Widening is performance neutral as far as the cycles spent in check
159 /// conditions goes (but can still help, e.g., code layout, having less
163 /// Widening is profitable.
166 /// Widening is very profitable. Not significantly different from \c
167 /// WS_Positive, except by the order.
171 static StringRef
scoreTypeToString(WideningScore WS
);
173 /// Compute the score for widening the condition in \p DominatedInstr
174 /// into \p DominatingGuard. If \p InvertCond is set, then we widen the
175 /// inverted condition of the dominating guard.
176 WideningScore
computeWideningScore(Instruction
*DominatedInstr
,
177 Instruction
*DominatingGuard
,
180 /// Helper to check if \p V can be hoisted to \p InsertPos.
181 bool isAvailableAt(const Value
*V
, const Instruction
*InsertPos
) const {
182 SmallPtrSet
<const Instruction
*, 8> Visited
;
183 return isAvailableAt(V
, InsertPos
, Visited
);
186 bool isAvailableAt(const Value
*V
, const Instruction
*InsertPos
,
187 SmallPtrSetImpl
<const Instruction
*> &Visited
) const;
189 /// Helper to hoist \p V to \p InsertPos. Guaranteed to succeed if \c
190 /// isAvailableAt returned true.
191 void makeAvailableAt(Value
*V
, Instruction
*InsertPos
) const;
193 /// Common helper used by \c widenGuard and \c isWideningCondProfitable. Try
194 /// to generate an expression computing the logical AND of \p Cond0 and (\p
195 /// Cond1 XOR \p InvertCondition).
196 /// Return true if the expression computing the AND is only as
197 /// expensive as computing one of the two. If \p InsertPt is true then
198 /// actually generate the resulting expression, make it available at \p
199 /// InsertPt and return it in \p Result (else no change to the IR is made).
200 bool widenCondCommon(Value
*Cond0
, Value
*Cond1
, Instruction
*InsertPt
,
201 Value
*&Result
, bool InvertCondition
);
203 /// Represents a range check of the form \c Base + \c Offset u< \c Length,
204 /// with the constraint that \c Length is not negative. \c CheckInst is the
205 /// pre-existing instruction in the IR that computes the result of this range
209 const ConstantInt
*Offset
;
214 explicit RangeCheck(const Value
*Base
, const ConstantInt
*Offset
,
215 const Value
*Length
, ICmpInst
*CheckInst
)
216 : Base(Base
), Offset(Offset
), Length(Length
), CheckInst(CheckInst
) {}
218 void setBase(const Value
*NewBase
) { Base
= NewBase
; }
219 void setOffset(const ConstantInt
*NewOffset
) { Offset
= NewOffset
; }
221 const Value
*getBase() const { return Base
; }
222 const ConstantInt
*getOffset() const { return Offset
; }
223 const APInt
&getOffsetValue() const { return getOffset()->getValue(); }
224 const Value
*getLength() const { return Length
; };
225 ICmpInst
*getCheckInst() const { return CheckInst
; }
227 void print(raw_ostream
&OS
, bool PrintTypes
= false) {
229 Base
->printAsOperand(OS
, PrintTypes
);
231 Offset
->printAsOperand(OS
, PrintTypes
);
233 Length
->printAsOperand(OS
, PrintTypes
);
236 LLVM_DUMP_METHOD
void dump() {
242 /// Parse \p CheckCond into a conjunction (logical-and) of range checks; and
243 /// append them to \p Checks. Returns true on success, may clobber \c Checks
245 bool parseRangeChecks(Value
*CheckCond
, SmallVectorImpl
<RangeCheck
> &Checks
) {
246 SmallPtrSet
<const Value
*, 8> Visited
;
247 return parseRangeChecks(CheckCond
, Checks
, Visited
);
250 bool parseRangeChecks(Value
*CheckCond
, SmallVectorImpl
<RangeCheck
> &Checks
,
251 SmallPtrSetImpl
<const Value
*> &Visited
);
253 /// Combine the checks in \p Checks into a smaller set of checks and append
254 /// them into \p CombinedChecks. Return true on success (i.e. all of checks
255 /// in \p Checks were combined into \p CombinedChecks). Clobbers \p Checks
256 /// and \p CombinedChecks on success and on failure.
257 bool combineRangeChecks(SmallVectorImpl
<RangeCheck
> &Checks
,
258 SmallVectorImpl
<RangeCheck
> &CombinedChecks
) const;
260 /// Can we compute the logical AND of \p Cond0 and \p Cond1 for the price of
261 /// computing only one of the two expressions?
262 bool isWideningCondProfitable(Value
*Cond0
, Value
*Cond1
, bool InvertCond
) {
264 return widenCondCommon(Cond0
, Cond1
, /*InsertPt=*/nullptr, ResultUnused
,
268 /// If \p InvertCondition is false, Widen \p ToWiden to fail if
269 /// \p NewCondition is false, otherwise make it fail if \p NewCondition is
270 /// true (in addition to whatever it is already checking).
271 void widenGuard(Instruction
*ToWiden
, Value
*NewCondition
,
272 bool InvertCondition
) {
274 widenCondCommon(getCondition(ToWiden
), NewCondition
, ToWiden
, Result
,
276 Value
*WidenableCondition
= nullptr;
277 if (isGuardAsWidenableBranch(ToWiden
)) {
278 auto *Cond
= cast
<BranchInst
>(ToWiden
)->getCondition();
279 WidenableCondition
= cast
<BinaryOperator
>(Cond
)->getOperand(1);
281 if (WidenableCondition
)
282 Result
= BinaryOperator::CreateAnd(Result
, WidenableCondition
,
283 "guard.chk", ToWiden
);
284 setCondition(ToWiden
, Result
);
289 explicit GuardWideningImpl(DominatorTree
&DT
, PostDominatorTree
*PDT
,
290 LoopInfo
&LI
, BranchProbabilityInfo
*BPI
,
292 std::function
<bool(BasicBlock
*)> BlockFilter
)
293 : DT(DT
), PDT(PDT
), LI(LI
), BPI(BPI
), Root(Root
), BlockFilter(BlockFilter
)
296 /// The entry point for this pass.
301 static bool isSupportedGuardInstruction(const Instruction
*Insn
) {
304 if (WidenBranchGuards
&& isGuardAsWidenableBranch(Insn
))
309 bool GuardWideningImpl::run() {
310 DenseMap
<BasicBlock
*, SmallVector
<Instruction
*, 8>> GuardsInBlock
;
311 bool Changed
= false;
312 Optional
<BranchProbability
> LikelyTaken
= None
;
313 if (WidenFrequentBranches
&& BPI
) {
314 unsigned Threshold
= FrequentBranchThreshold
;
315 assert(Threshold
> 0 && "Zero threshold makes no sense!");
316 LikelyTaken
= BranchProbability(Threshold
- 1, Threshold
);
319 for (auto DFI
= df_begin(Root
), DFE
= df_end(Root
);
321 auto *BB
= (*DFI
)->getBlock();
322 if (!BlockFilter(BB
))
325 auto &CurrentList
= GuardsInBlock
[BB
];
328 if (isSupportedGuardInstruction(&I
))
329 CurrentList
.push_back(cast
<Instruction
>(&I
));
331 for (auto *II
: CurrentList
)
332 Changed
|= eliminateInstrViaWidening(II
, DFI
, GuardsInBlock
);
333 if (WidenFrequentBranches
&& BPI
)
334 if (auto *BI
= dyn_cast
<BranchInst
>(BB
->getTerminator()))
335 if (BI
->isConditional()) {
336 // If one of branches of a conditional is likely taken, try to
338 if (BPI
->getEdgeProbability(BB
, 0U) >= *LikelyTaken
)
339 Changed
|= eliminateInstrViaWidening(BI
, DFI
, GuardsInBlock
);
340 else if (BPI
->getEdgeProbability(BB
, 1U) >= *LikelyTaken
)
341 Changed
|= eliminateInstrViaWidening(BI
, DFI
, GuardsInBlock
,
342 /*InvertCondition*/true);
346 assert(EliminatedGuardsAndBranches
.empty() || Changed
);
347 for (auto *I
: EliminatedGuardsAndBranches
)
348 if (!WidenedGuards
.count(I
)) {
349 assert(isa
<ConstantInt
>(getCondition(I
)) && "Should be!");
350 if (isSupportedGuardInstruction(I
))
353 assert(isa
<BranchInst
>(I
) &&
354 "Eliminated something other than guard or branch?");
355 ++CondBranchEliminated
;
362 bool GuardWideningImpl::eliminateInstrViaWidening(
363 Instruction
*Instr
, const df_iterator
<DomTreeNode
*> &DFSI
,
364 const DenseMap
<BasicBlock
*, SmallVector
<Instruction
*, 8>> &
365 GuardsInBlock
, bool InvertCondition
) {
366 // Ignore trivial true or false conditions. These instructions will be
367 // trivially eliminated by any cleanup pass. Do not erase them because other
368 // guards can possibly be widened into them.
369 if (isa
<ConstantInt
>(getCondition(Instr
)))
372 Instruction
*BestSoFar
= nullptr;
373 auto BestScoreSoFar
= WS_IllegalOrNegative
;
375 // In the set of dominating guards, find the one we can merge GuardInst with
376 // for the most profit.
377 for (unsigned i
= 0, e
= DFSI
.getPathLength(); i
!= e
; ++i
) {
378 auto *CurBB
= DFSI
.getPath(i
)->getBlock();
379 if (!BlockFilter(CurBB
))
381 assert(GuardsInBlock
.count(CurBB
) && "Must have been populated by now!");
382 const auto &GuardsInCurBB
= GuardsInBlock
.find(CurBB
)->second
;
384 auto I
= GuardsInCurBB
.begin();
385 auto E
= Instr
->getParent() == CurBB
386 ? std::find(GuardsInCurBB
.begin(), GuardsInCurBB
.end(), Instr
)
387 : GuardsInCurBB
.end();
392 for (auto &I
: *CurBB
) {
393 if (Index
== GuardsInCurBB
.size())
395 if (GuardsInCurBB
[Index
] == &I
)
398 assert(Index
== GuardsInCurBB
.size() &&
399 "Guards expected to be in order!");
403 assert((i
== (e
- 1)) == (Instr
->getParent() == CurBB
) && "Bad DFS?");
405 for (auto *Candidate
: make_range(I
, E
)) {
406 auto Score
= computeWideningScore(Instr
, Candidate
, InvertCondition
);
407 LLVM_DEBUG(dbgs() << "Score between " << *getCondition(Instr
)
408 << " and " << *getCondition(Candidate
) << " is "
409 << scoreTypeToString(Score
) << "\n");
410 if (Score
> BestScoreSoFar
) {
411 BestScoreSoFar
= Score
;
412 BestSoFar
= Candidate
;
417 if (BestScoreSoFar
== WS_IllegalOrNegative
) {
418 LLVM_DEBUG(dbgs() << "Did not eliminate guard " << *Instr
<< "\n");
422 assert(BestSoFar
!= Instr
&& "Should have never visited same guard!");
423 assert(DT
.dominates(BestSoFar
, Instr
) && "Should be!");
425 LLVM_DEBUG(dbgs() << "Widening " << *Instr
<< " into " << *BestSoFar
426 << " with score " << scoreTypeToString(BestScoreSoFar
)
428 widenGuard(BestSoFar
, getCondition(Instr
), InvertCondition
);
429 auto NewGuardCondition
= InvertCondition
430 ? ConstantInt::getFalse(Instr
->getContext())
431 : ConstantInt::getTrue(Instr
->getContext());
432 setCondition(Instr
, NewGuardCondition
);
433 EliminatedGuardsAndBranches
.push_back(Instr
);
434 WidenedGuards
.insert(BestSoFar
);
438 GuardWideningImpl::WideningScore
439 GuardWideningImpl::computeWideningScore(Instruction
*DominatedInstr
,
440 Instruction
*DominatingGuard
,
442 Loop
*DominatedInstrLoop
= LI
.getLoopFor(DominatedInstr
->getParent());
443 Loop
*DominatingGuardLoop
= LI
.getLoopFor(DominatingGuard
->getParent());
444 bool HoistingOutOfLoop
= false;
446 if (DominatingGuardLoop
!= DominatedInstrLoop
) {
447 // Be conservative and don't widen into a sibling loop. TODO: If the
448 // sibling is colder, we should consider allowing this.
449 if (DominatingGuardLoop
&&
450 !DominatingGuardLoop
->contains(DominatedInstrLoop
))
451 return WS_IllegalOrNegative
;
453 HoistingOutOfLoop
= true;
456 if (!isAvailableAt(getCondition(DominatedInstr
), DominatingGuard
))
457 return WS_IllegalOrNegative
;
459 // If the guard was conditional executed, it may never be reached
460 // dynamically. There are two potential downsides to hoisting it out of the
461 // conditionally executed region: 1) we may spuriously deopt without need and
462 // 2) we have the extra cost of computing the guard condition in the common
463 // case. At the moment, we really only consider the second in our heuristic
464 // here. TODO: evaluate cost model for spurious deopt
465 // NOTE: As written, this also lets us hoist right over another guard which
466 // is essentially just another spelling for control flow.
467 if (isWideningCondProfitable(getCondition(DominatedInstr
),
468 getCondition(DominatingGuard
), InvertCond
))
469 return HoistingOutOfLoop
? WS_VeryPositive
: WS_Positive
;
471 if (HoistingOutOfLoop
)
474 // Returns true if we might be hoisting above explicit control flow. Note
475 // that this completely ignores implicit control flow (guards, calls which
476 // throw, etc...). That choice appears arbitrary.
477 auto MaybeHoistingOutOfIf
= [&]() {
478 auto *DominatingBlock
= DominatingGuard
->getParent();
479 auto *DominatedBlock
= DominatedInstr
->getParent();
480 if (isGuardAsWidenableBranch(DominatingGuard
))
481 DominatingBlock
= cast
<BranchInst
>(DominatingGuard
)->getSuccessor(0);
484 if (DominatedBlock
== DominatingBlock
)
486 // Obvious successor (common loop header/preheader case)
487 if (DominatedBlock
== DominatingBlock
->getUniqueSuccessor())
489 // TODO: diamond, triangle cases
490 if (!PDT
) return true;
491 return !PDT
->dominates(DominatedBlock
, DominatingBlock
);
494 return MaybeHoistingOutOfIf() ? WS_IllegalOrNegative
: WS_Neutral
;
497 bool GuardWideningImpl::isAvailableAt(
498 const Value
*V
, const Instruction
*Loc
,
499 SmallPtrSetImpl
<const Instruction
*> &Visited
) const {
500 auto *Inst
= dyn_cast
<Instruction
>(V
);
501 if (!Inst
|| DT
.dominates(Inst
, Loc
) || Visited
.count(Inst
))
504 if (!isSafeToSpeculativelyExecute(Inst
, Loc
, &DT
) ||
505 Inst
->mayReadFromMemory())
508 Visited
.insert(Inst
);
510 // We only want to go _up_ the dominance chain when recursing.
511 assert(!isa
<PHINode
>(Loc
) &&
512 "PHIs should return false for isSafeToSpeculativelyExecute");
513 assert(DT
.isReachableFromEntry(Inst
->getParent()) &&
514 "We did a DFS from the block entry!");
515 return all_of(Inst
->operands(),
516 [&](Value
*Op
) { return isAvailableAt(Op
, Loc
, Visited
); });
519 void GuardWideningImpl::makeAvailableAt(Value
*V
, Instruction
*Loc
) const {
520 auto *Inst
= dyn_cast
<Instruction
>(V
);
521 if (!Inst
|| DT
.dominates(Inst
, Loc
))
524 assert(isSafeToSpeculativelyExecute(Inst
, Loc
, &DT
) &&
525 !Inst
->mayReadFromMemory() && "Should've checked with isAvailableAt!");
527 for (Value
*Op
: Inst
->operands())
528 makeAvailableAt(Op
, Loc
);
530 Inst
->moveBefore(Loc
);
533 bool GuardWideningImpl::widenCondCommon(Value
*Cond0
, Value
*Cond1
,
534 Instruction
*InsertPt
, Value
*&Result
,
535 bool InvertCondition
) {
536 using namespace llvm::PatternMatch
;
539 // L >u C0 && L >u C1 -> L >u max(C0, C1)
540 ConstantInt
*RHS0
, *RHS1
;
542 ICmpInst::Predicate Pred0
, Pred1
;
543 if (match(Cond0
, m_ICmp(Pred0
, m_Value(LHS
), m_ConstantInt(RHS0
))) &&
544 match(Cond1
, m_ICmp(Pred1
, m_Specific(LHS
), m_ConstantInt(RHS1
)))) {
546 Pred1
= ICmpInst::getInversePredicate(Pred1
);
549 ConstantRange::makeExactICmpRegion(Pred0
, RHS0
->getValue());
551 ConstantRange::makeExactICmpRegion(Pred1
, RHS1
->getValue());
553 // SubsetIntersect is a subset of the actual mathematical intersection of
554 // CR0 and CR1, while SupersetIntersect is a superset of the actual
555 // mathematical intersection. If these two ConstantRanges are equal, then
556 // we know we were able to represent the actual mathematical intersection
557 // of CR0 and CR1, and can use the same to generate an icmp instruction.
559 // Given what we're doing here and the semantics of guards, it would
560 // actually be correct to just use SubsetIntersect, but that may be too
561 // aggressive in cases we care about.
562 auto SubsetIntersect
= CR0
.inverse().unionWith(CR1
.inverse()).inverse();
563 auto SupersetIntersect
= CR0
.intersectWith(CR1
);
566 CmpInst::Predicate Pred
;
567 if (SubsetIntersect
== SupersetIntersect
&&
568 SubsetIntersect
.getEquivalentICmp(Pred
, NewRHSAP
)) {
570 ConstantInt
*NewRHS
= ConstantInt::get(Cond0
->getContext(), NewRHSAP
);
571 Result
= new ICmpInst(InsertPt
, Pred
, LHS
, NewRHS
, "wide.chk");
579 SmallVector
<GuardWideningImpl::RangeCheck
, 4> Checks
, CombinedChecks
;
580 // TODO: Support InvertCondition case?
581 if (!InvertCondition
&&
582 parseRangeChecks(Cond0
, Checks
) && parseRangeChecks(Cond1
, Checks
) &&
583 combineRangeChecks(Checks
, CombinedChecks
)) {
586 for (auto &RC
: CombinedChecks
) {
587 makeAvailableAt(RC
.getCheckInst(), InsertPt
);
589 Result
= BinaryOperator::CreateAnd(RC
.getCheckInst(), Result
, "",
592 Result
= RC
.getCheckInst();
595 Result
->setName("wide.chk");
601 // Base case -- just logical-and the two conditions together.
604 makeAvailableAt(Cond0
, InsertPt
);
605 makeAvailableAt(Cond1
, InsertPt
);
607 Cond1
= BinaryOperator::CreateNot(Cond1
, "inverted", InsertPt
);
608 Result
= BinaryOperator::CreateAnd(Cond0
, Cond1
, "wide.chk", InsertPt
);
611 // We were not able to compute Cond0 AND Cond1 for the price of one.
615 bool GuardWideningImpl::parseRangeChecks(
616 Value
*CheckCond
, SmallVectorImpl
<GuardWideningImpl::RangeCheck
> &Checks
,
617 SmallPtrSetImpl
<const Value
*> &Visited
) {
618 if (!Visited
.insert(CheckCond
).second
)
621 using namespace llvm::PatternMatch
;
624 Value
*AndLHS
, *AndRHS
;
625 if (match(CheckCond
, m_And(m_Value(AndLHS
), m_Value(AndRHS
))))
626 return parseRangeChecks(AndLHS
, Checks
) &&
627 parseRangeChecks(AndRHS
, Checks
);
630 auto *IC
= dyn_cast
<ICmpInst
>(CheckCond
);
631 if (!IC
|| !IC
->getOperand(0)->getType()->isIntegerTy() ||
632 (IC
->getPredicate() != ICmpInst::ICMP_ULT
&&
633 IC
->getPredicate() != ICmpInst::ICMP_UGT
))
636 const Value
*CmpLHS
= IC
->getOperand(0), *CmpRHS
= IC
->getOperand(1);
637 if (IC
->getPredicate() == ICmpInst::ICMP_UGT
)
638 std::swap(CmpLHS
, CmpRHS
);
640 auto &DL
= IC
->getModule()->getDataLayout();
642 GuardWideningImpl::RangeCheck
Check(
643 CmpLHS
, cast
<ConstantInt
>(ConstantInt::getNullValue(CmpRHS
->getType())),
646 if (!isKnownNonNegative(Check
.getLength(), DL
))
649 // What we have in \c Check now is a correct interpretation of \p CheckCond.
650 // Try to see if we can move some constant offsets into the \c Offset field.
653 auto &Ctx
= CheckCond
->getContext();
661 auto *BaseInst
= dyn_cast
<Instruction
>(Check
.getBase());
662 assert((!BaseInst
|| DT
.isReachableFromEntry(BaseInst
->getParent())) &&
663 "Unreachable instruction?");
666 if (match(Check
.getBase(), m_Add(m_Value(OpLHS
), m_ConstantInt(OpRHS
)))) {
667 Check
.setBase(OpLHS
);
668 APInt NewOffset
= Check
.getOffsetValue() + OpRHS
->getValue();
669 Check
.setOffset(ConstantInt::get(Ctx
, NewOffset
));
671 } else if (match(Check
.getBase(),
672 m_Or(m_Value(OpLHS
), m_ConstantInt(OpRHS
)))) {
673 KnownBits Known
= computeKnownBits(OpLHS
, DL
);
674 if ((OpRHS
->getValue() & Known
.Zero
) == OpRHS
->getValue()) {
675 Check
.setBase(OpLHS
);
676 APInt NewOffset
= Check
.getOffsetValue() + OpRHS
->getValue();
677 Check
.setOffset(ConstantInt::get(Ctx
, NewOffset
));
683 Checks
.push_back(Check
);
687 bool GuardWideningImpl::combineRangeChecks(
688 SmallVectorImpl
<GuardWideningImpl::RangeCheck
> &Checks
,
689 SmallVectorImpl
<GuardWideningImpl::RangeCheck
> &RangeChecksOut
) const {
690 unsigned OldCount
= Checks
.size();
691 while (!Checks
.empty()) {
692 // Pick all of the range checks with a specific base and length, and try to
694 const Value
*CurrentBase
= Checks
.front().getBase();
695 const Value
*CurrentLength
= Checks
.front().getLength();
697 SmallVector
<GuardWideningImpl::RangeCheck
, 3> CurrentChecks
;
699 auto IsCurrentCheck
= [&](GuardWideningImpl::RangeCheck
&RC
) {
700 return RC
.getBase() == CurrentBase
&& RC
.getLength() == CurrentLength
;
703 copy_if(Checks
, std::back_inserter(CurrentChecks
), IsCurrentCheck
);
704 Checks
.erase(remove_if(Checks
, IsCurrentCheck
), Checks
.end());
706 assert(CurrentChecks
.size() != 0 && "We know we have at least one!");
708 if (CurrentChecks
.size() < 3) {
709 RangeChecksOut
.insert(RangeChecksOut
.end(), CurrentChecks
.begin(),
710 CurrentChecks
.end());
714 // CurrentChecks.size() will typically be 3 here, but so far there has been
715 // no need to hard-code that fact.
717 llvm::sort(CurrentChecks
, [&](const GuardWideningImpl::RangeCheck
&LHS
,
718 const GuardWideningImpl::RangeCheck
&RHS
) {
719 return LHS
.getOffsetValue().slt(RHS
.getOffsetValue());
722 // Note: std::sort should not invalidate the ChecksStart iterator.
724 const ConstantInt
*MinOffset
= CurrentChecks
.front().getOffset();
725 const ConstantInt
*MaxOffset
= CurrentChecks
.back().getOffset();
727 unsigned BitWidth
= MaxOffset
->getValue().getBitWidth();
728 if ((MaxOffset
->getValue() - MinOffset
->getValue())
729 .ugt(APInt::getSignedMinValue(BitWidth
)))
732 APInt MaxDiff
= MaxOffset
->getValue() - MinOffset
->getValue();
733 const APInt
&HighOffset
= MaxOffset
->getValue();
734 auto OffsetOK
= [&](const GuardWideningImpl::RangeCheck
&RC
) {
735 return (HighOffset
- RC
.getOffsetValue()).ult(MaxDiff
);
738 if (MaxDiff
.isMinValue() ||
739 !std::all_of(std::next(CurrentChecks
.begin()), CurrentChecks
.end(),
743 // We have a series of f+1 checks as:
745 // I+k_0 u< L ... Chk_0
746 // I+k_1 u< L ... Chk_1
748 // I+k_f u< L ... Chk_f
750 // with forall i in [0,f]: k_f-k_i u< k_f-k_0 ... Precond_0
751 // k_f-k_0 u< INT_MIN+k_f ... Precond_1
752 // k_f != k_0 ... Precond_2
755 // Chk_0 AND Chk_f implies all the other checks
757 // Informal proof sketch:
759 // We will show that the integer range [I+k_0,I+k_f] does not unsigned-wrap
760 // (i.e. going from I+k_0 to I+k_f does not cross the -1,0 boundary) and
761 // thus I+k_f is the greatest unsigned value in that range.
763 // This combined with Ckh_(f+1) shows that everything in that range is u< L.
764 // Via Precond_0 we know that all of the indices in Chk_0 through Chk_(f+1)
765 // lie in [I+k_0,I+k_f], this proving our claim.
767 // To see that [I+k_0,I+k_f] is not a wrapping range, note that there are
768 // two possibilities: I+k_0 u< I+k_f or I+k_0 >u I+k_f (they can't be equal
769 // since k_0 != k_f). In the former case, [I+k_0,I+k_f] is not a wrapping
770 // range by definition, and the latter case is impossible:
772 // 0-----I+k_f---I+k_0----L---INT_MAX,INT_MIN------------------(-1)
773 // xxxxxx xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
775 // For Chk_0 to succeed, we'd have to have k_f-k_0 (the range highlighted
776 // with 'x' above) to be at least >u INT_MIN.
778 RangeChecksOut
.emplace_back(CurrentChecks
.front());
779 RangeChecksOut
.emplace_back(CurrentChecks
.back());
782 assert(RangeChecksOut
.size() <= OldCount
&& "We pessimized!");
783 return RangeChecksOut
.size() != OldCount
;
787 StringRef
GuardWideningImpl::scoreTypeToString(WideningScore WS
) {
789 case WS_IllegalOrNegative
:
790 return "IllegalOrNegative";
795 case WS_VeryPositive
:
796 return "VeryPositive";
799 llvm_unreachable("Fully covered switch above!");
803 PreservedAnalyses
GuardWideningPass::run(Function
&F
,
804 FunctionAnalysisManager
&AM
) {
805 auto &DT
= AM
.getResult
<DominatorTreeAnalysis
>(F
);
806 auto &LI
= AM
.getResult
<LoopAnalysis
>(F
);
807 auto &PDT
= AM
.getResult
<PostDominatorTreeAnalysis
>(F
);
808 BranchProbabilityInfo
*BPI
= nullptr;
809 if (WidenFrequentBranches
)
810 BPI
= AM
.getCachedResult
<BranchProbabilityAnalysis
>(F
);
811 if (!GuardWideningImpl(DT
, &PDT
, LI
, BPI
, DT
.getRootNode(),
812 [](BasicBlock
*) { return true; } ).run())
813 return PreservedAnalyses::all();
815 PreservedAnalyses PA
;
816 PA
.preserveSet
<CFGAnalyses
>();
820 PreservedAnalyses
GuardWideningPass::run(Loop
&L
, LoopAnalysisManager
&AM
,
821 LoopStandardAnalysisResults
&AR
,
825 AM
.getResult
<FunctionAnalysisManagerLoopProxy
>(L
, AR
).getManager();
826 Function
&F
= *L
.getHeader()->getParent();
827 BranchProbabilityInfo
*BPI
= nullptr;
828 if (WidenFrequentBranches
)
829 BPI
= FAM
.getCachedResult
<BranchProbabilityAnalysis
>(F
);
831 BasicBlock
*RootBB
= L
.getLoopPredecessor();
833 RootBB
= L
.getHeader();
834 auto BlockFilter
= [&](BasicBlock
*BB
) {
835 return BB
== RootBB
|| L
.contains(BB
);
837 if (!GuardWideningImpl(AR
.DT
, nullptr, AR
.LI
, BPI
,
838 AR
.DT
.getNode(RootBB
),
840 return PreservedAnalyses::all();
842 return getLoopPassPreservedAnalyses();
846 struct GuardWideningLegacyPass
: public FunctionPass
{
849 GuardWideningLegacyPass() : FunctionPass(ID
) {
850 initializeGuardWideningLegacyPassPass(*PassRegistry::getPassRegistry());
853 bool runOnFunction(Function
&F
) override
{
856 auto &DT
= getAnalysis
<DominatorTreeWrapperPass
>().getDomTree();
857 auto &LI
= getAnalysis
<LoopInfoWrapperPass
>().getLoopInfo();
858 auto &PDT
= getAnalysis
<PostDominatorTreeWrapperPass
>().getPostDomTree();
859 BranchProbabilityInfo
*BPI
= nullptr;
860 if (WidenFrequentBranches
)
861 BPI
= &getAnalysis
<BranchProbabilityInfoWrapperPass
>().getBPI();
862 return GuardWideningImpl(DT
, &PDT
, LI
, BPI
, DT
.getRootNode(),
863 [](BasicBlock
*) { return true; } ).run();
866 void getAnalysisUsage(AnalysisUsage
&AU
) const override
{
867 AU
.setPreservesCFG();
868 AU
.addRequired
<DominatorTreeWrapperPass
>();
869 AU
.addRequired
<PostDominatorTreeWrapperPass
>();
870 AU
.addRequired
<LoopInfoWrapperPass
>();
871 if (WidenFrequentBranches
)
872 AU
.addRequired
<BranchProbabilityInfoWrapperPass
>();
876 /// Same as above, but restricted to a single loop at a time. Can be
877 /// scheduled with other loop passes w/o breaking out of LPM
878 struct LoopGuardWideningLegacyPass
: public LoopPass
{
881 LoopGuardWideningLegacyPass() : LoopPass(ID
) {
882 initializeLoopGuardWideningLegacyPassPass(*PassRegistry::getPassRegistry());
885 bool runOnLoop(Loop
*L
, LPPassManager
&LPM
) override
{
888 auto &DT
= getAnalysis
<DominatorTreeWrapperPass
>().getDomTree();
889 auto &LI
= getAnalysis
<LoopInfoWrapperPass
>().getLoopInfo();
890 auto *PDTWP
= getAnalysisIfAvailable
<PostDominatorTreeWrapperPass
>();
891 auto *PDT
= PDTWP
? &PDTWP
->getPostDomTree() : nullptr;
892 BasicBlock
*RootBB
= L
->getLoopPredecessor();
894 RootBB
= L
->getHeader();
895 auto BlockFilter
= [&](BasicBlock
*BB
) {
896 return BB
== RootBB
|| L
->contains(BB
);
898 BranchProbabilityInfo
*BPI
= nullptr;
899 if (WidenFrequentBranches
)
900 BPI
= &getAnalysis
<BranchProbabilityInfoWrapperPass
>().getBPI();
901 return GuardWideningImpl(DT
, PDT
, LI
, BPI
,
902 DT
.getNode(RootBB
), BlockFilter
).run();
905 void getAnalysisUsage(AnalysisUsage
&AU
) const override
{
906 if (WidenFrequentBranches
)
907 AU
.addRequired
<BranchProbabilityInfoWrapperPass
>();
908 AU
.setPreservesCFG();
909 getLoopAnalysisUsage(AU
);
910 AU
.addPreserved
<PostDominatorTreeWrapperPass
>();
915 char GuardWideningLegacyPass::ID
= 0;
916 char LoopGuardWideningLegacyPass::ID
= 0;
918 INITIALIZE_PASS_BEGIN(GuardWideningLegacyPass
, "guard-widening", "Widen guards",
920 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass
)
921 INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass
)
922 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass
)
923 if (WidenFrequentBranches
)
924 INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass
)
925 INITIALIZE_PASS_END(GuardWideningLegacyPass
, "guard-widening", "Widen guards",
928 INITIALIZE_PASS_BEGIN(LoopGuardWideningLegacyPass
, "loop-guard-widening",
929 "Widen guards (within a single loop, as a loop pass)",
931 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass
)
932 INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass
)
933 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass
)
934 if (WidenFrequentBranches
)
935 INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass
)
936 INITIALIZE_PASS_END(LoopGuardWideningLegacyPass
, "loop-guard-widening",
937 "Widen guards (within a single loop, as a loop pass)",
940 FunctionPass
*llvm::createGuardWideningPass() {
941 return new GuardWideningLegacyPass();
944 Pass
*llvm::createLoopGuardWideningPass() {
945 return new LoopGuardWideningLegacyPass();