1 //===- BranchProbabilityInfo.cpp - Branch Probability Analysis ------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // Loops should be simplified before this analysis.
11 //===----------------------------------------------------------------------===//
13 #include "llvm/Analysis/BranchProbabilityInfo.h"
14 #include "llvm/ADT/PostOrderIterator.h"
15 #include "llvm/ADT/SCCIterator.h"
16 #include "llvm/ADT/STLExtras.h"
17 #include "llvm/ADT/SmallVector.h"
18 #include "llvm/Analysis/ConstantFolding.h"
19 #include "llvm/Analysis/LoopInfo.h"
20 #include "llvm/Analysis/PostDominators.h"
21 #include "llvm/Analysis/TargetLibraryInfo.h"
22 #include "llvm/IR/Attributes.h"
23 #include "llvm/IR/BasicBlock.h"
24 #include "llvm/IR/CFG.h"
25 #include "llvm/IR/Constants.h"
26 #include "llvm/IR/Dominators.h"
27 #include "llvm/IR/Function.h"
28 #include "llvm/IR/InstrTypes.h"
29 #include "llvm/IR/Instruction.h"
30 #include "llvm/IR/Instructions.h"
31 #include "llvm/IR/LLVMContext.h"
32 #include "llvm/IR/Metadata.h"
33 #include "llvm/IR/PassManager.h"
34 #include "llvm/IR/ProfDataUtils.h"
35 #include "llvm/IR/Type.h"
36 #include "llvm/IR/Value.h"
37 #include "llvm/InitializePasses.h"
38 #include "llvm/Pass.h"
39 #include "llvm/Support/BranchProbability.h"
40 #include "llvm/Support/Casting.h"
41 #include "llvm/Support/CommandLine.h"
42 #include "llvm/Support/Debug.h"
43 #include "llvm/Support/raw_ostream.h"
52 #define DEBUG_TYPE "branch-prob"
54 static cl::opt
<bool> PrintBranchProb(
55 "print-bpi", cl::init(false), cl::Hidden
,
56 cl::desc("Print the branch probability info."));
58 cl::opt
<std::string
> PrintBranchProbFuncName(
59 "print-bpi-func-name", cl::Hidden
,
60 cl::desc("The option to specify the name of the function "
61 "whose branch probability info is printed."));
63 INITIALIZE_PASS_BEGIN(BranchProbabilityInfoWrapperPass
, "branch-prob",
64 "Branch Probability Analysis", false, true)
65 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass
)
66 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass
)
67 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass
)
68 INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass
)
69 INITIALIZE_PASS_END(BranchProbabilityInfoWrapperPass
, "branch-prob",
70 "Branch Probability Analysis", false, true)
72 BranchProbabilityInfoWrapperPass::BranchProbabilityInfoWrapperPass()
74 initializeBranchProbabilityInfoWrapperPassPass(
75 *PassRegistry::getPassRegistry());
78 char BranchProbabilityInfoWrapperPass::ID
= 0;
80 // Weights are for internal use only. They are used by heuristics to help to
81 // estimate edges' probability. Example:
83 // Using "Loop Branch Heuristics" we predict weights of edges for the
98 // Probability of the edge BB2->BB1 = 124 / (124 + 4) = 0.96875
99 // Probability of the edge BB2->BB3 = 4 / (124 + 4) = 0.03125
100 static const uint32_t LBH_TAKEN_WEIGHT
= 124;
101 static const uint32_t LBH_NONTAKEN_WEIGHT
= 4;
103 /// Unreachable-terminating branch taken probability.
105 /// This is the probability for a branch being taken to a block that terminates
106 /// (eventually) in unreachable. These are predicted as unlikely as possible.
107 /// All reachable probability will proportionally share the remaining part.
108 static const BranchProbability UR_TAKEN_PROB
= BranchProbability::getRaw(1);
110 /// Heuristics and lookup tables for non-loop branches:
111 /// Pointer Heuristics (PH)
112 static const uint32_t PH_TAKEN_WEIGHT
= 20;
113 static const uint32_t PH_NONTAKEN_WEIGHT
= 12;
114 static const BranchProbability
115 PtrTakenProb(PH_TAKEN_WEIGHT
, PH_TAKEN_WEIGHT
+ PH_NONTAKEN_WEIGHT
);
116 static const BranchProbability
117 PtrUntakenProb(PH_NONTAKEN_WEIGHT
, PH_TAKEN_WEIGHT
+ PH_NONTAKEN_WEIGHT
);
119 using ProbabilityList
= SmallVector
<BranchProbability
>;
120 using ProbabilityTable
= std::map
<CmpInst::Predicate
, ProbabilityList
>;
122 /// Pointer comparisons:
123 static const ProbabilityTable PointerTable
{
124 {ICmpInst::ICMP_NE
, {PtrTakenProb
, PtrUntakenProb
}}, /// p != q -> Likely
125 {ICmpInst::ICMP_EQ
, {PtrUntakenProb
, PtrTakenProb
}}, /// p == q -> Unlikely
128 /// Zero Heuristics (ZH)
129 static const uint32_t ZH_TAKEN_WEIGHT
= 20;
130 static const uint32_t ZH_NONTAKEN_WEIGHT
= 12;
131 static const BranchProbability
132 ZeroTakenProb(ZH_TAKEN_WEIGHT
, ZH_TAKEN_WEIGHT
+ ZH_NONTAKEN_WEIGHT
);
133 static const BranchProbability
134 ZeroUntakenProb(ZH_NONTAKEN_WEIGHT
, ZH_TAKEN_WEIGHT
+ ZH_NONTAKEN_WEIGHT
);
136 /// Integer compares with 0:
137 static const ProbabilityTable ICmpWithZeroTable
{
138 {CmpInst::ICMP_EQ
, {ZeroUntakenProb
, ZeroTakenProb
}}, /// X == 0 -> Unlikely
139 {CmpInst::ICMP_NE
, {ZeroTakenProb
, ZeroUntakenProb
}}, /// X != 0 -> Likely
140 {CmpInst::ICMP_SLT
, {ZeroUntakenProb
, ZeroTakenProb
}}, /// X < 0 -> Unlikely
141 {CmpInst::ICMP_SGT
, {ZeroTakenProb
, ZeroUntakenProb
}}, /// X > 0 -> Likely
144 /// Integer compares with -1:
145 static const ProbabilityTable ICmpWithMinusOneTable
{
146 {CmpInst::ICMP_EQ
, {ZeroUntakenProb
, ZeroTakenProb
}}, /// X == -1 -> Unlikely
147 {CmpInst::ICMP_NE
, {ZeroTakenProb
, ZeroUntakenProb
}}, /// X != -1 -> Likely
148 // InstCombine canonicalizes X >= 0 into X > -1
149 {CmpInst::ICMP_SGT
, {ZeroTakenProb
, ZeroUntakenProb
}}, /// X >= 0 -> Likely
152 /// Integer compares with 1:
153 static const ProbabilityTable ICmpWithOneTable
{
154 // InstCombine canonicalizes X <= 0 into X < 1
155 {CmpInst::ICMP_SLT
, {ZeroUntakenProb
, ZeroTakenProb
}}, /// X <= 0 -> Unlikely
158 /// strcmp and similar functions return zero, negative, or positive, if the
159 /// first string is equal, less, or greater than the second. We consider it
160 /// likely that the strings are not equal, so a comparison with zero is
161 /// probably false, but also a comparison with any other number is also
162 /// probably false given that what exactly is returned for nonzero values is
163 /// not specified. Any kind of comparison other than equality we know
165 static const ProbabilityTable ICmpWithLibCallTable
{
166 {CmpInst::ICMP_EQ
, {ZeroUntakenProb
, ZeroTakenProb
}},
167 {CmpInst::ICMP_NE
, {ZeroTakenProb
, ZeroUntakenProb
}},
170 // Floating-Point Heuristics (FPH)
171 static const uint32_t FPH_TAKEN_WEIGHT
= 20;
172 static const uint32_t FPH_NONTAKEN_WEIGHT
= 12;
174 /// This is the probability for an ordered floating point comparison.
175 static const uint32_t FPH_ORD_WEIGHT
= 1024 * 1024 - 1;
176 /// This is the probability for an unordered floating point comparison, it means
177 /// one or two of the operands are NaN. Usually it is used to test for an
178 /// exceptional case, so the result is unlikely.
179 static const uint32_t FPH_UNO_WEIGHT
= 1;
181 static const BranchProbability
FPOrdTakenProb(FPH_ORD_WEIGHT
,
182 FPH_ORD_WEIGHT
+ FPH_UNO_WEIGHT
);
183 static const BranchProbability
184 FPOrdUntakenProb(FPH_UNO_WEIGHT
, FPH_ORD_WEIGHT
+ FPH_UNO_WEIGHT
);
185 static const BranchProbability
186 FPTakenProb(FPH_TAKEN_WEIGHT
, FPH_TAKEN_WEIGHT
+ FPH_NONTAKEN_WEIGHT
);
187 static const BranchProbability
188 FPUntakenProb(FPH_NONTAKEN_WEIGHT
, FPH_TAKEN_WEIGHT
+ FPH_NONTAKEN_WEIGHT
);
190 /// Floating-Point compares:
191 static const ProbabilityTable FCmpTable
{
192 {FCmpInst::FCMP_ORD
, {FPOrdTakenProb
, FPOrdUntakenProb
}}, /// !isnan -> Likely
193 {FCmpInst::FCMP_UNO
, {FPOrdUntakenProb
, FPOrdTakenProb
}}, /// isnan -> Unlikely
196 /// Set of dedicated "absolute" execution weights for a block. These weights are
197 /// meaningful relative to each other and their derivatives only.
198 enum class BlockExecWeight
: std::uint32_t {
199 /// Special weight used for cases with exact zero probability.
201 /// Minimal possible non zero weight.
202 LOWEST_NON_ZERO
= 0x1,
203 /// Weight to an 'unreachable' block.
205 /// Weight to a block containing non returning call.
206 NORETURN
= LOWEST_NON_ZERO
,
207 /// Weight to 'unwind' block of an invoke instruction.
208 UNWIND
= LOWEST_NON_ZERO
,
209 /// Weight to a 'cold' block. Cold blocks are the ones containing calls marked
210 /// with attribute 'cold'.
212 /// Default weight is used in cases when there is no dedicated execution
213 /// weight set. It is not propagated through the domination line either.
217 BranchProbabilityInfo::SccInfo::SccInfo(const Function
&F
) {
218 // Record SCC numbers of blocks in the CFG to identify irreducible loops.
219 // FIXME: We could only calculate this if the CFG is known to be irreducible
220 // (perhaps cache this info in LoopInfo if we can easily calculate it there?).
222 for (scc_iterator
<const Function
*> It
= scc_begin(&F
); !It
.isAtEnd();
224 // Ignore single-block SCCs since they either aren't loops or LoopInfo will
226 const std::vector
<const BasicBlock
*> &Scc
= *It
;
230 LLVM_DEBUG(dbgs() << "BPI: SCC " << SccNum
<< ":");
231 for (const auto *BB
: Scc
) {
232 LLVM_DEBUG(dbgs() << " " << BB
->getName());
233 SccNums
[BB
] = SccNum
;
234 calculateSccBlockType(BB
, SccNum
);
236 LLVM_DEBUG(dbgs() << "\n");
240 int BranchProbabilityInfo::SccInfo::getSCCNum(const BasicBlock
*BB
) const {
241 auto SccIt
= SccNums
.find(BB
);
242 if (SccIt
== SccNums
.end())
244 return SccIt
->second
;
247 void BranchProbabilityInfo::SccInfo::getSccEnterBlocks(
248 int SccNum
, SmallVectorImpl
<BasicBlock
*> &Enters
) const {
250 for (auto MapIt
: SccBlocks
[SccNum
]) {
251 const auto *BB
= MapIt
.first
;
252 if (isSCCHeader(BB
, SccNum
))
253 for (const auto *Pred
: predecessors(BB
))
254 if (getSCCNum(Pred
) != SccNum
)
255 Enters
.push_back(const_cast<BasicBlock
*>(BB
));
259 void BranchProbabilityInfo::SccInfo::getSccExitBlocks(
260 int SccNum
, SmallVectorImpl
<BasicBlock
*> &Exits
) const {
261 for (auto MapIt
: SccBlocks
[SccNum
]) {
262 const auto *BB
= MapIt
.first
;
263 if (isSCCExitingBlock(BB
, SccNum
))
264 for (const auto *Succ
: successors(BB
))
265 if (getSCCNum(Succ
) != SccNum
)
266 Exits
.push_back(const_cast<BasicBlock
*>(Succ
));
270 uint32_t BranchProbabilityInfo::SccInfo::getSccBlockType(const BasicBlock
*BB
,
272 assert(getSCCNum(BB
) == SccNum
);
274 assert(SccBlocks
.size() > static_cast<unsigned>(SccNum
) && "Unknown SCC");
275 const auto &SccBlockTypes
= SccBlocks
[SccNum
];
277 auto It
= SccBlockTypes
.find(BB
);
278 if (It
!= SccBlockTypes
.end()) {
284 void BranchProbabilityInfo::SccInfo::calculateSccBlockType(const BasicBlock
*BB
,
286 assert(getSCCNum(BB
) == SccNum
);
287 uint32_t BlockType
= Inner
;
289 if (llvm::any_of(predecessors(BB
), [&](const BasicBlock
*Pred
) {
290 // Consider any block that is an entry point to the SCC as
292 return getSCCNum(Pred
) != SccNum
;
296 if (llvm::any_of(successors(BB
), [&](const BasicBlock
*Succ
) {
297 return getSCCNum(Succ
) != SccNum
;
299 BlockType
|= Exiting
;
301 // Lazily compute the set of headers for a given SCC and cache the results
302 // in the SccHeaderMap.
303 if (SccBlocks
.size() <= static_cast<unsigned>(SccNum
))
304 SccBlocks
.resize(SccNum
+ 1);
305 auto &SccBlockTypes
= SccBlocks
[SccNum
];
307 if (BlockType
!= Inner
) {
309 std::tie(std::ignore
, IsInserted
) =
310 SccBlockTypes
.insert(std::make_pair(BB
, BlockType
));
311 assert(IsInserted
&& "Duplicated block in SCC");
315 BranchProbabilityInfo::LoopBlock::LoopBlock(const BasicBlock
*BB
,
319 LD
.first
= LI
.getLoopFor(BB
);
321 LD
.second
= SccI
.getSCCNum(BB
);
325 bool BranchProbabilityInfo::isLoopEnteringEdge(const LoopEdge
&Edge
) const {
326 const auto &SrcBlock
= Edge
.first
;
327 const auto &DstBlock
= Edge
.second
;
328 return (DstBlock
.getLoop() &&
329 !DstBlock
.getLoop()->contains(SrcBlock
.getLoop())) ||
330 // Assume that SCCs can't be nested.
331 (DstBlock
.getSccNum() != -1 &&
332 SrcBlock
.getSccNum() != DstBlock
.getSccNum());
335 bool BranchProbabilityInfo::isLoopExitingEdge(const LoopEdge
&Edge
) const {
336 return isLoopEnteringEdge({Edge
.second
, Edge
.first
});
339 bool BranchProbabilityInfo::isLoopEnteringExitingEdge(
340 const LoopEdge
&Edge
) const {
341 return isLoopEnteringEdge(Edge
) || isLoopExitingEdge(Edge
);
344 bool BranchProbabilityInfo::isLoopBackEdge(const LoopEdge
&Edge
) const {
345 const auto &SrcBlock
= Edge
.first
;
346 const auto &DstBlock
= Edge
.second
;
347 return SrcBlock
.belongsToSameLoop(DstBlock
) &&
348 ((DstBlock
.getLoop() &&
349 DstBlock
.getLoop()->getHeader() == DstBlock
.getBlock()) ||
350 (DstBlock
.getSccNum() != -1 &&
351 SccI
->isSCCHeader(DstBlock
.getBlock(), DstBlock
.getSccNum())));
354 void BranchProbabilityInfo::getLoopEnterBlocks(
355 const LoopBlock
&LB
, SmallVectorImpl
<BasicBlock
*> &Enters
) const {
357 auto *Header
= LB
.getLoop()->getHeader();
358 Enters
.append(pred_begin(Header
), pred_end(Header
));
360 assert(LB
.getSccNum() != -1 && "LB doesn't belong to any loop?");
361 SccI
->getSccEnterBlocks(LB
.getSccNum(), Enters
);
365 void BranchProbabilityInfo::getLoopExitBlocks(
366 const LoopBlock
&LB
, SmallVectorImpl
<BasicBlock
*> &Exits
) const {
368 LB
.getLoop()->getExitBlocks(Exits
);
370 assert(LB
.getSccNum() != -1 && "LB doesn't belong to any loop?");
371 SccI
->getSccExitBlocks(LB
.getSccNum(), Exits
);
375 // Propagate existing explicit probabilities from either profile data or
376 // 'expect' intrinsic processing. Examine metadata against unreachable
377 // heuristic. The probability of the edge coming to unreachable block is
378 // set to min of metadata and unreachable heuristic.
379 bool BranchProbabilityInfo::calcMetadataWeights(const BasicBlock
*BB
) {
380 const Instruction
*TI
= BB
->getTerminator();
381 assert(TI
->getNumSuccessors() > 1 && "expected more than one successor!");
382 if (!(isa
<BranchInst
>(TI
) || isa
<SwitchInst
>(TI
) || isa
<IndirectBrInst
>(TI
) ||
383 isa
<InvokeInst
>(TI
) || isa
<CallBrInst
>(TI
)))
386 MDNode
*WeightsNode
= getValidBranchWeightMDNode(*TI
);
390 // Check that the number of successors is manageable.
391 assert(TI
->getNumSuccessors() < UINT32_MAX
&& "Too many successors");
393 // Build up the final weights that will be used in a temporary buffer.
394 // Compute the sum of all weights to later decide whether they need to
395 // be scaled to fit in 32 bits.
396 uint64_t WeightSum
= 0;
397 SmallVector
<uint32_t, 2> Weights
;
398 SmallVector
<unsigned, 2> UnreachableIdxs
;
399 SmallVector
<unsigned, 2> ReachableIdxs
;
401 extractBranchWeights(WeightsNode
, Weights
);
402 for (unsigned I
= 0, E
= Weights
.size(); I
!= E
; ++I
) {
403 WeightSum
+= Weights
[I
];
404 const LoopBlock SrcLoopBB
= getLoopBlock(BB
);
405 const LoopBlock DstLoopBB
= getLoopBlock(TI
->getSuccessor(I
));
406 auto EstimatedWeight
= getEstimatedEdgeWeight({SrcLoopBB
, DstLoopBB
});
407 if (EstimatedWeight
&&
408 *EstimatedWeight
<= static_cast<uint32_t>(BlockExecWeight::UNREACHABLE
))
409 UnreachableIdxs
.push_back(I
);
411 ReachableIdxs
.push_back(I
);
413 assert(Weights
.size() == TI
->getNumSuccessors() && "Checked above");
415 // If the sum of weights does not fit in 32 bits, scale every weight down
417 uint64_t ScalingFactor
=
418 (WeightSum
> UINT32_MAX
) ? WeightSum
/ UINT32_MAX
+ 1 : 1;
420 if (ScalingFactor
> 1) {
422 for (unsigned I
= 0, E
= TI
->getNumSuccessors(); I
!= E
; ++I
) {
423 Weights
[I
] /= ScalingFactor
;
424 WeightSum
+= Weights
[I
];
427 assert(WeightSum
<= UINT32_MAX
&&
428 "Expected weights to scale down to 32 bits");
430 if (WeightSum
== 0 || ReachableIdxs
.size() == 0) {
431 for (unsigned I
= 0, E
= TI
->getNumSuccessors(); I
!= E
; ++I
)
433 WeightSum
= TI
->getNumSuccessors();
436 // Set the probability.
437 SmallVector
<BranchProbability
, 2> BP
;
438 for (unsigned I
= 0, E
= TI
->getNumSuccessors(); I
!= E
; ++I
)
439 BP
.push_back({ Weights
[I
], static_cast<uint32_t>(WeightSum
) });
441 // Examine the metadata against unreachable heuristic.
442 // If the unreachable heuristic is more strong then we use it for this edge.
443 if (UnreachableIdxs
.size() == 0 || ReachableIdxs
.size() == 0) {
444 setEdgeProbability(BB
, BP
);
448 auto UnreachableProb
= UR_TAKEN_PROB
;
449 for (auto I
: UnreachableIdxs
)
450 if (UnreachableProb
< BP
[I
]) {
451 BP
[I
] = UnreachableProb
;
454 // Sum of all edge probabilities must be 1.0. If we modified the probability
455 // of some edges then we must distribute the introduced difference over the
458 // Proportional distribution: the relation between probabilities of the
459 // reachable edges is kept unchanged. That is for any reachable edges i and j:
460 // newBP[i] / newBP[j] == oldBP[i] / oldBP[j] =>
461 // newBP[i] / oldBP[i] == newBP[j] / oldBP[j] == K
462 // Where K is independent of i,j.
463 // newBP[i] == oldBP[i] * K
464 // We need to find K.
465 // Make sum of all reachables of the left and right parts:
466 // sum_of_reachable(newBP) == K * sum_of_reachable(oldBP)
467 // Sum of newBP must be equal to 1.0:
468 // sum_of_reachable(newBP) + sum_of_unreachable(newBP) == 1.0 =>
469 // sum_of_reachable(newBP) = 1.0 - sum_of_unreachable(newBP)
470 // Where sum_of_unreachable(newBP) is what has been just changed.
472 // K == sum_of_reachable(newBP) / sum_of_reachable(oldBP) =>
473 // K == (1.0 - sum_of_unreachable(newBP)) / sum_of_reachable(oldBP)
474 BranchProbability NewUnreachableSum
= BranchProbability::getZero();
475 for (auto I
: UnreachableIdxs
)
476 NewUnreachableSum
+= BP
[I
];
478 BranchProbability NewReachableSum
=
479 BranchProbability::getOne() - NewUnreachableSum
;
481 BranchProbability OldReachableSum
= BranchProbability::getZero();
482 for (auto I
: ReachableIdxs
)
483 OldReachableSum
+= BP
[I
];
485 if (OldReachableSum
!= NewReachableSum
) { // Anything to dsitribute?
486 if (OldReachableSum
.isZero()) {
487 // If all oldBP[i] are zeroes then the proportional distribution results
488 // in all zero probabilities and the error stays big. In this case we
489 // evenly spread NewReachableSum over the reachable edges.
490 BranchProbability PerEdge
= NewReachableSum
/ ReachableIdxs
.size();
491 for (auto I
: ReachableIdxs
)
494 for (auto I
: ReachableIdxs
) {
495 // We use uint64_t to avoid double rounding error of the following
496 // calculation: BP[i] = BP[i] * NewReachableSum / OldReachableSum
497 // The formula is taken from the private constructor
498 // BranchProbability(uint32_t Numerator, uint32_t Denominator)
499 uint64_t Mul
= static_cast<uint64_t>(NewReachableSum
.getNumerator()) *
500 BP
[I
].getNumerator();
501 uint32_t Div
= static_cast<uint32_t>(
502 divideNearest(Mul
, OldReachableSum
.getNumerator()));
503 BP
[I
] = BranchProbability::getRaw(Div
);
508 setEdgeProbability(BB
, BP
);
513 // Calculate Edge Weights using "Pointer Heuristics". Predict a comparison
514 // between two pointer or pointer and NULL will fail.
515 bool BranchProbabilityInfo::calcPointerHeuristics(const BasicBlock
*BB
) {
516 const BranchInst
*BI
= dyn_cast
<BranchInst
>(BB
->getTerminator());
517 if (!BI
|| !BI
->isConditional())
520 Value
*Cond
= BI
->getCondition();
521 ICmpInst
*CI
= dyn_cast
<ICmpInst
>(Cond
);
522 if (!CI
|| !CI
->isEquality())
525 Value
*LHS
= CI
->getOperand(0);
527 if (!LHS
->getType()->isPointerTy())
530 assert(CI
->getOperand(1)->getType()->isPointerTy());
532 auto Search
= PointerTable
.find(CI
->getPredicate());
533 if (Search
== PointerTable
.end())
535 setEdgeProbability(BB
, Search
->second
);
539 // Compute the unlikely successors to the block BB in the loop L, specifically
540 // those that are unlikely because this is a loop, and add them to the
541 // UnlikelyBlocks set.
543 computeUnlikelySuccessors(const BasicBlock
*BB
, Loop
*L
,
544 SmallPtrSetImpl
<const BasicBlock
*> &UnlikelyBlocks
) {
545 // Sometimes in a loop we have a branch whose condition is made false by
546 // taking it. This is typically something like
553 // In this sort of situation taking the branch means that at the very least it
554 // won't be taken again in the next iteration of the loop, so we should
555 // consider it less likely than a typical branch.
557 // We detect this by looking back through the graph of PHI nodes that sets the
558 // value that the condition depends on, and seeing if we can reach a successor
559 // block which can be determined to make the condition false.
561 // FIXME: We currently consider unlikely blocks to be half as likely as other
562 // blocks, but if we consider the example above the likelyhood is actually
563 // 1/MAX. We could therefore be more precise in how unlikely we consider
564 // blocks to be, but it would require more careful examination of the form
565 // of the comparison expression.
566 const BranchInst
*BI
= dyn_cast
<BranchInst
>(BB
->getTerminator());
567 if (!BI
|| !BI
->isConditional())
570 // Check if the branch is based on an instruction compared with a constant
571 CmpInst
*CI
= dyn_cast
<CmpInst
>(BI
->getCondition());
572 if (!CI
|| !isa
<Instruction
>(CI
->getOperand(0)) ||
573 !isa
<Constant
>(CI
->getOperand(1)))
576 // Either the instruction must be a PHI, or a chain of operations involving
577 // constants that ends in a PHI which we can then collapse into a single value
578 // if the PHI value is known.
579 Instruction
*CmpLHS
= dyn_cast
<Instruction
>(CI
->getOperand(0));
580 PHINode
*CmpPHI
= dyn_cast
<PHINode
>(CmpLHS
);
581 Constant
*CmpConst
= dyn_cast
<Constant
>(CI
->getOperand(1));
582 // Collect the instructions until we hit a PHI
583 SmallVector
<BinaryOperator
*, 1> InstChain
;
584 while (!CmpPHI
&& CmpLHS
&& isa
<BinaryOperator
>(CmpLHS
) &&
585 isa
<Constant
>(CmpLHS
->getOperand(1))) {
586 // Stop if the chain extends outside of the loop
587 if (!L
->contains(CmpLHS
))
589 InstChain
.push_back(cast
<BinaryOperator
>(CmpLHS
));
590 CmpLHS
= dyn_cast
<Instruction
>(CmpLHS
->getOperand(0));
592 CmpPHI
= dyn_cast
<PHINode
>(CmpLHS
);
594 if (!CmpPHI
|| !L
->contains(CmpPHI
))
597 // Trace the phi node to find all values that come from successors of BB
598 SmallPtrSet
<PHINode
*, 8> VisitedInsts
;
599 SmallVector
<PHINode
*, 8> WorkList
;
600 WorkList
.push_back(CmpPHI
);
601 VisitedInsts
.insert(CmpPHI
);
602 while (!WorkList
.empty()) {
603 PHINode
*P
= WorkList
.pop_back_val();
604 for (BasicBlock
*B
: P
->blocks()) {
605 // Skip blocks that aren't part of the loop
608 Value
*V
= P
->getIncomingValueForBlock(B
);
609 // If the source is a PHI add it to the work list if we haven't
610 // already visited it.
611 if (PHINode
*PN
= dyn_cast
<PHINode
>(V
)) {
612 if (VisitedInsts
.insert(PN
).second
)
613 WorkList
.push_back(PN
);
616 // If this incoming value is a constant and B is a successor of BB, then
617 // we can constant-evaluate the compare to see if it makes the branch be
619 Constant
*CmpLHSConst
= dyn_cast
<Constant
>(V
);
620 if (!CmpLHSConst
|| !llvm::is_contained(successors(BB
), B
))
622 // First collapse InstChain
623 const DataLayout
&DL
= BB
->getModule()->getDataLayout();
624 for (Instruction
*I
: llvm::reverse(InstChain
)) {
625 CmpLHSConst
= ConstantFoldBinaryOpOperands(
626 I
->getOpcode(), CmpLHSConst
, cast
<Constant
>(I
->getOperand(1)), DL
);
632 // Now constant-evaluate the compare
633 Constant
*Result
= ConstantExpr::getCompare(CI
->getPredicate(),
634 CmpLHSConst
, CmpConst
, true);
635 // If the result means we don't branch to the block then that block is
638 ((Result
->isZeroValue() && B
== BI
->getSuccessor(0)) ||
639 (Result
->isOneValue() && B
== BI
->getSuccessor(1))))
640 UnlikelyBlocks
.insert(B
);
645 std::optional
<uint32_t>
646 BranchProbabilityInfo::getEstimatedBlockWeight(const BasicBlock
*BB
) const {
647 auto WeightIt
= EstimatedBlockWeight
.find(BB
);
648 if (WeightIt
== EstimatedBlockWeight
.end())
650 return WeightIt
->second
;
653 std::optional
<uint32_t>
654 BranchProbabilityInfo::getEstimatedLoopWeight(const LoopData
&L
) const {
655 auto WeightIt
= EstimatedLoopWeight
.find(L
);
656 if (WeightIt
== EstimatedLoopWeight
.end())
658 return WeightIt
->second
;
661 std::optional
<uint32_t>
662 BranchProbabilityInfo::getEstimatedEdgeWeight(const LoopEdge
&Edge
) const {
663 // For edges entering a loop take weight of a loop rather than an individual
664 // block in the loop.
665 return isLoopEnteringEdge(Edge
)
666 ? getEstimatedLoopWeight(Edge
.second
.getLoopData())
667 : getEstimatedBlockWeight(Edge
.second
.getBlock());
670 template <class IterT
>
671 std::optional
<uint32_t> BranchProbabilityInfo::getMaxEstimatedEdgeWeight(
672 const LoopBlock
&SrcLoopBB
, iterator_range
<IterT
> Successors
) const {
673 SmallVector
<uint32_t, 4> Weights
;
674 std::optional
<uint32_t> MaxWeight
;
675 for (const BasicBlock
*DstBB
: Successors
) {
676 const LoopBlock DstLoopBB
= getLoopBlock(DstBB
);
677 auto Weight
= getEstimatedEdgeWeight({SrcLoopBB
, DstLoopBB
});
682 if (!MaxWeight
|| *MaxWeight
< *Weight
)
689 // Updates \p LoopBB's weight and returns true. If \p LoopBB has already
690 // an associated weight it is unchanged and false is returned.
692 // Please note by the algorithm the weight is not expected to change once set
693 // thus 'false' status is used to track visited blocks.
694 bool BranchProbabilityInfo::updateEstimatedBlockWeight(
695 LoopBlock
&LoopBB
, uint32_t BBWeight
,
696 SmallVectorImpl
<BasicBlock
*> &BlockWorkList
,
697 SmallVectorImpl
<LoopBlock
> &LoopWorkList
) {
698 BasicBlock
*BB
= LoopBB
.getBlock();
700 // In general, weight is assigned to a block when it has final value and
701 // can't/shouldn't be changed. However, there are cases when a block
702 // inherently has several (possibly "contradicting") weights. For example,
703 // "unwind" block may also contain "cold" call. In that case the first
704 // set weight is favored and all consequent weights are ignored.
705 if (!EstimatedBlockWeight
.insert({BB
, BBWeight
}).second
)
708 for (BasicBlock
*PredBlock
: predecessors(BB
)) {
709 LoopBlock PredLoop
= getLoopBlock(PredBlock
);
710 // Add affected block/loop to a working list.
711 if (isLoopExitingEdge({PredLoop
, LoopBB
})) {
712 if (!EstimatedLoopWeight
.count(PredLoop
.getLoopData()))
713 LoopWorkList
.push_back(PredLoop
);
714 } else if (!EstimatedBlockWeight
.count(PredBlock
))
715 BlockWorkList
.push_back(PredBlock
);
720 // Starting from \p BB traverse through dominator blocks and assign \p BBWeight
721 // to all such blocks that are post dominated by \BB. In other words to all
722 // blocks that the one is executed if and only if another one is executed.
723 // Importantly, we skip loops here for two reasons. First weights of blocks in
724 // a loop should be scaled by trip count (yet possibly unknown). Second there is
725 // no any value in doing that because that doesn't give any additional
726 // information regarding distribution of probabilities inside the loop.
727 // Exception is loop 'enter' and 'exit' edges that are handled in a special way
728 // at calcEstimatedHeuristics.
730 // In addition, \p WorkList is populated with basic blocks if at leas one
731 // successor has updated estimated weight.
732 void BranchProbabilityInfo::propagateEstimatedBlockWeight(
733 const LoopBlock
&LoopBB
, DominatorTree
*DT
, PostDominatorTree
*PDT
,
734 uint32_t BBWeight
, SmallVectorImpl
<BasicBlock
*> &BlockWorkList
,
735 SmallVectorImpl
<LoopBlock
> &LoopWorkList
) {
736 const BasicBlock
*BB
= LoopBB
.getBlock();
737 const auto *DTStartNode
= DT
->getNode(BB
);
738 const auto *PDTStartNode
= PDT
->getNode(BB
);
740 // TODO: Consider propagating weight down the domination line as well.
741 for (const auto *DTNode
= DTStartNode
; DTNode
!= nullptr;
742 DTNode
= DTNode
->getIDom()) {
743 auto *DomBB
= DTNode
->getBlock();
744 // Consider blocks which lie on one 'line'.
745 if (!PDT
->dominates(PDTStartNode
, PDT
->getNode(DomBB
)))
746 // If BB doesn't post dominate DomBB it will not post dominate dominators
750 LoopBlock DomLoopBB
= getLoopBlock(DomBB
);
751 const LoopEdge Edge
{DomLoopBB
, LoopBB
};
752 // Don't propagate weight to blocks belonging to different loops.
753 if (!isLoopEnteringExitingEdge(Edge
)) {
754 if (!updateEstimatedBlockWeight(DomLoopBB
, BBWeight
, BlockWorkList
,
756 // If DomBB has weight set then all it's predecessors are already
757 // processed (since we propagate weight up to the top of IR each time).
759 } else if (isLoopExitingEdge(Edge
)) {
760 LoopWorkList
.push_back(DomLoopBB
);
765 std::optional
<uint32_t>
766 BranchProbabilityInfo::getInitialEstimatedBlockWeight(const BasicBlock
*BB
) {
767 // Returns true if \p BB has call marked with "NoReturn" attribute.
768 auto hasNoReturn
= [&](const BasicBlock
*BB
) {
769 for (const auto &I
: reverse(*BB
))
770 if (const CallInst
*CI
= dyn_cast
<CallInst
>(&I
))
771 if (CI
->hasFnAttr(Attribute::NoReturn
))
777 // Important note regarding the order of checks. They are ordered by weight
778 // from lowest to highest. Doing that allows to avoid "unstable" results
779 // when several conditions heuristics can be applied simultaneously.
780 if (isa
<UnreachableInst
>(BB
->getTerminator()) ||
781 // If this block is terminated by a call to
782 // @llvm.experimental.deoptimize then treat it like an unreachable
783 // since it is expected to practically never execute.
784 // TODO: Should we actually treat as never returning call?
785 BB
->getTerminatingDeoptimizeCall())
786 return hasNoReturn(BB
)
787 ? static_cast<uint32_t>(BlockExecWeight::NORETURN
)
788 : static_cast<uint32_t>(BlockExecWeight::UNREACHABLE
);
790 // Check if the block is 'unwind' handler of some invoke instruction.
791 for (const auto *Pred
: predecessors(BB
))
793 if (const auto *II
= dyn_cast
<InvokeInst
>(Pred
->getTerminator()))
794 if (II
->getUnwindDest() == BB
)
795 return static_cast<uint32_t>(BlockExecWeight::UNWIND
);
797 // Check if the block contains 'cold' call.
798 for (const auto &I
: *BB
)
799 if (const CallInst
*CI
= dyn_cast
<CallInst
>(&I
))
800 if (CI
->hasFnAttr(Attribute::Cold
))
801 return static_cast<uint32_t>(BlockExecWeight::COLD
);
806 // Does RPO traversal over all blocks in \p F and assigns weights to
807 // 'unreachable', 'noreturn', 'cold', 'unwind' blocks. In addition it does its
808 // best to propagate the weight to up/down the IR.
809 void BranchProbabilityInfo::computeEestimateBlockWeight(
810 const Function
&F
, DominatorTree
*DT
, PostDominatorTree
*PDT
) {
811 SmallVector
<BasicBlock
*, 8> BlockWorkList
;
812 SmallVector
<LoopBlock
, 8> LoopWorkList
;
814 // By doing RPO we make sure that all predecessors already have weights
815 // calculated before visiting theirs successors.
816 ReversePostOrderTraversal
<const Function
*> RPOT(&F
);
817 for (const auto *BB
: RPOT
)
818 if (auto BBWeight
= getInitialEstimatedBlockWeight(BB
))
819 // If we were able to find estimated weight for the block set it to this
820 // block and propagate up the IR.
821 propagateEstimatedBlockWeight(getLoopBlock(BB
), DT
, PDT
, *BBWeight
,
822 BlockWorkList
, LoopWorkList
);
824 // BlockWorklist/LoopWorkList contains blocks/loops with at least one
825 // successor/exit having estimated weight. Try to propagate weight to such
826 // blocks/loops from successors/exits.
827 // Process loops and blocks. Order is not important.
829 while (!LoopWorkList
.empty()) {
830 const LoopBlock LoopBB
= LoopWorkList
.pop_back_val();
832 if (EstimatedLoopWeight
.count(LoopBB
.getLoopData()))
835 SmallVector
<BasicBlock
*, 4> Exits
;
836 getLoopExitBlocks(LoopBB
, Exits
);
837 auto LoopWeight
= getMaxEstimatedEdgeWeight(
838 LoopBB
, make_range(Exits
.begin(), Exits
.end()));
841 // If we never exit the loop then we can enter it once at maximum.
842 if (LoopWeight
<= static_cast<uint32_t>(BlockExecWeight::UNREACHABLE
))
843 LoopWeight
= static_cast<uint32_t>(BlockExecWeight::LOWEST_NON_ZERO
);
845 EstimatedLoopWeight
.insert({LoopBB
.getLoopData(), *LoopWeight
});
846 // Add all blocks entering the loop into working list.
847 getLoopEnterBlocks(LoopBB
, BlockWorkList
);
851 while (!BlockWorkList
.empty()) {
852 // We can reach here only if BlockWorkList is not empty.
853 const BasicBlock
*BB
= BlockWorkList
.pop_back_val();
854 if (EstimatedBlockWeight
.count(BB
))
857 // We take maximum over all weights of successors. In other words we take
858 // weight of "hot" path. In theory we can probably find a better function
859 // which gives higher accuracy results (comparing to "maximum") but I
861 // think of any right now. And I doubt it will make any difference in
863 const LoopBlock LoopBB
= getLoopBlock(BB
);
864 auto MaxWeight
= getMaxEstimatedEdgeWeight(LoopBB
, successors(BB
));
867 propagateEstimatedBlockWeight(LoopBB
, DT
, PDT
, *MaxWeight
,
868 BlockWorkList
, LoopWorkList
);
870 } while (!BlockWorkList
.empty() || !LoopWorkList
.empty());
873 // Calculate edge probabilities based on block's estimated weight.
874 // Note that gathered weights were not scaled for loops. Thus edges entering
875 // and exiting loops requires special processing.
876 bool BranchProbabilityInfo::calcEstimatedHeuristics(const BasicBlock
*BB
) {
877 assert(BB
->getTerminator()->getNumSuccessors() > 1 &&
878 "expected more than one successor!");
880 const LoopBlock LoopBB
= getLoopBlock(BB
);
882 SmallPtrSet
<const BasicBlock
*, 8> UnlikelyBlocks
;
883 uint32_t TC
= LBH_TAKEN_WEIGHT
/ LBH_NONTAKEN_WEIGHT
;
884 if (LoopBB
.getLoop())
885 computeUnlikelySuccessors(BB
, LoopBB
.getLoop(), UnlikelyBlocks
);
887 // Changed to 'true' if at least one successor has estimated weight.
888 bool FoundEstimatedWeight
= false;
889 SmallVector
<uint32_t, 4> SuccWeights
;
890 uint64_t TotalWeight
= 0;
891 // Go over all successors of BB and put their weights into SuccWeights.
892 for (const BasicBlock
*SuccBB
: successors(BB
)) {
893 std::optional
<uint32_t> Weight
;
894 const LoopBlock SuccLoopBB
= getLoopBlock(SuccBB
);
895 const LoopEdge Edge
{LoopBB
, SuccLoopBB
};
897 Weight
= getEstimatedEdgeWeight(Edge
);
899 if (isLoopExitingEdge(Edge
) &&
900 // Avoid adjustment of ZERO weight since it should remain unchanged.
901 Weight
!= static_cast<uint32_t>(BlockExecWeight::ZERO
)) {
902 // Scale down loop exiting weight by trip count.
904 static_cast<uint32_t>(BlockExecWeight::LOWEST_NON_ZERO
),
905 Weight
.value_or(static_cast<uint32_t>(BlockExecWeight::DEFAULT
)) /
908 bool IsUnlikelyEdge
= LoopBB
.getLoop() && UnlikelyBlocks
.contains(SuccBB
);
909 if (IsUnlikelyEdge
&&
910 // Avoid adjustment of ZERO weight since it should remain unchanged.
911 Weight
!= static_cast<uint32_t>(BlockExecWeight::ZERO
)) {
912 // 'Unlikely' blocks have twice lower weight.
914 static_cast<uint32_t>(BlockExecWeight::LOWEST_NON_ZERO
),
915 Weight
.value_or(static_cast<uint32_t>(BlockExecWeight::DEFAULT
)) / 2);
919 FoundEstimatedWeight
= true;
922 Weight
.value_or(static_cast<uint32_t>(BlockExecWeight::DEFAULT
));
923 TotalWeight
+= WeightVal
;
924 SuccWeights
.push_back(WeightVal
);
927 // If non of blocks have estimated weight bail out.
928 // If TotalWeight is 0 that means weight of each successor is 0 as well and
929 // equally likely. Bail out early to not deal with devision by zero.
930 if (!FoundEstimatedWeight
|| TotalWeight
== 0)
933 assert(SuccWeights
.size() == succ_size(BB
) && "Missed successor?");
934 const unsigned SuccCount
= SuccWeights
.size();
936 // If the sum of weights does not fit in 32 bits, scale every weight down
938 if (TotalWeight
> UINT32_MAX
) {
939 uint64_t ScalingFactor
= TotalWeight
/ UINT32_MAX
+ 1;
941 for (unsigned Idx
= 0; Idx
< SuccCount
; ++Idx
) {
942 SuccWeights
[Idx
] /= ScalingFactor
;
943 if (SuccWeights
[Idx
] == static_cast<uint32_t>(BlockExecWeight::ZERO
))
945 static_cast<uint32_t>(BlockExecWeight::LOWEST_NON_ZERO
);
946 TotalWeight
+= SuccWeights
[Idx
];
948 assert(TotalWeight
<= UINT32_MAX
&& "Total weight overflows");
951 // Finally set probabilities to edges according to estimated block weights.
952 SmallVector
<BranchProbability
, 4> EdgeProbabilities(
953 SuccCount
, BranchProbability::getUnknown());
955 for (unsigned Idx
= 0; Idx
< SuccCount
; ++Idx
) {
956 EdgeProbabilities
[Idx
] =
957 BranchProbability(SuccWeights
[Idx
], (uint32_t)TotalWeight
);
959 setEdgeProbability(BB
, EdgeProbabilities
);
963 bool BranchProbabilityInfo::calcZeroHeuristics(const BasicBlock
*BB
,
964 const TargetLibraryInfo
*TLI
) {
965 const BranchInst
*BI
= dyn_cast
<BranchInst
>(BB
->getTerminator());
966 if (!BI
|| !BI
->isConditional())
969 Value
*Cond
= BI
->getCondition();
970 ICmpInst
*CI
= dyn_cast
<ICmpInst
>(Cond
);
974 auto GetConstantInt
= [](Value
*V
) {
975 if (auto *I
= dyn_cast
<BitCastInst
>(V
))
976 return dyn_cast
<ConstantInt
>(I
->getOperand(0));
977 return dyn_cast
<ConstantInt
>(V
);
980 Value
*RHS
= CI
->getOperand(1);
981 ConstantInt
*CV
= GetConstantInt(RHS
);
985 // If the LHS is the result of AND'ing a value with a single bit bitmask,
986 // we don't have information about probabilities.
987 if (Instruction
*LHS
= dyn_cast
<Instruction
>(CI
->getOperand(0)))
988 if (LHS
->getOpcode() == Instruction::And
)
989 if (ConstantInt
*AndRHS
= GetConstantInt(LHS
->getOperand(1)))
990 if (AndRHS
->getValue().isPowerOf2())
993 // Check if the LHS is the return value of a library function
994 LibFunc Func
= NumLibFuncs
;
996 if (CallInst
*Call
= dyn_cast
<CallInst
>(CI
->getOperand(0)))
997 if (Function
*CalledFn
= Call
->getCalledFunction())
998 TLI
->getLibFunc(*CalledFn
, Func
);
1000 ProbabilityTable::const_iterator Search
;
1001 if (Func
== LibFunc_strcasecmp
||
1002 Func
== LibFunc_strcmp
||
1003 Func
== LibFunc_strncasecmp
||
1004 Func
== LibFunc_strncmp
||
1005 Func
== LibFunc_memcmp
||
1006 Func
== LibFunc_bcmp
) {
1007 Search
= ICmpWithLibCallTable
.find(CI
->getPredicate());
1008 if (Search
== ICmpWithLibCallTable
.end())
1010 } else if (CV
->isZero()) {
1011 Search
= ICmpWithZeroTable
.find(CI
->getPredicate());
1012 if (Search
== ICmpWithZeroTable
.end())
1014 } else if (CV
->isOne()) {
1015 Search
= ICmpWithOneTable
.find(CI
->getPredicate());
1016 if (Search
== ICmpWithOneTable
.end())
1018 } else if (CV
->isMinusOne()) {
1019 Search
= ICmpWithMinusOneTable
.find(CI
->getPredicate());
1020 if (Search
== ICmpWithMinusOneTable
.end())
1026 setEdgeProbability(BB
, Search
->second
);
1030 bool BranchProbabilityInfo::calcFloatingPointHeuristics(const BasicBlock
*BB
) {
1031 const BranchInst
*BI
= dyn_cast
<BranchInst
>(BB
->getTerminator());
1032 if (!BI
|| !BI
->isConditional())
1035 Value
*Cond
= BI
->getCondition();
1036 FCmpInst
*FCmp
= dyn_cast
<FCmpInst
>(Cond
);
1040 ProbabilityList ProbList
;
1041 if (FCmp
->isEquality()) {
1042 ProbList
= !FCmp
->isTrueWhenEqual() ?
1043 // f1 == f2 -> Unlikely
1044 ProbabilityList({FPTakenProb
, FPUntakenProb
}) :
1045 // f1 != f2 -> Likely
1046 ProbabilityList({FPUntakenProb
, FPTakenProb
});
1048 auto Search
= FCmpTable
.find(FCmp
->getPredicate());
1049 if (Search
== FCmpTable
.end())
1051 ProbList
= Search
->second
;
1054 setEdgeProbability(BB
, ProbList
);
1058 void BranchProbabilityInfo::releaseMemory() {
1063 bool BranchProbabilityInfo::invalidate(Function
&, const PreservedAnalyses
&PA
,
1064 FunctionAnalysisManager::Invalidator
&) {
1065 // Check whether the analysis, all analyses on functions, or the function's
1066 // CFG have been preserved.
1067 auto PAC
= PA
.getChecker
<BranchProbabilityAnalysis
>();
1068 return !(PAC
.preserved() || PAC
.preservedSet
<AllAnalysesOn
<Function
>>() ||
1069 PAC
.preservedSet
<CFGAnalyses
>());
1072 void BranchProbabilityInfo::print(raw_ostream
&OS
) const {
1073 OS
<< "---- Branch Probabilities ----\n";
1074 // We print the probabilities from the last function the analysis ran over,
1075 // or the function it is currently running over.
1076 assert(LastF
&& "Cannot print prior to running over a function");
1077 for (const auto &BI
: *LastF
) {
1078 for (const BasicBlock
*Succ
: successors(&BI
))
1079 printEdgeProbability(OS
<< " ", &BI
, Succ
);
1083 bool BranchProbabilityInfo::
1084 isEdgeHot(const BasicBlock
*Src
, const BasicBlock
*Dst
) const {
1085 // Hot probability is at least 4/5 = 80%
1086 // FIXME: Compare against a static "hot" BranchProbability.
1087 return getEdgeProbability(Src
, Dst
) > BranchProbability(4, 5);
1090 /// Get the raw edge probability for the edge. If can't find it, return a
1091 /// default probability 1/N where N is the number of successors. Here an edge is
1092 /// specified using PredBlock and an
1093 /// index to the successors.
1095 BranchProbabilityInfo::getEdgeProbability(const BasicBlock
*Src
,
1096 unsigned IndexInSuccessors
) const {
1097 auto I
= Probs
.find(std::make_pair(Src
, IndexInSuccessors
));
1098 assert((Probs
.end() == Probs
.find(std::make_pair(Src
, 0))) ==
1099 (Probs
.end() == I
) &&
1100 "Probability for I-th successor must always be defined along with the "
1101 "probability for the first successor");
1103 if (I
!= Probs
.end())
1106 return {1, static_cast<uint32_t>(succ_size(Src
))};
1110 BranchProbabilityInfo::getEdgeProbability(const BasicBlock
*Src
,
1111 const_succ_iterator Dst
) const {
1112 return getEdgeProbability(Src
, Dst
.getSuccessorIndex());
1115 /// Get the raw edge probability calculated for the block pair. This returns the
1116 /// sum of all raw edge probabilities from Src to Dst.
1118 BranchProbabilityInfo::getEdgeProbability(const BasicBlock
*Src
,
1119 const BasicBlock
*Dst
) const {
1120 if (!Probs
.count(std::make_pair(Src
, 0)))
1121 return BranchProbability(llvm::count(successors(Src
), Dst
), succ_size(Src
));
1123 auto Prob
= BranchProbability::getZero();
1124 for (const_succ_iterator I
= succ_begin(Src
), E
= succ_end(Src
); I
!= E
; ++I
)
1126 Prob
+= Probs
.find(std::make_pair(Src
, I
.getSuccessorIndex()))->second
;
1131 /// Set the edge probability for all edges at once.
1132 void BranchProbabilityInfo::setEdgeProbability(
1133 const BasicBlock
*Src
, const SmallVectorImpl
<BranchProbability
> &Probs
) {
1134 assert(Src
->getTerminator()->getNumSuccessors() == Probs
.size());
1135 eraseBlock(Src
); // Erase stale data if any.
1136 if (Probs
.size() == 0)
1137 return; // Nothing to set.
1139 Handles
.insert(BasicBlockCallbackVH(Src
, this));
1140 uint64_t TotalNumerator
= 0;
1141 for (unsigned SuccIdx
= 0; SuccIdx
< Probs
.size(); ++SuccIdx
) {
1142 this->Probs
[std::make_pair(Src
, SuccIdx
)] = Probs
[SuccIdx
];
1143 LLVM_DEBUG(dbgs() << "set edge " << Src
->getName() << " -> " << SuccIdx
1144 << " successor probability to " << Probs
[SuccIdx
]
1146 TotalNumerator
+= Probs
[SuccIdx
].getNumerator();
1149 // Because of rounding errors the total probability cannot be checked to be
1150 // 1.0 exactly. That is TotalNumerator == BranchProbability::getDenominator.
1151 // Instead, every single probability in Probs must be as accurate as possible.
1152 // This results in error 1/denominator at most, thus the total absolute error
1153 // should be within Probs.size / BranchProbability::getDenominator.
1154 assert(TotalNumerator
<= BranchProbability::getDenominator() + Probs
.size());
1155 assert(TotalNumerator
>= BranchProbability::getDenominator() - Probs
.size());
1156 (void)TotalNumerator
;
1159 void BranchProbabilityInfo::copyEdgeProbabilities(BasicBlock
*Src
,
1161 eraseBlock(Dst
); // Erase stale data if any.
1162 unsigned NumSuccessors
= Src
->getTerminator()->getNumSuccessors();
1163 assert(NumSuccessors
== Dst
->getTerminator()->getNumSuccessors());
1164 if (NumSuccessors
== 0)
1165 return; // Nothing to set.
1166 if (!this->Probs
.contains(std::make_pair(Src
, 0)))
1167 return; // No probability is set for edges from Src. Keep the same for Dst.
1169 Handles
.insert(BasicBlockCallbackVH(Dst
, this));
1170 for (unsigned SuccIdx
= 0; SuccIdx
< NumSuccessors
; ++SuccIdx
) {
1171 auto Prob
= this->Probs
[std::make_pair(Src
, SuccIdx
)];
1172 this->Probs
[std::make_pair(Dst
, SuccIdx
)] = Prob
;
1173 LLVM_DEBUG(dbgs() << "set edge " << Dst
->getName() << " -> " << SuccIdx
1174 << " successor probability to " << Prob
<< "\n");
1178 void BranchProbabilityInfo::swapSuccEdgesProbabilities(const BasicBlock
*Src
) {
1179 assert(Src
->getTerminator()->getNumSuccessors() == 2);
1180 if (!Probs
.contains(std::make_pair(Src
, 0)))
1181 return; // No probability is set for edges from Src
1182 assert(Probs
.contains(std::make_pair(Src
, 1)));
1183 std::swap(Probs
[std::make_pair(Src
, 0)], Probs
[std::make_pair(Src
, 1)]);
1187 BranchProbabilityInfo::printEdgeProbability(raw_ostream
&OS
,
1188 const BasicBlock
*Src
,
1189 const BasicBlock
*Dst
) const {
1190 const BranchProbability Prob
= getEdgeProbability(Src
, Dst
);
1192 Src
->printAsOperand(OS
, false, Src
->getModule());
1194 Dst
->printAsOperand(OS
, false, Dst
->getModule());
1195 OS
<< " probability is " << Prob
1196 << (isEdgeHot(Src
, Dst
) ? " [HOT edge]\n" : "\n");
1201 void BranchProbabilityInfo::eraseBlock(const BasicBlock
*BB
) {
1202 LLVM_DEBUG(dbgs() << "eraseBlock " << BB
->getName() << "\n");
1204 // Note that we cannot use successors of BB because the terminator of BB may
1205 // have changed when eraseBlock is called as a BasicBlockCallbackVH callback.
1206 // Instead we remove prob data for the block by iterating successors by their
1207 // indices from 0 till the last which exists. There could not be prob data for
1208 // a pair (BB, N) if there is no data for (BB, N-1) because the data is always
1209 // set for all successors from 0 to M at once by the method
1210 // setEdgeProbability().
1211 Handles
.erase(BasicBlockCallbackVH(BB
, this));
1212 for (unsigned I
= 0;; ++I
) {
1213 auto MapI
= Probs
.find(std::make_pair(BB
, I
));
1214 if (MapI
== Probs
.end()) {
1215 assert(Probs
.count(std::make_pair(BB
, I
+ 1)) == 0 &&
1216 "Must be no more successors");
1223 void BranchProbabilityInfo::calculate(const Function
&F
, const LoopInfo
&LoopI
,
1224 const TargetLibraryInfo
*TLI
,
1226 PostDominatorTree
*PDT
) {
1227 LLVM_DEBUG(dbgs() << "---- Branch Probability Info : " << F
.getName()
1229 LastF
= &F
; // Store the last function we ran on for printing.
1232 SccI
= std::make_unique
<SccInfo
>(F
);
1234 assert(EstimatedBlockWeight
.empty());
1235 assert(EstimatedLoopWeight
.empty());
1237 std::unique_ptr
<DominatorTree
> DTPtr
;
1238 std::unique_ptr
<PostDominatorTree
> PDTPtr
;
1241 DTPtr
= std::make_unique
<DominatorTree
>(const_cast<Function
&>(F
));
1246 PDTPtr
= std::make_unique
<PostDominatorTree
>(const_cast<Function
&>(F
));
1250 computeEestimateBlockWeight(F
, DT
, PDT
);
1252 // Walk the basic blocks in post-order so that we can build up state about
1253 // the successors of a block iteratively.
1254 for (const auto *BB
: post_order(&F
.getEntryBlock())) {
1255 LLVM_DEBUG(dbgs() << "Computing probabilities for " << BB
->getName()
1257 // If there is no at least two successors, no sense to set probability.
1258 if (BB
->getTerminator()->getNumSuccessors() < 2)
1260 if (calcMetadataWeights(BB
))
1262 if (calcEstimatedHeuristics(BB
))
1264 if (calcPointerHeuristics(BB
))
1266 if (calcZeroHeuristics(BB
, TLI
))
1268 if (calcFloatingPointHeuristics(BB
))
1272 EstimatedLoopWeight
.clear();
1273 EstimatedBlockWeight
.clear();
1276 if (PrintBranchProb
&&
1277 (PrintBranchProbFuncName
.empty() ||
1278 F
.getName().equals(PrintBranchProbFuncName
))) {
1283 void BranchProbabilityInfoWrapperPass::getAnalysisUsage(
1284 AnalysisUsage
&AU
) const {
1285 // We require DT so it's available when LI is available. The LI updating code
1286 // asserts that DT is also present so if we don't make sure that we have DT
1287 // here, that assert will trigger.
1288 AU
.addRequired
<DominatorTreeWrapperPass
>();
1289 AU
.addRequired
<LoopInfoWrapperPass
>();
1290 AU
.addRequired
<TargetLibraryInfoWrapperPass
>();
1291 AU
.addRequired
<DominatorTreeWrapperPass
>();
1292 AU
.addRequired
<PostDominatorTreeWrapperPass
>();
1293 AU
.setPreservesAll();
1296 bool BranchProbabilityInfoWrapperPass::runOnFunction(Function
&F
) {
1297 const LoopInfo
&LI
= getAnalysis
<LoopInfoWrapperPass
>().getLoopInfo();
1298 const TargetLibraryInfo
&TLI
=
1299 getAnalysis
<TargetLibraryInfoWrapperPass
>().getTLI(F
);
1300 DominatorTree
&DT
= getAnalysis
<DominatorTreeWrapperPass
>().getDomTree();
1301 PostDominatorTree
&PDT
=
1302 getAnalysis
<PostDominatorTreeWrapperPass
>().getPostDomTree();
1303 BPI
.calculate(F
, LI
, &TLI
, &DT
, &PDT
);
1307 void BranchProbabilityInfoWrapperPass::releaseMemory() { BPI
.releaseMemory(); }
1309 void BranchProbabilityInfoWrapperPass::print(raw_ostream
&OS
,
1310 const Module
*) const {
1314 AnalysisKey
BranchProbabilityAnalysis::Key
;
1315 BranchProbabilityInfo
1316 BranchProbabilityAnalysis::run(Function
&F
, FunctionAnalysisManager
&AM
) {
1317 auto &LI
= AM
.getResult
<LoopAnalysis
>(F
);
1318 auto &TLI
= AM
.getResult
<TargetLibraryAnalysis
>(F
);
1319 auto &DT
= AM
.getResult
<DominatorTreeAnalysis
>(F
);
1320 auto &PDT
= AM
.getResult
<PostDominatorTreeAnalysis
>(F
);
1321 BranchProbabilityInfo BPI
;
1322 BPI
.calculate(F
, LI
, &TLI
, &DT
, &PDT
);
1327 BranchProbabilityPrinterPass::run(Function
&F
, FunctionAnalysisManager
&AM
) {
1328 OS
<< "Printing analysis 'Branch Probability Analysis' for function '"
1329 << F
.getName() << "':\n";
1330 AM
.getResult
<BranchProbabilityAnalysis
>(F
).print(OS
);
1331 return PreservedAnalyses::all();