1 //===- BranchProbabilityInfo.cpp - Branch Probability Analysis ------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // Loops should be simplified before this analysis.
11 //===----------------------------------------------------------------------===//
13 #include "llvm/Analysis/BranchProbabilityInfo.h"
14 #include "llvm/ADT/PostOrderIterator.h"
15 #include "llvm/ADT/SCCIterator.h"
16 #include "llvm/ADT/STLExtras.h"
17 #include "llvm/ADT/SmallVector.h"
18 #include "llvm/Analysis/ConstantFolding.h"
19 #include "llvm/Analysis/LoopInfo.h"
20 #include "llvm/Analysis/PostDominators.h"
21 #include "llvm/Analysis/TargetLibraryInfo.h"
22 #include "llvm/IR/Attributes.h"
23 #include "llvm/IR/BasicBlock.h"
24 #include "llvm/IR/CFG.h"
25 #include "llvm/IR/Constants.h"
26 #include "llvm/IR/Dominators.h"
27 #include "llvm/IR/Function.h"
28 #include "llvm/IR/InstrTypes.h"
29 #include "llvm/IR/Instruction.h"
30 #include "llvm/IR/Instructions.h"
31 #include "llvm/IR/LLVMContext.h"
32 #include "llvm/IR/Metadata.h"
33 #include "llvm/IR/PassManager.h"
34 #include "llvm/IR/ProfDataUtils.h"
35 #include "llvm/IR/Type.h"
36 #include "llvm/IR/Value.h"
37 #include "llvm/InitializePasses.h"
38 #include "llvm/Pass.h"
39 #include "llvm/Support/BranchProbability.h"
40 #include "llvm/Support/Casting.h"
41 #include "llvm/Support/CommandLine.h"
42 #include "llvm/Support/Debug.h"
43 #include "llvm/Support/raw_ostream.h"
51 #define DEBUG_TYPE "branch-prob"
53 static cl::opt
<bool> PrintBranchProb(
54 "print-bpi", cl::init(false), cl::Hidden
,
55 cl::desc("Print the branch probability info."));
57 cl::opt
<std::string
> PrintBranchProbFuncName(
58 "print-bpi-func-name", cl::Hidden
,
59 cl::desc("The option to specify the name of the function "
60 "whose branch probability info is printed."));
62 INITIALIZE_PASS_BEGIN(BranchProbabilityInfoWrapperPass
, "branch-prob",
63 "Branch Probability Analysis", false, true)
64 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass
)
65 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass
)
66 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass
)
67 INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass
)
68 INITIALIZE_PASS_END(BranchProbabilityInfoWrapperPass
, "branch-prob",
69 "Branch Probability Analysis", false, true)
71 BranchProbabilityInfoWrapperPass::BranchProbabilityInfoWrapperPass()
73 initializeBranchProbabilityInfoWrapperPassPass(
74 *PassRegistry::getPassRegistry());
77 char BranchProbabilityInfoWrapperPass::ID
= 0;
79 // Weights are for internal use only. They are used by heuristics to help to
80 // estimate edges' probability. Example:
82 // Using "Loop Branch Heuristics" we predict weights of edges for the
97 // Probability of the edge BB2->BB1 = 124 / (124 + 4) = 0.96875
98 // Probability of the edge BB2->BB3 = 4 / (124 + 4) = 0.03125
99 static const uint32_t LBH_TAKEN_WEIGHT
= 124;
100 static const uint32_t LBH_NONTAKEN_WEIGHT
= 4;
102 /// Unreachable-terminating branch taken probability.
104 /// This is the probability for a branch being taken to a block that terminates
105 /// (eventually) in unreachable. These are predicted as unlikely as possible.
106 /// All reachable probability will proportionally share the remaining part.
107 static const BranchProbability UR_TAKEN_PROB
= BranchProbability::getRaw(1);
109 /// Heuristics and lookup tables for non-loop branches:
110 /// Pointer Heuristics (PH)
111 static const uint32_t PH_TAKEN_WEIGHT
= 20;
112 static const uint32_t PH_NONTAKEN_WEIGHT
= 12;
113 static const BranchProbability
114 PtrTakenProb(PH_TAKEN_WEIGHT
, PH_TAKEN_WEIGHT
+ PH_NONTAKEN_WEIGHT
);
115 static const BranchProbability
116 PtrUntakenProb(PH_NONTAKEN_WEIGHT
, PH_TAKEN_WEIGHT
+ PH_NONTAKEN_WEIGHT
);
118 using ProbabilityList
= SmallVector
<BranchProbability
>;
119 using ProbabilityTable
= std::map
<CmpInst::Predicate
, ProbabilityList
>;
121 /// Pointer comparisons:
122 static const ProbabilityTable PointerTable
{
123 {ICmpInst::ICMP_NE
, {PtrTakenProb
, PtrUntakenProb
}}, /// p != q -> Likely
124 {ICmpInst::ICMP_EQ
, {PtrUntakenProb
, PtrTakenProb
}}, /// p == q -> Unlikely
127 /// Zero Heuristics (ZH)
128 static const uint32_t ZH_TAKEN_WEIGHT
= 20;
129 static const uint32_t ZH_NONTAKEN_WEIGHT
= 12;
130 static const BranchProbability
131 ZeroTakenProb(ZH_TAKEN_WEIGHT
, ZH_TAKEN_WEIGHT
+ ZH_NONTAKEN_WEIGHT
);
132 static const BranchProbability
133 ZeroUntakenProb(ZH_NONTAKEN_WEIGHT
, ZH_TAKEN_WEIGHT
+ ZH_NONTAKEN_WEIGHT
);
135 /// Integer compares with 0:
136 static const ProbabilityTable ICmpWithZeroTable
{
137 {CmpInst::ICMP_EQ
, {ZeroUntakenProb
, ZeroTakenProb
}}, /// X == 0 -> Unlikely
138 {CmpInst::ICMP_NE
, {ZeroTakenProb
, ZeroUntakenProb
}}, /// X != 0 -> Likely
139 {CmpInst::ICMP_SLT
, {ZeroUntakenProb
, ZeroTakenProb
}}, /// X < 0 -> Unlikely
140 {CmpInst::ICMP_SGT
, {ZeroTakenProb
, ZeroUntakenProb
}}, /// X > 0 -> Likely
143 /// Integer compares with -1:
144 static const ProbabilityTable ICmpWithMinusOneTable
{
145 {CmpInst::ICMP_EQ
, {ZeroUntakenProb
, ZeroTakenProb
}}, /// X == -1 -> Unlikely
146 {CmpInst::ICMP_NE
, {ZeroTakenProb
, ZeroUntakenProb
}}, /// X != -1 -> Likely
147 // InstCombine canonicalizes X >= 0 into X > -1
148 {CmpInst::ICMP_SGT
, {ZeroTakenProb
, ZeroUntakenProb
}}, /// X >= 0 -> Likely
151 /// Integer compares with 1:
152 static const ProbabilityTable ICmpWithOneTable
{
153 // InstCombine canonicalizes X <= 0 into X < 1
154 {CmpInst::ICMP_SLT
, {ZeroUntakenProb
, ZeroTakenProb
}}, /// X <= 0 -> Unlikely
157 /// strcmp and similar functions return zero, negative, or positive, if the
158 /// first string is equal, less, or greater than the second. We consider it
159 /// likely that the strings are not equal, so a comparison with zero is
160 /// probably false, but also a comparison with any other number is also
161 /// probably false given that what exactly is returned for nonzero values is
162 /// not specified. Any kind of comparison other than equality we know
164 static const ProbabilityTable ICmpWithLibCallTable
{
165 {CmpInst::ICMP_EQ
, {ZeroUntakenProb
, ZeroTakenProb
}},
166 {CmpInst::ICMP_NE
, {ZeroTakenProb
, ZeroUntakenProb
}},
169 // Floating-Point Heuristics (FPH)
170 static const uint32_t FPH_TAKEN_WEIGHT
= 20;
171 static const uint32_t FPH_NONTAKEN_WEIGHT
= 12;
173 /// This is the probability for an ordered floating point comparison.
174 static const uint32_t FPH_ORD_WEIGHT
= 1024 * 1024 - 1;
175 /// This is the probability for an unordered floating point comparison, it means
176 /// one or two of the operands are NaN. Usually it is used to test for an
177 /// exceptional case, so the result is unlikely.
178 static const uint32_t FPH_UNO_WEIGHT
= 1;
180 static const BranchProbability
FPOrdTakenProb(FPH_ORD_WEIGHT
,
181 FPH_ORD_WEIGHT
+ FPH_UNO_WEIGHT
);
182 static const BranchProbability
183 FPOrdUntakenProb(FPH_UNO_WEIGHT
, FPH_ORD_WEIGHT
+ FPH_UNO_WEIGHT
);
184 static const BranchProbability
185 FPTakenProb(FPH_TAKEN_WEIGHT
, FPH_TAKEN_WEIGHT
+ FPH_NONTAKEN_WEIGHT
);
186 static const BranchProbability
187 FPUntakenProb(FPH_NONTAKEN_WEIGHT
, FPH_TAKEN_WEIGHT
+ FPH_NONTAKEN_WEIGHT
);
189 /// Floating-Point compares:
190 static const ProbabilityTable FCmpTable
{
191 {FCmpInst::FCMP_ORD
, {FPOrdTakenProb
, FPOrdUntakenProb
}}, /// !isnan -> Likely
192 {FCmpInst::FCMP_UNO
, {FPOrdUntakenProb
, FPOrdTakenProb
}}, /// isnan -> Unlikely
195 /// Set of dedicated "absolute" execution weights for a block. These weights are
196 /// meaningful relative to each other and their derivatives only.
197 enum class BlockExecWeight
: std::uint32_t {
198 /// Special weight used for cases with exact zero probability.
200 /// Minimal possible non zero weight.
201 LOWEST_NON_ZERO
= 0x1,
202 /// Weight to an 'unreachable' block.
204 /// Weight to a block containing non returning call.
205 NORETURN
= LOWEST_NON_ZERO
,
206 /// Weight to 'unwind' block of an invoke instruction.
207 UNWIND
= LOWEST_NON_ZERO
,
208 /// Weight to a 'cold' block. Cold blocks are the ones containing calls marked
209 /// with attribute 'cold'.
211 /// Default weight is used in cases when there is no dedicated execution
212 /// weight set. It is not propagated through the domination line either.
216 BranchProbabilityInfo::SccInfo::SccInfo(const Function
&F
) {
217 // Record SCC numbers of blocks in the CFG to identify irreducible loops.
218 // FIXME: We could only calculate this if the CFG is known to be irreducible
219 // (perhaps cache this info in LoopInfo if we can easily calculate it there?).
221 for (scc_iterator
<const Function
*> It
= scc_begin(&F
); !It
.isAtEnd();
223 // Ignore single-block SCCs since they either aren't loops or LoopInfo will
225 const std::vector
<const BasicBlock
*> &Scc
= *It
;
229 LLVM_DEBUG(dbgs() << "BPI: SCC " << SccNum
<< ":");
230 for (const auto *BB
: Scc
) {
231 LLVM_DEBUG(dbgs() << " " << BB
->getName());
232 SccNums
[BB
] = SccNum
;
233 calculateSccBlockType(BB
, SccNum
);
235 LLVM_DEBUG(dbgs() << "\n");
239 int BranchProbabilityInfo::SccInfo::getSCCNum(const BasicBlock
*BB
) const {
240 auto SccIt
= SccNums
.find(BB
);
241 if (SccIt
== SccNums
.end())
243 return SccIt
->second
;
246 void BranchProbabilityInfo::SccInfo::getSccEnterBlocks(
247 int SccNum
, SmallVectorImpl
<BasicBlock
*> &Enters
) const {
249 for (auto MapIt
: SccBlocks
[SccNum
]) {
250 const auto *BB
= MapIt
.first
;
251 if (isSCCHeader(BB
, SccNum
))
252 for (const auto *Pred
: predecessors(BB
))
253 if (getSCCNum(Pred
) != SccNum
)
254 Enters
.push_back(const_cast<BasicBlock
*>(BB
));
258 void BranchProbabilityInfo::SccInfo::getSccExitBlocks(
259 int SccNum
, SmallVectorImpl
<BasicBlock
*> &Exits
) const {
260 for (auto MapIt
: SccBlocks
[SccNum
]) {
261 const auto *BB
= MapIt
.first
;
262 if (isSCCExitingBlock(BB
, SccNum
))
263 for (const auto *Succ
: successors(BB
))
264 if (getSCCNum(Succ
) != SccNum
)
265 Exits
.push_back(const_cast<BasicBlock
*>(Succ
));
269 uint32_t BranchProbabilityInfo::SccInfo::getSccBlockType(const BasicBlock
*BB
,
271 assert(getSCCNum(BB
) == SccNum
);
273 assert(SccBlocks
.size() > static_cast<unsigned>(SccNum
) && "Unknown SCC");
274 const auto &SccBlockTypes
= SccBlocks
[SccNum
];
276 auto It
= SccBlockTypes
.find(BB
);
277 if (It
!= SccBlockTypes
.end()) {
283 void BranchProbabilityInfo::SccInfo::calculateSccBlockType(const BasicBlock
*BB
,
285 assert(getSCCNum(BB
) == SccNum
);
286 uint32_t BlockType
= Inner
;
288 if (llvm::any_of(predecessors(BB
), [&](const BasicBlock
*Pred
) {
289 // Consider any block that is an entry point to the SCC as
291 return getSCCNum(Pred
) != SccNum
;
295 if (llvm::any_of(successors(BB
), [&](const BasicBlock
*Succ
) {
296 return getSCCNum(Succ
) != SccNum
;
298 BlockType
|= Exiting
;
300 // Lazily compute the set of headers for a given SCC and cache the results
301 // in the SccHeaderMap.
302 if (SccBlocks
.size() <= static_cast<unsigned>(SccNum
))
303 SccBlocks
.resize(SccNum
+ 1);
304 auto &SccBlockTypes
= SccBlocks
[SccNum
];
306 if (BlockType
!= Inner
) {
308 std::tie(std::ignore
, IsInserted
) =
309 SccBlockTypes
.insert(std::make_pair(BB
, BlockType
));
310 assert(IsInserted
&& "Duplicated block in SCC");
314 BranchProbabilityInfo::LoopBlock::LoopBlock(const BasicBlock
*BB
,
318 LD
.first
= LI
.getLoopFor(BB
);
320 LD
.second
= SccI
.getSCCNum(BB
);
324 bool BranchProbabilityInfo::isLoopEnteringEdge(const LoopEdge
&Edge
) const {
325 const auto &SrcBlock
= Edge
.first
;
326 const auto &DstBlock
= Edge
.second
;
327 return (DstBlock
.getLoop() &&
328 !DstBlock
.getLoop()->contains(SrcBlock
.getLoop())) ||
329 // Assume that SCCs can't be nested.
330 (DstBlock
.getSccNum() != -1 &&
331 SrcBlock
.getSccNum() != DstBlock
.getSccNum());
334 bool BranchProbabilityInfo::isLoopExitingEdge(const LoopEdge
&Edge
) const {
335 return isLoopEnteringEdge({Edge
.second
, Edge
.first
});
338 bool BranchProbabilityInfo::isLoopEnteringExitingEdge(
339 const LoopEdge
&Edge
) const {
340 return isLoopEnteringEdge(Edge
) || isLoopExitingEdge(Edge
);
343 bool BranchProbabilityInfo::isLoopBackEdge(const LoopEdge
&Edge
) const {
344 const auto &SrcBlock
= Edge
.first
;
345 const auto &DstBlock
= Edge
.second
;
346 return SrcBlock
.belongsToSameLoop(DstBlock
) &&
347 ((DstBlock
.getLoop() &&
348 DstBlock
.getLoop()->getHeader() == DstBlock
.getBlock()) ||
349 (DstBlock
.getSccNum() != -1 &&
350 SccI
->isSCCHeader(DstBlock
.getBlock(), DstBlock
.getSccNum())));
353 void BranchProbabilityInfo::getLoopEnterBlocks(
354 const LoopBlock
&LB
, SmallVectorImpl
<BasicBlock
*> &Enters
) const {
356 auto *Header
= LB
.getLoop()->getHeader();
357 Enters
.append(pred_begin(Header
), pred_end(Header
));
359 assert(LB
.getSccNum() != -1 && "LB doesn't belong to any loop?");
360 SccI
->getSccEnterBlocks(LB
.getSccNum(), Enters
);
364 void BranchProbabilityInfo::getLoopExitBlocks(
365 const LoopBlock
&LB
, SmallVectorImpl
<BasicBlock
*> &Exits
) const {
367 LB
.getLoop()->getExitBlocks(Exits
);
369 assert(LB
.getSccNum() != -1 && "LB doesn't belong to any loop?");
370 SccI
->getSccExitBlocks(LB
.getSccNum(), Exits
);
374 // Propagate existing explicit probabilities from either profile data or
375 // 'expect' intrinsic processing. Examine metadata against unreachable
376 // heuristic. The probability of the edge coming to unreachable block is
377 // set to min of metadata and unreachable heuristic.
378 bool BranchProbabilityInfo::calcMetadataWeights(const BasicBlock
*BB
) {
379 const Instruction
*TI
= BB
->getTerminator();
380 assert(TI
->getNumSuccessors() > 1 && "expected more than one successor!");
381 if (!(isa
<BranchInst
>(TI
) || isa
<SwitchInst
>(TI
) || isa
<IndirectBrInst
>(TI
) ||
382 isa
<InvokeInst
>(TI
) || isa
<CallBrInst
>(TI
)))
385 MDNode
*WeightsNode
= getValidBranchWeightMDNode(*TI
);
389 // Check that the number of successors is manageable.
390 assert(TI
->getNumSuccessors() < UINT32_MAX
&& "Too many successors");
392 // Build up the final weights that will be used in a temporary buffer.
393 // Compute the sum of all weights to later decide whether they need to
394 // be scaled to fit in 32 bits.
395 uint64_t WeightSum
= 0;
396 SmallVector
<uint32_t, 2> Weights
;
397 SmallVector
<unsigned, 2> UnreachableIdxs
;
398 SmallVector
<unsigned, 2> ReachableIdxs
;
400 extractBranchWeights(WeightsNode
, Weights
);
401 for (unsigned I
= 0, E
= Weights
.size(); I
!= E
; ++I
) {
402 WeightSum
+= Weights
[I
];
403 const LoopBlock SrcLoopBB
= getLoopBlock(BB
);
404 const LoopBlock DstLoopBB
= getLoopBlock(TI
->getSuccessor(I
));
405 auto EstimatedWeight
= getEstimatedEdgeWeight({SrcLoopBB
, DstLoopBB
});
406 if (EstimatedWeight
&&
407 *EstimatedWeight
<= static_cast<uint32_t>(BlockExecWeight::UNREACHABLE
))
408 UnreachableIdxs
.push_back(I
);
410 ReachableIdxs
.push_back(I
);
412 assert(Weights
.size() == TI
->getNumSuccessors() && "Checked above");
414 // If the sum of weights does not fit in 32 bits, scale every weight down
416 uint64_t ScalingFactor
=
417 (WeightSum
> UINT32_MAX
) ? WeightSum
/ UINT32_MAX
+ 1 : 1;
419 if (ScalingFactor
> 1) {
421 for (unsigned I
= 0, E
= TI
->getNumSuccessors(); I
!= E
; ++I
) {
422 Weights
[I
] /= ScalingFactor
;
423 WeightSum
+= Weights
[I
];
426 assert(WeightSum
<= UINT32_MAX
&&
427 "Expected weights to scale down to 32 bits");
429 if (WeightSum
== 0 || ReachableIdxs
.size() == 0) {
430 for (unsigned I
= 0, E
= TI
->getNumSuccessors(); I
!= E
; ++I
)
432 WeightSum
= TI
->getNumSuccessors();
435 // Set the probability.
436 SmallVector
<BranchProbability
, 2> BP
;
437 for (unsigned I
= 0, E
= TI
->getNumSuccessors(); I
!= E
; ++I
)
438 BP
.push_back({ Weights
[I
], static_cast<uint32_t>(WeightSum
) });
440 // Examine the metadata against unreachable heuristic.
441 // If the unreachable heuristic is more strong then we use it for this edge.
442 if (UnreachableIdxs
.size() == 0 || ReachableIdxs
.size() == 0) {
443 setEdgeProbability(BB
, BP
);
447 auto UnreachableProb
= UR_TAKEN_PROB
;
448 for (auto I
: UnreachableIdxs
)
449 if (UnreachableProb
< BP
[I
]) {
450 BP
[I
] = UnreachableProb
;
453 // Sum of all edge probabilities must be 1.0. If we modified the probability
454 // of some edges then we must distribute the introduced difference over the
457 // Proportional distribution: the relation between probabilities of the
458 // reachable edges is kept unchanged. That is for any reachable edges i and j:
459 // newBP[i] / newBP[j] == oldBP[i] / oldBP[j] =>
460 // newBP[i] / oldBP[i] == newBP[j] / oldBP[j] == K
461 // Where K is independent of i,j.
462 // newBP[i] == oldBP[i] * K
463 // We need to find K.
464 // Make sum of all reachables of the left and right parts:
465 // sum_of_reachable(newBP) == K * sum_of_reachable(oldBP)
466 // Sum of newBP must be equal to 1.0:
467 // sum_of_reachable(newBP) + sum_of_unreachable(newBP) == 1.0 =>
468 // sum_of_reachable(newBP) = 1.0 - sum_of_unreachable(newBP)
469 // Where sum_of_unreachable(newBP) is what has been just changed.
471 // K == sum_of_reachable(newBP) / sum_of_reachable(oldBP) =>
472 // K == (1.0 - sum_of_unreachable(newBP)) / sum_of_reachable(oldBP)
473 BranchProbability NewUnreachableSum
= BranchProbability::getZero();
474 for (auto I
: UnreachableIdxs
)
475 NewUnreachableSum
+= BP
[I
];
477 BranchProbability NewReachableSum
=
478 BranchProbability::getOne() - NewUnreachableSum
;
480 BranchProbability OldReachableSum
= BranchProbability::getZero();
481 for (auto I
: ReachableIdxs
)
482 OldReachableSum
+= BP
[I
];
484 if (OldReachableSum
!= NewReachableSum
) { // Anything to dsitribute?
485 if (OldReachableSum
.isZero()) {
486 // If all oldBP[i] are zeroes then the proportional distribution results
487 // in all zero probabilities and the error stays big. In this case we
488 // evenly spread NewReachableSum over the reachable edges.
489 BranchProbability PerEdge
= NewReachableSum
/ ReachableIdxs
.size();
490 for (auto I
: ReachableIdxs
)
493 for (auto I
: ReachableIdxs
) {
494 // We use uint64_t to avoid double rounding error of the following
495 // calculation: BP[i] = BP[i] * NewReachableSum / OldReachableSum
496 // The formula is taken from the private constructor
497 // BranchProbability(uint32_t Numerator, uint32_t Denominator)
498 uint64_t Mul
= static_cast<uint64_t>(NewReachableSum
.getNumerator()) *
499 BP
[I
].getNumerator();
500 uint32_t Div
= static_cast<uint32_t>(
501 divideNearest(Mul
, OldReachableSum
.getNumerator()));
502 BP
[I
] = BranchProbability::getRaw(Div
);
507 setEdgeProbability(BB
, BP
);
512 // Calculate Edge Weights using "Pointer Heuristics". Predict a comparison
513 // between two pointer or pointer and NULL will fail.
514 bool BranchProbabilityInfo::calcPointerHeuristics(const BasicBlock
*BB
) {
515 const BranchInst
*BI
= dyn_cast
<BranchInst
>(BB
->getTerminator());
516 if (!BI
|| !BI
->isConditional())
519 Value
*Cond
= BI
->getCondition();
520 ICmpInst
*CI
= dyn_cast
<ICmpInst
>(Cond
);
521 if (!CI
|| !CI
->isEquality())
524 Value
*LHS
= CI
->getOperand(0);
526 if (!LHS
->getType()->isPointerTy())
529 assert(CI
->getOperand(1)->getType()->isPointerTy());
531 auto Search
= PointerTable
.find(CI
->getPredicate());
532 if (Search
== PointerTable
.end())
534 setEdgeProbability(BB
, Search
->second
);
538 // Compute the unlikely successors to the block BB in the loop L, specifically
539 // those that are unlikely because this is a loop, and add them to the
540 // UnlikelyBlocks set.
542 computeUnlikelySuccessors(const BasicBlock
*BB
, Loop
*L
,
543 SmallPtrSetImpl
<const BasicBlock
*> &UnlikelyBlocks
) {
544 // Sometimes in a loop we have a branch whose condition is made false by
545 // taking it. This is typically something like
552 // In this sort of situation taking the branch means that at the very least it
553 // won't be taken again in the next iteration of the loop, so we should
554 // consider it less likely than a typical branch.
556 // We detect this by looking back through the graph of PHI nodes that sets the
557 // value that the condition depends on, and seeing if we can reach a successor
558 // block which can be determined to make the condition false.
560 // FIXME: We currently consider unlikely blocks to be half as likely as other
561 // blocks, but if we consider the example above the likelyhood is actually
562 // 1/MAX. We could therefore be more precise in how unlikely we consider
563 // blocks to be, but it would require more careful examination of the form
564 // of the comparison expression.
565 const BranchInst
*BI
= dyn_cast
<BranchInst
>(BB
->getTerminator());
566 if (!BI
|| !BI
->isConditional())
569 // Check if the branch is based on an instruction compared with a constant
570 CmpInst
*CI
= dyn_cast
<CmpInst
>(BI
->getCondition());
571 if (!CI
|| !isa
<Instruction
>(CI
->getOperand(0)) ||
572 !isa
<Constant
>(CI
->getOperand(1)))
575 // Either the instruction must be a PHI, or a chain of operations involving
576 // constants that ends in a PHI which we can then collapse into a single value
577 // if the PHI value is known.
578 Instruction
*CmpLHS
= dyn_cast
<Instruction
>(CI
->getOperand(0));
579 PHINode
*CmpPHI
= dyn_cast
<PHINode
>(CmpLHS
);
580 Constant
*CmpConst
= dyn_cast
<Constant
>(CI
->getOperand(1));
581 // Collect the instructions until we hit a PHI
582 SmallVector
<BinaryOperator
*, 1> InstChain
;
583 while (!CmpPHI
&& CmpLHS
&& isa
<BinaryOperator
>(CmpLHS
) &&
584 isa
<Constant
>(CmpLHS
->getOperand(1))) {
585 // Stop if the chain extends outside of the loop
586 if (!L
->contains(CmpLHS
))
588 InstChain
.push_back(cast
<BinaryOperator
>(CmpLHS
));
589 CmpLHS
= dyn_cast
<Instruction
>(CmpLHS
->getOperand(0));
591 CmpPHI
= dyn_cast
<PHINode
>(CmpLHS
);
593 if (!CmpPHI
|| !L
->contains(CmpPHI
))
596 // Trace the phi node to find all values that come from successors of BB
597 SmallPtrSet
<PHINode
*, 8> VisitedInsts
;
598 SmallVector
<PHINode
*, 8> WorkList
;
599 WorkList
.push_back(CmpPHI
);
600 VisitedInsts
.insert(CmpPHI
);
601 while (!WorkList
.empty()) {
602 PHINode
*P
= WorkList
.pop_back_val();
603 for (BasicBlock
*B
: P
->blocks()) {
604 // Skip blocks that aren't part of the loop
607 Value
*V
= P
->getIncomingValueForBlock(B
);
608 // If the source is a PHI add it to the work list if we haven't
609 // already visited it.
610 if (PHINode
*PN
= dyn_cast
<PHINode
>(V
)) {
611 if (VisitedInsts
.insert(PN
).second
)
612 WorkList
.push_back(PN
);
615 // If this incoming value is a constant and B is a successor of BB, then
616 // we can constant-evaluate the compare to see if it makes the branch be
618 Constant
*CmpLHSConst
= dyn_cast
<Constant
>(V
);
619 if (!CmpLHSConst
|| !llvm::is_contained(successors(BB
), B
))
621 // First collapse InstChain
622 const DataLayout
&DL
= BB
->getDataLayout();
623 for (Instruction
*I
: llvm::reverse(InstChain
)) {
624 CmpLHSConst
= ConstantFoldBinaryOpOperands(
625 I
->getOpcode(), CmpLHSConst
, cast
<Constant
>(I
->getOperand(1)), DL
);
631 // Now constant-evaluate the compare
632 Constant
*Result
= ConstantFoldCompareInstOperands(
633 CI
->getPredicate(), CmpLHSConst
, CmpConst
, DL
);
634 // If the result means we don't branch to the block then that block is
637 ((Result
->isZeroValue() && B
== BI
->getSuccessor(0)) ||
638 (Result
->isOneValue() && B
== BI
->getSuccessor(1))))
639 UnlikelyBlocks
.insert(B
);
644 std::optional
<uint32_t>
645 BranchProbabilityInfo::getEstimatedBlockWeight(const BasicBlock
*BB
) const {
646 auto WeightIt
= EstimatedBlockWeight
.find(BB
);
647 if (WeightIt
== EstimatedBlockWeight
.end())
649 return WeightIt
->second
;
652 std::optional
<uint32_t>
653 BranchProbabilityInfo::getEstimatedLoopWeight(const LoopData
&L
) const {
654 auto WeightIt
= EstimatedLoopWeight
.find(L
);
655 if (WeightIt
== EstimatedLoopWeight
.end())
657 return WeightIt
->second
;
660 std::optional
<uint32_t>
661 BranchProbabilityInfo::getEstimatedEdgeWeight(const LoopEdge
&Edge
) const {
662 // For edges entering a loop take weight of a loop rather than an individual
663 // block in the loop.
664 return isLoopEnteringEdge(Edge
)
665 ? getEstimatedLoopWeight(Edge
.second
.getLoopData())
666 : getEstimatedBlockWeight(Edge
.second
.getBlock());
669 template <class IterT
>
670 std::optional
<uint32_t> BranchProbabilityInfo::getMaxEstimatedEdgeWeight(
671 const LoopBlock
&SrcLoopBB
, iterator_range
<IterT
> Successors
) const {
672 SmallVector
<uint32_t, 4> Weights
;
673 std::optional
<uint32_t> MaxWeight
;
674 for (const BasicBlock
*DstBB
: Successors
) {
675 const LoopBlock DstLoopBB
= getLoopBlock(DstBB
);
676 auto Weight
= getEstimatedEdgeWeight({SrcLoopBB
, DstLoopBB
});
681 if (!MaxWeight
|| *MaxWeight
< *Weight
)
688 // Updates \p LoopBB's weight and returns true. If \p LoopBB has already
689 // an associated weight it is unchanged and false is returned.
691 // Please note by the algorithm the weight is not expected to change once set
692 // thus 'false' status is used to track visited blocks.
693 bool BranchProbabilityInfo::updateEstimatedBlockWeight(
694 LoopBlock
&LoopBB
, uint32_t BBWeight
,
695 SmallVectorImpl
<BasicBlock
*> &BlockWorkList
,
696 SmallVectorImpl
<LoopBlock
> &LoopWorkList
) {
697 BasicBlock
*BB
= LoopBB
.getBlock();
699 // In general, weight is assigned to a block when it has final value and
700 // can't/shouldn't be changed. However, there are cases when a block
701 // inherently has several (possibly "contradicting") weights. For example,
702 // "unwind" block may also contain "cold" call. In that case the first
703 // set weight is favored and all consequent weights are ignored.
704 if (!EstimatedBlockWeight
.insert({BB
, BBWeight
}).second
)
707 for (BasicBlock
*PredBlock
: predecessors(BB
)) {
708 LoopBlock PredLoop
= getLoopBlock(PredBlock
);
709 // Add affected block/loop to a working list.
710 if (isLoopExitingEdge({PredLoop
, LoopBB
})) {
711 if (!EstimatedLoopWeight
.count(PredLoop
.getLoopData()))
712 LoopWorkList
.push_back(PredLoop
);
713 } else if (!EstimatedBlockWeight
.count(PredBlock
))
714 BlockWorkList
.push_back(PredBlock
);
719 // Starting from \p BB traverse through dominator blocks and assign \p BBWeight
720 // to all such blocks that are post dominated by \BB. In other words to all
721 // blocks that the one is executed if and only if another one is executed.
722 // Importantly, we skip loops here for two reasons. First weights of blocks in
723 // a loop should be scaled by trip count (yet possibly unknown). Second there is
724 // no any value in doing that because that doesn't give any additional
725 // information regarding distribution of probabilities inside the loop.
726 // Exception is loop 'enter' and 'exit' edges that are handled in a special way
727 // at calcEstimatedHeuristics.
729 // In addition, \p WorkList is populated with basic blocks if at leas one
730 // successor has updated estimated weight.
731 void BranchProbabilityInfo::propagateEstimatedBlockWeight(
732 const LoopBlock
&LoopBB
, DominatorTree
*DT
, PostDominatorTree
*PDT
,
733 uint32_t BBWeight
, SmallVectorImpl
<BasicBlock
*> &BlockWorkList
,
734 SmallVectorImpl
<LoopBlock
> &LoopWorkList
) {
735 const BasicBlock
*BB
= LoopBB
.getBlock();
736 const auto *DTStartNode
= DT
->getNode(BB
);
737 const auto *PDTStartNode
= PDT
->getNode(BB
);
739 // TODO: Consider propagating weight down the domination line as well.
740 for (const auto *DTNode
= DTStartNode
; DTNode
!= nullptr;
741 DTNode
= DTNode
->getIDom()) {
742 auto *DomBB
= DTNode
->getBlock();
743 // Consider blocks which lie on one 'line'.
744 if (!PDT
->dominates(PDTStartNode
, PDT
->getNode(DomBB
)))
745 // If BB doesn't post dominate DomBB it will not post dominate dominators
749 LoopBlock DomLoopBB
= getLoopBlock(DomBB
);
750 const LoopEdge Edge
{DomLoopBB
, LoopBB
};
751 // Don't propagate weight to blocks belonging to different loops.
752 if (!isLoopEnteringExitingEdge(Edge
)) {
753 if (!updateEstimatedBlockWeight(DomLoopBB
, BBWeight
, BlockWorkList
,
755 // If DomBB has weight set then all it's predecessors are already
756 // processed (since we propagate weight up to the top of IR each time).
758 } else if (isLoopExitingEdge(Edge
)) {
759 LoopWorkList
.push_back(DomLoopBB
);
764 std::optional
<uint32_t>
765 BranchProbabilityInfo::getInitialEstimatedBlockWeight(const BasicBlock
*BB
) {
766 // Returns true if \p BB has call marked with "NoReturn" attribute.
767 auto hasNoReturn
= [&](const BasicBlock
*BB
) {
768 for (const auto &I
: reverse(*BB
))
769 if (const CallInst
*CI
= dyn_cast
<CallInst
>(&I
))
770 if (CI
->hasFnAttr(Attribute::NoReturn
))
776 // Important note regarding the order of checks. They are ordered by weight
777 // from lowest to highest. Doing that allows to avoid "unstable" results
778 // when several conditions heuristics can be applied simultaneously.
779 if (isa
<UnreachableInst
>(BB
->getTerminator()) ||
780 // If this block is terminated by a call to
781 // @llvm.experimental.deoptimize then treat it like an unreachable
782 // since it is expected to practically never execute.
783 // TODO: Should we actually treat as never returning call?
784 BB
->getTerminatingDeoptimizeCall())
785 return hasNoReturn(BB
)
786 ? static_cast<uint32_t>(BlockExecWeight::NORETURN
)
787 : static_cast<uint32_t>(BlockExecWeight::UNREACHABLE
);
789 // Check if the block is an exception handling block.
791 return static_cast<uint32_t>(BlockExecWeight::UNWIND
);
793 // Check if the block contains 'cold' call.
794 for (const auto &I
: *BB
)
795 if (const CallInst
*CI
= dyn_cast
<CallInst
>(&I
))
796 if (CI
->hasFnAttr(Attribute::Cold
))
797 return static_cast<uint32_t>(BlockExecWeight::COLD
);
802 // Does RPO traversal over all blocks in \p F and assigns weights to
803 // 'unreachable', 'noreturn', 'cold', 'unwind' blocks. In addition it does its
804 // best to propagate the weight to up/down the IR.
805 void BranchProbabilityInfo::computeEestimateBlockWeight(
806 const Function
&F
, DominatorTree
*DT
, PostDominatorTree
*PDT
) {
807 SmallVector
<BasicBlock
*, 8> BlockWorkList
;
808 SmallVector
<LoopBlock
, 8> LoopWorkList
;
809 SmallDenseMap
<LoopData
, SmallVector
<BasicBlock
*, 4>> LoopExitBlocks
;
811 // By doing RPO we make sure that all predecessors already have weights
812 // calculated before visiting theirs successors.
813 ReversePostOrderTraversal
<const Function
*> RPOT(&F
);
814 for (const auto *BB
: RPOT
)
815 if (auto BBWeight
= getInitialEstimatedBlockWeight(BB
))
816 // If we were able to find estimated weight for the block set it to this
817 // block and propagate up the IR.
818 propagateEstimatedBlockWeight(getLoopBlock(BB
), DT
, PDT
, *BBWeight
,
819 BlockWorkList
, LoopWorkList
);
821 // BlockWorklist/LoopWorkList contains blocks/loops with at least one
822 // successor/exit having estimated weight. Try to propagate weight to such
823 // blocks/loops from successors/exits.
824 // Process loops and blocks. Order is not important.
826 while (!LoopWorkList
.empty()) {
827 const LoopBlock LoopBB
= LoopWorkList
.pop_back_val();
828 const LoopData LD
= LoopBB
.getLoopData();
829 if (EstimatedLoopWeight
.count(LD
))
832 auto Res
= LoopExitBlocks
.try_emplace(LD
);
833 SmallVectorImpl
<BasicBlock
*> &Exits
= Res
.first
->second
;
835 getLoopExitBlocks(LoopBB
, Exits
);
836 auto LoopWeight
= getMaxEstimatedEdgeWeight(
837 LoopBB
, make_range(Exits
.begin(), Exits
.end()));
840 // If we never exit the loop then we can enter it once at maximum.
841 if (LoopWeight
<= static_cast<uint32_t>(BlockExecWeight::UNREACHABLE
))
842 LoopWeight
= static_cast<uint32_t>(BlockExecWeight::LOWEST_NON_ZERO
);
844 EstimatedLoopWeight
.insert({LD
, *LoopWeight
});
845 // Add all blocks entering the loop into working list.
846 getLoopEnterBlocks(LoopBB
, BlockWorkList
);
850 while (!BlockWorkList
.empty()) {
851 // We can reach here only if BlockWorkList is not empty.
852 const BasicBlock
*BB
= BlockWorkList
.pop_back_val();
853 if (EstimatedBlockWeight
.count(BB
))
856 // We take maximum over all weights of successors. In other words we take
857 // weight of "hot" path. In theory we can probably find a better function
858 // which gives higher accuracy results (comparing to "maximum") but I
860 // think of any right now. And I doubt it will make any difference in
862 const LoopBlock LoopBB
= getLoopBlock(BB
);
863 auto MaxWeight
= getMaxEstimatedEdgeWeight(LoopBB
, successors(BB
));
866 propagateEstimatedBlockWeight(LoopBB
, DT
, PDT
, *MaxWeight
,
867 BlockWorkList
, LoopWorkList
);
869 } while (!BlockWorkList
.empty() || !LoopWorkList
.empty());
872 // Calculate edge probabilities based on block's estimated weight.
873 // Note that gathered weights were not scaled for loops. Thus edges entering
874 // and exiting loops requires special processing.
875 bool BranchProbabilityInfo::calcEstimatedHeuristics(const BasicBlock
*BB
) {
876 assert(BB
->getTerminator()->getNumSuccessors() > 1 &&
877 "expected more than one successor!");
879 const LoopBlock LoopBB
= getLoopBlock(BB
);
881 SmallPtrSet
<const BasicBlock
*, 8> UnlikelyBlocks
;
882 uint32_t TC
= LBH_TAKEN_WEIGHT
/ LBH_NONTAKEN_WEIGHT
;
883 if (LoopBB
.getLoop())
884 computeUnlikelySuccessors(BB
, LoopBB
.getLoop(), UnlikelyBlocks
);
886 // Changed to 'true' if at least one successor has estimated weight.
887 bool FoundEstimatedWeight
= false;
888 SmallVector
<uint32_t, 4> SuccWeights
;
889 uint64_t TotalWeight
= 0;
890 // Go over all successors of BB and put their weights into SuccWeights.
891 for (const BasicBlock
*SuccBB
: successors(BB
)) {
892 std::optional
<uint32_t> Weight
;
893 const LoopBlock SuccLoopBB
= getLoopBlock(SuccBB
);
894 const LoopEdge Edge
{LoopBB
, SuccLoopBB
};
896 Weight
= getEstimatedEdgeWeight(Edge
);
898 if (isLoopExitingEdge(Edge
) &&
899 // Avoid adjustment of ZERO weight since it should remain unchanged.
900 Weight
!= static_cast<uint32_t>(BlockExecWeight::ZERO
)) {
901 // Scale down loop exiting weight by trip count.
903 static_cast<uint32_t>(BlockExecWeight::LOWEST_NON_ZERO
),
904 Weight
.value_or(static_cast<uint32_t>(BlockExecWeight::DEFAULT
)) /
907 bool IsUnlikelyEdge
= LoopBB
.getLoop() && UnlikelyBlocks
.contains(SuccBB
);
908 if (IsUnlikelyEdge
&&
909 // Avoid adjustment of ZERO weight since it should remain unchanged.
910 Weight
!= static_cast<uint32_t>(BlockExecWeight::ZERO
)) {
911 // 'Unlikely' blocks have twice lower weight.
913 static_cast<uint32_t>(BlockExecWeight::LOWEST_NON_ZERO
),
914 Weight
.value_or(static_cast<uint32_t>(BlockExecWeight::DEFAULT
)) / 2);
918 FoundEstimatedWeight
= true;
921 Weight
.value_or(static_cast<uint32_t>(BlockExecWeight::DEFAULT
));
922 TotalWeight
+= WeightVal
;
923 SuccWeights
.push_back(WeightVal
);
926 // If non of blocks have estimated weight bail out.
927 // If TotalWeight is 0 that means weight of each successor is 0 as well and
928 // equally likely. Bail out early to not deal with devision by zero.
929 if (!FoundEstimatedWeight
|| TotalWeight
== 0)
932 assert(SuccWeights
.size() == succ_size(BB
) && "Missed successor?");
933 const unsigned SuccCount
= SuccWeights
.size();
935 // If the sum of weights does not fit in 32 bits, scale every weight down
937 if (TotalWeight
> UINT32_MAX
) {
938 uint64_t ScalingFactor
= TotalWeight
/ UINT32_MAX
+ 1;
940 for (unsigned Idx
= 0; Idx
< SuccCount
; ++Idx
) {
941 SuccWeights
[Idx
] /= ScalingFactor
;
942 if (SuccWeights
[Idx
] == static_cast<uint32_t>(BlockExecWeight::ZERO
))
944 static_cast<uint32_t>(BlockExecWeight::LOWEST_NON_ZERO
);
945 TotalWeight
+= SuccWeights
[Idx
];
947 assert(TotalWeight
<= UINT32_MAX
&& "Total weight overflows");
950 // Finally set probabilities to edges according to estimated block weights.
951 SmallVector
<BranchProbability
, 4> EdgeProbabilities(
952 SuccCount
, BranchProbability::getUnknown());
954 for (unsigned Idx
= 0; Idx
< SuccCount
; ++Idx
) {
955 EdgeProbabilities
[Idx
] =
956 BranchProbability(SuccWeights
[Idx
], (uint32_t)TotalWeight
);
958 setEdgeProbability(BB
, EdgeProbabilities
);
962 bool BranchProbabilityInfo::calcZeroHeuristics(const BasicBlock
*BB
,
963 const TargetLibraryInfo
*TLI
) {
964 const BranchInst
*BI
= dyn_cast
<BranchInst
>(BB
->getTerminator());
965 if (!BI
|| !BI
->isConditional())
968 Value
*Cond
= BI
->getCondition();
969 ICmpInst
*CI
= dyn_cast
<ICmpInst
>(Cond
);
973 auto GetConstantInt
= [](Value
*V
) {
974 if (auto *I
= dyn_cast
<BitCastInst
>(V
))
975 return dyn_cast
<ConstantInt
>(I
->getOperand(0));
976 return dyn_cast
<ConstantInt
>(V
);
979 Value
*RHS
= CI
->getOperand(1);
980 ConstantInt
*CV
= GetConstantInt(RHS
);
984 // If the LHS is the result of AND'ing a value with a single bit bitmask,
985 // we don't have information about probabilities.
986 if (Instruction
*LHS
= dyn_cast
<Instruction
>(CI
->getOperand(0)))
987 if (LHS
->getOpcode() == Instruction::And
)
988 if (ConstantInt
*AndRHS
= GetConstantInt(LHS
->getOperand(1)))
989 if (AndRHS
->getValue().isPowerOf2())
992 // Check if the LHS is the return value of a library function
993 LibFunc Func
= NumLibFuncs
;
995 if (CallInst
*Call
= dyn_cast
<CallInst
>(CI
->getOperand(0)))
996 if (Function
*CalledFn
= Call
->getCalledFunction())
997 TLI
->getLibFunc(*CalledFn
, Func
);
999 ProbabilityTable::const_iterator Search
;
1000 if (Func
== LibFunc_strcasecmp
||
1001 Func
== LibFunc_strcmp
||
1002 Func
== LibFunc_strncasecmp
||
1003 Func
== LibFunc_strncmp
||
1004 Func
== LibFunc_memcmp
||
1005 Func
== LibFunc_bcmp
) {
1006 Search
= ICmpWithLibCallTable
.find(CI
->getPredicate());
1007 if (Search
== ICmpWithLibCallTable
.end())
1009 } else if (CV
->isZero()) {
1010 Search
= ICmpWithZeroTable
.find(CI
->getPredicate());
1011 if (Search
== ICmpWithZeroTable
.end())
1013 } else if (CV
->isOne()) {
1014 Search
= ICmpWithOneTable
.find(CI
->getPredicate());
1015 if (Search
== ICmpWithOneTable
.end())
1017 } else if (CV
->isMinusOne()) {
1018 Search
= ICmpWithMinusOneTable
.find(CI
->getPredicate());
1019 if (Search
== ICmpWithMinusOneTable
.end())
1025 setEdgeProbability(BB
, Search
->second
);
1029 bool BranchProbabilityInfo::calcFloatingPointHeuristics(const BasicBlock
*BB
) {
1030 const BranchInst
*BI
= dyn_cast
<BranchInst
>(BB
->getTerminator());
1031 if (!BI
|| !BI
->isConditional())
1034 Value
*Cond
= BI
->getCondition();
1035 FCmpInst
*FCmp
= dyn_cast
<FCmpInst
>(Cond
);
1039 ProbabilityList ProbList
;
1040 if (FCmp
->isEquality()) {
1041 ProbList
= !FCmp
->isTrueWhenEqual() ?
1042 // f1 == f2 -> Unlikely
1043 ProbabilityList({FPTakenProb
, FPUntakenProb
}) :
1044 // f1 != f2 -> Likely
1045 ProbabilityList({FPUntakenProb
, FPTakenProb
});
1047 auto Search
= FCmpTable
.find(FCmp
->getPredicate());
1048 if (Search
== FCmpTable
.end())
1050 ProbList
= Search
->second
;
1053 setEdgeProbability(BB
, ProbList
);
1057 void BranchProbabilityInfo::releaseMemory() {
1062 bool BranchProbabilityInfo::invalidate(Function
&, const PreservedAnalyses
&PA
,
1063 FunctionAnalysisManager::Invalidator
&) {
1064 // Check whether the analysis, all analyses on functions, or the function's
1065 // CFG have been preserved.
1066 auto PAC
= PA
.getChecker
<BranchProbabilityAnalysis
>();
1067 return !(PAC
.preserved() || PAC
.preservedSet
<AllAnalysesOn
<Function
>>() ||
1068 PAC
.preservedSet
<CFGAnalyses
>());
1071 void BranchProbabilityInfo::print(raw_ostream
&OS
) const {
1072 OS
<< "---- Branch Probabilities ----\n";
1073 // We print the probabilities from the last function the analysis ran over,
1074 // or the function it is currently running over.
1075 assert(LastF
&& "Cannot print prior to running over a function");
1076 for (const auto &BI
: *LastF
) {
1077 for (const BasicBlock
*Succ
: successors(&BI
))
1078 printEdgeProbability(OS
<< " ", &BI
, Succ
);
1082 bool BranchProbabilityInfo::
1083 isEdgeHot(const BasicBlock
*Src
, const BasicBlock
*Dst
) const {
1084 // Hot probability is at least 4/5 = 80%
1085 // FIXME: Compare against a static "hot" BranchProbability.
1086 return getEdgeProbability(Src
, Dst
) > BranchProbability(4, 5);
1089 /// Get the raw edge probability for the edge. If can't find it, return a
1090 /// default probability 1/N where N is the number of successors. Here an edge is
1091 /// specified using PredBlock and an
1092 /// index to the successors.
1094 BranchProbabilityInfo::getEdgeProbability(const BasicBlock
*Src
,
1095 unsigned IndexInSuccessors
) const {
1096 auto I
= Probs
.find(std::make_pair(Src
, IndexInSuccessors
));
1097 assert((Probs
.end() == Probs
.find(std::make_pair(Src
, 0))) ==
1098 (Probs
.end() == I
) &&
1099 "Probability for I-th successor must always be defined along with the "
1100 "probability for the first successor");
1102 if (I
!= Probs
.end())
1105 return {1, static_cast<uint32_t>(succ_size(Src
))};
1109 BranchProbabilityInfo::getEdgeProbability(const BasicBlock
*Src
,
1110 const_succ_iterator Dst
) const {
1111 return getEdgeProbability(Src
, Dst
.getSuccessorIndex());
1114 /// Get the raw edge probability calculated for the block pair. This returns the
1115 /// sum of all raw edge probabilities from Src to Dst.
1117 BranchProbabilityInfo::getEdgeProbability(const BasicBlock
*Src
,
1118 const BasicBlock
*Dst
) const {
1119 if (!Probs
.count(std::make_pair(Src
, 0)))
1120 return BranchProbability(llvm::count(successors(Src
), Dst
), succ_size(Src
));
1122 auto Prob
= BranchProbability::getZero();
1123 for (const_succ_iterator I
= succ_begin(Src
), E
= succ_end(Src
); I
!= E
; ++I
)
1125 Prob
+= Probs
.find(std::make_pair(Src
, I
.getSuccessorIndex()))->second
;
1130 /// Set the edge probability for all edges at once.
1131 void BranchProbabilityInfo::setEdgeProbability(
1132 const BasicBlock
*Src
, const SmallVectorImpl
<BranchProbability
> &Probs
) {
1133 assert(Src
->getTerminator()->getNumSuccessors() == Probs
.size());
1134 eraseBlock(Src
); // Erase stale data if any.
1135 if (Probs
.size() == 0)
1136 return; // Nothing to set.
1138 Handles
.insert(BasicBlockCallbackVH(Src
, this));
1139 uint64_t TotalNumerator
= 0;
1140 for (unsigned SuccIdx
= 0; SuccIdx
< Probs
.size(); ++SuccIdx
) {
1141 this->Probs
[std::make_pair(Src
, SuccIdx
)] = Probs
[SuccIdx
];
1142 LLVM_DEBUG(dbgs() << "set edge " << Src
->getName() << " -> " << SuccIdx
1143 << " successor probability to " << Probs
[SuccIdx
]
1145 TotalNumerator
+= Probs
[SuccIdx
].getNumerator();
1148 // Because of rounding errors the total probability cannot be checked to be
1149 // 1.0 exactly. That is TotalNumerator == BranchProbability::getDenominator.
1150 // Instead, every single probability in Probs must be as accurate as possible.
1151 // This results in error 1/denominator at most, thus the total absolute error
1152 // should be within Probs.size / BranchProbability::getDenominator.
1153 assert(TotalNumerator
<= BranchProbability::getDenominator() + Probs
.size());
1154 assert(TotalNumerator
>= BranchProbability::getDenominator() - Probs
.size());
1155 (void)TotalNumerator
;
1158 void BranchProbabilityInfo::copyEdgeProbabilities(BasicBlock
*Src
,
1160 eraseBlock(Dst
); // Erase stale data if any.
1161 unsigned NumSuccessors
= Src
->getTerminator()->getNumSuccessors();
1162 assert(NumSuccessors
== Dst
->getTerminator()->getNumSuccessors());
1163 if (NumSuccessors
== 0)
1164 return; // Nothing to set.
1165 if (!this->Probs
.contains(std::make_pair(Src
, 0)))
1166 return; // No probability is set for edges from Src. Keep the same for Dst.
1168 Handles
.insert(BasicBlockCallbackVH(Dst
, this));
1169 for (unsigned SuccIdx
= 0; SuccIdx
< NumSuccessors
; ++SuccIdx
) {
1170 auto Prob
= this->Probs
[std::make_pair(Src
, SuccIdx
)];
1171 this->Probs
[std::make_pair(Dst
, SuccIdx
)] = Prob
;
1172 LLVM_DEBUG(dbgs() << "set edge " << Dst
->getName() << " -> " << SuccIdx
1173 << " successor probability to " << Prob
<< "\n");
1177 void BranchProbabilityInfo::swapSuccEdgesProbabilities(const BasicBlock
*Src
) {
1178 assert(Src
->getTerminator()->getNumSuccessors() == 2);
1179 if (!Probs
.contains(std::make_pair(Src
, 0)))
1180 return; // No probability is set for edges from Src
1181 assert(Probs
.contains(std::make_pair(Src
, 1)));
1182 std::swap(Probs
[std::make_pair(Src
, 0)], Probs
[std::make_pair(Src
, 1)]);
1186 BranchProbabilityInfo::printEdgeProbability(raw_ostream
&OS
,
1187 const BasicBlock
*Src
,
1188 const BasicBlock
*Dst
) const {
1189 const BranchProbability Prob
= getEdgeProbability(Src
, Dst
);
1191 Src
->printAsOperand(OS
, false, Src
->getModule());
1193 Dst
->printAsOperand(OS
, false, Dst
->getModule());
1194 OS
<< " probability is " << Prob
1195 << (isEdgeHot(Src
, Dst
) ? " [HOT edge]\n" : "\n");
1200 void BranchProbabilityInfo::eraseBlock(const BasicBlock
*BB
) {
1201 LLVM_DEBUG(dbgs() << "eraseBlock " << BB
->getName() << "\n");
1203 // Note that we cannot use successors of BB because the terminator of BB may
1204 // have changed when eraseBlock is called as a BasicBlockCallbackVH callback.
1205 // Instead we remove prob data for the block by iterating successors by their
1206 // indices from 0 till the last which exists. There could not be prob data for
1207 // a pair (BB, N) if there is no data for (BB, N-1) because the data is always
1208 // set for all successors from 0 to M at once by the method
1209 // setEdgeProbability().
1210 Handles
.erase(BasicBlockCallbackVH(BB
, this));
1211 for (unsigned I
= 0;; ++I
) {
1212 auto MapI
= Probs
.find(std::make_pair(BB
, I
));
1213 if (MapI
== Probs
.end()) {
1214 assert(Probs
.count(std::make_pair(BB
, I
+ 1)) == 0 &&
1215 "Must be no more successors");
1222 void BranchProbabilityInfo::calculate(const Function
&F
, const LoopInfo
&LoopI
,
1223 const TargetLibraryInfo
*TLI
,
1225 PostDominatorTree
*PDT
) {
1226 LLVM_DEBUG(dbgs() << "---- Branch Probability Info : " << F
.getName()
1228 LastF
= &F
; // Store the last function we ran on for printing.
1231 SccI
= std::make_unique
<SccInfo
>(F
);
1233 assert(EstimatedBlockWeight
.empty());
1234 assert(EstimatedLoopWeight
.empty());
1236 std::unique_ptr
<DominatorTree
> DTPtr
;
1237 std::unique_ptr
<PostDominatorTree
> PDTPtr
;
1240 DTPtr
= std::make_unique
<DominatorTree
>(const_cast<Function
&>(F
));
1245 PDTPtr
= std::make_unique
<PostDominatorTree
>(const_cast<Function
&>(F
));
1249 computeEestimateBlockWeight(F
, DT
, PDT
);
1251 // Walk the basic blocks in post-order so that we can build up state about
1252 // the successors of a block iteratively.
1253 for (const auto *BB
: post_order(&F
.getEntryBlock())) {
1254 LLVM_DEBUG(dbgs() << "Computing probabilities for " << BB
->getName()
1256 // If there is no at least two successors, no sense to set probability.
1257 if (BB
->getTerminator()->getNumSuccessors() < 2)
1259 if (calcMetadataWeights(BB
))
1261 if (calcEstimatedHeuristics(BB
))
1263 if (calcPointerHeuristics(BB
))
1265 if (calcZeroHeuristics(BB
, TLI
))
1267 if (calcFloatingPointHeuristics(BB
))
1271 EstimatedLoopWeight
.clear();
1272 EstimatedBlockWeight
.clear();
1275 if (PrintBranchProb
&& (PrintBranchProbFuncName
.empty() ||
1276 F
.getName() == PrintBranchProbFuncName
)) {
1281 void BranchProbabilityInfoWrapperPass::getAnalysisUsage(
1282 AnalysisUsage
&AU
) const {
1283 // We require DT so it's available when LI is available. The LI updating code
1284 // asserts that DT is also present so if we don't make sure that we have DT
1285 // here, that assert will trigger.
1286 AU
.addRequired
<DominatorTreeWrapperPass
>();
1287 AU
.addRequired
<LoopInfoWrapperPass
>();
1288 AU
.addRequired
<TargetLibraryInfoWrapperPass
>();
1289 AU
.addRequired
<DominatorTreeWrapperPass
>();
1290 AU
.addRequired
<PostDominatorTreeWrapperPass
>();
1291 AU
.setPreservesAll();
1294 bool BranchProbabilityInfoWrapperPass::runOnFunction(Function
&F
) {
1295 const LoopInfo
&LI
= getAnalysis
<LoopInfoWrapperPass
>().getLoopInfo();
1296 const TargetLibraryInfo
&TLI
=
1297 getAnalysis
<TargetLibraryInfoWrapperPass
>().getTLI(F
);
1298 DominatorTree
&DT
= getAnalysis
<DominatorTreeWrapperPass
>().getDomTree();
1299 PostDominatorTree
&PDT
=
1300 getAnalysis
<PostDominatorTreeWrapperPass
>().getPostDomTree();
1301 BPI
.calculate(F
, LI
, &TLI
, &DT
, &PDT
);
1305 void BranchProbabilityInfoWrapperPass::releaseMemory() { BPI
.releaseMemory(); }
1307 void BranchProbabilityInfoWrapperPass::print(raw_ostream
&OS
,
1308 const Module
*) const {
1312 AnalysisKey
BranchProbabilityAnalysis::Key
;
1313 BranchProbabilityInfo
1314 BranchProbabilityAnalysis::run(Function
&F
, FunctionAnalysisManager
&AM
) {
1315 auto &LI
= AM
.getResult
<LoopAnalysis
>(F
);
1316 auto &TLI
= AM
.getResult
<TargetLibraryAnalysis
>(F
);
1317 auto &DT
= AM
.getResult
<DominatorTreeAnalysis
>(F
);
1318 auto &PDT
= AM
.getResult
<PostDominatorTreeAnalysis
>(F
);
1319 BranchProbabilityInfo BPI
;
1320 BPI
.calculate(F
, LI
, &TLI
, &DT
, &PDT
);
1325 BranchProbabilityPrinterPass::run(Function
&F
, FunctionAnalysisManager
&AM
) {
1326 OS
<< "Printing analysis 'Branch Probability Analysis' for function '"
1327 << F
.getName() << "':\n";
1328 AM
.getResult
<BranchProbabilityAnalysis
>(F
).print(OS
);
1329 return PreservedAnalyses::all();