1 //===- bolt/Passes/MCF.cpp ------------------------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file implements functions for solving minimum-cost flow problem.
11 //===----------------------------------------------------------------------===//
13 #include "bolt/Passes/MCF.h"
14 #include "bolt/Core/BinaryFunction.h"
15 #include "bolt/Passes/DataflowInfoManager.h"
16 #include "bolt/Utils/CommandLineOpts.h"
17 #include "llvm/ADT/DenseMap.h"
18 #include "llvm/Support/CommandLine.h"
23 #define DEBUG_TYPE "mcf"
30 extern cl::OptionCategory BoltOptCategory
;
32 extern cl::opt
<bool> TimeOpts
;
34 static cl::opt
<bool> IterativeGuess(
36 cl::desc("in non-LBR mode, guess edge counts using iterative technique"),
37 cl::Hidden
, cl::cat(BoltOptCategory
));
39 static cl::opt
<bool> UseRArcs(
41 cl::desc("in MCF, consider the possibility of cancelling flow to balance "
43 cl::Hidden
, cl::cat(BoltOptCategory
));
52 // Edge Weight Inference Heuristic
54 // We start by maintaining the invariant used in LBR mode where the sum of
55 // pred edges count is equal to the block execution count. This loop will set
56 // pred edges count by balancing its own execution count in different pred
57 // edges. The weight of each edge is guessed by looking at how hot each pred
58 // block is (in terms of samples).
59 // There are two caveats in this approach. One is for critical edges and the
60 // other is for self-referencing blocks (loops of 1 BB). For critical edges,
61 // we can't infer the hotness of them based solely on pred BBs execution
62 // count. For each critical edge we look at the pred BB, then look at its
63 // succs to adjust its weight.
69 // The illustration above shows a critical edge \. We wish to adjust bb count
70 // 60 to 50 to properly determine the weight of the critical edge to be
72 // For self-referencing edges, we attribute its weight by subtracting the
73 // current BB execution count by the sum of predecessors count if this result
76 DenseMap
<std::pair
<const BinaryBasicBlock
*, const BinaryBasicBlock
*>,
79 template <class NodeT
>
80 void updateEdgeWeight(EdgeWeightMap
&EdgeWeights
, const BinaryBasicBlock
*A
,
81 const BinaryBasicBlock
*B
, double Weight
);
84 void updateEdgeWeight
<BinaryBasicBlock
*>(EdgeWeightMap
&EdgeWeights
,
85 const BinaryBasicBlock
*A
,
86 const BinaryBasicBlock
*B
,
88 EdgeWeights
[std::make_pair(A
, B
)] = Weight
;
92 void updateEdgeWeight
<Inverse
<BinaryBasicBlock
*>>(EdgeWeightMap
&EdgeWeights
,
93 const BinaryBasicBlock
*A
,
94 const BinaryBasicBlock
*B
,
96 EdgeWeights
[std::make_pair(B
, A
)] = Weight
;
99 template <class NodeT
>
100 void computeEdgeWeights(BinaryBasicBlock
*BB
, EdgeWeightMap
&EdgeWeights
) {
101 typedef GraphTraits
<NodeT
> GraphT
;
102 typedef GraphTraits
<Inverse
<NodeT
>> InvTraits
;
104 double TotalChildrenCount
= 0.0;
105 SmallVector
<double, 4> ChildrenExecCount
;
106 // First pass computes total children execution count that directly
107 // contribute to this BB.
108 for (typename
GraphT::ChildIteratorType CI
= GraphT::child_begin(BB
),
109 E
= GraphT::child_end(BB
);
111 typename
GraphT::NodeRef Child
= *CI
;
112 double ChildExecCount
= Child
->getExecutionCount();
113 // Is self-reference?
115 ChildExecCount
= 0.0; // will fill this in second pass
116 } else if (GraphT::child_end(BB
) - GraphT::child_begin(BB
) > 1 &&
117 InvTraits::child_end(Child
) - InvTraits::child_begin(Child
) >
119 // Handle critical edges. This will cause a skew towards crit edges, but
120 // it is a quick solution.
121 double CritWeight
= 0.0;
122 uint64_t Denominator
= 0;
123 for (typename
InvTraits::ChildIteratorType
124 II
= InvTraits::child_begin(Child
),
125 IE
= InvTraits::child_end(Child
);
127 typename
GraphT::NodeRef N
= *II
;
128 Denominator
+= N
->getExecutionCount();
131 CritWeight
= N
->getExecutionCount();
134 CritWeight
/= static_cast<double>(Denominator
);
135 ChildExecCount
*= CritWeight
;
137 ChildrenExecCount
.push_back(ChildExecCount
);
138 TotalChildrenCount
+= ChildExecCount
;
140 // Second pass fixes the weight of a possible self-reference edge
141 uint32_t ChildIndex
= 0;
142 for (typename
GraphT::ChildIteratorType CI
= GraphT::child_begin(BB
),
143 E
= GraphT::child_end(BB
);
145 typename
GraphT::NodeRef Child
= *CI
;
150 if (static_cast<double>(BB
->getExecutionCount()) > TotalChildrenCount
) {
151 ChildrenExecCount
[ChildIndex
] =
152 BB
->getExecutionCount() - TotalChildrenCount
;
153 TotalChildrenCount
+= ChildrenExecCount
[ChildIndex
];
157 // Third pass finally assigns weights to edges
159 for (typename
GraphT::ChildIteratorType CI
= GraphT::child_begin(BB
),
160 E
= GraphT::child_end(BB
);
162 typename
GraphT::NodeRef Child
= *CI
;
163 double Weight
= 1 / (GraphT::child_end(BB
) - GraphT::child_begin(BB
));
164 if (TotalChildrenCount
!= 0.0)
165 Weight
= ChildrenExecCount
[ChildIndex
] / TotalChildrenCount
;
166 updateEdgeWeight
<NodeT
>(EdgeWeights
, BB
, Child
, Weight
);
171 template <class NodeT
>
172 void computeEdgeWeights(BinaryFunction
&BF
, EdgeWeightMap
&EdgeWeights
) {
173 for (BinaryBasicBlock
&BB
: BF
)
174 computeEdgeWeights
<NodeT
>(&BB
, EdgeWeights
);
177 /// Make BB count match the sum of all incoming edges. If AllEdges is true,
178 /// make it match max(SumPredEdges, SumSuccEdges).
179 void recalculateBBCounts(BinaryFunction
&BF
, bool AllEdges
) {
180 for (BinaryBasicBlock
&BB
: BF
) {
181 uint64_t TotalPredsEWeight
= 0;
182 for (BinaryBasicBlock
*Pred
: BB
.predecessors())
183 TotalPredsEWeight
+= Pred
->getBranchInfo(BB
).Count
;
185 if (TotalPredsEWeight
> BB
.getExecutionCount())
186 BB
.setExecutionCount(TotalPredsEWeight
);
191 uint64_t TotalSuccsEWeight
= 0;
192 for (BinaryBasicBlock::BinaryBranchInfo
&BI
: BB
.branch_info())
193 TotalSuccsEWeight
+= BI
.Count
;
195 if (TotalSuccsEWeight
> BB
.getExecutionCount())
196 BB
.setExecutionCount(TotalSuccsEWeight
);
200 // This is our main edge count guessing heuristic. Look at predecessors and
201 // assign a proportionally higher count to pred edges coming from blocks with
202 // a higher execution count in comparison with the other predecessor blocks,
203 // making SumPredEdges match the current BB count.
204 // If "UseSucc" is true, apply the same logic to successor edges as well. Since
205 // some successor edges may already have assigned a count, only update it if the
206 // new count is higher.
207 void guessEdgeByRelHotness(BinaryFunction
&BF
, bool UseSucc
,
208 EdgeWeightMap
&PredEdgeWeights
,
209 EdgeWeightMap
&SuccEdgeWeights
) {
210 for (BinaryBasicBlock
&BB
: BF
) {
211 for (BinaryBasicBlock
*Pred
: BB
.predecessors()) {
212 double RelativeExec
= PredEdgeWeights
[std::make_pair(Pred
, &BB
)];
213 RelativeExec
*= BB
.getExecutionCount();
214 BinaryBasicBlock::BinaryBranchInfo
&BI
= Pred
->getBranchInfo(BB
);
215 if (static_cast<uint64_t>(RelativeExec
) > BI
.Count
)
216 BI
.Count
= static_cast<uint64_t>(RelativeExec
);
222 auto BI
= BB
.branch_info_begin();
223 for (BinaryBasicBlock
*Succ
: BB
.successors()) {
224 double RelativeExec
= SuccEdgeWeights
[std::make_pair(&BB
, Succ
)];
225 RelativeExec
*= BB
.getExecutionCount();
226 if (static_cast<uint64_t>(RelativeExec
) > BI
->Count
)
227 BI
->Count
= static_cast<uint64_t>(RelativeExec
);
234 DenseSet
<std::pair
<const BinaryBasicBlock
*, const BinaryBasicBlock
*>>;
236 /// Predecessor edges version of guessEdgeByIterativeApproach. GuessedArcs has
237 /// all edges we already established their count. Try to guess the count of
238 /// the remaining edge, if there is only one to guess, and return true if we
239 /// were able to guess.
240 bool guessPredEdgeCounts(BinaryBasicBlock
*BB
, ArcSet
&GuessedArcs
) {
241 if (BB
->pred_size() == 0)
244 uint64_t TotalPredCount
= 0;
245 unsigned NumGuessedEdges
= 0;
246 for (BinaryBasicBlock
*Pred
: BB
->predecessors()) {
247 if (GuessedArcs
.count(std::make_pair(Pred
, BB
)))
249 TotalPredCount
+= Pred
->getBranchInfo(*BB
).Count
;
252 if (NumGuessedEdges
!= BB
->pred_size() - 1)
256 static_cast<int64_t>(BB
->getExecutionCount()) - TotalPredCount
;
260 for (BinaryBasicBlock
*Pred
: BB
->predecessors()) {
261 if (GuessedArcs
.count(std::make_pair(Pred
, BB
)))
264 Pred
->getBranchInfo(*BB
).Count
= Guessed
;
265 GuessedArcs
.insert(std::make_pair(Pred
, BB
));
268 llvm_unreachable("Expected unguessed arc");
271 /// Successor edges version of guessEdgeByIterativeApproach. GuessedArcs has
272 /// all edges we already established their count. Try to guess the count of
273 /// the remaining edge, if there is only one to guess, and return true if we
274 /// were able to guess.
275 bool guessSuccEdgeCounts(BinaryBasicBlock
*BB
, ArcSet
&GuessedArcs
) {
276 if (BB
->succ_size() == 0)
279 uint64_t TotalSuccCount
= 0;
280 unsigned NumGuessedEdges
= 0;
281 auto BI
= BB
->branch_info_begin();
282 for (BinaryBasicBlock
*Succ
: BB
->successors()) {
283 if (GuessedArcs
.count(std::make_pair(BB
, Succ
)))
285 TotalSuccCount
+= BI
->Count
;
289 if (NumGuessedEdges
!= BB
->succ_size() - 1)
293 static_cast<int64_t>(BB
->getExecutionCount()) - TotalSuccCount
;
297 BI
= BB
->branch_info_begin();
298 for (BinaryBasicBlock
*Succ
: BB
->successors()) {
299 if (GuessedArcs
.count(std::make_pair(BB
, Succ
))) {
305 GuessedArcs
.insert(std::make_pair(BB
, Succ
));
308 llvm_unreachable("Expected unguessed arc");
311 /// Guess edge count whenever we have only one edge (pred or succ) left
312 /// to guess. Then make its count equal to BB count minus all other edge
313 /// counts we already know their count. Repeat this until there is no
315 void guessEdgeByIterativeApproach(BinaryFunction
&BF
) {
317 bool Changed
= false;
321 for (BinaryBasicBlock
&BB
: BF
) {
322 if (guessPredEdgeCounts(&BB
, KnownArcs
))
324 if (guessSuccEdgeCounts(&BB
, KnownArcs
))
329 // Guess count for non-inferred edges
330 for (BinaryBasicBlock
&BB
: BF
) {
331 for (BinaryBasicBlock
*Pred
: BB
.predecessors()) {
332 if (KnownArcs
.count(std::make_pair(Pred
, &BB
)))
334 BinaryBasicBlock::BinaryBranchInfo
&BI
= Pred
->getBranchInfo(BB
);
336 std::min(Pred
->getExecutionCount(), BB
.getExecutionCount()) / 2;
337 KnownArcs
.insert(std::make_pair(Pred
, &BB
));
339 auto BI
= BB
.branch_info_begin();
340 for (BinaryBasicBlock
*Succ
: BB
.successors()) {
341 if (KnownArcs
.count(std::make_pair(&BB
, Succ
))) {
346 std::min(BB
.getExecutionCount(), Succ
->getExecutionCount()) / 2;
347 KnownArcs
.insert(std::make_pair(&BB
, Succ
));
353 /// Associate each basic block with the BinaryLoop object corresponding to the
354 /// innermost loop containing this block.
355 DenseMap
<const BinaryBasicBlock
*, const BinaryLoop
*>
356 createLoopNestLevelMap(BinaryFunction
&BF
) {
357 DenseMap
<const BinaryBasicBlock
*, const BinaryLoop
*> LoopNestLevel
;
358 const BinaryLoopInfo
&BLI
= BF
.getLoopInfo();
360 for (BinaryBasicBlock
&BB
: BF
)
361 LoopNestLevel
[&BB
] = BLI
[&BB
];
363 return LoopNestLevel
;
366 } // end anonymous namespace
368 void equalizeBBCounts(DataflowInfoManager
&Info
, BinaryFunction
&BF
) {
369 if (BF
.begin() == BF
.end())
372 DominatorAnalysis
<false> &DA
= Info
.getDominatorAnalysis();
373 DominatorAnalysis
<true> &PDA
= Info
.getPostDominatorAnalysis();
374 auto &InsnToBB
= Info
.getInsnToBBMap();
375 // These analyses work at the instruction granularity, but we really only need
376 // basic block granularity here. So we'll use a set of visited edges to avoid
377 // revisiting the same BBs again and again.
378 DenseMap
<const BinaryBasicBlock
*, std::set
<const BinaryBasicBlock
*>>
380 // Equivalence classes mapping. Each equivalence class is defined by the set
381 // of BBs that obeys the aforementioned properties.
382 DenseMap
<const BinaryBasicBlock
*, signed> BBsToEC
;
383 std::vector
<std::vector
<BinaryBasicBlock
*>> Classes
;
385 BF
.calculateLoopInfo();
386 DenseMap
<const BinaryBasicBlock
*, const BinaryLoop
*> LoopNestLevel
=
387 createLoopNestLevelMap(BF
);
389 for (BinaryBasicBlock
&BB
: BF
)
392 for (BinaryBasicBlock
&BB
: BF
) {
397 DA
.doForAllDominators(*I
, [&](const MCInst
&DomInst
) {
398 BinaryBasicBlock
*DomBB
= InsnToBB
[&DomInst
];
399 if (Visited
[DomBB
].count(&BB
))
401 Visited
[DomBB
].insert(&BB
);
402 if (!PDA
.doesADominateB(*I
, DomInst
))
404 if (LoopNestLevel
[&BB
] != LoopNestLevel
[DomBB
])
406 if (BBsToEC
[DomBB
] == -1 && BBsToEC
[&BB
] == -1) {
407 BBsToEC
[DomBB
] = Classes
.size();
408 BBsToEC
[&BB
] = Classes
.size();
409 Classes
.emplace_back();
410 Classes
.back().push_back(DomBB
);
411 Classes
.back().push_back(&BB
);
414 if (BBsToEC
[DomBB
] == -1) {
415 BBsToEC
[DomBB
] = BBsToEC
[&BB
];
416 Classes
[BBsToEC
[&BB
]].push_back(DomBB
);
419 if (BBsToEC
[&BB
] == -1) {
420 BBsToEC
[&BB
] = BBsToEC
[DomBB
];
421 Classes
[BBsToEC
[DomBB
]].push_back(&BB
);
424 signed BBECNum
= BBsToEC
[&BB
];
425 std::vector
<BinaryBasicBlock
*> DomEC
= Classes
[BBsToEC
[DomBB
]];
426 std::vector
<BinaryBasicBlock
*> BBEC
= Classes
[BBECNum
];
427 for (BinaryBasicBlock
*Block
: DomEC
) {
428 BBsToEC
[Block
] = BBECNum
;
429 BBEC
.push_back(Block
);
435 for (std::vector
<BinaryBasicBlock
*> &Class
: Classes
) {
437 for (BinaryBasicBlock
*BB
: Class
)
438 Max
= std::max(Max
, BB
->getExecutionCount());
439 for (BinaryBasicBlock
*BB
: Class
)
440 BB
->setExecutionCount(Max
);
444 void estimateEdgeCounts(BinaryFunction
&BF
) {
445 EdgeWeightMap PredEdgeWeights
;
446 EdgeWeightMap SuccEdgeWeights
;
447 if (!opts::IterativeGuess
) {
448 computeEdgeWeights
<Inverse
<BinaryBasicBlock
*>>(BF
, PredEdgeWeights
);
449 computeEdgeWeights
<BinaryBasicBlock
*>(BF
, SuccEdgeWeights
);
451 if (opts::EqualizeBBCounts
) {
452 LLVM_DEBUG(BF
.print(dbgs(), "before equalize BB counts"));
453 auto Info
= DataflowInfoManager(BF
, nullptr, nullptr);
454 equalizeBBCounts(Info
, BF
);
455 LLVM_DEBUG(BF
.print(dbgs(), "after equalize BB counts"));
457 if (opts::IterativeGuess
)
458 guessEdgeByIterativeApproach(BF
);
460 guessEdgeByRelHotness(BF
, /*UseSuccs=*/false, PredEdgeWeights
,
462 recalculateBBCounts(BF
, /*AllEdges=*/false);
465 void solveMCF(BinaryFunction
&BF
, MCFCostFunction CostFunction
) {
466 llvm_unreachable("not implemented");