Run DCE after a LoopFlatten test to reduce spurious output [nfc]
[llvm-project.git] / bolt / lib / Passes / MCF.cpp
blobc3898d2dce989efdd7f3f77149b5417578ad678a
1 //===- bolt/Passes/MCF.cpp ------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements functions for solving minimum-cost flow problem.
11 //===----------------------------------------------------------------------===//
13 #include "bolt/Passes/MCF.h"
14 #include "bolt/Core/BinaryFunction.h"
15 #include "bolt/Passes/DataflowInfoManager.h"
16 #include "bolt/Utils/CommandLineOpts.h"
17 #include "llvm/ADT/DenseMap.h"
18 #include "llvm/Support/CommandLine.h"
19 #include <algorithm>
20 #include <vector>
22 #undef DEBUG_TYPE
23 #define DEBUG_TYPE "mcf"
25 using namespace llvm;
26 using namespace bolt;
28 namespace opts {
30 extern cl::OptionCategory BoltOptCategory;
32 extern cl::opt<bool> TimeOpts;
34 static cl::opt<bool> IterativeGuess(
35 "iterative-guess",
36 cl::desc("in non-LBR mode, guess edge counts using iterative technique"),
37 cl::Hidden, cl::cat(BoltOptCategory));
39 static cl::opt<bool> UseRArcs(
40 "mcf-use-rarcs",
41 cl::desc("in MCF, consider the possibility of cancelling flow to balance "
42 "edges"),
43 cl::Hidden, cl::cat(BoltOptCategory));
45 } // namespace opts
47 namespace llvm {
48 namespace bolt {
50 namespace {
// Edge Weight Inference Heuristic
//
// We start by maintaining the invariant used in LBR mode where the sum of
// pred edges count is equal to the block execution count. This loop will set
// pred edges count by balancing its own execution count in different pred
// edges. The weight of each edge is guessed by looking at how hot each pred
// block is (in terms of samples).
// There are two caveats in this approach. One is for critical edges and the
// other is for self-referencing blocks (loops of 1 BB). For critical edges,
// we can't infer the hotness of them based solely on pred BBs execution
// count. For each critical edge we look at the pred BB, then look at its
// succs to adjust its weight.
//
//    [ 60 ]  [ 25 ]
//       |  \    |
//    [ 10 ]  [ 75 ]
//
// The illustration above shows a critical edge \. We wish to adjust bb count
// 60 to 50 to properly determine the weight of the critical edge to be
// 50 / 75.
// For a self-referencing edge, we compute its weight by subtracting the sum
// of the predecessors' counts from the current BB execution count, if this
// result is non-negative.
75 using EdgeWeightMap =
76 DenseMap<std::pair<const BinaryBasicBlock *, const BinaryBasicBlock *>,
77 double>;
79 template <class NodeT>
80 void updateEdgeWeight(EdgeWeightMap &EdgeWeights, const BinaryBasicBlock *A,
81 const BinaryBasicBlock *B, double Weight);
83 template <>
84 void updateEdgeWeight<BinaryBasicBlock *>(EdgeWeightMap &EdgeWeights,
85 const BinaryBasicBlock *A,
86 const BinaryBasicBlock *B,
87 double Weight) {
88 EdgeWeights[std::make_pair(A, B)] = Weight;
91 template <>
92 void updateEdgeWeight<Inverse<BinaryBasicBlock *>>(EdgeWeightMap &EdgeWeights,
93 const BinaryBasicBlock *A,
94 const BinaryBasicBlock *B,
95 double Weight) {
96 EdgeWeights[std::make_pair(B, A)] = Weight;
99 template <class NodeT>
100 void computeEdgeWeights(BinaryBasicBlock *BB, EdgeWeightMap &EdgeWeights) {
101 typedef GraphTraits<NodeT> GraphT;
102 typedef GraphTraits<Inverse<NodeT>> InvTraits;
104 double TotalChildrenCount = 0.0;
105 SmallVector<double, 4> ChildrenExecCount;
106 // First pass computes total children execution count that directly
107 // contribute to this BB.
108 for (typename GraphT::ChildIteratorType CI = GraphT::child_begin(BB),
109 E = GraphT::child_end(BB);
110 CI != E; ++CI) {
111 typename GraphT::NodeRef Child = *CI;
112 double ChildExecCount = Child->getExecutionCount();
113 // Is self-reference?
114 if (Child == BB) {
115 ChildExecCount = 0.0; // will fill this in second pass
116 } else if (GraphT::child_end(BB) - GraphT::child_begin(BB) > 1 &&
117 InvTraits::child_end(Child) - InvTraits::child_begin(Child) >
118 1) {
119 // Handle critical edges. This will cause a skew towards crit edges, but
120 // it is a quick solution.
121 double CritWeight = 0.0;
122 uint64_t Denominator = 0;
123 for (typename InvTraits::ChildIteratorType
124 II = InvTraits::child_begin(Child),
125 IE = InvTraits::child_end(Child);
126 II != IE; ++II) {
127 typename GraphT::NodeRef N = *II;
128 Denominator += N->getExecutionCount();
129 if (N != BB)
130 continue;
131 CritWeight = N->getExecutionCount();
133 if (Denominator)
134 CritWeight /= static_cast<double>(Denominator);
135 ChildExecCount *= CritWeight;
137 ChildrenExecCount.push_back(ChildExecCount);
138 TotalChildrenCount += ChildExecCount;
140 // Second pass fixes the weight of a possible self-reference edge
141 uint32_t ChildIndex = 0;
142 for (typename GraphT::ChildIteratorType CI = GraphT::child_begin(BB),
143 E = GraphT::child_end(BB);
144 CI != E; ++CI) {
145 typename GraphT::NodeRef Child = *CI;
146 if (Child != BB) {
147 ++ChildIndex;
148 continue;
150 if (static_cast<double>(BB->getExecutionCount()) > TotalChildrenCount) {
151 ChildrenExecCount[ChildIndex] =
152 BB->getExecutionCount() - TotalChildrenCount;
153 TotalChildrenCount += ChildrenExecCount[ChildIndex];
155 break;
157 // Third pass finally assigns weights to edges
158 ChildIndex = 0;
159 for (typename GraphT::ChildIteratorType CI = GraphT::child_begin(BB),
160 E = GraphT::child_end(BB);
161 CI != E; ++CI) {
162 typename GraphT::NodeRef Child = *CI;
163 double Weight = 1 / (GraphT::child_end(BB) - GraphT::child_begin(BB));
164 if (TotalChildrenCount != 0.0)
165 Weight = ChildrenExecCount[ChildIndex] / TotalChildrenCount;
166 updateEdgeWeight<NodeT>(EdgeWeights, BB, Child, Weight);
167 ++ChildIndex;
171 template <class NodeT>
172 void computeEdgeWeights(BinaryFunction &BF, EdgeWeightMap &EdgeWeights) {
173 for (BinaryBasicBlock &BB : BF)
174 computeEdgeWeights<NodeT>(&BB, EdgeWeights);
177 /// Make BB count match the sum of all incoming edges. If AllEdges is true,
178 /// make it match max(SumPredEdges, SumSuccEdges).
179 void recalculateBBCounts(BinaryFunction &BF, bool AllEdges) {
180 for (BinaryBasicBlock &BB : BF) {
181 uint64_t TotalPredsEWeight = 0;
182 for (BinaryBasicBlock *Pred : BB.predecessors())
183 TotalPredsEWeight += Pred->getBranchInfo(BB).Count;
185 if (TotalPredsEWeight > BB.getExecutionCount())
186 BB.setExecutionCount(TotalPredsEWeight);
188 if (!AllEdges)
189 continue;
191 uint64_t TotalSuccsEWeight = 0;
192 for (BinaryBasicBlock::BinaryBranchInfo &BI : BB.branch_info())
193 TotalSuccsEWeight += BI.Count;
195 if (TotalSuccsEWeight > BB.getExecutionCount())
196 BB.setExecutionCount(TotalSuccsEWeight);
200 // This is our main edge count guessing heuristic. Look at predecessors and
201 // assign a proportionally higher count to pred edges coming from blocks with
202 // a higher execution count in comparison with the other predecessor blocks,
203 // making SumPredEdges match the current BB count.
204 // If "UseSucc" is true, apply the same logic to successor edges as well. Since
205 // some successor edges may already have assigned a count, only update it if the
206 // new count is higher.
207 void guessEdgeByRelHotness(BinaryFunction &BF, bool UseSucc,
208 EdgeWeightMap &PredEdgeWeights,
209 EdgeWeightMap &SuccEdgeWeights) {
210 for (BinaryBasicBlock &BB : BF) {
211 for (BinaryBasicBlock *Pred : BB.predecessors()) {
212 double RelativeExec = PredEdgeWeights[std::make_pair(Pred, &BB)];
213 RelativeExec *= BB.getExecutionCount();
214 BinaryBasicBlock::BinaryBranchInfo &BI = Pred->getBranchInfo(BB);
215 if (static_cast<uint64_t>(RelativeExec) > BI.Count)
216 BI.Count = static_cast<uint64_t>(RelativeExec);
219 if (!UseSucc)
220 continue;
222 auto BI = BB.branch_info_begin();
223 for (BinaryBasicBlock *Succ : BB.successors()) {
224 double RelativeExec = SuccEdgeWeights[std::make_pair(&BB, Succ)];
225 RelativeExec *= BB.getExecutionCount();
226 if (static_cast<uint64_t>(RelativeExec) > BI->Count)
227 BI->Count = static_cast<uint64_t>(RelativeExec);
228 ++BI;
233 using ArcSet =
234 DenseSet<std::pair<const BinaryBasicBlock *, const BinaryBasicBlock *>>;
236 /// Predecessor edges version of guessEdgeByIterativeApproach. GuessedArcs has
237 /// all edges we already established their count. Try to guess the count of
238 /// the remaining edge, if there is only one to guess, and return true if we
239 /// were able to guess.
240 bool guessPredEdgeCounts(BinaryBasicBlock *BB, ArcSet &GuessedArcs) {
241 if (BB->pred_size() == 0)
242 return false;
244 uint64_t TotalPredCount = 0;
245 unsigned NumGuessedEdges = 0;
246 for (BinaryBasicBlock *Pred : BB->predecessors()) {
247 if (GuessedArcs.count(std::make_pair(Pred, BB)))
248 ++NumGuessedEdges;
249 TotalPredCount += Pred->getBranchInfo(*BB).Count;
252 if (NumGuessedEdges != BB->pred_size() - 1)
253 return false;
255 int64_t Guessed =
256 static_cast<int64_t>(BB->getExecutionCount()) - TotalPredCount;
257 if (Guessed < 0)
258 Guessed = 0;
260 for (BinaryBasicBlock *Pred : BB->predecessors()) {
261 if (GuessedArcs.count(std::make_pair(Pred, BB)))
262 continue;
264 Pred->getBranchInfo(*BB).Count = Guessed;
265 GuessedArcs.insert(std::make_pair(Pred, BB));
266 return true;
268 llvm_unreachable("Expected unguessed arc");
271 /// Successor edges version of guessEdgeByIterativeApproach. GuessedArcs has
272 /// all edges we already established their count. Try to guess the count of
273 /// the remaining edge, if there is only one to guess, and return true if we
274 /// were able to guess.
275 bool guessSuccEdgeCounts(BinaryBasicBlock *BB, ArcSet &GuessedArcs) {
276 if (BB->succ_size() == 0)
277 return false;
279 uint64_t TotalSuccCount = 0;
280 unsigned NumGuessedEdges = 0;
281 auto BI = BB->branch_info_begin();
282 for (BinaryBasicBlock *Succ : BB->successors()) {
283 if (GuessedArcs.count(std::make_pair(BB, Succ)))
284 ++NumGuessedEdges;
285 TotalSuccCount += BI->Count;
286 ++BI;
289 if (NumGuessedEdges != BB->succ_size() - 1)
290 return false;
292 int64_t Guessed =
293 static_cast<int64_t>(BB->getExecutionCount()) - TotalSuccCount;
294 if (Guessed < 0)
295 Guessed = 0;
297 BI = BB->branch_info_begin();
298 for (BinaryBasicBlock *Succ : BB->successors()) {
299 if (GuessedArcs.count(std::make_pair(BB, Succ))) {
300 ++BI;
301 continue;
304 BI->Count = Guessed;
305 GuessedArcs.insert(std::make_pair(BB, Succ));
306 return true;
308 llvm_unreachable("Expected unguessed arc");
311 /// Guess edge count whenever we have only one edge (pred or succ) left
312 /// to guess. Then make its count equal to BB count minus all other edge
313 /// counts we already know their count. Repeat this until there is no
314 /// change.
315 void guessEdgeByIterativeApproach(BinaryFunction &BF) {
316 ArcSet KnownArcs;
317 bool Changed = false;
319 do {
320 Changed = false;
321 for (BinaryBasicBlock &BB : BF) {
322 if (guessPredEdgeCounts(&BB, KnownArcs))
323 Changed = true;
324 if (guessSuccEdgeCounts(&BB, KnownArcs))
325 Changed = true;
327 } while (Changed);
329 // Guess count for non-inferred edges
330 for (BinaryBasicBlock &BB : BF) {
331 for (BinaryBasicBlock *Pred : BB.predecessors()) {
332 if (KnownArcs.count(std::make_pair(Pred, &BB)))
333 continue;
334 BinaryBasicBlock::BinaryBranchInfo &BI = Pred->getBranchInfo(BB);
335 BI.Count =
336 std::min(Pred->getExecutionCount(), BB.getExecutionCount()) / 2;
337 KnownArcs.insert(std::make_pair(Pred, &BB));
339 auto BI = BB.branch_info_begin();
340 for (BinaryBasicBlock *Succ : BB.successors()) {
341 if (KnownArcs.count(std::make_pair(&BB, Succ))) {
342 ++BI;
343 continue;
345 BI->Count =
346 std::min(BB.getExecutionCount(), Succ->getExecutionCount()) / 2;
347 KnownArcs.insert(std::make_pair(&BB, Succ));
348 break;
353 /// Associate each basic block with the BinaryLoop object corresponding to the
354 /// innermost loop containing this block.
355 DenseMap<const BinaryBasicBlock *, const BinaryLoop *>
356 createLoopNestLevelMap(BinaryFunction &BF) {
357 DenseMap<const BinaryBasicBlock *, const BinaryLoop *> LoopNestLevel;
358 const BinaryLoopInfo &BLI = BF.getLoopInfo();
360 for (BinaryBasicBlock &BB : BF)
361 LoopNestLevel[&BB] = BLI[&BB];
363 return LoopNestLevel;
366 } // end anonymous namespace
368 void equalizeBBCounts(DataflowInfoManager &Info, BinaryFunction &BF) {
369 if (BF.begin() == BF.end())
370 return;
372 DominatorAnalysis<false> &DA = Info.getDominatorAnalysis();
373 DominatorAnalysis<true> &PDA = Info.getPostDominatorAnalysis();
374 auto &InsnToBB = Info.getInsnToBBMap();
375 // These analyses work at the instruction granularity, but we really only need
376 // basic block granularity here. So we'll use a set of visited edges to avoid
377 // revisiting the same BBs again and again.
378 DenseMap<const BinaryBasicBlock *, std::set<const BinaryBasicBlock *>>
379 Visited;
380 // Equivalence classes mapping. Each equivalence class is defined by the set
381 // of BBs that obeys the aforementioned properties.
382 DenseMap<const BinaryBasicBlock *, signed> BBsToEC;
383 std::vector<std::vector<BinaryBasicBlock *>> Classes;
385 BF.calculateLoopInfo();
386 DenseMap<const BinaryBasicBlock *, const BinaryLoop *> LoopNestLevel =
387 createLoopNestLevelMap(BF);
389 for (BinaryBasicBlock &BB : BF)
390 BBsToEC[&BB] = -1;
392 for (BinaryBasicBlock &BB : BF) {
393 auto I = BB.begin();
394 if (I == BB.end())
395 continue;
397 DA.doForAllDominators(*I, [&](const MCInst &DomInst) {
398 BinaryBasicBlock *DomBB = InsnToBB[&DomInst];
399 if (Visited[DomBB].count(&BB))
400 return;
401 Visited[DomBB].insert(&BB);
402 if (!PDA.doesADominateB(*I, DomInst))
403 return;
404 if (LoopNestLevel[&BB] != LoopNestLevel[DomBB])
405 return;
406 if (BBsToEC[DomBB] == -1 && BBsToEC[&BB] == -1) {
407 BBsToEC[DomBB] = Classes.size();
408 BBsToEC[&BB] = Classes.size();
409 Classes.emplace_back();
410 Classes.back().push_back(DomBB);
411 Classes.back().push_back(&BB);
412 return;
414 if (BBsToEC[DomBB] == -1) {
415 BBsToEC[DomBB] = BBsToEC[&BB];
416 Classes[BBsToEC[&BB]].push_back(DomBB);
417 return;
419 if (BBsToEC[&BB] == -1) {
420 BBsToEC[&BB] = BBsToEC[DomBB];
421 Classes[BBsToEC[DomBB]].push_back(&BB);
422 return;
424 signed BBECNum = BBsToEC[&BB];
425 std::vector<BinaryBasicBlock *> DomEC = Classes[BBsToEC[DomBB]];
426 std::vector<BinaryBasicBlock *> BBEC = Classes[BBECNum];
427 for (BinaryBasicBlock *Block : DomEC) {
428 BBsToEC[Block] = BBECNum;
429 BBEC.push_back(Block);
431 DomEC.clear();
435 for (std::vector<BinaryBasicBlock *> &Class : Classes) {
436 uint64_t Max = 0ULL;
437 for (BinaryBasicBlock *BB : Class)
438 Max = std::max(Max, BB->getExecutionCount());
439 for (BinaryBasicBlock *BB : Class)
440 BB->setExecutionCount(Max);
444 void estimateEdgeCounts(BinaryFunction &BF) {
445 EdgeWeightMap PredEdgeWeights;
446 EdgeWeightMap SuccEdgeWeights;
447 if (!opts::IterativeGuess) {
448 computeEdgeWeights<Inverse<BinaryBasicBlock *>>(BF, PredEdgeWeights);
449 computeEdgeWeights<BinaryBasicBlock *>(BF, SuccEdgeWeights);
451 if (opts::EqualizeBBCounts) {
452 LLVM_DEBUG(BF.print(dbgs(), "before equalize BB counts"));
453 auto Info = DataflowInfoManager(BF, nullptr, nullptr);
454 equalizeBBCounts(Info, BF);
455 LLVM_DEBUG(BF.print(dbgs(), "after equalize BB counts"));
457 if (opts::IterativeGuess)
458 guessEdgeByIterativeApproach(BF);
459 else
460 guessEdgeByRelHotness(BF, /*UseSuccs=*/false, PredEdgeWeights,
461 SuccEdgeWeights);
462 recalculateBBCounts(BF, /*AllEdges=*/false);
465 void solveMCF(BinaryFunction &BF, MCFCostFunction CostFunction) {
466 llvm_unreachable("not implemented");
469 } // namespace bolt
470 } // namespace llvm