[docs] Add LICENSE.txt to the root of the mono-repo
[llvm-project.git] / llvm / lib / Transforms / Utils / SampleProfileInference.cpp
blob5b4177bda9f9315be02a5e8a23626cb3293424a7
1 //===- SampleProfileInference.cpp - Adjust sample profiles in the IR ------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a profile inference algorithm. Given an incomplete and
10 // possibly imprecise block counts, the algorithm reconstructs realistic block
11 // and edge counts that satisfy flow conservation rules, while minimally modify
12 // input block counts.
14 //===----------------------------------------------------------------------===//
16 #include "llvm/Transforms/Utils/SampleProfileInference.h"
17 #include "llvm/ADT/BitVector.h"
18 #include "llvm/Support/CommandLine.h"
19 #include "llvm/Support/Debug.h"
20 #include <queue>
21 #include <set>
22 #include <stack>
24 using namespace llvm;
25 #define DEBUG_TYPE "sample-profile-inference"
27 namespace {
29 static cl::opt<bool> SampleProfileEvenCountDistribution(
30 "sample-profile-even-count-distribution", cl::init(true), cl::Hidden,
31 cl::desc("Try to evenly distribute counts when there are multiple equally "
32 "likely options."));
34 static cl::opt<unsigned> SampleProfileMaxDfsCalls(
35 "sample-profile-max-dfs-calls", cl::init(10), cl::Hidden,
36 cl::desc("Maximum number of dfs iterations for even count distribution."));
38 static cl::opt<unsigned> SampleProfileProfiCostInc(
39 "sample-profile-profi-cost-inc", cl::init(10), cl::Hidden,
40 cl::desc("A cost of increasing a block's count by one."));
42 static cl::opt<unsigned> SampleProfileProfiCostDec(
43 "sample-profile-profi-cost-dec", cl::init(20), cl::Hidden,
44 cl::desc("A cost of decreasing a block's count by one."));
46 static cl::opt<unsigned> SampleProfileProfiCostIncZero(
47 "sample-profile-profi-cost-inc-zero", cl::init(11), cl::Hidden,
48 cl::desc("A cost of increasing a count of zero-weight block by one."));
50 static cl::opt<unsigned> SampleProfileProfiCostIncEntry(
51 "sample-profile-profi-cost-inc-entry", cl::init(40), cl::Hidden,
52 cl::desc("A cost of increasing the entry block's count by one."));
54 static cl::opt<unsigned> SampleProfileProfiCostDecEntry(
55 "sample-profile-profi-cost-dec-entry", cl::init(10), cl::Hidden,
56 cl::desc("A cost of decreasing the entry block's count by one."));
58 /// A value indicating an infinite flow/capacity/weight of a block/edge.
59 /// Not using numeric_limits<int64_t>::max(), as the values can be summed up
60 /// during the execution.
61 static constexpr int64_t INF = ((int64_t)1) << 50;
63 /// The minimum-cost maximum flow algorithm.
64 ///
65 /// The algorithm finds the maximum flow of minimum cost on a given (directed)
66 /// network using a modified version of the classical Moore-Bellman-Ford
67 /// approach. The algorithm applies a number of augmentation iterations in which
68 /// flow is sent along paths of positive capacity from the source to the sink.
69 /// The worst-case time complexity of the implementation is O(v(f)*m*n), where
70 /// where m is the number of edges, n is the number of vertices, and v(f) is the
71 /// value of the maximum flow. However, the observed running time on typical
72 /// instances is sub-quadratic, that is, o(n^2).
73 ///
74 /// The input is a set of edges with specified costs and capacities, and a pair
75 /// of nodes (source and sink). The output is the flow along each edge of the
76 /// minimum total cost respecting the given edge capacities.
77 class MinCostMaxFlow {
78 public:
79 // Initialize algorithm's data structures for a network of a given size.
80 void initialize(uint64_t NodeCount, uint64_t SourceNode, uint64_t SinkNode) {
81 Source = SourceNode;
82 Target = SinkNode;
84 Nodes = std::vector<Node>(NodeCount);
85 Edges = std::vector<std::vector<Edge>>(NodeCount, std::vector<Edge>());
86 if (SampleProfileEvenCountDistribution)
87 AugmentingEdges =
88 std::vector<std::vector<Edge *>>(NodeCount, std::vector<Edge *>());
91 // Run the algorithm.
92 int64_t run() {
93 // Iteratively find an augmentation path/dag in the network and send the
94 // flow along its edges
95 size_t AugmentationIters = applyFlowAugmentation();
97 // Compute the total flow and its cost
98 int64_t TotalCost = 0;
99 int64_t TotalFlow = 0;
100 for (uint64_t Src = 0; Src < Nodes.size(); Src++) {
101 for (auto &Edge : Edges[Src]) {
102 if (Edge.Flow > 0) {
103 TotalCost += Edge.Cost * Edge.Flow;
104 if (Src == Source)
105 TotalFlow += Edge.Flow;
109 LLVM_DEBUG(dbgs() << "Completed profi after " << AugmentationIters
110 << " iterations with " << TotalFlow << " total flow"
111 << " of " << TotalCost << " cost\n");
112 (void)TotalFlow;
113 (void)AugmentationIters;
114 return TotalCost;
117 /// Adding an edge to the network with a specified capacity and a cost.
118 /// Multiple edges between a pair of nodes are allowed but self-edges
119 /// are not supported.
120 void addEdge(uint64_t Src, uint64_t Dst, int64_t Capacity, int64_t Cost) {
121 assert(Capacity > 0 && "adding an edge of zero capacity");
122 assert(Src != Dst && "loop edge are not supported");
124 Edge SrcEdge;
125 SrcEdge.Dst = Dst;
126 SrcEdge.Cost = Cost;
127 SrcEdge.Capacity = Capacity;
128 SrcEdge.Flow = 0;
129 SrcEdge.RevEdgeIndex = Edges[Dst].size();
131 Edge DstEdge;
132 DstEdge.Dst = Src;
133 DstEdge.Cost = -Cost;
134 DstEdge.Capacity = 0;
135 DstEdge.Flow = 0;
136 DstEdge.RevEdgeIndex = Edges[Src].size();
138 Edges[Src].push_back(SrcEdge);
139 Edges[Dst].push_back(DstEdge);
142 /// Adding an edge to the network of infinite capacity and a given cost.
143 void addEdge(uint64_t Src, uint64_t Dst, int64_t Cost) {
144 addEdge(Src, Dst, INF, Cost);
147 /// Get the total flow from a given source node.
148 /// Returns a list of pairs (target node, amount of flow to the target).
149 const std::vector<std::pair<uint64_t, int64_t>> getFlow(uint64_t Src) const {
150 std::vector<std::pair<uint64_t, int64_t>> Flow;
151 for (const auto &Edge : Edges[Src]) {
152 if (Edge.Flow > 0)
153 Flow.push_back(std::make_pair(Edge.Dst, Edge.Flow));
155 return Flow;
158 /// Get the total flow between a pair of nodes.
159 int64_t getFlow(uint64_t Src, uint64_t Dst) const {
160 int64_t Flow = 0;
161 for (const auto &Edge : Edges[Src]) {
162 if (Edge.Dst == Dst) {
163 Flow += Edge.Flow;
166 return Flow;
169 /// A cost of taking an unlikely jump.
170 static constexpr int64_t AuxCostUnlikely = ((int64_t)1) << 30;
171 /// Minimum BaseDistance for the jump distance values in island joining.
172 static constexpr uint64_t MinBaseDistance = 10000;
174 private:
175 /// Iteratively find an augmentation path/dag in the network and send the
176 /// flow along its edges. The method returns the number of applied iterations.
177 size_t applyFlowAugmentation() {
178 size_t AugmentationIters = 0;
179 while (findAugmentingPath()) {
180 uint64_t PathCapacity = computeAugmentingPathCapacity();
181 while (PathCapacity > 0) {
182 bool Progress = false;
183 if (SampleProfileEvenCountDistribution) {
184 // Identify node/edge candidates for augmentation
185 identifyShortestEdges(PathCapacity);
187 // Find an augmenting DAG
188 auto AugmentingOrder = findAugmentingDAG();
190 // Apply the DAG augmentation
191 Progress = augmentFlowAlongDAG(AugmentingOrder);
192 PathCapacity = computeAugmentingPathCapacity();
195 if (!Progress) {
196 augmentFlowAlongPath(PathCapacity);
197 PathCapacity = 0;
200 AugmentationIters++;
203 return AugmentationIters;
206 /// Compute the capacity of the cannonical augmenting path. If the path is
207 /// saturated (that is, no flow can be sent along the path), then return 0.
208 uint64_t computeAugmentingPathCapacity() {
209 uint64_t PathCapacity = INF;
210 uint64_t Now = Target;
211 while (Now != Source) {
212 uint64_t Pred = Nodes[Now].ParentNode;
213 auto &Edge = Edges[Pred][Nodes[Now].ParentEdgeIndex];
215 assert(Edge.Capacity >= Edge.Flow && "incorrect edge flow");
216 uint64_t EdgeCapacity = uint64_t(Edge.Capacity - Edge.Flow);
217 PathCapacity = std::min(PathCapacity, EdgeCapacity);
219 Now = Pred;
221 return PathCapacity;
224 /// Check for existence of an augmenting path with a positive capacity.
225 bool findAugmentingPath() {
226 // Initialize data structures
227 for (auto &Node : Nodes) {
228 Node.Distance = INF;
229 Node.ParentNode = uint64_t(-1);
230 Node.ParentEdgeIndex = uint64_t(-1);
231 Node.Taken = false;
234 std::queue<uint64_t> Queue;
235 Queue.push(Source);
236 Nodes[Source].Distance = 0;
237 Nodes[Source].Taken = true;
238 while (!Queue.empty()) {
239 uint64_t Src = Queue.front();
240 Queue.pop();
241 Nodes[Src].Taken = false;
242 // Although the residual network contains edges with negative costs
243 // (in particular, backward edges), it can be shown that there are no
244 // negative-weight cycles and the following two invariants are maintained:
245 // (i) Dist[Source, V] >= 0 and (ii) Dist[V, Target] >= 0 for all nodes V,
246 // where Dist is the length of the shortest path between two nodes. This
247 // allows to prune the search-space of the path-finding algorithm using
248 // the following early-stop criteria:
249 // -- If we find a path with zero-distance from Source to Target, stop the
250 // search, as the path is the shortest since Dist[Source, Target] >= 0;
251 // -- If we have Dist[Source, V] > Dist[Source, Target], then do not
252 // process node V, as it is guaranteed _not_ to be on a shortest path
253 // from Source to Target; it follows from inequalities
254 // Dist[Source, Target] >= Dist[Source, V] + Dist[V, Target]
255 // >= Dist[Source, V]
256 if (!SampleProfileEvenCountDistribution && Nodes[Target].Distance == 0)
257 break;
258 if (Nodes[Src].Distance > Nodes[Target].Distance)
259 continue;
261 // Process adjacent edges
262 for (uint64_t EdgeIdx = 0; EdgeIdx < Edges[Src].size(); EdgeIdx++) {
263 auto &Edge = Edges[Src][EdgeIdx];
264 if (Edge.Flow < Edge.Capacity) {
265 uint64_t Dst = Edge.Dst;
266 int64_t NewDistance = Nodes[Src].Distance + Edge.Cost;
267 if (Nodes[Dst].Distance > NewDistance) {
268 // Update the distance and the parent node/edge
269 Nodes[Dst].Distance = NewDistance;
270 Nodes[Dst].ParentNode = Src;
271 Nodes[Dst].ParentEdgeIndex = EdgeIdx;
272 // Add the node to the queue, if it is not there yet
273 if (!Nodes[Dst].Taken) {
274 Queue.push(Dst);
275 Nodes[Dst].Taken = true;
282 return Nodes[Target].Distance != INF;
285 /// Update the current flow along the augmenting path.
286 void augmentFlowAlongPath(uint64_t PathCapacity) {
287 assert(PathCapacity > 0 && "found an incorrect augmenting path");
288 uint64_t Now = Target;
289 while (Now != Source) {
290 uint64_t Pred = Nodes[Now].ParentNode;
291 auto &Edge = Edges[Pred][Nodes[Now].ParentEdgeIndex];
292 auto &RevEdge = Edges[Now][Edge.RevEdgeIndex];
294 Edge.Flow += PathCapacity;
295 RevEdge.Flow -= PathCapacity;
297 Now = Pred;
301 /// Find an Augmenting DAG order using a modified version of DFS in which we
302 /// can visit a node multiple times. In the DFS search, when scanning each
303 /// edge out of a node, continue search at Edge.Dst endpoint if it has not
304 /// been discovered yet and its NumCalls < MaxDfsCalls. The algorithm
305 /// runs in O(MaxDfsCalls * |Edges| + |Nodes|) time.
306 /// It returns an Augmenting Order (Taken nodes in decreasing Finish time)
307 /// that starts with Source and ends with Target.
308 std::vector<uint64_t> findAugmentingDAG() {
309 // We use a stack based implemenation of DFS to avoid recursion.
310 // Defining DFS data structures:
311 // A pair (NodeIdx, EdgeIdx) at the top of the Stack denotes that
312 // - we are currently visiting Nodes[NodeIdx] and
313 // - the next edge to scan is Edges[NodeIdx][EdgeIdx]
314 typedef std::pair<uint64_t, uint64_t> StackItemType;
315 std::stack<StackItemType> Stack;
316 std::vector<uint64_t> AugmentingOrder;
318 // Phase 0: Initialize Node attributes and Time for DFS run
319 for (auto &Node : Nodes) {
320 Node.Discovery = 0;
321 Node.Finish = 0;
322 Node.NumCalls = 0;
323 Node.Taken = false;
325 uint64_t Time = 0;
326 // Mark Target as Taken
327 // Taken attribute will be propagated backwards from Target towards Source
328 Nodes[Target].Taken = true;
330 // Phase 1: Start DFS traversal from Source
331 Stack.emplace(Source, 0);
332 Nodes[Source].Discovery = ++Time;
333 while (!Stack.empty()) {
334 auto NodeIdx = Stack.top().first;
335 auto EdgeIdx = Stack.top().second;
337 // If we haven't scanned all edges out of NodeIdx, continue scanning
338 if (EdgeIdx < Edges[NodeIdx].size()) {
339 auto &Edge = Edges[NodeIdx][EdgeIdx];
340 auto &Dst = Nodes[Edge.Dst];
341 Stack.top().second++;
343 if (Edge.OnShortestPath) {
344 // If we haven't seen Edge.Dst so far, continue DFS search there
345 if (Dst.Discovery == 0 && Dst.NumCalls < SampleProfileMaxDfsCalls) {
346 Dst.Discovery = ++Time;
347 Stack.emplace(Edge.Dst, 0);
348 Dst.NumCalls++;
349 } else if (Dst.Taken && Dst.Finish != 0) {
350 // Else, if Edge.Dst already have a path to Target, so that NodeIdx
351 Nodes[NodeIdx].Taken = true;
354 } else {
355 // If we are done scanning all edge out of NodeIdx
356 Stack.pop();
357 // If we haven't found a path from NodeIdx to Target, forget about it
358 if (!Nodes[NodeIdx].Taken) {
359 Nodes[NodeIdx].Discovery = 0;
360 } else {
361 // If we have found a path from NodeIdx to Target, then finish NodeIdx
362 // and propagate Taken flag to DFS parent unless at the Source
363 Nodes[NodeIdx].Finish = ++Time;
364 // NodeIdx == Source if and only if the stack is empty
365 if (NodeIdx != Source) {
366 assert(!Stack.empty() && "empty stack while running dfs");
367 Nodes[Stack.top().first].Taken = true;
369 AugmentingOrder.push_back(NodeIdx);
373 // Nodes are collected decreasing Finish time, so the order is reversed
374 std::reverse(AugmentingOrder.begin(), AugmentingOrder.end());
376 // Phase 2: Extract all forward (DAG) edges and fill in AugmentingEdges
377 for (size_t Src : AugmentingOrder) {
378 AugmentingEdges[Src].clear();
379 for (auto &Edge : Edges[Src]) {
380 uint64_t Dst = Edge.Dst;
381 if (Edge.OnShortestPath && Nodes[Src].Taken && Nodes[Dst].Taken &&
382 Nodes[Dst].Finish < Nodes[Src].Finish) {
383 AugmentingEdges[Src].push_back(&Edge);
386 assert((Src == Target || !AugmentingEdges[Src].empty()) &&
387 "incorrectly constructed augmenting edges");
390 return AugmentingOrder;
393 /// Update the current flow along the given (acyclic) subgraph specified by
394 /// the vertex order, AugmentingOrder. The objective is to send as much flow
395 /// as possible while evenly distributing flow among successors of each node.
396 /// After the update at least one edge is saturated.
397 bool augmentFlowAlongDAG(const std::vector<uint64_t> &AugmentingOrder) {
398 // Phase 0: Initialization
399 for (uint64_t Src : AugmentingOrder) {
400 Nodes[Src].FracFlow = 0;
401 Nodes[Src].IntFlow = 0;
402 for (auto &Edge : AugmentingEdges[Src]) {
403 Edge->AugmentedFlow = 0;
407 // Phase 1: Send a unit of fractional flow along the DAG
408 uint64_t MaxFlowAmount = INF;
409 Nodes[Source].FracFlow = 1.0;
410 for (uint64_t Src : AugmentingOrder) {
411 assert((Src == Target || Nodes[Src].FracFlow > 0.0) &&
412 "incorrectly computed fractional flow");
413 // Distribute flow evenly among successors of Src
414 uint64_t Degree = AugmentingEdges[Src].size();
415 for (auto &Edge : AugmentingEdges[Src]) {
416 double EdgeFlow = Nodes[Src].FracFlow / Degree;
417 Nodes[Edge->Dst].FracFlow += EdgeFlow;
418 if (Edge->Capacity == INF)
419 continue;
420 uint64_t MaxIntFlow = double(Edge->Capacity - Edge->Flow) / EdgeFlow;
421 MaxFlowAmount = std::min(MaxFlowAmount, MaxIntFlow);
424 // Stop early if we cannot send any (integral) flow from Source to Target
425 if (MaxFlowAmount == 0)
426 return false;
428 // Phase 2: Send an integral flow of MaxFlowAmount
429 Nodes[Source].IntFlow = MaxFlowAmount;
430 for (uint64_t Src : AugmentingOrder) {
431 if (Src == Target)
432 break;
433 // Distribute flow evenly among successors of Src, rounding up to make
434 // sure all flow is sent
435 uint64_t Degree = AugmentingEdges[Src].size();
436 // We are guaranteeed that Node[Src].IntFlow <= SuccFlow * Degree
437 uint64_t SuccFlow = (Nodes[Src].IntFlow + Degree - 1) / Degree;
438 for (auto &Edge : AugmentingEdges[Src]) {
439 uint64_t Dst = Edge->Dst;
440 uint64_t EdgeFlow = std::min(Nodes[Src].IntFlow, SuccFlow);
441 EdgeFlow = std::min(EdgeFlow, uint64_t(Edge->Capacity - Edge->Flow));
442 Nodes[Dst].IntFlow += EdgeFlow;
443 Nodes[Src].IntFlow -= EdgeFlow;
444 Edge->AugmentedFlow += EdgeFlow;
447 assert(Nodes[Target].IntFlow <= MaxFlowAmount);
448 Nodes[Target].IntFlow = 0;
450 // Phase 3: Send excess flow back traversing the nodes backwards.
451 // Because of rounding, not all flow can be sent along the edges of Src.
452 // Hence, sending the remaining flow back to maintain flow conservation
453 for (size_t Idx = AugmentingOrder.size() - 1; Idx > 0; Idx--) {
454 uint64_t Src = AugmentingOrder[Idx - 1];
455 // Try to send excess flow back along each edge.
456 // Make sure we only send back flow we just augmented (AugmentedFlow).
457 for (auto &Edge : AugmentingEdges[Src]) {
458 uint64_t Dst = Edge->Dst;
459 if (Nodes[Dst].IntFlow == 0)
460 continue;
461 uint64_t EdgeFlow = std::min(Nodes[Dst].IntFlow, Edge->AugmentedFlow);
462 Nodes[Dst].IntFlow -= EdgeFlow;
463 Nodes[Src].IntFlow += EdgeFlow;
464 Edge->AugmentedFlow -= EdgeFlow;
468 // Phase 4: Update flow values along all edges
469 bool HasSaturatedEdges = false;
470 for (uint64_t Src : AugmentingOrder) {
471 // Verify that we have sent all the excess flow from the node
472 assert(Src == Source || Nodes[Src].IntFlow == 0);
473 for (auto &Edge : AugmentingEdges[Src]) {
474 assert(uint64_t(Edge->Capacity - Edge->Flow) >= Edge->AugmentedFlow);
475 // Update flow values along the edge and its reverse copy
476 auto &RevEdge = Edges[Edge->Dst][Edge->RevEdgeIndex];
477 Edge->Flow += Edge->AugmentedFlow;
478 RevEdge.Flow -= Edge->AugmentedFlow;
479 if (Edge->Capacity == Edge->Flow && Edge->AugmentedFlow > 0)
480 HasSaturatedEdges = true;
484 // The augmentation is successful iff at least one edge becomes saturated
485 return HasSaturatedEdges;
488 /// Identify candidate (shortest) edges for augmentation.
489 void identifyShortestEdges(uint64_t PathCapacity) {
490 assert(PathCapacity > 0 && "found an incorrect augmenting DAG");
491 // To make sure the augmentation DAG contains only edges with large residual
492 // capacity, we prune all edges whose capacity is below a fraction of
493 // the capacity of the augmented path.
494 // (All edges of the path itself are always in the DAG)
495 uint64_t MinCapacity = std::max(PathCapacity / 2, uint64_t(1));
497 // Decide which edges are on a shortest path from Source to Target
498 for (size_t Src = 0; Src < Nodes.size(); Src++) {
499 // An edge cannot be augmenting if the endpoint has large distance
500 if (Nodes[Src].Distance > Nodes[Target].Distance)
501 continue;
503 for (auto &Edge : Edges[Src]) {
504 uint64_t Dst = Edge.Dst;
505 Edge.OnShortestPath =
506 Src != Target && Dst != Source &&
507 Nodes[Dst].Distance <= Nodes[Target].Distance &&
508 Nodes[Dst].Distance == Nodes[Src].Distance + Edge.Cost &&
509 Edge.Capacity > Edge.Flow &&
510 uint64_t(Edge.Capacity - Edge.Flow) >= MinCapacity;
515 /// A node in a flow network.
516 struct Node {
517 /// The cost of the cheapest path from the source to the current node.
518 int64_t Distance;
519 /// The node preceding the current one in the path.
520 uint64_t ParentNode;
521 /// The index of the edge between ParentNode and the current node.
522 uint64_t ParentEdgeIndex;
523 /// An indicator of whether the current node is in a queue.
524 bool Taken;
526 /// Data fields utilized in DAG-augmentation:
527 /// Fractional flow.
528 double FracFlow;
529 /// Integral flow.
530 uint64_t IntFlow;
531 /// Discovery time.
532 uint64_t Discovery;
533 /// Finish time.
534 uint64_t Finish;
535 /// NumCalls.
536 uint64_t NumCalls;
539 /// An edge in a flow network.
540 struct Edge {
541 /// The cost of the edge.
542 int64_t Cost;
543 /// The capacity of the edge.
544 int64_t Capacity;
545 /// The current flow on the edge.
546 int64_t Flow;
547 /// The destination node of the edge.
548 uint64_t Dst;
549 /// The index of the reverse edge between Dst and the current node.
550 uint64_t RevEdgeIndex;
552 /// Data fields utilized in DAG-augmentation:
553 /// Whether the edge is currently on a shortest path from Source to Target.
554 bool OnShortestPath;
555 /// Extra flow along the edge.
556 uint64_t AugmentedFlow;
559 /// The set of network nodes.
560 std::vector<Node> Nodes;
561 /// The set of network edges.
562 std::vector<std::vector<Edge>> Edges;
563 /// Source node of the flow.
564 uint64_t Source;
565 /// Target (sink) node of the flow.
566 uint64_t Target;
567 /// Augmenting edges.
568 std::vector<std::vector<Edge *>> AugmentingEdges;
571 constexpr int64_t MinCostMaxFlow::AuxCostUnlikely;
572 constexpr uint64_t MinCostMaxFlow::MinBaseDistance;
574 /// A post-processing adjustment of control flow. It applies two steps by
575 /// rerouting some flow and making it more realistic:
577 /// - First, it removes all isolated components ("islands") with a positive flow
578 /// that are unreachable from the entry block. For every such component, we
579 /// find the shortest from the entry to an exit passing through the component,
580 /// and increase the flow by one unit along the path.
582 /// - Second, it identifies all "unknown subgraphs" consisting of basic blocks
583 /// with no sampled counts. Then it rebalnces the flow that goes through such
584 /// a subgraph so that each branch is taken with probability 50%.
585 /// An unknown subgraph is such that for every two nodes u and v:
586 /// - u dominates v and u is not unknown;
587 /// - v post-dominates u; and
588 /// - all inner-nodes of all (u,v)-paths are unknown.
590 class FlowAdjuster {
591 public:
592 FlowAdjuster(FlowFunction &Func) : Func(Func) {
593 assert(Func.Blocks[Func.Entry].isEntry() &&
594 "incorrect index of the entry block");
597 // Run the post-processing
598 void run() {
599 /// Adjust the flow to get rid of isolated components.
600 joinIsolatedComponents();
602 /// Rebalance the flow inside unknown subgraphs.
603 rebalanceUnknownSubgraphs();
606 private:
607 void joinIsolatedComponents() {
608 // Find blocks that are reachable from the source
609 auto Visited = BitVector(NumBlocks(), false);
610 findReachable(Func.Entry, Visited);
612 // Iterate over all non-reachable blocks and adjust their weights
613 for (uint64_t I = 0; I < NumBlocks(); I++) {
614 auto &Block = Func.Blocks[I];
615 if (Block.Flow > 0 && !Visited[I]) {
616 // Find a path from the entry to an exit passing through the block I
617 auto Path = findShortestPath(I);
618 // Increase the flow along the path
619 assert(Path.size() > 0 && Path[0]->Source == Func.Entry &&
620 "incorrectly computed path adjusting control flow");
621 Func.Blocks[Func.Entry].Flow += 1;
622 for (auto &Jump : Path) {
623 Jump->Flow += 1;
624 Func.Blocks[Jump->Target].Flow += 1;
625 // Update reachability
626 findReachable(Jump->Target, Visited);
632 /// Run BFS from a given block along the jumps with a positive flow and mark
633 /// all reachable blocks.
634 void findReachable(uint64_t Src, BitVector &Visited) {
635 if (Visited[Src])
636 return;
637 std::queue<uint64_t> Queue;
638 Queue.push(Src);
639 Visited[Src] = true;
640 while (!Queue.empty()) {
641 Src = Queue.front();
642 Queue.pop();
643 for (auto Jump : Func.Blocks[Src].SuccJumps) {
644 uint64_t Dst = Jump->Target;
645 if (Jump->Flow > 0 && !Visited[Dst]) {
646 Queue.push(Dst);
647 Visited[Dst] = true;
653 /// Find the shortest path from the entry block to an exit block passing
654 /// through a given block.
655 std::vector<FlowJump *> findShortestPath(uint64_t BlockIdx) {
656 // A path from the entry block to BlockIdx
657 auto ForwardPath = findShortestPath(Func.Entry, BlockIdx);
658 // A path from BlockIdx to an exit block
659 auto BackwardPath = findShortestPath(BlockIdx, AnyExitBlock);
661 // Concatenate the two paths
662 std::vector<FlowJump *> Result;
663 Result.insert(Result.end(), ForwardPath.begin(), ForwardPath.end());
664 Result.insert(Result.end(), BackwardPath.begin(), BackwardPath.end());
665 return Result;
668 /// Apply the Dijkstra algorithm to find the shortest path from a given
669 /// Source to a given Target block.
670 /// If Target == -1, then the path ends at an exit block.
671 std::vector<FlowJump *> findShortestPath(uint64_t Source, uint64_t Target) {
672 // Quit early, if possible
673 if (Source == Target)
674 return std::vector<FlowJump *>();
675 if (Func.Blocks[Source].isExit() && Target == AnyExitBlock)
676 return std::vector<FlowJump *>();
678 // Initialize data structures
679 auto Distance = std::vector<int64_t>(NumBlocks(), INF);
680 auto Parent = std::vector<FlowJump *>(NumBlocks(), nullptr);
681 Distance[Source] = 0;
682 std::set<std::pair<uint64_t, uint64_t>> Queue;
683 Queue.insert(std::make_pair(Distance[Source], Source));
685 // Run the Dijkstra algorithm
686 while (!Queue.empty()) {
687 uint64_t Src = Queue.begin()->second;
688 Queue.erase(Queue.begin());
689 // If we found a solution, quit early
690 if (Src == Target ||
691 (Func.Blocks[Src].isExit() && Target == AnyExitBlock))
692 break;
694 for (auto Jump : Func.Blocks[Src].SuccJumps) {
695 uint64_t Dst = Jump->Target;
696 int64_t JumpDist = jumpDistance(Jump);
697 if (Distance[Dst] > Distance[Src] + JumpDist) {
698 Queue.erase(std::make_pair(Distance[Dst], Dst));
700 Distance[Dst] = Distance[Src] + JumpDist;
701 Parent[Dst] = Jump;
703 Queue.insert(std::make_pair(Distance[Dst], Dst));
707 // If Target is not provided, find the closest exit block
708 if (Target == AnyExitBlock) {
709 for (uint64_t I = 0; I < NumBlocks(); I++) {
710 if (Func.Blocks[I].isExit() && Parent[I] != nullptr) {
711 if (Target == AnyExitBlock || Distance[Target] > Distance[I]) {
712 Target = I;
717 assert(Parent[Target] != nullptr && "a path does not exist");
719 // Extract the constructed path
720 std::vector<FlowJump *> Result;
721 uint64_t Now = Target;
722 while (Now != Source) {
723 assert(Now == Parent[Now]->Target && "incorrect parent jump");
724 Result.push_back(Parent[Now]);
725 Now = Parent[Now]->Source;
727 // Reverse the path, since it is extracted from Target to Source
728 std::reverse(Result.begin(), Result.end());
729 return Result;
732 /// A distance of a path for a given jump.
733 /// In order to incite the path to use blocks/jumps with large positive flow,
734 /// and avoid changing branch probability of outgoing edges drastically,
735 /// set the jump distance so as:
736 /// - to minimize the number of unlikely jumps used and subject to that,
737 /// - to minimize the number of Flow == 0 jumps used and subject to that,
738 /// - minimizes total multiplicative Flow increase for the remaining edges.
739 /// To capture this objective with integer distances, we round off fractional
740 /// parts to a multiple of 1 / BaseDistance.
741 int64_t jumpDistance(FlowJump *Jump) const {
742 uint64_t BaseDistance =
743 std::max(MinCostMaxFlow::MinBaseDistance,
744 std::min(Func.Blocks[Func.Entry].Flow,
745 MinCostMaxFlow::AuxCostUnlikely / NumBlocks()));
746 if (Jump->IsUnlikely)
747 return MinCostMaxFlow::AuxCostUnlikely;
748 if (Jump->Flow > 0)
749 return BaseDistance + BaseDistance / Jump->Flow;
750 return BaseDistance * NumBlocks();
753 uint64_t NumBlocks() const { return Func.Blocks.size(); }
755 /// Rebalance unknown subgraphs so that the flow is split evenly across the
756 /// outgoing branches of every block of the subgraph. The method iterates over
757 /// blocks with known weight and identifies unknown subgraphs rooted at the
758 /// blocks. Then it verifies if flow rebalancing is feasible and applies it.
759 void rebalanceUnknownSubgraphs() {
760 // Try to find unknown subgraphs from each block
761 for (uint64_t I = 0; I < Func.Blocks.size(); I++) {
762 auto SrcBlock = &Func.Blocks[I];
763 // Verify if rebalancing rooted at SrcBlock is feasible
764 if (!canRebalanceAtRoot(SrcBlock))
765 continue;
767 // Find an unknown subgraphs starting at SrcBlock. Along the way,
768 // fill in known destinations and intermediate unknown blocks.
769 std::vector<FlowBlock *> UnknownBlocks;
770 std::vector<FlowBlock *> KnownDstBlocks;
771 findUnknownSubgraph(SrcBlock, KnownDstBlocks, UnknownBlocks);
773 // Verify if rebalancing of the subgraph is feasible. If the search is
774 // successful, find the unique destination block (which can be null)
775 FlowBlock *DstBlock = nullptr;
776 if (!canRebalanceSubgraph(SrcBlock, KnownDstBlocks, UnknownBlocks,
777 DstBlock))
778 continue;
780 // We cannot rebalance subgraphs containing cycles among unknown blocks
781 if (!isAcyclicSubgraph(SrcBlock, DstBlock, UnknownBlocks))
782 continue;
784 // Rebalance the flow
785 rebalanceUnknownSubgraph(SrcBlock, DstBlock, UnknownBlocks);
789 /// Verify if rebalancing rooted at a given block is possible.
790 bool canRebalanceAtRoot(const FlowBlock *SrcBlock) {
791 // Do not attempt to find unknown subgraphs from an unknown or a
792 // zero-flow block
793 if (SrcBlock->UnknownWeight || SrcBlock->Flow == 0)
794 return false;
796 // Do not attempt to process subgraphs from a block w/o unknown sucessors
797 bool HasUnknownSuccs = false;
798 for (auto Jump : SrcBlock->SuccJumps) {
799 if (Func.Blocks[Jump->Target].UnknownWeight) {
800 HasUnknownSuccs = true;
801 break;
804 if (!HasUnknownSuccs)
805 return false;
807 return true;
810 /// Find an unknown subgraph starting at block SrcBlock. The method sets
811 /// identified destinations, KnownDstBlocks, and intermediate UnknownBlocks.
812 void findUnknownSubgraph(const FlowBlock *SrcBlock,
813 std::vector<FlowBlock *> &KnownDstBlocks,
814 std::vector<FlowBlock *> &UnknownBlocks) {
815 // Run BFS from SrcBlock and make sure all paths are going through unknown
816 // blocks and end at a known DstBlock
817 auto Visited = BitVector(NumBlocks(), false);
818 std::queue<uint64_t> Queue;
820 Queue.push(SrcBlock->Index);
821 Visited[SrcBlock->Index] = true;
822 while (!Queue.empty()) {
823 auto &Block = Func.Blocks[Queue.front()];
824 Queue.pop();
825 // Process blocks reachable from Block
826 for (auto Jump : Block.SuccJumps) {
827 // If Jump can be ignored, skip it
828 if (ignoreJump(SrcBlock, nullptr, Jump))
829 continue;
831 uint64_t Dst = Jump->Target;
832 // If Dst has been visited, skip Jump
833 if (Visited[Dst])
834 continue;
835 // Process block Dst
836 Visited[Dst] = true;
837 if (!Func.Blocks[Dst].UnknownWeight) {
838 KnownDstBlocks.push_back(&Func.Blocks[Dst]);
839 } else {
840 Queue.push(Dst);
841 UnknownBlocks.push_back(&Func.Blocks[Dst]);
847 /// Verify if rebalancing of the subgraph is feasible. If the checks are
848 /// successful, set the unique destination block, DstBlock (can be null).
849 bool canRebalanceSubgraph(const FlowBlock *SrcBlock,
850 const std::vector<FlowBlock *> &KnownDstBlocks,
851 const std::vector<FlowBlock *> &UnknownBlocks,
852 FlowBlock *&DstBlock) {
853 // If the list of unknown blocks is empty, we don't need rebalancing
854 if (UnknownBlocks.empty())
855 return false;
857 // If there are multiple known sinks, we can't rebalance
858 if (KnownDstBlocks.size() > 1)
859 return false;
860 DstBlock = KnownDstBlocks.empty() ? nullptr : KnownDstBlocks.front();
862 // Verify sinks of the subgraph
863 for (auto Block : UnknownBlocks) {
864 if (Block->SuccJumps.empty()) {
865 // If there are multiple (known and unknown) sinks, we can't rebalance
866 if (DstBlock != nullptr)
867 return false;
868 continue;
870 size_t NumIgnoredJumps = 0;
871 for (auto Jump : Block->SuccJumps) {
872 if (ignoreJump(SrcBlock, DstBlock, Jump))
873 NumIgnoredJumps++;
875 // If there is a non-sink block in UnknownBlocks with all jumps ignored,
876 // then we can't rebalance
877 if (NumIgnoredJumps == Block->SuccJumps.size())
878 return false;
881 return true;
884 /// Decide whether the Jump is ignored while processing an unknown subgraphs
885 /// rooted at basic block SrcBlock with the destination block, DstBlock.
886 bool ignoreJump(const FlowBlock *SrcBlock, const FlowBlock *DstBlock,
887 const FlowJump *Jump) {
888 // Ignore unlikely jumps with zero flow
889 if (Jump->IsUnlikely && Jump->Flow == 0)
890 return true;
892 auto JumpSource = &Func.Blocks[Jump->Source];
893 auto JumpTarget = &Func.Blocks[Jump->Target];
895 // Do not ignore jumps coming into DstBlock
896 if (DstBlock != nullptr && JumpTarget == DstBlock)
897 return false;
899 // Ignore jumps out of SrcBlock to known blocks
900 if (!JumpTarget->UnknownWeight && JumpSource == SrcBlock)
901 return true;
903 // Ignore jumps to known blocks with zero flow
904 if (!JumpTarget->UnknownWeight && JumpTarget->Flow == 0)
905 return true;
907 return false;
910 /// Verify if the given unknown subgraph is acyclic, and if yes, reorder
911 /// UnknownBlocks in the topological order (so that all jumps are "forward").
912 bool isAcyclicSubgraph(const FlowBlock *SrcBlock, const FlowBlock *DstBlock,
913 std::vector<FlowBlock *> &UnknownBlocks) {
914 // Extract local in-degrees in the considered subgraph
915 auto LocalInDegree = std::vector<uint64_t>(NumBlocks(), 0);
916 auto fillInDegree = [&](const FlowBlock *Block) {
917 for (auto Jump : Block->SuccJumps) {
918 if (ignoreJump(SrcBlock, DstBlock, Jump))
919 continue;
920 LocalInDegree[Jump->Target]++;
923 fillInDegree(SrcBlock);
924 for (auto Block : UnknownBlocks) {
925 fillInDegree(Block);
927 // A loop containing SrcBlock
928 if (LocalInDegree[SrcBlock->Index] > 0)
929 return false;
931 std::vector<FlowBlock *> AcyclicOrder;
932 std::queue<uint64_t> Queue;
933 Queue.push(SrcBlock->Index);
934 while (!Queue.empty()) {
935 FlowBlock *Block = &Func.Blocks[Queue.front()];
936 Queue.pop();
937 // Stop propagation once we reach DstBlock, if any
938 if (DstBlock != nullptr && Block == DstBlock)
939 break;
941 // Keep an acyclic order of unknown blocks
942 if (Block->UnknownWeight && Block != SrcBlock)
943 AcyclicOrder.push_back(Block);
945 // Add to the queue all successors with zero local in-degree
946 for (auto Jump : Block->SuccJumps) {
947 if (ignoreJump(SrcBlock, DstBlock, Jump))
948 continue;
949 uint64_t Dst = Jump->Target;
950 LocalInDegree[Dst]--;
951 if (LocalInDegree[Dst] == 0) {
952 Queue.push(Dst);
957 // If there is a cycle in the subgraph, AcyclicOrder contains only a subset
958 // of all blocks
959 if (UnknownBlocks.size() != AcyclicOrder.size())
960 return false;
961 UnknownBlocks = AcyclicOrder;
962 return true;
965 /// Rebalance a given subgraph rooted at SrcBlock, ending at DstBlock and
966 /// having UnknownBlocks intermediate blocks.
967 void rebalanceUnknownSubgraph(const FlowBlock *SrcBlock,
968 const FlowBlock *DstBlock,
969 const std::vector<FlowBlock *> &UnknownBlocks) {
970 assert(SrcBlock->Flow > 0 && "zero-flow block in unknown subgraph");
972 // Ditribute flow from the source block
973 uint64_t BlockFlow = 0;
974 // SrcBlock's flow is the sum of outgoing flows along non-ignored jumps
975 for (auto Jump : SrcBlock->SuccJumps) {
976 if (ignoreJump(SrcBlock, DstBlock, Jump))
977 continue;
978 BlockFlow += Jump->Flow;
980 rebalanceBlock(SrcBlock, DstBlock, SrcBlock, BlockFlow);
982 // Ditribute flow from the remaining blocks
983 for (auto Block : UnknownBlocks) {
984 assert(Block->UnknownWeight && "incorrect unknown subgraph");
985 uint64_t BlockFlow = 0;
986 // Block's flow is the sum of incoming flows
987 for (auto Jump : Block->PredJumps) {
988 BlockFlow += Jump->Flow;
990 Block->Flow = BlockFlow;
991 rebalanceBlock(SrcBlock, DstBlock, Block, BlockFlow);
995 /// Redistribute flow for a block in a subgraph rooted at SrcBlock,
996 /// and ending at DstBlock.
997 void rebalanceBlock(const FlowBlock *SrcBlock, const FlowBlock *DstBlock,
998 const FlowBlock *Block, uint64_t BlockFlow) {
999 // Process all successor jumps and update corresponding flow values
1000 size_t BlockDegree = 0;
1001 for (auto Jump : Block->SuccJumps) {
1002 if (ignoreJump(SrcBlock, DstBlock, Jump))
1003 continue;
1004 BlockDegree++;
1006 // If all successor jumps of the block are ignored, skip it
1007 if (DstBlock == nullptr && BlockDegree == 0)
1008 return;
1009 assert(BlockDegree > 0 && "all outgoing jumps are ignored");
1011 // Each of the Block's successors gets the following amount of flow.
1012 // Rounding the value up so that all flow is propagated
1013 uint64_t SuccFlow = (BlockFlow + BlockDegree - 1) / BlockDegree;
1014 for (auto Jump : Block->SuccJumps) {
1015 if (ignoreJump(SrcBlock, DstBlock, Jump))
1016 continue;
1017 uint64_t Flow = std::min(SuccFlow, BlockFlow);
1018 Jump->Flow = Flow;
1019 BlockFlow -= Flow;
1021 assert(BlockFlow == 0 && "not all flow is propagated");
1024 /// A constant indicating an arbitrary exit block of a function.
1025 static constexpr uint64_t AnyExitBlock = uint64_t(-1);
1027 /// The function.
1028 FlowFunction &Func;
1031 /// Initializing flow network for a given function.
1033 /// Every block is split into three nodes that are responsible for (i) an
1034 /// incoming flow, (ii) an outgoing flow, and (iii) penalizing an increase or
1035 /// reduction of the block weight.
1036 void initializeNetwork(MinCostMaxFlow &Network, FlowFunction &Func) {
1037 uint64_t NumBlocks = Func.Blocks.size();
1038 assert(NumBlocks > 1 && "Too few blocks in a function");
1039 LLVM_DEBUG(dbgs() << "Initializing profi for " << NumBlocks << " blocks\n");
1041 // Pre-process data: make sure the entry weight is at least 1
1042 if (Func.Blocks[Func.Entry].Weight == 0) {
1043 Func.Blocks[Func.Entry].Weight = 1;
1045 // Introducing dummy source/sink pairs to allow flow circulation.
1046 // The nodes corresponding to blocks of Func have indicies in the range
1047 // [0..3 * NumBlocks); the dummy nodes are indexed by the next four values.
1048 uint64_t S = 3 * NumBlocks;
1049 uint64_t T = S + 1;
1050 uint64_t S1 = S + 2;
1051 uint64_t T1 = S + 3;
1053 Network.initialize(3 * NumBlocks + 4, S1, T1);
1055 // Create three nodes for every block of the function
1056 for (uint64_t B = 0; B < NumBlocks; B++) {
1057 auto &Block = Func.Blocks[B];
1058 assert((!Block.UnknownWeight || Block.Weight == 0 || Block.isEntry()) &&
1059 "non-zero weight of a block w/o weight except for an entry");
1061 // Split every block into two nodes
1062 uint64_t Bin = 3 * B;
1063 uint64_t Bout = 3 * B + 1;
1064 uint64_t Baux = 3 * B + 2;
1065 if (Block.Weight > 0) {
1066 Network.addEdge(S1, Bout, Block.Weight, 0);
1067 Network.addEdge(Bin, T1, Block.Weight, 0);
1070 // Edges from S and to T
1071 assert((!Block.isEntry() || !Block.isExit()) &&
1072 "a block cannot be an entry and an exit");
1073 if (Block.isEntry()) {
1074 Network.addEdge(S, Bin, 0);
1075 } else if (Block.isExit()) {
1076 Network.addEdge(Bout, T, 0);
1079 // An auxiliary node to allow increase/reduction of block counts:
1080 // We assume that decreasing block counts is more expensive than increasing,
1081 // and thus, setting separate costs here. In the future we may want to tune
1082 // the relative costs so as to maximize the quality of generated profiles.
1083 int64_t AuxCostInc = SampleProfileProfiCostInc;
1084 int64_t AuxCostDec = SampleProfileProfiCostDec;
1085 if (Block.UnknownWeight) {
1086 // Do not penalize changing weights of blocks w/o known profile count
1087 AuxCostInc = 0;
1088 AuxCostDec = 0;
1089 } else {
1090 // Increasing the count for "cold" blocks with zero initial count is more
1091 // expensive than for "hot" ones
1092 if (Block.Weight == 0) {
1093 AuxCostInc = SampleProfileProfiCostIncZero;
1095 // Modifying the count of the entry block is expensive
1096 if (Block.isEntry()) {
1097 AuxCostInc = SampleProfileProfiCostIncEntry;
1098 AuxCostDec = SampleProfileProfiCostDecEntry;
1101 // For blocks with self-edges, do not penalize a reduction of the count,
1102 // as all of the increase can be attributed to the self-edge
1103 if (Block.HasSelfEdge) {
1104 AuxCostDec = 0;
1107 Network.addEdge(Bin, Baux, AuxCostInc);
1108 Network.addEdge(Baux, Bout, AuxCostInc);
1109 if (Block.Weight > 0) {
1110 Network.addEdge(Bout, Baux, AuxCostDec);
1111 Network.addEdge(Baux, Bin, AuxCostDec);
1115 // Creating edges for every jump
1116 for (auto &Jump : Func.Jumps) {
1117 uint64_t Src = Jump.Source;
1118 uint64_t Dst = Jump.Target;
1119 if (Src != Dst) {
1120 uint64_t SrcOut = 3 * Src + 1;
1121 uint64_t DstIn = 3 * Dst;
1122 uint64_t Cost = Jump.IsUnlikely ? MinCostMaxFlow::AuxCostUnlikely : 0;
1123 Network.addEdge(SrcOut, DstIn, Cost);
1127 // Make sure we have a valid flow circulation
1128 Network.addEdge(T, S, 0);
1131 /// Extract resulting block and edge counts from the flow network.
1132 void extractWeights(MinCostMaxFlow &Network, FlowFunction &Func) {
1133 uint64_t NumBlocks = Func.Blocks.size();
1135 // Extract resulting block counts
1136 for (uint64_t Src = 0; Src < NumBlocks; Src++) {
1137 auto &Block = Func.Blocks[Src];
1138 uint64_t SrcOut = 3 * Src + 1;
1139 int64_t Flow = 0;
1140 for (const auto &Adj : Network.getFlow(SrcOut)) {
1141 uint64_t DstIn = Adj.first;
1142 int64_t DstFlow = Adj.second;
1143 bool IsAuxNode = (DstIn < 3 * NumBlocks && DstIn % 3 == 2);
1144 if (!IsAuxNode || Block.HasSelfEdge) {
1145 Flow += DstFlow;
1148 Block.Flow = Flow;
1149 assert(Flow >= 0 && "negative block flow");
1152 // Extract resulting jump counts
1153 for (auto &Jump : Func.Jumps) {
1154 uint64_t Src = Jump.Source;
1155 uint64_t Dst = Jump.Target;
1156 int64_t Flow = 0;
1157 if (Src != Dst) {
1158 uint64_t SrcOut = 3 * Src + 1;
1159 uint64_t DstIn = 3 * Dst;
1160 Flow = Network.getFlow(SrcOut, DstIn);
1161 } else {
1162 uint64_t SrcOut = 3 * Src + 1;
1163 uint64_t SrcAux = 3 * Src + 2;
1164 int64_t AuxFlow = Network.getFlow(SrcOut, SrcAux);
1165 if (AuxFlow > 0)
1166 Flow = AuxFlow;
1168 Jump.Flow = Flow;
1169 assert(Flow >= 0 && "negative jump flow");
1173 #ifndef NDEBUG
1174 /// Verify that the computed flow values satisfy flow conservation rules
1175 void verifyWeights(const FlowFunction &Func) {
1176 const uint64_t NumBlocks = Func.Blocks.size();
1177 auto InFlow = std::vector<uint64_t>(NumBlocks, 0);
1178 auto OutFlow = std::vector<uint64_t>(NumBlocks, 0);
1179 for (const auto &Jump : Func.Jumps) {
1180 InFlow[Jump.Target] += Jump.Flow;
1181 OutFlow[Jump.Source] += Jump.Flow;
1184 uint64_t TotalInFlow = 0;
1185 uint64_t TotalOutFlow = 0;
1186 for (uint64_t I = 0; I < NumBlocks; I++) {
1187 auto &Block = Func.Blocks[I];
1188 if (Block.isEntry()) {
1189 TotalInFlow += Block.Flow;
1190 assert(Block.Flow == OutFlow[I] && "incorrectly computed control flow");
1191 } else if (Block.isExit()) {
1192 TotalOutFlow += Block.Flow;
1193 assert(Block.Flow == InFlow[I] && "incorrectly computed control flow");
1194 } else {
1195 assert(Block.Flow == OutFlow[I] && "incorrectly computed control flow");
1196 assert(Block.Flow == InFlow[I] && "incorrectly computed control flow");
1199 assert(TotalInFlow == TotalOutFlow && "incorrectly computed control flow");
1201 // Verify that there are no isolated flow components
1202 // One could modify FlowFunction to hold edges indexed by the sources, which
1203 // will avoid a creation of the object
1204 auto PositiveFlowEdges = std::vector<std::vector<uint64_t>>(NumBlocks);
1205 for (const auto &Jump : Func.Jumps) {
1206 if (Jump.Flow > 0) {
1207 PositiveFlowEdges[Jump.Source].push_back(Jump.Target);
1211 // Run BFS from the source along edges with positive flow
1212 std::queue<uint64_t> Queue;
1213 auto Visited = BitVector(NumBlocks, false);
1214 Queue.push(Func.Entry);
1215 Visited[Func.Entry] = true;
1216 while (!Queue.empty()) {
1217 uint64_t Src = Queue.front();
1218 Queue.pop();
1219 for (uint64_t Dst : PositiveFlowEdges[Src]) {
1220 if (!Visited[Dst]) {
1221 Queue.push(Dst);
1222 Visited[Dst] = true;
1227 // Verify that every block that has a positive flow is reached from the source
1228 // along edges with a positive flow
1229 for (uint64_t I = 0; I < NumBlocks; I++) {
1230 auto &Block = Func.Blocks[I];
1231 assert((Visited[I] || Block.Flow == 0) && "an isolated flow component");
1234 #endif
1236 } // end of anonymous namespace
1238 /// Apply the profile inference algorithm for a given flow function
1239 void llvm::applyFlowInference(FlowFunction &Func) {
1240 // Create and apply an inference network model
1241 auto InferenceNetwork = MinCostMaxFlow();
1242 initializeNetwork(InferenceNetwork, Func);
1243 InferenceNetwork.run();
1245 // Extract flow values for every block and every edge
1246 extractWeights(InferenceNetwork, Func);
1248 // Post-processing adjustments to the flow
1249 auto Adjuster = FlowAdjuster(Func);
1250 Adjuster.run();
1252 #ifndef NDEBUG
1253 // Verify the result
1254 verifyWeights(Func);
1255 #endif