//===- SampleProfileInference.cpp - Adjust sample profiles in the IR ------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements a profile inference algorithm. Given incomplete and
// possibly imprecise block counts, the algorithm reconstructs realistic block
// and edge counts that satisfy flow conservation rules, while minimally
// modifying the input block counts.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Utils/SampleProfileInference.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include <queue>
#include <set>
#include <stack>
#include <unordered_set>

using namespace llvm;

#define DEBUG_TYPE "sample-profile-inference"
static cl::opt<bool> SampleProfileEvenFlowDistribution(
    "sample-profile-even-flow-distribution", cl::init(true), cl::Hidden,
    cl::desc("Try to evenly distribute flow when there are multiple equally "
             "likely options."));

static cl::opt<bool> SampleProfileRebalanceUnknown(
    "sample-profile-rebalance-unknown", cl::init(true), cl::Hidden,
    cl::desc("Evenly re-distribute flow among unknown subgraphs."));

static cl::opt<bool> SampleProfileJoinIslands(
    "sample-profile-join-islands", cl::init(true), cl::Hidden,
    cl::desc("Join isolated components having positive flow."));

static cl::opt<unsigned> SampleProfileProfiCostBlockInc(
    "sample-profile-profi-cost-block-inc", cl::init(10), cl::Hidden,
    cl::desc("The cost of increasing a block's count by one."));

static cl::opt<unsigned> SampleProfileProfiCostBlockDec(
    "sample-profile-profi-cost-block-dec", cl::init(20), cl::Hidden,
    cl::desc("The cost of decreasing a block's count by one."));

static cl::opt<unsigned> SampleProfileProfiCostBlockEntryInc(
    "sample-profile-profi-cost-block-entry-inc", cl::init(40), cl::Hidden,
    cl::desc("The cost of increasing the entry block's count by one."));

static cl::opt<unsigned> SampleProfileProfiCostBlockEntryDec(
    "sample-profile-profi-cost-block-entry-dec", cl::init(10), cl::Hidden,
    cl::desc("The cost of decreasing the entry block's count by one."));

static cl::opt<unsigned> SampleProfileProfiCostBlockZeroInc(
    "sample-profile-profi-cost-block-zero-inc", cl::init(11), cl::Hidden,
    cl::desc("The cost of increasing the count of a zero-weight block by one."));

static cl::opt<unsigned> SampleProfileProfiCostBlockUnknownInc(
    "sample-profile-profi-cost-block-unknown-inc", cl::init(0), cl::Hidden,
    cl::desc("The cost of increasing an unknown block's count by one."));
/// A value indicating an infinite flow/capacity/weight of a block/edge.
/// Not using numeric_limits<int64_t>::max(), as the values can be summed up
/// during the execution.
static constexpr int64_t INF = ((int64_t)1) << 50;

namespace {
/// The minimum-cost maximum flow algorithm.
///
/// The algorithm finds the maximum flow of minimum cost on a given (directed)
/// network using a modified version of the classical Moore-Bellman-Ford
/// approach. The algorithm applies a number of augmentation iterations in which
/// flow is sent along paths of positive capacity from the source to the sink.
/// The worst-case time complexity of the implementation is O(v(f)*m*n), where
/// m is the number of edges, n is the number of vertices, and v(f) is the
/// value of the maximum flow. However, the observed running time on typical
/// instances is sub-quadratic, that is, o(n^2).
///
/// The input is a set of edges with specified costs and capacities, and a pair
/// of nodes (source and sink). The output is the flow along each edge of the
/// minimum total cost respecting the given edge capacities.
class MinCostMaxFlow {
public:
  MinCostMaxFlow(const ProfiParams &Params) : Params(Params) {}
  // Initialize the algorithm's data structures for a network of a given size.
  void initialize(uint64_t NodeCount, uint64_t SourceNode, uint64_t SinkNode) {
    Source = SourceNode;
    Target = SinkNode;

    Nodes = std::vector<Node>(NodeCount);
    Edges = std::vector<std::vector<Edge>>(NodeCount, std::vector<Edge>());
    if (Params.EvenFlowDistribution)
      AugmentingEdges =
          std::vector<std::vector<Edge *>>(NodeCount, std::vector<Edge *>());
  }
  // Run the algorithm.
  void run() {
    LLVM_DEBUG(dbgs() << "Starting profi for " << Nodes.size() << " nodes\n");

    // Iteratively find an augmentation path/dag in the network and send the
    // flow along its edges
    size_t AugmentationIters = applyFlowAugmentation();

    // Compute the total flow and its cost
    int64_t TotalCost = 0;
    int64_t TotalFlow = 0;
    for (uint64_t Src = 0; Src < Nodes.size(); Src++) {
      for (auto &Edge : Edges[Src]) {
        if (Edge.Flow > 0) {
          TotalCost += Edge.Cost * Edge.Flow;
          if (Src == Source)
            TotalFlow += Edge.Flow;
        }
      }
    }
    LLVM_DEBUG(dbgs() << "Completed profi after " << AugmentationIters
                      << " iterations with " << TotalFlow << " total flow"
                      << " of " << TotalCost << " cost\n");
    (void)TotalCost;
    (void)TotalFlow;
    (void)AugmentationIters;
  }
  /// Adding an edge to the network with a specified capacity and a cost.
  /// Multiple edges between a pair of nodes are allowed but self-edges
  /// are not supported.
  void addEdge(uint64_t Src, uint64_t Dst, int64_t Capacity, int64_t Cost) {
    assert(Capacity > 0 && "adding an edge of zero capacity");
    assert(Src != Dst && "loop edges are not supported");

    Edge SrcEdge;
    SrcEdge.Dst = Dst;
    SrcEdge.Cost = Cost;
    SrcEdge.Capacity = Capacity;
    SrcEdge.Flow = 0;
    SrcEdge.RevEdgeIndex = Edges[Dst].size();

    Edge DstEdge;
    DstEdge.Dst = Src;
    DstEdge.Cost = -Cost;
    DstEdge.Capacity = 0;
    DstEdge.Flow = 0;
    DstEdge.RevEdgeIndex = Edges[Src].size();

    Edges[Src].push_back(SrcEdge);
    Edges[Dst].push_back(DstEdge);
  }
  /// Adding an edge to the network of infinite capacity and a given cost.
  void addEdge(uint64_t Src, uint64_t Dst, int64_t Cost) {
    addEdge(Src, Dst, INF, Cost);
  }
  /// Get the total flow from a given source node.
  /// Returns a list of pairs (target node, amount of flow to the target).
  std::vector<std::pair<uint64_t, int64_t>> getFlow(uint64_t Src) const {
    std::vector<std::pair<uint64_t, int64_t>> Flow;
    for (const auto &Edge : Edges[Src]) {
      if (Edge.Flow > 0)
        Flow.push_back(std::make_pair(Edge.Dst, Edge.Flow));
    }
    return Flow;
  }

  /// Get the total flow between a pair of nodes.
  int64_t getFlow(uint64_t Src, uint64_t Dst) const {
    int64_t Flow = 0;
    for (const auto &Edge : Edges[Src]) {
      if (Edge.Dst == Dst) {
        Flow += Edge.Flow;
      }
    }
    return Flow;
  }

private:
  /// Iteratively find an augmentation path/dag in the network and send the
  /// flow along its edges. The method returns the number of applied iterations.
  size_t applyFlowAugmentation() {
    size_t AugmentationIters = 0;
    while (findAugmentingPath()) {
      uint64_t PathCapacity = computeAugmentingPathCapacity();
      while (PathCapacity > 0) {
        bool Progress = false;
        if (Params.EvenFlowDistribution) {
          // Identify node/edge candidates for augmentation
          identifyShortestEdges(PathCapacity);

          // Find an augmenting DAG
          auto AugmentingOrder = findAugmentingDAG();

          // Apply the DAG augmentation
          Progress = augmentFlowAlongDAG(AugmentingOrder);
          PathCapacity = computeAugmentingPathCapacity();
        }

        if (!Progress) {
          augmentFlowAlongPath(PathCapacity);
          PathCapacity = 0;
        }

        AugmentationIters++;
      }
    }
    return AugmentationIters;
  }
  /// Compute the capacity of the canonical augmenting path. If the path is
  /// saturated (that is, no flow can be sent along the path), then return 0.
  uint64_t computeAugmentingPathCapacity() {
    uint64_t PathCapacity = INF;
    uint64_t Now = Target;
    while (Now != Source) {
      uint64_t Pred = Nodes[Now].ParentNode;
      auto &Edge = Edges[Pred][Nodes[Now].ParentEdgeIndex];

      assert(Edge.Capacity >= Edge.Flow && "incorrect edge flow");
      uint64_t EdgeCapacity = uint64_t(Edge.Capacity - Edge.Flow);
      PathCapacity = std::min(PathCapacity, EdgeCapacity);

      Now = Pred;
    }
    return PathCapacity;
  }
  /// Check for existence of an augmenting path with a positive capacity.
  bool findAugmentingPath() {
    // Initialize data structures
    for (auto &Node : Nodes) {
      Node.Distance = INF;
      Node.ParentNode = uint64_t(-1);
      Node.ParentEdgeIndex = uint64_t(-1);
      Node.Taken = false;
    }

    std::queue<uint64_t> Queue;
    Queue.push(Source);
    Nodes[Source].Distance = 0;
    Nodes[Source].Taken = true;
    while (!Queue.empty()) {
      uint64_t Src = Queue.front();
      Queue.pop();
      Nodes[Src].Taken = false;
      // Although the residual network contains edges with negative costs
      // (in particular, backward edges), it can be shown that there are no
      // negative-weight cycles and the following two invariants are maintained:
      // (i) Dist[Source, V] >= 0 and (ii) Dist[V, Target] >= 0 for all nodes V,
      // where Dist is the length of the shortest path between two nodes. This
      // allows us to prune the search-space of the path-finding algorithm using
      // the following early-stop criteria:
      // -- If we find a path with zero-distance from Source to Target, stop the
      //    search, as the path is the shortest since Dist[Source, Target] >= 0;
      // -- If we have Dist[Source, V] > Dist[Source, Target], then do not
      //    process node V, as it is guaranteed _not_ to be on a shortest path
      //    from Source to Target; it follows from inequalities
      //    Dist[Source, Target] >= Dist[Source, V] + Dist[V, Target]
      //                         >= Dist[Source, V]
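      // For example, with hypothetical values Dist[Source, Target] == 5 and
      // Dist[Source, V] == 7, node V cannot lie on a shortest path: otherwise
      // 5 == 7 + Dist[V, Target] >= 7, a contradiction.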
      if (!Params.EvenFlowDistribution && Nodes[Target].Distance == 0)
        break;
      if (Nodes[Src].Distance > Nodes[Target].Distance)
        continue;

      // Process adjacent edges
      for (uint64_t EdgeIdx = 0; EdgeIdx < Edges[Src].size(); EdgeIdx++) {
        auto &Edge = Edges[Src][EdgeIdx];
        if (Edge.Flow < Edge.Capacity) {
          uint64_t Dst = Edge.Dst;
          int64_t NewDistance = Nodes[Src].Distance + Edge.Cost;
          if (Nodes[Dst].Distance > NewDistance) {
            // Update the distance and the parent node/edge
            Nodes[Dst].Distance = NewDistance;
            Nodes[Dst].ParentNode = Src;
            Nodes[Dst].ParentEdgeIndex = EdgeIdx;
            // Add the node to the queue, if it is not there yet
            if (!Nodes[Dst].Taken) {
              Queue.push(Dst);
              Nodes[Dst].Taken = true;
            }
          }
        }
      }
    }

    return Nodes[Target].Distance != INF;
  }
  /// Update the current flow along the augmenting path.
  void augmentFlowAlongPath(uint64_t PathCapacity) {
    assert(PathCapacity > 0 && "found an incorrect augmenting path");
    uint64_t Now = Target;
    while (Now != Source) {
      uint64_t Pred = Nodes[Now].ParentNode;
      auto &Edge = Edges[Pred][Nodes[Now].ParentEdgeIndex];
      auto &RevEdge = Edges[Now][Edge.RevEdgeIndex];

      Edge.Flow += PathCapacity;
      RevEdge.Flow -= PathCapacity;

      Now = Pred;
    }
  }
  /// Find an Augmenting DAG order using a modified version of DFS in which we
  /// can visit a node multiple times. In the DFS search, when scanning each
  /// edge out of a node, continue search at Edge.Dst endpoint if it has not
  /// been discovered yet and its NumCalls < MaxDfsCalls. The algorithm
  /// runs in O(MaxDfsCalls * |Edges| + |Nodes|) time.
  /// It returns an Augmenting Order (Taken nodes in decreasing Finish time)
  /// that starts with Source and ends with Target.
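  /// For illustration, on a hypothetical shortest-path DAG with edges
  /// Source->A, Source->B, A->Target and B->Target, one valid augmenting order
  /// is [Source, B, A, Target]: it starts with Source, ends with Target, and
  /// every edge goes from an earlier node in the order to a later one (that is,
  /// from a larger Finish time to a smaller one).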
  std::vector<uint64_t> findAugmentingDAG() {
    // We use a stack based implementation of DFS to avoid recursion.
    // Defining DFS data structures:
    // A pair (NodeIdx, EdgeIdx) at the top of the Stack denotes that
    //  - we are currently visiting Nodes[NodeIdx] and
    //  - the next edge to scan is Edges[NodeIdx][EdgeIdx]
    typedef std::pair<uint64_t, uint64_t> StackItemType;
    std::stack<StackItemType> Stack;
    std::vector<uint64_t> AugmentingOrder;

    // Phase 0: Initialize Node attributes and Time for DFS run
    for (auto &Node : Nodes) {
      Node.Discovery = 0;
      Node.Finish = 0;
      Node.NumCalls = 0;
      Node.Taken = false;
    }

    uint64_t Time = 0;
    // Mark Target as Taken
    // Taken attribute will be propagated backwards from Target towards Source
    Nodes[Target].Taken = true;
    // Phase 1: Start DFS traversal from Source
    Stack.emplace(Source, 0);
    Nodes[Source].Discovery = ++Time;
    while (!Stack.empty()) {
      auto NodeIdx = Stack.top().first;
      auto EdgeIdx = Stack.top().second;

      // If we haven't scanned all edges out of NodeIdx, continue scanning
      if (EdgeIdx < Edges[NodeIdx].size()) {
        auto &Edge = Edges[NodeIdx][EdgeIdx];
        auto &Dst = Nodes[Edge.Dst];
        Stack.top().second++;

        if (Edge.OnShortestPath) {
          // If we haven't seen Edge.Dst so far, continue DFS search there
          if (Dst.Discovery == 0 && Dst.NumCalls < MaxDfsCalls) {
            Dst.Discovery = ++Time;
            Stack.emplace(Edge.Dst, 0);
            Dst.NumCalls++;
          } else if (Dst.Taken && Dst.Finish != 0) {
            // Else, if Edge.Dst already has a path to Target, then NodeIdx
            // has one too
            Nodes[NodeIdx].Taken = true;
          }
        }
      } else {
        // If we are done scanning all edges out of NodeIdx
        Stack.pop();

        // If we haven't found a path from NodeIdx to Target, forget about it
        if (!Nodes[NodeIdx].Taken) {
          Nodes[NodeIdx].Discovery = 0;
          continue;
        }

        // If we have found a path from NodeIdx to Target, then finish NodeIdx
        // and propagate Taken flag to DFS parent unless at the Source
        Nodes[NodeIdx].Finish = ++Time;
        // NodeIdx == Source if and only if the stack is empty
        if (NodeIdx != Source) {
          assert(!Stack.empty() && "empty stack while running dfs");
          Nodes[Stack.top().first].Taken = true;
        }
        AugmentingOrder.push_back(NodeIdx);
      }
    }
    // Nodes are collected decreasing Finish time, so the order is reversed
    std::reverse(AugmentingOrder.begin(), AugmentingOrder.end());
    // Phase 2: Extract all forward (DAG) edges and fill in AugmentingEdges
    for (size_t Src : AugmentingOrder) {
      AugmentingEdges[Src].clear();
      for (auto &Edge : Edges[Src]) {
        uint64_t Dst = Edge.Dst;
        if (Edge.OnShortestPath && Nodes[Src].Taken && Nodes[Dst].Taken &&
            Nodes[Dst].Finish < Nodes[Src].Finish) {
          AugmentingEdges[Src].push_back(&Edge);
        }
      }
      assert((Src == Target || !AugmentingEdges[Src].empty()) &&
             "incorrectly constructed augmenting edges");
    }

    return AugmentingOrder;
  }
  /// Update the current flow along the given (acyclic) subgraph specified by
  /// the vertex order, AugmentingOrder. The objective is to send as much flow
  /// as possible while evenly distributing flow among successors of each node.
  /// After the update at least one edge is saturated.
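  /// For instance, a hypothetical node holding 5 units of flow with two
  /// outgoing DAG edges of sufficient capacity sends ceil(5 / 2) = 3 units
  /// along the first edge and the remaining 2 along the second; any excess
  /// introduced by such rounding is returned in Phase 3 below.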
  bool augmentFlowAlongDAG(const std::vector<uint64_t> &AugmentingOrder) {
    // Phase 0: Initialization
    for (uint64_t Src : AugmentingOrder) {
      Nodes[Src].FracFlow = 0;
      Nodes[Src].IntFlow = 0;
      for (auto &Edge : AugmentingEdges[Src]) {
        Edge->AugmentedFlow = 0;
      }
    }

    // Phase 1: Send a unit of fractional flow along the DAG
    uint64_t MaxFlowAmount = INF;
    Nodes[Source].FracFlow = 1.0;
    for (uint64_t Src : AugmentingOrder) {
      assert((Src == Target || Nodes[Src].FracFlow > 0.0) &&
             "incorrectly computed fractional flow");
      // Distribute flow evenly among successors of Src
      uint64_t Degree = AugmentingEdges[Src].size();
      for (auto &Edge : AugmentingEdges[Src]) {
        double EdgeFlow = Nodes[Src].FracFlow / Degree;
        Nodes[Edge->Dst].FracFlow += EdgeFlow;
        if (Edge->Capacity == INF)
          continue;
        uint64_t MaxIntFlow = double(Edge->Capacity - Edge->Flow) / EdgeFlow;
        MaxFlowAmount = std::min(MaxFlowAmount, MaxIntFlow);
      }
    }
    // Stop early if we cannot send any (integral) flow from Source to Target
    if (MaxFlowAmount == 0)
      return false;

    // Phase 2: Send an integral flow of MaxFlowAmount
    Nodes[Source].IntFlow = MaxFlowAmount;
    for (uint64_t Src : AugmentingOrder) {
      if (Nodes[Src].IntFlow == 0)
        continue;
      // Distribute flow evenly among successors of Src, rounding up to make
      // sure all flow is sent
      uint64_t Degree = AugmentingEdges[Src].size();
      // We are guaranteed that Node[Src].IntFlow <= SuccFlow * Degree
      uint64_t SuccFlow = (Nodes[Src].IntFlow + Degree - 1) / Degree;
      for (auto &Edge : AugmentingEdges[Src]) {
        uint64_t Dst = Edge->Dst;
        uint64_t EdgeFlow = std::min(Nodes[Src].IntFlow, SuccFlow);
        EdgeFlow = std::min(EdgeFlow, uint64_t(Edge->Capacity - Edge->Flow));
        Nodes[Dst].IntFlow += EdgeFlow;
        Nodes[Src].IntFlow -= EdgeFlow;
        Edge->AugmentedFlow += EdgeFlow;
      }
    }
    assert(Nodes[Target].IntFlow <= MaxFlowAmount);
    Nodes[Target].IntFlow = 0;

    // Phase 3: Send excess flow back traversing the nodes backwards.
    // Because of rounding, not all flow can be sent along the edges of Src.
    // Hence, sending the remaining flow back to maintain flow conservation
    for (size_t Idx = AugmentingOrder.size() - 1; Idx > 0; Idx--) {
      uint64_t Src = AugmentingOrder[Idx - 1];
      // Try to send excess flow back along each edge.
      // Make sure we only send back flow we just augmented (AugmentedFlow).
      for (auto &Edge : AugmentingEdges[Src]) {
        uint64_t Dst = Edge->Dst;
        if (Nodes[Dst].IntFlow == 0)
          continue;
        uint64_t EdgeFlow = std::min(Nodes[Dst].IntFlow, Edge->AugmentedFlow);
        Nodes[Dst].IntFlow -= EdgeFlow;
        Nodes[Src].IntFlow += EdgeFlow;
        Edge->AugmentedFlow -= EdgeFlow;
      }
    }

    // Phase 4: Update flow values along all edges
    bool HasSaturatedEdges = false;
    for (uint64_t Src : AugmentingOrder) {
      // Verify that we have sent all the excess flow from the node
      assert(Src == Source || Nodes[Src].IntFlow == 0);
      for (auto &Edge : AugmentingEdges[Src]) {
        assert(uint64_t(Edge->Capacity - Edge->Flow) >= Edge->AugmentedFlow);
        // Update flow values along the edge and its reverse copy
        auto &RevEdge = Edges[Edge->Dst][Edge->RevEdgeIndex];
        Edge->Flow += Edge->AugmentedFlow;
        RevEdge.Flow -= Edge->AugmentedFlow;
        if (Edge->Capacity == Edge->Flow && Edge->AugmentedFlow > 0)
          HasSaturatedEdges = true;
      }
    }

    // The augmentation is successful iff at least one edge becomes saturated
    return HasSaturatedEdges;
  }
  /// Identify candidate (shortest) edges for augmentation.
  void identifyShortestEdges(uint64_t PathCapacity) {
    assert(PathCapacity > 0 && "found an incorrect augmenting DAG");
    // To make sure the augmentation DAG contains only edges with large residual
    // capacity, we prune all edges whose capacity is below a fraction of
    // the capacity of the augmented path.
    // (All edges of the path itself are always in the DAG)
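    // E.g., for a hypothetical PathCapacity of 10, only edges with a residual
    // capacity of at least max(10 / 2, 1) = 5 are marked as candidates; the
    // edges of the canonical path itself have residual capacity >= 10 and
    // therefore always qualify.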
    uint64_t MinCapacity = std::max(PathCapacity / 2, uint64_t(1));

    // Decide which edges are on a shortest path from Source to Target
    for (size_t Src = 0; Src < Nodes.size(); Src++) {
      // An edge cannot be augmenting if the endpoint has large distance
      if (Nodes[Src].Distance > Nodes[Target].Distance)
        continue;

      for (auto &Edge : Edges[Src]) {
        uint64_t Dst = Edge.Dst;
        Edge.OnShortestPath =
            Src != Target && Dst != Source &&
            Nodes[Dst].Distance <= Nodes[Target].Distance &&
            Nodes[Dst].Distance == Nodes[Src].Distance + Edge.Cost &&
            Edge.Capacity > Edge.Flow &&
            uint64_t(Edge.Capacity - Edge.Flow) >= MinCapacity;
      }
    }
  }
  /// Maximum number of DFS iterations for DAG finding.
  static constexpr uint64_t MaxDfsCalls = 10;

  /// A node in a flow network.
  struct Node {
    /// The cost of the cheapest path from the source to the current node.
    int64_t Distance;
    /// The node preceding the current one in the path.
    uint64_t ParentNode;
    /// The index of the edge between ParentNode and the current node.
    uint64_t ParentEdgeIndex;
    /// An indicator of whether the current node is in a queue.
    bool Taken;

    /// Data fields utilized in DAG-augmentation:
    /// Fractional flow.
    double FracFlow;
    /// Integral flow.
    uint64_t IntFlow;
    /// Discovery time.
    uint64_t Discovery;
    /// Finish time.
    uint64_t Finish;
    /// The number of times the node has been visited by the DFS.
    uint64_t NumCalls;
  };

  /// An edge in a flow network.
  struct Edge {
    /// The cost of the edge.
    int64_t Cost;
    /// The capacity of the edge.
    int64_t Capacity;
    /// The current flow on the edge.
    int64_t Flow;
    /// The destination node of the edge.
    uint64_t Dst;
    /// The index of the reverse edge between Dst and the current node.
    uint64_t RevEdgeIndex;

    /// Data fields utilized in DAG-augmentation:
    /// Whether the edge is currently on a shortest path from Source to Target.
    bool OnShortestPath;
    /// Extra flow along the edge.
    uint64_t AugmentedFlow;
  };

  /// The set of network nodes.
  std::vector<Node> Nodes;
  /// The set of network edges.
  std::vector<std::vector<Edge>> Edges;
  /// Source node of the flow.
  uint64_t Source;
  /// Target (sink) node of the flow.
  uint64_t Target;
  /// Augmenting edges.
  std::vector<std::vector<Edge *>> AugmentingEdges;
  /// Params for flow computation.
  const ProfiParams &Params;
};
/// A post-processing adjustment of the control flow. It applies two steps by
/// rerouting some flow and making it more realistic:
///
/// - First, it removes all isolated components ("islands") with a positive flow
///   that are unreachable from the entry block. For every such component, we
///   find the shortest path from the entry to an exit passing through the
///   component, and increase the flow by one unit along the path.
///
/// - Second, it identifies all "unknown subgraphs" consisting of basic blocks
///   with no sampled counts. Then it rebalances the flow that goes through such
///   a subgraph so that each branch is taken with probability 50%.
///   An unknown subgraph is such that for every two nodes u and v:
///     - u dominates v and u is not unknown;
///     - v post-dominates u; and
///     - all inner-nodes of all (u,v)-paths are unknown.
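///
/// For illustration, take a hypothetical known block u with flow 8 branching
/// into two unknown blocks a and b that both jump to a known block v.
/// Rebalancing assigns ceil(8 / 2) = 4 units to the jump u->a and the
/// remaining 4 units to u->b, so each branch is taken with probability 50%.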
class FlowAdjuster {
public:
  FlowAdjuster(const ProfiParams &Params, FlowFunction &Func)
      : Params(Params), Func(Func) {}
  /// Apply the post-processing.
  void run() {
    if (Params.JoinIslands) {
      // Adjust the flow to get rid of isolated components
      joinIsolatedComponents();
    }

    if (Params.RebalanceUnknown) {
      // Rebalance the flow inside unknown subgraphs
      rebalanceUnknownSubgraphs();
    }
  }

private:
  void joinIsolatedComponents() {
    // Find blocks that are reachable from the source
    auto Visited = BitVector(NumBlocks(), false);
    findReachable(Func.Entry, Visited);

    // Iterate over all non-reachable blocks and adjust their weights
    for (uint64_t I = 0; I < NumBlocks(); I++) {
      auto &Block = Func.Blocks[I];
      if (Block.Flow > 0 && !Visited[I]) {
        // Find a path from the entry to an exit passing through the block I
        auto Path = findShortestPath(I);
        // Increase the flow along the path
        assert(Path.size() > 0 && Path[0]->Source == Func.Entry &&
               "incorrectly computed path adjusting control flow");
        Func.Blocks[Func.Entry].Flow += 1;
        for (auto &Jump : Path) {
          Jump->Flow += 1;
          Func.Blocks[Jump->Target].Flow += 1;
          // Update reachability
          findReachable(Jump->Target, Visited);
        }
      }
    }
  }
  /// Run BFS from a given block along the jumps with a positive flow and mark
  /// all reachable blocks.
  void findReachable(uint64_t Src, BitVector &Visited) {
    if (Visited[Src])
      return;
    std::queue<uint64_t> Queue;
    Queue.push(Src);
    Visited[Src] = true;
    while (!Queue.empty()) {
      Src = Queue.front();
      Queue.pop();
      for (auto *Jump : Func.Blocks[Src].SuccJumps) {
        uint64_t Dst = Jump->Target;
        if (Jump->Flow > 0 && !Visited[Dst]) {
          Queue.push(Dst);
          Visited[Dst] = true;
        }
      }
    }
  }
  /// Find the shortest path from the entry block to an exit block passing
  /// through a given block.
  std::vector<FlowJump *> findShortestPath(uint64_t BlockIdx) {
    // A path from the entry block to BlockIdx
    auto ForwardPath = findShortestPath(Func.Entry, BlockIdx);
    // A path from BlockIdx to an exit block
    auto BackwardPath = findShortestPath(BlockIdx, AnyExitBlock);

    // Concatenate the two paths
    std::vector<FlowJump *> Result;
    Result.insert(Result.end(), ForwardPath.begin(), ForwardPath.end());
    Result.insert(Result.end(), BackwardPath.begin(), BackwardPath.end());
    return Result;
  }
  /// Apply the Dijkstra algorithm to find the shortest path from a given
  /// Source to a given Target block.
  /// If Target == -1, then the path ends at an exit block.
  std::vector<FlowJump *> findShortestPath(uint64_t Source, uint64_t Target) {
    // Quit early, if possible
    if (Source == Target)
      return std::vector<FlowJump *>();
    if (Func.Blocks[Source].isExit() && Target == AnyExitBlock)
      return std::vector<FlowJump *>();

    // Initialize data structures
    auto Distance = std::vector<int64_t>(NumBlocks(), INF);
    auto Parent = std::vector<FlowJump *>(NumBlocks(), nullptr);
    Distance[Source] = 0;
    std::set<std::pair<uint64_t, uint64_t>> Queue;
    Queue.insert(std::make_pair(Distance[Source], Source));

    // Run the Dijkstra algorithm
    while (!Queue.empty()) {
      uint64_t Src = Queue.begin()->second;
      Queue.erase(Queue.begin());
      // If we found a solution, quit early
      if (Src == Target ||
          (Func.Blocks[Src].isExit() && Target == AnyExitBlock))
        break;

      for (auto *Jump : Func.Blocks[Src].SuccJumps) {
        uint64_t Dst = Jump->Target;
        int64_t JumpDist = jumpDistance(Jump);
        if (Distance[Dst] > Distance[Src] + JumpDist) {
          Queue.erase(std::make_pair(Distance[Dst], Dst));

          Distance[Dst] = Distance[Src] + JumpDist;
          Parent[Dst] = Jump;

          Queue.insert(std::make_pair(Distance[Dst], Dst));
        }
      }
    }
    // If Target is not provided, find the closest exit block
    if (Target == AnyExitBlock) {
      for (uint64_t I = 0; I < NumBlocks(); I++) {
        if (Func.Blocks[I].isExit() && Parent[I] != nullptr) {
          if (Target == AnyExitBlock || Distance[Target] > Distance[I]) {
            Target = I;
          }
        }
      }
    }
    assert(Parent[Target] != nullptr && "a path does not exist");

    // Extract the constructed path
    std::vector<FlowJump *> Result;
    uint64_t Now = Target;
    while (Now != Source) {
      assert(Now == Parent[Now]->Target && "incorrect parent jump");
      Result.push_back(Parent[Now]);
      Now = Parent[Now]->Source;
    }
    // Reverse the path, since it is extracted from Target to Source
    std::reverse(Result.begin(), Result.end());
    return Result;
  }
  /// A distance of a path for a given jump.
  /// In order to encourage the path to use blocks/jumps with large positive
  /// flow, and to avoid drastic changes to the branch probabilities of
  /// outgoing edges, set the jump distance so as:
  ///   - to minimize the number of unlikely jumps used and, subject to that,
  ///   - to minimize the number of Flow == 0 jumps used and, subject to that,
  ///   - to minimize the total multiplicative Flow increase for the remaining
  ///     edges.
  /// To capture this objective with integer distances, we round off fractional
  /// parts to a multiple of 1 / BaseDistance.
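  /// For example, with hypothetical values BaseDistance = 10000 and
  /// Jump->Flow = 4, the distance is 10000 + 10000 / 4 = 12500, whereas a
  /// zero-flow jump costs 2 * 10000 * (NumBlocks() + 1), which dominates any
  /// single positive-flow jump.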
  int64_t jumpDistance(FlowJump *Jump) const {
    if (Jump->IsUnlikely)
      return Params.CostUnlikely;
    uint64_t BaseDistance =
        std::max(FlowAdjuster::MinBaseDistance,
                 std::min(Func.Blocks[Func.Entry].Flow,
                          Params.CostUnlikely / (2 * (NumBlocks() + 1))));
    if (Jump->Flow > 0)
      return BaseDistance + BaseDistance / Jump->Flow;
    return 2 * BaseDistance * (NumBlocks() + 1);
  }

  uint64_t NumBlocks() const { return Func.Blocks.size(); }
  /// Rebalance unknown subgraphs so that the flow is split evenly across the
  /// outgoing branches of every block of the subgraph. The method iterates over
  /// blocks with known weight and identifies unknown subgraphs rooted at the
  /// blocks. Then it verifies if flow rebalancing is feasible and applies it.
  void rebalanceUnknownSubgraphs() {
    // Try to find unknown subgraphs from each block
    for (const FlowBlock &SrcBlock : Func.Blocks) {
      // Verify if rebalancing rooted at SrcBlock is feasible
      if (!canRebalanceAtRoot(&SrcBlock))
        continue;

      // Find an unknown subgraph starting at SrcBlock. Along the way,
      // fill in known destinations and intermediate unknown blocks.
      std::vector<FlowBlock *> UnknownBlocks;
      std::vector<FlowBlock *> KnownDstBlocks;
      findUnknownSubgraph(&SrcBlock, KnownDstBlocks, UnknownBlocks);

      // Verify if rebalancing of the subgraph is feasible. If the search is
      // successful, find the unique destination block (which can be null)
      FlowBlock *DstBlock = nullptr;
      if (!canRebalanceSubgraph(&SrcBlock, KnownDstBlocks, UnknownBlocks,
                                DstBlock))
        continue;

      // We cannot rebalance subgraphs containing cycles among unknown blocks
      if (!isAcyclicSubgraph(&SrcBlock, DstBlock, UnknownBlocks))
        continue;

      // Rebalance the flow
      rebalanceUnknownSubgraph(&SrcBlock, DstBlock, UnknownBlocks);
    }
  }
  /// Verify if rebalancing rooted at a given block is possible.
  bool canRebalanceAtRoot(const FlowBlock *SrcBlock) {
    // Do not attempt to find unknown subgraphs from an unknown or a
    // zero-flow block
    if (SrcBlock->HasUnknownWeight || SrcBlock->Flow == 0)
      return false;

    // Do not attempt to process subgraphs from a block w/o unknown successors
    bool HasUnknownSuccs = false;
    for (auto *Jump : SrcBlock->SuccJumps) {
      if (Func.Blocks[Jump->Target].HasUnknownWeight) {
        HasUnknownSuccs = true;
        break;
      }
    }
    if (!HasUnknownSuccs)
      return false;

    return true;
  }
  /// Find an unknown subgraph starting at block SrcBlock. The method sets
  /// identified destinations, KnownDstBlocks, and intermediate UnknownBlocks.
  void findUnknownSubgraph(const FlowBlock *SrcBlock,
                           std::vector<FlowBlock *> &KnownDstBlocks,
                           std::vector<FlowBlock *> &UnknownBlocks) {
    // Run BFS from SrcBlock and make sure all paths are going through unknown
    // blocks and end at a known DstBlock
    auto Visited = BitVector(NumBlocks(), false);
    std::queue<uint64_t> Queue;

    Queue.push(SrcBlock->Index);
    Visited[SrcBlock->Index] = true;
    while (!Queue.empty()) {
      auto &Block = Func.Blocks[Queue.front()];
      Queue.pop();
      // Process blocks reachable from Block
      for (auto *Jump : Block.SuccJumps) {
        // If Jump can be ignored, skip it
        if (ignoreJump(SrcBlock, nullptr, Jump))
          continue;

        uint64_t Dst = Jump->Target;
        // If Dst has been visited, skip Jump
        if (Visited[Dst])
          continue;
        // Process block Dst
        Visited[Dst] = true;
        if (!Func.Blocks[Dst].HasUnknownWeight) {
          KnownDstBlocks.push_back(&Func.Blocks[Dst]);
        } else {
          Queue.push(Dst);
          UnknownBlocks.push_back(&Func.Blocks[Dst]);
        }
      }
    }
  }
  /// Verify if rebalancing of the subgraph is feasible. If the checks are
  /// successful, set the unique destination block, DstBlock (can be null).
  bool canRebalanceSubgraph(const FlowBlock *SrcBlock,
                            const std::vector<FlowBlock *> &KnownDstBlocks,
                            const std::vector<FlowBlock *> &UnknownBlocks,
                            FlowBlock *&DstBlock) {
    // If the list of unknown blocks is empty, we don't need rebalancing
    if (UnknownBlocks.empty())
      return false;

    // If there are multiple known sinks, we can't rebalance
    if (KnownDstBlocks.size() > 1)
      return false;
    DstBlock = KnownDstBlocks.empty() ? nullptr : KnownDstBlocks.front();

    // Verify sinks of the subgraph
    for (auto *Block : UnknownBlocks) {
      if (Block->SuccJumps.empty()) {
        // If there are multiple (known and unknown) sinks, we can't rebalance
        if (DstBlock != nullptr)
          return false;
        continue;
      }
      size_t NumIgnoredJumps = 0;
      for (auto *Jump : Block->SuccJumps) {
        if (ignoreJump(SrcBlock, DstBlock, Jump))
          NumIgnoredJumps++;
      }
      // If there is a non-sink block in UnknownBlocks with all jumps ignored,
      // then we can't rebalance
      if (NumIgnoredJumps == Block->SuccJumps.size())
        return false;
    }

    return true;
  }
  /// Decide whether the Jump is ignored while processing an unknown subgraph
  /// rooted at basic block SrcBlock with the destination block, DstBlock.
  bool ignoreJump(const FlowBlock *SrcBlock, const FlowBlock *DstBlock,
                  const FlowJump *Jump) {
    // Ignore unlikely jumps with zero flow
    if (Jump->IsUnlikely && Jump->Flow == 0)
      return true;

    auto JumpSource = &Func.Blocks[Jump->Source];
    auto JumpTarget = &Func.Blocks[Jump->Target];

    // Do not ignore jumps coming into DstBlock
    if (DstBlock != nullptr && JumpTarget == DstBlock)
      return false;

    // Ignore jumps out of SrcBlock to known blocks
    if (!JumpTarget->HasUnknownWeight && JumpSource == SrcBlock)
      return true;

    // Ignore jumps to known blocks with zero flow
    if (!JumpTarget->HasUnknownWeight && JumpTarget->Flow == 0)
      return true;

    return false;
  }
  /// Verify if the given unknown subgraph is acyclic, and if yes, reorder
  /// UnknownBlocks in the topological order (so that all jumps are "forward").
  bool isAcyclicSubgraph(const FlowBlock *SrcBlock, const FlowBlock *DstBlock,
                         std::vector<FlowBlock *> &UnknownBlocks) {
    // Extract local in-degrees in the considered subgraph
    auto LocalInDegree = std::vector<uint64_t>(NumBlocks(), 0);
    auto fillInDegree = [&](const FlowBlock *Block) {
      for (auto *Jump : Block->SuccJumps) {
        if (ignoreJump(SrcBlock, DstBlock, Jump))
          continue;
        LocalInDegree[Jump->Target]++;
      }
    };
    fillInDegree(SrcBlock);
    for (auto *Block : UnknownBlocks) {
      fillInDegree(Block);
    }
    // A loop containing SrcBlock
    if (LocalInDegree[SrcBlock->Index] > 0)
      return false;

    std::vector<FlowBlock *> AcyclicOrder;
    std::queue<uint64_t> Queue;
    Queue.push(SrcBlock->Index);
    while (!Queue.empty()) {
      FlowBlock *Block = &Func.Blocks[Queue.front()];
      Queue.pop();
      // Stop propagation once we reach DstBlock, if any
      if (DstBlock != nullptr && Block == DstBlock)
        break;

      // Keep an acyclic order of unknown blocks
      if (Block->HasUnknownWeight && Block != SrcBlock)
        AcyclicOrder.push_back(Block);

      // Add to the queue all successors with zero local in-degree
      for (auto *Jump : Block->SuccJumps) {
        if (ignoreJump(SrcBlock, DstBlock, Jump))
          continue;
        uint64_t Dst = Jump->Target;
        LocalInDegree[Dst]--;
        if (LocalInDegree[Dst] == 0) {
          Queue.push(Dst);
        }
      }
    }

    // If there is a cycle in the subgraph, AcyclicOrder contains only a subset
    // of the blocks
    if (UnknownBlocks.size() != AcyclicOrder.size())
      return false;
    UnknownBlocks = AcyclicOrder;
    return true;
  }
  /// Rebalance a given subgraph rooted at SrcBlock, ending at DstBlock and
  /// having UnknownBlocks intermediate blocks.
  void rebalanceUnknownSubgraph(const FlowBlock *SrcBlock,
                                const FlowBlock *DstBlock,
                                const std::vector<FlowBlock *> &UnknownBlocks) {
    assert(SrcBlock->Flow > 0 && "zero-flow block in unknown subgraph");

    // Distribute flow from the source block
    uint64_t BlockFlow = 0;
    // SrcBlock's flow is the sum of outgoing flows along non-ignored jumps
    for (auto *Jump : SrcBlock->SuccJumps) {
      if (ignoreJump(SrcBlock, DstBlock, Jump))
        continue;
      BlockFlow += Jump->Flow;
    }
    rebalanceBlock(SrcBlock, DstBlock, SrcBlock, BlockFlow);

    // Distribute flow from the remaining blocks
    for (auto *Block : UnknownBlocks) {
      assert(Block->HasUnknownWeight && "incorrect unknown subgraph");
      uint64_t BlockFlow = 0;
      // Block's flow is the sum of incoming flows
      for (auto *Jump : Block->PredJumps) {
        BlockFlow += Jump->Flow;
      }
      Block->Flow = BlockFlow;
      rebalanceBlock(SrcBlock, DstBlock, Block, BlockFlow);
    }
  }
  /// Redistribute flow for a block in a subgraph rooted at SrcBlock,
  /// and ending at DstBlock.
  void rebalanceBlock(const FlowBlock *SrcBlock, const FlowBlock *DstBlock,
                      const FlowBlock *Block, uint64_t BlockFlow) {
    // Process all successor jumps and update corresponding flow values
    size_t BlockDegree = 0;
    for (auto *Jump : Block->SuccJumps) {
      if (ignoreJump(SrcBlock, DstBlock, Jump))
        continue;
      BlockDegree++;
    }
    // If all successor jumps of the block are ignored, skip it
    if (DstBlock == nullptr && BlockDegree == 0)
      return;
    assert(BlockDegree > 0 && "all outgoing jumps are ignored");

    // Each of the Block's successors gets the following amount of flow.
    // Rounding the value up so that all flow is propagated
    uint64_t SuccFlow = (BlockFlow + BlockDegree - 1) / BlockDegree;
    for (auto *Jump : Block->SuccJumps) {
      if (ignoreJump(SrcBlock, DstBlock, Jump))
        continue;
      uint64_t Flow = std::min(SuccFlow, BlockFlow);
      Jump->Flow = Flow;
      BlockFlow -= Flow;
    }
    assert(BlockFlow == 0 && "not all flow is propagated");
  }
  /// A constant indicating an arbitrary exit block of a function.
  static constexpr uint64_t AnyExitBlock = uint64_t(-1);
  /// Minimum BaseDistance for the jump distance values in island joining.
  static constexpr uint64_t MinBaseDistance = 10000;

  /// Params for flow computation.
  const ProfiParams &Params;
  /// The function.
  FlowFunction &Func;
};
std::pair<int64_t, int64_t> assignBlockCosts(const ProfiParams &Params,
                                             const FlowBlock &Block);
std::pair<int64_t, int64_t> assignJumpCosts(const ProfiParams &Params,
                                            const FlowJump &Jump);
/// Initializing flow network for a given function.
///
/// Every block is split into two nodes that are responsible for (i) an
/// incoming flow, (ii) an outgoing flow; they penalize an increase or a
/// reduction of the block weight.
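///
/// For example, block B is represented by the node pair (2 * B, 2 * B + 1), a
/// jump B0 -> B1 becomes an edge from node 2 * B0 + 1 (the "out" half of B0)
/// to node 2 * B1 (the "in" half of B1), and the dummy sources/sinks occupy
/// the four indices following 2 * NumBlocks.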
void initializeNetwork(const ProfiParams &Params, MinCostMaxFlow &Network,
                       FlowFunction &Func) {
  uint64_t NumBlocks = Func.Blocks.size();
  assert(NumBlocks > 1 && "Too few blocks in a function");
  uint64_t NumJumps = Func.Jumps.size();
  assert(NumJumps > 0 && "Too few jumps in a function");

  // Introducing dummy source/sink pairs to allow flow circulation.
  // The nodes corresponding to blocks of the function have indices in
  // the range [0 .. 2 * NumBlocks); the dummy sources/sinks are indexed by the
  // next four values.
  uint64_t S = 2 * NumBlocks;
  uint64_t T = S + 1;
  uint64_t S1 = S + 2;
  uint64_t T1 = S + 3;

  Network.initialize(2 * NumBlocks + 4, S1, T1);

  // Initialize nodes of the flow network
  for (uint64_t B = 0; B < NumBlocks; B++) {
    auto &Block = Func.Blocks[B];

    // Split every block into two auxiliary nodes to allow
    // increase/reduction of the block count.
    uint64_t Bin = 2 * B;
    uint64_t Bout = 2 * B + 1;

    // Edges from S and to T
    if (Block.isEntry()) {
      Network.addEdge(S, Bin, 0);
    } else if (Block.isExit()) {
      Network.addEdge(Bout, T, 0);
    }

    // Assign costs for increasing/decreasing the block counts
    auto [AuxCostInc, AuxCostDec] = assignBlockCosts(Params, Block);

    // Add the corresponding edges to the network
    Network.addEdge(Bin, Bout, AuxCostInc);
    if (Block.Weight > 0) {
      Network.addEdge(Bout, Bin, Block.Weight, AuxCostDec);
      Network.addEdge(S1, Bout, Block.Weight, 0);
      Network.addEdge(Bin, T1, Block.Weight, 0);
    }
  }

  // Initialize edges of the flow network
  for (uint64_t J = 0; J < NumJumps; J++) {
    auto &Jump = Func.Jumps[J];

    // Get the endpoints corresponding to the jump
    uint64_t Jin = 2 * Jump.Source + 1;
    uint64_t Jout = 2 * Jump.Target;

    // Assign costs for increasing/decreasing the jump counts
    auto [AuxCostInc, AuxCostDec] = assignJumpCosts(Params, Jump);

    // Add the corresponding edges to the network
    Network.addEdge(Jin, Jout, AuxCostInc);
    if (Jump.Weight > 0) {
      Network.addEdge(Jout, Jin, Jump.Weight, AuxCostDec);
      Network.addEdge(S1, Jout, Jump.Weight, 0);
      Network.addEdge(Jin, T1, Jump.Weight, 0);
    }
  }

  // Make sure we have a valid flow circulation
  Network.addEdge(T, S, 0);
}
/// Assign costs for increasing/decreasing the block counts.
std::pair<int64_t, int64_t> assignBlockCosts(const ProfiParams &Params,
                                             const FlowBlock &Block) {
  // Modifying the weight of an unlikely block is expensive
  if (Block.IsUnlikely)
    return std::make_pair(Params.CostUnlikely, Params.CostUnlikely);

  // Assign default values for the costs
  int64_t CostInc = Params.CostBlockInc;
  int64_t CostDec = Params.CostBlockDec;
  // Update the costs depending on the block metadata
  if (Block.HasUnknownWeight) {
    CostInc = Params.CostBlockUnknownInc;
    CostDec = 0;
  } else {
    // Increasing the count for "cold" blocks with zero initial count is more
    // expensive than for "hot" ones
    if (Block.Weight == 0)
      CostInc = Params.CostBlockZeroInc;
    // Modifying the count of the entry block is expensive
    if (Block.isEntry()) {
      CostInc = Params.CostBlockEntryInc;
      CostDec = Params.CostBlockEntryDec;
    }
  }
  return std::make_pair(CostInc, CostDec);
}
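// With the default flag values above this yields, for example,
// (CostInc, CostDec) = (10, 20) for a regular block with a known positive
// weight, (40, 10) for the entry block, and (11, 20) for a known zero-weight
// block; the exact values depend on the provided ProfiParams.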
/// Assign costs for increasing/decreasing the jump counts.
std::pair<int64_t, int64_t> assignJumpCosts(const ProfiParams &Params,
                                            const FlowJump &Jump) {
  // Modifying the weight of an unlikely jump is expensive
  if (Jump.IsUnlikely)
    return std::make_pair(Params.CostUnlikely, Params.CostUnlikely);

  // Assign default values for the costs
  int64_t CostInc = Params.CostJumpInc;
  int64_t CostDec = Params.CostJumpDec;
  // Update the costs depending on the jump metadata
  if (Jump.Source + 1 == Jump.Target) {
    // Adjusting the fall-through branch
    CostInc = Params.CostJumpFTInc;
    CostDec = Params.CostJumpFTDec;
  }
  if (Jump.HasUnknownWeight) {
    // The cost is different for fall-through and non-fall-through branches
    if (Jump.Source + 1 == Jump.Target)
      CostInc = Params.CostJumpUnknownFTInc;
    else
      CostInc = Params.CostJumpUnknownInc;
    CostDec = 0;
  } else {
    assert(Jump.Weight > 0 && "found a zero-weight jump that must have a "
                              "positive weight");
  }
  return std::make_pair(CostInc, CostDec);
}
/// Extract resulting block and edge counts from the flow network.
void extractWeights(const ProfiParams &Params, MinCostMaxFlow &Network,
                    FlowFunction &Func) {
  uint64_t NumBlocks = Func.Blocks.size();
  uint64_t NumJumps = Func.Jumps.size();

  // Extract resulting jump counts
  for (uint64_t J = 0; J < NumJumps; J++) {
    auto &Jump = Func.Jumps[J];
    uint64_t SrcOut = 2 * Jump.Source + 1;
    uint64_t DstIn = 2 * Jump.Target;

    int64_t Flow = 0;
    int64_t AuxFlow = Network.getFlow(SrcOut, DstIn);
    if (Jump.Source != Jump.Target)
      Flow = int64_t(Jump.Weight) + AuxFlow;
    else
      Flow = int64_t(Jump.Weight) + (AuxFlow > 0 ? AuxFlow : 0);

    Jump.Flow = Flow;
    assert(Flow >= 0 && "negative jump flow");
  }

  // Extract resulting block counts
  auto InFlow = std::vector<uint64_t>(NumBlocks, 0);
  auto OutFlow = std::vector<uint64_t>(NumBlocks, 0);
  for (auto &Jump : Func.Jumps) {
    InFlow[Jump.Target] += Jump.Flow;
    OutFlow[Jump.Source] += Jump.Flow;
  }
  for (uint64_t B = 0; B < NumBlocks; B++) {
    auto &Block = Func.Blocks[B];
    Block.Flow = std::max(OutFlow[B], InFlow[B]);
  }
}
/// Verify that the provided block/jump weights are as expected.
void verifyInput(const FlowFunction &Func) {
  // Verify entry and exit blocks
  assert(Func.Entry == 0 && Func.Blocks[0].isEntry());
  size_t NumExitBlocks = 0;
  for (size_t I = 1; I < Func.Blocks.size(); I++) {
    assert(!Func.Blocks[I].isEntry() && "multiple entry blocks");
    if (Func.Blocks[I].isExit())
      NumExitBlocks++;
  }
  assert(NumExitBlocks > 0 && "cannot find exit blocks");

  // Verify that there are no parallel edges
  for (auto &Block : Func.Blocks) {
    std::unordered_set<uint64_t> UniqueSuccs;
    for (auto &Jump : Block.SuccJumps) {
      auto It = UniqueSuccs.insert(Jump->Target);
      assert(It.second && "input CFG contains parallel edges");
      (void)It;
    }
  }
  // Verify that a block cannot be both an entry and an exit
  for (auto &Block : Func.Blocks) {
    assert((!Block.isEntry() || !Block.isExit()) &&
           "a block cannot be an entry and an exit");
  }
  // Verify input block weights
  for (auto &Block : Func.Blocks) {
    assert((!Block.HasUnknownWeight || Block.Weight == 0 || Block.isEntry()) &&
           "non-zero weight of a block w/o weight except for an entry");
  }
  // Verify input jump weights
  for (auto &Jump : Func.Jumps) {
    assert((!Jump.HasUnknownWeight || Jump.Weight == 0) &&
           "non-zero weight of a jump w/o weight");
  }
}
/// Verify that the computed flow values satisfy flow conservation rules.
void verifyOutput(const FlowFunction &Func) {
  const uint64_t NumBlocks = Func.Blocks.size();
  auto InFlow = std::vector<uint64_t>(NumBlocks, 0);
  auto OutFlow = std::vector<uint64_t>(NumBlocks, 0);
  for (const auto &Jump : Func.Jumps) {
    InFlow[Jump.Target] += Jump.Flow;
    OutFlow[Jump.Source] += Jump.Flow;
  }

  uint64_t TotalInFlow = 0;
  uint64_t TotalOutFlow = 0;
  for (uint64_t I = 0; I < NumBlocks; I++) {
    auto &Block = Func.Blocks[I];
    if (Block.isEntry()) {
      TotalInFlow += Block.Flow;
      assert(Block.Flow == OutFlow[I] && "incorrectly computed control flow");
    } else if (Block.isExit()) {
      TotalOutFlow += Block.Flow;
      assert(Block.Flow == InFlow[I] && "incorrectly computed control flow");
    } else {
      assert(Block.Flow == OutFlow[I] && "incorrectly computed control flow");
      assert(Block.Flow == InFlow[I] && "incorrectly computed control flow");
    }
  }
  assert(TotalInFlow == TotalOutFlow && "incorrectly computed control flow");

  // Verify that there are no isolated flow components
  // One could modify FlowFunction to hold edges indexed by the sources, which
  // would avoid the creation of this object
  auto PositiveFlowEdges = std::vector<std::vector<uint64_t>>(NumBlocks);
  for (const auto &Jump : Func.Jumps) {
    if (Jump.Flow > 0) {
      PositiveFlowEdges[Jump.Source].push_back(Jump.Target);
    }
  }

  // Run BFS from the source along edges with positive flow
  std::queue<uint64_t> Queue;
  auto Visited = BitVector(NumBlocks, false);
  Queue.push(Func.Entry);
  Visited[Func.Entry] = true;
  while (!Queue.empty()) {
    uint64_t Src = Queue.front();
    Queue.pop();
    for (uint64_t Dst : PositiveFlowEdges[Src]) {
      if (!Visited[Dst]) {
        Queue.push(Dst);
        Visited[Dst] = true;
      }
    }
  }

  // Verify that every block that has a positive flow is reached from the source
  // along edges with a positive flow
  for (uint64_t I = 0; I < NumBlocks; I++) {
    auto &Block = Func.Blocks[I];
    assert((Visited[I] || Block.Flow == 0) && "an isolated flow component");
  }
}

} // end of anonymous namespace
/// Apply the profile inference algorithm for a given function and provided
/// profi options.
void llvm::applyFlowInference(const ProfiParams &Params, FlowFunction &Func) {
  // Check if the function has samples and assign initial flow values
  bool HasSamples = false;
  for (FlowBlock &Block : Func.Blocks) {
    if (Block.Weight > 0)
      HasSamples = true;
    Block.Flow = Block.Weight;
  }
  for (FlowJump &Jump : Func.Jumps) {
    if (Jump.Weight > 0)
      HasSamples = true;
    Jump.Flow = Jump.Weight;
  }

  // Quit early for functions with a single block or ones w/o samples
  if (Func.Blocks.size() <= 1 || !HasSamples)
    return;

#ifndef NDEBUG
  // Verify the input data
  verifyInput(Func);
#endif

  // Create and apply an inference network model
  auto InferenceNetwork = MinCostMaxFlow(Params);
  initializeNetwork(Params, InferenceNetwork, Func);
  InferenceNetwork.run();

  // Extract flow values for every block and every edge
  extractWeights(Params, InferenceNetwork, Func);

  // Post-processing adjustments to the flow
  auto Adjuster = FlowAdjuster(Params, Func);
  Adjuster.run();

#ifndef NDEBUG
  // Verify the result
  verifyOutput(Func);
#endif
}
/// Apply the profile inference algorithm for a given flow function.
void llvm::applyFlowInference(FlowFunction &Func) {
  ProfiParams Params;
  // Set the params from the command-line flags.
  Params.EvenFlowDistribution = SampleProfileEvenFlowDistribution;
  Params.RebalanceUnknown = SampleProfileRebalanceUnknown;
  Params.JoinIslands = SampleProfileJoinIslands;
  Params.CostBlockInc = SampleProfileProfiCostBlockInc;
  Params.CostBlockDec = SampleProfileProfiCostBlockDec;
  Params.CostBlockEntryInc = SampleProfileProfiCostBlockEntryInc;
  Params.CostBlockEntryDec = SampleProfileProfiCostBlockEntryDec;
  Params.CostBlockZeroInc = SampleProfileProfiCostBlockZeroInc;
  Params.CostBlockUnknownInc = SampleProfileProfiCostBlockUnknownInc;

  applyFlowInference(Params, Func);
}
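
// A caller-side sketch (hypothetical setup; the exact FlowFunction
// construction is up to the client, e.g. the sample profile loader):
//   FlowFunction Func;
//   // ... populate Func.Blocks, Func.Jumps and Func.Entry from the CFG and
//   // ... the sampled counts, marking blocks/jumps without samples via
//   // ... HasUnknownWeight ...
//   applyFlowInference(Func);
//   // Inferred counts are now available in Block.Flow and Jump.Flow.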