llvm/lib/Transforms/Utils/SampleProfileInference.cpp

   1 //===- SampleProfileInference.cpp - Adjust sample profiles in the IR ------===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 //
   9 // This file implements a profile inference algorithm. Given an incomplete and
  10 // possibly imprecise block counts, the algorithm reconstructs realistic block
  11 // and edge counts that satisfy flow conservation rules, while minimally modify
  12 // input block counts.
  13 //
  14 //===----------------------------------------------------------------------===//
  15
  16 #include "llvm/Transforms/Utils/SampleProfileInference.h"
  17 #include "llvm/ADT/BitVector.h"
  18 #include "llvm/Support/CommandLine.h"
  19 #include "llvm/Support/Debug.h"
  20 #include <queue>
  21 #include <set>
  22 #include <stack>
  23
  24 using namespace llvm;
  25 #define DEBUG_TYPE "sample-profile-inference"
  26
  27 namespace {
  28
  29 static cl::opt<bool> SampleProfileEvenCountDistribution(
  30     "sample-profile-even-count-distribution", cl::init(true), cl::Hidden,
  31     cl::desc("Try to evenly distribute counts when there are multiple equally "
  32              "likely options."));
  33
  34 static cl::opt<unsigned> SampleProfileMaxDfsCalls(
  35     "sample-profile-max-dfs-calls", cl::init(10), cl::Hidden,
  36     cl::desc("Maximum number of dfs iterations for even count distribution."));
  37
  38 static cl::opt<unsigned> SampleProfileProfiCostInc(
  39     "sample-profile-profi-cost-inc", cl::init(10), cl::Hidden,
  40     cl::desc("A cost of increasing a block's count by one."));
  41
  42 static cl::opt<unsigned> SampleProfileProfiCostDec(
  43     "sample-profile-profi-cost-dec", cl::init(20), cl::Hidden,
  44     cl::desc("A cost of decreasing a block's count by one."));
  45
  46 static cl::opt<unsigned> SampleProfileProfiCostIncZero(
  47     "sample-profile-profi-cost-inc-zero", cl::init(11), cl::Hidden,
  48     cl::desc("A cost of increasing a count of zero-weight block by one."));
  49
  50 static cl::opt<unsigned> SampleProfileProfiCostIncEntry(
  51     "sample-profile-profi-cost-inc-entry", cl::init(40), cl::Hidden,
  52     cl::desc("A cost of increasing the entry block's count by one."));
  53
  54 static cl::opt<unsigned> SampleProfileProfiCostDecEntry(
  55     "sample-profile-profi-cost-dec-entry", cl::init(10), cl::Hidden,
  56     cl::desc("A cost of decreasing the entry block's count by one."));
  57
  58 /// A value indicating an infinite flow/capacity/weight of a block/edge.
  59 /// Not using numeric_limits<int64_t>::max(), as the values can be summed up
  60 /// during the execution.
  61 static constexpr int64_t INF = ((int64_t)1) << 50;
  62
  63 /// The minimum-cost maximum flow algorithm.
  64 ///
  65 /// The algorithm finds the maximum flow of minimum cost on a given (directed)
  66 /// network using a modified version of the classical Moore-Bellman-Ford
  67 /// approach. The algorithm applies a number of augmentation iterations in which
  68 /// flow is sent along paths of positive capacity from the source to the sink.
  69 /// The worst-case time complexity of the implementation is O(v(f)*m*n), where
  70 /// where m is the number of edges, n is the number of vertices, and v(f) is the
  71 /// value of the maximum flow. However, the observed running time on typical
  72 /// instances is sub-quadratic, that is, o(n^2).
  73 ///
  74 /// The input is a set of edges with specified costs and capacities, and a pair
  75 /// of nodes (source and sink). The output is the flow along each edge of the
  76 /// minimum total cost respecting the given edge capacities.
  77 class MinCostMaxFlow {
  78 public:
  79   // Initialize algorithm's data structures for a network of a given size.
  80   void initialize(uint64_t NodeCount, uint64_t SourceNode, uint64_t SinkNode) {
  81     Source = SourceNode;
  82     Target = SinkNode;
  83
  84     Nodes = std::vector<Node>(NodeCount);
  85     Edges = std::vector<std::vector<Edge>>(NodeCount, std::vector<Edge>());
  86     if (SampleProfileEvenCountDistribution)
  87       AugmentingEdges =
  88           std::vector<std::vector<Edge *>>(NodeCount, std::vector<Edge *>());
  89   }
  90
  91   // Run the algorithm.
  92   int64_t run() {
  93     // Iteratively find an augmentation path/dag in the network and send the
  94     // flow along its edges
  95     size_t AugmentationIters = applyFlowAugmentation();
  96
  97     // Compute the total flow and its cost
  98     int64_t TotalCost = 0;
  99     int64_t TotalFlow = 0;
 100     for (uint64_t Src = 0; Src < Nodes.size(); Src++) {
 101       for (auto &Edge : Edges[Src]) {
 102         if (Edge.Flow > 0) {
 103           TotalCost += Edge.Cost * Edge.Flow;
 104           if (Src == Source)
 105             TotalFlow += Edge.Flow;
 106         }
 107       }
 108     }
 109     LLVM_DEBUG(dbgs() << "Completed profi after " << AugmentationIters
 110                       << " iterations with " << TotalFlow << " total flow"
 111                       << " of " << TotalCost << " cost\n");
 112     (void)TotalFlow;
 113     (void)AugmentationIters;
 114     return TotalCost;
 115   }
 116
 117   /// Adding an edge to the network with a specified capacity and a cost.
 118   /// Multiple edges between a pair of nodes are allowed but self-edges
 119   /// are not supported.
 120   void addEdge(uint64_t Src, uint64_t Dst, int64_t Capacity, int64_t Cost) {
 121     assert(Capacity > 0 && "adding an edge of zero capacity");
 122     assert(Src != Dst && "loop edge are not supported");
 123
 124     Edge SrcEdge;
 125     SrcEdge.Dst = Dst;
 126     SrcEdge.Cost = Cost;
 127     SrcEdge.Capacity = Capacity;
 128     SrcEdge.Flow = 0;
 129     SrcEdge.RevEdgeIndex = Edges[Dst].size();
 130
 131     Edge DstEdge;
 132     DstEdge.Dst = Src;
 133     DstEdge.Cost = -Cost;
 134     DstEdge.Capacity = 0;
 135     DstEdge.Flow = 0;
 136     DstEdge.RevEdgeIndex = Edges[Src].size();
 137
 138     Edges[Src].push_back(SrcEdge);
 139     Edges[Dst].push_back(DstEdge);
 140   }
 141
 142   /// Adding an edge to the network of infinite capacity and a given cost.
 143   void addEdge(uint64_t Src, uint64_t Dst, int64_t Cost) {
 144     addEdge(Src, Dst, INF, Cost);
 145   }
 146
 147   /// Get the total flow from a given source node.
 148   /// Returns a list of pairs (target node, amount of flow to the target).
 149   const std::vector<std::pair<uint64_t, int64_t>> getFlow(uint64_t Src) const {
 150     std::vector<std::pair<uint64_t, int64_t>> Flow;
 151     for (const auto &Edge : Edges[Src]) {
 152       if (Edge.Flow > 0)
 153         Flow.push_back(std::make_pair(Edge.Dst, Edge.Flow));
 154     }
 155     return Flow;
 156   }
 157
 158   /// Get the total flow between a pair of nodes.
 159   int64_t getFlow(uint64_t Src, uint64_t Dst) const {
 160     int64_t Flow = 0;
 161     for (const auto &Edge : Edges[Src]) {
 162       if (Edge.Dst == Dst) {
 163         Flow += Edge.Flow;
 164       }
 165     }
 166     return Flow;
 167   }
 168
 169   /// A cost of taking an unlikely jump.
 170   static constexpr int64_t AuxCostUnlikely = ((int64_t)1) << 30;
 171   /// Minimum BaseDistance for the jump distance values in island joining.
 172   static constexpr uint64_t MinBaseDistance = 10000;
 173
 174 private:
 175   /// Iteratively find an augmentation path/dag in the network and send the
 176   /// flow along its edges. The method returns the number of applied iterations.
 177   size_t applyFlowAugmentation() {
 178     size_t AugmentationIters = 0;
 179     while (findAugmentingPath()) {
 180       uint64_t PathCapacity = computeAugmentingPathCapacity();
 181       while (PathCapacity > 0) {
 182         bool Progress = false;
 183         if (SampleProfileEvenCountDistribution) {
 184           // Identify node/edge candidates for augmentation
 185           identifyShortestEdges(PathCapacity);
 186
 187           // Find an augmenting DAG
 188           auto AugmentingOrder = findAugmentingDAG();
 189
 190           // Apply the DAG augmentation
 191           Progress = augmentFlowAlongDAG(AugmentingOrder);
 192           PathCapacity = computeAugmentingPathCapacity();
 193         }
 194
 195         if (!Progress) {
 196           augmentFlowAlongPath(PathCapacity);
 197           PathCapacity = 0;
 198         }
 199
 200         AugmentationIters++;
 201       }
 202     }
 203     return AugmentationIters;
 204   }
 205
 206   /// Compute the capacity of the cannonical augmenting path. If the path is
 207   /// saturated (that is, no flow can be sent along the path), then return 0.
 208   uint64_t computeAugmentingPathCapacity() {
 209     uint64_t PathCapacity = INF;
 210     uint64_t Now = Target;
 211     while (Now != Source) {
 212       uint64_t Pred = Nodes[Now].ParentNode;
 213       auto &Edge = Edges[Pred][Nodes[Now].ParentEdgeIndex];
 214
 215       assert(Edge.Capacity >= Edge.Flow && "incorrect edge flow");
 216       uint64_t EdgeCapacity = uint64_t(Edge.Capacity - Edge.Flow);
 217       PathCapacity = std::min(PathCapacity, EdgeCapacity);
 218
 219       Now = Pred;
 220     }
 221     return PathCapacity;
 222   }
 223
 224   /// Check for existence of an augmenting path with a positive capacity.
 225   bool findAugmentingPath() {
 226     // Initialize data structures
 227     for (auto &Node : Nodes) {
 228       Node.Distance = INF;
 229       Node.ParentNode = uint64_t(-1);
 230       Node.ParentEdgeIndex = uint64_t(-1);
 231       Node.Taken = false;
 232     }
 233
 234     std::queue<uint64_t> Queue;
 235     Queue.push(Source);
 236     Nodes[Source].Distance = 0;
 237     Nodes[Source].Taken = true;
 238     while (!Queue.empty()) {
 239       uint64_t Src = Queue.front();
 240       Queue.pop();
 241       Nodes[Src].Taken = false;
 242       // Although the residual network contains edges with negative costs
 243       // (in particular, backward edges), it can be shown that there are no
 244       // negative-weight cycles and the following two invariants are maintained:
 245       // (i) Dist[Source, V] >= 0 and (ii) Dist[V, Target] >= 0 for all nodes V,
 246       // where Dist is the length of the shortest path between two nodes. This
 247       // allows to prune the search-space of the path-finding algorithm using
 248       // the following early-stop criteria:
 249       // -- If we find a path with zero-distance from Source to Target, stop the
 250       //    search, as the path is the shortest since Dist[Source, Target] >= 0;
 251       // -- If we have Dist[Source, V] > Dist[Source, Target], then do not
 252       //    process node V, as it is guaranteed _not_ to be on a shortest path
 253       //    from Source to Target; it follows from inequalities
 254       //    Dist[Source, Target] >= Dist[Source, V] + Dist[V, Target]
 255       //                         >= Dist[Source, V]
 256       if (!SampleProfileEvenCountDistribution && Nodes[Target].Distance == 0)
 257         break;
 258       if (Nodes[Src].Distance > Nodes[Target].Distance)
 259         continue;
 260
 261       // Process adjacent edges
 262       for (uint64_t EdgeIdx = 0; EdgeIdx < Edges[Src].size(); EdgeIdx++) {
 263         auto &Edge = Edges[Src][EdgeIdx];
 264         if (Edge.Flow < Edge.Capacity) {
 265           uint64_t Dst = Edge.Dst;
 266           int64_t NewDistance = Nodes[Src].Distance + Edge.Cost;
 267           if (Nodes[Dst].Distance > NewDistance) {
 268             // Update the distance and the parent node/edge
 269             Nodes[Dst].Distance = NewDistance;
 270             Nodes[Dst].ParentNode = Src;
 271             Nodes[Dst].ParentEdgeIndex = EdgeIdx;
 272             // Add the node to the queue, if it is not there yet
 273             if (!Nodes[Dst].Taken) {
 274               Queue.push(Dst);
 275               Nodes[Dst].Taken = true;
 276             }
 277           }
 278         }
 279       }
 280     }
 281
 282     return Nodes[Target].Distance != INF;
 283   }
 284
 285   /// Update the current flow along the augmenting path.
 286   void augmentFlowAlongPath(uint64_t PathCapacity) {
 287     assert(PathCapacity > 0 && "found an incorrect augmenting path");
 288     uint64_t Now = Target;
 289     while (Now != Source) {
 290       uint64_t Pred = Nodes[Now].ParentNode;
 291       auto &Edge = Edges[Pred][Nodes[Now].ParentEdgeIndex];
 292       auto &RevEdge = Edges[Now][Edge.RevEdgeIndex];
 293
 294       Edge.Flow += PathCapacity;
 295       RevEdge.Flow -= PathCapacity;
 296
 297       Now = Pred;
 298     }
 299   }
 300
 301   /// Find an Augmenting DAG order using a modified version of DFS in which we
 302   /// can visit a node multiple times. In the DFS search, when scanning each
 303   /// edge out of a node, continue search at Edge.Dst endpoint if it has not
 304   /// been discovered yet and its NumCalls < MaxDfsCalls. The algorithm
 305   /// runs in O(MaxDfsCalls * |Edges| + |Nodes|) time.
 306   /// It returns an Augmenting Order (Taken nodes in decreasing Finish time)
 307   /// that starts with Source and ends with Target.
 308   std::vector<uint64_t> findAugmentingDAG() {
 309     // We use a stack based implemenation of DFS to avoid recursion.
 310     // Defining DFS data structures:
 311     // A pair (NodeIdx, EdgeIdx) at the top of the Stack denotes that
 312     //  - we are currently visiting Nodes[NodeIdx] and
 313     //  - the next edge to scan is Edges[NodeIdx][EdgeIdx]
 314     typedef std::pair<uint64_t, uint64_t> StackItemType;
 315     std::stack<StackItemType> Stack;
 316     std::vector<uint64_t> AugmentingOrder;
 317
 318     // Phase 0: Initialize Node attributes and Time for DFS run
 319     for (auto &Node : Nodes) {
 320       Node.Discovery = 0;
 321       Node.Finish = 0;
 322       Node.NumCalls = 0;
 323       Node.Taken = false;
 324     }
 325     uint64_t Time = 0;
 326     // Mark Target as Taken
 327     // Taken attribute will be propagated backwards from Target towards Source
 328     Nodes[Target].Taken = true;
 329
 330     // Phase 1: Start DFS traversal from Source
 331     Stack.emplace(Source, 0);
 332     Nodes[Source].Discovery = ++Time;
 333     while (!Stack.empty()) {
 334       auto NodeIdx = Stack.top().first;
 335       auto EdgeIdx = Stack.top().second;
 336
 337       // If we haven't scanned all edges out of NodeIdx, continue scanning
 338       if (EdgeIdx < Edges[NodeIdx].size()) {
 339         auto &Edge = Edges[NodeIdx][EdgeIdx];
 340         auto &Dst = Nodes[Edge.Dst];
 341         Stack.top().second++;
 342
 343         if (Edge.OnShortestPath) {
 344           // If we haven't seen Edge.Dst so far, continue DFS search there
 345           if (Dst.Discovery == 0 && Dst.NumCalls < SampleProfileMaxDfsCalls) {
 346             Dst.Discovery = ++Time;
 347             Stack.emplace(Edge.Dst, 0);
 348             Dst.NumCalls++;
 349           } else if (Dst.Taken && Dst.Finish != 0) {
 350             // Else, if Edge.Dst already have a path to Target, so that NodeIdx
 351             Nodes[NodeIdx].Taken = true;
 352           }
 353         }
 354       } else {
 355         // If we are done scanning all edge out of NodeIdx
 356         Stack.pop();
 357         // If we haven't found a path from NodeIdx to Target, forget about it
 358         if (!Nodes[NodeIdx].Taken) {
 359           Nodes[NodeIdx].Discovery = 0;
 360         } else {
 361           // If we have found a path from NodeIdx to Target, then finish NodeIdx
 362           // and propagate Taken flag to DFS parent unless at the Source
 363           Nodes[NodeIdx].Finish = ++Time;
 364           // NodeIdx == Source if and only if the stack is empty
 365           if (NodeIdx != Source) {
 366             assert(!Stack.empty() && "empty stack while running dfs");
 367             Nodes[Stack.top().first].Taken = true;
 368           }
 369           AugmentingOrder.push_back(NodeIdx);
 370         }
 371       }
 372     }
 373     // Nodes are collected decreasing Finish time, so the order is reversed
 374     std::reverse(AugmentingOrder.begin(), AugmentingOrder.end());
 375
 376     // Phase 2: Extract all forward (DAG) edges and fill in AugmentingEdges
 377     for (size_t Src : AugmentingOrder) {
 378       AugmentingEdges[Src].clear();
 379       for (auto &Edge : Edges[Src]) {
 380         uint64_t Dst = Edge.Dst;
 381         if (Edge.OnShortestPath && Nodes[Src].Taken && Nodes[Dst].Taken &&
 382             Nodes[Dst].Finish < Nodes[Src].Finish) {
 383           AugmentingEdges[Src].push_back(&Edge);
 384         }
 385       }
 386       assert((Src == Target || !AugmentingEdges[Src].empty()) &&
 387              "incorrectly constructed augmenting edges");
 388     }
 389
 390     return AugmentingOrder;
 391   }
 392
 393   /// Update the current flow along the given (acyclic) subgraph specified by
 394   /// the vertex order, AugmentingOrder. The objective is to send as much flow
 395   /// as possible while evenly distributing flow among successors of each node.
 396   /// After the update at least one edge is saturated.
 397   bool augmentFlowAlongDAG(const std::vector<uint64_t> &AugmentingOrder) {
 398     // Phase 0: Initialization
 399     for (uint64_t Src : AugmentingOrder) {
 400       Nodes[Src].FracFlow = 0;
 401       Nodes[Src].IntFlow = 0;
 402       for (auto &Edge : AugmentingEdges[Src]) {
 403         Edge->AugmentedFlow = 0;
 404       }
 405     }
 406
 407     // Phase 1: Send a unit of fractional flow along the DAG
 408     uint64_t MaxFlowAmount = INF;
 409     Nodes[Source].FracFlow = 1.0;
 410     for (uint64_t Src : AugmentingOrder) {
 411       assert((Src == Target || Nodes[Src].FracFlow > 0.0) &&
 412              "incorrectly computed fractional flow");
 413       // Distribute flow evenly among successors of Src
 414       uint64_t Degree = AugmentingEdges[Src].size();
 415       for (auto &Edge : AugmentingEdges[Src]) {
 416         double EdgeFlow = Nodes[Src].FracFlow / Degree;
 417         Nodes[Edge->Dst].FracFlow += EdgeFlow;
 418         if (Edge->Capacity == INF)
 419           continue;
 420         uint64_t MaxIntFlow = double(Edge->Capacity - Edge->Flow) / EdgeFlow;
 421         MaxFlowAmount = std::min(MaxFlowAmount, MaxIntFlow);
 422       }
 423     }
 424     // Stop early if we cannot send any (integral) flow from Source to Target
 425     if (MaxFlowAmount == 0)
 426       return false;
 427
 428     // Phase 2: Send an integral flow of MaxFlowAmount
 429     Nodes[Source].IntFlow = MaxFlowAmount;
 430     for (uint64_t Src : AugmentingOrder) {
 431       if (Src == Target)
 432         break;
 433       // Distribute flow evenly among successors of Src, rounding up to make
 434       // sure all flow is sent
 435       uint64_t Degree = AugmentingEdges[Src].size();
 436       // We are guaranteeed that Node[Src].IntFlow <= SuccFlow * Degree
 437       uint64_t SuccFlow = (Nodes[Src].IntFlow + Degree - 1) / Degree;
 438       for (auto &Edge : AugmentingEdges[Src]) {
 439         uint64_t Dst = Edge->Dst;
 440         uint64_t EdgeFlow = std::min(Nodes[Src].IntFlow, SuccFlow);
 441         EdgeFlow = std::min(EdgeFlow, uint64_t(Edge->Capacity - Edge->Flow));
 442         Nodes[Dst].IntFlow += EdgeFlow;
 443         Nodes[Src].IntFlow -= EdgeFlow;
 444         Edge->AugmentedFlow += EdgeFlow;
 445       }
 446     }
 447     assert(Nodes[Target].IntFlow <= MaxFlowAmount);
 448     Nodes[Target].IntFlow = 0;
 449
 450     // Phase 3: Send excess flow back traversing the nodes backwards.
 451     // Because of rounding, not all flow can be sent along the edges of Src.
 452     // Hence, sending the remaining flow back to maintain flow conservation
 453     for (size_t Idx = AugmentingOrder.size() - 1; Idx > 0; Idx--) {
 454       uint64_t Src = AugmentingOrder[Idx - 1];
 455       // Try to send excess flow back along each edge.
 456       // Make sure we only send back flow we just augmented (AugmentedFlow).
 457       for (auto &Edge : AugmentingEdges[Src]) {
 458         uint64_t Dst = Edge->Dst;
 459         if (Nodes[Dst].IntFlow == 0)
 460           continue;
 461         uint64_t EdgeFlow = std::min(Nodes[Dst].IntFlow, Edge->AugmentedFlow);
 462         Nodes[Dst].IntFlow -= EdgeFlow;
 463         Nodes[Src].IntFlow += EdgeFlow;
 464         Edge->AugmentedFlow -= EdgeFlow;
 465       }
 466     }
 467
 468     // Phase 4: Update flow values along all edges
 469     bool HasSaturatedEdges = false;
 470     for (uint64_t Src : AugmentingOrder) {
 471       // Verify that we have sent all the excess flow from the node
 472       assert(Src == Source || Nodes[Src].IntFlow == 0);
 473       for (auto &Edge : AugmentingEdges[Src]) {
 474         assert(uint64_t(Edge->Capacity - Edge->Flow) >= Edge->AugmentedFlow);
 475         // Update flow values along the edge and its reverse copy
 476         auto &RevEdge = Edges[Edge->Dst][Edge->RevEdgeIndex];
 477         Edge->Flow += Edge->AugmentedFlow;
 478         RevEdge.Flow -= Edge->AugmentedFlow;
 479         if (Edge->Capacity == Edge->Flow && Edge->AugmentedFlow > 0)
 480           HasSaturatedEdges = true;
 481       }
 482     }
 483
 484     // The augmentation is successful iff at least one edge becomes saturated
 485     return HasSaturatedEdges;
 486   }
 487
 488   /// Identify candidate (shortest) edges for augmentation.
 489   void identifyShortestEdges(uint64_t PathCapacity) {
 490     assert(PathCapacity > 0 && "found an incorrect augmenting DAG");
 491     // To make sure the augmentation DAG contains only edges with large residual
 492     // capacity, we prune all edges whose capacity is below a fraction of
 493     // the capacity of the augmented path.
 494     // (All edges of the path itself are always in the DAG)
 495     uint64_t MinCapacity = std::max(PathCapacity / 2, uint64_t(1));
 496
 497     // Decide which edges are on a shortest path from Source to Target
 498     for (size_t Src = 0; Src < Nodes.size(); Src++) {
 499       // An edge cannot be augmenting if the endpoint has large distance
 500       if (Nodes[Src].Distance > Nodes[Target].Distance)
 501         continue;
 502
 503       for (auto &Edge : Edges[Src]) {
 504         uint64_t Dst = Edge.Dst;
 505         Edge.OnShortestPath =
 506             Src != Target && Dst != Source &&
 507             Nodes[Dst].Distance <= Nodes[Target].Distance &&
 508             Nodes[Dst].Distance == Nodes[Src].Distance + Edge.Cost &&
 509             Edge.Capacity > Edge.Flow &&
 510             uint64_t(Edge.Capacity - Edge.Flow) >= MinCapacity;
 511       }
 512     }
 513   }
 514
 515   /// A node in a flow network.
 516   struct Node {
 517     /// The cost of the cheapest path from the source to the current node.
 518     int64_t Distance;
 519     /// The node preceding the current one in the path.
 520     uint64_t ParentNode;
 521     /// The index of the edge between ParentNode and the current node.
 522     uint64_t ParentEdgeIndex;
 523     /// An indicator of whether the current node is in a queue.
 524     bool Taken;
 525
 526     /// Data fields utilized in DAG-augmentation:
 527     /// Fractional flow.
 528     double FracFlow;
 529     /// Integral flow.
 530     uint64_t IntFlow;
 531     /// Discovery time.
 532     uint64_t Discovery;
 533     /// Finish time.
 534     uint64_t Finish;
 535     /// NumCalls.
 536     uint64_t NumCalls;
 537   };
 538
 539   /// An edge in a flow network.
 540   struct Edge {
 541     /// The cost of the edge.
 542     int64_t Cost;
 543     /// The capacity of the edge.
 544     int64_t Capacity;
 545     /// The current flow on the edge.
 546     int64_t Flow;
 547     /// The destination node of the edge.
 548     uint64_t Dst;
 549     /// The index of the reverse edge between Dst and the current node.
 550     uint64_t RevEdgeIndex;
 551
 552     /// Data fields utilized in DAG-augmentation:
 553     /// Whether the edge is currently on a shortest path from Source to Target.
 554     bool OnShortestPath;
 555     /// Extra flow along the edge.
 556     uint64_t AugmentedFlow;
 557   };
 558
 559   /// The set of network nodes.
 560   std::vector<Node> Nodes;
 561   /// The set of network edges.
 562   std::vector<std::vector<Edge>> Edges;
 563   /// Source node of the flow.
 564   uint64_t Source;
 565   /// Target (sink) node of the flow.
 566   uint64_t Target;
 567   /// Augmenting edges.
 568   std::vector<std::vector<Edge *>> AugmentingEdges;
 569 };
 570
 571 constexpr int64_t MinCostMaxFlow::AuxCostUnlikely;
 572 constexpr uint64_t MinCostMaxFlow::MinBaseDistance;
 573
 574 /// A post-processing adjustment of control flow. It applies two steps by
 575 /// rerouting some flow and making it more realistic:
 576 ///
 577 /// - First, it removes all isolated components ("islands") with a positive flow
 578 ///   that are unreachable from the entry block. For every such component, we
 579 ///   find the shortest from the entry to an exit passing through the component,
 580 ///   and increase the flow by one unit along the path.
 581 ///
 582 /// - Second, it identifies all "unknown subgraphs" consisting of basic blocks
 583 ///   with no sampled counts. Then it rebalnces the flow that goes through such
 584 ///   a subgraph so that each branch is taken with probability 50%.
 585 ///   An unknown subgraph is such that for every two nodes u and v:
 586 ///     - u dominates v and u is not unknown;
 587 ///     - v post-dominates u; and
 588 ///     - all inner-nodes of all (u,v)-paths are unknown.
 589 ///
 590 class FlowAdjuster {
 591 public:
 592   FlowAdjuster(FlowFunction &Func) : Func(Func) {
 593     assert(Func.Blocks[Func.Entry].isEntry() &&
 594            "incorrect index of the entry block");
 595   }
 596
 597   // Run the post-processing
 598   void run() {
 599     /// Adjust the flow to get rid of isolated components.
 600     joinIsolatedComponents();
 601
 602     /// Rebalance the flow inside unknown subgraphs.
 603     rebalanceUnknownSubgraphs();
 604   }
 605
 606 private:
 607   void joinIsolatedComponents() {
 608     // Find blocks that are reachable from the source
 609     auto Visited = BitVector(NumBlocks(), false);
 610     findReachable(Func.Entry, Visited);
 611
 612     // Iterate over all non-reachable blocks and adjust their weights
 613     for (uint64_t I = 0; I < NumBlocks(); I++) {
 614       auto &Block = Func.Blocks[I];
 615       if (Block.Flow > 0 && !Visited[I]) {
 616         // Find a path from the entry to an exit passing through the block I
 617         auto Path = findShortestPath(I);
 618         // Increase the flow along the path
 619         assert(Path.size() > 0 && Path[0]->Source == Func.Entry &&
 620                "incorrectly computed path adjusting control flow");
 621         Func.Blocks[Func.Entry].Flow += 1;
 622         for (auto &Jump : Path) {
 623           Jump->Flow += 1;
 624           Func.Blocks[Jump->Target].Flow += 1;
 625           // Update reachability
 626           findReachable(Jump->Target, Visited);
 627         }
 628       }
 629     }
 630   }
 631
 632   /// Run BFS from a given block along the jumps with a positive flow and mark
 633   /// all reachable blocks.
 634   void findReachable(uint64_t Src, BitVector &Visited) {
 635     if (Visited[Src])
 636       return;
 637     std::queue<uint64_t> Queue;
 638     Queue.push(Src);
 639     Visited[Src] = true;
 640     while (!Queue.empty()) {
 641       Src = Queue.front();
 642       Queue.pop();
 643       for (auto Jump : Func.Blocks[Src].SuccJumps) {
 644         uint64_t Dst = Jump->Target;
 645         if (Jump->Flow > 0 && !Visited[Dst]) {
 646           Queue.push(Dst);
 647           Visited[Dst] = true;
 648         }
 649       }
 650     }
 651   }
 652
 653   /// Find the shortest path from the entry block to an exit block passing
 654   /// through a given block.
 655   std::vector<FlowJump *> findShortestPath(uint64_t BlockIdx) {
 656     // A path from the entry block to BlockIdx
 657     auto ForwardPath = findShortestPath(Func.Entry, BlockIdx);
 658     // A path from BlockIdx to an exit block
 659     auto BackwardPath = findShortestPath(BlockIdx, AnyExitBlock);
 660
 661     // Concatenate the two paths
 662     std::vector<FlowJump *> Result;
 663     Result.insert(Result.end(), ForwardPath.begin(), ForwardPath.end());
 664     Result.insert(Result.end(), BackwardPath.begin(), BackwardPath.end());
 665     return Result;
 666   }
 667
 668   /// Apply the Dijkstra algorithm to find the shortest path from a given
 669   /// Source to a given Target block.
 670   /// If Target == -1, then the path ends at an exit block.
 671   std::vector<FlowJump *> findShortestPath(uint64_t Source, uint64_t Target) {
 672     // Quit early, if possible
 673     if (Source == Target)
 674       return std::vector<FlowJump *>();
 675     if (Func.Blocks[Source].isExit() && Target == AnyExitBlock)
 676       return std::vector<FlowJump *>();
 677
 678     // Initialize data structures
 679     auto Distance = std::vector<int64_t>(NumBlocks(), INF);
 680     auto Parent = std::vector<FlowJump *>(NumBlocks(), nullptr);
 681     Distance[Source] = 0;
 682     std::set<std::pair<uint64_t, uint64_t>> Queue;
 683     Queue.insert(std::make_pair(Distance[Source], Source));
 684
 685     // Run the Dijkstra algorithm
 686     while (!Queue.empty()) {
 687       uint64_t Src = Queue.begin()->second;
 688       Queue.erase(Queue.begin());
 689       // If we found a solution, quit early
 690       if (Src == Target ||
 691           (Func.Blocks[Src].isExit() && Target == AnyExitBlock))
 692         break;
 693
 694       for (auto Jump : Func.Blocks[Src].SuccJumps) {
 695         uint64_t Dst = Jump->Target;
 696         int64_t JumpDist = jumpDistance(Jump);
 697         if (Distance[Dst] > Distance[Src] + JumpDist) {
 698           Queue.erase(std::make_pair(Distance[Dst], Dst));
 699
 700           Distance[Dst] = Distance[Src] + JumpDist;
 701           Parent[Dst] = Jump;
 702
 703           Queue.insert(std::make_pair(Distance[Dst], Dst));
 704         }
 705       }
 706     }
 707     // If Target is not provided, find the closest exit block
 708     if (Target == AnyExitBlock) {
 709       for (uint64_t I = 0; I < NumBlocks(); I++) {
 710         if (Func.Blocks[I].isExit() && Parent[I] != nullptr) {
 711           if (Target == AnyExitBlock || Distance[Target] > Distance[I]) {
 712             Target = I;
 713           }
 714         }
 715       }
 716     }
 717     assert(Parent[Target] != nullptr && "a path does not exist");
 718
 719     // Extract the constructed path
 720     std::vector<FlowJump *> Result;
 721     uint64_t Now = Target;
 722     while (Now != Source) {
 723       assert(Now == Parent[Now]->Target && "incorrect parent jump");
 724       Result.push_back(Parent[Now]);
 725       Now = Parent[Now]->Source;
 726     }
 727     // Reverse the path, since it is extracted from Target to Source
 728     std::reverse(Result.begin(), Result.end());
 729     return Result;
 730   }
 731
 732   /// A distance of a path for a given jump.
 733   /// In order to incite the path to use blocks/jumps with large positive flow,
 734   /// and avoid changing branch probability of outgoing edges drastically,
 735   /// set the jump distance so as:
 736   ///   - to minimize the number of unlikely jumps used and subject to that,
 737   ///   - to minimize the number of Flow == 0 jumps used and subject to that,
 738   ///   - minimizes total multiplicative Flow increase for the remaining edges.
 739   /// To capture this objective with integer distances, we round off fractional
 740   /// parts to a multiple of 1 / BaseDistance.
 741   int64_t jumpDistance(FlowJump *Jump) const {
 742     uint64_t BaseDistance =
 743         std::max(MinCostMaxFlow::MinBaseDistance,
 744                  std::min(Func.Blocks[Func.Entry].Flow,
 745                           MinCostMaxFlow::AuxCostUnlikely / NumBlocks()));
 746     if (Jump->IsUnlikely)
 747       return MinCostMaxFlow::AuxCostUnlikely;
 748     if (Jump->Flow > 0)
 749       return BaseDistance + BaseDistance / Jump->Flow;
 750     return BaseDistance * NumBlocks();
 751   };
 752
 753   uint64_t NumBlocks() const { return Func.Blocks.size(); }
 754
 755   /// Rebalance unknown subgraphs so that the flow is split evenly across the
 756   /// outgoing branches of every block of the subgraph. The method iterates over
 757   /// blocks with known weight and identifies unknown subgraphs rooted at the
 758   /// blocks. Then it verifies if flow rebalancing is feasible and applies it.
 759   void rebalanceUnknownSubgraphs() {
 760     // Try to find unknown subgraphs from each block
 761     for (uint64_t I = 0; I < Func.Blocks.size(); I++) {
 762       auto SrcBlock = &Func.Blocks[I];
 763       // Verify if rebalancing rooted at SrcBlock is feasible
 764       if (!canRebalanceAtRoot(SrcBlock))
 765         continue;
 766
 767       // Find an unknown subgraphs starting at SrcBlock. Along the way,
 768       // fill in known destinations and intermediate unknown blocks.
 769       std::vector<FlowBlock *> UnknownBlocks;
 770       std::vector<FlowBlock *> KnownDstBlocks;
 771       findUnknownSubgraph(SrcBlock, KnownDstBlocks, UnknownBlocks);
 772
 773       // Verify if rebalancing of the subgraph is feasible. If the search is
 774       // successful, find the unique destination block (which can be null)
 775       FlowBlock *DstBlock = nullptr;
 776       if (!canRebalanceSubgraph(SrcBlock, KnownDstBlocks, UnknownBlocks,
 777                                 DstBlock))
 778         continue;
 779
 780       // We cannot rebalance subgraphs containing cycles among unknown blocks
 781       if (!isAcyclicSubgraph(SrcBlock, DstBlock, UnknownBlocks))
 782         continue;
 783
 784       // Rebalance the flow
 785       rebalanceUnknownSubgraph(SrcBlock, DstBlock, UnknownBlocks);
 786     }
 787   }
 788
 789   /// Verify if rebalancing rooted at a given block is possible.
 790   bool canRebalanceAtRoot(const FlowBlock *SrcBlock) {
 791     // Do not attempt to find unknown subgraphs from an unknown or a
 792     // zero-flow block
 793     if (SrcBlock->UnknownWeight || SrcBlock->Flow == 0)
 794       return false;
 795
 796     // Do not attempt to process subgraphs from a block w/o unknown sucessors
 797     bool HasUnknownSuccs = false;
 798     for (auto Jump : SrcBlock->SuccJumps) {
 799       if (Func.Blocks[Jump->Target].UnknownWeight) {
 800         HasUnknownSuccs = true;
 801         break;
 802       }
 803     }
 804     if (!HasUnknownSuccs)
 805       return false;
 806
 807     return true;
 808   }
 809
 810   /// Find an unknown subgraph starting at block SrcBlock. The method sets
 811   /// identified destinations, KnownDstBlocks, and intermediate UnknownBlocks.
 812   void findUnknownSubgraph(const FlowBlock *SrcBlock,
 813                            std::vector<FlowBlock *> &KnownDstBlocks,
 814                            std::vector<FlowBlock *> &UnknownBlocks) {
 815     // Run BFS from SrcBlock and make sure all paths are going through unknown
 816     // blocks and end at a known DstBlock
 817     auto Visited = BitVector(NumBlocks(), false);
 818     std::queue<uint64_t> Queue;
 819
 820     Queue.push(SrcBlock->Index);
 821     Visited[SrcBlock->Index] = true;
 822     while (!Queue.empty()) {
 823       auto &Block = Func.Blocks[Queue.front()];
 824       Queue.pop();
 825       // Process blocks reachable from Block
 826       for (auto Jump : Block.SuccJumps) {
 827         // If Jump can be ignored, skip it
 828         if (ignoreJump(SrcBlock, nullptr, Jump))
 829           continue;
 830
 831         uint64_t Dst = Jump->Target;
 832         // If Dst has been visited, skip Jump
 833         if (Visited[Dst])
 834           continue;
 835         // Process block Dst
 836         Visited[Dst] = true;
 837         if (!Func.Blocks[Dst].UnknownWeight) {
 838           KnownDstBlocks.push_back(&Func.Blocks[Dst]);
 839         } else {
 840           Queue.push(Dst);
 841           UnknownBlocks.push_back(&Func.Blocks[Dst]);
 842         }
 843       }
 844     }
 845   }
 846
 847   /// Verify if rebalancing of the subgraph is feasible. If the checks are
 848   /// successful, set the unique destination block, DstBlock (can be null).
 849   bool canRebalanceSubgraph(const FlowBlock *SrcBlock,
 850                             const std::vector<FlowBlock *> &KnownDstBlocks,
 851                             const std::vector<FlowBlock *> &UnknownBlocks,
 852                             FlowBlock *&DstBlock) {
 853     // If the list of unknown blocks is empty, we don't need rebalancing
 854     if (UnknownBlocks.empty())
 855       return false;
 856
 857     // If there are multiple known sinks, we can't rebalance
 858     if (KnownDstBlocks.size() > 1)
 859       return false;
 860     DstBlock = KnownDstBlocks.empty() ? nullptr : KnownDstBlocks.front();
 861
 862     // Verify sinks of the subgraph
 863     for (auto Block : UnknownBlocks) {
 864       if (Block->SuccJumps.empty()) {
 865         // If there are multiple (known and unknown) sinks, we can't rebalance
 866         if (DstBlock != nullptr)
 867           return false;
 868         continue;
 869       }
 870       size_t NumIgnoredJumps = 0;
 871       for (auto Jump : Block->SuccJumps) {
 872         if (ignoreJump(SrcBlock, DstBlock, Jump))
 873           NumIgnoredJumps++;
 874       }
 875       // If there is a non-sink block in UnknownBlocks with all jumps ignored,
 876       // then we can't rebalance
 877       if (NumIgnoredJumps == Block->SuccJumps.size())
 878         return false;
 879     }
 880
 881     return true;
 882   }
 883
 884   /// Decide whether the Jump is ignored while processing an unknown subgraphs
 885   /// rooted at basic block SrcBlock with the destination block, DstBlock.
 886   bool ignoreJump(const FlowBlock *SrcBlock, const FlowBlock *DstBlock,
 887                   const FlowJump *Jump) {
 888     // Ignore unlikely jumps with zero flow
 889     if (Jump->IsUnlikely && Jump->Flow == 0)
 890       return true;
 891
 892     auto JumpSource = &Func.Blocks[Jump->Source];
 893     auto JumpTarget = &Func.Blocks[Jump->Target];
 894
 895     // Do not ignore jumps coming into DstBlock
 896     if (DstBlock != nullptr && JumpTarget == DstBlock)
 897       return false;
 898
 899     // Ignore jumps out of SrcBlock to known blocks
 900     if (!JumpTarget->UnknownWeight && JumpSource == SrcBlock)
 901       return true;
 902
 903     // Ignore jumps to known blocks with zero flow
 904     if (!JumpTarget->UnknownWeight && JumpTarget->Flow == 0)
 905       return true;
 906
 907     return false;
 908   }
 909
 910   /// Verify if the given unknown subgraph is acyclic, and if yes, reorder
 911   /// UnknownBlocks in the topological order (so that all jumps are "forward").
 912   bool isAcyclicSubgraph(const FlowBlock *SrcBlock, const FlowBlock *DstBlock,
 913                          std::vector<FlowBlock *> &UnknownBlocks) {
 914     // Extract local in-degrees in the considered subgraph
 915     auto LocalInDegree = std::vector<uint64_t>(NumBlocks(), 0);
 916     auto fillInDegree = [&](const FlowBlock *Block) {
 917       for (auto Jump : Block->SuccJumps) {
 918         if (ignoreJump(SrcBlock, DstBlock, Jump))
 919           continue;
 920         LocalInDegree[Jump->Target]++;
 921       }
 922     };
 923     fillInDegree(SrcBlock);
 924     for (auto Block : UnknownBlocks) {
 925       fillInDegree(Block);
 926     }
 927     // A loop containing SrcBlock
 928     if (LocalInDegree[SrcBlock->Index] > 0)
 929       return false;
 930
 931     std::vector<FlowBlock *> AcyclicOrder;
 932     std::queue<uint64_t> Queue;
 933     Queue.push(SrcBlock->Index);
 934     while (!Queue.empty()) {
 935       FlowBlock *Block = &Func.Blocks[Queue.front()];
 936       Queue.pop();
 937       // Stop propagation once we reach DstBlock, if any
 938       if (DstBlock != nullptr && Block == DstBlock)
 939         break;
 940
 941       // Keep an acyclic order of unknown blocks
 942       if (Block->UnknownWeight && Block != SrcBlock)
 943         AcyclicOrder.push_back(Block);
 944
 945       // Add to the queue all successors with zero local in-degree
 946       for (auto Jump : Block->SuccJumps) {
 947         if (ignoreJump(SrcBlock, DstBlock, Jump))
 948           continue;
 949         uint64_t Dst = Jump->Target;
 950         LocalInDegree[Dst]--;
 951         if (LocalInDegree[Dst] == 0) {
 952           Queue.push(Dst);
 953         }
 954       }
 955     }
 956
 957     // If there is a cycle in the subgraph, AcyclicOrder contains only a subset
 958     // of all blocks
 959     if (UnknownBlocks.size() != AcyclicOrder.size())
 960       return false;
 961     UnknownBlocks = AcyclicOrder;
 962     return true;
 963   }
 964
 965   /// Rebalance a given subgraph rooted at SrcBlock, ending at DstBlock and
 966   /// having UnknownBlocks intermediate blocks.
 967   void rebalanceUnknownSubgraph(const FlowBlock *SrcBlock,
 968                                 const FlowBlock *DstBlock,
 969                                 const std::vector<FlowBlock *> &UnknownBlocks) {
 970     assert(SrcBlock->Flow > 0 && "zero-flow block in unknown subgraph");
 971
 972     // Ditribute flow from the source block
 973     uint64_t BlockFlow = 0;
 974     // SrcBlock's flow is the sum of outgoing flows along non-ignored jumps
 975     for (auto Jump : SrcBlock->SuccJumps) {
 976       if (ignoreJump(SrcBlock, DstBlock, Jump))
 977         continue;
 978       BlockFlow += Jump->Flow;
 979     }
 980     rebalanceBlock(SrcBlock, DstBlock, SrcBlock, BlockFlow);
 981
 982     // Ditribute flow from the remaining blocks
 983     for (auto Block : UnknownBlocks) {
 984       assert(Block->UnknownWeight && "incorrect unknown subgraph");
 985       uint64_t BlockFlow = 0;
 986       // Block's flow is the sum of incoming flows
 987       for (auto Jump : Block->PredJumps) {
 988         BlockFlow += Jump->Flow;
 989       }
 990       Block->Flow = BlockFlow;
 991       rebalanceBlock(SrcBlock, DstBlock, Block, BlockFlow);
 992     }
 993   }
 994
 995   /// Redistribute flow for a block in a subgraph rooted at SrcBlock,
 996   /// and ending at DstBlock.
 997   void rebalanceBlock(const FlowBlock *SrcBlock, const FlowBlock *DstBlock,
 998                       const FlowBlock *Block, uint64_t BlockFlow) {
 999     // Process all successor jumps and update corresponding flow values
1000     size_t BlockDegree = 0;
1001     for (auto Jump : Block->SuccJumps) {
1002       if (ignoreJump(SrcBlock, DstBlock, Jump))
1003         continue;
1004       BlockDegree++;
1005     }
1006     // If all successor jumps of the block are ignored, skip it
1007     if (DstBlock == nullptr && BlockDegree == 0)
1008       return;
1009     assert(BlockDegree > 0 && "all outgoing jumps are ignored");
1010
1011     // Each of the Block's successors gets the following amount of flow.
1012     // Rounding the value up so that all flow is propagated
1013     uint64_t SuccFlow = (BlockFlow + BlockDegree - 1) / BlockDegree;
1014     for (auto Jump : Block->SuccJumps) {
1015       if (ignoreJump(SrcBlock, DstBlock, Jump))
1016         continue;
1017       uint64_t Flow = std::min(SuccFlow, BlockFlow);
1018       Jump->Flow = Flow;
1019       BlockFlow -= Flow;
1020     }
1021     assert(BlockFlow == 0 && "not all flow is propagated");
1022   }
1023
1024   /// A constant indicating an arbitrary exit block of a function.
1025   static constexpr uint64_t AnyExitBlock = uint64_t(-1);
1026
1027   /// The function.
1028   FlowFunction &Func;
1029 };
1030
1031 /// Initializing flow network for a given function.
1032 ///
1033 /// Every block is split into three nodes that are responsible for (i) an
1034 /// incoming flow, (ii) an outgoing flow, and (iii) penalizing an increase or
1035 /// reduction of the block weight.
1036 void initializeNetwork(MinCostMaxFlow &Network, FlowFunction &Func) {
1037   uint64_t NumBlocks = Func.Blocks.size();
1038   assert(NumBlocks > 1 && "Too few blocks in a function");
1039   LLVM_DEBUG(dbgs() << "Initializing profi for " << NumBlocks << " blocks\n");
1040
1041   // Pre-process data: make sure the entry weight is at least 1
1042   if (Func.Blocks[Func.Entry].Weight == 0) {
1043     Func.Blocks[Func.Entry].Weight = 1;
1044   }
1045   // Introducing dummy source/sink pairs to allow flow circulation.
1046   // The nodes corresponding to blocks of Func have indicies in the range
1047   // [0..3 * NumBlocks); the dummy nodes are indexed by the next four values.
1048   uint64_t S = 3 * NumBlocks;
1049   uint64_t T = S + 1;
1050   uint64_t S1 = S + 2;
1051   uint64_t T1 = S + 3;
1052
1053   Network.initialize(3 * NumBlocks + 4, S1, T1);
1054
1055   // Create three nodes for every block of the function
1056   for (uint64_t B = 0; B < NumBlocks; B++) {
1057     auto &Block = Func.Blocks[B];
1058     assert((!Block.UnknownWeight || Block.Weight == 0 || Block.isEntry()) &&
1059            "non-zero weight of a block w/o weight except for an entry");
1060
1061     // Split every block into two nodes
1062     uint64_t Bin = 3 * B;
1063     uint64_t Bout = 3 * B + 1;
1064     uint64_t Baux = 3 * B + 2;
1065     if (Block.Weight > 0) {
1066       Network.addEdge(S1, Bout, Block.Weight, 0);
1067       Network.addEdge(Bin, T1, Block.Weight, 0);
1068     }
1069
1070     // Edges from S and to T
1071     assert((!Block.isEntry() || !Block.isExit()) &&
1072            "a block cannot be an entry and an exit");
1073     if (Block.isEntry()) {
1074       Network.addEdge(S, Bin, 0);
1075     } else if (Block.isExit()) {
1076       Network.addEdge(Bout, T, 0);
1077     }
1078
1079     // An auxiliary node to allow increase/reduction of block counts:
1080     // We assume that decreasing block counts is more expensive than increasing,
1081     // and thus, setting separate costs here. In the future we may want to tune
1082     // the relative costs so as to maximize the quality of generated profiles.
1083     int64_t AuxCostInc = SampleProfileProfiCostInc;
1084     int64_t AuxCostDec = SampleProfileProfiCostDec;
1085     if (Block.UnknownWeight) {
1086       // Do not penalize changing weights of blocks w/o known profile count
1087       AuxCostInc = 0;
1088       AuxCostDec = 0;
1089     } else {
1090       // Increasing the count for "cold" blocks with zero initial count is more
1091       // expensive than for "hot" ones
1092       if (Block.Weight == 0) {
1093         AuxCostInc = SampleProfileProfiCostIncZero;
1094       }
1095       // Modifying the count of the entry block is expensive
1096       if (Block.isEntry()) {
1097         AuxCostInc = SampleProfileProfiCostIncEntry;
1098         AuxCostDec = SampleProfileProfiCostDecEntry;
1099       }
1100     }
1101     // For blocks with self-edges, do not penalize a reduction of the count,
1102     // as all of the increase can be attributed to the self-edge
1103     if (Block.HasSelfEdge) {
1104       AuxCostDec = 0;
1105     }
1106
1107     Network.addEdge(Bin, Baux, AuxCostInc);
1108     Network.addEdge(Baux, Bout, AuxCostInc);
1109     if (Block.Weight > 0) {
1110       Network.addEdge(Bout, Baux, AuxCostDec);
1111       Network.addEdge(Baux, Bin, AuxCostDec);
1112     }
1113   }
1114
1115   // Creating edges for every jump
1116   for (auto &Jump : Func.Jumps) {
1117     uint64_t Src = Jump.Source;
1118     uint64_t Dst = Jump.Target;
1119     if (Src != Dst) {
1120       uint64_t SrcOut = 3 * Src + 1;
1121       uint64_t DstIn = 3 * Dst;
1122       uint64_t Cost = Jump.IsUnlikely ? MinCostMaxFlow::AuxCostUnlikely : 0;
1123       Network.addEdge(SrcOut, DstIn, Cost);
1124     }
1125   }
1126
1127   // Make sure we have a valid flow circulation
1128   Network.addEdge(T, S, 0);
1129 }
1130
1131 /// Extract resulting block and edge counts from the flow network.
1132 void extractWeights(MinCostMaxFlow &Network, FlowFunction &Func) {
1133   uint64_t NumBlocks = Func.Blocks.size();
1134
1135   // Extract resulting block counts
1136   for (uint64_t Src = 0; Src < NumBlocks; Src++) {
1137     auto &Block = Func.Blocks[Src];
1138     uint64_t SrcOut = 3 * Src + 1;
1139     int64_t Flow = 0;
1140     for (const auto &Adj : Network.getFlow(SrcOut)) {
1141       uint64_t DstIn = Adj.first;
1142       int64_t DstFlow = Adj.second;
1143       bool IsAuxNode = (DstIn < 3 * NumBlocks && DstIn % 3 == 2);
1144       if (!IsAuxNode || Block.HasSelfEdge) {
1145         Flow += DstFlow;
1146       }
1147     }
1148     Block.Flow = Flow;
1149     assert(Flow >= 0 && "negative block flow");
1150   }
1151
1152   // Extract resulting jump counts
1153   for (auto &Jump : Func.Jumps) {
1154     uint64_t Src = Jump.Source;
1155     uint64_t Dst = Jump.Target;
1156     int64_t Flow = 0;
1157     if (Src != Dst) {
1158       uint64_t SrcOut = 3 * Src + 1;
1159       uint64_t DstIn = 3 * Dst;
1160       Flow = Network.getFlow(SrcOut, DstIn);
1161     } else {
1162       uint64_t SrcOut = 3 * Src + 1;
1163       uint64_t SrcAux = 3 * Src + 2;
1164       int64_t AuxFlow = Network.getFlow(SrcOut, SrcAux);
1165       if (AuxFlow > 0)
1166         Flow = AuxFlow;
1167     }
1168     Jump.Flow = Flow;
1169     assert(Flow >= 0 && "negative jump flow");
1170   }
1171 }
1172
1173 #ifndef NDEBUG
1174 /// Verify that the computed flow values satisfy flow conservation rules
1175 void verifyWeights(const FlowFunction &Func) {
1176   const uint64_t NumBlocks = Func.Blocks.size();
1177   auto InFlow = std::vector<uint64_t>(NumBlocks, 0);
1178   auto OutFlow = std::vector<uint64_t>(NumBlocks, 0);
1179   for (const auto &Jump : Func.Jumps) {
1180     InFlow[Jump.Target] += Jump.Flow;
1181     OutFlow[Jump.Source] += Jump.Flow;
1182   }
1183
1184   uint64_t TotalInFlow = 0;
1185   uint64_t TotalOutFlow = 0;
1186   for (uint64_t I = 0; I < NumBlocks; I++) {
1187     auto &Block = Func.Blocks[I];
1188     if (Block.isEntry()) {
1189       TotalInFlow += Block.Flow;
1190       assert(Block.Flow == OutFlow[I] && "incorrectly computed control flow");
1191     } else if (Block.isExit()) {
1192       TotalOutFlow += Block.Flow;
1193       assert(Block.Flow == InFlow[I] && "incorrectly computed control flow");
1194     } else {
1195       assert(Block.Flow == OutFlow[I] && "incorrectly computed control flow");
1196       assert(Block.Flow == InFlow[I] && "incorrectly computed control flow");
1197     }
1198   }
1199   assert(TotalInFlow == TotalOutFlow && "incorrectly computed control flow");
1200
1201   // Verify that there are no isolated flow components
1202   // One could modify FlowFunction to hold edges indexed by the sources, which
1203   // will avoid a creation of the object
1204   auto PositiveFlowEdges = std::vector<std::vector<uint64_t>>(NumBlocks);
1205   for (const auto &Jump : Func.Jumps) {
1206     if (Jump.Flow > 0) {
1207       PositiveFlowEdges[Jump.Source].push_back(Jump.Target);
1208     }
1209   }
1210
1211   // Run BFS from the source along edges with positive flow
1212   std::queue<uint64_t> Queue;
1213   auto Visited = BitVector(NumBlocks, false);
1214   Queue.push(Func.Entry);
1215   Visited[Func.Entry] = true;
1216   while (!Queue.empty()) {
1217     uint64_t Src = Queue.front();
1218     Queue.pop();
1219     for (uint64_t Dst : PositiveFlowEdges[Src]) {
1220       if (!Visited[Dst]) {
1221         Queue.push(Dst);
1222         Visited[Dst] = true;
1223       }
1224     }
1225   }
1226
1227   // Verify that every block that has a positive flow is reached from the source
1228   // along edges with a positive flow
1229   for (uint64_t I = 0; I < NumBlocks; I++) {
1230     auto &Block = Func.Blocks[I];
1231     assert((Visited[I] || Block.Flow == 0) && "an isolated flow component");
1232   }
1233 }
1234 #endif
1235
1236 } // end of anonymous namespace
1237
1238 /// Apply the profile inference algorithm for a given flow function
1239 void llvm::applyFlowInference(FlowFunction &Func) {
1240   // Create and apply an inference network model
1241   auto InferenceNetwork = MinCostMaxFlow();
1242   initializeNetwork(InferenceNetwork, Func);
1243   InferenceNetwork.run();
1244
1245   // Extract flow values for every block and every edge
1246   extractWeights(InferenceNetwork, Func);
1247
1248   // Post-processing adjustments to the flow
1249   auto Adjuster = FlowAdjuster(Func);
1250   Adjuster.run();
1251
1252 #ifndef NDEBUG
1253   // Verify the result
1254   verifyWeights(Func);
1255 #endif
1256 }