bolt/lib/Passes/HFSort.cpp

   1 //===- bolt/Passes/HFSort.cpp - Cluster functions by hotness --------------===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 //
   9 // Implementation of HFSort algorithm for function ordering:
  10 // https://research.fb.com/wp-content/uploads/2017/01/cgo2017-hfsort-final1.pdf
  11 //
  12 //===----------------------------------------------------------------------===//
  13
  14 #include "bolt/Passes/HFSort.h"
  15 #include "llvm/Support/CommandLine.h"
  16 #include "llvm/Support/Debug.h"
  17 #include "llvm/Support/Format.h"
  18 #include "llvm/Support/raw_ostream.h"
  19 #include <unordered_set>
  20
  21 #define DEBUG_TYPE "hfsort"
  22
  23 namespace opts {
  24 extern llvm::cl::opt<unsigned> Verbosity;
  25 }
  26
  27 namespace llvm {
  28 namespace bolt {
  29
  30 using NodeId = CallGraph::NodeId;
  31 using Arc = CallGraph::Arc;
  32 using Node = CallGraph::Node;
  33
  34 namespace {
  35
  36 // The number of pages to reserve for the functions with highest
  37 // density (samples / size).  The functions put in these pages are not
  38 // considered for clustering.
  39 constexpr uint32_t FrozenPages = 0;
  40
  41 // The minimum approximate probability of a callee being called from a
  42 // particular arc to consider merging with the caller's cluster.
  43 constexpr double MinArcProbability = 0.1;
  44
  45 // This is a factor to determine by how much a caller cluster is
  46 // willing to degrade it's density by merging a callee.
  47 constexpr int CallerDegradeFactor = 8;
  48
  49 } // namespace
  50
  51 ////////////////////////////////////////////////////////////////////////////////
  52
  53 Cluster::Cluster(NodeId Id, const Node &Func)
  54     : Samples(Func.samples()), Size(Func.size()),
  55       Density((double)Samples / Size) {
  56   Targets.push_back(Id);
  57 }
  58
  59 Cluster::Cluster(const std::vector<NodeId> &Nodes, const CallGraph &Cg) {
  60   Samples = 0;
  61   Size = 0;
  62   for (NodeId TargetId : Nodes) {
  63     Targets.push_back(TargetId);
  64     Samples += Cg.samples(TargetId);
  65     Size += Cg.size(TargetId);
  66   }
  67   Density = (double)Samples / Size;
  68 }
  69
  70 std::string Cluster::toString() const {
  71   std::string Str;
  72   raw_string_ostream CS(Str);
  73   bool PrintComma = false;
  74   CS << "funcs = [";
  75   for (const NodeId &Target : Targets) {
  76     if (PrintComma)
  77       CS << ", ";
  78     CS << Target;
  79     PrintComma = true;
  80   }
  81   CS << "]";
  82   return CS.str();
  83 }
  84
  85 namespace {
  86
  87 void freezeClusters(const CallGraph &Cg, std::vector<Cluster> &Clusters) {
  88   uint32_t TotalSize = 0;
  89   llvm::sort(Clusters, compareClustersDensity);
  90   for (Cluster &C : Clusters) {
  91     uint32_t NewSize = TotalSize + C.size();
  92     if (NewSize > FrozenPages * HugePageSize)
  93       break;
  94     C.freeze();
  95     TotalSize = NewSize;
  96     LLVM_DEBUG(NodeId Fid = C.target(0);
  97                dbgs() << format(
  98                    "freezing cluster for func %d, size = %u, samples = %lu)\n",
  99                    Fid, Cg.size(Fid), Cg.samples(Fid)););
 100   }
 101 }
 102
 103 } // namespace
 104
 105 void Cluster::reverseTargets() { std::reverse(Targets.begin(), Targets.end()); }
 106
 107 void Cluster::merge(const Cluster &Other, const double Aw) {
 108   Targets.insert(Targets.end(), Other.Targets.begin(), Other.Targets.end());
 109   Size += Other.Size;
 110   Samples += Other.Samples;
 111   Density = (double)Samples / Size;
 112 }
 113
 114 void Cluster::merge(const Cluster &Other,
 115                     const std::vector<CallGraph::NodeId> &Targets_) {
 116   Targets = Targets_;
 117   Size += Other.Size;
 118   Samples += Other.Samples;
 119   Density = (double)Samples / Size;
 120 }
 121
 122 void Cluster::clear() {
 123   Id = -1u;
 124   Size = 0;
 125   Samples = 0;
 126   Density = 0.0;
 127   Targets.clear();
 128   Frozen = false;
 129 }
 130
 131 std::vector<Cluster> clusterize(const CallGraph &Cg) {
 132   std::vector<NodeId> SortedFuncs;
 133
 134   // indexed by NodeId, keeps it's current cluster
 135   std::vector<Cluster *> FuncCluster(Cg.numNodes(), nullptr);
 136   std::vector<Cluster> Clusters;
 137   Clusters.reserve(Cg.numNodes());
 138
 139   for (NodeId F = 0; F < Cg.numNodes(); F++) {
 140     if (Cg.samples(F) == 0)
 141       continue;
 142     Clusters.emplace_back(F, Cg.getNode(F));
 143     SortedFuncs.push_back(F);
 144   }
 145
 146   freezeClusters(Cg, Clusters);
 147
 148   // The size and order of Clusters is fixed until we reshuffle it immediately
 149   // before returning.
 150   for (Cluster &Cluster : Clusters)
 151     FuncCluster[Cluster.targets().front()] = &Cluster;
 152
 153   llvm::sort(SortedFuncs, [&](const NodeId F1, const NodeId F2) {
 154     const CallGraph::Node &Func1 = Cg.getNode(F1);
 155     const CallGraph::Node &Func2 = Cg.getNode(F2);
 156     return Func1.samples() * Func2.size() > // TODO: is this correct?
 157            Func2.samples() * Func1.size();
 158   });
 159
 160   // Process each function, and consider merging its cluster with the
 161   // one containing its most likely predecessor.
 162   for (const NodeId Fid : SortedFuncs) {
 163     Cluster *Cluster = FuncCluster[Fid];
 164     if (Cluster->frozen())
 165       continue;
 166
 167     // Find best predecessor.
 168     NodeId BestPred = CallGraph::InvalidId;
 169     double BestProb = 0;
 170
 171     for (const NodeId Src : Cg.predecessors(Fid)) {
 172       const Arc &Arc = *Cg.findArc(Src, Fid);
 173       if (BestPred == CallGraph::InvalidId ||
 174           Arc.normalizedWeight() > BestProb) {
 175         BestPred = Arc.src();
 176         BestProb = Arc.normalizedWeight();
 177       }
 178     }
 179
 180     // Check if the merge is good for the callee.
 181     //   Don't merge if the probability of getting to the callee from the
 182     //   caller is too low.
 183     if (BestProb < MinArcProbability)
 184       continue;
 185
 186     assert(BestPred != CallGraph::InvalidId);
 187
 188     class Cluster *PredCluster = FuncCluster[BestPred];
 189
 190     // Skip if no predCluster (predecessor w/ no samples), or if same
 191     // as cluster, of it's frozen.
 192     if (PredCluster == nullptr || PredCluster == Cluster ||
 193         PredCluster->frozen())
 194       continue;
 195
 196     // Skip if merged cluster would be bigger than the threshold.
 197     if (Cluster->size() + PredCluster->size() > MaxClusterSize)
 198       continue;
 199
 200     // Check if the merge is good for the caller.
 201     //   Don't merge if the caller's density is significantly better
 202     //   than the density resulting from the merge.
 203     const double NewDensity =
 204         ((double)PredCluster->samples() + Cluster->samples()) /
 205         (PredCluster->size() + Cluster->size());
 206     if (PredCluster->density() > NewDensity * CallerDegradeFactor) {
 207       continue;
 208     }
 209
 210     LLVM_DEBUG(if (opts::Verbosity > 1) {
 211       dbgs() << format("merging %s -> %s: %u\n",
 212                        PredCluster->toString().c_str(),
 213                        Cluster->toString().c_str(), Cg.samples(Fid));
 214     });
 215
 216     for (NodeId F : Cluster->targets())
 217       FuncCluster[F] = PredCluster;
 218
 219     PredCluster->merge(*Cluster);
 220     Cluster->clear();
 221   }
 222
 223   // Return the set of Clusters that are left, which are the ones that
 224   // didn't get merged (so their first func is its original func).
 225   std::vector<Cluster> SortedClusters;
 226   std::unordered_set<Cluster *> Visited;
 227   for (const NodeId Func : SortedFuncs) {
 228     Cluster *Cluster = FuncCluster[Func];
 229     if (!Cluster || Visited.count(Cluster) == 1 || Cluster->target(0) != Func)
 230       continue;
 231
 232     SortedClusters.emplace_back(std::move(*Cluster));
 233     Visited.insert(Cluster);
 234   }
 235
 236   llvm::sort(SortedClusters, compareClustersDensity);
 237
 238   return SortedClusters;
 239 }
 240
 241 std::vector<Cluster> randomClusters(const CallGraph &Cg) {
 242   std::vector<NodeId> FuncIds(Cg.numNodes(), 0);
 243   std::vector<Cluster> Clusters;
 244   Clusters.reserve(Cg.numNodes());
 245
 246   for (NodeId F = 0; F < Cg.numNodes(); F++) {
 247     if (Cg.samples(F) == 0)
 248       continue;
 249     Clusters.emplace_back(F, Cg.getNode(F));
 250   }
 251
 252   llvm::sort(Clusters, [](const Cluster &A, const Cluster &B) {
 253     return A.size() < B.size();
 254   });
 255
 256   auto pickMergeCluster = [&Clusters](const size_t Idx) {
 257     size_t MaxIdx = Idx + 1;
 258
 259     while (MaxIdx < Clusters.size() &&
 260            Clusters[Idx].size() + Clusters[MaxIdx].size() <= MaxClusterSize)
 261       ++MaxIdx;
 262
 263     if (MaxIdx - Idx > 1) {
 264       size_t MergeIdx = (std::rand() % (MaxIdx - Idx - 1)) + Idx + 1;
 265       assert(Clusters[MergeIdx].size() + Clusters[Idx].size() <=
 266              MaxClusterSize);
 267       return MergeIdx;
 268     }
 269     return Clusters.size();
 270   };
 271
 272   size_t Idx = 0;
 273   while (Idx < Clusters.size()) {
 274     size_t MergeIdx = pickMergeCluster(Idx);
 275     if (MergeIdx == Clusters.size()) {
 276       ++Idx;
 277     } else {
 278       Clusters[Idx].merge(Clusters[MergeIdx]);
 279       Clusters.erase(Clusters.begin() + MergeIdx);
 280     }
 281   }
 282
 283   return Clusters;
 284 }
 285
 286 } // namespace bolt
 287 } // namespace llvm