lld/MachO/SectionPriorities.cpp

   1 //===- SectionPriorities.cpp ----------------------------------------------===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 ///
   9 /// This is based on the ELF port, see ELF/CallGraphSort.cpp for the details
  10 /// about the algorithm.
  11 ///
  12 //===----------------------------------------------------------------------===//
  13
  14 #include "SectionPriorities.h"
  15 #include "BPSectionOrderer.h"
  16 #include "Config.h"
  17 #include "InputFiles.h"
  18 #include "Symbols.h"
  19 #include "Target.h"
  20
  21 #include "lld/Common/Args.h"
  22 #include "lld/Common/CommonLinkerContext.h"
  23 #include "lld/Common/ErrorHandler.h"
  24 #include "llvm/ADT/DenseMap.h"
  25 #include "llvm/ADT/MapVector.h"
  26 #include "llvm/Support/Path.h"
  27 #include "llvm/Support/TimeProfiler.h"
  28 #include "llvm/Support/raw_ostream.h"
  29
  30 #include <numeric>
  31
  32 using namespace llvm;
  33 using namespace llvm::MachO;
  34 using namespace llvm::sys;
  35 using namespace lld;
  36 using namespace lld::macho;
  37
  38 PriorityBuilder macho::priorityBuilder;
  39
  40 namespace {
  41
  42 size_t highestAvailablePriority = std::numeric_limits<size_t>::max();
  43
  44 struct Edge {
  45   int from;
  46   uint64_t weight;
  47 };
  48
  49 struct Cluster {
  50   Cluster(int sec, size_t s) : next(sec), prev(sec), size(s) {}
  51
  52   double getDensity() const {
  53     if (size == 0)
  54       return 0;
  55     return double(weight) / double(size);
  56   }
  57
  58   int next;
  59   int prev;
  60   uint64_t size;
  61   uint64_t weight = 0;
  62   uint64_t initialWeight = 0;
  63   Edge bestPred = {-1, 0};
  64 };
  65
  66 class CallGraphSort {
  67 public:
  68   CallGraphSort(const MapVector<SectionPair, uint64_t> &profile);
  69
  70   DenseMap<const InputSection *, size_t> run();
  71
  72 private:
  73   std::vector<Cluster> clusters;
  74   std::vector<const InputSection *> sections;
  75 };
  76 // Maximum amount the combined cluster density can be worse than the original
  77 // cluster to consider merging.
  78 constexpr int MAX_DENSITY_DEGRADATION = 8;
  79 } // end anonymous namespace
  80
  81 // Take the edge list in callGraphProfile, resolve symbol names to Symbols, and
  82 // generate a graph between InputSections with the provided weights.
  83 CallGraphSort::CallGraphSort(const MapVector<SectionPair, uint64_t> &profile) {
  84   DenseMap<const InputSection *, int> secToCluster;
  85
  86   auto getOrCreateCluster = [&](const InputSection *isec) -> int {
  87     auto res = secToCluster.try_emplace(isec, clusters.size());
  88     if (res.second) {
  89       sections.push_back(isec);
  90       clusters.emplace_back(clusters.size(), isec->getSize());
  91     }
  92     return res.first->second;
  93   };
  94
  95   // Create the graph
  96   for (const std::pair<SectionPair, uint64_t> &c : profile) {
  97     const auto fromSec = c.first.first->canonical();
  98     const auto toSec = c.first.second->canonical();
  99     uint64_t weight = c.second;
 100     // Ignore edges between input sections belonging to different output
 101     // sections.  This is done because otherwise we would end up with clusters
 102     // containing input sections that can't actually be placed adjacently in the
 103     // output.  This messes with the cluster size and density calculations.  We
 104     // would also end up moving input sections in other output sections without
 105     // moving them closer to what calls them.
 106     if (fromSec->parent != toSec->parent)
 107       continue;
 108
 109     int from = getOrCreateCluster(fromSec);
 110     int to = getOrCreateCluster(toSec);
 111
 112     clusters[to].weight += weight;
 113
 114     if (from == to)
 115       continue;
 116
 117     // Remember the best edge.
 118     Cluster &toC = clusters[to];
 119     if (toC.bestPred.from == -1 || toC.bestPred.weight < weight) {
 120       toC.bestPred.from = from;
 121       toC.bestPred.weight = weight;
 122     }
 123   }
 124   for (Cluster &c : clusters)
 125     c.initialWeight = c.weight;
 126 }
 127
 128 // It's bad to merge clusters which would degrade the density too much.
 129 static bool isNewDensityBad(Cluster &a, Cluster &b) {
 130   double newDensity = double(a.weight + b.weight) / double(a.size + b.size);
 131   return newDensity < a.getDensity() / MAX_DENSITY_DEGRADATION;
 132 }
 133
 134 // Find the leader of V's belonged cluster (represented as an equivalence
 135 // class). We apply union-find path-halving technique (simple to implement) in
 136 // the meantime as it decreases depths and the time complexity.
 137 static int getLeader(std::vector<int> &leaders, int v) {
 138   while (leaders[v] != v) {
 139     leaders[v] = leaders[leaders[v]];
 140     v = leaders[v];
 141   }
 142   return v;
 143 }
 144
 145 static void mergeClusters(std::vector<Cluster> &cs, Cluster &into, int intoIdx,
 146                           Cluster &from, int fromIdx) {
 147   int tail1 = into.prev, tail2 = from.prev;
 148   into.prev = tail2;
 149   cs[tail2].next = intoIdx;
 150   from.prev = tail1;
 151   cs[tail1].next = fromIdx;
 152   into.size += from.size;
 153   into.weight += from.weight;
 154   from.size = 0;
 155   from.weight = 0;
 156 }
 157
 158 // Group InputSections into clusters using the Call-Chain Clustering heuristic
 159 // then sort the clusters by density.
 160 DenseMap<const InputSection *, size_t> CallGraphSort::run() {
 161   const uint64_t maxClusterSize = target->getPageSize();
 162
 163   // Cluster indices sorted by density.
 164   std::vector<int> sorted(clusters.size());
 165   // For union-find.
 166   std::vector<int> leaders(clusters.size());
 167
 168   std::iota(leaders.begin(), leaders.end(), 0);
 169   std::iota(sorted.begin(), sorted.end(), 0);
 170
 171   llvm::stable_sort(sorted, [&](int a, int b) {
 172     return clusters[a].getDensity() > clusters[b].getDensity();
 173   });
 174
 175   for (int l : sorted) {
 176     // The cluster index is the same as the index of its leader here because
 177     // clusters[L] has not been merged into another cluster yet.
 178     Cluster &c = clusters[l];
 179
 180     // Don't consider merging if the edge is unlikely.
 181     if (c.bestPred.from == -1 || c.bestPred.weight * 10 <= c.initialWeight)
 182       continue;
 183
 184     int predL = getLeader(leaders, c.bestPred.from);
 185     // Already in the same cluster.
 186     if (l == predL)
 187       continue;
 188
 189     Cluster *predC = &clusters[predL];
 190     if (c.size + predC->size > maxClusterSize)
 191       continue;
 192
 193     if (isNewDensityBad(*predC, c))
 194       continue;
 195
 196     leaders[l] = predL;
 197     mergeClusters(clusters, *predC, predL, c, l);
 198   }
 199   // Sort remaining non-empty clusters by density.
 200   sorted.clear();
 201   for (int i = 0, e = (int)clusters.size(); i != e; ++i)
 202     if (clusters[i].size > 0)
 203       sorted.push_back(i);
 204   llvm::stable_sort(sorted, [&](int a, int b) {
 205     return clusters[a].getDensity() > clusters[b].getDensity();
 206   });
 207
 208   DenseMap<const InputSection *, size_t> orderMap;
 209
 210   // Sections will be sorted by decreasing order. Absent sections will have
 211   // priority 0 and be placed at the end of sections.
 212   // NB: This is opposite from COFF/ELF to be compatible with the existing
 213   // order-file code.
 214   int curOrder = highestAvailablePriority;
 215   for (int leader : sorted) {
 216     for (int i = leader;;) {
 217       orderMap[sections[i]] = curOrder--;
 218       i = clusters[i].next;
 219       if (i == leader)
 220         break;
 221     }
 222   }
 223   if (!config->printSymbolOrder.empty()) {
 224     std::error_code ec;
 225     raw_fd_ostream os(config->printSymbolOrder, ec, sys::fs::OF_None);
 226     if (ec) {
 227       error("cannot open " + config->printSymbolOrder + ": " + ec.message());
 228       return orderMap;
 229     }
 230     // Print the symbols ordered by C3, in the order of decreasing curOrder
 231     // Instead of sorting all the orderMap, just repeat the loops above.
 232     for (int leader : sorted)
 233       for (int i = leader;;) {
 234         const InputSection *isec = sections[i];
 235         // Search all the symbols in the file of the section
 236         // and find out a Defined symbol with name that is within the
 237         // section.
 238         for (Symbol *sym : isec->getFile()->symbols) {
 239           if (auto *d = dyn_cast_or_null<Defined>(sym)) {
 240             if (d->isec() == isec)
 241               os << sym->getName() << "\n";
 242           }
 243         }
 244         i = clusters[i].next;
 245         if (i == leader)
 246           break;
 247       }
 248   }
 249
 250   return orderMap;
 251 }
 252
 253 std::optional<size_t>
 254 macho::PriorityBuilder::getSymbolPriority(const Defined *sym) {
 255   if (sym->isAbsolute())
 256     return std::nullopt;
 257
 258   auto it = priorities.find(sym->getName());
 259   if (it == priorities.end())
 260     return std::nullopt;
 261   const SymbolPriorityEntry &entry = it->second;
 262   const InputFile *f = sym->isec()->getFile();
 263   if (!f)
 264     return entry.anyObjectFile;
 265   // We don't use toString(InputFile *) here because it returns the full path
 266   // for object files, and we only want the basename.
 267   StringRef filename;
 268   if (f->archiveName.empty())
 269     filename = path::filename(f->getName());
 270   else
 271     filename = saver().save(path::filename(f->archiveName) + "(" +
 272                             path::filename(f->getName()) + ")");
 273   return std::max(entry.objectFiles.lookup(filename), entry.anyObjectFile);
 274 }
 275
 276 void macho::PriorityBuilder::extractCallGraphProfile() {
 277   TimeTraceScope timeScope("Extract call graph profile");
 278   bool hasOrderFile = !priorities.empty();
 279   for (const InputFile *file : inputFiles) {
 280     auto *obj = dyn_cast_or_null<ObjFile>(file);
 281     if (!obj)
 282       continue;
 283     for (const CallGraphEntry &entry : obj->callGraph) {
 284       assert(entry.fromIndex < obj->symbols.size() &&
 285              entry.toIndex < obj->symbols.size());
 286       auto *fromSym = dyn_cast_or_null<Defined>(obj->symbols[entry.fromIndex]);
 287       auto *toSym = dyn_cast_or_null<Defined>(obj->symbols[entry.toIndex]);
 288       if (fromSym && toSym &&
 289           (!hasOrderFile ||
 290            (!getSymbolPriority(fromSym) && !getSymbolPriority(toSym))))
 291         callGraphProfile[{fromSym->isec(), toSym->isec()}] += entry.count;
 292     }
 293   }
 294 }
 295
 296 void macho::PriorityBuilder::parseOrderFile(StringRef path) {
 297   assert(callGraphProfile.empty() &&
 298          "Order file must be parsed before call graph profile is processed");
 299   std::optional<MemoryBufferRef> buffer = readFile(path);
 300   if (!buffer) {
 301     error("Could not read order file at " + path);
 302     return;
 303   }
 304
 305   MemoryBufferRef mbref = *buffer;
 306   for (StringRef line : args::getLines(mbref)) {
 307     StringRef objectFile, symbol;
 308     line = line.take_until([](char c) { return c == '#'; }); // ignore comments
 309     line = line.ltrim();
 310
 311     CPUType cpuType = StringSwitch<CPUType>(line)
 312                           .StartsWith("i386:", CPU_TYPE_I386)
 313                           .StartsWith("x86_64:", CPU_TYPE_X86_64)
 314                           .StartsWith("arm:", CPU_TYPE_ARM)
 315                           .StartsWith("arm64:", CPU_TYPE_ARM64)
 316                           .StartsWith("ppc:", CPU_TYPE_POWERPC)
 317                           .StartsWith("ppc64:", CPU_TYPE_POWERPC64)
 318                           .Default(CPU_TYPE_ANY);
 319
 320     if (cpuType != CPU_TYPE_ANY && cpuType != target->cpuType)
 321       continue;
 322
 323     // Drop the CPU type as well as the colon
 324     if (cpuType != CPU_TYPE_ANY)
 325       line = line.drop_until([](char c) { return c == ':'; }).drop_front();
 326
 327     constexpr std::array<StringRef, 2> fileEnds = {".o:", ".o):"};
 328     for (StringRef fileEnd : fileEnds) {
 329       size_t pos = line.find(fileEnd);
 330       if (pos != StringRef::npos) {
 331         // Split the string around the colon
 332         objectFile = line.take_front(pos + fileEnd.size() - 1);
 333         line = line.drop_front(pos + fileEnd.size());
 334         break;
 335       }
 336     }
 337     symbol = line.trim();
 338
 339     if (!symbol.empty()) {
 340       SymbolPriorityEntry &entry = priorities[symbol];
 341       if (!objectFile.empty())
 342         entry.objectFiles.insert(
 343             std::make_pair(objectFile, highestAvailablePriority));
 344       else
 345         entry.anyObjectFile =
 346             std::max(entry.anyObjectFile, highestAvailablePriority);
 347     }
 348
 349     --highestAvailablePriority;
 350   }
 351 }
 352
 353 DenseMap<const InputSection *, size_t>
 354 macho::PriorityBuilder::buildInputSectionPriorities() {
 355   DenseMap<const InputSection *, size_t> sectionPriorities;
 356   if (!config->irpgoProfileSortProfilePath.empty() ||
 357       config->functionOrderForCompression || config->dataOrderForCompression) {
 358     TimeTraceScope timeScope("Balanced Partitioning Section Orderer");
 359     sectionPriorities = runBalancedPartitioning(
 360         highestAvailablePriority, config->irpgoProfileSortProfilePath,
 361         config->functionOrderForCompression, config->dataOrderForCompression,
 362         config->compressionSortStartupFunctions,
 363         config->verboseBpSectionOrderer);
 364   } else if (config->callGraphProfileSort) {
 365     // Sort sections by the profile data provided by __LLVM,__cg_profile
 366     // sections.
 367     //
 368     // This first builds a call graph based on the profile data then merges
 369     // sections according to the C³ heuristic. All clusters are then sorted by a
 370     // density metric to further improve locality.
 371     TimeTraceScope timeScope("Call graph profile sort");
 372     sectionPriorities = CallGraphSort(callGraphProfile).run();
 373   }
 374
 375   if (priorities.empty())
 376     return sectionPriorities;
 377
 378   auto addSym = [&](const Defined *sym) {
 379     std::optional<size_t> symbolPriority = getSymbolPriority(sym);
 380     if (!symbolPriority)
 381       return;
 382     size_t &priority = sectionPriorities[sym->isec()];
 383     priority = std::max(priority, *symbolPriority);
 384   };
 385
 386   // TODO: Make sure this handles weak symbols correctly.
 387   for (const InputFile *file : inputFiles) {
 388     if (isa<ObjFile>(file))
 389       for (Symbol *sym : file->symbols)
 390         if (auto *d = dyn_cast_or_null<Defined>(sym))
 391           addSym(d);
 392   }
 393
 394   return sectionPriorities;
 395 }