lld/MachO/SectionPriorities.cpp

   1 //===- SectionPriorities.cpp ----------------------------------------------===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 ///
   9 /// This is based on the ELF port, see ELF/CallGraphSort.cpp for the details
  10 /// about the algorithm.
  11 ///
  12 //===----------------------------------------------------------------------===//
  13
  14 #include "SectionPriorities.h"
  15 #include "Config.h"
  16 #include "InputFiles.h"
  17 #include "Symbols.h"
  18 #include "Target.h"
  19
  20 #include "lld/Common/Args.h"
  21 #include "lld/Common/CommonLinkerContext.h"
  22 #include "lld/Common/ErrorHandler.h"
  23 #include "llvm/ADT/DenseMap.h"
  24 #include "llvm/ADT/MapVector.h"
  25 #include "llvm/ADT/Optional.h"
  26 #include "llvm/Support/Path.h"
  27 #include "llvm/Support/TimeProfiler.h"
  28 #include "llvm/Support/raw_ostream.h"
  29 #include <numeric>
  30
  31 using namespace llvm;
  32 using namespace llvm::MachO;
  33 using namespace llvm::sys;
  34 using namespace lld;
  35 using namespace lld::macho;
  36
  37 PriorityBuilder macho::priorityBuilder;
  38
  39 namespace {
  40
  41 size_t highestAvailablePriority = std::numeric_limits<size_t>::max();
  42
  43 struct Edge {
  44   int from;
  45   uint64_t weight;
  46 };
  47
  48 struct Cluster {
  49   Cluster(int sec, size_t s) : next(sec), prev(sec), size(s) {}
  50
  51   double getDensity() const {
  52     if (size == 0)
  53       return 0;
  54     return double(weight) / double(size);
  55   }
  56
  57   int next;
  58   int prev;
  59   uint64_t size;
  60   uint64_t weight = 0;
  61   uint64_t initialWeight = 0;
  62   Edge bestPred = {-1, 0};
  63 };
  64
  65 class CallGraphSort {
  66 public:
  67   CallGraphSort(const MapVector<SectionPair, uint64_t> &profile);
  68
  69   DenseMap<const InputSection *, size_t> run();
  70
  71 private:
  72   std::vector<Cluster> clusters;
  73   std::vector<const InputSection *> sections;
  74 };
  75 // Maximum amount the combined cluster density can be worse than the original
  76 // cluster to consider merging.
  77 constexpr int MAX_DENSITY_DEGRADATION = 8;
  78 } // end anonymous namespace
  79
  80 // Take the edge list in callGraphProfile, resolve symbol names to Symbols, and
  81 // generate a graph between InputSections with the provided weights.
  82 CallGraphSort::CallGraphSort(const MapVector<SectionPair, uint64_t> &profile) {
  83   DenseMap<const InputSection *, int> secToCluster;
  84
  85   auto getOrCreateCluster = [&](const InputSection *isec) -> int {
  86     auto res = secToCluster.try_emplace(isec, clusters.size());
  87     if (res.second) {
  88       sections.push_back(isec);
  89       clusters.emplace_back(clusters.size(), isec->getSize());
  90     }
  91     return res.first->second;
  92   };
  93
  94   // Create the graph
  95   for (const std::pair<SectionPair, uint64_t> &c : profile) {
  96     const auto fromSec = c.first.first->canonical();
  97     const auto toSec = c.first.second->canonical();
  98     uint64_t weight = c.second;
  99     // Ignore edges between input sections belonging to different output
 100     // sections.  This is done because otherwise we would end up with clusters
 101     // containing input sections that can't actually be placed adjacently in the
 102     // output.  This messes with the cluster size and density calculations.  We
 103     // would also end up moving input sections in other output sections without
 104     // moving them closer to what calls them.
 105     if (fromSec->parent != toSec->parent)
 106       continue;
 107
 108     int from = getOrCreateCluster(fromSec);
 109     int to = getOrCreateCluster(toSec);
 110
 111     clusters[to].weight += weight;
 112
 113     if (from == to)
 114       continue;
 115
 116     // Remember the best edge.
 117     Cluster &toC = clusters[to];
 118     if (toC.bestPred.from == -1 || toC.bestPred.weight < weight) {
 119       toC.bestPred.from = from;
 120       toC.bestPred.weight = weight;
 121     }
 122   }
 123   for (Cluster &c : clusters)
 124     c.initialWeight = c.weight;
 125 }
 126
 127 // It's bad to merge clusters which would degrade the density too much.
 128 static bool isNewDensityBad(Cluster &a, Cluster &b) {
 129   double newDensity = double(a.weight + b.weight) / double(a.size + b.size);
 130   return newDensity < a.getDensity() / MAX_DENSITY_DEGRADATION;
 131 }
 132
 133 // Find the leader of V's belonged cluster (represented as an equivalence
 134 // class). We apply union-find path-halving technique (simple to implement) in
 135 // the meantime as it decreases depths and the time complexity.
 136 static int getLeader(std::vector<int> &leaders, int v) {
 137   while (leaders[v] != v) {
 138     leaders[v] = leaders[leaders[v]];
 139     v = leaders[v];
 140   }
 141   return v;
 142 }
 143
 144 static void mergeClusters(std::vector<Cluster> &cs, Cluster &into, int intoIdx,
 145                           Cluster &from, int fromIdx) {
 146   int tail1 = into.prev, tail2 = from.prev;
 147   into.prev = tail2;
 148   cs[tail2].next = intoIdx;
 149   from.prev = tail1;
 150   cs[tail1].next = fromIdx;
 151   into.size += from.size;
 152   into.weight += from.weight;
 153   from.size = 0;
 154   from.weight = 0;
 155 }
 156
 157 // Group InputSections into clusters using the Call-Chain Clustering heuristic
 158 // then sort the clusters by density.
 159 DenseMap<const InputSection *, size_t> CallGraphSort::run() {
 160   const uint64_t maxClusterSize = target->getPageSize();
 161
 162   // Cluster indices sorted by density.
 163   std::vector<int> sorted(clusters.size());
 164   // For union-find.
 165   std::vector<int> leaders(clusters.size());
 166
 167   std::iota(leaders.begin(), leaders.end(), 0);
 168   std::iota(sorted.begin(), sorted.end(), 0);
 169
 170   llvm::stable_sort(sorted, [&](int a, int b) {
 171     return clusters[a].getDensity() > clusters[b].getDensity();
 172   });
 173
 174   for (int l : sorted) {
 175     // The cluster index is the same as the index of its leader here because
 176     // clusters[L] has not been merged into another cluster yet.
 177     Cluster &c = clusters[l];
 178
 179     // Don't consider merging if the edge is unlikely.
 180     if (c.bestPred.from == -1 || c.bestPred.weight * 10 <= c.initialWeight)
 181       continue;
 182
 183     int predL = getLeader(leaders, c.bestPred.from);
 184     // Already in the same cluster.
 185     if (l == predL)
 186       continue;
 187
 188     Cluster *predC = &clusters[predL];
 189     if (c.size + predC->size > maxClusterSize)
 190       continue;
 191
 192     if (isNewDensityBad(*predC, c))
 193       continue;
 194
 195     leaders[l] = predL;
 196     mergeClusters(clusters, *predC, predL, c, l);
 197   }
 198   // Sort remaining non-empty clusters by density.
 199   sorted.clear();
 200   for (int i = 0, e = (int)clusters.size(); i != e; ++i)
 201     if (clusters[i].size > 0)
 202       sorted.push_back(i);
 203   llvm::stable_sort(sorted, [&](int a, int b) {
 204     return clusters[a].getDensity() > clusters[b].getDensity();
 205   });
 206
 207   DenseMap<const InputSection *, size_t> orderMap;
 208
 209   // Sections will be sorted by decreasing order. Absent sections will have
 210   // priority 0 and be placed at the end of sections.
 211   // NB: This is opposite from COFF/ELF to be compatible with the existing
 212   // order-file code.
 213   int curOrder = highestAvailablePriority;
 214   for (int leader : sorted) {
 215     for (int i = leader;;) {
 216       orderMap[sections[i]] = curOrder--;
 217       i = clusters[i].next;
 218       if (i == leader)
 219         break;
 220     }
 221   }
 222   if (!config->printSymbolOrder.empty()) {
 223     std::error_code ec;
 224     raw_fd_ostream os(config->printSymbolOrder, ec, sys::fs::OF_None);
 225     if (ec) {
 226       error("cannot open " + config->printSymbolOrder + ": " + ec.message());
 227       return orderMap;
 228     }
 229     // Print the symbols ordered by C3, in the order of decreasing curOrder
 230     // Instead of sorting all the orderMap, just repeat the loops above.
 231     for (int leader : sorted)
 232       for (int i = leader;;) {
 233         const InputSection *isec = sections[i];
 234         // Search all the symbols in the file of the section
 235         // and find out a Defined symbol with name that is within the
 236         // section.
 237         for (Symbol *sym : isec->getFile()->symbols) {
 238           if (auto *d = dyn_cast_or_null<Defined>(sym)) {
 239             if (d->isec == isec)
 240               os << sym->getName() << "\n";
 241           }
 242         }
 243         i = clusters[i].next;
 244         if (i == leader)
 245           break;
 246       }
 247   }
 248
 249   return orderMap;
 250 }
 251
 252 Optional<size_t> macho::PriorityBuilder::getSymbolPriority(const Defined *sym) {
 253   if (sym->isAbsolute())
 254     return None;
 255
 256   auto it = priorities.find(sym->getName());
 257   if (it == priorities.end())
 258     return None;
 259   const SymbolPriorityEntry &entry = it->second;
 260   const InputFile *f = sym->isec->getFile();
 261   if (!f)
 262     return entry.anyObjectFile;
 263   // We don't use toString(InputFile *) here because it returns the full path
 264   // for object files, and we only want the basename.
 265   StringRef filename;
 266   if (f->archiveName.empty())
 267     filename = path::filename(f->getName());
 268   else
 269     filename = saver().save(path::filename(f->archiveName) + "(" +
 270                             path::filename(f->getName()) + ")");
 271   return std::max(entry.objectFiles.lookup(filename), entry.anyObjectFile);
 272 }
 273
 274 void macho::PriorityBuilder::extractCallGraphProfile() {
 275   TimeTraceScope timeScope("Extract call graph profile");
 276   bool hasOrderFile = !priorities.empty();
 277   for (const InputFile *file : inputFiles) {
 278     auto *obj = dyn_cast_or_null<ObjFile>(file);
 279     if (!obj)
 280       continue;
 281     for (const CallGraphEntry &entry : obj->callGraph) {
 282       assert(entry.fromIndex < obj->symbols.size() &&
 283              entry.toIndex < obj->symbols.size());
 284       auto *fromSym = dyn_cast_or_null<Defined>(obj->symbols[entry.fromIndex]);
 285       auto *toSym = dyn_cast_or_null<Defined>(obj->symbols[entry.toIndex]);
 286       if (fromSym && toSym &&
 287           (!hasOrderFile ||
 288            (!getSymbolPriority(fromSym) && !getSymbolPriority(toSym))))
 289         callGraphProfile[{fromSym->isec, toSym->isec}] += entry.count;
 290     }
 291   }
 292 }
 293
 294 void macho::PriorityBuilder::parseOrderFile(StringRef path) {
 295   assert(callGraphProfile.empty() &&
 296          "Order file must be parsed before call graph profile is processed");
 297   Optional<MemoryBufferRef> buffer = readFile(path);
 298   if (!buffer) {
 299     error("Could not read order file at " + path);
 300     return;
 301   }
 302
 303   MemoryBufferRef mbref = *buffer;
 304   for (StringRef line : args::getLines(mbref)) {
 305     StringRef objectFile, symbol;
 306     line = line.take_until([](char c) { return c == '#'; }); // ignore comments
 307     line = line.ltrim();
 308
 309     CPUType cpuType = StringSwitch<CPUType>(line)
 310                           .StartsWith("i386:", CPU_TYPE_I386)
 311                           .StartsWith("x86_64:", CPU_TYPE_X86_64)
 312                           .StartsWith("arm:", CPU_TYPE_ARM)
 313                           .StartsWith("arm64:", CPU_TYPE_ARM64)
 314                           .StartsWith("ppc:", CPU_TYPE_POWERPC)
 315                           .StartsWith("ppc64:", CPU_TYPE_POWERPC64)
 316                           .Default(CPU_TYPE_ANY);
 317
 318     if (cpuType != CPU_TYPE_ANY && cpuType != target->cpuType)
 319       continue;
 320
 321     // Drop the CPU type as well as the colon
 322     if (cpuType != CPU_TYPE_ANY)
 323       line = line.drop_until([](char c) { return c == ':'; }).drop_front();
 324
 325     constexpr std::array<StringRef, 2> fileEnds = {".o:", ".o):"};
 326     for (StringRef fileEnd : fileEnds) {
 327       size_t pos = line.find(fileEnd);
 328       if (pos != StringRef::npos) {
 329         // Split the string around the colon
 330         objectFile = line.take_front(pos + fileEnd.size() - 1);
 331         line = line.drop_front(pos + fileEnd.size());
 332         break;
 333       }
 334     }
 335     symbol = line.trim();
 336
 337     if (!symbol.empty()) {
 338       SymbolPriorityEntry &entry = priorities[symbol];
 339       if (!objectFile.empty())
 340         entry.objectFiles.insert(
 341             std::make_pair(objectFile, highestAvailablePriority));
 342       else
 343         entry.anyObjectFile =
 344             std::max(entry.anyObjectFile, highestAvailablePriority);
 345     }
 346
 347     --highestAvailablePriority;
 348   }
 349 }
 350
 351 DenseMap<const InputSection *, size_t>
 352 macho::PriorityBuilder::buildInputSectionPriorities() {
 353   DenseMap<const InputSection *, size_t> sectionPriorities;
 354   if (config->callGraphProfileSort) {
 355     // Sort sections by the profile data provided by __LLVM,__cg_profile
 356     // sections.
 357     //
 358     // This first builds a call graph based on the profile data then merges
 359     // sections according to the C³ heuristic. All clusters are then sorted by a
 360     // density metric to further improve locality.
 361     TimeTraceScope timeScope("Call graph profile sort");
 362     sectionPriorities = CallGraphSort(callGraphProfile).run();
 363   }
 364
 365   if (priorities.empty())
 366     return sectionPriorities;
 367
 368   auto addSym = [&](const Defined *sym) {
 369     Optional<size_t> symbolPriority = getSymbolPriority(sym);
 370     if (!symbolPriority.hasValue())
 371       return;
 372     size_t &priority = sectionPriorities[sym->isec];
 373     priority = std::max(priority, symbolPriority.getValue());
 374   };
 375
 376   // TODO: Make sure this handles weak symbols correctly.
 377   for (const InputFile *file : inputFiles) {
 378     if (isa<ObjFile>(file))
 379       for (Symbol *sym : file->symbols)
 380         if (auto *d = dyn_cast_or_null<Defined>(sym))
 381           addSym(d);
 382   }
 383
 384   return sectionPriorities;
 385 }