lld/MachO/ICF.cpp

   1 //===- ICF.cpp ------------------------------------------------------------===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8
   9 #include "ICF.h"
  10 #include "ConcatOutputSection.h"
  11 #include "Config.h"
  12 #include "InputSection.h"
  13 #include "SymbolTable.h"
  14 #include "Symbols.h"
  15 #include "UnwindInfoSection.h"
  16
  17 #include "lld/Common/CommonLinkerContext.h"
  18 #include "llvm/Support/LEB128.h"
  19 #include "llvm/Support/Parallel.h"
  20 #include "llvm/Support/TimeProfiler.h"
  21 #include "llvm/Support/xxhash.h"
  22
  23 #include <atomic>
  24
  25 using namespace llvm;
  26 using namespace lld;
  27 using namespace lld::macho;
  28
// Compile-time switch for extra ICF statistics. When true, equalsConstant()
// and equalsVariable() count their invocations and ICF::run() logs the totals.
static constexpr bool verboseDiagnostics = false;
  30
  31 class ICF {
  32 public:
  33   ICF(std::vector<ConcatInputSection *> &inputs);
  34   void run();
  35
  36   using EqualsFn = bool (ICF::*)(const ConcatInputSection *,
  37                                  const ConcatInputSection *);
  38   void segregate(size_t begin, size_t end, EqualsFn);
  39   size_t findBoundary(size_t begin, size_t end);
  40   void forEachClassRange(size_t begin, size_t end,
  41                          llvm::function_ref<void(size_t, size_t)> func);
  42   void forEachClass(llvm::function_ref<void(size_t, size_t)> func);
  43
  44   bool equalsConstant(const ConcatInputSection *ia,
  45                       const ConcatInputSection *ib);
  46   bool equalsVariable(const ConcatInputSection *ia,
  47                       const ConcatInputSection *ib);
  48
  49   // ICF needs a copy of the inputs vector because its equivalence-class
  50   // segregation algorithm destroys the proper sequence.
  51   std::vector<ConcatInputSection *> icfInputs;
  52
  53   unsigned icfPass = 0;
  54   std::atomic<bool> icfRepeat{false};
  55   std::atomic<uint64_t> equalsConstantCount{0};
  56   std::atomic<uint64_t> equalsVariableCount{0};
  57 };
  58
  59 ICF::ICF(std::vector<ConcatInputSection *> &inputs) {
  60   icfInputs.assign(inputs.begin(), inputs.end());
  61 }
  62
  63 // ICF = Identical Code Folding
  64 //
  65 // We only fold __TEXT,__text, so this is really "code" folding, and not
  66 // "COMDAT" folding. String and scalar constant literals are deduplicated
  67 // elsewhere.
  68 //
  69 // Summary of segments & sections:
  70 //
  71 // The __TEXT segment is readonly at the MMU. Some sections are already
  72 // deduplicated elsewhere (__TEXT,__cstring & __TEXT,__literal*) and some are
  73 // synthetic and inherently free of duplicates (__TEXT,__stubs &
  74 // __TEXT,__unwind_info). Note that we don't yet run ICF on __TEXT,__const,
  75 // because doing so induces many test failures.
  76 //
  77 // The __LINKEDIT segment is readonly at the MMU, yet entirely synthetic, and
  78 // thus ineligible for ICF.
  79 //
  80 // The __DATA_CONST segment is read/write at the MMU, but is logically const to
  81 // the application after dyld applies fixups to pointer data. We currently
  82 // fold only the __DATA_CONST,__cfstring section.
  83 //
  84 // The __DATA segment is read/write at the MMU, and as application-writeable
  85 // data, none of its sections are eligible for ICF.
  86 //
  87 // Please see the large block comment in lld/ELF/ICF.cpp for an explanation
  88 // of the segregation algorithm.
  89 //
  90 // FIXME(gkm): implement keep-unique attributes
  91 // FIXME(gkm): implement address-significance tables for MachO object files
  92
  93 // Compare "non-moving" parts of two ConcatInputSections, namely everything
  94 // except references to other ConcatInputSections.
  95 bool ICF::equalsConstant(const ConcatInputSection *ia,
  96                          const ConcatInputSection *ib) {
  97   if (verboseDiagnostics)
  98     ++equalsConstantCount;
  99   // We can only fold within the same OutputSection.
 100   if (ia->parent != ib->parent)
 101     return false;
 102   if (ia->data.size() != ib->data.size())
 103     return false;
 104   if (ia->data != ib->data)
 105     return false;
 106   if (ia->relocs.size() != ib->relocs.size())
 107     return false;
 108   auto f = [](const Reloc &ra, const Reloc &rb) {
 109     if (ra.type != rb.type)
 110       return false;
 111     if (ra.pcrel != rb.pcrel)
 112       return false;
 113     if (ra.length != rb.length)
 114       return false;
 115     if (ra.offset != rb.offset)
 116       return false;
 117     if (ra.referent.is<Symbol *>() != rb.referent.is<Symbol *>())
 118       return false;
 119
 120     InputSection *isecA, *isecB;
 121
 122     uint64_t valueA = 0;
 123     uint64_t valueB = 0;
 124     if (ra.referent.is<Symbol *>()) {
 125       const auto *sa = ra.referent.get<Symbol *>();
 126       const auto *sb = rb.referent.get<Symbol *>();
 127       if (sa->kind() != sb->kind())
 128         return false;
 129       // ICF runs before Undefineds are treated (and potentially converted into
 130       // DylibSymbols).
 131       if (isa<DylibSymbol>(sa) || isa<Undefined>(sa))
 132         return sa == sb && ra.addend == rb.addend;
 133       assert(isa<Defined>(sa));
 134       const auto *da = cast<Defined>(sa);
 135       const auto *db = cast<Defined>(sb);
 136       if (!da->isec || !db->isec) {
 137         assert(da->isAbsolute() && db->isAbsolute());
 138         return da->value + ra.addend == db->value + rb.addend;
 139       }
 140       isecA = da->isec;
 141       valueA = da->value;
 142       isecB = db->isec;
 143       valueB = db->value;
 144     } else {
 145       isecA = ra.referent.get<InputSection *>();
 146       isecB = rb.referent.get<InputSection *>();
 147     }
 148
 149     if (isecA->parent != isecB->parent)
 150       return false;
 151     // Sections with identical parents should be of the same kind.
 152     assert(isecA->kind() == isecB->kind());
 153     // We will compare ConcatInputSection contents in equalsVariable.
 154     if (isa<ConcatInputSection>(isecA))
 155       return ra.addend == rb.addend;
 156     // Else we have two literal sections. References to them are equal iff their
 157     // offsets in the output section are equal.
 158     if (ra.referent.is<Symbol *>())
 159       // For symbol relocs, we compare the contents at the symbol address. We
 160       // don't do `getOffset(value + addend)` because value + addend may not be
 161       // a valid offset in the literal section.
 162       return isecA->getOffset(valueA) == isecB->getOffset(valueB) &&
 163              ra.addend == rb.addend;
 164     else {
 165       assert(valueA == 0 && valueB == 0);
 166       // For section relocs, we compare the content at the section offset.
 167       return isecA->getOffset(ra.addend) == isecB->getOffset(rb.addend);
 168     }
 169   };
 170   return std::equal(ia->relocs.begin(), ia->relocs.end(), ib->relocs.begin(),
 171                     f);
 172 }
 173
 174 // Compare the "moving" parts of two ConcatInputSections -- i.e. everything not
 175 // handled by equalsConstant().
 176 bool ICF::equalsVariable(const ConcatInputSection *ia,
 177                          const ConcatInputSection *ib) {
 178   if (verboseDiagnostics)
 179     ++equalsVariableCount;
 180   assert(ia->relocs.size() == ib->relocs.size());
 181   auto f = [this](const Reloc &ra, const Reloc &rb) {
 182     // We already filtered out mismatching values/addends in equalsConstant.
 183     if (ra.referent == rb.referent)
 184       return true;
 185     const ConcatInputSection *isecA, *isecB;
 186     if (ra.referent.is<Symbol *>()) {
 187       // Matching DylibSymbols are already filtered out by the
 188       // identical-referent check above. Non-matching DylibSymbols were filtered
 189       // out in equalsConstant(). So we can safely cast to Defined here.
 190       const auto *da = cast<Defined>(ra.referent.get<Symbol *>());
 191       const auto *db = cast<Defined>(rb.referent.get<Symbol *>());
 192       if (da->isAbsolute())
 193         return true;
 194       isecA = dyn_cast<ConcatInputSection>(da->isec);
 195       if (!isecA)
 196         return true; // literal sections were checked in equalsConstant.
 197       isecB = cast<ConcatInputSection>(db->isec);
 198     } else {
 199       const auto *sa = ra.referent.get<InputSection *>();
 200       const auto *sb = rb.referent.get<InputSection *>();
 201       isecA = dyn_cast<ConcatInputSection>(sa);
 202       if (!isecA)
 203         return true;
 204       isecB = cast<ConcatInputSection>(sb);
 205     }
 206     return isecA->icfEqClass[icfPass % 2] == isecB->icfEqClass[icfPass % 2];
 207   };
 208   if (!std::equal(ia->relocs.begin(), ia->relocs.end(), ib->relocs.begin(), f))
 209     return false;
 210
 211   // If there are symbols with associated unwind info, check that the unwind
 212   // info matches. For simplicity, we only handle the case where there are only
 213   // symbols at offset zero within the section (which is typically the case with
 214   // .subsections_via_symbols.)
 215   auto hasUnwind = [](Defined *d) { return d->unwindEntry != nullptr; };
 216   const auto *itA = llvm::find_if(ia->symbols, hasUnwind);
 217   const auto *itB = llvm::find_if(ib->symbols, hasUnwind);
 218   if (itA == ia->symbols.end())
 219     return itB == ib->symbols.end();
 220   if (itB == ib->symbols.end())
 221     return false;
 222   const Defined *da = *itA;
 223   const Defined *db = *itB;
 224   if (da->unwindEntry->icfEqClass[icfPass % 2] !=
 225           db->unwindEntry->icfEqClass[icfPass % 2] ||
 226       da->value != 0 || db->value != 0)
 227     return false;
 228   auto isZero = [](Defined *d) { return d->value == 0; };
 229   return std::find_if_not(std::next(itA), ia->symbols.end(), isZero) ==
 230              ia->symbols.end() &&
 231          std::find_if_not(std::next(itB), ib->symbols.end(), isZero) ==
 232              ib->symbols.end();
 233 }
 234
 235 // Find the first InputSection after BEGIN whose equivalence class differs
 236 size_t ICF::findBoundary(size_t begin, size_t end) {
 237   uint64_t beginHash = icfInputs[begin]->icfEqClass[icfPass % 2];
 238   for (size_t i = begin + 1; i < end; ++i)
 239     if (beginHash != icfInputs[i]->icfEqClass[icfPass % 2])
 240       return i;
 241   return end;
 242 }
 243
 244 // Invoke FUNC on subranges with matching equivalence class
 245 void ICF::forEachClassRange(size_t begin, size_t end,
 246                             llvm::function_ref<void(size_t, size_t)> func) {
 247   while (begin < end) {
 248     size_t mid = findBoundary(begin, end);
 249     func(begin, mid);
 250     begin = mid;
 251   }
 252 }
 253
 254 // Split icfInputs into shards, then parallelize invocation of FUNC on subranges
 255 // with matching equivalence class
 256 void ICF::forEachClass(llvm::function_ref<void(size_t, size_t)> func) {
 257   // Only use threads when the benefits outweigh the overhead.
 258   const size_t threadingThreshold = 1024;
 259   if (icfInputs.size() < threadingThreshold) {
 260     forEachClassRange(0, icfInputs.size(), func);
 261     ++icfPass;
 262     return;
 263   }
 264
 265   // Shard into non-overlapping intervals, and call FUNC in parallel.  The
 266   // sharding must be completed before any calls to FUNC are made so that FUNC
 267   // can modify the InputSection in its shard without causing data races.
 268   const size_t shards = 256;
 269   size_t step = icfInputs.size() / shards;
 270   size_t boundaries[shards + 1];
 271   boundaries[0] = 0;
 272   boundaries[shards] = icfInputs.size();
 273   parallelFor(1, shards, [&](size_t i) {
 274     boundaries[i] = findBoundary((i - 1) * step, icfInputs.size());
 275   });
 276   parallelFor(1, shards + 1, [&](size_t i) {
 277     if (boundaries[i - 1] < boundaries[i]) {
 278       forEachClassRange(boundaries[i - 1], boundaries[i], func);
 279     }
 280   });
 281   ++icfPass;
 282 }
 283
 284 void ICF::run() {
 285   // Into each origin-section hash, combine all reloc referent section hashes.
 286   for (icfPass = 0; icfPass < 2; ++icfPass) {
 287     parallelForEach(icfInputs, [&](ConcatInputSection *isec) {
 288       uint32_t hash = isec->icfEqClass[icfPass % 2];
 289       for (const Reloc &r : isec->relocs) {
 290         if (auto *sym = r.referent.dyn_cast<Symbol *>()) {
 291           if (auto *defined = dyn_cast<Defined>(sym)) {
 292             if (defined->isec) {
 293               if (auto *referentIsec =
 294                       dyn_cast<ConcatInputSection>(defined->isec))
 295                 hash += defined->value + referentIsec->icfEqClass[icfPass % 2];
 296               else
 297                 hash += defined->isec->kind() +
 298                         defined->isec->getOffset(defined->value);
 299             } else {
 300               hash += defined->value;
 301             }
 302           } else {
 303             // ICF runs before Undefined diags
 304             assert(isa<Undefined>(sym) || isa<DylibSymbol>(sym));
 305           }
 306         }
 307       }
 308       // Set MSB to 1 to avoid collisions with non-hashed classes.
 309       isec->icfEqClass[(icfPass + 1) % 2] = hash | (1ull << 31);
 310     });
 311   }
 312
 313   llvm::stable_sort(
 314       icfInputs, [](const ConcatInputSection *a, const ConcatInputSection *b) {
 315         return a->icfEqClass[0] < b->icfEqClass[0];
 316       });
 317   forEachClass([&](size_t begin, size_t end) {
 318     segregate(begin, end, &ICF::equalsConstant);
 319   });
 320
 321   // Split equivalence groups by comparing relocations until convergence
 322   do {
 323     icfRepeat = false;
 324     forEachClass([&](size_t begin, size_t end) {
 325       segregate(begin, end, &ICF::equalsVariable);
 326     });
 327   } while (icfRepeat);
 328   log("ICF needed " + Twine(icfPass) + " iterations");
 329   if (verboseDiagnostics) {
 330     log("equalsConstant() called " + Twine(equalsConstantCount) + " times");
 331     log("equalsVariable() called " + Twine(equalsVariableCount) + " times");
 332   }
 333
 334   // Fold sections within equivalence classes
 335   forEachClass([&](size_t begin, size_t end) {
 336     if (end - begin < 2)
 337       return;
 338     ConcatInputSection *beginIsec = icfInputs[begin];
 339     for (size_t i = begin + 1; i < end; ++i)
 340       beginIsec->foldIdentical(icfInputs[i]);
 341   });
 342 }
 343
 344 // Split an equivalence class into smaller classes.
 345 void ICF::segregate(size_t begin, size_t end, EqualsFn equals) {
 346   while (begin < end) {
 347     // Divide [begin, end) into two. Let mid be the start index of the
 348     // second group.
 349     auto bound = std::stable_partition(
 350         icfInputs.begin() + begin + 1, icfInputs.begin() + end,
 351         [&](ConcatInputSection *isec) {
 352           return (this->*equals)(icfInputs[begin], isec);
 353         });
 354     size_t mid = bound - icfInputs.begin();
 355
 356     // Split [begin, end) into [begin, mid) and [mid, end). We use mid as an
 357     // equivalence class ID because every group ends with a unique index.
 358     for (size_t i = begin; i < mid; ++i)
 359       icfInputs[i]->icfEqClass[(icfPass + 1) % 2] = mid;
 360
 361     // If we created a group, we need to iterate the main loop again.
 362     if (mid != end)
 363       icfRepeat = true;
 364
 365     begin = mid;
 366   }
 367 }
 368
 369 void macho::markSymAsAddrSig(Symbol *s) {
 370   if (auto *d = dyn_cast_or_null<Defined>(s))
 371     if (d->isec)
 372       d->isec->keepUnique = true;
 373 }
 374
 375 void macho::markAddrSigSymbols() {
 376   TimeTraceScope timeScope("Mark addrsig symbols");
 377   for (InputFile *file : inputFiles) {
 378     ObjFile *obj = dyn_cast<ObjFile>(file);
 379     if (!obj)
 380       continue;
 381
 382     Section *addrSigSection = obj->addrSigSection;
 383     if (!addrSigSection)
 384       continue;
 385     assert(addrSigSection->subsections.size() == 1);
 386
 387     const InputSection *isec = addrSigSection->subsections[0].isec;
 388
 389     for (const Reloc &r : isec->relocs) {
 390       if (auto *sym = r.referent.dyn_cast<Symbol *>())
 391         markSymAsAddrSig(sym);
 392       else
 393         error(toString(isec) + ": unexpected section relocation");
 394     }
 395   }
 396 }
 397
 398 void macho::foldIdenticalSections(bool onlyCfStrings) {
 399   TimeTraceScope timeScope("Fold Identical Code Sections");
 400   // The ICF equivalence-class segregation algorithm relies on pre-computed
 401   // hashes of InputSection::data for the ConcatOutputSection::inputs and all
 402   // sections referenced by their relocs. We could recursively traverse the
 403   // relocs to find every referenced InputSection, but that precludes easy
 404   // parallelization. Therefore, we hash every InputSection here where we have
 405   // them all accessible as simple vectors.
 406
 407   // If an InputSection is ineligible for ICF, we give it a unique ID to force
 408   // it into an unfoldable singleton equivalence class.  Begin the unique-ID
 409   // space at inputSections.size(), so that it will never intersect with
 410   // equivalence-class IDs which begin at 0. Since hashes & unique IDs never
 411   // coexist with equivalence-class IDs, this is not necessary, but might help
 412   // someone keep the numbers straight in case we ever need to debug the
 413   // ICF::segregate()
 414   std::vector<ConcatInputSection *> foldable;
 415   uint64_t icfUniqueID = inputSections.size();
 416   for (ConcatInputSection *isec : inputSections) {
 417     bool isFoldableWithAddendsRemoved = isCfStringSection(isec) ||
 418                                         isClassRefsSection(isec) ||
 419                                         isSelRefsSection(isec);
 420     // NOTE: __objc_selrefs is typically marked as no_dead_strip by MC, but we
 421     // can still fold it.
 422     bool hasFoldableFlags = (isSelRefsSection(isec) ||
 423                              sectionType(isec->getFlags()) == MachO::S_REGULAR);
 424     // FIXME: consider non-code __text sections as foldable?
 425     bool isFoldable = (!onlyCfStrings || isCfStringSection(isec)) &&
 426                       (isCodeSection(isec) || isFoldableWithAddendsRemoved ||
 427                        isGccExceptTabSection(isec)) &&
 428                       !isec->keepUnique && !isec->hasAltEntry &&
 429                       !isec->shouldOmitFromOutput() && hasFoldableFlags;
 430     if (isFoldable) {
 431       foldable.push_back(isec);
 432       for (Defined *d : isec->symbols)
 433         if (d->unwindEntry)
 434           foldable.push_back(d->unwindEntry);
 435
 436       // Some sections have embedded addends that foil ICF's hashing / equality
 437       // checks. (We can ignore embedded addends when doing ICF because the same
 438       // information gets recorded in our Reloc structs.) We therefore create a
 439       // mutable copy of the section data and zero out the embedded addends
 440       // before performing any hashing / equality checks.
 441       if (isFoldableWithAddendsRemoved) {
 442         // We have to do this copying serially as the BumpPtrAllocator is not
 443         // thread-safe. FIXME: Make a thread-safe allocator.
 444         MutableArrayRef<uint8_t> copy = isec->data.copy(bAlloc());
 445         for (const Reloc &r : isec->relocs)
 446           target->relocateOne(copy.data() + r.offset, r, /*va=*/0,
 447                               /*relocVA=*/0);
 448         isec->data = copy;
 449       }
 450     } else if (!isEhFrameSection(isec)) {
 451       // EH frames are gathered as foldables from unwindEntry above; give a
 452       // unique ID to everything else.
 453       isec->icfEqClass[0] = ++icfUniqueID;
 454     }
 455   }
 456   parallelForEach(foldable, [](ConcatInputSection *isec) {
 457     assert(isec->icfEqClass[0] == 0); // don't overwrite a unique ID!
 458     // Turn-on the top bit to guarantee that valid hashes have no collisions
 459     // with the small-integer unique IDs for ICF-ineligible sections
 460     isec->icfEqClass[0] = xxh3_64bits(isec->data) | (1ull << 31);
 461   });
 462   // Now that every input section is either hashed or marked as unique, run the
 463   // segregation algorithm to detect foldable subsections.
 464   ICF(foldable).run();
 465 }