lld/COFF/DebugTypes.cpp

   1 //===- DebugTypes.cpp -----------------------------------------------------===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8
   9 #include "DebugTypes.h"
  10 #include "COFFLinkerContext.h"
  11 #include "Chunks.h"
  12 #include "Driver.h"
  13 #include "InputFiles.h"
  14 #include "PDB.h"
  15 #include "TypeMerger.h"
  16 #include "lld/Common/ErrorHandler.h"
  17 #include "lld/Common/Memory.h"
  18 #include "llvm/ADT/StringExtras.h"
  19 #include "llvm/DebugInfo/CodeView/TypeIndexDiscovery.h"
  20 #include "llvm/DebugInfo/CodeView/TypeRecord.h"
  21 #include "llvm/DebugInfo/CodeView/TypeRecordHelpers.h"
  22 #include "llvm/DebugInfo/CodeView/TypeStreamMerger.h"
  23 #include "llvm/DebugInfo/PDB/GenericError.h"
  24 #include "llvm/DebugInfo/PDB/Native/InfoStream.h"
  25 #include "llvm/DebugInfo/PDB/Native/NativeSession.h"
  26 #include "llvm/DebugInfo/PDB/Native/PDBFile.h"
  27 #include "llvm/DebugInfo/PDB/Native/TpiHashing.h"
  28 #include "llvm/DebugInfo/PDB/Native/TpiStream.h"
  29 #include "llvm/Support/FormatVariadic.h"
  30 #include "llvm/Support/Parallel.h"
  31 #include "llvm/Support/Path.h"
  32
  33 using namespace llvm;
  34 using namespace llvm::codeview;
  35 using namespace lld;
  36 using namespace lld::coff;
  37
  38 namespace {
  39 class TypeServerIpiSource;
  40
  41 // The TypeServerSource class represents a PDB type server, a file referenced by
  42 // OBJ files compiled with MSVC /Zi. A single PDB can be shared by several OBJ
  43 // files, therefore there must be only once instance per OBJ lot. The file path
  44 // is discovered from the dependent OBJ's debug type stream. The
  45 // TypeServerSource object is then queued and loaded by the COFF Driver. The
  46 // debug type stream for such PDB files will be merged first in the final PDB,
  47 // before any dependent OBJ.
  48 class TypeServerSource : public TpiSource {
  49 public:
  50   explicit TypeServerSource(COFFLinkerContext &ctx, PDBInputFile *f)
  51       : TpiSource(ctx, PDB, nullptr), pdbInputFile(f) {
  52     if (f->loadErrorStr)
  53       return;
  54     pdb::PDBFile &file = f->session->getPDBFile();
  55     auto expectedInfo = file.getPDBInfoStream();
  56     if (!expectedInfo)
  57       return;
  58     Guid = expectedInfo->getGuid();
  59     auto it = ctx.typeServerSourceMappings.emplace(Guid, this);
  60     if (!it.second) {
  61       // If we hit here we have collision on Guid's in two PDB files.
  62       // This can happen if the PDB Guid is invalid or if we are really
  63       // unlucky. This should fall back on stright file-system lookup.
  64       it.first->second = nullptr;
  65     }
  66   }
  67
  68   Error mergeDebugT(TypeMerger *m) override;
  69
  70   void loadGHashes() override;
  71   void remapTpiWithGHashes(GHashState *g) override;
  72
  73   bool isDependency() const override { return true; }
  74
  75   PDBInputFile *pdbInputFile = nullptr;
  76
  77   // TpiSource for IPI stream.
  78   TypeServerIpiSource *ipiSrc = nullptr;
  79
  80   // The PDB signature GUID.
  81   codeview::GUID Guid;
  82 };
  83
// Companion to TypeServerSource. Stores the index map for the IPI stream in the
// PDB. Modeling PDBs with two sources for TPI and IPI helps establish the
// invariant of one type index space per source.
class TypeServerIpiSource : public TpiSource {
public:
  // Creates a source of kind PDBIpi that has no associated ObjFile.
  explicit TypeServerIpiSource(COFFLinkerContext &ctx)
      : TpiSource(ctx, PDBIpi, nullptr) {}

  // TypeServerSource fills in this object's ghashes, item-index bits and index
  // map on its behalf.
  friend class TypeServerSource;

  // All of the TpiSource methods are no-ops. The parent TypeServerSource
  // handles both TPI and IPI.
  Error mergeDebugT(TypeMerger *m) override { return Error::success(); }
  void loadGHashes() override {}
  void remapTpiWithGHashes(GHashState *g) override {}
  bool isDependency() const override { return true; }
};
 101
// This class represents the debug type stream of an OBJ file that depends on a
// PDB type server (see TypeServerSource).
class UseTypeServerSource : public TpiSource {
  // Resolves typeServerDependency to the matching TypeServerSource, or returns
  // an error when no usable PDB can be found.
  Expected<TypeServerSource *> getTypeServerSource();

public:
  // Creates a source of kind UsingPDB for OBJ `f`, remembering the
  // LF_TYPESERVER2 record `ts` that names the PDB it depends on.
  UseTypeServerSource(COFFLinkerContext &ctx, ObjFile *f, TypeServer2Record ts)
      : TpiSource(ctx, UsingPDB, f), typeServerDependency(ts) {}

  Error mergeDebugT(TypeMerger *m) override;

  // No need to load ghashes from /Zi objects.
  void loadGHashes() override {}
  void remapTpiWithGHashes(GHashState *g) override;

  // Information about the PDB type server dependency, that needs to be loaded
  // in before merging this OBJ.
  TypeServer2Record typeServerDependency;
};
 121
// This class represents the debug type stream of a Microsoft precompiled
// headers OBJ (PCH OBJ). This OBJ kind needs to be merged first in the output
// PDB, before any other OBJs that depend on this. Note that only MSVC generates
// such files, clang does not.
class PrecompSource : public TpiSource {
public:
  // Creates a source of kind PCH for OBJ `f`.
  PrecompSource(COFFLinkerContext &ctx, ObjFile *f) : TpiSource(ctx, PCH, f) {
    // If the S_OBJNAME record contains the PCH signature, we'll register this
    // source file right away.
    registerMapping();
  }

  Error mergeDebugT(TypeMerger *m) override;

  void loadGHashes() override;

  bool isDependency() const override { return true; }

private:
  // Records this source in ctx.precompSourceMappings once a non-zero PCH
  // signature is known; does nothing if it was already registered.
  void registerMapping();

  // Whether this precomp OBJ was recorded in the precompSourceMappings map.
  // Only happens if the file->pchSignature is valid.
  bool registered = false;
};
 147
// This class represents the debug type stream of an OBJ file that depends on a
// Microsoft precompiled headers OBJ (see PrecompSource).
class UsePrecompSource : public TpiSource {
public:
  // Creates a source of kind UsingPCH for OBJ `f`, remembering the LF_PRECOMP
  // record `precomp` that describes the PCH OBJ it depends on.
  UsePrecompSource(COFFLinkerContext &ctx, ObjFile *f, PrecompRecord precomp)
      : TpiSource(ctx, UsingPCH, f), precompDependency(precomp) {}

  Error mergeDebugT(TypeMerger *m) override;

  void loadGHashes() override;
  void remapTpiWithGHashes(GHashState *g) override;

private:
  // Prepends the already-remapped indices of the PCH OBJ to this source's
  // index map.
  Error mergeInPrecompHeaderObj();

  // Looks up a registered PCH source whose OBJ file name equals fileNameOnly.
  PrecompSource *findObjByName(StringRef fileNameOnly);
  // Looks up the PCH source by signature, then by file name.
  PrecompSource *findPrecompSource(ObjFile *file, PrecompRecord &pr);
  // Like findPrecompSource, but reports an error when nothing matches or when
  // the record counts disagree.
  Expected<PrecompSource *> findPrecompMap(ObjFile *file, PrecompRecord &pr);

public:
  // Information about the Precomp OBJ dependency, that needs to be loaded in
  // before merging this OBJ.
  PrecompRecord precompDependency;
};
 172 } // namespace
 173
 174 TpiSource::TpiSource(COFFLinkerContext &ctx, TpiKind k, ObjFile *f)
 175     : ctx(ctx), kind(k), tpiSrcIdx(ctx.tpiSourceList.size()), file(f) {
 176   ctx.addTpiSource(this);
 177 }
 178
 179 // Vtable key method.
 180 TpiSource::~TpiSource() {
 181   // Silence any assertions about unchecked errors.
 182   consumeError(std::move(typeMergingError));
 183 }
 184
 185 TpiSource *lld::coff::makeTpiSource(COFFLinkerContext &ctx, ObjFile *file) {
 186   return make<TpiSource>(ctx, TpiSource::Regular, file);
 187 }
 188
 189 TpiSource *lld::coff::makeTypeServerSource(COFFLinkerContext &ctx,
 190                                            PDBInputFile *pdbInputFile) {
 191   // Type server sources come in pairs: the TPI stream, and the IPI stream.
 192   auto *tpiSource = make<TypeServerSource>(ctx, pdbInputFile);
 193   if (pdbInputFile->session->getPDBFile().hasPDBIpiStream())
 194     tpiSource->ipiSrc = make<TypeServerIpiSource>(ctx);
 195   return tpiSource;
 196 }
 197
 198 TpiSource *lld::coff::makeUseTypeServerSource(COFFLinkerContext &ctx,
 199                                               ObjFile *file,
 200                                               TypeServer2Record ts) {
 201   return make<UseTypeServerSource>(ctx, file, ts);
 202 }
 203
 204 TpiSource *lld::coff::makePrecompSource(COFFLinkerContext &ctx, ObjFile *file) {
 205   return make<PrecompSource>(ctx, file);
 206 }
 207
 208 TpiSource *lld::coff::makeUsePrecompSource(COFFLinkerContext &ctx,
 209                                            ObjFile *file,
 210                                            PrecompRecord precomp) {
 211   return make<UsePrecompSource>(ctx, file, precomp);
 212 }
 213
 214 bool TpiSource::remapTypeIndex(TypeIndex &ti, TiRefKind refKind) const {
 215   if (ti.isSimple())
 216     return true;
 217
 218   // This can be an item index or a type index. Choose the appropriate map.
 219   ArrayRef<TypeIndex> tpiOrIpiMap =
 220       (refKind == TiRefKind::IndexRef) ? ipiMap : tpiMap;
 221   if (ti.toArrayIndex() >= tpiOrIpiMap.size())
 222     return false;
 223   ti = tpiOrIpiMap[ti.toArrayIndex()];
 224   return true;
 225 }
 226
 227 void TpiSource::remapRecord(MutableArrayRef<uint8_t> rec,
 228                             ArrayRef<TiReference> typeRefs) {
 229   MutableArrayRef<uint8_t> contents = rec.drop_front(sizeof(RecordPrefix));
 230   for (const TiReference &ref : typeRefs) {
 231     unsigned byteSize = ref.Count * sizeof(TypeIndex);
 232     if (contents.size() < ref.Offset + byteSize)
 233       fatal("symbol record too short");
 234
 235     MutableArrayRef<TypeIndex> indices(
 236         reinterpret_cast<TypeIndex *>(contents.data() + ref.Offset), ref.Count);
 237     for (TypeIndex &ti : indices) {
 238       if (!remapTypeIndex(ti, ref.Kind)) {
 239         if (ctx.config.verbose) {
 240           uint16_t kind =
 241               reinterpret_cast<const RecordPrefix *>(rec.data())->RecordKind;
 242           StringRef fname = file ? file->getName() : "<unknown PDB>";
 243           log("failed to remap type index in record of kind 0x" +
 244               utohexstr(kind) + " in " + fname + " with bad " +
 245               (ref.Kind == TiRefKind::IndexRef ? "item" : "type") +
 246               " index 0x" + utohexstr(ti.getIndex()));
 247         }
 248         ti = TypeIndex(SimpleTypeKind::NotTranslated);
 249         continue;
 250       }
 251     }
 252   }
 253 }
 254
 255 void TpiSource::remapTypesInTypeRecord(MutableArrayRef<uint8_t> rec) {
 256   // TODO: Handle errors similar to symbols.
 257   SmallVector<TiReference, 32> typeRefs;
 258   discoverTypeIndices(CVType(rec), typeRefs);
 259   remapRecord(rec, typeRefs);
 260 }
 261
 262 bool TpiSource::remapTypesInSymbolRecord(MutableArrayRef<uint8_t> rec) {
 263   // Discover type index references in the record. Skip it if we don't
 264   // know where they are.
 265   SmallVector<TiReference, 32> typeRefs;
 266   if (!discoverTypeIndicesInSymbol(rec, typeRefs))
 267     return false;
 268   remapRecord(rec, typeRefs);
 269   return true;
 270 }
 271
 272 // A COFF .debug$H section is currently a clang extension.  This function checks
 273 // if a .debug$H section is in a format that we expect / understand, so that we
 274 // can ignore any sections which are coincidentally also named .debug$H but do
 275 // not contain a format we recognize.
 276 static bool canUseDebugH(ArrayRef<uint8_t> debugH) {
 277   if (debugH.size() < sizeof(object::debug_h_header))
 278     return false;
 279   auto *header =
 280       reinterpret_cast<const object::debug_h_header *>(debugH.data());
 281   debugH = debugH.drop_front(sizeof(object::debug_h_header));
 282   return header->Magic == COFF::DEBUG_HASHES_SECTION_MAGIC &&
 283          header->Version == 0 &&
 284          header->HashAlgorithm == uint16_t(GlobalTypeHashAlg::BLAKE3) &&
 285          (debugH.size() % 8 == 0);
 286 }
 287
 288 static std::optional<ArrayRef<uint8_t>> getDebugH(ObjFile *file) {
 289   SectionChunk *sec =
 290       SectionChunk::findByName(file->getDebugChunks(), ".debug$H");
 291   if (!sec)
 292     return std::nullopt;
 293   ArrayRef<uint8_t> contents = sec->getContents();
 294   if (!canUseDebugH(contents))
 295     return std::nullopt;
 296   return contents;
 297 }
 298
 299 static ArrayRef<GloballyHashedType>
 300 getHashesFromDebugH(ArrayRef<uint8_t> debugH) {
 301   assert(canUseDebugH(debugH));
 302   debugH = debugH.drop_front(sizeof(object::debug_h_header));
 303   uint32_t count = debugH.size() / sizeof(GloballyHashedType);
 304   return {reinterpret_cast<const GloballyHashedType *>(debugH.data()), count};
 305 }
 306
// Merge .debug$T for a generic object file.
//
// Used only when ghash merging is disabled. Merges the object's type and id
// records into the shared tables of `m`, then points both tpiMap and ipiMap at
// the resulting index map. A merge failure is fatal; otherwise the function
// returns success.
Error TpiSource::mergeDebugT(TypeMerger *m) {
  assert(!ctx.config.debugGHashes &&
         "use remapTpiWithGHashes when ghash is enabled");

  // Read the whole .debug$T contents as an array of type records.
  CVTypeArray types;
  BinaryStreamReader reader(file->debugTypes, support::little);
  cantFail(reader.readArray(types, reader.getLength()));

  // When dealing with PCH.OBJ, some indices were already merged.
  unsigned nbHeadIndices = indexMapStorage.size();

  std::optional<PCHMergerInfo> pchInfo;
  if (auto err = mergeTypeAndIdRecords(m->idTable, m->typeTable,
                                       indexMapStorage, types, pchInfo))
    fatal("codeview::mergeTypeAndIdRecords failed: " +
          toString(std::move(err)));
  // If the merger reported PCH information, remember the signature and the
  // end-of-precomp index.
  if (pchInfo) {
    file->pchSignature = pchInfo->PCHSignature;
    endPrecompIdx = pchInfo->EndPrecompIndex;
  }

  // In an object, there is only one mapping for both types and items.
  tpiMap = indexMapStorage;
  ipiMap = indexMapStorage;

  if (ctx.config.showSummary) {
    // Only count the records contributed by this object, not the entries that
    // were already in the index map before merging.
    nbTypeRecords = indexMapStorage.size() - nbHeadIndices;
    nbTypeRecordsBytes = reader.getLength();
    // Count how many times we saw each type record in our input. This
    // calculation requires a second pass over the type records to classify each
    // record as a type or index. This is slow, but this code executes when
    // collecting statistics.
    m->tpiCounts.resize(m->getTypeTable().size());
    m->ipiCounts.resize(m->getIDTable().size());
    // This object's own records start right after the pre-existing entries.
    uint32_t srcIdx = nbHeadIndices;
    for (const CVType &ty : types) {
      TypeIndex dstIdx = tpiMap[srcIdx++];
      // Type merging may fail, so a complex source type may become the simple
      // NotTranslated type, which cannot be used as an array index.
      if (dstIdx.isSimple())
        continue;
      SmallVectorImpl<uint32_t> &counts =
          isIdRecord(ty.kind()) ? m->ipiCounts : m->tpiCounts;
      ++counts[dstIdx.toArrayIndex()];
    }
  }

  return Error::success();
}
 357
// Merge types from a type server PDB.
//
// Used only when ghash merging is disabled. Merges the PDB's TPI stream and,
// if present, its IPI stream into the shared tables of `m`. The TPI index map
// is kept in this object's indexMapStorage, the IPI index map in the companion
// ipiSrc. A missing TPI stream, an unreadable IPI stream or a merge failure is
// fatal; otherwise the function returns success.
Error TypeServerSource::mergeDebugT(TypeMerger *m) {
  assert(!ctx.config.debugGHashes &&
         "use remapTpiWithGHashes when ghash is enabled");

  pdb::PDBFile &pdbFile = pdbInputFile->session->getPDBFile();
  Expected<pdb::TpiStream &> expectedTpi = pdbFile.getPDBTpiStream();
  if (auto e = expectedTpi.takeError())
    fatal("Type server does not have TPI stream: " + toString(std::move(e)));
  // The IPI stream is optional; maybeIpi stays null when the PDB has none.
  pdb::TpiStream *maybeIpi = nullptr;
  if (pdbFile.hasPDBIpiStream()) {
    Expected<pdb::TpiStream &> expectedIpi = pdbFile.getPDBIpiStream();
    if (auto e = expectedIpi.takeError())
      fatal("Error getting type server IPI stream: " + toString(std::move(e)));
    maybeIpi = &*expectedIpi;
  }

  // Merge TPI first, because the IPI stream will reference type indices.
  if (auto err = mergeTypeRecords(m->typeTable, indexMapStorage,
                                  expectedTpi->typeArray()))
    fatal("codeview::mergeTypeRecords failed: " + toString(std::move(err)));
  tpiMap = indexMapStorage;

  // Merge IPI.
  if (maybeIpi) {
    if (auto err = mergeIdRecords(m->idTable, tpiMap, ipiSrc->indexMapStorage,
                                  maybeIpi->typeArray()))
      fatal("codeview::mergeIdRecords failed: " + toString(std::move(err)));
    ipiMap = ipiSrc->indexMapStorage;
  }

  if (ctx.config.showSummary) {
    // The record and byte totals cover both streams.
    nbTypeRecords = tpiMap.size() + ipiMap.size();
    nbTypeRecordsBytes =
        expectedTpi->typeArray().getUnderlyingStream().getLength() +
        (maybeIpi ? maybeIpi->typeArray().getUnderlyingStream().getLength()
                  : 0);

    // Count how many times we saw each type record in our input. If a
    // destination type index is present in the source to destination type index
    // map, that means we saw it once in the input. Add it to our histogram.
    m->tpiCounts.resize(m->getTypeTable().size());
    m->ipiCounts.resize(m->getIDTable().size());
    // Simple indices are skipped: they are not positions in the count arrays.
    for (TypeIndex ti : tpiMap)
      if (!ti.isSimple())
        ++m->tpiCounts[ti.toArrayIndex()];
    for (TypeIndex ti : ipiMap)
      if (!ti.isSimple())
        ++m->ipiCounts[ti.toArrayIndex()];
  }

  return Error::success();
}
 411
 412 Expected<TypeServerSource *> UseTypeServerSource::getTypeServerSource() {
 413   const codeview::GUID &tsId = typeServerDependency.getGuid();
 414   StringRef tsPath = typeServerDependency.getName();
 415
 416   TypeServerSource *tsSrc = nullptr;
 417   auto it = ctx.typeServerSourceMappings.find(tsId);
 418   if (it != ctx.typeServerSourceMappings.end()) {
 419     tsSrc = (TypeServerSource *)it->second;
 420   }
 421   if (tsSrc == nullptr) {
 422     // The file failed to load, lookup by name
 423     PDBInputFile *pdb = PDBInputFile::findFromRecordPath(ctx, tsPath, file);
 424     if (!pdb)
 425       return createFileError(tsPath, errorCodeToError(std::error_code(
 426                                          ENOENT, std::generic_category())));
 427     // If an error occurred during loading, throw it now
 428     if (pdb->loadErrorStr)
 429       return createFileError(
 430           tsPath, make_error<StringError>(*pdb->loadErrorStr,
 431                                           llvm::inconvertibleErrorCode()));
 432
 433     tsSrc = (TypeServerSource *)pdb->debugTypesObj;
 434
 435     // Just because a file with a matching name was found and it was an actual
 436     // PDB file doesn't mean it matches.  For it to match the InfoStream's GUID
 437     // must match the GUID specified in the TypeServer2 record.
 438     if (tsSrc->Guid != tsId) {
 439       return createFileError(tsPath,
 440                              make_error<pdb::PDBError>(
 441                                  pdb::pdb_error_code::signature_out_of_date));
 442     }
 443   }
 444   return tsSrc;
 445 }
 446
 447 Error UseTypeServerSource::mergeDebugT(TypeMerger *m) {
 448   Expected<TypeServerSource *> tsSrc = getTypeServerSource();
 449   if (!tsSrc)
 450     return tsSrc.takeError();
 451
 452   pdb::PDBFile &pdbSession = (*tsSrc)->pdbInputFile->session->getPDBFile();
 453   auto expectedInfo = pdbSession.getPDBInfoStream();
 454   if (!expectedInfo)
 455     return expectedInfo.takeError();
 456
 457   // Reuse the type index map of the type server.
 458   tpiMap = (*tsSrc)->tpiMap;
 459   ipiMap = (*tsSrc)->ipiMap;
 460   return Error::success();
 461 }
 462
 463 static bool equalsPath(StringRef path1, StringRef path2) {
 464 #if defined(_WIN32)
 465   return path1.equals_insensitive(path2);
 466 #else
 467   return path1.equals(path2);
 468 #endif
 469 }
 470
 471 // Find by name an OBJ provided on the command line
 472 PrecompSource *UsePrecompSource::findObjByName(StringRef fileNameOnly) {
 473   SmallString<128> currentPath;
 474   for (auto kv : ctx.precompSourceMappings) {
 475     StringRef currentFileName = sys::path::filename(kv.second->file->getName(),
 476                                                     sys::path::Style::windows);
 477
 478     // Compare based solely on the file name (link.exe behavior)
 479     if (equalsPath(currentFileName, fileNameOnly))
 480       return (PrecompSource *)kv.second;
 481   }
 482   return nullptr;
 483 }
 484
 485 PrecompSource *UsePrecompSource::findPrecompSource(ObjFile *file,
 486                                                    PrecompRecord &pr) {
 487   // Cross-compile warning: given that Clang doesn't generate LF_PRECOMP
 488   // records, we assume the OBJ comes from a Windows build of cl.exe. Thusly,
 489   // the paths embedded in the OBJs are in the Windows format.
 490   SmallString<128> prFileName =
 491       sys::path::filename(pr.getPrecompFilePath(), sys::path::Style::windows);
 492
 493   auto it = ctx.precompSourceMappings.find(pr.getSignature());
 494   if (it != ctx.precompSourceMappings.end()) {
 495     return (PrecompSource *)it->second;
 496   }
 497   // Lookup by name
 498   return findObjByName(prFileName);
 499 }
 500
 501 Expected<PrecompSource *> UsePrecompSource::findPrecompMap(ObjFile *file,
 502                                                            PrecompRecord &pr) {
 503   PrecompSource *precomp = findPrecompSource(file, pr);
 504
 505   if (!precomp)
 506     return createFileError(
 507         pr.getPrecompFilePath(),
 508         make_error<pdb::PDBError>(pdb::pdb_error_code::no_matching_pch));
 509
 510   // Don't rely on the PCH signature to validate the concordance between the PCH
 511   // and the OBJ that uses it. However we do validate here that the
 512   // LF_ENDPRECOMP record index lines up with the number of type records
 513   // LF_PRECOMP is expecting.
 514   if (precomp->endPrecompIdx != pr.getTypesCount())
 515     return createFileError(
 516         toString(file),
 517         make_error<pdb::PDBError>(pdb::pdb_error_code::no_matching_pch));
 518
 519   return precomp;
 520 }
 521
 522 /// Merges a precompiled headers TPI map into the current TPI map. The
 523 /// precompiled headers object will also be loaded and remapped in the
 524 /// process.
 525 Error UsePrecompSource::mergeInPrecompHeaderObj() {
 526   auto e = findPrecompMap(file, precompDependency);
 527   if (!e)
 528     return e.takeError();
 529
 530   PrecompSource *precompSrc = *e;
 531   if (precompSrc->tpiMap.empty())
 532     return Error::success();
 533
 534   assert(precompDependency.getStartTypeIndex() ==
 535          TypeIndex::FirstNonSimpleIndex);
 536   assert(precompDependency.getTypesCount() <= precompSrc->tpiMap.size());
 537   // Use the previously remapped index map from the precompiled headers.
 538   indexMapStorage.insert(indexMapStorage.begin(), precompSrc->tpiMap.begin(),
 539                          precompSrc->tpiMap.begin() +
 540                              precompDependency.getTypesCount());
 541
 542   return Error::success();
 543 }
 544
 545 Error UsePrecompSource::mergeDebugT(TypeMerger *m) {
 546   // This object was compiled with /Yu, so process the corresponding
 547   // precompiled headers object (/Yc) first. Some type indices in the current
 548   // object are referencing data in the precompiled headers object, so we need
 549   // both to be loaded.
 550   if (Error e = mergeInPrecompHeaderObj())
 551     return e;
 552
 553   return TpiSource::mergeDebugT(m);
 554 }
 555
 556 Error PrecompSource::mergeDebugT(TypeMerger *m) {
 557   // In some cases, the S_OBJNAME record doesn't contain the PCH signature.
 558   // The signature comes later with the LF_ENDPRECOMP record, so we first need
 559   // to merge in all the .PCH.OBJ file type records, before registering below.
 560   if (Error e = TpiSource::mergeDebugT(m))
 561     return e;
 562
 563   registerMapping();
 564
 565   return Error::success();
 566 }
 567
 568 void PrecompSource::registerMapping() {
 569   if (registered)
 570     return;
 571   if (file->pchSignature && *file->pchSignature) {
 572     auto it = ctx.precompSourceMappings.emplace(*file->pchSignature, this);
 573     if (!it.second)
 574       fatal("a PCH object with the same signature has already been provided (" +
 575             toString(it.first->second->file) + " and " + toString(file) + ")");
 576     registered = true;
 577   }
 578 }
 579
 580 //===----------------------------------------------------------------------===//
// Parallel GHash type merging implementation.
 582 //===----------------------------------------------------------------------===//
 583
 584 void TpiSource::loadGHashes() {
 585   if (std::optional<ArrayRef<uint8_t>> debugH = getDebugH(file)) {
 586     ghashes = getHashesFromDebugH(*debugH);
 587     ownedGHashes = false;
 588   } else {
 589     CVTypeArray types;
 590     BinaryStreamReader reader(file->debugTypes, support::little);
 591     cantFail(reader.readArray(types, reader.getLength()));
 592     assignGHashesFromVector(GloballyHashedType::hashTypes(types));
 593   }
 594
 595   fillIsItemIndexFromDebugT();
 596 }
 597
 598 // Copies ghashes from a vector into an array. These are long lived, so it's
 599 // worth the time to copy these into an appropriately sized vector to reduce
 600 // memory usage.
 601 void TpiSource::assignGHashesFromVector(
 602     std::vector<GloballyHashedType> &&hashVec) {
 603   if (hashVec.empty())
 604     return;
 605   GloballyHashedType *hashes = new GloballyHashedType[hashVec.size()];
 606   memcpy(hashes, hashVec.data(), hashVec.size() * sizeof(GloballyHashedType));
 607   ghashes = ArrayRef(hashes, hashVec.size());
 608   ownedGHashes = true;
 609 }
 610
 611 // Faster way to iterate type records. forEachTypeChecked is faster than
 612 // iterating CVTypeArray. It avoids virtual readBytes calls in inner loops.
 613 static void forEachTypeChecked(ArrayRef<uint8_t> types,
 614                                function_ref<void(const CVType &)> fn) {
 615   checkError(
 616       forEachCodeViewRecord<CVType>(types, [fn](const CVType &ty) -> Error {
 617         fn(ty);
 618         return Error::success();
 619       }));
 620 }
 621
 622 // Walk over file->debugTypes and fill in the isItemIndex bit vector.
 623 // TODO: Store this information in .debug$H so that we don't have to recompute
 624 // it. This is the main bottleneck slowing down parallel ghashing with one
 625 // thread over single-threaded ghashing.
 626 void TpiSource::fillIsItemIndexFromDebugT() {
 627   uint32_t index = 0;
 628   isItemIndex.resize(ghashes.size());
 629   forEachTypeChecked(file->debugTypes, [&](const CVType &ty) {
 630     if (isIdRecord(ty.kind()))
 631       isItemIndex.set(index);
 632     ++index;
 633   });
 634 }
 635
 636 void TpiSource::mergeTypeRecord(TypeIndex curIndex, CVType ty) {
 637   // Decide if the merged type goes into TPI or IPI.
 638   bool isItem = isIdRecord(ty.kind());
 639   MergedInfo &merged = isItem ? mergedIpi : mergedTpi;
 640
 641   // Copy the type into our mutable buffer.
 642   assert(ty.length() <= codeview::MaxRecordLength);
 643   size_t offset = merged.recs.size();
 644   size_t newSize = alignTo(ty.length(), 4);
 645   merged.recs.resize(offset + newSize);
 646   auto newRec = MutableArrayRef(&merged.recs[offset], newSize);
 647   memcpy(newRec.data(), ty.data().data(), newSize);
 648
 649   // Fix up the record prefix and padding bytes if it required resizing.
 650   if (newSize != ty.length()) {
 651     reinterpret_cast<RecordPrefix *>(newRec.data())->RecordLen = newSize - 2;
 652     for (size_t i = ty.length(); i < newSize; ++i)
 653       newRec[i] = LF_PAD0 + (newSize - i);
 654   }
 655
 656   // Remap the type indices in the new record.
 657   remapTypesInTypeRecord(newRec);
 658   uint32_t pdbHash = check(pdb::hashTypeRecord(CVType(newRec)));
 659   merged.recSizes.push_back(static_cast<uint16_t>(newSize));
 660   merged.recHashes.push_back(pdbHash);
 661
 662   // Retain a mapping from PDB function id to PDB function type. This mapping is
 663   // used during symbol processing to rewrite S_GPROC32_ID symbols to S_GPROC32
 664   // symbols.
 665   if (ty.kind() == LF_FUNC_ID || ty.kind() == LF_MFUNC_ID) {
 666     bool success = ty.length() >= 12;
 667     TypeIndex funcId = curIndex;
 668     if (success)
 669       success &= remapTypeIndex(funcId, TiRefKind::IndexRef);
 670     TypeIndex funcType =
 671         *reinterpret_cast<const TypeIndex *>(&newRec.data()[8]);
 672     if (success) {
 673       funcIdToType.push_back({funcId, funcType});
 674     } else {
 675       StringRef fname = file ? file->getName() : "<unknown PDB>";
 676       warn("corrupt LF_[M]FUNC_ID record 0x" + utohexstr(curIndex.getIndex()) +
 677            " in " + fname);
 678     }
 679   }
 680 }
 681
// Copies every record of `typeRecords` whose position is listed in uniqueTypes
// into this source's merged TPI/IPI buffers (see mergeTypeRecord). Records are
// visited in stream order; `beginIndex` plus the record's position is passed
// on as the record's type index.
void TpiSource::mergeUniqueTypeRecords(ArrayRef<uint8_t> typeRecords,
                                       TypeIndex beginIndex) {
  // Re-sort the list of unique types by index.
  // For PDB sources the list is only asserted to be sorted already.
  if (kind == PDB)
    assert(llvm::is_sorted(uniqueTypes));
  else
    llvm::sort(uniqueTypes);

  // Accumulate all the unique types into one buffer in mergedTypes.
  uint32_t ghashIndex = 0;
  auto nextUniqueIndex = uniqueTypes.begin();
  assert(mergedTpi.recs.empty());
  assert(mergedIpi.recs.empty());

  // Pre-compute the number of elements in advance to avoid std::vector resizes.
  // Despite their names, these two counters accumulate the 4-byte aligned sizes
  // in bytes of the selected records, which is what reserve() needs below.
  unsigned nbTpiRecs = 0;
  unsigned nbIpiRecs = 0;
  forEachTypeChecked(typeRecords, [&](const CVType &ty) {
    if (nextUniqueIndex != uniqueTypes.end() &&
        *nextUniqueIndex == ghashIndex) {
      assert(ty.length() <= codeview::MaxRecordLength);
      size_t newSize = alignTo(ty.length(), 4);
      (isIdRecord(ty.kind()) ? nbIpiRecs : nbTpiRecs) += newSize;
      ++nextUniqueIndex;
    }
    ++ghashIndex;
  });
  mergedTpi.recs.reserve(nbTpiRecs);
  mergedIpi.recs.reserve(nbIpiRecs);

  // Do the actual type merge.
  // Second pass: restart both cursors and copy each selected record.
  ghashIndex = 0;
  nextUniqueIndex = uniqueTypes.begin();
  forEachTypeChecked(typeRecords, [&](const CVType &ty) {
    if (nextUniqueIndex != uniqueTypes.end() &&
        *nextUniqueIndex == ghashIndex) {
      mergeTypeRecord(beginIndex + ghashIndex, ty);
      ++nextUniqueIndex;
    }
    ++ghashIndex;
  });
  assert(nextUniqueIndex == uniqueTypes.end() &&
         "failed to merge all desired records");
  assert(uniqueTypes.size() ==
             mergedTpi.recSizes.size() + mergedIpi.recSizes.size() &&
         "missing desired record");
}
 729
 730 void TpiSource::remapTpiWithGHashes(GHashState *g) {
 731   assert(ctx.config.debugGHashes && "ghashes must be enabled");
 732   fillMapFromGHashes(g);
 733   tpiMap = indexMapStorage;
 734   ipiMap = indexMapStorage;
 735   mergeUniqueTypeRecords(file->debugTypes);
 736   // TODO: Free all unneeded ghash resources now that we have a full index map.
 737
 738   if (ctx.config.showSummary) {
 739     nbTypeRecords = ghashes.size();
 740     nbTypeRecordsBytes = file->debugTypes.size();
 741   }
 742 }
 743
 744 // PDBs do not actually store global hashes, so when merging a type server
 745 // PDB we have to synthesize global hashes.  To do this, we first synthesize
 746 // global hashes for the TPI stream, since it is independent, then we
 747 // synthesize hashes for the IPI stream, using the hashes for the TPI stream
 748 // as inputs.
 749 void TypeServerSource::loadGHashes() {
 750   // Don't hash twice.
 751   if (!ghashes.empty())
 752     return;
 753   pdb::PDBFile &pdbFile = pdbInputFile->session->getPDBFile();
 754
 755   // Hash TPI stream.
 756   Expected<pdb::TpiStream &> expectedTpi = pdbFile.getPDBTpiStream();
 757   if (auto e = expectedTpi.takeError())
 758     fatal("Type server does not have TPI stream: " + toString(std::move(e)));
 759   assignGHashesFromVector(
 760       GloballyHashedType::hashTypes(expectedTpi->typeArray()));
 761   isItemIndex.resize(ghashes.size());
 762
 763   // Hash IPI stream, which depends on TPI ghashes.
 764   if (!pdbFile.hasPDBIpiStream())
 765     return;
 766   Expected<pdb::TpiStream &> expectedIpi = pdbFile.getPDBIpiStream();
 767   if (auto e = expectedIpi.takeError())
 768     fatal("error retrieving IPI stream: " + toString(std::move(e)));
 769   ipiSrc->assignGHashesFromVector(
 770       GloballyHashedType::hashIds(expectedIpi->typeArray(), ghashes));
 771
 772   // The IPI stream isItemIndex bitvector should be all ones.
 773   ipiSrc->isItemIndex.resize(ipiSrc->ghashes.size());
 774   ipiSrc->isItemIndex.set(0, ipiSrc->ghashes.size());
 775 }
 776
 777 // Flatten discontiguous PDB type arrays to bytes so that we can use
 778 // forEachTypeChecked instead of CVTypeArray iteration. Copying all types from
 779 // type servers is faster than iterating all object files compiled with /Z7 with
 780 // CVTypeArray, which has high overheads due to the virtual interface of
 781 // BinaryStream::readBytes.
 782 static ArrayRef<uint8_t> typeArrayToBytes(const CVTypeArray &types) {
 783   BinaryStreamRef stream = types.getUnderlyingStream();
 784   ArrayRef<uint8_t> debugTypes;
 785   checkError(stream.readBytes(0, stream.getLength(), debugTypes));
 786   return debugTypes;
 787 }
 788
 789 // Merge types from a type server PDB.
 790 void TypeServerSource::remapTpiWithGHashes(GHashState *g) {
 791   assert(ctx.config.debugGHashes && "ghashes must be enabled");
 792
 793   // IPI merging depends on TPI, so do TPI first, then do IPI.  No need to
 794   // propagate errors, those should've been handled during ghash loading.
 795   pdb::PDBFile &pdbFile = pdbInputFile->session->getPDBFile();
 796   pdb::TpiStream &tpi = check(pdbFile.getPDBTpiStream());
 797   fillMapFromGHashes(g);
 798   tpiMap = indexMapStorage;
 799   mergeUniqueTypeRecords(typeArrayToBytes(tpi.typeArray()));
 800   if (pdbFile.hasPDBIpiStream()) {
 801     pdb::TpiStream &ipi = check(pdbFile.getPDBIpiStream());
 802     ipiSrc->indexMapStorage.resize(ipiSrc->ghashes.size());
 803     ipiSrc->fillMapFromGHashes(g);
 804     ipiMap = ipiSrc->indexMapStorage;
 805     ipiSrc->tpiMap = tpiMap;
 806     ipiSrc->ipiMap = ipiMap;
 807     ipiSrc->mergeUniqueTypeRecords(typeArrayToBytes(ipi.typeArray()));
 808
 809     if (ctx.config.showSummary) {
 810       nbTypeRecords = ipiSrc->ghashes.size();
 811       nbTypeRecordsBytes = ipi.typeArray().getUnderlyingStream().getLength();
 812     }
 813   }
 814
 815   if (ctx.config.showSummary) {
 816     nbTypeRecords += ghashes.size();
 817     nbTypeRecordsBytes += tpi.typeArray().getUnderlyingStream().getLength();
 818   }
 819 }
 820
 821 void UseTypeServerSource::remapTpiWithGHashes(GHashState *g) {
 822   // No remapping to do with /Zi objects. Simply use the index map from the type
 823   // server. Errors should have been reported earlier. Symbols from this object
 824   // will be ignored.
 825   Expected<TypeServerSource *> maybeTsSrc = getTypeServerSource();
 826   if (!maybeTsSrc) {
 827     typeMergingError =
 828         joinErrors(std::move(typeMergingError), maybeTsSrc.takeError());
 829     return;
 830   }
 831   TypeServerSource *tsSrc = *maybeTsSrc;
 832   tpiMap = tsSrc->tpiMap;
 833   ipiMap = tsSrc->ipiMap;
 834 }
 835
 836 void PrecompSource::loadGHashes() {
 837   if (getDebugH(file)) {
 838     warn("ignoring .debug$H section; pch with ghash is not implemented");
 839   }
 840
 841   uint32_t ghashIdx = 0;
 842   std::vector<GloballyHashedType> hashVec;
 843   forEachTypeChecked(file->debugTypes, [&](const CVType &ty) {
 844     // Remember the index of the LF_ENDPRECOMP record so it can be excluded from
 845     // the PDB. There must be an entry in the list of ghashes so that the type
 846     // indexes of the following records in the /Yc PCH object line up.
 847     if (ty.kind() == LF_ENDPRECOMP) {
 848       EndPrecompRecord endPrecomp;
 849       cantFail(TypeDeserializer::deserializeAs<EndPrecompRecord>(
 850           const_cast<CVType &>(ty), endPrecomp));
 851       file->pchSignature = endPrecomp.getSignature();
 852       registerMapping();
 853       endPrecompIdx = ghashIdx;
 854     }
 855
 856     hashVec.push_back(GloballyHashedType::hashType(ty, hashVec, hashVec));
 857     isItemIndex.push_back(isIdRecord(ty.kind()));
 858     ++ghashIdx;
 859   });
 860   assignGHashesFromVector(std::move(hashVec));
 861 }
 862
 863 void UsePrecompSource::loadGHashes() {
 864   auto e = findPrecompMap(file, precompDependency);
 865   if (!e) {
 866     warn(toString(e.takeError()));
 867     return;
 868   }
 869
 870   PrecompSource *pchSrc = *e;
 871
 872   // To compute ghashes of a /Yu object file, we need to build on the ghashes of
 873   // the /Yc PCH object. After we are done hashing, discard the ghashes from the
 874   // PCH source so we don't unnecessarily try to deduplicate them.
 875   std::vector<GloballyHashedType> hashVec =
 876       pchSrc->ghashes.take_front(precompDependency.getTypesCount());
 877   forEachTypeChecked(file->debugTypes, [&](const CVType &ty) {
 878     hashVec.push_back(GloballyHashedType::hashType(ty, hashVec, hashVec));
 879     isItemIndex.push_back(isIdRecord(ty.kind()));
 880   });
 881   hashVec.erase(hashVec.begin(),
 882                 hashVec.begin() + precompDependency.getTypesCount());
 883   assignGHashesFromVector(std::move(hashVec));
 884 }
 885
 886 void UsePrecompSource::remapTpiWithGHashes(GHashState *g) {
 887   fillMapFromGHashes(g);
 888   // This object was compiled with /Yu, so process the corresponding
 889   // precompiled headers object (/Yc) first. Some type indices in the current
 890   // object are referencing data in the precompiled headers object, so we need
 891   // both to be loaded.
 892   if (Error e = mergeInPrecompHeaderObj()) {
 893     typeMergingError = joinErrors(std::move(typeMergingError), std::move(e));
 894     return;
 895   }
 896
 897   tpiMap = indexMapStorage;
 898   ipiMap = indexMapStorage;
 899   mergeUniqueTypeRecords(file->debugTypes,
 900                          TypeIndex(precompDependency.getStartTypeIndex() +
 901                                    precompDependency.getTypesCount()));
 902   if (ctx.config.showSummary) {
 903     nbTypeRecords = ghashes.size();
 904     nbTypeRecordsBytes = file->debugTypes.size();
 905   }
 906 }
 907
 908 namespace {
 909 /// A concurrent hash table for global type hashing. It is based on this paper:
 910 /// Concurrent Hash Tables: Fast and General(?)!
 911 /// https://dl.acm.org/doi/10.1145/3309206
 912 ///
 913 /// This hash table is meant to be used in two phases:
 914 /// 1. concurrent insertions
 915 /// 2. concurrent reads
 916 /// It does not support lookup, deletion, or rehashing. It uses linear probing.
 917 ///
 918 /// The paper describes storing a key-value pair in two machine words.
 919 /// Generally, the values stored in this map are type indices, and we can use
 920 /// those values to recover the ghash key from a side table. This allows us to
 921 /// shrink the table entries further at the cost of some loads, and sidesteps
 922 /// the need for a 128 bit atomic compare-and-swap operation.
 923 ///
 924 /// During insertion, a priority function is used to decide which insertion
 925 /// should be preferred. This ensures that the output is deterministic. For
 926 /// ghashing, lower tpiSrcIdx values (earlier inputs) are preferred.
 927 ///
 928 class GHashCell;
 929 struct GHashTable {
 930   GHashCell *table = nullptr;
 931   uint32_t tableSize = 0;
 932
 933   GHashTable() = default;
 934   ~GHashTable();
 935
 936   /// Initialize the table with the given size. Because the table cannot be
 937   /// resized, the initial size of the table must be large enough to contain all
 938   /// inputs, or insertion may not be able to find an empty cell.
 939   void init(uint32_t newTableSize);
 940
 941   /// Insert the cell with the given ghash into the table. Return the insertion
 942   /// position in the table. It is safe for the caller to store the insertion
 943   /// position because the table cannot be resized.
 944   uint32_t insert(COFFLinkerContext &ctx, GloballyHashedType ghash,
 945                   GHashCell newCell);
 946 };
 947
 948 /// A ghash table cell for deduplicating types from TpiSources.
 949 class GHashCell {
 950   // Force "data" to be 64-bit aligned; otherwise, some versions of clang
 951   // will generate calls to libatomic when using some versions of libstdc++
 952   // on 32-bit targets.  (Also, in theory, there could be a target where
 953   // new[] doesn't always return an 8-byte-aligned allocation.)
 954   alignas(sizeof(uint64_t)) uint64_t data = 0;
 955
 956 public:
 957   GHashCell() = default;
 958
 959   // Construct data most to least significant so that sorting works well:
 960   // - isItem
 961   // - tpiSrcIdx
 962   // - ghashIdx
 963   // Add one to the tpiSrcIdx so that the 0th record from the 0th source has a
 964   // non-zero representation.
 965   GHashCell(bool isItem, uint32_t tpiSrcIdx, uint32_t ghashIdx)
 966       : data((uint64_t(isItem) << 63U) | (uint64_t(tpiSrcIdx + 1) << 32ULL) |
 967              ghashIdx) {
 968     assert(tpiSrcIdx == getTpiSrcIdx() && "round trip failure");
 969     assert(ghashIdx == getGHashIdx() && "round trip failure");
 970   }
 971
 972   explicit GHashCell(uint64_t data) : data(data) {}
 973
 974   // The empty cell is all zeros.
 975   bool isEmpty() const { return data == 0ULL; }
 976
 977   /// Extract the tpiSrcIdx.
 978   uint32_t getTpiSrcIdx() const {
 979     return ((uint32_t)(data >> 32U) & 0x7FFFFFFF) - 1;
 980   }
 981
 982   /// Extract the index into the ghash array of the TpiSource.
 983   uint32_t getGHashIdx() const { return (uint32_t)data; }
 984
 985   bool isItem() const { return data & (1ULL << 63U); }
 986
 987   /// Get the ghash key for this cell.
 988   GloballyHashedType getGHash(const COFFLinkerContext &ctx) const {
 989     return ctx.tpiSourceList[getTpiSrcIdx()]->ghashes[getGHashIdx()];
 990   }
 991
 992   /// The priority function for the cell. The data is stored such that lower
 993   /// tpiSrcIdx and ghashIdx values are preferred, which means that type record
 994   /// from earlier sources are more likely to prevail.
 995   friend inline bool operator<(const GHashCell &l, const GHashCell &r) {
 996     return l.data < r.data;
 997   }
 998 };
 999 } // namespace
1000
namespace lld::coff {
/// This type is just a wrapper around GHashTable with external linkage so it
/// can be used from a header. GHashTable itself lives in an anonymous
/// namespace in this file.
struct GHashState {
  /// The ghash table being wrapped.
  GHashTable table;
};
} // namespace lld::coff
1008
/// Free the cell array. The pointer is null until init() has run, in which
/// case delete[] does nothing.
GHashTable::~GHashTable() { delete[] table; }
1010
1011 void GHashTable::init(uint32_t newTableSize) {
1012   table = new GHashCell[newTableSize];
1013   memset(table, 0, newTableSize * sizeof(GHashCell));
1014   tableSize = newTableSize;
1015 }
1016
1017 uint32_t GHashTable::insert(COFFLinkerContext &ctx, GloballyHashedType ghash,
1018                             GHashCell newCell) {
1019   assert(!newCell.isEmpty() && "cannot insert empty cell value");
1020
1021   // FIXME: The low bytes of SHA1 have low entropy for short records, which
1022   // type records are. Swap the byte order for better entropy. A better ghash
1023   // won't need this.
1024   uint32_t startIdx =
1025       llvm::byteswap<uint64_t>(*reinterpret_cast<uint64_t *>(&ghash)) %
1026       tableSize;
1027
1028   // Do a linear probe starting at startIdx.
1029   uint32_t idx = startIdx;
1030   while (true) {
1031     // Run a compare and swap loop. There are four cases:
1032     // - cell is empty: CAS into place and return
1033     // - cell has matching key, earlier priority: do nothing, return
1034     // - cell has matching key, later priority: CAS into place and return
1035     // - cell has non-matching key: hash collision, probe next cell
1036     auto *cellPtr = reinterpret_cast<std::atomic<GHashCell> *>(&table[idx]);
1037     GHashCell oldCell(cellPtr->load());
1038     while (oldCell.isEmpty() || oldCell.getGHash(ctx) == ghash) {
1039       // Check if there is an existing ghash entry with a higher priority
1040       // (earlier ordering). If so, this is a duplicate, we are done.
1041       if (!oldCell.isEmpty() && oldCell < newCell)
1042         return idx;
1043       // Either the cell is empty, or our value is higher priority. Try to
1044       // compare and swap. If it succeeds, we are done.
1045       if (cellPtr->compare_exchange_weak(oldCell, newCell))
1046         return idx;
1047       // If the CAS failed, check this cell again.
1048     }
1049
1050     // Advance the probe. Wrap around to the beginning if we run off the end.
1051     ++idx;
1052     idx = idx == tableSize ? 0 : idx;
1053     if (idx == startIdx) {
1054       // If this becomes an issue, we could mark failure and rehash from the
1055       // beginning with a bigger table. There is no difference between rehashing
1056       // internally and starting over.
1057       report_fatal_error("ghash table is full");
1058     }
1059   }
1060   llvm_unreachable("left infloop");
1061 }
1062
/// Construct a type merger for the given linker context. Both the type table
/// and the id table are constructed over the supplied allocator.
TypeMerger::TypeMerger(COFFLinkerContext &c, llvm::BumpPtrAllocator &alloc)
    : typeTable(alloc), idTable(alloc), ctx(c) {}
1065
/// Defaulted destructor, defined out of line in this file.
TypeMerger::~TypeMerger() = default;
1067
/// Merge the types of all TPI sources using global type hashes.
///
/// The work is done in phases:
/// 1. load (or compute) the ghashes of every source,
/// 2. insert all ghashes into a fixed-size concurrent hash table to find the
///    unique records,
/// 3. sort the surviving cells, which assigns the destination PDB indices,
/// 4. remap and merge the records of every source,
/// 5. collect the function-id-to-type mappings and free the ghashes.
void TypeMerger::mergeTypesWithGHash() {
  // Load ghashes. Do type servers and PCH objects first.
  {
    ScopedTimer t1(ctx.loadGHashTimer);
    parallelForEach(dependencySources,
                    [&](TpiSource *source) { source->loadGHashes(); });
    parallelForEach(objectSources,
                    [&](TpiSource *source) { source->loadGHashes(); });
  }

  ScopedTimer t2(ctx.mergeGHashTimer);
  GHashState ghashState;

  // Estimate the size of hash table needed to deduplicate ghashes. This *must*
  // be larger than the number of unique types, or hash table insertion may not
  // be able to find a vacant slot. Summing the input types guarantees this, but
  // it is a gross overestimate. The table size could be reduced to save memory,
  // but it would require implementing rehashing, and this table is generally
  // small compared to total memory usage, at eight bytes per input type record,
  // and most input type records are larger than eight bytes.
  size_t tableSize = 0;
  for (TpiSource *source : ctx.tpiSourceList)
    tableSize += source->ghashes.size();

  // Cap the table size so that we can use 32-bit cell indices. Type indices are
  // also 32-bit, so this is an inherent PDB file format limit anyway.
  tableSize =
      std::min(size_t(INT32_MAX) - TypeIndex::FirstNonSimpleIndex, tableSize);
  ghashState.table.init(static_cast<uint32_t>(tableSize));

  // Insert ghashes in parallel. During concurrent insertion, we cannot observe
  // the contents of the hash table cell, but we can remember the insertion
  // position. Because the table does not rehash, the position will not change
  // under insertion. After insertion is done, the value of the cell can be read
  // to retrieve the final PDB type index.
  parallelFor(0, ctx.tpiSourceList.size(), [&](size_t tpiSrcIdx) {
    TpiSource *source = ctx.tpiSourceList[tpiSrcIdx];
    source->indexMapStorage.resize(source->ghashes.size());
    for (uint32_t i = 0, e = source->ghashes.size(); i < e; i++) {
      // Records that are left out of the PDB never enter the table; their map
      // entry is the simple "not translated" index.
      if (source->shouldOmitFromPdb(i)) {
        source->indexMapStorage[i] = TypeIndex(SimpleTypeKind::NotTranslated);
        continue;
      }
      GloballyHashedType ghash = source->ghashes[i];
      bool isItem = source->isItemIndex.test(i);
      uint32_t cellIdx =
          ghashState.table.insert(ctx, ghash, GHashCell(isItem, tpiSrcIdx, i));

      // Store the ghash cell index as a type index in indexMapStorage. Later
      // we will replace it with the PDB type index.
      source->indexMapStorage[i] = TypeIndex::fromArrayIndex(cellIdx);
    }
  });

  // Collect all non-empty cells and sort them. This will implicitly assign
  // destination type indices, and partition the entries into type records and
  // item records. Cells compare by their packed value, so the resulting order
  // is: type records before item records, then by source index, then by the
  // record's index within its source.
  std::vector<GHashCell> entries;
  for (const GHashCell &cell : ArrayRef(ghashState.table.table, tableSize)) {
    if (!cell.isEmpty())
      entries.push_back(cell);
  }
  parallelSort(entries, std::less<GHashCell>());
  log(formatv("ghash table load factor: {0:p} (size {1} / capacity {2})\n",
              tableSize ? double(entries.size()) / tableSize : 0,
              entries.size(), tableSize));

  // Find out how many type and item indices there are. GHashCell(true, 0, 0)
  // is the smallest possible item cell, so its lower bound is the first item.
  auto mid = llvm::lower_bound(entries, GHashCell(true, 0, 0));
  assert((mid == entries.end() || mid->isItem()) &&
         (mid == entries.begin() || !std::prev(mid)->isItem()) &&
         "midpoint is not midpoint");
  uint32_t numTypes = std::distance(entries.begin(), mid);
  uint32_t numItems = std::distance(mid, entries.end());
  log("Tpi record count: " + Twine(numTypes));
  log("Ipi record count: " + Twine(numItems));

  // Make a list of the "unique" type records to merge for each tpi source. Type
  // merging will skip indices not on this list. Store the destination PDB type
  // index for these unique types in the tpiMap for each source. The entries for
  // non-unique types will be filled in prior to type merging.
  for (uint32_t i = 0, e = entries.size(); i < e; ++i) {
    auto &cell = entries[i];
    uint32_t tpiSrcIdx = cell.getTpiSrcIdx();
    TpiSource *source = ctx.tpiSourceList[tpiSrcIdx];
    source->uniqueTypes.push_back(cell.getGHashIdx());

    // Update the ghash table to store the destination PDB type index in the
    // table. Types and items are numbered independently, each from zero, so
    // item entries are rebased by the number of types. The index is stored in
    // the field of the cell that held the ghash index during insertion.
    uint32_t pdbTypeIndex = i < numTypes ? i : i - numTypes;
    uint32_t ghashCellIndex =
        source->indexMapStorage[cell.getGHashIdx()].toArrayIndex();
    ghashState.table.table[ghashCellIndex] =
        GHashCell(cell.isItem(), cell.getTpiSrcIdx(), pdbTypeIndex);
  }

  // Remap all types. Dependency sources are processed one after another
  // first; the object sources are then processed in parallel.
  for (TpiSource *source : dependencySources)
    source->remapTpiWithGHashes(&ghashState);
  parallelForEach(objectSources, [&](TpiSource *source) {
    source->remapTpiWithGHashes(&ghashState);
  });

  // Build a global map from function ID to function type. The per-source
  // maps are emptied once their contents have been copied over.
  for (TpiSource *source : ctx.tpiSourceList) {
    for (auto idToType : source->funcIdToType)
      funcIdToType.insert(idToType);
    source->funcIdToType.clear();
  }

  clearGHashes();
}
1186
1187 void TypeMerger::sortDependencies() {
1188   // Order dependencies first, but preserve the existing order.
1189   std::vector<TpiSource *> deps;
1190   std::vector<TpiSource *> objs;
1191   for (TpiSource *s : ctx.tpiSourceList)
1192     (s->isDependency() ? deps : objs).push_back(s);
1193   uint32_t numDeps = deps.size();
1194   uint32_t numObjs = objs.size();
1195   ctx.tpiSourceList = std::move(deps);
1196   ctx.tpiSourceList.insert(ctx.tpiSourceList.end(), objs.begin(), objs.end());
1197   for (uint32_t i = 0, e = ctx.tpiSourceList.size(); i < e; ++i)
1198     ctx.tpiSourceList[i]->tpiSrcIdx = i;
1199   dependencySources = ArrayRef(ctx.tpiSourceList.data(), numDeps);
1200   objectSources = ArrayRef(ctx.tpiSourceList.data() + numDeps, numObjs);
1201 }
1202
1203 /// Given the index into the ghash table for a particular type, return the type
1204 /// index for that type in the output PDB.
1205 static TypeIndex loadPdbTypeIndexFromCell(GHashState *g,
1206                                           uint32_t ghashCellIdx) {
1207   GHashCell cell = g->table.table[ghashCellIdx];
1208   return TypeIndex::fromArrayIndex(cell.getGHashIdx());
1209 }
1210
1211 /// Free heap allocated ghashes.
1212 void TypeMerger::clearGHashes() {
1213   for (TpiSource *src : ctx.tpiSourceList) {
1214     if (src->ownedGHashes)
1215       delete[] src->ghashes.data();
1216     src->ghashes = {};
1217     src->isItemIndex.clear();
1218     src->uniqueTypes.clear();
1219   }
1220 }
1221
1222 // Fill in a TPI or IPI index map using ghashes. For each source type, use its
1223 // ghash to lookup its final type index in the PDB, and store that in the map.
1224 void TpiSource::fillMapFromGHashes(GHashState *g) {
1225   for (size_t i = 0, e = ghashes.size(); i < e; ++i) {
1226     TypeIndex fakeCellIndex = indexMapStorage[i];
1227     if (fakeCellIndex.isSimple())
1228       indexMapStorage[i] = fakeCellIndex;
1229     else
1230       indexMapStorage[i] =
1231           loadPdbTypeIndexFromCell(g, fakeCellIndex.toArrayIndex());
1232   }
1233 }