1 //===- DebugTypes.cpp -----------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
9 #include "DebugTypes.h"
10 #include "COFFLinkerContext.h"
11 #include "Chunks.h"
12 #include "Driver.h"
13 #include "InputFiles.h"
14 #include "PDB.h"
15 #include "TypeMerger.h"
16 #include "lld/Common/ErrorHandler.h"
17 #include "lld/Common/Memory.h"
18 #include "llvm/ADT/StringExtras.h"
19 #include "llvm/DebugInfo/CodeView/TypeIndexDiscovery.h"
20 #include "llvm/DebugInfo/CodeView/TypeRecord.h"
21 #include "llvm/DebugInfo/CodeView/TypeRecordHelpers.h"
22 #include "llvm/DebugInfo/CodeView/TypeStreamMerger.h"
23 #include "llvm/DebugInfo/PDB/GenericError.h"
24 #include "llvm/DebugInfo/PDB/Native/InfoStream.h"
25 #include "llvm/DebugInfo/PDB/Native/NativeSession.h"
26 #include "llvm/DebugInfo/PDB/Native/PDBFile.h"
27 #include "llvm/DebugInfo/PDB/Native/TpiHashing.h"
28 #include "llvm/DebugInfo/PDB/Native/TpiStream.h"
29 #include "llvm/Support/FormatVariadic.h"
30 #include "llvm/Support/Parallel.h"
31 #include "llvm/Support/Path.h"
32 #include "llvm/Support/TimeProfiler.h"
34 using namespace llvm;
35 using namespace llvm::codeview;
36 using namespace lld;
37 using namespace lld::coff;
39 namespace {
40 class TypeServerIpiSource;
42 // The TypeServerSource class represents a PDB type server, a file referenced by
43 // OBJ files compiled with MSVC /Zi. A single PDB can be shared by several OBJ
44 // files, therefore there must be only one instance per group of OBJs. The file path
45 // is discovered from the dependent OBJ's debug type stream. The
46 // TypeServerSource object is then queued and loaded by the COFF Driver. The
47 // debug type stream for such PDB files will be merged first in the final PDB,
48 // before any dependent OBJ.
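// (For reference: an OBJ built with /Zi typically carries a single
// LF_TYPESERVER2 record in its .debug$T section, holding the PDB path and
// GUID that this class is keyed on; see UseTypeServerSource below.)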
49 class TypeServerSource : public TpiSource {
50 public:
51 explicit TypeServerSource(COFFLinkerContext &ctx, PDBInputFile *f)
52 : TpiSource(ctx, PDB, nullptr), pdbInputFile(f) {
53 if (f->loadErrorStr)
54 return;
55 pdb::PDBFile &file = f->session->getPDBFile();
56 auto expectedInfo = file.getPDBInfoStream();
57 if (!expectedInfo)
58 return;
59 Guid = expectedInfo->getGuid();
60 auto it = ctx.typeServerSourceMappings.emplace(Guid, this);
61 if (!it.second) {
62 // If we hit this, we have a GUID collision between two PDB files.
63 // This can happen if the PDB GUID is invalid or if we are really
64 // unlucky. In that case we fall back on a straight file-system lookup.
65 it.first->second = nullptr;
69 Error mergeDebugT(TypeMerger *m) override;
71 void loadGHashes() override;
72 void remapTpiWithGHashes(GHashState *g) override;
74 bool isDependency() const override { return true; }
76 PDBInputFile *pdbInputFile = nullptr;
78 // TpiSource for IPI stream.
79 TypeServerIpiSource *ipiSrc = nullptr;
81 // The PDB signature GUID.
82 codeview::GUID Guid;
85 // Companion to TypeServerSource. Stores the index map for the IPI stream in the
86 // PDB. Modeling PDBs with two sources for TPI and IPI helps establish the
87 // invariant of one type index space per source.
88 class TypeServerIpiSource : public TpiSource {
89 public:
90 explicit TypeServerIpiSource(COFFLinkerContext &ctx)
91 : TpiSource(ctx, PDBIpi, nullptr) {}
93 friend class TypeServerSource;
95 // All of the TpiSource methods are no-ops. The parent TypeServerSource
96 // handles both TPI and IPI.
97 Error mergeDebugT(TypeMerger *m) override { return Error::success(); }
98 void loadGHashes() override {}
99 void remapTpiWithGHashes(GHashState *g) override {}
100 bool isDependency() const override { return true; }
103 // This class represents the debug type stream of an OBJ file that depends on a
104 // PDB type server (see TypeServerSource).
105 class UseTypeServerSource : public TpiSource {
106 Expected<TypeServerSource *> getTypeServerSource();
108 public:
109 UseTypeServerSource(COFFLinkerContext &ctx, ObjFile *f, TypeServer2Record ts)
110 : TpiSource(ctx, UsingPDB, f), typeServerDependency(ts) {}
112 Error mergeDebugT(TypeMerger *m) override;
114 // No need to load ghashes from /Zi objects.
115 void loadGHashes() override {}
116 void remapTpiWithGHashes(GHashState *g) override;
119 // Information about the PDB type server dependency, which needs to be loaded
120 // before merging this OBJ.
120 TypeServer2Record typeServerDependency;
123 // This class represents the debug type stream of a Microsoft precompiled
124 // headers OBJ (PCH OBJ). This OBJ kind needs to be merged first in the output
125 // PDB, before any other OBJs that depend on it. Note that only MSVC generates
126 // such files; clang does not.
127 class PrecompSource : public TpiSource {
128 public:
129 PrecompSource(COFFLinkerContext &ctx, ObjFile *f) : TpiSource(ctx, PCH, f) {
130 // If the S_OBJNAME record contains the PCH signature, we'll register this
131 // source file right away.
132 registerMapping();
135 Error mergeDebugT(TypeMerger *m) override;
137 void loadGHashes() override;
139 bool isDependency() const override { return true; }
141 private:
142 void registerMapping();
144 // Whether this precomp OBJ was recorded in the precompSourceMappings map.
145 // Only happens if the file->pchSignature is valid.
146 bool registered = false;
149 // This class represents the debug type stream of an OBJ file that depends on a
150 // Microsoft precompiled headers OBJ (see PrecompSource).
151 class UsePrecompSource : public TpiSource {
152 public:
153 UsePrecompSource(COFFLinkerContext &ctx, ObjFile *f, PrecompRecord precomp)
154 : TpiSource(ctx, UsingPCH, f), precompDependency(precomp) {}
156 Error mergeDebugT(TypeMerger *m) override;
158 void loadGHashes() override;
159 void remapTpiWithGHashes(GHashState *g) override;
161 private:
162 Error mergeInPrecompHeaderObj();
164 PrecompSource *findObjByName(StringRef fileNameOnly);
165 PrecompSource *findPrecompSource(ObjFile *file, PrecompRecord &pr);
166 Expected<PrecompSource *> findPrecompMap(ObjFile *file, PrecompRecord &pr);
168 public:
169 // Information about the Precomp OBJ dependency, which needs to be loaded
170 // before merging this OBJ.
171 PrecompRecord precompDependency;
173 } // namespace
175 TpiSource::TpiSource(COFFLinkerContext &ctx, TpiKind k, ObjFile *f)
176 : ctx(ctx), kind(k), tpiSrcIdx(ctx.tpiSourceList.size()), file(f) {
177 ctx.addTpiSource(this);
180 // Vtable key method.
181 TpiSource::~TpiSource() {
182 // Silence any assertions about unchecked errors.
183 consumeError(std::move(typeMergingError));
186 TpiSource *lld::coff::makeTpiSource(COFFLinkerContext &ctx, ObjFile *file) {
187 return make<TpiSource>(ctx, TpiSource::Regular, file);
190 TpiSource *lld::coff::makeTypeServerSource(COFFLinkerContext &ctx,
191 PDBInputFile *pdbInputFile) {
192 // Type server sources come in pairs: the TPI stream, and the IPI stream.
193 auto *tpiSource = make<TypeServerSource>(ctx, pdbInputFile);
194 if (pdbInputFile->session->getPDBFile().hasPDBIpiStream())
195 tpiSource->ipiSrc = make<TypeServerIpiSource>(ctx);
196 return tpiSource;
199 TpiSource *lld::coff::makeUseTypeServerSource(COFFLinkerContext &ctx,
200 ObjFile *file,
201 TypeServer2Record ts) {
202 return make<UseTypeServerSource>(ctx, file, ts);
205 TpiSource *lld::coff::makePrecompSource(COFFLinkerContext &ctx, ObjFile *file) {
206 return make<PrecompSource>(ctx, file);
209 TpiSource *lld::coff::makeUsePrecompSource(COFFLinkerContext &ctx,
210 ObjFile *file,
211 PrecompRecord precomp) {
212 return make<UsePrecompSource>(ctx, file, precomp);
215 bool TpiSource::remapTypeIndex(TypeIndex &ti, TiRefKind refKind) const {
216 if (ti.isSimple())
217 return true;
219 // This can be an item index or a type index. Choose the appropriate map.
220 ArrayRef<TypeIndex> tpiOrIpiMap =
221 (refKind == TiRefKind::IndexRef) ? ipiMap : tpiMap;
222 if (ti.toArrayIndex() >= tpiOrIpiMap.size())
223 return false;
224 ti = tpiOrIpiMap[ti.toArrayIndex()];
225 return true;
228 void TpiSource::remapRecord(MutableArrayRef<uint8_t> rec,
229 ArrayRef<TiReference> typeRefs) {
230 MutableArrayRef<uint8_t> contents = rec.drop_front(sizeof(RecordPrefix));
231 for (const TiReference &ref : typeRefs) {
232 unsigned byteSize = ref.Count * sizeof(TypeIndex);
233 if (contents.size() < ref.Offset + byteSize)
234 Fatal(ctx) << "symbol record too short";
236 MutableArrayRef<TypeIndex> indices(
237 reinterpret_cast<TypeIndex *>(contents.data() + ref.Offset), ref.Count);
238 for (TypeIndex &ti : indices) {
239 if (!remapTypeIndex(ti, ref.Kind)) {
240 if (ctx.config.verbose) {
241 uint16_t kind =
242 reinterpret_cast<const RecordPrefix *>(rec.data())->RecordKind;
243 StringRef fname = file ? file->getName() : "<unknown PDB>";
244 Log(ctx) << "failed to remap type index in record of kind 0x"
245 << utohexstr(kind) << " in " << fname << " with bad "
246 << (ref.Kind == TiRefKind::IndexRef ? "item" : "type")
247 << " index 0x" << utohexstr(ti.getIndex());
249 ti = TypeIndex(SimpleTypeKind::NotTranslated);
250 continue;
256 void TpiSource::remapTypesInTypeRecord(MutableArrayRef<uint8_t> rec) {
257 // TODO: Handle errors similar to symbols.
258 SmallVector<TiReference, 32> typeRefs;
259 discoverTypeIndices(CVType(rec), typeRefs);
260 remapRecord(rec, typeRefs);
263 bool TpiSource::remapTypesInSymbolRecord(MutableArrayRef<uint8_t> rec) {
264 // Discover type index references in the record. Skip it if we don't
265 // know where they are.
266 SmallVector<TiReference, 32> typeRefs;
267 if (!discoverTypeIndicesInSymbol(rec, typeRefs))
268 return false;
269 remapRecord(rec, typeRefs);
270 return true;
273 // A COFF .debug$H section is currently a clang extension. This function checks
274 // if a .debug$H section is in a format that we expect / understand, so that we
275 // can ignore any sections which are coincidentally also named .debug$H but do
276 // not contain a format we recognize.
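// Expected layout (a sketch, inferred from the checks below): an
// object::debug_h_header {Magic, Version, HashAlgorithm}, followed by one
// 8-byte truncated BLAKE3 hash per record in the matching .debug$T section.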
277 static bool canUseDebugH(ArrayRef<uint8_t> debugH) {
278 if (debugH.size() < sizeof(object::debug_h_header))
279 return false;
280 auto *header =
281 reinterpret_cast<const object::debug_h_header *>(debugH.data());
282 debugH = debugH.drop_front(sizeof(object::debug_h_header));
283 return header->Magic == COFF::DEBUG_HASHES_SECTION_MAGIC &&
284 header->Version == 0 &&
285 header->HashAlgorithm == uint16_t(GlobalTypeHashAlg::BLAKE3) &&
286 (debugH.size() % 8 == 0);
289 static std::optional<ArrayRef<uint8_t>> getDebugH(ObjFile *file) {
290 SectionChunk *sec =
291 SectionChunk::findByName(file->getDebugChunks(), ".debug$H");
292 if (!sec)
293 return std::nullopt;
294 ArrayRef<uint8_t> contents = sec->getContents();
295 if (!canUseDebugH(contents))
296 return std::nullopt;
297 return contents;
300 static ArrayRef<GloballyHashedType>
301 getHashesFromDebugH(ArrayRef<uint8_t> debugH) {
302 assert(canUseDebugH(debugH));
303 debugH = debugH.drop_front(sizeof(object::debug_h_header));
304 uint32_t count = debugH.size() / sizeof(GloballyHashedType);
305 return {reinterpret_cast<const GloballyHashedType *>(debugH.data()), count};
308 // Merge .debug$T for a generic object file.
309 Error TpiSource::mergeDebugT(TypeMerger *m) {
310 assert(!ctx.config.debugGHashes &&
311 "use remapTpiWithGHashes when ghash is enabled");
313 CVTypeArray types;
314 BinaryStreamReader reader(file->debugTypes, llvm::endianness::little);
315 cantFail(reader.readArray(types, reader.getLength()));
317 // When dealing with PCH.OBJ, some indices were already merged.
318 unsigned nbHeadIndices = indexMapStorage.size();
320 std::optional<PCHMergerInfo> pchInfo;
321 if (auto err = mergeTypeAndIdRecords(m->idTable, m->typeTable,
322 indexMapStorage, types, pchInfo))
323 Fatal(ctx) << "codeview::mergeTypeAndIdRecords failed: "
324 << toString(std::move(err));
325 if (pchInfo) {
326 file->pchSignature = pchInfo->PCHSignature;
327 endPrecompIdx = pchInfo->EndPrecompIndex;
330 // In an object, there is only one mapping for both types and items.
331 tpiMap = indexMapStorage;
332 ipiMap = indexMapStorage;
334 if (ctx.config.showSummary) {
335 nbTypeRecords = indexMapStorage.size() - nbHeadIndices;
336 nbTypeRecordsBytes = reader.getLength();
337 // Count how many times we saw each type record in our input. This
338 // calculation requires a second pass over the type records to classify each
339 // record as either a type or an id (item) record. This is slow, but it only
340 // runs when collecting statistics.
341 m->tpiCounts.resize(m->getTypeTable().size());
342 m->ipiCounts.resize(m->getIDTable().size());
343 uint32_t srcIdx = nbHeadIndices;
344 for (const CVType &ty : types) {
345 TypeIndex dstIdx = tpiMap[srcIdx++];
346 // Type merging may fail, so a complex source type may become the simple
347 // NotTranslated type, which cannot be used as an array index.
348 if (dstIdx.isSimple())
349 continue;
350 SmallVectorImpl<uint32_t> &counts =
351 isIdRecord(ty.kind()) ? m->ipiCounts : m->tpiCounts;
352 ++counts[dstIdx.toArrayIndex()];
356 return Error::success();
359 // Merge types from a type server PDB.
360 Error TypeServerSource::mergeDebugT(TypeMerger *m) {
361 assert(!ctx.config.debugGHashes &&
362 "use remapTpiWithGHashes when ghash is enabled");
364 pdb::PDBFile &pdbFile = pdbInputFile->session->getPDBFile();
365 Expected<pdb::TpiStream &> expectedTpi = pdbFile.getPDBTpiStream();
366 if (auto e = expectedTpi.takeError())
367 Fatal(ctx) << "Type server does not have TPI stream: "
368 << toString(std::move(e));
369 pdb::TpiStream *maybeIpi = nullptr;
370 if (pdbFile.hasPDBIpiStream()) {
371 Expected<pdb::TpiStream &> expectedIpi = pdbFile.getPDBIpiStream();
372 if (auto e = expectedIpi.takeError())
373 Fatal(ctx) << "Error getting type server IPI stream: "
374 << toString(std::move(e));
375 maybeIpi = &*expectedIpi;
378 // Merge TPI first, because the IPI stream will reference type indices.
379 if (auto err = mergeTypeRecords(m->typeTable, indexMapStorage,
380 expectedTpi->typeArray()))
381 Fatal(ctx) << "codeview::mergeTypeRecords failed: "
382 << toString(std::move(err));
383 tpiMap = indexMapStorage;
385 // Merge IPI.
386 if (maybeIpi) {
387 if (auto err = mergeIdRecords(m->idTable, tpiMap, ipiSrc->indexMapStorage,
388 maybeIpi->typeArray()))
389 Fatal(ctx) << "codeview::mergeIdRecords failed: "
390 << toString(std::move(err));
391 ipiMap = ipiSrc->indexMapStorage;
394 if (ctx.config.showSummary) {
395 nbTypeRecords = tpiMap.size() + ipiMap.size();
396 nbTypeRecordsBytes =
397 expectedTpi->typeArray().getUnderlyingStream().getLength() +
398 (maybeIpi ? maybeIpi->typeArray().getUnderlyingStream().getLength()
399 : 0);
401 // Count how many times we saw each type record in our input. If a
402 // destination type index is present in the source to destination type index
403 // map, that means we saw it once in the input. Add it to our histogram.
404 m->tpiCounts.resize(m->getTypeTable().size());
405 m->ipiCounts.resize(m->getIDTable().size());
406 for (TypeIndex ti : tpiMap)
407 if (!ti.isSimple())
408 ++m->tpiCounts[ti.toArrayIndex()];
409 for (TypeIndex ti : ipiMap)
410 if (!ti.isSimple())
411 ++m->ipiCounts[ti.toArrayIndex()];
414 return Error::success();
417 Expected<TypeServerSource *> UseTypeServerSource::getTypeServerSource() {
418 const codeview::GUID &tsId = typeServerDependency.getGuid();
419 StringRef tsPath = typeServerDependency.getName();
421 TypeServerSource *tsSrc = nullptr;
422 auto it = ctx.typeServerSourceMappings.find(tsId);
423 if (it != ctx.typeServerSourceMappings.end()) {
424 tsSrc = (TypeServerSource *)it->second;
426 if (tsSrc == nullptr) {
427 // The file failed to load; look it up by name.
428 PDBInputFile *pdb = PDBInputFile::findFromRecordPath(ctx, tsPath, file);
429 if (!pdb)
430 return createFileError(tsPath, errorCodeToError(std::error_code(
431 ENOENT, std::generic_category())));
432 // If an error occurred during loading, return it now.
433 if (pdb->loadErrorStr)
434 return createFileError(
435 tsPath, make_error<StringError>(*pdb->loadErrorStr,
436 llvm::inconvertibleErrorCode()));
438 tsSrc = (TypeServerSource *)pdb->debugTypesObj;
440 // Just because a file with a matching name was found and it was an actual
441 // PDB file doesn't mean it matches. For it to match, the InfoStream's GUID
442 // must match the GUID specified in the TypeServer2 record.
443 if (tsSrc->Guid != tsId) {
444 return createFileError(tsPath,
445 make_error<pdb::PDBError>(
446 pdb::pdb_error_code::signature_out_of_date));
449 return tsSrc;
452 Error UseTypeServerSource::mergeDebugT(TypeMerger *m) {
453 Expected<TypeServerSource *> tsSrc = getTypeServerSource();
454 if (!tsSrc)
455 return tsSrc.takeError();
457 pdb::PDBFile &pdbSession = (*tsSrc)->pdbInputFile->session->getPDBFile();
458 auto expectedInfo = pdbSession.getPDBInfoStream();
459 if (!expectedInfo)
460 return expectedInfo.takeError();
462 // Reuse the type index map of the type server.
463 tpiMap = (*tsSrc)->tpiMap;
464 ipiMap = (*tsSrc)->ipiMap;
465 return Error::success();
468 static bool equalsPath(StringRef path1, StringRef path2) {
469 #if defined(_WIN32)
470 return path1.equals_insensitive(path2);
471 #else
472 return path1 == path2;
473 #endif
476 // Find an OBJ provided on the command line by its file name.
477 PrecompSource *UsePrecompSource::findObjByName(StringRef fileNameOnly) {
478 SmallString<128> currentPath;
479 for (auto kv : ctx.precompSourceMappings) {
480 StringRef currentFileName = sys::path::filename(kv.second->file->getName(),
481 sys::path::Style::windows);
483 // Compare based solely on the file name (link.exe behavior)
484 if (equalsPath(currentFileName, fileNameOnly))
485 return (PrecompSource *)kv.second;
487 return nullptr;
490 PrecompSource *UsePrecompSource::findPrecompSource(ObjFile *file,
491 PrecompRecord &pr) {
492 // Cross-compile warning: given that Clang doesn't generate LF_PRECOMP
493 // records, we assume the OBJ comes from a Windows build of cl.exe. Thus,
494 // the paths embedded in the OBJs are in the Windows format.
495 SmallString<128> prFileName =
496 sys::path::filename(pr.getPrecompFilePath(), sys::path::Style::windows);
498 auto it = ctx.precompSourceMappings.find(pr.getSignature());
499 if (it != ctx.precompSourceMappings.end()) {
500 return (PrecompSource *)it->second;
502 // Lookup by name
503 return findObjByName(prFileName);
506 Expected<PrecompSource *> UsePrecompSource::findPrecompMap(ObjFile *file,
507 PrecompRecord &pr) {
508 PrecompSource *precomp = findPrecompSource(file, pr);
510 if (!precomp)
511 return createFileError(
512 pr.getPrecompFilePath(),
513 make_error<pdb::PDBError>(pdb::pdb_error_code::no_matching_pch));
515 // Don't rely on the PCH signature to validate the concordance between the PCH
516 // and the OBJ that uses it. However, we do validate here that the
517 // LF_ENDPRECOMP record index lines up with the number of type records
518 // LF_PRECOMP is expecting.
519 if (precomp->endPrecompIdx != pr.getTypesCount())
520 return createFileError(
521 toString(file),
522 make_error<pdb::PDBError>(pdb::pdb_error_code::no_matching_pch));
524 return precomp;
527 /// Merges a precompiled headers TPI map into the current TPI map. The
528 /// precompiled headers object will also be loaded and remapped in the
529 /// process.
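/// For example (a sketch): if the /Yc PCH OBJ contributed N type records, then
/// indices [0x1000, 0x1000 + N) in this /Yu OBJ refer to those records, so the
/// PCH's already-remapped indices are prepended to indexMapStorage below before
/// this OBJ's own records are merged.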
530 Error UsePrecompSource::mergeInPrecompHeaderObj() {
531 auto e = findPrecompMap(file, precompDependency);
532 if (!e)
533 return e.takeError();
535 PrecompSource *precompSrc = *e;
536 if (precompSrc->tpiMap.empty())
537 return Error::success();
539 assert(precompDependency.getStartTypeIndex() ==
540 TypeIndex::FirstNonSimpleIndex);
541 assert(precompDependency.getTypesCount() <= precompSrc->tpiMap.size());
542 // Use the previously remapped index map from the precompiled headers.
543 indexMapStorage.insert(indexMapStorage.begin(), precompSrc->tpiMap.begin(),
544 precompSrc->tpiMap.begin() +
545 precompDependency.getTypesCount());
547 return Error::success();
550 Error UsePrecompSource::mergeDebugT(TypeMerger *m) {
551 // This object was compiled with /Yu, so process the corresponding
552 // precompiled headers object (/Yc) first. Some type indices in the current
553 // object are referencing data in the precompiled headers object, so we need
554 // both to be loaded.
555 if (Error e = mergeInPrecompHeaderObj())
556 return e;
558 return TpiSource::mergeDebugT(m);
561 Error PrecompSource::mergeDebugT(TypeMerger *m) {
562 // In some cases, the S_OBJNAME record doesn't contain the PCH signature.
563 // The signature comes later with the LF_ENDPRECOMP record, so we first need
564 // to merge in all the .PCH.OBJ file type records, before registering below.
565 if (Error e = TpiSource::mergeDebugT(m))
566 return e;
568 registerMapping();
570 return Error::success();
573 void PrecompSource::registerMapping() {
574 if (registered)
575 return;
576 if (file->pchSignature && *file->pchSignature) {
577 auto it = ctx.precompSourceMappings.emplace(*file->pchSignature, this);
578 if (!it.second)
579 Fatal(ctx)
580 << "a PCH object with the same signature has already been provided ("
581 << toString(it.first->second->file) << " and " << toString(file)
582 << ")";
583 registered = true;
587 //===----------------------------------------------------------------------===//
588 // Parallel GHash type merging implementation.
589 //===----------------------------------------------------------------------===//
591 void TpiSource::loadGHashes() {
592 if (std::optional<ArrayRef<uint8_t>> debugH = getDebugH(file)) {
593 ghashes = getHashesFromDebugH(*debugH);
594 ownedGHashes = false;
595 } else {
596 CVTypeArray types;
597 BinaryStreamReader reader(file->debugTypes, llvm::endianness::little);
598 cantFail(reader.readArray(types, reader.getLength()));
599 assignGHashesFromVector(GloballyHashedType::hashTypes(types));
602 fillIsItemIndexFromDebugT();
605 // Copies ghashes from a vector into an array. These are long lived, so it's
606 // worth the time to copy them into an appropriately sized array to reduce
607 // memory usage.
608 void TpiSource::assignGHashesFromVector(
609 std::vector<GloballyHashedType> &&hashVec) {
610 if (hashVec.empty())
611 return;
612 GloballyHashedType *hashes = new GloballyHashedType[hashVec.size()];
613 memcpy(hashes, hashVec.data(), hashVec.size() * sizeof(GloballyHashedType));
614 ghashes = ArrayRef(hashes, hashVec.size());
615 ownedGHashes = true;
618 // Faster way to iterate type records. forEachTypeChecked is faster than
619 // iterating CVTypeArray. It avoids virtual readBytes calls in inner loops.
620 static void forEachTypeChecked(ArrayRef<uint8_t> types,
621 function_ref<void(const CVType &)> fn) {
622 checkError(
623 forEachCodeViewRecord<CVType>(types, [fn](const CVType &ty) -> Error {
624 fn(ty);
625 return Error::success();
626 }));
629 // Walk over file->debugTypes and fill in the isItemIndex bit vector.
630 // TODO: Store this information in .debug$H so that we don't have to recompute
631 // it. This recomputation is the main reason that parallel ghashing with one
632 // thread is slower than single-threaded ghashing.
633 void TpiSource::fillIsItemIndexFromDebugT() {
634 uint32_t index = 0;
635 isItemIndex.resize(ghashes.size());
636 forEachTypeChecked(file->debugTypes, [&](const CVType &ty) {
637 if (isIdRecord(ty.kind()))
638 isItemIndex.set(index);
639 ++index;
643 void TpiSource::mergeTypeRecord(TypeIndex curIndex, CVType ty) {
644 // Decide if the merged type goes into TPI or IPI.
645 bool isItem = isIdRecord(ty.kind());
646 MergedInfo &merged = isItem ? mergedIpi : mergedTpi;
648 // Copy the type into our mutable buffer.
649 assert(ty.length() <= codeview::MaxRecordLength);
650 size_t offset = merged.recs.size();
651 size_t newSize = alignTo(ty.length(), 4);
652 merged.recs.resize(offset + newSize);
653 auto newRec = MutableArrayRef(&merged.recs[offset], newSize);
654 memcpy(newRec.data(), ty.data().data(), newSize);
656 // Fix up the record prefix and padding bytes if the record required resizing.
657 if (newSize != ty.length()) {
658 reinterpret_cast<RecordPrefix *>(newRec.data())->RecordLen = newSize - 2;
659 for (size_t i = ty.length(); i < newSize; ++i)
660 newRec[i] = LF_PAD0 + (newSize - i);
663 // Remap the type indices in the new record.
664 remapTypesInTypeRecord(newRec);
665 uint32_t pdbHash = check(pdb::hashTypeRecord(CVType(newRec)));
666 merged.recSizes.push_back(static_cast<uint16_t>(newSize));
667 merged.recHashes.push_back(pdbHash);
669 // Retain a mapping from PDB function id to PDB function type. This mapping is
670 // used during symbol processing to rewrite S_GPROC32_ID symbols to S_GPROC32
671 // symbols.
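// (For example, an S_GPROC32_ID symbol whose LF_FUNC_ID maps to funcId here
// is later rewritten as an S_GPROC32 symbol referring to the recorded
// function type.)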
672 if (ty.kind() == LF_FUNC_ID || ty.kind() == LF_MFUNC_ID) {
673 bool success = ty.length() >= 12;
674 TypeIndex funcId = curIndex;
675 if (success)
676 success &= remapTypeIndex(funcId, TiRefKind::IndexRef);
677 TypeIndex funcType =
678 *reinterpret_cast<const TypeIndex *>(&newRec.data()[8]);
679 if (success) {
680 funcIdToType.push_back({funcId, funcType});
681 } else {
682 StringRef fname = file ? file->getName() : "<unknown PDB>";
683 Warn(ctx) << "corrupt LF_[M]FUNC_ID record 0x"
684 << utohexstr(curIndex.getIndex()) << " in " << fname;
689 void TpiSource::mergeUniqueTypeRecords(ArrayRef<uint8_t> typeRecords,
690 TypeIndex beginIndex) {
691 // Re-sort the list of unique types by index.
692 if (kind == PDB)
693 assert(llvm::is_sorted(uniqueTypes));
694 else
695 llvm::sort(uniqueTypes);
697 // Accumulate all the unique types into one buffer in mergedTypes.
698 uint32_t ghashIndex = 0;
699 auto nextUniqueIndex = uniqueTypes.begin();
700 assert(mergedTpi.recs.empty());
701 assert(mergedIpi.recs.empty());
703 // Compute the number of elements in advance to avoid std::vector resizes.
704 unsigned nbTpiRecs = 0;
705 unsigned nbIpiRecs = 0;
706 forEachTypeChecked(typeRecords, [&](const CVType &ty) {
707 if (nextUniqueIndex != uniqueTypes.end() &&
708 *nextUniqueIndex == ghashIndex) {
709 assert(ty.length() <= codeview::MaxRecordLength);
710 size_t newSize = alignTo(ty.length(), 4);
711 (isIdRecord(ty.kind()) ? nbIpiRecs : nbTpiRecs) += newSize;
712 ++nextUniqueIndex;
714 ++ghashIndex;
716 mergedTpi.recs.reserve(nbTpiRecs);
717 mergedIpi.recs.reserve(nbIpiRecs);
719 // Do the actual type merge.
720 ghashIndex = 0;
721 nextUniqueIndex = uniqueTypes.begin();
722 forEachTypeChecked(typeRecords, [&](const CVType &ty) {
723 if (nextUniqueIndex != uniqueTypes.end() &&
724 *nextUniqueIndex == ghashIndex) {
725 mergeTypeRecord(beginIndex + ghashIndex, ty);
726 ++nextUniqueIndex;
728 ++ghashIndex;
730 assert(nextUniqueIndex == uniqueTypes.end() &&
731 "failed to merge all desired records");
732 assert(uniqueTypes.size() ==
733 mergedTpi.recSizes.size() + mergedIpi.recSizes.size() &&
734 "missing desired record");
737 void TpiSource::remapTpiWithGHashes(GHashState *g) {
738 assert(ctx.config.debugGHashes && "ghashes must be enabled");
739 fillMapFromGHashes(g);
740 tpiMap = indexMapStorage;
741 ipiMap = indexMapStorage;
742 mergeUniqueTypeRecords(file->debugTypes);
743 // TODO: Free all unneeded ghash resources now that we have a full index map.
745 if (ctx.config.showSummary) {
746 nbTypeRecords = ghashes.size();
747 nbTypeRecordsBytes = file->debugTypes.size();
751 // PDBs do not actually store global hashes, so when merging a type server
752 // PDB we have to synthesize global hashes. To do this, we first synthesize
753 // global hashes for the TPI stream, since it is independent, then we
754 // synthesize hashes for the IPI stream, using the hashes for the TPI stream
755 // as inputs.
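// (ID records in the IPI stream embed type indices that point into the TPI
// stream, which is why the TPI ghashes are needed as inputs when hashing the
// IPI stream.)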
756 void TypeServerSource::loadGHashes() {
757 // Don't hash twice.
758 if (!ghashes.empty())
759 return;
760 pdb::PDBFile &pdbFile = pdbInputFile->session->getPDBFile();
762 // Hash TPI stream.
763 Expected<pdb::TpiStream &> expectedTpi = pdbFile.getPDBTpiStream();
764 if (auto e = expectedTpi.takeError())
765 Fatal(ctx) << "Type server does not have TPI stream: "
766 << toString(std::move(e));
767 assignGHashesFromVector(
768 GloballyHashedType::hashTypes(expectedTpi->typeArray()));
769 isItemIndex.resize(ghashes.size());
771 // Hash IPI stream, which depends on TPI ghashes.
772 if (!pdbFile.hasPDBIpiStream())
773 return;
774 Expected<pdb::TpiStream &> expectedIpi = pdbFile.getPDBIpiStream();
775 if (auto e = expectedIpi.takeError())
776 Fatal(ctx) << "error retrieving IPI stream: " << toString(std::move(e));
777 ipiSrc->assignGHashesFromVector(
778 GloballyHashedType::hashIds(expectedIpi->typeArray(), ghashes));
780 // The IPI stream isItemIndex bitvector should be all ones.
781 ipiSrc->isItemIndex.resize(ipiSrc->ghashes.size());
782 ipiSrc->isItemIndex.set(0, ipiSrc->ghashes.size());
785 // Flatten discontiguous PDB type arrays to bytes so that we can use
786 // forEachTypeChecked instead of CVTypeArray iteration. Copying all types from
787 // type servers is faster than iterating all object files compiled with /Z7 with
788 // CVTypeArray, which has high overheads due to the virtual interface of
789 // BinaryStream::readBytes.
790 static ArrayRef<uint8_t> typeArrayToBytes(const CVTypeArray &types) {
791 BinaryStreamRef stream = types.getUnderlyingStream();
792 ArrayRef<uint8_t> debugTypes;
793 checkError(stream.readBytes(0, stream.getLength(), debugTypes));
794 return debugTypes;
797 // Merge types from a type server PDB.
798 void TypeServerSource::remapTpiWithGHashes(GHashState *g) {
799 assert(ctx.config.debugGHashes && "ghashes must be enabled");
801 // IPI merging depends on TPI, so do TPI first, then do IPI. No need to
802 // propagate errors, those should've been handled during ghash loading.
803 pdb::PDBFile &pdbFile = pdbInputFile->session->getPDBFile();
804 pdb::TpiStream &tpi = check(pdbFile.getPDBTpiStream());
805 fillMapFromGHashes(g);
806 tpiMap = indexMapStorage;
807 mergeUniqueTypeRecords(typeArrayToBytes(tpi.typeArray()));
808 if (pdbFile.hasPDBIpiStream()) {
809 pdb::TpiStream &ipi = check(pdbFile.getPDBIpiStream());
810 ipiSrc->indexMapStorage.resize(ipiSrc->ghashes.size());
811 ipiSrc->fillMapFromGHashes(g);
812 ipiMap = ipiSrc->indexMapStorage;
813 ipiSrc->tpiMap = tpiMap;
814 ipiSrc->ipiMap = ipiMap;
815 ipiSrc->mergeUniqueTypeRecords(typeArrayToBytes(ipi.typeArray()));
817 if (ctx.config.showSummary) {
818 nbTypeRecords = ipiSrc->ghashes.size();
819 nbTypeRecordsBytes = ipi.typeArray().getUnderlyingStream().getLength();
823 if (ctx.config.showSummary) {
824 nbTypeRecords += ghashes.size();
825 nbTypeRecordsBytes += tpi.typeArray().getUnderlyingStream().getLength();
829 void UseTypeServerSource::remapTpiWithGHashes(GHashState *g) {
830 // No remapping to do with /Zi objects. Simply use the index map from the type
831 // server. Errors should have been reported earlier. Symbols from this object
832 // will be ignored.
833 Expected<TypeServerSource *> maybeTsSrc = getTypeServerSource();
834 if (!maybeTsSrc) {
835 typeMergingError =
836 joinErrors(std::move(typeMergingError), maybeTsSrc.takeError());
837 return;
839 TypeServerSource *tsSrc = *maybeTsSrc;
840 tpiMap = tsSrc->tpiMap;
841 ipiMap = tsSrc->ipiMap;
844 void PrecompSource::loadGHashes() {
845 if (getDebugH(file)) {
846 Warn(ctx) << "ignoring .debug$H section; pch with ghash is not implemented";
849 uint32_t ghashIdx = 0;
850 std::vector<GloballyHashedType> hashVec;
851 forEachTypeChecked(file->debugTypes, [&](const CVType &ty) {
852 // Remember the index of the LF_ENDPRECOMP record so it can be excluded from
853 // the PDB. There must be an entry in the list of ghashes so that the type
854 // indexes of the following records in the /Yc PCH object line up.
855 if (ty.kind() == LF_ENDPRECOMP) {
856 EndPrecompRecord endPrecomp;
857 cantFail(TypeDeserializer::deserializeAs<EndPrecompRecord>(
858 const_cast<CVType &>(ty), endPrecomp));
859 file->pchSignature = endPrecomp.getSignature();
860 registerMapping();
861 endPrecompIdx = ghashIdx;
864 hashVec.push_back(GloballyHashedType::hashType(ty, hashVec, hashVec));
865 isItemIndex.push_back(isIdRecord(ty.kind()));
866 ++ghashIdx;
868 assignGHashesFromVector(std::move(hashVec));
871 void UsePrecompSource::loadGHashes() {
872 auto e = findPrecompMap(file, precompDependency);
873 if (!e) {
874 Warn(ctx) << e.takeError();
875 return;
878 PrecompSource *pchSrc = *e;
880 // To compute ghashes of a /Yu object file, we need to build on the ghashes of
881 // the /Yc PCH object. After we are done hashing, discard the ghashes from the
882 // PCH source so we don't unnecessarily try to deduplicate them.
883 std::vector<GloballyHashedType> hashVec =
884 pchSrc->ghashes.take_front(precompDependency.getTypesCount());
885 forEachTypeChecked(file->debugTypes, [&](const CVType &ty) {
886 hashVec.push_back(GloballyHashedType::hashType(ty, hashVec, hashVec));
887 isItemIndex.push_back(isIdRecord(ty.kind()));
889 hashVec.erase(hashVec.begin(),
890 hashVec.begin() + precompDependency.getTypesCount());
891 assignGHashesFromVector(std::move(hashVec));
894 void UsePrecompSource::remapTpiWithGHashes(GHashState *g) {
895 fillMapFromGHashes(g);
896 // This object was compiled with /Yu, so process the corresponding
897 // precompiled headers object (/Yc) first. Some type indices in the current
898 // object are referencing data in the precompiled headers object, so we need
899 // both to be loaded.
900 if (Error e = mergeInPrecompHeaderObj()) {
901 typeMergingError = joinErrors(std::move(typeMergingError), std::move(e));
902 return;
905 tpiMap = indexMapStorage;
906 ipiMap = indexMapStorage;
907 mergeUniqueTypeRecords(file->debugTypes,
908 TypeIndex(precompDependency.getStartTypeIndex() +
909 precompDependency.getTypesCount()));
910 if (ctx.config.showSummary) {
911 nbTypeRecords = ghashes.size();
912 nbTypeRecordsBytes = file->debugTypes.size();
916 namespace {
917 /// A concurrent hash table for global type hashing. It is based on this paper:
918 /// Concurrent Hash Tables: Fast and General(?)!
919 /// https://dl.acm.org/doi/10.1145/3309206
921 /// This hash table is meant to be used in two phases:
922 /// 1. concurrent insertions
923 /// 2. concurrent reads
924 /// It does not support lookup, deletion, or rehashing. It uses linear probing.
926 /// The paper describes storing a key-value pair in two machine words.
927 /// Generally, the values stored in this map are type indices, and we can use
928 /// those values to recover the ghash key from a side table. This allows us to
929 /// shrink the table entries further at the cost of some loads, and sidesteps
930 /// the need for a 128 bit atomic compare-and-swap operation.
932 /// During insertion, a priority function is used to decide which insertion
933 /// should be preferred. This ensures that the output is deterministic. For
934 /// ghashing, lower tpiSrcIdx values (earlier inputs) are preferred.
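/// A sketch of the intended two-phase usage (see mergeTypesWithGHash below):
///   GHashTable table;
///   table.init(<sum of all input ghash counts>);      // size once, up front
///   // phase 1: concurrent inserts, remembering each insertion position
///   cellIdx = table.insert(ctx, ghash, cell);
///   // phase 2: read table.table[cellIdx] to recover the winning cell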
936 class GHashCell;
937 struct GHashTable {
938 GHashCell *table = nullptr;
939 uint32_t tableSize = 0;
941 GHashTable() = default;
942 ~GHashTable();
944 /// Initialize the table with the given size. Because the table cannot be
945 /// resized, the initial size of the table must be large enough to contain all
946 /// inputs, or insertion may not be able to find an empty cell.
947 void init(uint32_t newTableSize);
949 /// Insert the cell with the given ghash into the table. Return the insertion
950 /// position in the table. It is safe for the caller to store the insertion
951 /// position because the table cannot be resized.
952 uint32_t insert(COFFLinkerContext &ctx, GloballyHashedType ghash,
953 GHashCell newCell);
956 /// A ghash table cell for deduplicating types from TpiSources.
957 class GHashCell {
958 // Force "data" to be 64-bit aligned; otherwise, some versions of clang
959 // will generate calls to libatomic when using some versions of libstdc++
960 // on 32-bit targets. (Also, in theory, there could be a target where
961 // new[] doesn't always return an 8-byte-aligned allocation.)
962 alignas(sizeof(uint64_t)) uint64_t data = 0;
964 public:
965 GHashCell() = default;
967 // Construct data most to least significant so that sorting works well:
968 // - isItem
969 // - tpiSrcIdx
970 // - ghashIdx
971 // Add one to the tpiSrcIdx so that the 0th record from the 0th source has a
972 // non-zero representation.
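  // Worked example (assuming the layout above): GHashCell(/*isItem=*/false,
  // /*tpiSrcIdx=*/2, /*ghashIdx=*/7) packs to 0x0000'0003'0000'0007, since
  // tpiSrcIdx is stored biased by one in bits [32, 62].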
973 GHashCell(bool isItem, uint32_t tpiSrcIdx, uint32_t ghashIdx)
974 : data((uint64_t(isItem) << 63U) | (uint64_t(tpiSrcIdx + 1) << 32ULL) |
975 ghashIdx) {
976 assert(tpiSrcIdx == getTpiSrcIdx() && "round trip failure");
977 assert(ghashIdx == getGHashIdx() && "round trip failure");
980 explicit GHashCell(uint64_t data) : data(data) {}
982 // The empty cell is all zeros.
983 bool isEmpty() const { return data == 0ULL; }
985 /// Extract the tpiSrcIdx.
986 uint32_t getTpiSrcIdx() const {
987 return ((uint32_t)(data >> 32U) & 0x7FFFFFFF) - 1;
990 /// Extract the index into the ghash array of the TpiSource.
991 uint32_t getGHashIdx() const { return (uint32_t)data; }
993 bool isItem() const { return data & (1ULL << 63U); }
995 /// Get the ghash key for this cell.
996 GloballyHashedType getGHash(const COFFLinkerContext &ctx) const {
997 return ctx.tpiSourceList[getTpiSrcIdx()]->ghashes[getGHashIdx()];
1000 /// The priority function for the cell. The data is stored such that lower
1001 /// tpiSrcIdx and ghashIdx values are preferred, which means that type records
1002 /// from earlier sources are more likely to prevail.
1003 friend inline bool operator<(const GHashCell &l, const GHashCell &r) {
1004 return l.data < r.data;
1007 } // namespace
1009 namespace lld::coff {
1010 /// This type is just a wrapper around GHashTable with external linkage so it
1011 /// can be used from a header.
1012 struct GHashState {
1013 GHashTable table;
1015 } // namespace lld::coff
1017 GHashTable::~GHashTable() { delete[] table; }
1019 void GHashTable::init(uint32_t newTableSize) {
1020 table = new GHashCell[newTableSize];
1021 memset(table, 0, newTableSize * sizeof(GHashCell));
1022 tableSize = newTableSize;
1025 uint32_t GHashTable::insert(COFFLinkerContext &ctx, GloballyHashedType ghash,
1026 GHashCell newCell) {
1027 assert(!newCell.isEmpty() && "cannot insert empty cell value");
1029 // FIXME: The low bytes of SHA1 have low entropy for short records, which
1030 // type records are. Swap the byte order for better entropy. A better ghash
1031 // won't need this.
1032 uint32_t startIdx =
1033 llvm::byteswap<uint64_t>(*reinterpret_cast<uint64_t *>(&ghash)) %
1034 tableSize;
1036 // Do a linear probe starting at startIdx.
1037 uint32_t idx = startIdx;
1038 while (true) {
1039 // Run a compare and swap loop. There are four cases:
1040 // - cell is empty: CAS into place and return
1041 // - cell has matching key, earlier priority: do nothing, return
1042 // - cell has matching key, later priority: CAS into place and return
1043 // - cell has non-matching key: hash collision, probe next cell
1044 auto *cellPtr = reinterpret_cast<std::atomic<GHashCell> *>(&table[idx]);
1045 GHashCell oldCell(cellPtr->load());
1046 while (oldCell.isEmpty() || oldCell.getGHash(ctx) == ghash) {
1047 // Check if there is an existing ghash entry with a higher priority
1048 // (earlier ordering). If so, this is a duplicate, we are done.
1049 if (!oldCell.isEmpty() && oldCell < newCell)
1050 return idx;
1051 // Either the cell is empty, or our value is higher priority. Try to
1052 // compare and swap. If it succeeds, we are done.
1053 if (cellPtr->compare_exchange_weak(oldCell, newCell))
1054 return idx;
1055 // If the CAS failed, check this cell again.
1058 // Advance the probe. Wrap around to the beginning if we run off the end.
1059 ++idx;
1060 idx = idx == tableSize ? 0 : idx;
1061 if (idx == startIdx) {
1062 // If this becomes an issue, we could mark failure and rehash from the
1063 // beginning with a bigger table. There is no difference between rehashing
1064 // internally and starting over.
1065 report_fatal_error("ghash table is full");
1068 llvm_unreachable("left infloop");
1071 TypeMerger::TypeMerger(COFFLinkerContext &c, llvm::BumpPtrAllocator &alloc)
1072 : typeTable(alloc), idTable(alloc), ctx(c) {}
1074 TypeMerger::~TypeMerger() = default;
1076 void TypeMerger::mergeTypesWithGHash() {
1077 // Load ghashes. Do type servers and PCH objects first.
1079 llvm::TimeTraceScope timeScope("Load GHASHes");
1080 ScopedTimer t1(ctx.loadGHashTimer);
1081 parallelForEach(dependencySources,
1082 [&](TpiSource *source) { source->loadGHashes(); });
1083 parallelForEach(objectSources,
1084 [&](TpiSource *source) { source->loadGHashes(); });
1087 llvm::TimeTraceScope timeScope("Merge types (GHASH)");
1088 ScopedTimer t2(ctx.mergeGHashTimer);
1089 GHashState ghashState;
1091 // Estimate the size of the hash table needed to deduplicate ghashes. This *must*
1092 // be larger than the number of unique types, or hash table insertion may not
1093 // be able to find a vacant slot. Summing the input types guarantees this, but
1094 // it is a gross overestimate. The table size could be reduced to save memory,
1095 // but it would require implementing rehashing, and this table is generally
1096 // small compared to total memory usage, at eight bytes per input type record,
1097 // and most input type records are larger than eight bytes.
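  // (Rough arithmetic: at 8 bytes per cell, ten million input type records
  // need roughly 80 MB of table.)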
1098 size_t tableSize = 0;
1099 for (TpiSource *source : ctx.tpiSourceList)
1100 tableSize += source->ghashes.size();
1102 // Cap the table size so that we can use 32-bit cell indices. Type indices are
1103 // also 32-bit, so this is an inherent PDB file format limit anyway.
1104 tableSize =
1105 std::min(size_t(INT32_MAX) - TypeIndex::FirstNonSimpleIndex, tableSize);
1106 ghashState.table.init(static_cast<uint32_t>(tableSize));
1108 // Insert ghashes in parallel. During concurrent insertion, we cannot observe
1109 // the contents of the hash table cell, but we can remember the insertion
1110 // position. Because the table does not rehash, the position will not change
1111 // under insertion. After insertion is done, the value of the cell can be read
1112 // to retrieve the final PDB type index.
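  // (Until then, indexMapStorage temporarily holds ghash table cell indices
  // encoded as TypeIndex values; fillMapFromGHashes later replaces them with
  // real PDB type indices.)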
1113 parallelFor(0, ctx.tpiSourceList.size(), [&](size_t tpiSrcIdx) {
1114 TpiSource *source = ctx.tpiSourceList[tpiSrcIdx];
1115 source->indexMapStorage.resize(source->ghashes.size());
1116 for (uint32_t i = 0, e = source->ghashes.size(); i < e; i++) {
1117 if (source->shouldOmitFromPdb(i)) {
1118 source->indexMapStorage[i] = TypeIndex(SimpleTypeKind::NotTranslated);
1119 continue;
1121 GloballyHashedType ghash = source->ghashes[i];
1122 bool isItem = source->isItemIndex.test(i);
1123 uint32_t cellIdx =
1124 ghashState.table.insert(ctx, ghash, GHashCell(isItem, tpiSrcIdx, i));
1126 // Store the ghash cell index as a type index in indexMapStorage. Later
1127 // we will replace it with the PDB type index.
1128 source->indexMapStorage[i] = TypeIndex::fromArrayIndex(cellIdx);
1132 // Collect all non-empty cells and sort them. This will implicitly assign
1133 // destination type indices, and partition the entries into type records and
1134 // item records. It arranges types in this order:
1135 // - type records
1136 // - source 0, type 0...
1137 // - source 1, type 1...
1138 // - item records
1139 // - source 0, type 1...
1140 // - source 1, type 0...
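  // After the sort, an entry's position within its partition (types vs. items)
  // becomes its final PDB type or item index; see pdbTypeIndex below.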
1141 std::vector<GHashCell> entries;
1142 for (const GHashCell &cell : ArrayRef(ghashState.table.table, tableSize)) {
1143 if (!cell.isEmpty())
1144 entries.push_back(cell);
1146 parallelSort(entries, std::less<GHashCell>());
1147 Log(ctx) << formatv(
1148 "ghash table load factor: {0:p} (size {1} / capacity {2})\n",
1149 tableSize ? double(entries.size()) / tableSize : 0, entries.size(),
1150 tableSize);
1152 // Find out how many type and item indices there are.
1153 auto mid = llvm::lower_bound(entries, GHashCell(true, 0, 0));
1154 assert((mid == entries.end() || mid->isItem()) &&
1155 (mid == entries.begin() || !std::prev(mid)->isItem()) &&
1156 "midpoint is not midpoint");
1157 uint32_t numTypes = std::distance(entries.begin(), mid);
1158 uint32_t numItems = std::distance(mid, entries.end());
1159 Log(ctx) << "Tpi record count: " << numTypes;
1160 Log(ctx) << "Ipi record count: " << numItems;
1162 // Make a list of the "unique" type records to merge for each tpi source. Type
1163 // merging will skip indices not on this list. Store the destination PDB type
1164 // index for these unique types in the tpiMap for each source. The entries for
1165 // non-unique types will be filled in prior to type merging.
1166 for (uint32_t i = 0, e = entries.size(); i < e; ++i) {
1167 auto &cell = entries[i];
1168 uint32_t tpiSrcIdx = cell.getTpiSrcIdx();
1169 TpiSource *source = ctx.tpiSourceList[tpiSrcIdx];
1170 source->uniqueTypes.push_back(cell.getGHashIdx());
1172 // Update the ghash table to store the destination PDB type index in the
1173 // table.
1174 uint32_t pdbTypeIndex = i < numTypes ? i : i - numTypes;
1175 uint32_t ghashCellIndex =
1176 source->indexMapStorage[cell.getGHashIdx()].toArrayIndex();
1177 ghashState.table.table[ghashCellIndex] =
1178 GHashCell(cell.isItem(), cell.getTpiSrcIdx(), pdbTypeIndex);
1181 // In parallel, remap all types.
1182 for (TpiSource *source : dependencySources)
1183 source->remapTpiWithGHashes(&ghashState);
1184 parallelForEach(objectSources, [&](TpiSource *source) {
1185 source->remapTpiWithGHashes(&ghashState);
1188 // Build a global map from function ID to function type.
1189 for (TpiSource *source : ctx.tpiSourceList) {
1190 for (auto idToType : source->funcIdToType)
1191 funcIdToType.insert(idToType);
1192 source->funcIdToType.clear();
1195 clearGHashes();
1198 void TypeMerger::sortDependencies() {
1199 // Order dependencies first, but preserve the existing order.
1200 std::vector<TpiSource *> deps;
1201 std::vector<TpiSource *> objs;
1202 for (TpiSource *s : ctx.tpiSourceList)
1203 (s->isDependency() ? deps : objs).push_back(s);
1204 uint32_t numDeps = deps.size();
1205 uint32_t numObjs = objs.size();
1206 ctx.tpiSourceList = std::move(deps);
1207 ctx.tpiSourceList.insert(ctx.tpiSourceList.end(), objs.begin(), objs.end());
1208 for (uint32_t i = 0, e = ctx.tpiSourceList.size(); i < e; ++i)
1209 ctx.tpiSourceList[i]->tpiSrcIdx = i;
1210 dependencySources = ArrayRef(ctx.tpiSourceList.data(), numDeps);
1211 objectSources = ArrayRef(ctx.tpiSourceList.data() + numDeps, numObjs);
1214 /// Given the index into the ghash table for a particular type, return the type
1215 /// index for that type in the output PDB.
1216 static TypeIndex loadPdbTypeIndexFromCell(GHashState *g,
1217 uint32_t ghashCellIdx) {
1218 GHashCell cell = g->table.table[ghashCellIdx];
1219 return TypeIndex::fromArrayIndex(cell.getGHashIdx());
1222 /// Free heap allocated ghashes.
1223 void TypeMerger::clearGHashes() {
1224 for (TpiSource *src : ctx.tpiSourceList) {
1225 if (src->ownedGHashes)
1226 delete[] src->ghashes.data();
1227 src->ghashes = {};
1228 src->isItemIndex.clear();
1229 src->uniqueTypes.clear();
1233 // Fill in a TPI or IPI index map using ghashes. For each source type, use its
1234 // ghash to lookup its final type index in the PDB, and store that in the map.
1235 void TpiSource::fillMapFromGHashes(GHashState *g) {
1236 for (size_t i = 0, e = ghashes.size(); i < e; ++i) {
1237 TypeIndex fakeCellIndex = indexMapStorage[i];
1238 if (fakeCellIndex.isSimple())
1239 indexMapStorage[i] = fakeCellIndex;
1240 else
1241 indexMapStorage[i] =
1242 loadPdbTypeIndexFromCell(g, fakeCellIndex.toArrayIndex());