1 //===- DebugTypes.cpp -----------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
9 #include "DebugTypes.h"
10 #include "COFFLinkerContext.h"
11 #include "Chunks.h"
12 #include "Driver.h"
13 #include "InputFiles.h"
14 #include "PDB.h"
15 #include "TypeMerger.h"
16 #include "lld/Common/ErrorHandler.h"
17 #include "lld/Common/Memory.h"
18 #include "llvm/ADT/StringExtras.h"
19 #include "llvm/DebugInfo/CodeView/TypeIndexDiscovery.h"
20 #include "llvm/DebugInfo/CodeView/TypeRecord.h"
21 #include "llvm/DebugInfo/CodeView/TypeRecordHelpers.h"
22 #include "llvm/DebugInfo/CodeView/TypeStreamMerger.h"
23 #include "llvm/DebugInfo/PDB/GenericError.h"
24 #include "llvm/DebugInfo/PDB/Native/InfoStream.h"
25 #include "llvm/DebugInfo/PDB/Native/NativeSession.h"
26 #include "llvm/DebugInfo/PDB/Native/PDBFile.h"
27 #include "llvm/DebugInfo/PDB/Native/TpiHashing.h"
28 #include "llvm/DebugInfo/PDB/Native/TpiStream.h"
29 #include "llvm/Support/FormatVariadic.h"
30 #include "llvm/Support/Parallel.h"
31 #include "llvm/Support/Path.h"
32 #include "llvm/Support/TimeProfiler.h"
34 using namespace llvm;
35 using namespace llvm::codeview;
36 using namespace lld;
37 using namespace lld::coff;
39 namespace {
40 class TypeServerIpiSource;
42 // The TypeServerSource class represents a PDB type server, a file referenced by
43 // OBJ files compiled with MSVC /Zi. A single PDB can be shared by several OBJ
44 // files, therefore there must be only one instance per group of OBJs. The file path
45 // is discovered from the dependent OBJ's debug type stream. The
46 // TypeServerSource object is then queued and loaded by the COFF Driver. The
47 // debug type stream for such PDB files will be merged first in the final PDB,
48 // before any dependent OBJ.
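// (For reference: an OBJ built with /Zi typically carries a single
// LF_TYPESERVER2 record in its .debug$T section, holding the PDB path and
// GUID that this class is keyed on; see UseTypeServerSource below.)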
49 class TypeServerSource : public TpiSource {
50 public:
51 explicit TypeServerSource(COFFLinkerContext &ctx, PDBInputFile *f)
52 : TpiSource(ctx, PDB, nullptr), pdbInputFile(f) {
53 if (f->loadErrorStr)
54 return;
55 pdb::PDBFile &file = f->session->getPDBFile();
56 auto expectedInfo = file.getPDBInfoStream();
57 if (!expectedInfo)
58 return;
59 Guid = expectedInfo->getGuid();
60 auto it = ctx.typeServerSourceMappings.emplace(Guid, this);
61 if (!it.second) {
62 // If we hit this, we have a GUID collision between two PDB files.
63 // This can happen if the PDB GUID is invalid or if we are really
64 // unlucky. In that case we fall back on a straight file-system lookup.
65 it.first->second = nullptr;
69 Error mergeDebugT(TypeMerger *m) override;
71 void loadGHashes() override;
72 void remapTpiWithGHashes(GHashState *g) override;
74 bool isDependency() const override { return true; }
76 PDBInputFile *pdbInputFile = nullptr;
78 // TpiSource for IPI stream.
79 TypeServerIpiSource *ipiSrc = nullptr;
81 // The PDB signature GUID.
82 codeview::GUID Guid;
85 // Companion to TypeServerSource. Stores the index map for the IPI stream in the
86 // PDB. Modeling PDBs with two sources for TPI and IPI helps establish the
87 // invariant of one type index space per source.
88 class TypeServerIpiSource : public TpiSource {
89 public:
90 explicit TypeServerIpiSource(COFFLinkerContext &ctx)
91 : TpiSource(ctx, PDBIpi, nullptr) {}
93 friend class TypeServerSource;
95 // All of the TpiSource methods are no-ops. The parent TypeServerSource
96 // handles both TPI and IPI.
97 Error mergeDebugT(TypeMerger *m) override { return Error::success(); }
98 void loadGHashes() override {}
99 void remapTpiWithGHashes(GHashState *g) override {}
100 bool isDependency() const override { return true; }
103 // This class represents the debug type stream of an OBJ file that depends on a
104 // PDB type server (see TypeServerSource).
105 class UseTypeServerSource : public TpiSource {
106 Expected<TypeServerSource *> getTypeServerSource();
108 public:
109 UseTypeServerSource(COFFLinkerContext &ctx, ObjFile *f, TypeServer2Record ts)
110 : TpiSource(ctx, UsingPDB, f), typeServerDependency(ts) {}
112 Error mergeDebugT(TypeMerger *m) override;
114 // No need to load ghashes from /Zi objects.
115 void loadGHashes() override {}
116 void remapTpiWithGHashes(GHashState *g) override;
119 // Information about the PDB type server dependency, which needs to be loaded
120 // before merging this OBJ.
120 TypeServer2Record typeServerDependency;
123 // This class represents the debug type stream of a Microsoft precompiled
124 // headers OBJ (PCH OBJ). This OBJ kind needs to be merged first in the output
125 // PDB, before any other OBJs that depend on it. Note that only MSVC generates
126 // such files; clang does not.
127 class PrecompSource : public TpiSource {
128 public:
129 PrecompSource(COFFLinkerContext &ctx, ObjFile *f) : TpiSource(ctx, PCH, f) {
130 // If the S_OBJNAME record contains the PCH signature, we'll register this
131 // source file right away.
132 registerMapping();
135 Error mergeDebugT(TypeMerger *m) override;
137 void loadGHashes() override;
139 bool isDependency() const override { return true; }
141 private:
142 void registerMapping();
144 // Whether this precomp OBJ was recorded in the precompSourceMappings map.
145 // Only happens if the file->pchSignature is valid.
146 bool registered = false;
149 // This class represents the debug type stream of an OBJ file that depends on a
150 // Microsoft precompiled headers OBJ (see PrecompSource).
151 class UsePrecompSource : public TpiSource {
152 public:
153 UsePrecompSource(COFFLinkerContext &ctx, ObjFile *f, PrecompRecord precomp)
154 : TpiSource(ctx, UsingPCH, f), precompDependency(precomp) {}
156 Error mergeDebugT(TypeMerger *m) override;
158 void loadGHashes() override;
159 void remapTpiWithGHashes(GHashState *g) override;
161 private:
162 Error mergeInPrecompHeaderObj();
164 PrecompSource *findObjByName(StringRef fileNameOnly);
165 PrecompSource *findPrecompSource(ObjFile *file, PrecompRecord &pr);
166 Expected<PrecompSource *> findPrecompMap(ObjFile *file, PrecompRecord &pr);
168 public:
169 // Information about the Precomp OBJ dependency, which needs to be loaded
170 // before merging this OBJ.
171 PrecompRecord precompDependency;
173 } // namespace
175 TpiSource::TpiSource(COFFLinkerContext &ctx, TpiKind k, ObjFile *f)
176 : ctx(ctx), kind(k), tpiSrcIdx(ctx.tpiSourceList.size()), file(f) {
177 ctx.addTpiSource(this);
180 // Vtable key method.
181 TpiSource::~TpiSource() {
182 // Silence any assertions about unchecked errors.
183 consumeError(std::move(typeMergingError));
186 TpiSource *lld::coff::makeTpiSource(COFFLinkerContext &ctx, ObjFile *file) {
187 return make<TpiSource>(ctx, TpiSource::Regular, file);
190 TpiSource *lld::coff::makeTypeServerSource(COFFLinkerContext &ctx,
191 PDBInputFile *pdbInputFile) {
192 // Type server sources come in pairs: the TPI stream, and the IPI stream.
193 auto *tpiSource = make<TypeServerSource>(ctx, pdbInputFile);
194 if (pdbInputFile->session->getPDBFile().hasPDBIpiStream())
195 tpiSource->ipiSrc = make<TypeServerIpiSource>(ctx);
196 return tpiSource;
199 TpiSource *lld::coff::makeUseTypeServerSource(COFFLinkerContext &ctx,
200 ObjFile *file,
201 TypeServer2Record ts) {
202 return make<UseTypeServerSource>(ctx, file, ts);
205 TpiSource *lld::coff::makePrecompSource(COFFLinkerContext &ctx, ObjFile *file) {
206 return make<PrecompSource>(ctx, file);
209 TpiSource *lld::coff::makeUsePrecompSource(COFFLinkerContext &ctx,
210 ObjFile *file,
211 PrecompRecord precomp) {
212 return make<UsePrecompSource>(ctx, file, precomp);
215 bool TpiSource::remapTypeIndex(TypeIndex &ti, TiRefKind refKind) const {
216 if (ti.isSimple())
217 return true;
219 // This can be an item index or a type index. Choose the appropriate map.
220 ArrayRef<TypeIndex> tpiOrIpiMap =
221 (refKind == TiRefKind::IndexRef) ? ipiMap : tpiMap;
222 if (ti.toArrayIndex() >= tpiOrIpiMap.size())
223 return false;
224 ti = tpiOrIpiMap[ti.toArrayIndex()];
225 return true;
228 void TpiSource::remapRecord(MutableArrayRef<uint8_t> rec,
229 ArrayRef<TiReference> typeRefs) {
230 MutableArrayRef<uint8_t> contents = rec.drop_front(sizeof(RecordPrefix));
231 for (const TiReference &ref : typeRefs) {
232 unsigned byteSize = ref.Count * sizeof(TypeIndex);
233 if (contents.size() < ref.Offset + byteSize)
234 Fatal(ctx) << "symbol record too short";
236 MutableArrayRef<TypeIndex> indices(
237 reinterpret_cast<TypeIndex *>(contents.data() + ref.Offset), ref.Count);
238 for (TypeIndex &ti : indices) {
239 if (!remapTypeIndex(ti, ref.Kind)) {
240 if (ctx.config.verbose) {
241 uint16_t kind =
242 reinterpret_cast<const RecordPrefix *>(rec.data())->RecordKind;
243 StringRef fname = file ? file->getName() : "<unknown PDB>";
244 Log(ctx) << "failed to remap type index in record of kind 0x"
245 << utohexstr(kind) << " in " << fname << " with bad "
246 << (ref.Kind == TiRefKind::IndexRef ? "item" : "type")
247 << " index 0x" << utohexstr(ti.getIndex());
249 ti = TypeIndex(SimpleTypeKind::NotTranslated);
250 continue;
256 void TpiSource::remapTypesInTypeRecord(MutableArrayRef<uint8_t> rec) {
257 // TODO: Handle errors similar to symbols.
258 SmallVector<TiReference, 32> typeRefs;
259 discoverTypeIndices(CVType(rec), typeRefs);
260 remapRecord(rec, typeRefs);
263 bool TpiSource::remapTypesInSymbolRecord(MutableArrayRef<uint8_t> rec) {
264 // Discover type index references in the record. Skip it if we don't
265 // know where they are.
266 SmallVector<TiReference, 32> typeRefs;
267 if (!discoverTypeIndicesInSymbol(rec, typeRefs))
268 return false;
269 remapRecord(rec, typeRefs);
270 return true;
273 // A COFF .debug$H section is currently a clang extension. This function checks
274 // if a .debug$H section is in a format that we expect / understand, so that we
275 // can ignore any sections which are coincidentally also named .debug$H but do
276 // not contain a format we recognize.
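// Expected layout (a sketch, inferred from the checks below): an
// object::debug_h_header {Magic, Version, HashAlgorithm}, followed by one
// 8-byte truncated BLAKE3 hash per record in the matching .debug$T section.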
277 static bool canUseDebugH(ArrayRef<uint8_t> debugH) {
278 if (debugH.size() < sizeof(object::debug_h_header))
279 return false;
280 auto *header =
281 reinterpret_cast<const object::debug_h_header *>(debugH.data());
282 debugH = debugH.drop_front(sizeof(object::debug_h_header));
283 return header->Magic == COFF::DEBUG_HASHES_SECTION_MAGIC &&
284 header->Version == 0 &&
285 header->HashAlgorithm == uint16_t(GlobalTypeHashAlg::BLAKE3) &&
286 (debugH.size() % 8 == 0);
289 static std::optional<ArrayRef<uint8_t>> getDebugH(ObjFile *file) {
290 SectionChunk *sec =
291 SectionChunk::findByName(file->getDebugChunks(), ".debug$H");
292 if (!sec)
293 return std::nullopt;
294 ArrayRef<uint8_t> contents = sec->getContents();
295 if (!canUseDebugH(contents))
296 return std::nullopt;
297 return contents;
300 static ArrayRef<GloballyHashedType>
301 getHashesFromDebugH(ArrayRef<uint8_t> debugH) {
302 assert(canUseDebugH(debugH));
303 debugH = debugH.drop_front(sizeof(object::debug_h_header));
304 uint32_t count = debugH.size() / sizeof(GloballyHashedType);
305 return {reinterpret_cast<const GloballyHashedType *>(debugH.data()), count};
308 // Merge .debug$T for a generic object file.
309 Error TpiSource::mergeDebugT(TypeMerger *m) {
310 assert(!ctx.config.debugGHashes &&
311 "use remapTpiWithGHashes when ghash is enabled");
313 CVTypeArray types;
314 BinaryStreamReader reader(file->debugTypes, llvm::endianness::little);
315 cantFail(reader.readArray(types, reader.getLength()));
317 // When dealing with PCH.OBJ, some indices were already merged.
318 unsigned nbHeadIndices = indexMapStorage.size();
320 std::optional<PCHMergerInfo> pchInfo;
321 if (auto err = mergeTypeAndIdRecords(m->idTable, m->typeTable,
322 indexMapStorage, types, pchInfo))
323 Fatal(ctx) << "codeview::mergeTypeAndIdRecords failed: "
324 << toString(std::move(err));
325 if (pchInfo) {
326 file->pchSignature = pchInfo->PCHSignature;
327 endPrecompIdx = pchInfo->EndPrecompIndex;
330 // In an object, there is only one mapping for both types and items.
331 tpiMap = indexMapStorage;
332 ipiMap = indexMapStorage;
334 if (ctx.config.showSummary) {
335 nbTypeRecords = indexMapStorage.size() - nbHeadIndices;
336 nbTypeRecordsBytes = reader.getLength();
337 // Count how many times we saw each type record in our input. This
338 // calculation requires a second pass over the type records to classify each
339 // record as either a type or an id (item) record. This is slow, but it only
340 // runs when collecting statistics.
341 m->tpiCounts.resize(m->getTypeTable().size());
342 m->ipiCounts.resize(m->getIDTable().size());
343 uint32_t srcIdx = nbHeadIndices;
344 for (const CVType &ty : types) {
345 TypeIndex dstIdx = tpiMap[srcIdx++];
346 // Type merging may fail, so a complex source type may become the simple
347 // NotTranslated type, which cannot be used as an array index.
348 if (dstIdx.isSimple())
349 continue;
350 SmallVectorImpl<uint32_t> &counts =
351 isIdRecord(ty.kind()) ? m->ipiCounts : m->tpiCounts;
352 ++counts[dstIdx.toArrayIndex()];
356 return Error::success();
359 // Merge types from a type server PDB.
360 Error TypeServerSource::mergeDebugT(TypeMerger *m) {
361 assert(!ctx.config.debugGHashes &&
362 "use remapTpiWithGHashes when ghash is enabled");
364 pdb::PDBFile &pdbFile = pdbInputFile->session->getPDBFile();
365 Expected<pdb::TpiStream &> expectedTpi = pdbFile.getPDBTpiStream();
366 if (auto e = expectedTpi.takeError())
367 Fatal(ctx) << "Type server does not have TPI stream: "
368 << toString(std::move(e));
369 pdb::TpiStream *maybeIpi = nullptr;
370 if (pdbFile.hasPDBIpiStream()) {
371 Expected<pdb::TpiStream &> expectedIpi = pdbFile.getPDBIpiStream();
372 if (auto e = expectedIpi.takeError())
373 Fatal(ctx) << "Error getting type server IPI stream: "
374 << toString(std::move(e));
375 maybeIpi = &*expectedIpi;
378 // Merge TPI first, because the IPI stream will reference type indices.
379 if (auto err = mergeTypeRecords(m->typeTable, indexMapStorage,
380 expectedTpi->typeArray()))
381 Fatal(ctx) << "codeview::mergeTypeRecords failed: "
382 << toString(std::move(err));
383 tpiMap = indexMapStorage;
385 // Merge IPI.
386 if (maybeIpi) {
387 if (auto err = mergeIdRecords(m->idTable, tpiMap, ipiSrc->indexMapStorage,
388 maybeIpi->typeArray()))
389 Fatal(ctx) << "codeview::mergeIdRecords failed: "
390 << toString(std::move(err));
391 ipiMap = ipiSrc->indexMapStorage;
394 if (ctx.config.showSummary) {
395 nbTypeRecords = tpiMap.size() + ipiMap.size();
396 nbTypeRecordsBytes =
397 expectedTpi->typeArray().getUnderlyingStream().getLength() +
398 (maybeIpi ? maybeIpi->typeArray().getUnderlyingStream().getLength()
399 : 0);
401 // Count how many times we saw each type record in our input. If a
402 // destination type index is present in the source to destination type index
403 // map, that means we saw it once in the input. Add it to our histogram.
404 m->tpiCounts.resize(m->getTypeTable().size());
405 m->ipiCounts.resize(m->getIDTable().size());
406 for (TypeIndex ti : tpiMap)
407 if (!ti.isSimple())
408 ++m->tpiCounts[ti.toArrayIndex()];
409 for (TypeIndex ti : ipiMap)
410 if (!ti.isSimple())
411 ++m->ipiCounts[ti.toArrayIndex()];
414 return Error::success();
417 Expected<TypeServerSource *> UseTypeServerSource::getTypeServerSource() {
418 const codeview::GUID &tsId = typeServerDependency.getGuid();
419 StringRef tsPath = typeServerDependency.getName();
421 TypeServerSource *tsSrc = nullptr;
422 auto it = ctx.typeServerSourceMappings.find(tsId);
423 if (it != ctx.typeServerSourceMappings.end()) {
424 tsSrc = (TypeServerSource *)it->second;
426 if (tsSrc == nullptr) {
427 // The file failed to load; look it up by name.
428 PDBInputFile *pdb = PDBInputFile::findFromRecordPath(ctx, tsPath, file);
429 if (!pdb)
430 return createFileError(tsPath, errorCodeToError(std::error_code(
431 ENOENT, std::generic_category())));
432 // If an error occurred during loading, return it now.
433 if (pdb->loadErrorStr)
434 return createFileError(
435 tsPath, make_error<StringError>(*pdb->loadErrorStr,
436 llvm::inconvertibleErrorCode()));
438 tsSrc = (TypeServerSource *)pdb->debugTypesObj;
440 // Just because a file with a matching name was found and it was an actual
441 // PDB file doesn't mean it matches. For it to match, the InfoStream's GUID
442 // must match the GUID specified in the TypeServer2 record.
443 if (tsSrc->Guid != tsId) {
444 return createFileError(tsPath,
445 make_error<pdb::PDBError>(
446 pdb::pdb_error_code::signature_out_of_date));
449 return tsSrc;
452 Error UseTypeServerSource::mergeDebugT(TypeMerger *m) {
453 Expected<TypeServerSource *> tsSrc = getTypeServerSource();
454 if (!tsSrc)
455 return tsSrc.takeError();
457 pdb::PDBFile &pdbSession = (*tsSrc)->pdbInputFile->session->getPDBFile();
458 auto expectedInfo = pdbSession.getPDBInfoStream();
459 if (!expectedInfo)
460 return expectedInfo.takeError();
462 // Reuse the type index map of the type server.
463 tpiMap = (*tsSrc)->tpiMap;
464 ipiMap = (*tsSrc)->ipiMap;
465 return Error::success();
468 static bool equalsPath(StringRef path1, StringRef path2) {
469 #if defined(_WIN32)
470 return path1.equals_insensitive(path2);
471 #else
472 return path1 == path2;
473 #endif
476 // Find an OBJ provided on the command line by its file name.
477 PrecompSource *UsePrecompSource::findObjByName(StringRef fileNameOnly) {
478 SmallString<128> currentPath;
479 for (auto kv : ctx.precompSourceMappings) {
480 StringRef currentFileName = sys::path::filename(kv.second->file->getName(),
481 sys::path::Style::windows);
483 // Compare based solely on the file name (link.exe behavior)
484 if (equalsPath(currentFileName, fileNameOnly))
485 return (PrecompSource *)kv.second;
487 return nullptr;
490 PrecompSource *UsePrecompSource::findPrecompSource(ObjFile *file,
491 PrecompRecord &pr) {
492 // Cross-compile warning: given that Clang doesn't generate LF_PRECOMP
493 // records, we assume the OBJ comes from a Windows build of cl.exe. Thus,
494 // the paths embedded in the OBJs are in the Windows format.
495 SmallString<128> prFileName =
496 sys::path::filename(pr.getPrecompFilePath(), sys::path::Style::windows);
498 auto it = ctx.precompSourceMappings.find(pr.getSignature());
499 if (it != ctx.precompSourceMappings.end()) {
500 return (PrecompSource *)it->second;
502 // Lookup by name
503 return findObjByName(prFileName);
506 Expected<PrecompSource *> UsePrecompSource::findPrecompMap(ObjFile *file,
507 PrecompRecord &pr) {
508 PrecompSource *precomp = findPrecompSource(file, pr);
510 if (!precomp)
511 return createFileError(
512 pr.getPrecompFilePath(),
513 make_error<pdb::PDBError>(pdb::pdb_error_code::no_matching_pch));
515 // Don't rely on the PCH signature to validate the concordance between the PCH
516 // and the OBJ that uses it. However, we do validate here that the
517 // LF_ENDPRECOMP record index lines up with the number of type records
518 // LF_PRECOMP is expecting.
519 if (precomp->endPrecompIdx != pr.getTypesCount())
520 return createFileError(
521 toString(file),
522 make_error<pdb::PDBError>(pdb::pdb_error_code::no_matching_pch));
524 return precomp;
527 /// Merges a precompiled headers TPI map into the current TPI map. The
528 /// precompiled headers object will also be loaded and remapped in the
529 /// process.
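/// For example (a sketch): if the /Yc PCH OBJ contributed N type records, then
/// indices [0x1000, 0x1000 + N) in this /Yu OBJ refer to those records, so the
/// PCH's already-remapped indices are prepended to indexMapStorage below before
/// this OBJ's own records are merged.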
530 Error UsePrecompSource::mergeInPrecompHeaderObj() {
531 auto e = findPrecompMap(file, precompDependency);
532 if (!e)
533 return e.takeError();
535 PrecompSource *precompSrc = *e;
536 if (precompSrc->tpiMap.empty())
537 return Error::success();
539 assert(precompDependency.getStartTypeIndex() ==
540 TypeIndex::FirstNonSimpleIndex);
541 assert(precompDependency.getTypesCount() <= precompSrc->tpiMap.size());
542 // Use the previously remapped index map from the precompiled headers.
543 indexMapStorage.insert(indexMapStorage.begin(), precompSrc->tpiMap.begin(),
544 precompSrc->tpiMap.begin() +
545 precompDependency.getTypesCount());
547 return Error::success();
550 Error UsePrecompSource::mergeDebugT(TypeMerger *m) {
551 // This object was compiled with /Yu, so process the corresponding
552 // precompiled headers object (/Yc) first. Some type indices in the current
553 // object are referencing data in the precompiled headers object, so we need
554 // both to be loaded.
555 if (Error e = mergeInPrecompHeaderObj())
556 return e;
558 return TpiSource::mergeDebugT(m);
561 Error PrecompSource::mergeDebugT(TypeMerger *m) {
562 // In some cases, the S_OBJNAME record doesn't contain the PCH signature.
563 // The signature comes later with the LF_ENDPRECOMP record, so we first need
564 // to merge in all the .PCH.OBJ file type records, before registering below.
565 if (Error e = TpiSource::mergeDebugT(m))
566 return e;
568 registerMapping();
570 return Error::success();
573 void PrecompSource::registerMapping() {
574 if (registered)
575 return;
576 if (file->pchSignature && *file->pchSignature) {
577 auto it = ctx.precompSourceMappings.emplace(*file->pchSignature, this);
578 if (!it.second)
579 Fatal(ctx)
580 << "a PCH object with the same signature has already been provided ("
581 << toString(it.first->second->file) << " and " << toString(file)
582 << ")";
583 registered = true;
587 //===----------------------------------------------------------------------===//
588 // Parallel GHash type merging implementation.
589 //===----------------------------------------------------------------------===//
591 void TpiSource::loadGHashes() {
592 if (std::optional<ArrayRef<uint8_t>> debugH = getDebugH(file)) {
593 ghashes = getHashesFromDebugH(*debugH);
594 ownedGHashes = false;
595 } else {
596 CVTypeArray types;
597 BinaryStreamReader reader(file->debugTypes, llvm::endianness::little);
598 cantFail(reader.readArray(types, reader.getLength()));
599 assignGHashesFromVector(GloballyHashedType::hashTypes(types));
602 fillIsItemIndexFromDebugT();
605 // Copies ghashes from a vector into an array. These are long lived, so it's
606 // worth the time to copy them into an appropriately sized array to reduce
607 // memory usage.
608 void TpiSource::assignGHashesFromVector(
609 std::vector<GloballyHashedType> &&hashVec) {
610 if (hashVec.empty())
611 return;
612 GloballyHashedType *hashes = new GloballyHashedType[hashVec.size()];
613 memcpy(hashes, hashVec.data(), hashVec.size() * sizeof(GloballyHashedType));
614 ghashes = ArrayRef(hashes, hashVec.size());
615 ownedGHashes = true;
618 // Faster way to iterate type records. forEachTypeChecked is faster than
619 // iterating CVTypeArray. It avoids virtual readBytes calls in inner loops.
620 static void forEachTypeChecked(ArrayRef<uint8_t> types,
621 function_ref<void(const CVType &)> fn) {
622 checkError(
623 forEachCodeViewRecord<CVType>(types, [fn](const CVType &ty) -> Error {
624 fn(ty);
625 return Error::success();
626 }));
629 // Walk over file->debugTypes and fill in the isItemIndex bit vector.
630 // TODO: Store this information in .debug$H so that we don't have to recompute
631 // it. This recomputation is the main reason that parallel ghashing with one
632 // thread is slower than single-threaded ghashing.
633 void TpiSource::fillIsItemIndexFromDebugT() {
634 uint32_t index = 0;
635 isItemIndex.resize(ghashes.size());
636 forEachTypeChecked(file->debugTypes, [&](const CVType &ty) {
637 if (isIdRecord(ty.kind()))
638 isItemIndex.set(index);
639 ++index;
643 void TpiSource::mergeTypeRecord(TypeIndex curIndex, CVType ty) {
644 // Decide if the merged type goes into TPI or IPI.
645 bool isItem = isIdRecord(ty.kind());
646 MergedInfo &merged = isItem ? mergedIpi : mergedTpi;
648 // Copy the type into our mutable buffer.
649 assert(ty.length() <= codeview::MaxRecordLength);
650 size_t offset = merged.recs.size();
651 size_t newSize = alignTo(ty.length(), 4);
652 merged.recs.resize(offset + newSize);
653 auto newRec = MutableArrayRef(&merged.recs[offset], newSize);
654 memcpy(newRec.data(), ty.data().data(), newSize);
656 // Fix up the record prefix and padding bytes if the record required resizing.
657 if (newSize != ty.length()) {
658 reinterpret_cast<RecordPrefix *>(newRec.data())->RecordLen = newSize - 2;
659 for (size_t i = ty.length(); i < newSize; ++i)
660 newRec[i] = LF_PAD0 + (newSize - i);
663 // Remap the type indices in the new record.
664 remapTypesInTypeRecord(newRec);
665 uint32_t pdbHash = check(pdb::hashTypeRecord(CVType(newRec)));
666 merged.recSizes.push_back(static_cast<uint16_t>(newSize));
667 merged.recHashes.push_back(pdbHash);
669 // Retain a mapping from PDB function id to PDB function type. This mapping is
670 // used during symbol processing to rewrite S_GPROC32_ID symbols to S_GPROC32
671 // symbols.
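// (For example, an S_GPROC32_ID symbol whose LF_FUNC_ID maps to funcId here
// is later rewritten as an S_GPROC32 symbol referring to the recorded
// function type.)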
672 if (ty.kind() == LF_FUNC_ID || ty.kind() == LF_MFUNC_ID) {
673 bool success = ty.length() >= 12;
674 TypeIndex funcId = curIndex;
675 if (success)
676 success &= remapTypeIndex(funcId, TiRefKind::IndexRef);
677 TypeIndex funcType =
678 *reinterpret_cast<const TypeIndex *>(&newRec.data()[8]);
679 if (success) {
680 funcIdToType.push_back({funcId, funcType});
681 } else {
682 StringRef fname = file ? file->getName() : "<unknown PDB>";
683 Warn(ctx) << "corrupt LF_[M]FUNC_ID record 0x"
684 << utohexstr(curIndex.getIndex()) << " in " << fname;
689 void TpiSource::mergeUniqueTypeRecords(ArrayRef<uint8_t> typeRecords,
690 TypeIndex beginIndex) {
691 // Re-sort the list of unique types by index.
692 if (kind == PDB)
693 assert(llvm::is_sorted(uniqueTypes));
694 else
695 llvm::sort(uniqueTypes);
697 // Accumulate all the unique types into one buffer in mergedTypes.
698 uint32_t ghashIndex = 0;
699 auto nextUniqueIndex = uniqueTypes.begin();
700 assert(mergedTpi.recs.empty());
701 assert(mergedIpi.recs.empty());
703 // Compute the number of elements in advance to avoid std::vector resizes.
704 unsigned nbTpiRecs = 0;
705 unsigned nbIpiRecs = 0;
706 forEachTypeChecked(typeRecords, [&](const CVType &ty) {
707 if (nextUniqueIndex != uniqueTypes.end() &&
708 *nextUniqueIndex == ghashIndex) {
709 assert(ty.length() <= codeview::MaxRecordLength);
710 size_t newSize = alignTo(ty.length(), 4);
711 (isIdRecord(ty.kind()) ? nbIpiRecs : nbTpiRecs) += newSize;
712 ++nextUniqueIndex;
714 ++ghashIndex;
716 mergedTpi.recs.reserve(nbTpiRecs);
717 mergedIpi.recs.reserve(nbIpiRecs);
719 // Do the actual type merge.
720 ghashIndex = 0;
721 nextUniqueIndex = uniqueTypes.begin();
722 forEachTypeChecked(typeRecords, [&](const CVType &ty) {
723 if (nextUniqueIndex != uniqueTypes.end() &&
724 *nextUniqueIndex == ghashIndex) {
725 mergeTypeRecord(beginIndex + ghashIndex, ty);
726 ++nextUniqueIndex;
728 ++ghashIndex;
730 assert(nextUniqueIndex == uniqueTypes.end() &&
731 "failed to merge all desired records");
732 assert(uniqueTypes.size() ==
733 mergedTpi.recSizes.size() + mergedIpi.recSizes.size() &&
734 "missing desired record");
737 void TpiSource::remapTpiWithGHashes(GHashState *g) {
738 assert(ctx.config.debugGHashes && "ghashes must be enabled");
739 fillMapFromGHashes(g);
740 tpiMap = indexMapStorage;
741 ipiMap = indexMapStorage;
742 mergeUniqueTypeRecords(file->debugTypes);
743 // TODO: Free all unneeded ghash resources now that we have a full index map.
745 if (ctx.config.showSummary) {
746 nbTypeRecords = ghashes.size();
747 nbTypeRecordsBytes = file->debugTypes.size();
751 // PDBs do not actually store global hashes, so when merging a type server
752 // PDB we have to synthesize global hashes. To do this, we first synthesize
753 // global hashes for the TPI stream, since it is independent, then we
754 // synthesize hashes for the IPI stream, using the hashes for the TPI stream
755 // as inputs.
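// (ID records in the IPI stream embed type indices that point into the TPI
// stream, which is why the TPI ghashes are needed as inputs when hashing the
// IPI stream.)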
756 void TypeServerSource::loadGHashes() {
757 // Don't hash twice.
758 if (!ghashes.empty())
759 return;
760 pdb::PDBFile &pdbFile = pdbInputFile->session->getPDBFile();
762 // Hash TPI stream.
763 Expected<pdb::TpiStream &> expectedTpi = pdbFile.getPDBTpiStream();
764 if (auto e = expectedTpi.takeError())
765 Fatal(ctx) << "Type server does not have TPI stream: "
766 << toString(std::move(e));
767 assignGHashesFromVector(
768 GloballyHashedType::hashTypes(expectedTpi->typeArray()));
769 isItemIndex.resize(ghashes.size());
771 // Hash IPI stream, which depends on TPI ghashes.
772 if (!pdbFile.hasPDBIpiStream())
773 return;
774 Expected<pdb::TpiStream &> expectedIpi = pdbFile.getPDBIpiStream();
775 if (auto e = expectedIpi.takeError())
776 Fatal(ctx) << "error retrieving IPI stream: " << toString(std::move(e));
777 ipiSrc->assignGHashesFromVector(
778 GloballyHashedType::hashIds(expectedIpi->typeArray(), ghashes));
780 // The IPI stream isItemIndex bitvector should be all ones.
781 ipiSrc->isItemIndex.resize(ipiSrc->ghashes.size());
782 ipiSrc->isItemIndex.set(0, ipiSrc->ghashes.size());
785 // Flatten discontiguous PDB type arrays to bytes so that we can use
786 // forEachTypeChecked instead of CVTypeArray iteration. Copying all types from
787 // type servers is faster than iterating all object files compiled with /Z7 with
788 // CVTypeArray, which has high overheads due to the virtual interface of
789 // BinaryStream::readBytes.
790 static ArrayRef<uint8_t> typeArrayToBytes(const CVTypeArray &types) {
791 BinaryStreamRef stream = types.getUnderlyingStream();
792 ArrayRef<uint8_t> debugTypes;
793 checkError(stream.readBytes(0, stream.getLength(), debugTypes));
794 return debugTypes;
797 // Merge types from a type server PDB.
798 void TypeServerSource::remapTpiWithGHashes(GHashState *g) {
799 assert(ctx.config.debugGHashes && "ghashes must be enabled");
801 // IPI merging depends on TPI, so do TPI first, then do IPI. No need to
802 // propagate errors, those should've been handled during ghash loading.
803 pdb::PDBFile &pdbFile = pdbInputFile->session->getPDBFile();
804 pdb::TpiStream &tpi = check(pdbFile.getPDBTpiStream());
805 fillMapFromGHashes(g);
806 tpiMap = indexMapStorage;
807 mergeUniqueTypeRecords(typeArrayToBytes(tpi.typeArray()));
808 if (pdbFile.hasPDBIpiStream()) {
809 pdb::TpiStream &ipi = check(pdbFile.getPDBIpiStream());
810 ipiSrc->indexMapStorage.resize(ipiSrc->ghashes.size());
811 ipiSrc->fillMapFromGHashes(g);
812 ipiMap = ipiSrc->indexMapStorage;
813 ipiSrc->tpiMap = tpiMap;
814 ipiSrc->ipiMap = ipiMap;
815 ipiSrc->mergeUniqueTypeRecords(typeArrayToBytes(ipi.typeArray()));
817 if (ctx.config.showSummary) {
818 nbTypeRecords = ipiSrc->ghashes.size();
819 nbTypeRecordsBytes = ipi.typeArray().getUnderlyingStream().getLength();
823 if (ctx.config.showSummary) {
824 nbTypeRecords += ghashes.size();
825 nbTypeRecordsBytes += tpi.typeArray().getUnderlyingStream().getLength();
829 void UseTypeServerSource::remapTpiWithGHashes(GHashState *g) {
830 // No remapping to do with /Zi objects. Simply use the index map from the type
831 // server. Errors should have been reported earlier. Symbols from this object
832 // will be ignored.
833 Expected<TypeServerSource *> maybeTsSrc = getTypeServerSource();
834 if (!maybeTsSrc) {
835 typeMergingError =
836 joinErrors(std::move(typeMergingError), maybeTsSrc.takeError());
837 return;
839 TypeServerSource *tsSrc = *maybeTsSrc;
840 tpiMap = tsSrc->tpiMap;
841 ipiMap = tsSrc->ipiMap;
844 void PrecompSource::loadGHashes() {
845 if (getDebugH(file)) {
846 Warn(ctx) << "ignoring .debug$H section; pch with ghash is not implemented";
849 uint32_t ghashIdx = 0;
850 std::vector<GloballyHashedType> hashVec;
851 forEachTypeChecked(file->debugTypes, [&](const CVType &ty) {
852 // Remember the index of the LF_ENDPRECOMP record so it can be excluded from
853 // the PDB. There must be an entry in the list of ghashes so that the type
854 // indexes of the following records in the /Yc PCH object line up.
855 if (ty.kind() == LF_ENDPRECOMP) {
856 EndPrecompRecord endPrecomp;
857 cantFail(TypeDeserializer::deserializeAs<EndPrecompRecord>(
858 const_cast<CVType &>(ty), endPrecomp));
859 file->pchSignature = endPrecomp.getSignature();
860 registerMapping();
861 endPrecompIdx = ghashIdx;
864 hashVec.push_back(GloballyHashedType::hashType(ty, hashVec, hashVec));
865 isItemIndex.push_back(isIdRecord(ty.kind()));
866 ++ghashIdx;
868 assignGHashesFromVector(std::move(hashVec));
871 void UsePrecompSource::loadGHashes() {
872 auto e = findPrecompMap(file, precompDependency);
873 if (!e) {
874 Warn(ctx) << e.takeError();
875 return;
878 PrecompSource *pchSrc = *e;
880 // To compute ghashes of a /Yu object file, we need to build on the ghashes of
881 // the /Yc PCH object. After we are done hashing, discard the ghashes from the
882 // PCH source so we don't unnecessarily try to deduplicate them.
883 std::vector<GloballyHashedType> hashVec =
884 pchSrc->ghashes.take_front(precompDependency.getTypesCount());
885 forEachTypeChecked(file->debugTypes, [&](const CVType &ty) {
886 hashVec.push_back(GloballyHashedType::hashType(ty, hashVec, hashVec));
887 isItemIndex.push_back(isIdRecord(ty.kind()));
889 hashVec.erase(hashVec.begin(),
890 hashVec.begin() + precompDependency.getTypesCount());
891 assignGHashesFromVector(std::move(hashVec));
894 void UsePrecompSource::remapTpiWithGHashes(GHashState *g) {
895 fillMapFromGHashes(g);
896 // This object was compiled with /Yu, so process the corresponding
897 // precompiled headers object (/Yc) first. Some type indices in the current
898 // object are referencing data in the precompiled headers object, so we need
899 // both to be loaded.
900 if (Error e = mergeInPrecompHeaderObj()) {
901 typeMergingError = joinErrors(std::move(typeMergingError), std::move(e));
902 return;
905 tpiMap = indexMapStorage;
906 ipiMap = indexMapStorage;
907 mergeUniqueTypeRecords(file->debugTypes,
908 TypeIndex(precompDependency.getStartTypeIndex() +
909 precompDependency.getTypesCount()));
910 if (ctx.config.showSummary) {
911 nbTypeRecords = ghashes.size();
912 nbTypeRecordsBytes = file->debugTypes.size();
916 namespace {
917 /// A concurrent hash table for global type hashing. It is based on this paper:
918 /// Concurrent Hash Tables: Fast and General(?)!
919 /// https://dl.acm.org/doi/10.1145/3309206
921 /// This hash table is meant to be used in two phases:
922 /// 1. concurrent insertions
923 /// 2. concurrent reads
924 /// It does not support lookup, deletion, or rehashing. It uses linear probing.
926 /// The paper describes storing a key-value pair in two machine words.
927 /// Generally, the values stored in this map are type indices, and we can use
928 /// those values to recover the ghash key from a side table. This allows us to
929 /// shrink the table entries further at the cost of some loads, and sidesteps
930 /// the need for a 128 bit atomic compare-and-swap operation.
932 /// During insertion, a priority function is used to decide which insertion
933 /// should be preferred. This ensures that the output is deterministic. For
934 /// ghashing, lower tpiSrcIdx values (earlier inputs) are preferred.
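/// A sketch of the intended two-phase usage (see mergeTypesWithGHash below):
///   GHashTable table;
///   table.init(<sum of all input ghash counts>);      // size once, up front
///   // phase 1: concurrent inserts, remembering each insertion position
///   cellIdx = table.insert(ctx, ghash, cell);
///   // phase 2: read table.table[cellIdx] to recover the winning cell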
936 class GHashCell;
937 struct GHashTable {
938 GHashCell *table = nullptr;
939 uint32_t tableSize = 0;
941 GHashTable() = default;
942 ~GHashTable();
944 /// Initialize the table with the given size. Because the table cannot be
945 /// resized, the initial size of the table must be large enough to contain all
946 /// inputs, or insertion may not be able to find an empty cell.
947 void init(uint32_t newTableSize);
949 /// Insert the cell with the given ghash into the table. Return the insertion
950 /// position in the table. It is safe for the caller to store the insertion
951 /// position because the table cannot be resized.
952 uint32_t insert(COFFLinkerContext &ctx, GloballyHashedType ghash,
953 GHashCell newCell);
956 /// A ghash table cell for deduplicating types from TpiSources.
957 class GHashCell {
958 // Force "data" to be 64-bit aligned; otherwise, some versions of clang
959 // will generate calls to libatomic when using some versions of libstdc++
960 // on 32-bit targets. (Also, in theory, there could be a target where
961 // new[] doesn't always return an 8-byte-aligned allocation.)
962 alignas(sizeof(uint64_t)) uint64_t data = 0;
964 public:
965 GHashCell() = default;
967 // Construct data most to least significant so that sorting works well:
968 // - isItem
969 // - tpiSrcIdx
970 // - ghashIdx
971 // Add one to the tpiSrcIdx so that the 0th record from the 0th source has a
972 // non-zero representation.
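  // Worked example (assuming the layout above): GHashCell(/*isItem=*/false,
  // /*tpiSrcIdx=*/2, /*ghashIdx=*/7) packs to 0x0000'0003'0000'0007, since
  // tpiSrcIdx is stored biased by one in bits [32, 62].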
973 GHashCell(bool isItem, uint32_t tpiSrcIdx, uint32_t ghashIdx)
974 : data((uint64_t(isItem) << 63U) | (uint64_t(tpiSrcIdx + 1) << 32ULL) |
975 ghashIdx) {
976 assert(tpiSrcIdx == getTpiSrcIdx() && "round trip failure");
977 assert(ghashIdx == getGHashIdx() && "round trip failure");
980 explicit GHashCell(uint64_t data) : data(data) {}
982 // The empty cell is all zeros.
983 bool isEmpty() const { return data == 0ULL; }
985 /// Extract the tpiSrcIdx.
986 uint32_t getTpiSrcIdx() const {
987 return ((uint32_t)(data >> 32U) & 0x7FFFFFFF) - 1;
990 /// Extract the index into the ghash array of the TpiSource.
991 uint32_t getGHashIdx() const { return (uint32_t)data; }
993 bool isItem() const { return data & (1ULL << 63U); }
995 /// Get the ghash key for this cell.
996 GloballyHashedType getGHash(const COFFLinkerContext &ctx) const {
997 return ctx.tpiSourceList[getTpiSrcIdx()]->ghashes[getGHashIdx()];
1000 /// The priority function for the cell. The data is stored such that lower
1001 /// tpiSrcIdx and ghashIdx values are preferred, which means that type records
1002 /// from earlier sources are more likely to prevail.
1003 friend inline bool operator<(const GHashCell &l, const GHashCell &r) {
1004 return l.data < r.data;
1007 } // namespace
1009 namespace lld::coff {
1010 /// This type is just a wrapper around GHashTable with external linkage so it
1011 /// can be used from a header.
1012 struct GHashState {
1013 GHashTable table;
1015 } // namespace lld::coff
1017 GHashTable::~GHashTable() { delete[] table; }
1019 void GHashTable::init(uint32_t newTableSize) {
1020 table = new GHashCell[newTableSize];
1021 memset(table, 0, newTableSize * sizeof(GHashCell));
1022 tableSize = newTableSize;
1025 uint32_t GHashTable::insert(COFFLinkerContext &ctx, GloballyHashedType ghash,
1026 GHashCell newCell) {
1027 assert(!newCell.isEmpty() && "cannot insert empty cell value");
1029 // FIXME: The low bytes of SHA1 have low entropy for short records, which
1030 // type records are. Swap the byte order for better entropy. A better ghash
1031 // won't need this.
1032 uint32_t startIdx =
1033 llvm::byteswap<uint64_t>(*reinterpret_cast<uint64_t *>(&ghash)) %
1034 tableSize;
1036 // Do a linear probe starting at startIdx.
1037 uint32_t idx = startIdx;
1038 while (true) {
1039 // Run a compare and swap loop. There are four cases:
1040 // - cell is empty: CAS into place and return
1041 // - cell has matching key, earlier priority: do nothing, return
1042 // - cell has matching key, later priority: CAS into place and return
1043 // - cell has non-matching key: hash collision, probe next cell
1044 auto *cellPtr = reinterpret_cast<std::atomic<GHashCell> *>(&table[idx]);
1045 GHashCell oldCell(cellPtr->load());
1046 while (oldCell.isEmpty() || oldCell.getGHash(ctx) == ghash) {
1047 // Check if there is an existing ghash entry with a higher priority
1048 // (earlier ordering). If so, this is a duplicate, we are done.
1049 if (!oldCell.isEmpty() && oldCell < newCell)
1050 return idx;
1051 // Either the cell is empty, or our value is higher priority. Try to
1052 // compare and swap. If it succeeds, we are done.
1053 if (cellPtr->compare_exchange_weak(oldCell, newCell))
1054 return idx;
1055 // If the CAS failed, check this cell again.
1058 // Advance the probe. Wrap around to the beginning if we run off the end.
1059 ++idx;
1060 idx = idx == tableSize ? 0 : idx;
1061 if (idx == startIdx) {
1062 // If this becomes an issue, we could mark failure and rehash from the
1063 // beginning with a bigger table. There is no difference between rehashing
1064 // internally and starting over.
1065 report_fatal_error("ghash table is full");
1068 llvm_unreachable("left infloop");
1071 TypeMerger::TypeMerger(COFFLinkerContext &c, llvm::BumpPtrAllocator &alloc)
1072 : typeTable(alloc), idTable(alloc), ctx(c) {}
1074 TypeMerger::~TypeMerger() = default;
1076 void TypeMerger::mergeTypesWithGHash() {
1077 // Load ghashes. Do type servers and PCH objects first.
1079 llvm::TimeTraceScope timeScope("Load GHASHes");
1080 ScopedTimer t1(ctx.loadGHashTimer);
1081 parallelForEach(dependencySources,
1082 [&](TpiSource *source) { source->loadGHashes(); });
1083 parallelForEach(objectSources,
1084 [&](TpiSource *source) { source->loadGHashes(); });
1087 llvm::TimeTraceScope timeScope("Merge types (GHASH)");
1088 ScopedTimer t2(ctx.mergeGHashTimer);
1089 GHashState ghashState;
1091 // Estimate the size of the hash table needed to deduplicate ghashes. This *must*
1092 // be larger than the number of unique types, or hash table insertion may not
1093 // be able to find a vacant slot. Summing the input types guarantees this, but
1094 // it is a gross overestimate. The table size could be reduced to save memory,
1095 // but it would require implementing rehashing, and this table is generally
1096 // small compared to total memory usage, at eight bytes per input type record,
1097 // and most input type records are larger than eight bytes.
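  // (Rough arithmetic: at 8 bytes per cell, ten million input type records
  // need roughly 80 MB of table.)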
1098 size_t tableSize = 0;
1099 for (TpiSource *source : ctx.tpiSourceList)
1100 tableSize += source->ghashes.size();
1102 // Cap the table size so that we can use 32-bit cell indices. Type indices are
1103 // also 32-bit, so this is an inherent PDB file format limit anyway.
1104 tableSize =
1105 std::min(size_t(INT32_MAX) - TypeIndex::FirstNonSimpleIndex, tableSize);
1106 ghashState.table.init(static_cast<uint32_t>(tableSize));
1108 // Insert ghashes in parallel. During concurrent insertion, we cannot observe
1109 // the contents of the hash table cell, but we can remember the insertion
1110 // position. Because the table does not rehash, the position will not change
1111 // under insertion. After insertion is done, the value of the cell can be read
1112 // to retrieve the final PDB type index.
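  // (Until then, indexMapStorage temporarily holds ghash table cell indices
  // encoded as TypeIndex values; fillMapFromGHashes later replaces them with
  // real PDB type indices.)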
1113 parallelFor(0, ctx.tpiSourceList.size(), [&](size_t tpiSrcIdx) {
1114 TpiSource *source = ctx.tpiSourceList[tpiSrcIdx];
1115 source->indexMapStorage.resize(source->ghashes.size());
1116 for (uint32_t i = 0, e = source->ghashes.size(); i < e; i++) {
1117 if (source->shouldOmitFromPdb(i)) {
1118 source->indexMapStorage[i] = TypeIndex(SimpleTypeKind::NotTranslated);
1119 continue;
1121 GloballyHashedType ghash = source->ghashes[i];
1122 bool isItem = source->isItemIndex.test(i);
1123 uint32_t cellIdx =
1124 ghashState.table.insert(ctx, ghash, GHashCell(isItem, tpiSrcIdx, i));
1126 // Store the ghash cell index as a type index in indexMapStorage. Later
1127 // we will replace it with the PDB type index.
1128 source->indexMapStorage[i] = TypeIndex::fromArrayIndex(cellIdx);
1132 // Collect all non-empty cells and sort them. This will implicitly assign
1133 // destination type indices, and partition the entries into type records and
1134 // item records. It arranges types in this order:
1135 // - type records
1136 // - source 0, type 0...
1137 // - source 1, type 1...
1138 // - item records
1139 // - source 0, type 1...
1140 // - source 1, type 0...
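  // After the sort, an entry's position within its partition (types vs. items)
  // becomes its final PDB type or item index; see pdbTypeIndex below.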
1141 std::vector<GHashCell> entries;
1142 for (const GHashCell &cell : ArrayRef(ghashState.table.table, tableSize)) {
1143 if (!cell.isEmpty())
1144 entries.push_back(cell);
1146 parallelSort(entries, std::less<GHashCell>());
1147 Log(ctx) << formatv(
1148 "ghash table load factor: {0:p} (size {1} / capacity {2})\n",
1149 tableSize ? double(entries.size()) / tableSize : 0, entries.size(),
1150 tableSize);
1152 // Find out how many type and item indices there are.
1153 auto mid = llvm::lower_bound(entries, GHashCell(true, 0, 0));
1154 assert((mid == entries.end() || mid->isItem()) &&
1155 (mid == entries.begin() || !std::prev(mid)->isItem()) &&
1156 "midpoint is not midpoint");
1157 uint32_t numTypes = std::distance(entries.begin(), mid);
1158 uint32_t numItems = std::distance(mid, entries.end());
1159 Log(ctx) << "Tpi record count: " << numTypes;
1160 Log(ctx) << "Ipi record count: " << numItems;
1162 // Make a list of the "unique" type records to merge for each tpi source. Type
1163 // merging will skip indices not on this list. Store the destination PDB type
1164 // index for these unique types in the tpiMap for each source. The entries for
1165 // non-unique types will be filled in prior to type merging.
1166 for (uint32_t i = 0, e = entries.size(); i < e; ++i) {
1167 auto &cell = entries[i];
1168 uint32_t tpiSrcIdx = cell.getTpiSrcIdx();
1169 TpiSource *source = ctx.tpiSourceList[tpiSrcIdx];
1170 source->uniqueTypes.push_back(cell.getGHashIdx());
1172 // Update the ghash table to store the destination PDB type index in the
1173 // table.
1174 uint32_t pdbTypeIndex = i < numTypes ? i : i - numTypes;
1175 uint32_t ghashCellIndex =
1176 source->indexMapStorage[cell.getGHashIdx()].toArrayIndex();
1177 ghashState.table.table[ghashCellIndex] =
1178 GHashCell(cell.isItem(), cell.getTpiSrcIdx(), pdbTypeIndex);
1181 // In parallel, remap all types.
1182 for (TpiSource *source : dependencySources)
1183 source->remapTpiWithGHashes(&ghashState);
1184 parallelForEach(objectSources, [&](TpiSource *source) {
1185 source->remapTpiWithGHashes(&ghashState);
1188 // Build a global map from function ID to function type.
1189 for (TpiSource *source : ctx.tpiSourceList) {
1190 for (auto idToType : source->funcIdToType)
1191 funcIdToType.insert(idToType);
1192 source->funcIdToType.clear();
1195 clearGHashes();
1198 void TypeMerger::sortDependencies() {
1199 // Order dependencies first, but preserve the existing order.
1200 std::vector<TpiSource *> deps;
1201 std::vector<TpiSource *> objs;
1202 for (TpiSource *s : ctx.tpiSourceList)
1203 (s->isDependency() ? deps : objs).push_back(s);
1204 uint32_t numDeps = deps.size();
1205 uint32_t numObjs = objs.size();
1206 ctx.tpiSourceList = std::move(deps);
1207 ctx.tpiSourceList.insert(ctx.tpiSourceList.end(), objs.begin(), objs.end());
1208 for (uint32_t i = 0, e = ctx.tpiSourceList.size(); i < e; ++i)
1209 ctx.tpiSourceList[i]->tpiSrcIdx = i;
1210 dependencySources = ArrayRef(ctx.tpiSourceList.data(), numDeps);
1211 objectSources = ArrayRef(ctx.tpiSourceList.data() + numDeps, numObjs);
1214 /// Given the index into the ghash table for a particular type, return the type
1215 /// index for that type in the output PDB.
1216 static TypeIndex loadPdbTypeIndexFromCell(GHashState *g,
1217 uint32_t ghashCellIdx) {
1218 GHashCell cell = g->table.table[ghashCellIdx];
1219 return TypeIndex::fromArrayIndex(cell.getGHashIdx());
1222 /// Free heap allocated ghashes.
1223 void TypeMerger::clearGHashes() {
1224 for (TpiSource *src : ctx.tpiSourceList) {
1225 if (src->ownedGHashes)
1226 delete[] src->ghashes.data();
1227 src->ghashes = {};
1228 src->isItemIndex.clear();
1229 src->uniqueTypes.clear();
1233 // Fill in a TPI or IPI index map using ghashes. For each source type, use its
1234 // ghash to lookup its final type index in the PDB, and store that in the map.
1235 void TpiSource::fillMapFromGHashes(GHashState *g) {
1236 for (size_t i = 0, e = ghashes.size(); i < e; ++i) {
1237 TypeIndex fakeCellIndex = indexMapStorage[i];
1238 if (fakeCellIndex.isSimple())
1239 indexMapStorage[i] = fakeCellIndex;
1240 else
1241 indexMapStorage[i] =
1242 loadPdbTypeIndexFromCell(g, fakeCellIndex.toArrayIndex());