1 //===-- TypeStreamMerger.cpp ------------------------------------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "llvm/DebugInfo/CodeView/TypeStreamMerger.h"
10 #include "llvm/ADT/SmallVector.h"
11 #include "llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h"
12 #include "llvm/DebugInfo/CodeView/MergingTypeTableBuilder.h"
13 #include "llvm/DebugInfo/CodeView/TypeDeserializer.h"
14 #include "llvm/DebugInfo/CodeView/TypeIndex.h"
15 #include "llvm/DebugInfo/CodeView/TypeIndexDiscovery.h"
16 #include "llvm/DebugInfo/CodeView/TypeRecord.h"
17 #include "llvm/DebugInfo/CodeView/TypeRecordHelpers.h"
18 #include "llvm/Support/Error.h"
22 using namespace llvm::codeview
;
24 static inline size_t slotForIndex(TypeIndex Idx
) {
25 assert(!Idx
.isSimple() && "simple type indices have no slots");
26 return Idx
.getIndex() - TypeIndex::FirstNonSimpleIndex
;
31 /// Implementation of CodeView type stream merging.
33 /// A CodeView type stream is a series of records that reference each other
34 /// through type indices. A type index is either "simple", meaning it is less
35 /// than 0x1000 and refers to a builtin type, or it is complex, meaning it
36 /// refers to a prior type record in the current stream. The type index of a
37 /// record is equal to the number of records before it in the stream plus
40 /// Type records are only allowed to use type indices smaller than their own, so
41 /// a type stream is effectively a topologically sorted DAG. Cycles occurring in
42 /// the type graph of the source program are resolved with forward declarations
43 /// of composite types. This class implements the following type stream merging
44 /// algorithm, which relies on this DAG structure:
46 /// - Begin with a new empty stream, and a new empty hash table that maps from
47 /// type record contents to new type index.
48 /// - For each new type stream, maintain a map from source type index to
49 /// destination type index.
50 /// - For each record, copy it and rewrite its type indices to be valid in the
51 /// destination type stream.
52 /// - If the new type record is not already present in the destination stream
53 /// hash table, append it to the destination type stream, assign it the next
54 /// type index, and update the two hash tables.
55 /// - If the type record already exists in the destination stream, discard it
56 /// and update the type index map to forward the source type index to the
57 /// existing destination type index.
59 /// As an additional complication, type stream merging actually produces two
60 /// streams: an item (or IPI) stream and a type stream, as this is what is
61 /// actually stored in the final PDB. We choose which records go where by
62 /// looking at the record kind.
63 class TypeStreamMerger
{
65 explicit TypeStreamMerger(SmallVectorImpl
<TypeIndex
> &SourceToDest
)
66 : IndexMap(SourceToDest
) {
67 // When dealing with precompiled headers objects, all data in SourceToDest
68 // belongs to the precompiled headers object, and is assumed to be already
69 // remapped to the target PDB. Any forthcoming type that will be merged in
70 // might potentially back-reference this data. We also don't want to resolve
71 // twice the types in the precompiled object.
72 CurIndex
+= SourceToDest
.size();
75 static const TypeIndex Untranslated
;
77 // Local hashing entry points
78 Error
mergeTypesAndIds(MergingTypeTableBuilder
&DestIds
,
79 MergingTypeTableBuilder
&DestTypes
,
80 const CVTypeArray
&IdsAndTypes
,
81 std::optional
<PCHMergerInfo
> &PCHInfo
);
82 Error
mergeIdRecords(MergingTypeTableBuilder
&Dest
,
83 ArrayRef
<TypeIndex
> TypeSourceToDest
,
84 const CVTypeArray
&Ids
);
85 Error
mergeTypeRecords(MergingTypeTableBuilder
&Dest
,
86 const CVTypeArray
&Types
);
88 // Global hashing entry points
89 Error
mergeTypesAndIds(GlobalTypeTableBuilder
&DestIds
,
90 GlobalTypeTableBuilder
&DestTypes
,
91 const CVTypeArray
&IdsAndTypes
,
92 ArrayRef
<GloballyHashedType
> Hashes
,
93 std::optional
<PCHMergerInfo
> &PCHInfo
);
94 Error
mergeIdRecords(GlobalTypeTableBuilder
&Dest
,
95 ArrayRef
<TypeIndex
> TypeSourceToDest
,
96 const CVTypeArray
&Ids
,
97 ArrayRef
<GloballyHashedType
> Hashes
);
98 Error
mergeTypeRecords(GlobalTypeTableBuilder
&Dest
, const CVTypeArray
&Types
,
99 ArrayRef
<GloballyHashedType
> Hashes
,
100 std::optional
<PCHMergerInfo
> &PCHInfo
);
103 Error
doit(const CVTypeArray
&Types
);
105 Error
remapAllTypes(const CVTypeArray
&Types
);
107 Error
remapType(const CVType
&Type
);
109 void addMapping(TypeIndex Idx
);
111 inline bool remapTypeIndex(TypeIndex
&Idx
) {
112 // If we're mapping a pure index stream, then IndexMap only contains
113 // mappings from OldIdStream -> NewIdStream, in which case we will need to
114 // use the special mapping from OldTypeStream -> NewTypeStream which was
115 // computed externally. Regardless, we use this special map if and only if
116 // we are doing an id-only mapping.
117 if (!hasTypeStream())
118 return remapIndex(Idx
, TypeLookup
);
120 assert(TypeLookup
.empty());
121 return remapIndex(Idx
, IndexMap
);
123 inline bool remapItemIndex(TypeIndex
&Idx
) {
124 assert(hasIdStream());
125 return remapIndex(Idx
, IndexMap
);
128 bool hasTypeStream() const {
129 return (UseGlobalHashes
) ? (!!DestGlobalTypeStream
) : (!!DestTypeStream
);
132 bool hasIdStream() const {
133 return (UseGlobalHashes
) ? (!!DestGlobalIdStream
) : (!!DestIdStream
);
136 ArrayRef
<uint8_t> remapIndices(const CVType
&OriginalType
,
137 MutableArrayRef
<uint8_t> Storage
);
139 inline bool remapIndex(TypeIndex
&Idx
, ArrayRef
<TypeIndex
> Map
) {
140 if (LLVM_LIKELY(remapIndexSimple(Idx
, Map
)))
143 return remapIndexFallback(Idx
, Map
);
146 inline bool remapIndexSimple(TypeIndex
&Idx
, ArrayRef
<TypeIndex
> Map
) const {
147 // Simple types are unchanged.
151 // Check if this type index refers to a record we've already translated
152 // successfully. If it refers to a type later in the stream or a record we
153 // had to defer, defer it until later pass.
154 unsigned MapPos
= slotForIndex(Idx
);
155 if (LLVM_UNLIKELY(MapPos
>= Map
.size() || Map
[MapPos
] == Untranslated
))
162 bool remapIndexFallback(TypeIndex
&Idx
, ArrayRef
<TypeIndex
> Map
);
164 Error
errorCorruptRecord() const {
165 return llvm::make_error
<CodeViewError
>(cv_error_code::corrupt_record
);
168 Expected
<bool> shouldRemapType(const CVType
&Type
);
170 std::optional
<Error
> LastError
;
172 bool UseGlobalHashes
= false;
174 bool IsSecondPass
= false;
176 unsigned NumBadIndices
= 0;
178 TypeIndex CurIndex
{TypeIndex::FirstNonSimpleIndex
};
180 MergingTypeTableBuilder
*DestIdStream
= nullptr;
181 MergingTypeTableBuilder
*DestTypeStream
= nullptr;
183 GlobalTypeTableBuilder
*DestGlobalIdStream
= nullptr;
184 GlobalTypeTableBuilder
*DestGlobalTypeStream
= nullptr;
186 ArrayRef
<GloballyHashedType
> GlobalHashes
;
188 // If we're only mapping id records, this array contains the mapping for
190 ArrayRef
<TypeIndex
> TypeLookup
;
192 /// Map from source type index to destination type index. Indexed by source
193 /// type index minus 0x1000.
194 SmallVectorImpl
<TypeIndex
> &IndexMap
;
196 /// Temporary storage that we use to copy a record's data while re-writing
197 /// its type indices.
198 SmallVector
<uint8_t, 256> RemapStorage
;
200 std::optional
<PCHMergerInfo
> PCHInfo
;
203 } // end anonymous namespace
205 const TypeIndex
TypeStreamMerger::Untranslated(SimpleTypeKind::NotTranslated
);
207 void TypeStreamMerger::addMapping(TypeIndex Idx
) {
209 assert(IndexMap
.size() == slotForIndex(CurIndex
) &&
210 "visitKnownRecord should add one index map entry");
211 IndexMap
.push_back(Idx
);
213 assert(slotForIndex(CurIndex
) < IndexMap
.size());
214 IndexMap
[slotForIndex(CurIndex
)] = Idx
;
218 bool TypeStreamMerger::remapIndexFallback(TypeIndex
&Idx
,
219 ArrayRef
<TypeIndex
> Map
) {
220 size_t MapPos
= slotForIndex(Idx
);
222 // If this is the second pass and this index isn't in the map, then it points
223 // outside the current type stream, and this is a corrupt record.
224 if (IsSecondPass
&& MapPos
>= Map
.size()) {
225 // FIXME: Print a more useful error. We can give the current record and the
226 // index that we think its pointing to.
228 LastError
= joinErrors(std::move(*LastError
), errorCorruptRecord());
230 LastError
= errorCorruptRecord();
235 // This type index is invalid. Remap this to "not translated by cvpack",
236 // and return failure.
241 // Local hashing entry points
242 Error
TypeStreamMerger::mergeTypeRecords(MergingTypeTableBuilder
&Dest
,
243 const CVTypeArray
&Types
) {
244 DestTypeStream
= &Dest
;
245 UseGlobalHashes
= false;
250 Error
TypeStreamMerger::mergeIdRecords(MergingTypeTableBuilder
&Dest
,
251 ArrayRef
<TypeIndex
> TypeSourceToDest
,
252 const CVTypeArray
&Ids
) {
253 DestIdStream
= &Dest
;
254 TypeLookup
= TypeSourceToDest
;
255 UseGlobalHashes
= false;
260 Error
TypeStreamMerger::mergeTypesAndIds(
261 MergingTypeTableBuilder
&DestIds
, MergingTypeTableBuilder
&DestTypes
,
262 const CVTypeArray
&IdsAndTypes
, std::optional
<PCHMergerInfo
> &PCHInfo
) {
263 DestIdStream
= &DestIds
;
264 DestTypeStream
= &DestTypes
;
265 UseGlobalHashes
= false;
266 auto Err
= doit(IdsAndTypes
);
267 PCHInfo
= this->PCHInfo
;
271 // Global hashing entry points
272 Error
TypeStreamMerger::mergeTypeRecords(
273 GlobalTypeTableBuilder
&Dest
, const CVTypeArray
&Types
,
274 ArrayRef
<GloballyHashedType
> Hashes
,
275 std::optional
<PCHMergerInfo
> &PCHInfo
) {
276 DestGlobalTypeStream
= &Dest
;
277 UseGlobalHashes
= true;
278 GlobalHashes
= Hashes
;
279 auto Err
= doit(Types
);
280 PCHInfo
= this->PCHInfo
;
284 Error
TypeStreamMerger::mergeIdRecords(GlobalTypeTableBuilder
&Dest
,
285 ArrayRef
<TypeIndex
> TypeSourceToDest
,
286 const CVTypeArray
&Ids
,
287 ArrayRef
<GloballyHashedType
> Hashes
) {
288 DestGlobalIdStream
= &Dest
;
289 TypeLookup
= TypeSourceToDest
;
290 UseGlobalHashes
= true;
291 GlobalHashes
= Hashes
;
296 Error
TypeStreamMerger::mergeTypesAndIds(
297 GlobalTypeTableBuilder
&DestIds
, GlobalTypeTableBuilder
&DestTypes
,
298 const CVTypeArray
&IdsAndTypes
, ArrayRef
<GloballyHashedType
> Hashes
,
299 std::optional
<PCHMergerInfo
> &PCHInfo
) {
300 DestGlobalIdStream
= &DestIds
;
301 DestGlobalTypeStream
= &DestTypes
;
302 UseGlobalHashes
= true;
303 GlobalHashes
= Hashes
;
304 auto Err
= doit(IdsAndTypes
);
305 PCHInfo
= this->PCHInfo
;
309 Error
TypeStreamMerger::doit(const CVTypeArray
&Types
) {
310 if (auto EC
= remapAllTypes(Types
))
313 // If we found bad indices but no other errors, try doing another pass and see
314 // if we can resolve the indices that weren't in the map on the first pass.
315 // This may require multiple passes, but we should always make progress. MASM
316 // is the only known CodeView producer that makes type streams that aren't
317 // topologically sorted. The standard library contains MASM-produced objects,
318 // so this is important to handle correctly, but we don't have to be too
319 // efficient. MASM type streams are usually very small.
320 while (!LastError
&& NumBadIndices
> 0) {
321 unsigned BadIndicesRemaining
= NumBadIndices
;
324 CurIndex
= TypeIndex(TypeIndex::FirstNonSimpleIndex
);
326 if (auto EC
= remapAllTypes(Types
))
329 assert(NumBadIndices
<= BadIndicesRemaining
&&
330 "second pass found more bad indices");
331 if (!LastError
&& NumBadIndices
== BadIndicesRemaining
) {
332 return llvm::make_error
<CodeViewError
>(
333 cv_error_code::corrupt_record
, "Input type graph contains cycles");
338 return std::move(*LastError
);
339 return Error::success();
342 Error
TypeStreamMerger::remapAllTypes(const CVTypeArray
&Types
) {
343 BinaryStreamRef Stream
= Types
.getUnderlyingStream();
344 ArrayRef
<uint8_t> Buffer
;
345 cantFail(Stream
.readBytes(0, Stream
.getLength(), Buffer
));
347 return forEachCodeViewRecord
<CVType
>(
348 Buffer
, [this](const CVType
&T
) { return remapType(T
); });
351 Error
TypeStreamMerger::remapType(const CVType
&Type
) {
352 auto R
= shouldRemapType(Type
);
354 return R
.takeError();
356 TypeIndex DestIdx
= Untranslated
;
359 [this, Type
](MutableArrayRef
<uint8_t> Storage
) -> ArrayRef
<uint8_t> {
360 return remapIndices(Type
, Storage
);
362 unsigned AlignedSize
= alignTo(Type
.RecordData
.size(), 4);
364 if (LLVM_LIKELY(UseGlobalHashes
)) {
365 GlobalTypeTableBuilder
&Dest
=
366 isIdRecord(Type
.kind()) ? *DestGlobalIdStream
: *DestGlobalTypeStream
;
367 GloballyHashedType H
= GlobalHashes
[CurIndex
.toArrayIndex()];
368 DestIdx
= Dest
.insertRecordAs(H
, AlignedSize
, DoSerialize
);
370 MergingTypeTableBuilder
&Dest
=
371 isIdRecord(Type
.kind()) ? *DestIdStream
: *DestTypeStream
;
373 RemapStorage
.resize(AlignedSize
);
374 ArrayRef
<uint8_t> Result
= DoSerialize(RemapStorage
);
376 DestIdx
= Dest
.insertRecordBytes(Result
);
382 assert((IsSecondPass
|| IndexMap
.size() == slotForIndex(CurIndex
)) &&
383 "visitKnownRecord should add one index map entry");
384 return Error::success();
388 TypeStreamMerger::remapIndices(const CVType
&OriginalType
,
389 MutableArrayRef
<uint8_t> Storage
) {
390 unsigned Align
= OriginalType
.RecordData
.size() & 3;
391 assert(Storage
.size() == alignTo(OriginalType
.RecordData
.size(), 4) &&
392 "The storage buffer size is not a multiple of 4 bytes which will "
393 "cause misalignment in the output TPI stream!");
395 SmallVector
<TiReference
, 4> Refs
;
396 discoverTypeIndices(OriginalType
.RecordData
, Refs
);
397 if (Refs
.empty() && Align
== 0)
398 return OriginalType
.RecordData
;
400 ::memcpy(Storage
.data(), OriginalType
.RecordData
.data(),
401 OriginalType
.RecordData
.size());
403 uint8_t *DestContent
= Storage
.data() + sizeof(RecordPrefix
);
405 for (auto &Ref
: Refs
) {
407 reinterpret_cast<TypeIndex
*>(DestContent
+ Ref
.Offset
);
409 for (size_t I
= 0; I
< Ref
.Count
; ++I
) {
410 TypeIndex
&TI
= DestTIs
[I
];
411 bool Success
= (Ref
.Kind
== TiRefKind::IndexRef
) ? remapItemIndex(TI
)
412 : remapTypeIndex(TI
);
413 if (LLVM_UNLIKELY(!Success
))
419 RecordPrefix
*StorageHeader
=
420 reinterpret_cast<RecordPrefix
*>(Storage
.data());
421 StorageHeader
->RecordLen
+= 4 - Align
;
423 DestContent
= Storage
.data() + OriginalType
.RecordData
.size();
424 for (; Align
< 4; ++Align
)
425 *DestContent
++ = LF_PAD4
- Align
;
430 Error
llvm::codeview::mergeTypeRecords(MergingTypeTableBuilder
&Dest
,
431 SmallVectorImpl
<TypeIndex
> &SourceToDest
,
432 const CVTypeArray
&Types
) {
433 TypeStreamMerger
M(SourceToDest
);
434 return M
.mergeTypeRecords(Dest
, Types
);
437 Error
llvm::codeview::mergeIdRecords(MergingTypeTableBuilder
&Dest
,
438 ArrayRef
<TypeIndex
> TypeSourceToDest
,
439 SmallVectorImpl
<TypeIndex
> &SourceToDest
,
440 const CVTypeArray
&Ids
) {
441 TypeStreamMerger
M(SourceToDest
);
442 return M
.mergeIdRecords(Dest
, TypeSourceToDest
, Ids
);
445 Error
llvm::codeview::mergeTypeAndIdRecords(
446 MergingTypeTableBuilder
&DestIds
, MergingTypeTableBuilder
&DestTypes
,
447 SmallVectorImpl
<TypeIndex
> &SourceToDest
, const CVTypeArray
&IdsAndTypes
,
448 std::optional
<PCHMergerInfo
> &PCHInfo
) {
449 TypeStreamMerger
M(SourceToDest
);
450 return M
.mergeTypesAndIds(DestIds
, DestTypes
, IdsAndTypes
, PCHInfo
);
453 Error
llvm::codeview::mergeTypeAndIdRecords(
454 GlobalTypeTableBuilder
&DestIds
, GlobalTypeTableBuilder
&DestTypes
,
455 SmallVectorImpl
<TypeIndex
> &SourceToDest
, const CVTypeArray
&IdsAndTypes
,
456 ArrayRef
<GloballyHashedType
> Hashes
,
457 std::optional
<PCHMergerInfo
> &PCHInfo
) {
458 TypeStreamMerger
M(SourceToDest
);
459 return M
.mergeTypesAndIds(DestIds
, DestTypes
, IdsAndTypes
, Hashes
, PCHInfo
);
462 Error
llvm::codeview::mergeTypeRecords(GlobalTypeTableBuilder
&Dest
,
463 SmallVectorImpl
<TypeIndex
> &SourceToDest
,
464 const CVTypeArray
&Types
,
465 ArrayRef
<GloballyHashedType
> Hashes
,
466 std::optional
<PCHMergerInfo
> &PCHInfo
) {
467 TypeStreamMerger
M(SourceToDest
);
468 return M
.mergeTypeRecords(Dest
, Types
, Hashes
, PCHInfo
);
471 Error
llvm::codeview::mergeIdRecords(GlobalTypeTableBuilder
&Dest
,
472 ArrayRef
<TypeIndex
> Types
,
473 SmallVectorImpl
<TypeIndex
> &SourceToDest
,
474 const CVTypeArray
&Ids
,
475 ArrayRef
<GloballyHashedType
> Hashes
) {
476 TypeStreamMerger
M(SourceToDest
);
477 return M
.mergeIdRecords(Dest
, Types
, Ids
, Hashes
);
480 Expected
<bool> TypeStreamMerger::shouldRemapType(const CVType
&Type
) {
481 // For object files containing precompiled types, we need to extract the
482 // signature, through EndPrecompRecord. This is done here for performance
483 // reasons, to avoid re-parsing the Types stream.
484 if (Type
.kind() == LF_ENDPRECOMP
) {
486 if (auto EC
= TypeDeserializer::deserializeAs(const_cast<CVType
&>(Type
),
488 return joinErrors(std::move(EC
), errorCorruptRecord());
489 // Only one record of this kind can appear in a OBJ.
491 return errorCorruptRecord();
492 PCHInfo
.emplace(PCHMergerInfo
{EP
.getSignature(), CurIndex
.toArrayIndex()});