1 //===-- TypeStreamMerger.cpp ------------------------------------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "llvm/DebugInfo/CodeView/TypeStreamMerger.h"
10 #include "llvm/ADT/SmallString.h"
11 #include "llvm/ADT/StringExtras.h"
12 #include "llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h"
13 #include "llvm/DebugInfo/CodeView/MergingTypeTableBuilder.h"
14 #include "llvm/DebugInfo/CodeView/TypeDeserializer.h"
15 #include "llvm/DebugInfo/CodeView/TypeIndex.h"
16 #include "llvm/DebugInfo/CodeView/TypeIndexDiscovery.h"
17 #include "llvm/DebugInfo/CodeView/TypeRecord.h"
18 #include "llvm/Support/Error.h"
21 using namespace llvm::codeview
;
23 static inline size_t slotForIndex(TypeIndex Idx
) {
24 assert(!Idx
.isSimple() && "simple type indices have no slots");
25 return Idx
.getIndex() - TypeIndex::FirstNonSimpleIndex
;
30 /// Implementation of CodeView type stream merging.
32 /// A CodeView type stream is a series of records that reference each other
33 /// through type indices. A type index is either "simple", meaning it is less
34 /// than 0x1000 and refers to a builtin type, or it is complex, meaning it
35 /// refers to a prior type record in the current stream. The type index of a
36 /// record is equal to the number of records before it in the stream plus
39 /// Type records are only allowed to use type indices smaller than their own, so
40 /// a type stream is effectively a topologically sorted DAG. Cycles occuring in
41 /// the type graph of the source program are resolved with forward declarations
42 /// of composite types. This class implements the following type stream merging
43 /// algorithm, which relies on this DAG structure:
45 /// - Begin with a new empty stream, and a new empty hash table that maps from
46 /// type record contents to new type index.
47 /// - For each new type stream, maintain a map from source type index to
48 /// destination type index.
49 /// - For each record, copy it and rewrite its type indices to be valid in the
50 /// destination type stream.
51 /// - If the new type record is not already present in the destination stream
52 /// hash table, append it to the destination type stream, assign it the next
53 /// type index, and update the two hash tables.
54 /// - If the type record already exists in the destination stream, discard it
55 /// and update the type index map to forward the source type index to the
56 /// existing destination type index.
58 /// As an additional complication, type stream merging actually produces two
59 /// streams: an item (or IPI) stream and a type stream, as this is what is
60 /// actually stored in the final PDB. We choose which records go where by
61 /// looking at the record kind.
62 class TypeStreamMerger
{
64 explicit TypeStreamMerger(SmallVectorImpl
<TypeIndex
> &SourceToDest
)
65 : IndexMap(SourceToDest
) {
66 // When dealing with precompiled headers objects, all data in SourceToDest
67 // belongs to the precompiled headers object, and is assumed to be already
68 // remapped to the target PDB. Any forthcoming type that will be merged in
69 // might potentially back-reference this data. We also don't want to resolve
70 // twice the types in the precompiled object.
71 CurIndex
+= SourceToDest
.size();
74 static const TypeIndex Untranslated
;
76 // Local hashing entry points
77 Error
mergeTypesAndIds(MergingTypeTableBuilder
&DestIds
,
78 MergingTypeTableBuilder
&DestTypes
,
79 const CVTypeArray
&IdsAndTypes
, Optional
<uint32_t> &S
);
80 Error
mergeIdRecords(MergingTypeTableBuilder
&Dest
,
81 ArrayRef
<TypeIndex
> TypeSourceToDest
,
82 const CVTypeArray
&Ids
);
83 Error
mergeTypeRecords(MergingTypeTableBuilder
&Dest
,
84 const CVTypeArray
&Types
);
86 // Global hashing entry points
87 Error
mergeTypesAndIds(GlobalTypeTableBuilder
&DestIds
,
88 GlobalTypeTableBuilder
&DestTypes
,
89 const CVTypeArray
&IdsAndTypes
,
90 ArrayRef
<GloballyHashedType
> Hashes
,
91 Optional
<uint32_t> &S
);
92 Error
mergeIdRecords(GlobalTypeTableBuilder
&Dest
,
93 ArrayRef
<TypeIndex
> TypeSourceToDest
,
94 const CVTypeArray
&Ids
,
95 ArrayRef
<GloballyHashedType
> Hashes
);
96 Error
mergeTypeRecords(GlobalTypeTableBuilder
&Dest
, const CVTypeArray
&Types
,
97 ArrayRef
<GloballyHashedType
> Hashes
,
98 Optional
<uint32_t> &S
);
101 Error
doit(const CVTypeArray
&Types
);
103 Error
remapAllTypes(const CVTypeArray
&Types
);
105 Error
remapType(const CVType
&Type
);
107 void addMapping(TypeIndex Idx
);
109 inline bool remapTypeIndex(TypeIndex
&Idx
) {
110 // If we're mapping a pure index stream, then IndexMap only contains
111 // mappings from OldIdStream -> NewIdStream, in which case we will need to
112 // use the special mapping from OldTypeStream -> NewTypeStream which was
113 // computed externally. Regardless, we use this special map if and only if
114 // we are doing an id-only mapping.
115 if (!hasTypeStream())
116 return remapIndex(Idx
, TypeLookup
);
118 assert(TypeLookup
.empty());
119 return remapIndex(Idx
, IndexMap
);
121 inline bool remapItemIndex(TypeIndex
&Idx
) {
122 assert(hasIdStream());
123 return remapIndex(Idx
, IndexMap
);
126 bool hasTypeStream() const {
127 return (UseGlobalHashes
) ? (!!DestGlobalTypeStream
) : (!!DestTypeStream
);
130 bool hasIdStream() const {
131 return (UseGlobalHashes
) ? (!!DestGlobalIdStream
) : (!!DestIdStream
);
134 ArrayRef
<uint8_t> remapIndices(const CVType
&OriginalType
,
135 MutableArrayRef
<uint8_t> Storage
);
137 inline bool remapIndex(TypeIndex
&Idx
, ArrayRef
<TypeIndex
> Map
) {
138 if (LLVM_LIKELY(remapIndexSimple(Idx
, Map
)))
141 return remapIndexFallback(Idx
, Map
);
144 inline bool remapIndexSimple(TypeIndex
&Idx
, ArrayRef
<TypeIndex
> Map
) const {
145 // Simple types are unchanged.
149 // Check if this type index refers to a record we've already translated
150 // successfully. If it refers to a type later in the stream or a record we
151 // had to defer, defer it until later pass.
152 unsigned MapPos
= slotForIndex(Idx
);
153 if (LLVM_UNLIKELY(MapPos
>= Map
.size() || Map
[MapPos
] == Untranslated
))
160 bool remapIndexFallback(TypeIndex
&Idx
, ArrayRef
<TypeIndex
> Map
);
162 Error
errorCorruptRecord() const {
163 return llvm::make_error
<CodeViewError
>(cv_error_code::corrupt_record
);
166 Expected
<bool> shouldRemapType(const CVType
&Type
);
168 Optional
<Error
> LastError
;
170 bool UseGlobalHashes
= false;
172 bool IsSecondPass
= false;
174 unsigned NumBadIndices
= 0;
176 TypeIndex CurIndex
{TypeIndex::FirstNonSimpleIndex
};
178 MergingTypeTableBuilder
*DestIdStream
= nullptr;
179 MergingTypeTableBuilder
*DestTypeStream
= nullptr;
181 GlobalTypeTableBuilder
*DestGlobalIdStream
= nullptr;
182 GlobalTypeTableBuilder
*DestGlobalTypeStream
= nullptr;
184 ArrayRef
<GloballyHashedType
> GlobalHashes
;
186 // If we're only mapping id records, this array contains the mapping for
188 ArrayRef
<TypeIndex
> TypeLookup
;
190 /// Map from source type index to destination type index. Indexed by source
191 /// type index minus 0x1000.
192 SmallVectorImpl
<TypeIndex
> &IndexMap
;
194 /// Temporary storage that we use to copy a record's data while re-writing
195 /// its type indices.
196 SmallVector
<uint8_t, 256> RemapStorage
;
198 Optional
<uint32_t> PCHSignature
;
201 } // end anonymous namespace
203 const TypeIndex
TypeStreamMerger::Untranslated(SimpleTypeKind::NotTranslated
);
205 static bool isIdRecord(TypeLeafKind K
) {
207 case TypeLeafKind::LF_FUNC_ID
:
208 case TypeLeafKind::LF_MFUNC_ID
:
209 case TypeLeafKind::LF_STRING_ID
:
210 case TypeLeafKind::LF_SUBSTR_LIST
:
211 case TypeLeafKind::LF_BUILDINFO
:
212 case TypeLeafKind::LF_UDT_SRC_LINE
:
213 case TypeLeafKind::LF_UDT_MOD_SRC_LINE
:
220 void TypeStreamMerger::addMapping(TypeIndex Idx
) {
222 assert(IndexMap
.size() == slotForIndex(CurIndex
) &&
223 "visitKnownRecord should add one index map entry");
224 IndexMap
.push_back(Idx
);
226 assert(slotForIndex(CurIndex
) < IndexMap
.size());
227 IndexMap
[slotForIndex(CurIndex
)] = Idx
;
231 bool TypeStreamMerger::remapIndexFallback(TypeIndex
&Idx
,
232 ArrayRef
<TypeIndex
> Map
) {
233 size_t MapPos
= slotForIndex(Idx
);
235 // If this is the second pass and this index isn't in the map, then it points
236 // outside the current type stream, and this is a corrupt record.
237 if (IsSecondPass
&& MapPos
>= Map
.size()) {
238 // FIXME: Print a more useful error. We can give the current record and the
239 // index that we think its pointing to.
241 LastError
= joinErrors(std::move(*LastError
), errorCorruptRecord());
243 LastError
= errorCorruptRecord();
248 // This type index is invalid. Remap this to "not translated by cvpack",
249 // and return failure.
254 // Local hashing entry points
255 Error
TypeStreamMerger::mergeTypeRecords(MergingTypeTableBuilder
&Dest
,
256 const CVTypeArray
&Types
) {
257 DestTypeStream
= &Dest
;
258 UseGlobalHashes
= false;
263 Error
TypeStreamMerger::mergeIdRecords(MergingTypeTableBuilder
&Dest
,
264 ArrayRef
<TypeIndex
> TypeSourceToDest
,
265 const CVTypeArray
&Ids
) {
266 DestIdStream
= &Dest
;
267 TypeLookup
= TypeSourceToDest
;
268 UseGlobalHashes
= false;
273 Error
TypeStreamMerger::mergeTypesAndIds(MergingTypeTableBuilder
&DestIds
,
274 MergingTypeTableBuilder
&DestTypes
,
275 const CVTypeArray
&IdsAndTypes
,
276 Optional
<uint32_t> &S
) {
277 DestIdStream
= &DestIds
;
278 DestTypeStream
= &DestTypes
;
279 UseGlobalHashes
= false;
280 auto Err
= doit(IdsAndTypes
);
285 // Global hashing entry points
286 Error
TypeStreamMerger::mergeTypeRecords(GlobalTypeTableBuilder
&Dest
,
287 const CVTypeArray
&Types
,
288 ArrayRef
<GloballyHashedType
> Hashes
,
289 Optional
<uint32_t> &S
) {
290 DestGlobalTypeStream
= &Dest
;
291 UseGlobalHashes
= true;
292 GlobalHashes
= Hashes
;
293 auto Err
= doit(Types
);
298 Error
TypeStreamMerger::mergeIdRecords(GlobalTypeTableBuilder
&Dest
,
299 ArrayRef
<TypeIndex
> TypeSourceToDest
,
300 const CVTypeArray
&Ids
,
301 ArrayRef
<GloballyHashedType
> Hashes
) {
302 DestGlobalIdStream
= &Dest
;
303 TypeLookup
= TypeSourceToDest
;
304 UseGlobalHashes
= true;
305 GlobalHashes
= Hashes
;
310 Error
TypeStreamMerger::mergeTypesAndIds(GlobalTypeTableBuilder
&DestIds
,
311 GlobalTypeTableBuilder
&DestTypes
,
312 const CVTypeArray
&IdsAndTypes
,
313 ArrayRef
<GloballyHashedType
> Hashes
,
314 Optional
<uint32_t> &S
) {
315 DestGlobalIdStream
= &DestIds
;
316 DestGlobalTypeStream
= &DestTypes
;
317 UseGlobalHashes
= true;
318 GlobalHashes
= Hashes
;
319 auto Err
= doit(IdsAndTypes
);
324 Error
TypeStreamMerger::doit(const CVTypeArray
&Types
) {
325 if (auto EC
= remapAllTypes(Types
))
328 // If we found bad indices but no other errors, try doing another pass and see
329 // if we can resolve the indices that weren't in the map on the first pass.
330 // This may require multiple passes, but we should always make progress. MASM
331 // is the only known CodeView producer that makes type streams that aren't
332 // topologically sorted. The standard library contains MASM-produced objects,
333 // so this is important to handle correctly, but we don't have to be too
334 // efficient. MASM type streams are usually very small.
335 while (!LastError
&& NumBadIndices
> 0) {
336 unsigned BadIndicesRemaining
= NumBadIndices
;
339 CurIndex
= TypeIndex(TypeIndex::FirstNonSimpleIndex
);
341 if (auto EC
= remapAllTypes(Types
))
344 assert(NumBadIndices
<= BadIndicesRemaining
&&
345 "second pass found more bad indices");
346 if (!LastError
&& NumBadIndices
== BadIndicesRemaining
) {
347 return llvm::make_error
<CodeViewError
>(
348 cv_error_code::corrupt_record
, "Input type graph contains cycles");
353 return std::move(*LastError
);
354 return Error::success();
357 Error
TypeStreamMerger::remapAllTypes(const CVTypeArray
&Types
) {
358 BinaryStreamRef Stream
= Types
.getUnderlyingStream();
359 ArrayRef
<uint8_t> Buffer
;
360 cantFail(Stream
.readBytes(0, Stream
.getLength(), Buffer
));
362 return forEachCodeViewRecord
<CVType
>(
363 Buffer
, [this](const CVType
&T
) { return remapType(T
); });
366 Error
TypeStreamMerger::remapType(const CVType
&Type
) {
367 auto R
= shouldRemapType(Type
);
369 return R
.takeError();
371 TypeIndex DestIdx
= Untranslated
;
374 [this, Type
](MutableArrayRef
<uint8_t> Storage
) -> ArrayRef
<uint8_t> {
375 return remapIndices(Type
, Storage
);
377 if (LLVM_LIKELY(UseGlobalHashes
)) {
378 GlobalTypeTableBuilder
&Dest
=
379 isIdRecord(Type
.kind()) ? *DestGlobalIdStream
: *DestGlobalTypeStream
;
380 GloballyHashedType H
= GlobalHashes
[CurIndex
.toArrayIndex()];
381 DestIdx
= Dest
.insertRecordAs(H
, Type
.RecordData
.size(), DoSerialize
);
383 MergingTypeTableBuilder
&Dest
=
384 isIdRecord(Type
.kind()) ? *DestIdStream
: *DestTypeStream
;
386 RemapStorage
.resize(Type
.RecordData
.size());
387 ArrayRef
<uint8_t> Result
= DoSerialize(RemapStorage
);
389 DestIdx
= Dest
.insertRecordBytes(Result
);
395 assert((IsSecondPass
|| IndexMap
.size() == slotForIndex(CurIndex
)) &&
396 "visitKnownRecord should add one index map entry");
397 return Error::success();
401 TypeStreamMerger::remapIndices(const CVType
&OriginalType
,
402 MutableArrayRef
<uint8_t> Storage
) {
403 SmallVector
<TiReference
, 4> Refs
;
404 discoverTypeIndices(OriginalType
.RecordData
, Refs
);
406 return OriginalType
.RecordData
;
408 ::memcpy(Storage
.data(), OriginalType
.RecordData
.data(),
409 OriginalType
.RecordData
.size());
411 uint8_t *DestContent
= Storage
.data() + sizeof(RecordPrefix
);
413 for (auto &Ref
: Refs
) {
415 reinterpret_cast<TypeIndex
*>(DestContent
+ Ref
.Offset
);
417 for (size_t I
= 0; I
< Ref
.Count
; ++I
) {
418 TypeIndex
&TI
= DestTIs
[I
];
419 bool Success
= (Ref
.Kind
== TiRefKind::IndexRef
) ? remapItemIndex(TI
)
420 : remapTypeIndex(TI
);
421 if (LLVM_UNLIKELY(!Success
))
428 Error
llvm::codeview::mergeTypeRecords(MergingTypeTableBuilder
&Dest
,
429 SmallVectorImpl
<TypeIndex
> &SourceToDest
,
430 const CVTypeArray
&Types
) {
431 TypeStreamMerger
M(SourceToDest
);
432 return M
.mergeTypeRecords(Dest
, Types
);
435 Error
llvm::codeview::mergeIdRecords(MergingTypeTableBuilder
&Dest
,
436 ArrayRef
<TypeIndex
> TypeSourceToDest
,
437 SmallVectorImpl
<TypeIndex
> &SourceToDest
,
438 const CVTypeArray
&Ids
) {
439 TypeStreamMerger
M(SourceToDest
);
440 return M
.mergeIdRecords(Dest
, TypeSourceToDest
, Ids
);
443 Error
llvm::codeview::mergeTypeAndIdRecords(
444 MergingTypeTableBuilder
&DestIds
, MergingTypeTableBuilder
&DestTypes
,
445 SmallVectorImpl
<TypeIndex
> &SourceToDest
, const CVTypeArray
&IdsAndTypes
,
446 Optional
<uint32_t> &PCHSignature
) {
447 TypeStreamMerger
M(SourceToDest
);
448 return M
.mergeTypesAndIds(DestIds
, DestTypes
, IdsAndTypes
, PCHSignature
);
451 Error
llvm::codeview::mergeTypeAndIdRecords(
452 GlobalTypeTableBuilder
&DestIds
, GlobalTypeTableBuilder
&DestTypes
,
453 SmallVectorImpl
<TypeIndex
> &SourceToDest
, const CVTypeArray
&IdsAndTypes
,
454 ArrayRef
<GloballyHashedType
> Hashes
, Optional
<uint32_t> &PCHSignature
) {
455 TypeStreamMerger
M(SourceToDest
);
456 return M
.mergeTypesAndIds(DestIds
, DestTypes
, IdsAndTypes
, Hashes
,
460 Error
llvm::codeview::mergeTypeRecords(GlobalTypeTableBuilder
&Dest
,
461 SmallVectorImpl
<TypeIndex
> &SourceToDest
,
462 const CVTypeArray
&Types
,
463 ArrayRef
<GloballyHashedType
> Hashes
,
464 Optional
<uint32_t> &PCHSignature
) {
465 TypeStreamMerger
M(SourceToDest
);
466 return M
.mergeTypeRecords(Dest
, Types
, Hashes
, PCHSignature
);
469 Error
llvm::codeview::mergeIdRecords(GlobalTypeTableBuilder
&Dest
,
470 ArrayRef
<TypeIndex
> Types
,
471 SmallVectorImpl
<TypeIndex
> &SourceToDest
,
472 const CVTypeArray
&Ids
,
473 ArrayRef
<GloballyHashedType
> Hashes
) {
474 TypeStreamMerger
M(SourceToDest
);
475 return M
.mergeIdRecords(Dest
, Types
, Ids
, Hashes
);
478 Expected
<bool> TypeStreamMerger::shouldRemapType(const CVType
&Type
) {
479 // For object files containing precompiled types, we need to extract the
480 // signature, through EndPrecompRecord. This is done here for performance
481 // reasons, to avoid re-parsing the Types stream.
482 if (Type
.kind() == LF_ENDPRECOMP
) {
484 if (auto EC
= TypeDeserializer::deserializeAs(const_cast<CVType
&>(Type
),
486 return joinErrors(std::move(EC
), errorCorruptRecord());
487 if (PCHSignature
.hasValue())
488 return errorCorruptRecord();
489 PCHSignature
.emplace(EP
.getSignature());