1 //===- DbiStreamBuilder.cpp - PDB Dbi Stream Creation -----------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "llvm/DebugInfo/PDB/Native/GSIStreamBuilder.h"
11 #include "llvm/ADT/DenseSet.h"
12 #include "llvm/DebugInfo/CodeView/RecordName.h"
13 #include "llvm/DebugInfo/CodeView/SymbolDeserializer.h"
14 #include "llvm/DebugInfo/CodeView/SymbolRecord.h"
15 #include "llvm/DebugInfo/CodeView/SymbolSerializer.h"
16 #include "llvm/DebugInfo/MSF/MSFBuilder.h"
17 #include "llvm/DebugInfo/MSF/MSFCommon.h"
18 #include "llvm/DebugInfo/MSF/MappedBlockStream.h"
19 #include "llvm/DebugInfo/PDB/Native/GlobalsStream.h"
20 #include "llvm/DebugInfo/PDB/Native/Hash.h"
21 #include "llvm/Support/BinaryItemStream.h"
22 #include "llvm/Support/BinaryStreamWriter.h"
23 #include "llvm/Support/xxhash.h"
28 using namespace llvm::msf
;
29 using namespace llvm::pdb
;
30 using namespace llvm::codeview
;
32 struct llvm::pdb::GSIHashStreamBuilder
{
33 struct SymbolDenseMapInfo
{
34 static inline CVSymbol
getEmptyKey() {
35 static CVSymbol Empty
;
38 static inline CVSymbol
getTombstoneKey() {
39 static CVSymbol
Tombstone(
40 DenseMapInfo
<ArrayRef
<uint8_t>>::getTombstoneKey());
43 static unsigned getHashValue(const CVSymbol
&Val
) {
44 return xxHash64(Val
.RecordData
);
46 static bool isEqual(const CVSymbol
&LHS
, const CVSymbol
&RHS
) {
47 return LHS
.RecordData
== RHS
.RecordData
;
51 std::vector
<CVSymbol
> Records
;
53 llvm::DenseSet
<CVSymbol
, SymbolDenseMapInfo
> SymbolHashes
;
54 std::vector
<PSHashRecord
> HashRecords
;
55 std::array
<support::ulittle32_t
, (IPHR_HASH
+ 32) / 32> HashBitmap
;
56 std::vector
<support::ulittle32_t
> HashBuckets
;
58 uint32_t calculateSerializedLength() const;
59 uint32_t calculateRecordByteSize() const;
60 Error
commit(BinaryStreamWriter
&Writer
);
61 void finalizeBuckets(uint32_t RecordZeroOffset
);
63 template <typename T
> void addSymbol(const T
&Symbol
, MSFBuilder
&Msf
) {
65 addSymbol(SymbolSerializer::writeOneSymbol(Copy
, Msf
.getAllocator(),
66 CodeViewContainer::Pdb
));
68 void addSymbol(const CVSymbol
&Symbol
) {
69 if (Symbol
.kind() == S_UDT
|| Symbol
.kind() == S_CONSTANT
) {
70 auto Iter
= SymbolHashes
.insert(Symbol
);
75 Records
.push_back(Symbol
);
79 uint32_t GSIHashStreamBuilder::calculateSerializedLength() const {
80 uint32_t Size
= sizeof(GSIHashHeader
);
81 Size
+= HashRecords
.size() * sizeof(PSHashRecord
);
82 Size
+= HashBitmap
.size() * sizeof(uint32_t);
83 Size
+= HashBuckets
.size() * sizeof(uint32_t);
87 uint32_t GSIHashStreamBuilder::calculateRecordByteSize() const {
89 for (const auto &Sym
: Records
)
94 Error
GSIHashStreamBuilder::commit(BinaryStreamWriter
&Writer
) {
96 Header
.VerSignature
= GSIHashHeader::HdrSignature
;
97 Header
.VerHdr
= GSIHashHeader::HdrVersion
;
98 Header
.HrSize
= HashRecords
.size() * sizeof(PSHashRecord
);
99 Header
.NumBuckets
= HashBitmap
.size() * 4 + HashBuckets
.size() * 4;
101 if (auto EC
= Writer
.writeObject(Header
))
104 if (auto EC
= Writer
.writeArray(makeArrayRef(HashRecords
)))
106 if (auto EC
= Writer
.writeArray(makeArrayRef(HashBitmap
)))
108 if (auto EC
= Writer
.writeArray(makeArrayRef(HashBuckets
)))
110 return Error::success();
113 static bool isAsciiString(StringRef S
) {
114 return llvm::all_of(S
, [](char C
) { return unsigned(C
) < 0x80; });
117 // See `caseInsensitiveComparePchPchCchCch` in gsi.cpp
118 static bool gsiRecordLess(StringRef S1
, StringRef S2
) {
119 size_t LS
= S1
.size();
120 size_t RS
= S2
.size();
121 // Shorter strings always compare less than longer strings.
125 // If either string contains non ascii characters, memcmp them.
126 if (LLVM_UNLIKELY(!isAsciiString(S1
) || !isAsciiString(S2
)))
127 return memcmp(S1
.data(), S2
.data(), LS
) < 0;
129 // Both strings are ascii, perform a case-insenstive comparison.
130 return S1
.compare_lower(S2
.data()) < 0;
133 void GSIHashStreamBuilder::finalizeBuckets(uint32_t RecordZeroOffset
) {
134 std::array
<std::vector
<std::pair
<StringRef
, PSHashRecord
>>, IPHR_HASH
+ 1>
136 uint32_t SymOffset
= RecordZeroOffset
;
137 for (const CVSymbol
&Sym
: Records
) {
139 // Add one when writing symbol offsets to disk. See GSI1::fixSymRecs.
140 HR
.Off
= SymOffset
+ 1;
141 HR
.CRef
= 1; // Always use a refcount of 1.
143 // Hash the name to figure out which bucket this goes into.
144 StringRef Name
= getSymbolName(Sym
);
145 size_t BucketIdx
= hashStringV1(Name
) % IPHR_HASH
;
146 TmpBuckets
[BucketIdx
].push_back(std::make_pair(Name
, HR
));
147 SymOffset
+= Sym
.length();
150 // Compute the three tables: the hash records in bucket and chain order, the
151 // bucket presence bitmap, and the bucket chain start offsets.
152 HashRecords
.reserve(Records
.size());
153 for (ulittle32_t
&Word
: HashBitmap
)
155 for (size_t BucketIdx
= 0; BucketIdx
< IPHR_HASH
+ 1; ++BucketIdx
) {
156 auto &Bucket
= TmpBuckets
[BucketIdx
];
159 HashBitmap
[BucketIdx
/ 32] |= 1U << (BucketIdx
% 32);
161 // Calculate what the offset of the first hash record in the chain would
162 // be if it were inflated to contain 32-bit pointers. On a 32-bit system,
163 // each record would be 12 bytes. See HROffsetCalc in gsi.h.
164 const int SizeOfHROffsetCalc
= 12;
165 ulittle32_t ChainStartOff
=
166 ulittle32_t(HashRecords
.size() * SizeOfHROffsetCalc
);
167 HashBuckets
.push_back(ChainStartOff
);
169 // Sort each bucket by memcmp of the symbol's name. It's important that
170 // we use the same sorting algorithm as is used by the reference
171 // implementation to ensure that the search for a record within a bucket
172 // can properly early-out when it detects the record won't be found. The
173 // algorithm used here corredsponds to the function
174 // caseInsensitiveComparePchPchCchCch in the reference implementation.
175 llvm::sort(Bucket
, [](const std::pair
<StringRef
, PSHashRecord
> &Left
,
176 const std::pair
<StringRef
, PSHashRecord
> &Right
) {
177 return gsiRecordLess(Left
.first
, Right
.first
);
180 for (const auto &Entry
: Bucket
)
181 HashRecords
.push_back(Entry
.second
);
185 GSIStreamBuilder::GSIStreamBuilder(msf::MSFBuilder
&Msf
)
186 : Msf(Msf
), PSH(std::make_unique
<GSIHashStreamBuilder
>()),
187 GSH(std::make_unique
<GSIHashStreamBuilder
>()) {}
189 GSIStreamBuilder::~GSIStreamBuilder() {}
191 uint32_t GSIStreamBuilder::calculatePublicsHashStreamSize() const {
193 Size
+= sizeof(PublicsStreamHeader
);
194 Size
+= PSH
->calculateSerializedLength();
195 Size
+= PSH
->Records
.size() * sizeof(uint32_t); // AddrMap
196 // FIXME: Add thunk map and section offsets for incremental linking.
201 uint32_t GSIStreamBuilder::calculateGlobalsHashStreamSize() const {
202 return GSH
->calculateSerializedLength();
205 Error
GSIStreamBuilder::finalizeMsfLayout() {
206 // First we write public symbol records, then we write global symbol records.
207 uint32_t PSHZero
= 0;
208 uint32_t GSHZero
= PSH
->calculateRecordByteSize();
210 PSH
->finalizeBuckets(PSHZero
);
211 GSH
->finalizeBuckets(GSHZero
);
213 Expected
<uint32_t> Idx
= Msf
.addStream(calculateGlobalsHashStreamSize());
215 return Idx
.takeError();
216 GSH
->StreamIndex
= *Idx
;
217 Idx
= Msf
.addStream(calculatePublicsHashStreamSize());
219 return Idx
.takeError();
220 PSH
->StreamIndex
= *Idx
;
222 uint32_t RecordBytes
=
223 GSH
->calculateRecordByteSize() + PSH
->calculateRecordByteSize();
225 Idx
= Msf
.addStream(RecordBytes
);
227 return Idx
.takeError();
228 RecordStreamIdx
= *Idx
;
229 return Error::success();
232 static bool comparePubSymByAddrAndName(
233 const std::pair
<const CVSymbol
*, const PublicSym32
*> &LS
,
234 const std::pair
<const CVSymbol
*, const PublicSym32
*> &RS
) {
235 if (LS
.second
->Segment
!= RS
.second
->Segment
)
236 return LS
.second
->Segment
< RS
.second
->Segment
;
237 if (LS
.second
->Offset
!= RS
.second
->Offset
)
238 return LS
.second
->Offset
< RS
.second
->Offset
;
240 return LS
.second
->Name
< RS
.second
->Name
;
243 /// Compute the address map. The address map is an array of symbol offsets
244 /// sorted so that it can be binary searched by address.
245 static std::vector
<ulittle32_t
> computeAddrMap(ArrayRef
<CVSymbol
> Records
) {
246 // Make a vector of pointers to the symbols so we can sort it by address.
247 // Also gather the symbol offsets while we're at it.
249 std::vector
<PublicSym32
> DeserializedPublics
;
250 std::vector
<std::pair
<const CVSymbol
*, const PublicSym32
*>> PublicsByAddr
;
251 std::vector
<uint32_t> SymOffsets
;
252 DeserializedPublics
.reserve(Records
.size());
253 PublicsByAddr
.reserve(Records
.size());
254 SymOffsets
.reserve(Records
.size());
256 uint32_t SymOffset
= 0;
257 for (const CVSymbol
&Sym
: Records
) {
258 assert(Sym
.kind() == SymbolKind::S_PUB32
);
259 DeserializedPublics
.push_back(
260 cantFail(SymbolDeserializer::deserializeAs
<PublicSym32
>(Sym
)));
261 PublicsByAddr
.emplace_back(&Sym
, &DeserializedPublics
.back());
262 SymOffsets
.push_back(SymOffset
);
263 SymOffset
+= Sym
.length();
265 llvm::stable_sort(PublicsByAddr
, comparePubSymByAddrAndName
);
267 // Fill in the symbol offsets in the appropriate order.
268 std::vector
<ulittle32_t
> AddrMap
;
269 AddrMap
.reserve(Records
.size());
270 for (auto &Sym
: PublicsByAddr
) {
271 ptrdiff_t Idx
= std::distance(Records
.data(), Sym
.first
);
272 assert(Idx
>= 0 && size_t(Idx
) < Records
.size());
273 AddrMap
.push_back(ulittle32_t(SymOffsets
[Idx
]));
278 uint32_t GSIStreamBuilder::getPublicsStreamIndex() const {
279 return PSH
->StreamIndex
;
282 uint32_t GSIStreamBuilder::getGlobalsStreamIndex() const {
283 return GSH
->StreamIndex
;
286 void GSIStreamBuilder::addPublicSymbol(const PublicSym32
&Pub
) {
287 PSH
->addSymbol(Pub
, Msf
);
290 void GSIStreamBuilder::addGlobalSymbol(const ProcRefSym
&Sym
) {
291 GSH
->addSymbol(Sym
, Msf
);
294 void GSIStreamBuilder::addGlobalSymbol(const DataSym
&Sym
) {
295 GSH
->addSymbol(Sym
, Msf
);
298 void GSIStreamBuilder::addGlobalSymbol(const ConstantSym
&Sym
) {
299 GSH
->addSymbol(Sym
, Msf
);
302 void GSIStreamBuilder::addGlobalSymbol(const codeview::CVSymbol
&Sym
) {
306 static Error
writeRecords(BinaryStreamWriter
&Writer
,
307 ArrayRef
<CVSymbol
> Records
) {
308 BinaryItemStream
<CVSymbol
> ItemStream(support::endianness::little
);
309 ItemStream
.setItems(Records
);
310 BinaryStreamRef
RecordsRef(ItemStream
);
311 return Writer
.writeStreamRef(RecordsRef
);
314 Error
GSIStreamBuilder::commitSymbolRecordStream(
315 WritableBinaryStreamRef Stream
) {
316 BinaryStreamWriter
Writer(Stream
);
318 // Write public symbol records first, followed by global symbol records. This
319 // must match the order that we assume in finalizeMsfLayout when computing
320 // PSHZero and GSHZero.
321 if (auto EC
= writeRecords(Writer
, PSH
->Records
))
323 if (auto EC
= writeRecords(Writer
, GSH
->Records
))
326 return Error::success();
329 Error
GSIStreamBuilder::commitPublicsHashStream(
330 WritableBinaryStreamRef Stream
) {
331 BinaryStreamWriter
Writer(Stream
);
332 PublicsStreamHeader Header
;
334 // FIXME: Fill these in. They are for incremental linking.
335 Header
.SymHash
= PSH
->calculateSerializedLength();
336 Header
.AddrMap
= PSH
->Records
.size() * 4;
337 Header
.NumThunks
= 0;
338 Header
.SizeOfThunk
= 0;
339 Header
.ISectThunkTable
= 0;
340 memset(Header
.Padding
, 0, sizeof(Header
.Padding
));
341 Header
.OffThunkTable
= 0;
342 Header
.NumSections
= 0;
343 if (auto EC
= Writer
.writeObject(Header
))
346 if (auto EC
= PSH
->commit(Writer
))
349 std::vector
<ulittle32_t
> AddrMap
= computeAddrMap(PSH
->Records
);
350 if (auto EC
= Writer
.writeArray(makeArrayRef(AddrMap
)))
353 return Error::success();
356 Error
GSIStreamBuilder::commitGlobalsHashStream(
357 WritableBinaryStreamRef Stream
) {
358 BinaryStreamWriter
Writer(Stream
);
359 return GSH
->commit(Writer
);
362 Error
GSIStreamBuilder::commit(const msf::MSFLayout
&Layout
,
363 WritableBinaryStreamRef Buffer
) {
364 auto GS
= WritableMappedBlockStream::createIndexedStream(
365 Layout
, Buffer
, getGlobalsStreamIndex(), Msf
.getAllocator());
366 auto PS
= WritableMappedBlockStream::createIndexedStream(
367 Layout
, Buffer
, getPublicsStreamIndex(), Msf
.getAllocator());
368 auto PRS
= WritableMappedBlockStream::createIndexedStream(
369 Layout
, Buffer
, getRecordStreamIdx(), Msf
.getAllocator());
371 if (auto EC
= commitSymbolRecordStream(*PRS
))
373 if (auto EC
= commitGlobalsHashStream(*GS
))
375 if (auto EC
= commitPublicsHashStream(*PS
))
377 return Error::success();