1 //===- PDBStringTableBuilder.cpp - PDB String Table -------------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "llvm/DebugInfo/PDB/Native/PDBStringTableBuilder.h"
11 #include "llvm/ADT/ArrayRef.h"
12 #include "llvm/DebugInfo/PDB/Native/Hash.h"
13 #include "llvm/DebugInfo/PDB/Native/RawTypes.h"
14 #include "llvm/Support/BinaryStreamWriter.h"
15 #include "llvm/Support/Endian.h"
20 using namespace llvm::msf
;
21 using namespace llvm::support
;
22 using namespace llvm::support::endian
;
23 using namespace llvm::pdb
;
25 StringTableHashTraits::StringTableHashTraits(PDBStringTableBuilder
&Table
)
28 uint32_t StringTableHashTraits::hashLookupKey(StringRef S
) const {
29 // The reference implementation doesn't include code for /src/headerblock
30 // handling, but it can only read natvis entries lld's PDB files if
31 // this hash function truncates the hash to 16 bit.
32 // PDB/include/misc.h in the reference implementation has a hashSz() function
33 // that returns an unsigned short, that seems what's being used for
35 return static_cast<uint16_t>(Table
->getIdForString(S
));
38 StringRef
StringTableHashTraits::storageKeyToLookupKey(uint32_t Offset
) const {
39 return Table
->getStringForId(Offset
);
42 uint32_t StringTableHashTraits::lookupKeyToStorageKey(StringRef S
) {
43 return Table
->insert(S
);
46 uint32_t PDBStringTableBuilder::insert(StringRef S
) {
47 return Strings
.insert(S
);
50 uint32_t PDBStringTableBuilder::getIdForString(StringRef S
) const {
51 return Strings
.getIdForString(S
);
54 StringRef
PDBStringTableBuilder::getStringForId(uint32_t Id
) const {
55 return Strings
.getStringForId(Id
);
/// Returns the number of hash buckets to emit for a string table that holds
/// \p NumStrings strings.
///
/// The result is the BucketCount of the first table row whose StringCount is
/// greater than or equal to \p NumStrings (i.e. a lower_bound on StringCount).
/// \p NumStrings must not exceed the StringCount of the last row; this is
/// asserted. In builds without assertions the largest BucketCount is returned
/// instead of reading past the end of the table.
static uint32_t computeBucketCount(uint32_t NumStrings) {
  // This is a precomputed list of Buckets given the specified number of
  // strings. Matching the reference algorithm exactly is not strictly
  // necessary for correctness, but it helps when comparing LLD's PDBs with
  // Microsoft's PDBs so as to eliminate superfluous differences.
  // The reference implementation does (in nmt.h, NMT::grow()):
  //   unsigned StringCount = 0;
  //   unsigned BucketCount = 1;
  //   fn insert() {
  //     ++StringCount;
  //     if (BucketCount * 3 / 4 < StringCount)
  //       BucketCount = BucketCount * 3 / 2 + 1;
  //   }
  // This list contains all StringCount, BucketCount pairs where BucketCount was
  // just incremented. It ends before the first BucketCount entry where
  // BucketCount * 3 would overflow a 32-bit unsigned int.
  //
  // Each row follows from the previous row's BucketCount B as
  // {B * 3 / 4 + 1, B * 3 / 2 + 1}.
  struct GrowthStep {
    uint32_t StringCount;
    uint32_t BucketCount;
  };
  // A constexpr array needs no dynamic initialization or heap allocation,
  // unlike a function-local static std::map.
  static constexpr GrowthStep GrowthSteps[] = {
      {0, 1},
      {1, 2},
      {2, 4},
      {4, 7},
      {6, 11},
      {9, 17},
      {13, 26},
      {20, 40},
      {31, 61},
      {46, 92},
      {70, 139},
      {105, 209},
      {157, 314},
      {236, 472},
      {355, 709},
      {532, 1064},
      {799, 1597},
      {1198, 2396},
      {1798, 3595},
      {2697, 5393},
      {4045, 8090},
      {6068, 12136},
      {9103, 18205},
      {13654, 27308},
      {20482, 40963},
      {30723, 61445},
      {46084, 92168},
      {69127, 138253},
      {103690, 207380},
      {155536, 311071},
      {233304, 466607},
      {349956, 699911},
      {524934, 1049867},
      {787401, 1574801},
      {1181101, 2362202},
      {1771652, 3543304},
      {2657479, 5314957},
      {3986218, 7972436},
      {5979328, 11958655},
      {8968992, 17937983},
      {13453488, 26906975},
      {20180232, 40360463},
      {30270348, 60540695},
      {45405522, 90811043},
      {68108283, 136216565},
      {102162424, 204324848},
      {153243637, 306487273},
      {229865455, 459730910},
      {344798183, 689596366},
      {517197275, 1034394550},
      {775795913, 1551591826},
      {1163693870, 2327387740}};

  // Rows are sorted by StringCount, so the first row that is large enough is
  // the lower bound. The table is small enough that a linear scan is fine.
  for (const GrowthStep &Step : GrowthSteps)
    if (NumStrings <= Step.StringCount)
      return Step.BucketCount;

  assert(false && "string count exceeds the precomputed bucket table");
  constexpr unsigned NumSteps = sizeof(GrowthSteps) / sizeof(GrowthSteps[0]);
  return GrowthSteps[NumSteps - 1].BucketCount;
}
132 uint32_t PDBStringTableBuilder::calculateHashTableSize() const {
133 uint32_t Size
= sizeof(uint32_t); // Hash table begins with 4-byte size field.
134 Size
+= sizeof(uint32_t) * computeBucketCount(Strings
.size());
139 uint32_t PDBStringTableBuilder::calculateSerializedSize() const {
141 Size
+= sizeof(PDBStringTableHeader
);
142 Size
+= Strings
.calculateSerializedSize();
143 Size
+= calculateHashTableSize();
144 Size
+= sizeof(uint32_t); // The /names stream ends with the string count.
148 void PDBStringTableBuilder::setStrings(
149 const codeview::DebugStringTableSubsection
&Strings
) {
150 this->Strings
= Strings
;
153 Error
PDBStringTableBuilder::writeHeader(BinaryStreamWriter
&Writer
) const {
155 PDBStringTableHeader H
;
156 H
.Signature
= PDBStringTableSignature
;
158 H
.ByteSize
= Strings
.calculateSerializedSize();
159 if (auto EC
= Writer
.writeObject(H
))
161 assert(Writer
.bytesRemaining() == 0);
162 return Error::success();
165 Error
PDBStringTableBuilder::writeStrings(BinaryStreamWriter
&Writer
) const {
166 if (auto EC
= Strings
.commit(Writer
))
169 assert(Writer
.bytesRemaining() == 0);
170 return Error::success();
173 Error
PDBStringTableBuilder::writeHashTable(BinaryStreamWriter
&Writer
) const {
174 // Write a hash table.
175 uint32_t BucketCount
= computeBucketCount(Strings
.size());
176 if (auto EC
= Writer
.writeInteger(BucketCount
))
178 std::vector
<ulittle32_t
> Buckets(BucketCount
);
180 for (auto &Pair
: Strings
) {
181 StringRef S
= Pair
.getKey();
182 uint32_t Offset
= Pair
.getValue();
183 uint32_t Hash
= hashStringV1(S
);
185 for (uint32_t I
= 0; I
!= BucketCount
; ++I
) {
186 uint32_t Slot
= (Hash
+ I
) % BucketCount
;
187 if (Buckets
[Slot
] != 0)
189 Buckets
[Slot
] = Offset
;
194 if (auto EC
= Writer
.writeArray(ArrayRef
<ulittle32_t
>(Buckets
)))
197 assert(Writer
.bytesRemaining() == 0);
198 return Error::success();
201 Error
PDBStringTableBuilder::writeEpilogue(BinaryStreamWriter
&Writer
) const {
202 if (auto EC
= Writer
.writeInteger
<uint32_t>(Strings
.size()))
204 assert(Writer
.bytesRemaining() == 0);
205 return Error::success();
208 Error
PDBStringTableBuilder::commit(BinaryStreamWriter
&Writer
) const {
209 BinaryStreamWriter SectionWriter
;
211 std::tie(SectionWriter
, Writer
) = Writer
.split(sizeof(PDBStringTableHeader
));
212 if (auto EC
= writeHeader(SectionWriter
))
215 std::tie(SectionWriter
, Writer
) =
216 Writer
.split(Strings
.calculateSerializedSize());
217 if (auto EC
= writeStrings(SectionWriter
))
220 std::tie(SectionWriter
, Writer
) = Writer
.split(calculateHashTableSize());
221 if (auto EC
= writeHashTable(SectionWriter
))
224 std::tie(SectionWriter
, Writer
) = Writer
.split(sizeof(uint32_t));
225 if (auto EC
= writeEpilogue(SectionWriter
))
228 return Error::success();