1 //===- InputFile.cpp ------------------------------------------ *- C++ --*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
11 #include "FormatUtil.h"
12 #include "LinePrinter.h"
14 #include "llvm/BinaryFormat/Magic.h"
15 #include "llvm/DebugInfo/CodeView/CodeView.h"
16 #include "llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h"
17 #include "llvm/DebugInfo/CodeView/StringsAndChecksums.h"
18 #include "llvm/DebugInfo/PDB/Native/DbiStream.h"
19 #include "llvm/DebugInfo/PDB/Native/NativeSession.h"
20 #include "llvm/DebugInfo/PDB/Native/PDBFile.h"
21 #include "llvm/DebugInfo/PDB/Native/PDBStringTable.h"
22 #include "llvm/DebugInfo/PDB/Native/RawError.h"
23 #include "llvm/DebugInfo/PDB/Native/TpiStream.h"
24 #include "llvm/DebugInfo/PDB/PDB.h"
25 #include "llvm/Object/COFF.h"
26 #include "llvm/Support/FileSystem.h"
27 #include "llvm/Support/FormatVariadic.h"
30 using namespace llvm::codeview
;
31 using namespace llvm::object
;
32 using namespace llvm::pdb
;
34 InputFile::InputFile() {}
35 InputFile::~InputFile() {}
37 static Expected
<ModuleDebugStreamRef
>
38 getModuleDebugStream(PDBFile
&File
, StringRef
&ModuleName
, uint32_t Index
) {
39 ExitOnError
Err("Unexpected error: ");
41 auto &Dbi
= Err(File
.getPDBDbiStream());
42 const auto &Modules
= Dbi
.modules();
43 if (Index
>= Modules
.getModuleCount())
44 return make_error
<RawError
>(raw_error_code::index_out_of_bounds
,
45 "Invalid module index");
47 auto Modi
= Modules
.getModuleDescriptor(Index
);
49 ModuleName
= Modi
.getModuleName();
51 uint16_t ModiStream
= Modi
.getModuleStreamIndex();
52 if (ModiStream
== kInvalidStreamIndex
)
53 return make_error
<RawError
>(raw_error_code::no_stream
,
54 "Module stream not present");
56 auto ModStreamData
= File
.createIndexedStream(ModiStream
);
58 ModuleDebugStreamRef
ModS(Modi
, std::move(ModStreamData
));
59 if (auto EC
= ModS
.reload())
60 return make_error
<RawError
>(raw_error_code::corrupt_file
,
61 "Invalid module stream");
63 return std::move(ModS
);
66 static inline bool isCodeViewDebugSubsection(object::SectionRef Section
,
68 BinaryStreamReader
&Reader
) {
69 if (Expected
<StringRef
> NameOrErr
= Section
.getName()) {
70 if (*NameOrErr
!= Name
)
73 consumeError(NameOrErr
.takeError());
77 Expected
<StringRef
> ContentsOrErr
= Section
.getContents();
79 consumeError(ContentsOrErr
.takeError());
83 Reader
= BinaryStreamReader(*ContentsOrErr
, support::little
);
85 if (Reader
.bytesRemaining() < sizeof(uint32_t))
87 cantFail(Reader
.readInteger(Magic
));
88 if (Magic
!= COFF::DEBUG_SECTION_MAGIC
)
93 static inline bool isDebugSSection(object::SectionRef Section
,
94 DebugSubsectionArray
&Subsections
) {
95 BinaryStreamReader Reader
;
96 if (!isCodeViewDebugSubsection(Section
, ".debug$S", Reader
))
99 cantFail(Reader
.readArray(Subsections
, Reader
.bytesRemaining()));
103 static bool isDebugTSection(SectionRef Section
, CVTypeArray
&Types
) {
104 BinaryStreamReader Reader
;
105 if (!isCodeViewDebugSubsection(Section
, ".debug$T", Reader
) &&
106 !isCodeViewDebugSubsection(Section
, ".debug$P", Reader
))
108 cantFail(Reader
.readArray(Types
, Reader
.bytesRemaining()));
112 static std::string
formatChecksumKind(FileChecksumKind Kind
) {
114 RETURN_CASE(FileChecksumKind
, None
, "None");
115 RETURN_CASE(FileChecksumKind
, MD5
, "MD5");
116 RETURN_CASE(FileChecksumKind
, SHA1
, "SHA-1");
117 RETURN_CASE(FileChecksumKind
, SHA256
, "SHA-256");
119 return formatUnknownEnum(Kind
);
122 template <typename
... Args
>
123 static void formatInternal(LinePrinter
&Printer
, bool Append
, Args
&&... args
) {
125 Printer
.format(std::forward
<Args
>(args
)...);
127 Printer
.formatLine(std::forward
<Args
>(args
)...);
130 SymbolGroup::SymbolGroup(InputFile
*File
, uint32_t GroupIndex
) : File(File
) {
135 initializeForPdb(GroupIndex
);
139 for (const auto &S
: File
->obj().sections()) {
140 DebugSubsectionArray SS
;
141 if (!isDebugSSection(S
, SS
))
144 if (!SC
.hasChecksums() || !SC
.hasStrings())
150 if (SC
.hasChecksums() && SC
.hasStrings())
153 rebuildChecksumMap();
157 StringRef
SymbolGroup::name() const { return Name
; }
159 void SymbolGroup::updateDebugS(const codeview::DebugSubsectionArray
&SS
) {
163 void SymbolGroup::updatePdbModi(uint32_t Modi
) { initializeForPdb(Modi
); }
165 void SymbolGroup::initializeForPdb(uint32_t Modi
) {
166 assert(File
&& File
->isPdb());
168 // PDB always uses the same string table, but each module has its own
169 // checksums. So we only set the strings if they're not already set.
170 if (!SC
.hasStrings()) {
171 auto StringTable
= File
->pdb().getStringTable();
173 SC
.setStrings(StringTable
->getStringTable());
175 consumeError(StringTable
.takeError());
179 auto MDS
= getModuleDebugStream(File
->pdb(), Name
, Modi
);
181 consumeError(MDS
.takeError());
185 DebugStream
= std::make_shared
<ModuleDebugStreamRef
>(std::move(*MDS
));
186 Subsections
= DebugStream
->getSubsectionsArray();
187 SC
.initialize(Subsections
);
188 rebuildChecksumMap();
191 void SymbolGroup::rebuildChecksumMap() {
192 if (!SC
.hasChecksums())
195 for (const auto &Entry
: SC
.checksums()) {
196 auto S
= SC
.strings().getString(Entry
.FileNameOffset
);
199 ChecksumsByFile
[*S
] = Entry
;
203 const ModuleDebugStreamRef
&SymbolGroup::getPdbModuleStream() const {
204 assert(File
&& File
->isPdb() && DebugStream
);
208 Expected
<StringRef
> SymbolGroup::getNameFromStringTable(uint32_t Offset
) const {
209 return SC
.strings().getString(Offset
);
212 void SymbolGroup::formatFromFileName(LinePrinter
&Printer
, StringRef File
,
214 auto FC
= ChecksumsByFile
.find(File
);
215 if (FC
== ChecksumsByFile
.end()) {
216 formatInternal(Printer
, Append
, "- (no checksum) {0}", File
);
220 formatInternal(Printer
, Append
, "- ({0}: {1}) {2}",
221 formatChecksumKind(FC
->getValue().Kind
),
222 toHex(FC
->getValue().Checksum
), File
);
225 void SymbolGroup::formatFromChecksumsOffset(LinePrinter
&Printer
,
228 if (!SC
.hasChecksums()) {
229 formatInternal(Printer
, Append
, "(unknown file name offset {0})", Offset
);
233 auto Iter
= SC
.checksums().getArray().at(Offset
);
234 if (Iter
== SC
.checksums().getArray().end()) {
235 formatInternal(Printer
, Append
, "(unknown file name offset {0})", Offset
);
239 uint32_t FO
= Iter
->FileNameOffset
;
240 auto ExpectedFile
= getNameFromStringTable(FO
);
242 formatInternal(Printer
, Append
, "(unknown file name offset {0})", Offset
);
243 consumeError(ExpectedFile
.takeError());
246 if (Iter
->Kind
== FileChecksumKind::None
) {
247 formatInternal(Printer
, Append
, "{0} (no checksum)", *ExpectedFile
);
249 formatInternal(Printer
, Append
, "{0} ({1}: {2})", *ExpectedFile
,
250 formatChecksumKind(Iter
->Kind
), toHex(Iter
->Checksum
));
254 Expected
<InputFile
> InputFile::open(StringRef Path
, bool AllowUnknownFile
) {
256 if (!llvm::sys::fs::exists(Path
))
257 return make_error
<StringError
>(formatv("File {0} not found", Path
),
258 inconvertibleErrorCode());
261 if (auto EC
= identify_magic(Path
, Magic
))
262 return make_error
<StringError
>(
263 formatv("Unable to identify file type for file {0}", Path
), EC
);
265 if (Magic
== file_magic::coff_object
) {
266 Expected
<OwningBinary
<Binary
>> BinaryOrErr
= createBinary(Path
);
268 return BinaryOrErr
.takeError();
270 IF
.CoffObject
= std::move(*BinaryOrErr
);
271 IF
.PdbOrObj
= llvm::cast
<COFFObjectFile
>(IF
.CoffObject
.getBinary());
272 return std::move(IF
);
275 if (Magic
== file_magic::pdb
) {
276 std::unique_ptr
<IPDBSession
> Session
;
277 if (auto Err
= loadDataForPDB(PDB_ReaderType::Native
, Path
, Session
))
278 return std::move(Err
);
280 IF
.PdbSession
.reset(static_cast<NativeSession
*>(Session
.release()));
281 IF
.PdbOrObj
= &IF
.PdbSession
->getPDBFile();
283 return std::move(IF
);
286 if (!AllowUnknownFile
)
287 return make_error
<StringError
>(
288 formatv("File {0} is not a supported file type", Path
),
289 inconvertibleErrorCode());
291 auto Result
= MemoryBuffer::getFile(Path
, /*IsText=*/false,
292 /*RequiresNullTerminator=*/false);
294 return make_error
<StringError
>(
295 formatv("File {0} could not be opened", Path
), Result
.getError());
297 IF
.UnknownFile
= std::move(*Result
);
298 IF
.PdbOrObj
= IF
.UnknownFile
.get();
299 return std::move(IF
);
302 PDBFile
&InputFile::pdb() {
304 return *PdbOrObj
.get
<PDBFile
*>();
307 const PDBFile
&InputFile::pdb() const {
309 return *PdbOrObj
.get
<PDBFile
*>();
312 object::COFFObjectFile
&InputFile::obj() {
314 return *PdbOrObj
.get
<object::COFFObjectFile
*>();
317 const object::COFFObjectFile
&InputFile::obj() const {
319 return *PdbOrObj
.get
<object::COFFObjectFile
*>();
322 MemoryBuffer
&InputFile::unknown() {
324 return *PdbOrObj
.get
<MemoryBuffer
*>();
327 const MemoryBuffer
&InputFile::unknown() const {
329 return *PdbOrObj
.get
<MemoryBuffer
*>();
332 StringRef
InputFile::getFilePath() const {
334 return pdb().getFilePath();
336 return obj().getFileName();
338 return unknown().getBufferIdentifier();
341 bool InputFile::hasTypes() const {
343 return pdb().hasPDBTpiStream();
345 for (const auto &Section
: obj().sections()) {
347 if (isDebugTSection(Section
, Types
))
353 bool InputFile::hasIds() const {
356 return pdb().hasPDBIpiStream();
359 bool InputFile::isPdb() const { return PdbOrObj
.is
<PDBFile
*>(); }
361 bool InputFile::isObj() const {
362 return PdbOrObj
.is
<object::COFFObjectFile
*>();
365 bool InputFile::isUnknown() const { return PdbOrObj
.is
<MemoryBuffer
*>(); }
367 codeview::LazyRandomTypeCollection
&
368 InputFile::getOrCreateTypeCollection(TypeCollectionKind Kind
) {
369 if (Types
&& Kind
== kTypes
)
371 if (Ids
&& Kind
== kIds
)
375 assert(isPdb() && pdb().hasPDBIpiStream());
378 // If the collection was already initialized, we should have just returned it
381 TypeCollectionPtr
&Collection
= (Kind
== kIds
) ? Ids
: Types
;
382 auto &Stream
= cantFail((Kind
== kIds
) ? pdb().getPDBIpiStream()
383 : pdb().getPDBTpiStream());
385 auto &Array
= Stream
.typeArray();
386 uint32_t Count
= Stream
.getNumTypeRecords();
387 auto Offsets
= Stream
.getTypeIndexOffsets();
389 std::make_unique
<LazyRandomTypeCollection
>(Array
, Count
, Offsets
);
394 assert(Kind
== kTypes
);
397 for (const auto &Section
: obj().sections()) {
399 if (!isDebugTSection(Section
, Records
))
402 Types
= std::make_unique
<LazyRandomTypeCollection
>(Records
, 100);
406 Types
= std::make_unique
<LazyRandomTypeCollection
>(100);
410 codeview::LazyRandomTypeCollection
&InputFile::types() {
411 return getOrCreateTypeCollection(kTypes
);
414 codeview::LazyRandomTypeCollection
&InputFile::ids() {
415 // Object files have only one type stream that contains both types and ids.
416 // Similarly, some PDBs don't contain an IPI stream, and for those both types
417 // and IDs are in the same stream.
418 if (isObj() || !pdb().hasPDBIpiStream())
421 return getOrCreateTypeCollection(kIds
);
424 iterator_range
<SymbolGroupIterator
> InputFile::symbol_groups() {
425 return make_range
<SymbolGroupIterator
>(symbol_groups_begin(),
426 symbol_groups_end());
429 SymbolGroupIterator
InputFile::symbol_groups_begin() {
430 return SymbolGroupIterator(*this);
433 SymbolGroupIterator
InputFile::symbol_groups_end() {
434 return SymbolGroupIterator();
437 SymbolGroupIterator::SymbolGroupIterator() : Value(nullptr) {}
439 SymbolGroupIterator::SymbolGroupIterator(InputFile
&File
) : Value(&File
) {
441 SectionIter
= File
.obj().section_begin();
446 bool SymbolGroupIterator::operator==(const SymbolGroupIterator
&R
) const {
452 if (Value
.File
!= R
.Value
.File
)
454 return Index
== R
.Index
;
457 const SymbolGroup
&SymbolGroupIterator::operator*() const {
461 SymbolGroup
&SymbolGroupIterator::operator*() {
466 SymbolGroupIterator
&SymbolGroupIterator::operator++() {
467 assert(Value
.File
&& !isEnd());
472 if (Value
.File
->isPdb()) {
473 Value
.updatePdbModi(Index
);
481 void SymbolGroupIterator::scanToNextDebugS() {
482 assert(SectionIter
.hasValue());
483 auto End
= Value
.File
->obj().section_end();
484 auto &Iter
= *SectionIter
;
487 while (++Iter
!= End
) {
488 DebugSubsectionArray SS
;
489 SectionRef SR
= *Iter
;
490 if (!isDebugSSection(SR
, SS
))
493 Value
.updateDebugS(SS
);
498 bool SymbolGroupIterator::isEnd() const {
501 if (Value
.File
->isPdb()) {
502 auto &Dbi
= cantFail(Value
.File
->pdb().getPDBDbiStream());
503 uint32_t Count
= Dbi
.modules().getModuleCount();
504 assert(Index
<= Count
);
505 return Index
== Count
;
508 assert(SectionIter
.hasValue());
509 return *SectionIter
== Value
.File
->obj().section_end();