1 //===- InputFile.cpp ------------------------------------------ *- C++ --*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
11 #include "FormatUtil.h"
12 #include "LinePrinter.h"
14 #include "llvm/BinaryFormat/Magic.h"
15 #include "llvm/DebugInfo/CodeView/CodeView.h"
16 #include "llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h"
17 #include "llvm/DebugInfo/CodeView/StringsAndChecksums.h"
18 #include "llvm/DebugInfo/PDB/Native/DbiStream.h"
19 #include "llvm/DebugInfo/PDB/Native/NativeSession.h"
20 #include "llvm/DebugInfo/PDB/Native/PDBFile.h"
21 #include "llvm/DebugInfo/PDB/Native/PDBStringTable.h"
22 #include "llvm/DebugInfo/PDB/Native/RawError.h"
23 #include "llvm/DebugInfo/PDB/Native/TpiStream.h"
24 #include "llvm/DebugInfo/PDB/PDB.h"
25 #include "llvm/Object/COFF.h"
26 #include "llvm/Support/FileSystem.h"
27 #include "llvm/Support/FormatVariadic.h"
30 using namespace llvm::codeview
;
31 using namespace llvm::object
;
32 using namespace llvm::pdb
;
34 InputFile::InputFile() {}
35 InputFile::~InputFile() {}
37 static Expected
<ModuleDebugStreamRef
>
38 getModuleDebugStream(PDBFile
&File
, StringRef
&ModuleName
, uint32_t Index
) {
39 ExitOnError
Err("Unexpected error: ");
41 auto &Dbi
= Err(File
.getPDBDbiStream());
42 const auto &Modules
= Dbi
.modules();
43 if (Index
>= Modules
.getModuleCount())
44 return make_error
<RawError
>(raw_error_code::index_out_of_bounds
,
45 "Invalid module index");
47 auto Modi
= Modules
.getModuleDescriptor(Index
);
49 ModuleName
= Modi
.getModuleName();
51 uint16_t ModiStream
= Modi
.getModuleStreamIndex();
52 if (ModiStream
== kInvalidStreamIndex
)
53 return make_error
<RawError
>(raw_error_code::no_stream
,
54 "Module stream not present");
56 auto ModStreamData
= File
.createIndexedStream(ModiStream
);
58 ModuleDebugStreamRef
ModS(Modi
, std::move(ModStreamData
));
59 if (auto EC
= ModS
.reload())
60 return make_error
<RawError
>(raw_error_code::corrupt_file
,
61 "Invalid module stream");
63 return std::move(ModS
);
// Tests whether Section is the CodeView debug section called Name and sets
// Reader up over its contents.
// Visible steps: fetch the section name and compare it with Name, fetch the
// contents (the error is consumed on the failure path), rebind Reader to the
// contents as little-endian data, require at least sizeof(uint32_t) bytes,
// then read the leading integer into Magic and compare it with
// COFF::DEBUG_SECTION_MAGIC.
// NOTE(review): the embedded line numbers are not contiguous (e.g. 66 -> 68,
// 70 -> 73, 78 -> 82) and `Name` / `Magic` are used without a visible
// declaration. A parameter, a local and the return statements are presumably
// missing from this listing -- restore them from the upstream file.
66 static inline bool isCodeViewDebugSubsection(object::SectionRef Section
,
68 BinaryStreamReader
&Reader
) {
69 StringRef SectionName
;
70 if (Section
.getName(SectionName
))
73 if (SectionName
!= Name
)
76 Expected
<StringRef
> ContentsOrErr
= Section
.getContents();
78 consumeError(ContentsOrErr
.takeError());
82 Reader
= BinaryStreamReader(*ContentsOrErr
, support::little
);
84 if (Reader
.bytesRemaining() < sizeof(uint32_t))
86 cantFail(Reader
.readInteger(Magic
));
87 if (Magic
!= COFF::DEBUG_SECTION_MAGIC
)
// Reads the subsection array of a ".debug$S" section into Subsections.
// The section is first screened with isCodeViewDebugSubsection, which also
// prepares Reader; all bytes still unread in Reader are then decoded as the
// array. The read is wrapped in cantFail, so no error is propagated from it.
// NOTE(review): the embedded line numbers jump 95 -> 98 and stop at 98, so
// the return statements for both outcomes are not visible in this listing.
92 static inline bool isDebugSSection(object::SectionRef Section
,
93 DebugSubsectionArray
&Subsections
) {
94 BinaryStreamReader Reader
;
95 if (!isCodeViewDebugSubsection(Section
, ".debug$S", Reader
))
98 cantFail(Reader
.readArray(Subsections
, Reader
.bytesRemaining()));
// Reads the type-record array of a ".debug$T" or ".debug$P" section into
// Types. ".debug$T" is tried first; ".debug$P" is only tried when that
// check fails. All bytes still unread in Reader are decoded as the array,
// wrapped in cantFail.
// NOTE(review): the embedded line numbers jump 105 -> 107 and stop at 107,
// so the return statements are not visible in this listing.
102 static bool isDebugTSection(SectionRef Section
, CVTypeArray
&Types
) {
103 BinaryStreamReader Reader
;
104 if (!isCodeViewDebugSubsection(Section
, ".debug$T", Reader
) &&
105 !isCodeViewDebugSubsection(Section
, ".debug$P", Reader
))
107 cantFail(Reader
.readArray(Types
, Reader
.bytesRemaining()));
// Maps a FileChecksumKind to its display string ("None", "MD5", "SHA-1",
// "SHA-256"). A value not covered by the RETURN_CASE entries is rendered by
// formatUnknownEnum.
// NOTE(review): embedded line numbers 112 and 117 are absent; the
// RETURN_CASE entries presumably sit inside a `switch (Kind)` that this
// listing dropped -- confirm against the upstream file.
111 static std::string
formatChecksumKind(FileChecksumKind Kind
) {
113 RETURN_CASE(FileChecksumKind
, None
, "None");
114 RETURN_CASE(FileChecksumKind
, MD5
, "MD5");
115 RETURN_CASE(FileChecksumKind
, SHA1
, "SHA-1");
116 RETURN_CASE(FileChecksumKind
, SHA256
, "SHA-256");
118 return formatUnknownEnum(Kind
);
// Perfect-forwards args to either Printer.format or Printer.formatLine.
// NOTE(review): embedded line numbers 123 and 125 are absent, so the
// condition choosing between the two calls is not visible. Presumably it is
// the otherwise unused Append flag -- confirm against the upstream file.
121 template <typename
... Args
>
122 static void formatInternal(LinePrinter
&Printer
, bool Append
, Args
&&... args
) {
124 Printer
.format(std::forward
<Args
>(args
)...);
126 Printer
.formatLine(std::forward
<Args
>(args
)...);
// Builds the symbol group identified by GroupIndex within File.
// Visible pieces: a call to initializeForPdb(GroupIndex); a loop over
// File->obj().sections() that screens each section with isDebugSSection and
// tests whether SC already has both checksums and strings; and a final
// rebuildChecksumMap() once SC has checksums and strings.
// NOTE(review): the embedded line numbers are not contiguous (129 -> 134 ->
// 138, 140 -> 143 -> 149 -> 152). The branch conditions, the statements
// controlled by the visible `if`s and the loop exits are missing, so the
// exact control flow cannot be read from this listing.
129 SymbolGroup::SymbolGroup(InputFile
*File
, uint32_t GroupIndex
) : File(File
) {
134 initializeForPdb(GroupIndex
);
138 for (const auto &S
: File
->obj().sections()) {
139 DebugSubsectionArray SS
;
140 if (!isDebugSSection(S
, SS
))
143 if (!SC
.hasChecksums() || !SC
.hasStrings())
149 if (SC
.hasChecksums() && SC
.hasStrings())
152 rebuildChecksumMap();
156 StringRef
SymbolGroup::name() const { return Name
; }
// Takes a new array of debug subsections (SS) for this group.
// NOTE(review): only the signature (embedded line 158) survives in this
// listing; the body is missing, so what is done with SS cannot be stated
// from here.
158 void SymbolGroup::updateDebugS(const codeview::DebugSubsectionArray
&SS
) {
162 void SymbolGroup::updatePdbModi(uint32_t Modi
) { initializeForPdb(Modi
); }
// Points this group at PDB module Modi. Requires a non-null File that is a
// PDB (asserted).
// Visible steps: give SC the PDB's string table if SC has none yet; load the
// module's debug stream through getModuleDebugStream, which also writes the
// module name into Name; keep the stream in DebugStream; take its subsection
// array; initialize SC from it; rebuild the checksum map.
// Errors from the string table lookup and from getModuleDebugStream are
// consumed rather than reported.
// NOTE(review): the embedded line numbers are not contiguous (170 -> 172 ->
// 174 -> 178 -> 180 -> 184). The conditions guarding the two consumeError
// calls, and whatever follows them, are missing from this listing.
164 void SymbolGroup::initializeForPdb(uint32_t Modi
) {
165 assert(File
&& File
->isPdb());
167 // PDB always uses the same string table, but each module has its own
168 // checksums. So we only set the strings if they're not already set.
169 if (!SC
.hasStrings()) {
170 auto StringTable
= File
->pdb().getStringTable();
172 SC
.setStrings(StringTable
->getStringTable());
174 consumeError(StringTable
.takeError());
178 auto MDS
= getModuleDebugStream(File
->pdb(), Name
, Modi
);
180 consumeError(MDS
.takeError());
184 DebugStream
= std::make_shared
<ModuleDebugStreamRef
>(std::move(*MDS
));
185 Subsections
= DebugStream
->getSubsectionsArray();
186 SC
.initialize(Subsections
);
187 rebuildChecksumMap();
// Fills ChecksumsByFile from SC: for every checksum entry the file name is
// looked up in SC's string table via Entry.FileNameOffset, and the entry is
// stored under that name.
// NOTE(review): the embedded line numbers jump 191 -> 194 and 195 -> 198, so
// the statement controlled by the hasChecksums() test and any handling of a
// failed string lookup (S is dereferenced on line 198) are not visible here.
190 void SymbolGroup::rebuildChecksumMap() {
191 if (!SC
.hasChecksums())
194 for (const auto &Entry
: SC
.checksums()) {
195 auto S
= SC
.strings().getString(Entry
.FileNameOffset
);
198 ChecksumsByFile
[*S
] = Entry
;
// Accessor for the module debug stream of a PDB-backed group. Asserts that
// File is non-null, is a PDB, and that DebugStream has been set.
// NOTE(review): the return statement is missing from this listing (nothing
// follows embedded line 203).
202 const ModuleDebugStreamRef
&SymbolGroup::getPdbModuleStream() const {
203 assert(File
&& File
->isPdb() && DebugStream
);
207 Expected
<StringRef
> SymbolGroup::getNameFromStringTable(uint32_t Offset
) const {
208 return SC
.strings().getString(Offset
);
// Prints File together with its checksum as recorded in ChecksumsByFile:
// "- (no checksum) <file>" when the map has no entry for File, otherwise
// "- (<kind>: <hex checksum>) <file>".
// NOTE(review): embedded line numbers 212 and 216-218 are absent. The rest
// of the parameter list (the body uses `Append`) and whatever separates the
// two formatInternal calls are not visible in this listing.
211 void SymbolGroup::formatFromFileName(LinePrinter
&Printer
, StringRef File
,
213 auto FC
= ChecksumsByFile
.find(File
);
214 if (FC
== ChecksumsByFile
.end()) {
215 formatInternal(Printer
, Append
, "- (no checksum) {0}", File
);
219 formatInternal(Printer
, Append
, "- ({0}: {1}) {2}",
220 formatChecksumKind(FC
->getValue().Kind
),
221 toHex(FC
->getValue().Checksum
), File
);
// Prints the file name, and checksum if any, of the checksum entry found at
// Offset in SC's checksum array.
// "(unknown file name offset <Offset>)" is printed when SC has no checksums,
// when the lookup at Offset yields the array's end iterator, and on the path
// that consumes ExpectedFile's error. Otherwise the output is
// "<file> (no checksum)" for FileChecksumKind::None, or
// "<file> (<kind>: <hex checksum>)".
// NOTE(review): the embedded line numbers are not contiguous (224 -> 227,
// 228 -> 232, 239 -> 241, 246 -> 248). The rest of the parameter list (the
// body uses `Append` and `Offset`), the early returns and the test on
// ExpectedFile are missing from this listing.
224 void SymbolGroup::formatFromChecksumsOffset(LinePrinter
&Printer
,
227 if (!SC
.hasChecksums()) {
228 formatInternal(Printer
, Append
, "(unknown file name offset {0})", Offset
);
232 auto Iter
= SC
.checksums().getArray().at(Offset
);
233 if (Iter
== SC
.checksums().getArray().end()) {
234 formatInternal(Printer
, Append
, "(unknown file name offset {0})", Offset
);
238 uint32_t FO
= Iter
->FileNameOffset
;
239 auto ExpectedFile
= getNameFromStringTable(FO
);
241 formatInternal(Printer
, Append
, "(unknown file name offset {0})", Offset
);
242 consumeError(ExpectedFile
.takeError());
245 if (Iter
->Kind
== FileChecksumKind::None
) {
246 formatInternal(Printer
, Append
, "{0} (no checksum)", *ExpectedFile
);
248 formatInternal(Printer
, Append
, "{0} ({1}: {2})", *ExpectedFile
,
249 formatChecksumKind(Iter
->Kind
), toHex(Iter
->Checksum
));
// Opens Path and wraps it in an InputFile according to its file magic:
//   - COFF object: loaded with createBinary, kept in CoffObject;
//   - PDB: loaded with loadDataForPDB using the native reader, kept in
//     PdbSession;
//   - any other magic: accepted only when AllowUnknownFile is set, read with
//     MemoryBuffer::getFile and kept in UnknownFile.
// In each case PdbOrObj is pointed at the loaded object.
// Returns a StringError when the file does not exist, its magic cannot be
// identified, its type is unsupported, or the buffer cannot be opened.
// Errors from createBinary and loadDataForPDB are returned as they are.
// NOTE(review): the embedded line numbers are not contiguous (e.g. 253 ->
// 255, 257 -> 260, 265 -> 267, 290 -> 292) and `IF` / `Magic` are used
// without a visible declaration, so several declarations and guarding
// conditions are missing from this listing.
253 Expected
<InputFile
> InputFile::open(StringRef Path
, bool AllowUnknownFile
) {
255 if (!llvm::sys::fs::exists(Path
))
256 return make_error
<StringError
>(formatv("File {0} not found", Path
),
257 inconvertibleErrorCode());
260 if (auto EC
= identify_magic(Path
, Magic
))
261 return make_error
<StringError
>(
262 formatv("Unable to identify file type for file {0}", Path
), EC
);
264 if (Magic
== file_magic::coff_object
) {
265 Expected
<OwningBinary
<Binary
>> BinaryOrErr
= createBinary(Path
);
267 return BinaryOrErr
.takeError();
269 IF
.CoffObject
= std::move(*BinaryOrErr
);
270 IF
.PdbOrObj
= llvm::cast
<COFFObjectFile
>(IF
.CoffObject
.getBinary());
271 return std::move(IF
);
274 if (Magic
== file_magic::pdb
) {
275 std::unique_ptr
<IPDBSession
> Session
;
276 if (auto Err
= loadDataForPDB(PDB_ReaderType::Native
, Path
, Session
))
277 return std::move(Err
);
279 IF
.PdbSession
.reset(static_cast<NativeSession
*>(Session
.release()));
280 IF
.PdbOrObj
= &IF
.PdbSession
->getPDBFile();
282 return std::move(IF
);
285 if (!AllowUnknownFile
)
286 return make_error
<StringError
>(
287 formatv("File {0} is not a supported file type", Path
),
288 inconvertibleErrorCode());
290 auto Result
= MemoryBuffer::getFile(Path
, -1LL, false);
292 return make_error
<StringError
>(
293 formatv("File {0} could not be opened", Path
), Result
.getError());
295 IF
.UnknownFile
= std::move(*Result
);
296 IF
.PdbOrObj
= IF
.UnknownFile
.get();
297 return std::move(IF
);
// Returns the held file as a PDBFile by dereferencing the PDBFile pointer
// stored in PdbOrObj.
// NOTE(review): embedded line number 301 is absent, so a statement may be
// missing before the return.
300 PDBFile
&InputFile::pdb() {
302 return *PdbOrObj
.get
<PDBFile
*>();
// Const overload: returns the held file as a const PDBFile by dereferencing
// the PDBFile pointer stored in PdbOrObj.
// NOTE(review): embedded line number 306 is absent, so a statement may be
// missing before the return.
305 const PDBFile
&InputFile::pdb() const {
307 return *PdbOrObj
.get
<PDBFile
*>();
// Returns the held file as a COFFObjectFile by dereferencing the
// COFFObjectFile pointer stored in PdbOrObj.
// NOTE(review): embedded line number 311 is absent, so a statement may be
// missing before the return.
310 object::COFFObjectFile
&InputFile::obj() {
312 return *PdbOrObj
.get
<object::COFFObjectFile
*>();
// Const overload: returns the held file as a const COFFObjectFile by
// dereferencing the COFFObjectFile pointer stored in PdbOrObj.
// NOTE(review): embedded line number 316 is absent, so a statement may be
// missing before the return.
315 const object::COFFObjectFile
&InputFile::obj() const {
317 return *PdbOrObj
.get
<object::COFFObjectFile
*>();
// Returns the held file as a raw MemoryBuffer by dereferencing the
// MemoryBuffer pointer stored in PdbOrObj.
// NOTE(review): embedded line number 321 is absent, so a statement may be
// missing before the return.
320 MemoryBuffer
&InputFile::unknown() {
322 return *PdbOrObj
.get
<MemoryBuffer
*>();
// Const overload: returns the held file as a const MemoryBuffer by
// dereferencing the MemoryBuffer pointer stored in PdbOrObj.
// NOTE(review): embedded line number 326 is absent, so a statement may be
// missing before the return.
325 const MemoryBuffer
&InputFile::unknown() const {
327 return *PdbOrObj
.get
<MemoryBuffer
*>();
// Returns a path or identifier for the held file, taken from one of
// pdb().getFilePath(), obj().getFileName() or
// unknown().getBufferIdentifier().
// NOTE(review): embedded line numbers 331, 333 and 335 are absent, so the
// conditions selecting among the three returns are not visible. Presumably
// they test isPdb() / isObj() -- confirm against the upstream file.
330 StringRef
InputFile::getFilePath() const {
332 return pdb().getFilePath();
334 return obj().getFileName();
336 return unknown().getBufferIdentifier();
// Reports whether the input carries type records. Two sources are visible:
// pdb().hasPDBTpiStream(), and a scan of obj().sections() that tests each
// section with isDebugTSection.
// NOTE(review): the embedded line numbers are not contiguous (339 -> 341 ->
// 343 -> 345). The condition choosing between the two paths, the
// declaration of the local `Types` passed to isDebugTSection, and the return
// statements of the scan are missing from this listing.
339 bool InputFile::hasTypes() const {
341 return pdb().hasPDBTpiStream();
343 for (const auto &Section
: obj().sections()) {
345 if (isDebugTSection(Section
, Types
))
// Reports whether the input has an ID stream; the visible return forwards to
// pdb().hasPDBIpiStream().
// NOTE(review): embedded line numbers 352-353 are absent, so a statement
// (possibly an early return for non-PDB inputs) may be missing before the
// visible return.
351 bool InputFile::hasIds() const {
354 return pdb().hasPDBIpiStream();
357 bool InputFile::isPdb() const { return PdbOrObj
.is
<PDBFile
*>(); }
359 bool InputFile::isObj() const {
360 return PdbOrObj
.is
<object::COFFObjectFile
*>();
363 bool InputFile::isUnknown() const { return PdbOrObj
.is
<MemoryBuffer
*>(); }
// Returns the type collection selected by Kind (kTypes or kIds), building it
// when needed. Visible pieces:
//   - tests for an already populated Types (Kind == kTypes) or Ids
//     (Kind == kIds);
//   - an assertion that the input is a PDB with an IPI stream (its guarding
//     condition, if any, is not visible);
//   - a PDB path that picks the IPI or TPI stream by Kind and builds a
//     LazyRandomTypeCollection from the stream's type array, record count
//     and type-index offsets;
//   - an object-file path (asserting Kind == kTypes) that builds the
//     collection from Records filled in by isDebugTSection, with 100 as the
//     second argument, or from 100 alone.
// NOTE(review): the embedded line numbers are not contiguous (e.g. 367 ->
// 369 -> 373, 385 -> 387, 397 -> 400 -> 404). The early returns, the target
// of the make_unique on line 387, the declaration of `Records` and the
// branch structure are missing from this listing.
365 codeview::LazyRandomTypeCollection
&
366 InputFile::getOrCreateTypeCollection(TypeCollectionKind Kind
) {
367 if (Types
&& Kind
== kTypes
)
369 if (Ids
&& Kind
== kIds
)
373 assert(isPdb() && pdb().hasPDBIpiStream());
376 // If the collection was already initialized, we should have just returned it
379 TypeCollectionPtr
&Collection
= (Kind
== kIds
) ? Ids
: Types
;
380 auto &Stream
= cantFail((Kind
== kIds
) ? pdb().getPDBIpiStream()
381 : pdb().getPDBTpiStream());
383 auto &Array
= Stream
.typeArray();
384 uint32_t Count
= Stream
.getNumTypeRecords();
385 auto Offsets
= Stream
.getTypeIndexOffsets();
387 llvm::make_unique
<LazyRandomTypeCollection
>(Array
, Count
, Offsets
);
392 assert(Kind
== kTypes
);
395 for (const auto &Section
: obj().sections()) {
397 if (!isDebugTSection(Section
, Records
))
400 Types
= llvm::make_unique
<LazyRandomTypeCollection
>(Records
, 100);
404 Types
= llvm::make_unique
<LazyRandomTypeCollection
>(100);
408 codeview::LazyRandomTypeCollection
&InputFile::types() {
409 return getOrCreateTypeCollection(kTypes
);
// Accessor for the ID-record collection; the visible return asks
// getOrCreateTypeCollection for kIds.
// NOTE(review): embedded line numbers 417-418 are absent, so the statement
// controlled by the `if` on line 416 is not visible. Going by the comment
// below it presumably returns the shared type collection -- confirm against
// the upstream file.
412 codeview::LazyRandomTypeCollection
&InputFile::ids() {
413 // Object files have only one type stream that contains both types and ids.
414 // Similarly, some PDBs don't contain an IPI stream, and for those both types
415 // and IDs are in the same stream.
416 if (isObj() || !pdb().hasPDBIpiStream())
419 return getOrCreateTypeCollection(kIds
);
422 iterator_range
<SymbolGroupIterator
> InputFile::symbol_groups() {
423 return make_range
<SymbolGroupIterator
>(symbol_groups_begin(),
424 symbol_groups_end());
427 SymbolGroupIterator
InputFile::symbol_groups_begin() {
428 return SymbolGroupIterator(*this);
431 SymbolGroupIterator
InputFile::symbol_groups_end() {
432 return SymbolGroupIterator();
435 SymbolGroupIterator::SymbolGroupIterator() : Value(nullptr) {}
// Builds an iterator over File's symbol groups; Value is constructed from
// &File. The visible statement seeds SectionIter with the first section of
// File.obj().
// NOTE(review): embedded line numbers 438 and 440-442 are absent, so the
// condition guarding the obj() access and whatever follows it are not
// visible in this listing.
437 SymbolGroupIterator::SymbolGroupIterator(InputFile
&File
) : Value(&File
) {
439 SectionIter
= File
.obj().section_begin();
// Equality of two iterators. The visible checks compare the File pointers of
// the two groups and, as the final result, the two Index values.
// NOTE(review): embedded line numbers 445-449 and 451 are absent, so earlier
// checks and the statement controlled by the File comparison are missing
// from this listing.
444 bool SymbolGroupIterator::operator==(const SymbolGroupIterator
&R
) const {
450 if (Value
.File
!= R
.Value
.File
)
452 return Index
== R
.Index
;
// Const dereference, yielding a const SymbolGroup reference.
// NOTE(review): only the signature (embedded line 455) survives in this
// listing; the body is missing.
455 const SymbolGroup
&SymbolGroupIterator::operator*() const {
// Mutable dereference, yielding a SymbolGroup reference.
// NOTE(review): only the signature (embedded line 459) survives in this
// listing; the body is missing.
459 SymbolGroup
&SymbolGroupIterator::operator*() {
// Pre-increment. Asserts that the iterator is bound to a file and is not
// already at the end. For a PDB the group is re-pointed at module Index via
// updatePdbModi.
// NOTE(review): embedded line numbers 466-469 are absent and nothing follows
// line 471. Whatever advances Index, the non-PDB path and the return
// statement are missing from this listing.
464 SymbolGroupIterator
&SymbolGroupIterator::operator++() {
465 assert(Value
.File
&& !isEnd());
470 if (Value
.File
->isPdb()) {
471 Value
.updatePdbModi(Index
);
// Advances the section iterator of an object file looking for a section that
// isDebugSSection accepts, and hands that section's subsections to
// Value.updateDebugS. Requires SectionIter to hold a value (asserted).
// Iter is a reference into SectionIter, so advancing it moves the stored
// iterator, and the pre-increment in the loop condition means the current
// section is never re-examined.
// NOTE(review): embedded line numbers 483-484, 489-490 and everything after
// 491 are absent. The statement controlled by the `if` on line 488 and the
// loop exit after updateDebugS are missing from this listing.
479 void SymbolGroupIterator::scanToNextDebugS() {
480 assert(SectionIter
.hasValue());
481 auto End
= Value
.File
->obj().section_end();
482 auto &Iter
= *SectionIter
;
485 while (++Iter
!= End
) {
486 DebugSubsectionArray SS
;
487 SectionRef SR
= *Iter
;
488 if (!isDebugSSection(SR
, SS
))
491 Value
.updateDebugS(SS
);
// Tells whether the iterator has moved past the last group of its file.
// PDB: true when Index equals the module count of the DBI stream (Index is
// asserted never to exceed it). Otherwise: true when the stored section
// iterator equals the object file's section_end(); SectionIter must hold a
// value (asserted).
// NOTE(review): embedded line numbers 497-498 and 504-505 are absent and the
// closing brace is not visible. A statement before the PDB test (possibly a
// check for a null Value.File, which is dereferenced below) may be missing.
496 bool SymbolGroupIterator::isEnd() const {
499 if (Value
.File
->isPdb()) {
500 auto &Dbi
= cantFail(Value
.File
->pdb().getPDBDbiStream());
501 uint32_t Count
= Dbi
.modules().getModuleCount();
502 assert(Index
<= Count
);
503 return Index
== Count
;
506 assert(SectionIter
.hasValue());
507 return *SectionIter
== Value
.File
->obj().section_end();