1 //===- InputFile.cpp ------------------------------------------ *- C++ --*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 #include "InputFile.h"
12 #include "FormatUtil.h"
13 #include "LinePrinter.h"
15 #include "llvm/BinaryFormat/Magic.h"
16 #include "llvm/DebugInfo/CodeView/CodeView.h"
17 #include "llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h"
18 #include "llvm/DebugInfo/CodeView/StringsAndChecksums.h"
19 #include "llvm/DebugInfo/PDB/Native/DbiStream.h"
20 #include "llvm/DebugInfo/PDB/Native/NativeSession.h"
21 #include "llvm/DebugInfo/PDB/Native/PDBFile.h"
22 #include "llvm/DebugInfo/PDB/Native/PDBStringTable.h"
23 #include "llvm/DebugInfo/PDB/Native/RawError.h"
24 #include "llvm/DebugInfo/PDB/Native/TpiStream.h"
25 #include "llvm/DebugInfo/PDB/PDB.h"
26 #include "llvm/Object/COFF.h"
27 #include "llvm/Support/FileSystem.h"
28 #include "llvm/Support/FormatVariadic.h"
31 using namespace llvm::codeview
;
32 using namespace llvm::object
;
33 using namespace llvm::pdb
;
35 InputFile::InputFile() {}
36 InputFile::~InputFile() {}
38 static Expected
<ModuleDebugStreamRef
>
39 getModuleDebugStream(PDBFile
&File
, StringRef
&ModuleName
, uint32_t Index
) {
40 ExitOnError
Err("Unexpected error: ");
42 auto &Dbi
= Err(File
.getPDBDbiStream());
43 const auto &Modules
= Dbi
.modules();
44 auto Modi
= Modules
.getModuleDescriptor(Index
);
46 ModuleName
= Modi
.getModuleName();
48 uint16_t ModiStream
= Modi
.getModuleStreamIndex();
49 if (ModiStream
== kInvalidStreamIndex
)
50 return make_error
<RawError
>(raw_error_code::no_stream
,
51 "Module stream not present");
53 auto ModStreamData
= File
.createIndexedStream(ModiStream
);
55 ModuleDebugStreamRef
ModS(Modi
, std::move(ModStreamData
));
56 if (auto EC
= ModS
.reload())
57 return make_error
<RawError
>(raw_error_code::corrupt_file
,
58 "Invalid module stream");
60 return std::move(ModS
);
63 static inline bool isCodeViewDebugSubsection(object::SectionRef Section
,
65 BinaryStreamReader
&Reader
) {
66 StringRef SectionName
, Contents
;
67 if (Section
.getName(SectionName
))
70 if (SectionName
!= Name
)
73 if (Section
.getContents(Contents
))
76 Reader
= BinaryStreamReader(Contents
, support::little
);
78 if (Reader
.bytesRemaining() < sizeof(uint32_t))
80 cantFail(Reader
.readInteger(Magic
));
81 if (Magic
!= COFF::DEBUG_SECTION_MAGIC
)
86 static inline bool isDebugSSection(object::SectionRef Section
,
87 DebugSubsectionArray
&Subsections
) {
88 BinaryStreamReader Reader
;
89 if (!isCodeViewDebugSubsection(Section
, ".debug$S", Reader
))
92 cantFail(Reader
.readArray(Subsections
, Reader
.bytesRemaining()));
96 static bool isDebugTSection(SectionRef Section
, CVTypeArray
&Types
) {
97 BinaryStreamReader Reader
;
98 if (!isCodeViewDebugSubsection(Section
, ".debug$T", Reader
))
100 cantFail(Reader
.readArray(Types
, Reader
.bytesRemaining()));
104 static std::string
formatChecksumKind(FileChecksumKind Kind
) {
106 RETURN_CASE(FileChecksumKind
, None
, "None");
107 RETURN_CASE(FileChecksumKind
, MD5
, "MD5");
108 RETURN_CASE(FileChecksumKind
, SHA1
, "SHA-1");
109 RETURN_CASE(FileChecksumKind
, SHA256
, "SHA-256");
111 return formatUnknownEnum(Kind
);
114 static const DebugStringTableSubsectionRef
&extractStringTable(PDBFile
&File
) {
115 return cantFail(File
.getStringTable()).getStringTable();
118 template <typename
... Args
>
119 static void formatInternal(LinePrinter
&Printer
, bool Append
, Args
&&... args
) {
121 Printer
.format(std::forward
<Args
>(args
)...);
123 Printer
.formatLine(std::forward
<Args
>(args
)...);
126 SymbolGroup::SymbolGroup(InputFile
*File
, uint32_t GroupIndex
) : File(File
) {
131 initializeForPdb(GroupIndex
);
135 for (const auto &S
: File
->obj().sections()) {
136 DebugSubsectionArray SS
;
137 if (!isDebugSSection(S
, SS
))
140 if (!SC
.hasChecksums() || !SC
.hasStrings())
146 if (SC
.hasChecksums() && SC
.hasStrings())
149 rebuildChecksumMap();
153 StringRef
SymbolGroup::name() const { return Name
; }
155 void SymbolGroup::updateDebugS(const codeview::DebugSubsectionArray
&SS
) {
159 void SymbolGroup::updatePdbModi(uint32_t Modi
) { initializeForPdb(Modi
); }
161 void SymbolGroup::initializeForPdb(uint32_t Modi
) {
162 assert(File
&& File
->isPdb());
164 // PDB always uses the same string table, but each module has its own
165 // checksums. So we only set the strings if they're not already set.
166 if (!SC
.hasStrings())
167 SC
.setStrings(extractStringTable(File
->pdb()));
170 auto MDS
= getModuleDebugStream(File
->pdb(), Name
, Modi
);
172 consumeError(MDS
.takeError());
176 DebugStream
= std::make_shared
<ModuleDebugStreamRef
>(std::move(*MDS
));
177 Subsections
= DebugStream
->getSubsectionsArray();
178 SC
.initialize(Subsections
);
179 rebuildChecksumMap();
182 void SymbolGroup::rebuildChecksumMap() {
183 if (!SC
.hasChecksums())
186 for (const auto &Entry
: SC
.checksums()) {
187 auto S
= SC
.strings().getString(Entry
.FileNameOffset
);
190 ChecksumsByFile
[*S
] = Entry
;
194 const ModuleDebugStreamRef
&SymbolGroup::getPdbModuleStream() const {
195 assert(File
&& File
->isPdb() && DebugStream
);
199 Expected
<StringRef
> SymbolGroup::getNameFromStringTable(uint32_t Offset
) const {
200 return SC
.strings().getString(Offset
);
203 void SymbolGroup::formatFromFileName(LinePrinter
&Printer
, StringRef File
,
205 auto FC
= ChecksumsByFile
.find(File
);
206 if (FC
== ChecksumsByFile
.end()) {
207 formatInternal(Printer
, Append
, "- (no checksum) {0}", File
);
211 formatInternal(Printer
, Append
, "- ({0}: {1}) {2}",
212 formatChecksumKind(FC
->getValue().Kind
),
213 toHex(FC
->getValue().Checksum
), File
);
216 void SymbolGroup::formatFromChecksumsOffset(LinePrinter
&Printer
,
219 if (!SC
.hasChecksums()) {
220 formatInternal(Printer
, Append
, "(unknown file name offset {0})", Offset
);
224 auto Iter
= SC
.checksums().getArray().at(Offset
);
225 if (Iter
== SC
.checksums().getArray().end()) {
226 formatInternal(Printer
, Append
, "(unknown file name offset {0})", Offset
);
230 uint32_t FO
= Iter
->FileNameOffset
;
231 auto ExpectedFile
= getNameFromStringTable(FO
);
233 formatInternal(Printer
, Append
, "(unknown file name offset {0})", Offset
);
234 consumeError(ExpectedFile
.takeError());
237 if (Iter
->Kind
== FileChecksumKind::None
) {
238 formatInternal(Printer
, Append
, "{0} (no checksum)", *ExpectedFile
);
240 formatInternal(Printer
, Append
, "{0} ({1}: {2})", *ExpectedFile
,
241 formatChecksumKind(Iter
->Kind
), toHex(Iter
->Checksum
));
245 Expected
<InputFile
> InputFile::open(StringRef Path
) {
247 if (!llvm::sys::fs::exists(Path
))
248 return make_error
<StringError
>(formatv("File {0} not found", Path
),
249 inconvertibleErrorCode());
252 if (auto EC
= identify_magic(Path
, Magic
))
253 return make_error
<StringError
>(
254 formatv("Unable to identify file type for file {0}", Path
), EC
);
256 if (Magic
== file_magic::coff_object
) {
257 Expected
<OwningBinary
<Binary
>> BinaryOrErr
= createBinary(Path
);
259 return BinaryOrErr
.takeError();
261 IF
.CoffObject
= std::move(*BinaryOrErr
);
262 IF
.PdbOrObj
= llvm::cast
<COFFObjectFile
>(IF
.CoffObject
.getBinary());
263 return std::move(IF
);
266 if (Magic
== file_magic::unknown
) {
267 std::unique_ptr
<IPDBSession
> Session
;
268 if (auto Err
= loadDataForPDB(PDB_ReaderType::Native
, Path
, Session
))
269 return std::move(Err
);
271 IF
.PdbSession
.reset(static_cast<NativeSession
*>(Session
.release()));
272 IF
.PdbOrObj
= &IF
.PdbSession
->getPDBFile();
274 return std::move(IF
);
277 return make_error
<StringError
>(
278 formatv("File {0} is not a supported file type", Path
),
279 inconvertibleErrorCode());
282 PDBFile
&InputFile::pdb() {
284 return *PdbOrObj
.get
<PDBFile
*>();
287 const PDBFile
&InputFile::pdb() const {
289 return *PdbOrObj
.get
<PDBFile
*>();
292 object::COFFObjectFile
&InputFile::obj() {
294 return *PdbOrObj
.get
<object::COFFObjectFile
*>();
297 const object::COFFObjectFile
&InputFile::obj() const {
299 return *PdbOrObj
.get
<object::COFFObjectFile
*>();
302 bool InputFile::hasTypes() const {
304 return pdb().hasPDBTpiStream();
306 for (const auto &Section
: obj().sections()) {
308 if (isDebugTSection(Section
, Types
))
314 bool InputFile::hasIds() const {
317 return pdb().hasPDBIpiStream();
320 bool InputFile::isPdb() const { return PdbOrObj
.is
<PDBFile
*>(); }
322 bool InputFile::isObj() const {
323 return PdbOrObj
.is
<object::COFFObjectFile
*>();
326 codeview::LazyRandomTypeCollection
&
327 InputFile::getOrCreateTypeCollection(TypeCollectionKind Kind
) {
328 if (Types
&& Kind
== kTypes
)
330 if (Ids
&& Kind
== kIds
)
334 assert(isPdb() && pdb().hasPDBIpiStream());
337 // If the collection was already initialized, we should have just returned it
340 TypeCollectionPtr
&Collection
= (Kind
== kIds
) ? Ids
: Types
;
341 auto &Stream
= cantFail((Kind
== kIds
) ? pdb().getPDBIpiStream()
342 : pdb().getPDBTpiStream());
344 auto &Array
= Stream
.typeArray();
345 uint32_t Count
= Stream
.getNumTypeRecords();
346 auto Offsets
= Stream
.getTypeIndexOffsets();
348 llvm::make_unique
<LazyRandomTypeCollection
>(Array
, Count
, Offsets
);
353 assert(Kind
== kTypes
);
356 for (const auto &Section
: obj().sections()) {
358 if (!isDebugTSection(Section
, Records
))
361 Types
= llvm::make_unique
<LazyRandomTypeCollection
>(Records
, 100);
365 Types
= llvm::make_unique
<LazyRandomTypeCollection
>(100);
369 codeview::LazyRandomTypeCollection
&InputFile::types() {
370 return getOrCreateTypeCollection(kTypes
);
373 codeview::LazyRandomTypeCollection
&InputFile::ids() {
374 // Object files have only one type stream that contains both types and ids.
375 // Similarly, some PDBs don't contain an IPI stream, and for those both types
376 // and IDs are in the same stream.
377 if (isObj() || !pdb().hasPDBIpiStream())
380 return getOrCreateTypeCollection(kIds
);
383 iterator_range
<SymbolGroupIterator
> InputFile::symbol_groups() {
384 return make_range
<SymbolGroupIterator
>(symbol_groups_begin(),
385 symbol_groups_end());
388 SymbolGroupIterator
InputFile::symbol_groups_begin() {
389 return SymbolGroupIterator(*this);
392 SymbolGroupIterator
InputFile::symbol_groups_end() {
393 return SymbolGroupIterator();
396 SymbolGroupIterator::SymbolGroupIterator() : Value(nullptr) {}
398 SymbolGroupIterator::SymbolGroupIterator(InputFile
&File
) : Value(&File
) {
400 SectionIter
= File
.obj().section_begin();
405 bool SymbolGroupIterator::operator==(const SymbolGroupIterator
&R
) const {
411 if (Value
.File
!= R
.Value
.File
)
413 return Index
== R
.Index
;
416 const SymbolGroup
&SymbolGroupIterator::operator*() const {
420 SymbolGroup
&SymbolGroupIterator::operator*() {
425 SymbolGroupIterator
&SymbolGroupIterator::operator++() {
426 assert(Value
.File
&& !isEnd());
431 if (Value
.File
->isPdb()) {
432 Value
.updatePdbModi(Index
);
440 void SymbolGroupIterator::scanToNextDebugS() {
441 assert(SectionIter
.hasValue());
442 auto End
= Value
.File
->obj().section_end();
443 auto &Iter
= *SectionIter
;
446 while (++Iter
!= End
) {
447 DebugSubsectionArray SS
;
448 SectionRef SR
= *Iter
;
449 if (!isDebugSSection(SR
, SS
))
452 Value
.updateDebugS(SS
);
457 bool SymbolGroupIterator::isEnd() const {
460 if (Value
.File
->isPdb()) {
461 auto &Dbi
= cantFail(Value
.File
->pdb().getPDBDbiStream());
462 uint32_t Count
= Dbi
.modules().getModuleCount();
463 assert(Index
<= Count
);
464 return Index
== Count
;
467 assert(SectionIter
.hasValue());
468 return *SectionIter
== Value
.File
->obj().section_end();