//===-- LLVMSymbolize.cpp -------------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Implementation for LLVM symbolization library.
//
//===----------------------------------------------------------------------===//

#include "llvm/DebugInfo/Symbolize/Symbolize.h"

#include "SymbolizableObjectFile.h"

#include "llvm/ADT/STLExtras.h"
#include "llvm/BinaryFormat/COFF.h"
#include "llvm/Config/config.h"
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
#include "llvm/DebugInfo/PDB/PDB.h"
#include "llvm/DebugInfo/PDB/PDBContext.h"
#include "llvm/Demangle/Demangle.h"
#include "llvm/Object/COFF.h"
#include "llvm/Object/MachO.h"
#include "llvm/Object/MachOUniversal.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compression.h"
#include "llvm/Support/DataExtractor.h"
#include "llvm/Support/Errc.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
#include <algorithm>
#include <cassert>
#include <cstdlib>
#include <cstring>
43 // This must be included after windows.h.
45 #pragma comment(lib, "dbghelp.lib")
47 // Windows.h conflicts with our COFF header definitions.
48 #ifdef IMAGE_FILE_MACHINE_I386
49 #undef IMAGE_FILE_MACHINE_I386
57 LLVMSymbolizer::symbolizeCode(const std::string
&ModuleName
,
58 uint64_t ModuleOffset
, StringRef DWPName
) {
59 SymbolizableModule
*Info
;
60 if (auto InfoOrErr
= getOrCreateModuleInfo(ModuleName
, DWPName
))
61 Info
= InfoOrErr
.get();
63 return InfoOrErr
.takeError();
65 // A null module means an error has already been reported. Return an empty
70 // If the user is giving us relative addresses, add the preferred base of the
71 // object to the offset before we do the query. It's what DIContext expects.
72 if (Opts
.RelativeAddresses
)
73 ModuleOffset
+= Info
->getModulePreferredBase();
75 DILineInfo LineInfo
= Info
->symbolizeCode(ModuleOffset
, Opts
.PrintFunctions
,
78 LineInfo
.FunctionName
= DemangleName(LineInfo
.FunctionName
, Info
);
82 Expected
<DIInliningInfo
>
83 LLVMSymbolizer::symbolizeInlinedCode(const std::string
&ModuleName
,
84 uint64_t ModuleOffset
, StringRef DWPName
) {
85 SymbolizableModule
*Info
;
86 if (auto InfoOrErr
= getOrCreateModuleInfo(ModuleName
, DWPName
))
87 Info
= InfoOrErr
.get();
89 return InfoOrErr
.takeError();
91 // A null module means an error has already been reported. Return an empty
94 return DIInliningInfo();
96 // If the user is giving us relative addresses, add the preferred base of the
97 // object to the offset before we do the query. It's what DIContext expects.
98 if (Opts
.RelativeAddresses
)
99 ModuleOffset
+= Info
->getModulePreferredBase();
101 DIInliningInfo InlinedContext
= Info
->symbolizeInlinedCode(
102 ModuleOffset
, Opts
.PrintFunctions
, Opts
.UseSymbolTable
);
104 for (int i
= 0, n
= InlinedContext
.getNumberOfFrames(); i
< n
; i
++) {
105 auto *Frame
= InlinedContext
.getMutableFrame(i
);
106 Frame
->FunctionName
= DemangleName(Frame
->FunctionName
, Info
);
109 return InlinedContext
;
112 Expected
<DIGlobal
> LLVMSymbolizer::symbolizeData(const std::string
&ModuleName
,
113 uint64_t ModuleOffset
) {
114 SymbolizableModule
*Info
;
115 if (auto InfoOrErr
= getOrCreateModuleInfo(ModuleName
))
116 Info
= InfoOrErr
.get();
118 return InfoOrErr
.takeError();
120 // A null module means an error has already been reported. Return an empty
125 // If the user is giving us relative addresses, add the preferred base of
126 // the object to the offset before we do the query. It's what DIContext
128 if (Opts
.RelativeAddresses
)
129 ModuleOffset
+= Info
->getModulePreferredBase();
131 DIGlobal Global
= Info
->symbolizeData(ModuleOffset
);
133 Global
.Name
= DemangleName(Global
.Name
, Info
);
137 void LLVMSymbolizer::flush() {
138 ObjectForUBPathAndArch
.clear();
139 BinaryForPath
.clear();
140 ObjectPairForPathArch
.clear();
146 // For Path="/path/to/foo" and Basename="foo" assume that debug info is in
147 // /path/to/foo.dSYM/Contents/Resources/DWARF/foo.
148 // For Path="/path/to/bar.dSYM" and Basename="foo" assume that debug info is in
149 // /path/to/bar.dSYM/Contents/Resources/DWARF/foo.
150 std::string
getDarwinDWARFResourceForPath(
151 const std::string
&Path
, const std::string
&Basename
) {
152 SmallString
<16> ResourceName
= StringRef(Path
);
153 if (sys::path::extension(Path
) != ".dSYM") {
154 ResourceName
+= ".dSYM";
156 sys::path::append(ResourceName
, "Contents", "Resources", "DWARF");
157 sys::path::append(ResourceName
, Basename
);
158 return ResourceName
.str();
161 bool checkFileCRC(StringRef Path
, uint32_t CRCHash
) {
162 ErrorOr
<std::unique_ptr
<MemoryBuffer
>> MB
=
163 MemoryBuffer::getFileOrSTDIN(Path
);
166 return !zlib::isAvailable() || CRCHash
== zlib::crc32(MB
.get()->getBuffer());
169 bool findDebugBinary(const std::string
&OrigPath
,
170 const std::string
&DebuglinkName
, uint32_t CRCHash
,
171 std::string
&Result
) {
172 std::string OrigRealPath
= OrigPath
;
173 #if defined(HAVE_REALPATH)
174 if (char *RP
= realpath(OrigPath
.c_str(), nullptr)) {
179 SmallString
<16> OrigDir(OrigRealPath
);
180 llvm::sys::path::remove_filename(OrigDir
);
181 SmallString
<16> DebugPath
= OrigDir
;
182 // Try /path/to/original_binary/debuglink_name
183 llvm::sys::path::append(DebugPath
, DebuglinkName
);
184 if (checkFileCRC(DebugPath
, CRCHash
)) {
185 Result
= DebugPath
.str();
188 // Try /path/to/original_binary/.debug/debuglink_name
190 llvm::sys::path::append(DebugPath
, ".debug", DebuglinkName
);
191 if (checkFileCRC(DebugPath
, CRCHash
)) {
192 Result
= DebugPath
.str();
195 #if defined(__NetBSD__)
196 // Try /usr/libdata/debug/path/to/original_binary/debuglink_name
197 DebugPath
= "/usr/libdata/debug";
199 // Try /usr/lib/debug/path/to/original_binary/debuglink_name
200 DebugPath
= "/usr/lib/debug";
202 llvm::sys::path::append(DebugPath
, llvm::sys::path::relative_path(OrigDir
),
204 if (checkFileCRC(DebugPath
, CRCHash
)) {
205 Result
= DebugPath
.str();
211 bool getGNUDebuglinkContents(const ObjectFile
*Obj
, std::string
&DebugName
,
215 for (const SectionRef
&Section
: Obj
->sections()) {
217 Section
.getName(Name
);
218 Name
= Name
.substr(Name
.find_first_not_of("._"));
219 if (Name
== "gnu_debuglink") {
221 Section
.getContents(Data
);
222 DataExtractor
DE(Data
, Obj
->isLittleEndian(), 0);
224 if (const char *DebugNameStr
= DE
.getCStr(&Offset
)) {
225 // 4-byte align the offset.
226 Offset
= (Offset
+ 3) & ~0x3;
227 if (DE
.isValidOffsetForDataOfSize(Offset
, 4)) {
228 DebugName
= DebugNameStr
;
229 CRCHash
= DE
.getU32(&Offset
);
239 bool darwinDsymMatchesBinary(const MachOObjectFile
*DbgObj
,
240 const MachOObjectFile
*Obj
) {
241 ArrayRef
<uint8_t> dbg_uuid
= DbgObj
->getUuid();
242 ArrayRef
<uint8_t> bin_uuid
= Obj
->getUuid();
243 if (dbg_uuid
.empty() || bin_uuid
.empty())
245 return !memcmp(dbg_uuid
.data(), bin_uuid
.data(), dbg_uuid
.size());
248 } // end anonymous namespace
250 ObjectFile
*LLVMSymbolizer::lookUpDsymFile(const std::string
&ExePath
,
251 const MachOObjectFile
*MachExeObj
, const std::string
&ArchName
) {
252 // On Darwin we may find DWARF in separate object file in
253 // resource directory.
254 std::vector
<std::string
> DsymPaths
;
255 StringRef Filename
= sys::path::filename(ExePath
);
256 DsymPaths
.push_back(getDarwinDWARFResourceForPath(ExePath
, Filename
));
257 for (const auto &Path
: Opts
.DsymHints
) {
258 DsymPaths
.push_back(getDarwinDWARFResourceForPath(Path
, Filename
));
260 for (const auto &Path
: DsymPaths
) {
261 auto DbgObjOrErr
= getOrCreateObject(Path
, ArchName
);
263 // Ignore errors, the file might not exist.
264 consumeError(DbgObjOrErr
.takeError());
267 ObjectFile
*DbgObj
= DbgObjOrErr
.get();
270 const MachOObjectFile
*MachDbgObj
= dyn_cast
<const MachOObjectFile
>(DbgObj
);
273 if (darwinDsymMatchesBinary(MachDbgObj
, MachExeObj
))
279 ObjectFile
*LLVMSymbolizer::lookUpDebuglinkObject(const std::string
&Path
,
280 const ObjectFile
*Obj
,
281 const std::string
&ArchName
) {
282 std::string DebuglinkName
;
284 std::string DebugBinaryPath
;
285 if (!getGNUDebuglinkContents(Obj
, DebuglinkName
, CRCHash
))
287 if (!findDebugBinary(Path
, DebuglinkName
, CRCHash
, DebugBinaryPath
))
289 auto DbgObjOrErr
= getOrCreateObject(DebugBinaryPath
, ArchName
);
291 // Ignore errors, the file might not exist.
292 consumeError(DbgObjOrErr
.takeError());
295 return DbgObjOrErr
.get();
298 Expected
<LLVMSymbolizer::ObjectPair
>
299 LLVMSymbolizer::getOrCreateObjectPair(const std::string
&Path
,
300 const std::string
&ArchName
) {
301 const auto &I
= ObjectPairForPathArch
.find(std::make_pair(Path
, ArchName
));
302 if (I
!= ObjectPairForPathArch
.end()) {
306 auto ObjOrErr
= getOrCreateObject(Path
, ArchName
);
308 ObjectPairForPathArch
.insert(std::make_pair(std::make_pair(Path
, ArchName
),
309 ObjectPair(nullptr, nullptr)));
310 return ObjOrErr
.takeError();
313 ObjectFile
*Obj
= ObjOrErr
.get();
314 assert(Obj
!= nullptr);
315 ObjectFile
*DbgObj
= nullptr;
317 if (auto MachObj
= dyn_cast
<const MachOObjectFile
>(Obj
))
318 DbgObj
= lookUpDsymFile(Path
, MachObj
, ArchName
);
320 DbgObj
= lookUpDebuglinkObject(Path
, Obj
, ArchName
);
323 ObjectPair Res
= std::make_pair(Obj
, DbgObj
);
324 ObjectPairForPathArch
.insert(
325 std::make_pair(std::make_pair(Path
, ArchName
), Res
));
329 Expected
<ObjectFile
*>
330 LLVMSymbolizer::getOrCreateObject(const std::string
&Path
,
331 const std::string
&ArchName
) {
332 const auto &I
= BinaryForPath
.find(Path
);
333 Binary
*Bin
= nullptr;
334 if (I
== BinaryForPath
.end()) {
335 Expected
<OwningBinary
<Binary
>> BinOrErr
= createBinary(Path
);
337 BinaryForPath
.insert(std::make_pair(Path
, OwningBinary
<Binary
>()));
338 return BinOrErr
.takeError();
340 Bin
= BinOrErr
->getBinary();
341 BinaryForPath
.insert(std::make_pair(Path
, std::move(BinOrErr
.get())));
343 Bin
= I
->second
.getBinary();
347 return static_cast<ObjectFile
*>(nullptr);
349 if (MachOUniversalBinary
*UB
= dyn_cast_or_null
<MachOUniversalBinary
>(Bin
)) {
350 const auto &I
= ObjectForUBPathAndArch
.find(std::make_pair(Path
, ArchName
));
351 if (I
!= ObjectForUBPathAndArch
.end()) {
352 return I
->second
.get();
354 Expected
<std::unique_ptr
<ObjectFile
>> ObjOrErr
=
355 UB
->getObjectForArch(ArchName
);
357 ObjectForUBPathAndArch
.insert(std::make_pair(
358 std::make_pair(Path
, ArchName
), std::unique_ptr
<ObjectFile
>()));
359 return ObjOrErr
.takeError();
361 ObjectFile
*Res
= ObjOrErr
->get();
362 ObjectForUBPathAndArch
.insert(std::make_pair(std::make_pair(Path
, ArchName
),
363 std::move(ObjOrErr
.get())));
366 if (Bin
->isObject()) {
367 return cast
<ObjectFile
>(Bin
);
369 return errorCodeToError(object_error::arch_not_found
);
372 Expected
<SymbolizableModule
*>
373 LLVMSymbolizer::getOrCreateModuleInfo(const std::string
&ModuleName
,
375 const auto &I
= Modules
.find(ModuleName
);
376 if (I
!= Modules
.end()) {
377 return I
->second
.get();
379 std::string BinaryName
= ModuleName
;
380 std::string ArchName
= Opts
.DefaultArch
;
381 size_t ColonPos
= ModuleName
.find_last_of(':');
382 // Verify that substring after colon form a valid arch name.
383 if (ColonPos
!= std::string::npos
) {
384 std::string ArchStr
= ModuleName
.substr(ColonPos
+ 1);
385 if (Triple(ArchStr
).getArch() != Triple::UnknownArch
) {
386 BinaryName
= ModuleName
.substr(0, ColonPos
);
390 auto ObjectsOrErr
= getOrCreateObjectPair(BinaryName
, ArchName
);
392 // Failed to find valid object file.
394 std::make_pair(ModuleName
, std::unique_ptr
<SymbolizableModule
>()));
395 return ObjectsOrErr
.takeError();
397 ObjectPair Objects
= ObjectsOrErr
.get();
399 std::unique_ptr
<DIContext
> Context
;
400 // If this is a COFF object containing PDB info, use a PDBContext to
401 // symbolize. Otherwise, use DWARF.
402 if (auto CoffObject
= dyn_cast
<COFFObjectFile
>(Objects
.first
)) {
403 const codeview::DebugInfo
*DebugInfo
;
404 StringRef PDBFileName
;
405 auto EC
= CoffObject
->getDebugPDBInfo(DebugInfo
, PDBFileName
);
406 if (!EC
&& DebugInfo
!= nullptr && !PDBFileName
.empty()) {
408 std::unique_ptr
<IPDBSession
> Session
;
409 if (auto Err
= loadDataForEXE(PDB_ReaderType::DIA
,
410 Objects
.first
->getFileName(), Session
)) {
412 std::make_pair(ModuleName
, std::unique_ptr
<SymbolizableModule
>()));
413 // Return along the PDB filename to provide more context
414 return createFileError(PDBFileName
, std::move(Err
));
416 Context
.reset(new PDBContext(*CoffObject
, std::move(Session
)));
420 Context
= DWARFContext::create(*Objects
.second
, nullptr,
421 DWARFContext::defaultErrorHandler
, DWPName
);
424 SymbolizableObjectFile::create(Objects
.first
, std::move(Context
));
425 std::unique_ptr
<SymbolizableModule
> SymMod
;
427 SymMod
= std::move(InfoOrErr
.get());
429 Modules
.insert(std::make_pair(ModuleName
, std::move(SymMod
)));
430 assert(InsertResult
.second
);
431 if (auto EC
= InfoOrErr
.getError())
432 return errorCodeToError(EC
);
433 return InsertResult
.first
->second
.get();
438 // Undo these various manglings for Win32 extern "C" functions:
441 // fastcall - @foo@12
442 // vectorcall - foo@@12
443 // These are all different linkage names for 'foo'.
444 StringRef
demanglePE32ExternCFunc(StringRef SymbolName
) {
445 // Remove any '_' or '@' prefix.
446 char Front
= SymbolName
.empty() ? '\0' : SymbolName
[0];
447 if (Front
== '_' || Front
== '@')
448 SymbolName
= SymbolName
.drop_front();
450 // Remove any '@[0-9]+' suffix.
452 size_t AtPos
= SymbolName
.rfind('@');
453 if (AtPos
!= StringRef::npos
&&
454 std::all_of(SymbolName
.begin() + AtPos
+ 1, SymbolName
.end(),
455 [](char C
) { return C
>= '0' && C
<= '9'; })) {
456 SymbolName
= SymbolName
.substr(0, AtPos
);
460 // Remove any ending '@' for vectorcall.
461 if (SymbolName
.endswith("@"))
462 SymbolName
= SymbolName
.drop_back();
467 } // end anonymous namespace
470 LLVMSymbolizer::DemangleName(const std::string
&Name
,
471 const SymbolizableModule
*DbiModuleDescriptor
) {
472 // We can spoil names of symbols with C linkage, so use an heuristic
473 // approach to check if the name should be demangled.
474 if (Name
.substr(0, 2) == "_Z") {
476 char *DemangledName
= itaniumDemangle(Name
.c_str(), nullptr, nullptr, &status
);
479 std::string Result
= DemangledName
;
484 #if defined(_MSC_VER)
485 if (!Name
.empty() && Name
.front() == '?') {
486 // Only do MSVC C++ demangling on symbols starting with '?'.
487 char DemangledName
[1024] = {0};
488 DWORD result
= ::UnDecorateSymbolName(
489 Name
.c_str(), DemangledName
, 1023,
490 UNDNAME_NO_ACCESS_SPECIFIERS
| // Strip public, private, protected
491 UNDNAME_NO_ALLOCATION_LANGUAGE
| // Strip __thiscall, __stdcall, etc
492 UNDNAME_NO_THROW_SIGNATURES
| // Strip throw() specifications
493 UNDNAME_NO_MEMBER_TYPE
| // Strip virtual, static, etc specifiers
494 UNDNAME_NO_MS_KEYWORDS
| // Strip all MS extension keywords
495 UNDNAME_NO_FUNCTION_RETURNS
); // Strip function return types
496 return (result
== 0) ? Name
: std::string(DemangledName
);
499 if (DbiModuleDescriptor
&& DbiModuleDescriptor
->isWin32Module())
500 return std::string(demanglePE32ExternCFunc(Name
));
} // namespace symbolize
} // namespace llvm