1 //===-- llvm-symbolizer.cpp - Simple addr2line-like symbolizer ------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This utility works much like "addr2line". It is capable of transforming
10 // tuples (module name, module offset) to code locations (function name,
11 // file, line number, column number). It is targeted for compiler-rt tools
12 // (especially AddressSanitizer and ThreadSanitizer) that can use it
13 // to symbolize stack traces in their error reports.
15 //===----------------------------------------------------------------------===//
18 #include "llvm/ADT/StringExtras.h"
19 #include "llvm/ADT/StringRef.h"
20 #include "llvm/Config/config.h"
21 #include "llvm/DebugInfo/Symbolize/DIPrinter.h"
22 #include "llvm/DebugInfo/Symbolize/Markup.h"
23 #include "llvm/DebugInfo/Symbolize/MarkupFilter.h"
24 #include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
25 #include "llvm/DebugInfo/Symbolize/Symbolize.h"
26 #include "llvm/Debuginfod/BuildIDFetcher.h"
27 #include "llvm/Debuginfod/Debuginfod.h"
28 #include "llvm/Debuginfod/HTTPClient.h"
29 #include "llvm/Option/Arg.h"
30 #include "llvm/Option/ArgList.h"
31 #include "llvm/Option/Option.h"
32 #include "llvm/Support/COM.h"
33 #include "llvm/Support/CommandLine.h"
34 #include "llvm/Support/Debug.h"
35 #include "llvm/Support/Errc.h"
36 #include "llvm/Support/FileSystem.h"
37 #include "llvm/Support/LLVMDriver.h"
38 #include "llvm/Support/Path.h"
39 #include "llvm/Support/StringSaver.h"
40 #include "llvm/Support/WithColor.h"
41 #include "llvm/Support/raw_ostream.h"
49 using namespace symbolize
;
// Option identifiers. OPT_INVALID takes the value 0; the OPTION macro defined
// here expands each OPTION(...) entry to LLVM_MAKE_OPT_ID(...) followed by a
// comma, so that entries can be listed one after another.
53 OPT_INVALID
= 0, // This is not an option ID.
54 #define OPTION(...) LLVM_MAKE_OPT_ID(__VA_ARGS__),
// PREFIX(NAME, VALUE) defines a constexpr StringLiteral array NAME_init
// initialized from VALUE, plus a constexpr ArrayRef NAME that views all but
// the last element of that array (presumably a terminating entry -- TODO
// confirm against the generated option definitions).
59 #define PREFIX(NAME, VALUE) \
60 static constexpr StringLiteral NAME##_init[] = VALUE; \
61 static constexpr ArrayRef<StringLiteral> NAME(NAME##_init, \
62 std::size(NAME##_init) - 1);
66 using namespace llvm::opt
;
// Table of option descriptors. While it is being filled, OPTION(...) is
// redefined to expand to LLVM_CONSTRUCT_OPT_INFO(...) followed by a comma.
67 static constexpr opt::OptTable::Info InfoTable
[] = {
68 #define OPTION(...) LLVM_CONSTRUCT_OPT_INFO(__VA_ARGS__),
// Option table used by the tool: a GenericOptTable constructed over
// InfoTable, with grouped short options switched on in the constructor.
73 class SymbolizerOptTable
: public opt::GenericOptTable
{
75 SymbolizerOptTable() : GenericOptTable(InfoTable
) {
76 setGroupedShortOptions(true);
// Tool name handed to WithColor::error when printError reports a problem.
81 static std::string ToolName
;
// Starts an error message on stderr via WithColor::error using ToolName, then
// writes AuxInfo wrapped in single quotes followed by ": ".
// NOTE(review): EI is not used on the lines visible here; the code that logs
// it, and any guard on AuxInfo, is presumably on lines missing from this
// listing -- confirm against the full file.
83 static void printError(const ErrorInfoBase
&EI
, StringRef AuxInfo
) {
84 WithColor::error(errs(), ToolName
);
86 errs() << "'" << AuxInfo
<< "': ";
// Prints the outcome of one symbolization request.
//   Request  - the request the result belongs to.
//   ResOrErr - either the result value or an error.
// On success the value held by ResOrErr is passed to Printer.print. On
// failure the error is consumed by handleAllErrors, the return value of
// Printer.printError is stored in PrintEmpty, and a default-constructed T is
// passed to Printer.print.
// NOTE(review): the conditions guarding these steps (success check, use of
// PrintEmpty) are not visible in this listing -- confirm.
92 static void print(const Request
&Request
, Expected
<T
> &ResOrErr
,
95 // No error, print the result.
96 Printer
.print(Request
, *ResOrErr
);
101 bool PrintEmpty
= true;
102 handleAllErrors(std::move(ResOrErr
.takeError()),
103 [&](const ErrorInfoBase
&EI
) {
104 PrintEmpty
= Printer
.printError(Request
, EI
);
108 Printer
.print(Request
, T());
// Output formats the tool can produce; the main function picks the printer
// class (LLVMPrinter, GNUPrinter or JSONPrinter) from this value.
111 enum class OutputStyle
{ LLVM
, GNU
, JSON
};
// Makes the symbolizer fetch debug info by build ID through debuginfod.
//   Symbolizer - receives a DebuginfodFetcher as its build ID fetcher.
//   Args       - supplies every OPT_debug_file_directory_EQ value, which is
//                forwarded to the fetcher's constructor.
// Also initializes the HTTP client.
// NOTE(review): IsEnabled is a function-local static flag; presumably it
// turns repeated calls into no-ops, but the check and the assignment are not
// visible in this listing -- confirm.
119 static void enableDebuginfod(LLVMSymbolizer
&Symbolizer
,
120 const opt::ArgList
&Args
) {
121 static bool IsEnabled
= false;
125 // Look up symbols using the debuginfod client.
126 Symbolizer
.setBuildIDFetcher(std::make_unique
<DebuginfodFetcher
>(
127 Args
.getAllArgValues(OPT_debug_file_directory_EQ
)));
128 // The HTTPClient must be initialized for use by the debuginfod client.
129 HTTPClient::initialize();
// Extracts the next word from Source and advances Source past it.
// Leading ' ', '\n' and '\r' characters are skipped. A word that begins with
// a single or double quote runs up to the matching quote character located
// with strchr; any other word runs up to the next delimiter (strcspn).
// Source is finally re-pointed at the text between Pos and its previous end.
// NOTE(review): strspn/strchr/strcspn work on C strings, so this assumes the
// buffer behind Source is NUL-terminated -- confirm for every caller.
132 static StringRef
getSpaceDelimitedWord(StringRef
&Source
) {
133 const char kDelimiters
[] = " \n\r";
134 const char *Pos
= Source
.data();
// Skip leading delimiters.
136 Pos
+= strspn(Pos
, kDelimiters
);
// Quoted word: either quote character opens it.
137 if (*Pos
== '"' || *Pos
== '\'') {
140 const char *End
= strchr(Pos
, Quote
);
143 Result
= StringRef(Pos
, End
- Pos
);
// Unquoted word: everything up to the next delimiter.
146 int NameLength
= strcspn(Pos
, kDelimiters
);
147 Result
= StringRef(Pos
, NameLength
);
// Hand the unconsumed remainder back to the caller.
150 Source
= StringRef(Pos
, Source
.end() - Pos
);
// Wraps Msg in a StringError carrying inconvertibleErrorCode() and returns it
// as an Error.
154 static Error
makeStringError(StringRef Msg
) {
155 return make_error
<StringError
>(Msg
, inconvertibleErrorCode());
// Parses one input line into a command, a module and an address
// specification.
//   BinaryName  - module named by the caller; may be empty.
//   IsAddr2Line - when true, numeric addresses are parsed with radix 16;
//                 otherwise radix 0 is passed to getAsInteger.
//   InputString - the text to parse. It may start with "CODE ", "DATA " or
//                 "FRAME ", followed by an optional "FILE:" or "BUILDID:"
//                 prefix, then the address specification.
//   Cmd, ModuleName, BuildID, Symbol, Offset - outputs filled from the input.
// Returns Error::success() when parsing succeeds and a StringError built by
// makeStringError otherwise.
158 static Error
parseCommand(StringRef BinaryName
, bool IsAddr2Line
,
159 StringRef InputString
, Command
&Cmd
,
160 std::string
&ModuleName
, object::BuildID
&BuildID
,
161 StringRef
&Symbol
, uint64_t &Offset
) {
// Start from the caller-supplied module; it may be replaced by a name taken
// from the input further down.
162 ModuleName
= BinaryName
;
163 if (InputString
.consume_front("CODE ")) {
165 } else if (InputString
.consume_front("DATA ")) {
167 } else if (InputString
.consume_front("FRAME ")) {
168 Cmd
= Command::Frame
;
170 // If no cmd, assume it's CODE.
174 // Parse optional input file specification.
175 bool HasFilePrefix
= false;
176 bool HasBuildIDPrefix
= false;
177 while (!InputString
.empty()) {
178 InputString
= InputString
.ltrim();
179 if (InputString
.consume_front("FILE:")) {
180 if (HasFilePrefix
|| HasBuildIDPrefix
)
181 return makeStringError("duplicate input file specification prefix");
182 HasFilePrefix
= true;
185 if (InputString
.consume_front("BUILDID:")) {
186 if (HasBuildIDPrefix
|| HasFilePrefix
)
187 return makeStringError("duplicate input file specification prefix");
188 HasBuildIDPrefix
= true;
194 // If an input file is not specified on the command line, try to extract it
// from the input string.
196 if (HasBuildIDPrefix
|| HasFilePrefix
) {
197 InputString
= InputString
.ltrim();
198 if (InputString
.empty()) {
200 return makeStringError("must be followed by an input file");
202 return makeStringError("must be followed by a hash");
// A prefixed module in the input conflicts with one already supplied by the
// caller.
205 if (!BinaryName
.empty() || !BuildID
.empty())
206 return makeStringError("input file has already been specified");
208 StringRef Name
= getSpaceDelimitedWord(InputString
);
210 return makeStringError("unbalanced quotes in input file name");
211 if (HasBuildIDPrefix
) {
212 BuildID
= parseBuildID(Name
);
214 return makeStringError("wrong format of build-id");
218 } else if (BinaryName
.empty() && BuildID
.empty()) {
219 // No input file has been specified. If the input string contains at least
220 // two items, assume that the first item is a file name.
221 ModuleName
= getSpaceDelimitedWord(InputString
);
222 if (ModuleName
.empty())
223 return makeStringError("no input filename has been specified");
226 // Parse address specification, which can be an offset in module or a
227 // symbol with optional offset.
228 InputString
= InputString
.trim();
229 if (InputString
.empty())
230 return makeStringError("no module offset has been specified");
232 // If input string contains a space, ignore everything after it. This behavior
233 // is consistent with GNU addr2line.
234 int AddrSpecLength
= InputString
.find_first_of(" \n\r");
235 StringRef AddrSpec
= InputString
.substr(0, AddrSpecLength
);
// Recorded before any "0x"/"0X" prefix is consumed from AddrSpec.
236 bool StartsWithDigit
= std::isdigit(AddrSpec
.front());
238 // GNU addr2line assumes the address is hexadecimal and allows a redundant
239 // "0x" or "0X" prefix; do the same for compatibility.
241 AddrSpec
.consume_front("0x") || AddrSpec
.consume_front("0X");
243 // If address specification is a number, treat it as a module offset.
244 if (!AddrSpec
.getAsInteger(IsAddr2Line
? 16 : 0, Offset
)) {
245 // Module offset is an address.
246 Symbol
= StringRef();
247 return Error::success();
250 // If address specification starts with a digit, but is not a number, consider
// it an error; an empty specification is rejected the same way.
252 if (StartsWithDigit
|| AddrSpec
.empty())
253 return makeStringError("expected a number as module offset");
255 // Otherwise it is a symbol name, potentially with an offset.
259 // If the address specification contains '+', try treating it as
260 // "symbol + offset".
// rfind picks the last '+', so earlier '+' characters stay in the symbol part.
261 size_t Plus
= AddrSpec
.rfind('+');
262 if (Plus
!= StringRef::npos
) {
263 StringRef SymbolStr
= AddrSpec
.take_front(Plus
);
264 StringRef OffsetStr
= AddrSpec
.substr(Plus
+ 1);
265 if (!SymbolStr
.empty() && !OffsetStr
.empty() &&
266 !OffsetStr
.getAsInteger(0, Offset
)) {
268 return Error::success();
270 // The found '+' is not an offset delimiter.
273 return Error::success();
// Runs one symbolization query and prints its result through Printer.
//   ModuleName - name stored in the request passed to the printer.
//   ModuleSpec - what the symbolizer is queried with (type T).
//   Cmd        - Command::Data and Command::Frame select symbolizeData and
//                symbolizeFrame; other values fall through to the checks
//                on Symbol, ShouldInline and Style.
//   Symbol     - when non-empty, findSymbol is used instead of an address
//                lookup.
//   Offset / AdjustVMA - the address looked up is Offset - AdjustVMA.
//   ShouldInline, Style - choose between symbolizeInlinedCode and
//                symbolizeCode for plain code lookups.
// Ends by calling Symbolizer.pruneCache().
276 template <typename T
>
277 void executeCommand(StringRef ModuleName
, const T
&ModuleSpec
, Command Cmd
,
278 StringRef Symbol
, uint64_t Offset
, uint64_t AdjustVMA
,
279 bool ShouldInline
, OutputStyle Style
,
280 LLVMSymbolizer
&Symbolizer
, DIPrinter
&Printer
) {
// Unsigned arithmetic: the result wraps around if AdjustVMA exceeds Offset.
281 uint64_t AdjustedOffset
= Offset
- AdjustVMA
;
282 object::SectionedAddress Address
= {AdjustedOffset
,
283 object::SectionedAddress::UndefSection
};
// The request records the original (unadjusted) Offset, and only when no
// symbol name was given.
284 Request SymRequest
= {
285 ModuleName
, Symbol
.empty() ? std::make_optional(Offset
) : std::nullopt
,
287 if (Cmd
== Command::Data
) {
288 Expected
<DIGlobal
> ResOrErr
= Symbolizer
.symbolizeData(ModuleSpec
, Address
);
289 print(SymRequest
, ResOrErr
, Printer
);
290 } else if (Cmd
== Command::Frame
) {
291 Expected
<std::vector
<DILocal
>> ResOrErr
=
292 Symbolizer
.symbolizeFrame(ModuleSpec
, Address
);
293 print(SymRequest
, ResOrErr
, Printer
);
294 } else if (!Symbol
.empty()) {
// Symbol lookups pass the symbol name and the unadjusted Offset.
295 Expected
<std::vector
<DILineInfo
>> ResOrErr
=
296 Symbolizer
.findSymbol(ModuleSpec
, Symbol
, Offset
);
297 print(SymRequest
, ResOrErr
, Printer
);
298 } else if (ShouldInline
) {
299 Expected
<DIInliningInfo
> ResOrErr
=
300 Symbolizer
.symbolizeInlinedCode(ModuleSpec
, Address
);
301 print(SymRequest
, ResOrErr
, Printer
);
302 } else if (Style
== OutputStyle::GNU
) {
303 // With PrintFunctions == FunctionNameKind::LinkageName (default)
304 // and UseSymbolTable == true (also default), Symbolizer.symbolizeCode()
305 // may override the name of an inlined function with the name of the topmost
306 // caller function in the inlining chain. This contradicts the existing
307 // behavior of addr2line. Symbolizer.symbolizeInlinedCode() overrides only
308 // the topmost function, which suits our needs better.
309 Expected
<DIInliningInfo
> ResOrErr
=
310 Symbolizer
.symbolizeInlinedCode(ModuleSpec
, Address
);
// Reduce the inlining info to a single DILineInfo: forward the error, use an
// empty DILineInfo when there are no frames, otherwise take frame 0.
311 Expected
<DILineInfo
> Res0OrErr
=
313 ? Expected
<DILineInfo
>(ResOrErr
.takeError())
314 : ((ResOrErr
->getNumberOfFrames() == 0) ? DILineInfo()
315 : ResOrErr
->getFrame(0));
316 print(SymRequest
, Res0OrErr
, Printer
);
318 Expected
<DILineInfo
> ResOrErr
=
319 Symbolizer
.symbolizeCode(ModuleSpec
, Address
);
320 print(SymRequest
, ResOrErr
, Printer
);
322 Symbolizer
.pruneCache();
// Prints a default-constructed DILineInfo for ModuleName, using a request
// that carries no address and an empty symbol.
325 static void printUnknownLineInfo(std::string ModuleName
, DIPrinter
&Printer
) {
326 Request SymRequest
= {ModuleName
, std::nullopt
, StringRef()};
327 Printer
.print(SymRequest
, DILineInfo());
// Parses one input string with parseCommand and symbolizes it.
//   Args            - parsed options; OPT_obj_EQ supplies the default module,
//                     OPT_inlines / OPT_no_inlines and OPT_no_debuginfod are
//                     consulted here as well.
//   IncomingBuildID - build ID supplied by the caller; copied into a local
//                     BuildID that parseCommand may overwrite.
//   AdjustVMA, Style, Symbolizer, Printer - forwarded to executeCommand.
// A parse error is reported through printError, followed by an
// unknown-location record for the module. When a build ID is available the
// lookup is keyed by it, otherwise by module name.
// NOTE(review): the return after the error path is not visible in this
// listing -- confirm.
330 static void symbolizeInput(const opt::InputArgList
&Args
,
331 object::BuildIDRef IncomingBuildID
,
332 uint64_t AdjustVMA
, bool IsAddr2Line
,
333 OutputStyle Style
, StringRef InputString
,
334 LLVMSymbolizer
&Symbolizer
, DIPrinter
&Printer
) {
336 std::string ModuleName
;
337 object::BuildID
BuildID(IncomingBuildID
.begin(), IncomingBuildID
.end());
340 if (Error E
= parseCommand(Args
.getLastArgValue(OPT_obj_EQ
), IsAddr2Line
,
341 StringRef(InputString
), Cmd
, ModuleName
, BuildID
,
343 handleAllErrors(std::move(E
), [&](const StringError
&EI
) {
344 printError(EI
, InputString
);
345 printUnknownLineInfo(ModuleName
, Printer
);
// Without an explicit flag, inlined frames are requested unless running as
// addr2line.
349 bool ShouldInline
= Args
.hasFlag(OPT_inlines
, OPT_no_inlines
, !IsAddr2Line
);
350 if (!BuildID
.empty()) {
351 assert(ModuleName
.empty());
352 if (!Args
.hasArg(OPT_no_debuginfod
))
353 enableDebuginfod(Symbolizer
, Args
);
// The hex form of the build ID stands in for the module name in the request.
354 std::string BuildIDStr
= toHex(BuildID
);
355 executeCommand(BuildIDStr
, BuildID
, Cmd
, Symbol
, Offset
, AdjustVMA
,
356 ShouldInline
, Style
, Symbolizer
, Printer
);
358 executeCommand(ModuleName
, ModuleName
, Cmd
, Symbol
, Offset
, AdjustVMA
,
359 ShouldInline
, Style
, Symbolizer
, Printer
);
// Prints the option help for ToolName to OS through Tbl.printHelp, using
// "<ToolName> [options] addresses..." as the usage text and ToolName as the
// title, then appends a hint about passing @FILE arguments.
363 static void printHelp(StringRef ToolName
, const SymbolizerOptTable
&Tbl
,
365 const char HelpText
[] = " [options] addresses...";
366 Tbl
.printHelp(OS
, (ToolName
+ HelpText
).str().c_str(),
367 ToolName
.str().c_str());
368 // TODO Replace this with OptTable API once it adds extrahelp support.
369 OS
<< "\nPass @FILE as argument to read options from FILE.\n";
// Parses the command line with Tbl and returns the resulting argument list.
//   Argc, Argv  - the raw command line.
//   IsAddr2Line - selects the tool name ("llvm-addr2line" or
//                 "llvm-symbolizer") and the environment variable that seeds
//                 the initial options.
//   Tbl         - option table used for parsing and for printing help.
// Parse errors are written to errs() with an "error: " prefix. OPT_help
// prints the help text to outs(); OPT_version prints the tool name followed
// by the version message.
// NOTE(review): how HasError is set and acted upon, and whether the help and
// version paths exit, is not visible in this listing -- confirm.
372 static opt::InputArgList
parseOptions(int Argc
, char *Argv
[], bool IsAddr2Line
,
374 SymbolizerOptTable
&Tbl
) {
375 StringRef ToolName
= IsAddr2Line
? "llvm-addr2line" : "llvm-symbolizer";
376 // The environment variable specifies initial options which can be overridden
377 // by command line options.
378 Tbl
.setInitialOptionsFromEnvironment(IsAddr2Line
? "LLVM_ADDR2LINE_OPTS"
379 : "LLVM_SYMBOLIZER_OPTS");
380 bool HasError
= false;
381 opt::InputArgList Args
=
382 Tbl
.parseArgs(Argc
, Argv
, OPT_UNKNOWN
, Saver
, [&](StringRef Msg
) {
383 errs() << ("error: " + Msg
+ "\n");
388 if (Args
.hasArg(OPT_help
)) {
389 printHelp(ToolName
, Tbl
, outs());
392 if (Args
.hasArg(OPT_version
)) {
393 outs() << ToolName
<< '\n';
394 cl::PrintVersionMessage();
// Reads the last occurrence of option ID, if any, and parses its value into
// Value with llvm::to_integer (base argument 0). Value is left untouched when
// the option is absent. On a parse failure a diagnostic naming the option
// spelling and the offending text is written to errs().
// NOTE(review): unlike the diagnostic in parseBuildIDArg, this message has no
// trailing newline; what happens after it is printed is not visible in this
// listing -- confirm.
401 template <typename T
>
402 static void parseIntArg(const opt::InputArgList
&Args
, int ID
, T
&Value
) {
403 if (const opt::Arg
*A
= Args
.getLastArg(ID
)) {
404 StringRef
V(A
->getValue());
405 if (!llvm::to_integer(V
, Value
, 0)) {
406 errs() << A
->getSpelling() +
407 ": expected a non-negative integer, but got '" + V
+ "'";
// Chooses how function names are printed.
//   - OPT_functions present: LinkageName.
//   - OPT_functions_EQ present: "none" -> None, "short" -> ShortName, any
//     other value -> LinkageName.
//   - neither present: None in addr2line mode, LinkageName otherwise.
// OPT_functions takes precedence over OPT_functions_EQ because it is checked
// first.
415 static FunctionNameKind
decideHowToPrintFunctions(const opt::InputArgList
&Args
,
417 if (Args
.hasArg(OPT_functions
))
418 return FunctionNameKind::LinkageName
;
419 if (const opt::Arg
*A
= Args
.getLastArg(OPT_functions_EQ
))
420 return StringSwitch
<FunctionNameKind
>(A
->getValue())
421 .Case("none", FunctionNameKind::None
)
422 .Case("short", FunctionNameKind::ShortName
)
423 .Default(FunctionNameKind::LinkageName
);
424 return IsAddr2Line
? FunctionNameKind::None
: FunctionNameKind::LinkageName
;
// Translates the color options into an optional<bool>.
// The last OPT_color_EQ value maps "always" -> true, "never" -> false and
// "auto" -> std::nullopt.
// NOTE(review): the value returned when OPT_color is present, and the
// fallback when neither option is given, are on lines not visible in this
// listing -- confirm.
427 static std::optional
<bool> parseColorArg(const opt::InputArgList
&Args
) {
428 if (Args
.hasArg(OPT_color
))
430 if (const opt::Arg
*A
= Args
.getLastArg(OPT_color_EQ
))
431 return StringSwitch
<std::optional
<bool>>(A
->getValue())
432 .Case("always", true)
433 .Case("never", false)
434 .Case("auto", std::nullopt
);
// Parses the value of the last occurrence of option ID with parseBuildID.
// An empty parse result is treated as invalid: a diagnostic naming the option
// spelling and the offending text is written to errs().
// NOTE(review): A is dereferenced below; the handling of a missing option and
// what follows the diagnostic are on lines not visible in this listing --
// confirm.
438 static object::BuildID
parseBuildIDArg(const opt::InputArgList
&Args
, int ID
) {
439 const opt::Arg
*A
= Args
.getLastArg(ID
);
443 StringRef
V(A
->getValue());
444 object::BuildID BuildID
= parseBuildID(V
);
445 if (BuildID
.empty()) {
446 errs() << A
->getSpelling() + ": expected a build ID, but got '" + V
+ "'\n";
452 // Symbolize markup from stdin and write the result to stdout.
// The MarkupFilter is built over outs(), the given symbolizer and the color
// setting from parseColorArg. Input is read one line at a time with
// std::getline and each line is moved into Filter.filter.
453 static void filterMarkup(const opt::InputArgList
&Args
, LLVMSymbolizer
&Symbolizer
) {
454 MarkupFilter
Filter(outs(), Symbolizer
, parseColorArg(Args
));
455 std::string InputString
;
456 while (std::getline(std::cin
, InputString
)) {
458 Filter
.filter(std::move(InputString
));
463 int llvm_symbolizer_main(int argc
, char **argv
, const llvm::ToolContext
&) {
464 sys::InitializeCOMRAII
COM(sys::COMThreadingMode::MultiThreaded
);
467 bool IsAddr2Line
= sys::path::stem(ToolName
).contains("addr2line");
469 StringSaver
Saver(A
);
470 SymbolizerOptTable Tbl
;
471 opt::InputArgList Args
= parseOptions(argc
, argv
, IsAddr2Line
, Saver
, Tbl
);
473 LLVMSymbolizer::Options Opts
;
475 PrinterConfig Config
;
476 parseIntArg(Args
, OPT_adjust_vma_EQ
, AdjustVMA
);
477 if (const opt::Arg
*A
= Args
.getLastArg(OPT_basenames
, OPT_relativenames
)) {
479 A
->getOption().matches(OPT_basenames
)
480 ? DILineInfoSpecifier::FileLineInfoKind::BaseNameOnly
481 : DILineInfoSpecifier::FileLineInfoKind::RelativeFilePath
;
483 Opts
.PathStyle
= DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath
;
485 Opts
.DebugFileDirectory
= Args
.getAllArgValues(OPT_debug_file_directory_EQ
);
486 Opts
.DefaultArch
= Args
.getLastArgValue(OPT_default_arch_EQ
).str();
487 Opts
.Demangle
= Args
.hasFlag(OPT_demangle
, OPT_no_demangle
, !IsAddr2Line
);
488 Opts
.DWPName
= Args
.getLastArgValue(OPT_dwp_EQ
).str();
489 Opts
.FallbackDebugPath
=
490 Args
.getLastArgValue(OPT_fallback_debug_path_EQ
).str();
491 Opts
.PrintFunctions
= decideHowToPrintFunctions(Args
, IsAddr2Line
);
492 parseIntArg(Args
, OPT_print_source_context_lines_EQ
,
493 Config
.SourceContextLines
);
494 Opts
.RelativeAddresses
= Args
.hasArg(OPT_relative_address
);
495 Opts
.UntagAddresses
=
496 Args
.hasFlag(OPT_untag_addresses
, OPT_no_untag_addresses
, !IsAddr2Line
);
497 Opts
.UseDIA
= Args
.hasArg(OPT_use_dia
);
498 #if !defined(LLVM_ENABLE_DIA_SDK)
500 WithColor::warning() << "DIA not available; using native PDB reader\n";
504 Opts
.UseSymbolTable
= true;
505 if (Args
.hasArg(OPT_cache_size_EQ
))
506 parseIntArg(Args
, OPT_cache_size_EQ
, Opts
.MaxCacheSize
);
507 Config
.PrintAddress
= Args
.hasArg(OPT_addresses
);
508 Config
.PrintFunctions
= Opts
.PrintFunctions
!= FunctionNameKind::None
;
509 Config
.Pretty
= Args
.hasArg(OPT_pretty_print
);
510 Config
.Verbose
= Args
.hasArg(OPT_verbose
);
512 for (const opt::Arg
*A
: Args
.filtered(OPT_dsym_hint_EQ
)) {
513 StringRef
Hint(A
->getValue());
514 if (sys::path::extension(Hint
) == ".dSYM") {
515 Opts
.DsymHints
.emplace_back(Hint
);
517 errs() << "Warning: invalid dSYM hint: \"" << Hint
518 << "\" (must have the '.dSYM' extension).\n";
522 LLVMSymbolizer
Symbolizer(Opts
);
524 if (Args
.hasFlag(OPT_debuginfod
, OPT_no_debuginfod
, canUseDebuginfod()))
525 enableDebuginfod(Symbolizer
, Args
);
527 if (Args
.hasArg(OPT_filter_markup
)) {
528 filterMarkup(Args
, Symbolizer
);
532 auto Style
= IsAddr2Line
? OutputStyle::GNU
: OutputStyle::LLVM
;
533 if (const opt::Arg
*A
= Args
.getLastArg(OPT_output_style_EQ
)) {
534 if (strcmp(A
->getValue(), "GNU") == 0)
535 Style
= OutputStyle::GNU
;
536 else if (strcmp(A
->getValue(), "JSON") == 0)
537 Style
= OutputStyle::JSON
;
539 Style
= OutputStyle::LLVM
;
542 if (Args
.hasArg(OPT_build_id_EQ
) && Args
.hasArg(OPT_obj_EQ
)) {
543 errs() << "error: cannot specify both --build-id and --obj\n";
546 object::BuildID BuildID
= parseBuildIDArg(Args
, OPT_build_id_EQ
);
548 std::unique_ptr
<DIPrinter
> Printer
;
549 if (Style
== OutputStyle::GNU
)
550 Printer
= std::make_unique
<GNUPrinter
>(outs(), printError
, Config
);
551 else if (Style
== OutputStyle::JSON
)
552 Printer
= std::make_unique
<JSONPrinter
>(outs(), Config
);
554 Printer
= std::make_unique
<LLVMPrinter
>(outs(), printError
, Config
);
556 // When an input file is specified, exit immediately if the file cannot be
557 // read. If getOrCreateModuleInfo succeeds, symbolizeInput will reuse the
558 // cached file handle.
559 if (auto *Arg
= Args
.getLastArg(OPT_obj_EQ
); Arg
) {
560 auto Status
= Symbolizer
.getOrCreateModuleInfo(Arg
->getValue());
562 Request SymRequest
= {Arg
->getValue(), 0, StringRef()};
563 handleAllErrors(Status
.takeError(), [&](const ErrorInfoBase
&EI
) {
564 Printer
->printError(SymRequest
, EI
);
570 std::vector
<std::string
> InputAddresses
= Args
.getAllArgValues(OPT_INPUT
);
571 if (InputAddresses
.empty()) {
572 const int kMaxInputStringLength
= 1024;
573 char InputString
[kMaxInputStringLength
];
575 while (fgets(InputString
, sizeof(InputString
), stdin
)) {
576 // Strip newline characters.
577 std::string
StrippedInputString(InputString
);
578 llvm::erase_if(StrippedInputString
,
579 [](char c
) { return c
== '\r' || c
== '\n'; });
580 symbolizeInput(Args
, BuildID
, AdjustVMA
, IsAddr2Line
, Style
,
581 StrippedInputString
, Symbolizer
, *Printer
);
585 Printer
->listBegin();
586 for (StringRef Address
: InputAddresses
)
587 symbolizeInput(Args
, BuildID
, AdjustVMA
, IsAddr2Line
, Style
, Address
,
588 Symbolizer
, *Printer
);