1 //===-- llvm-symbolizer.cpp - Simple addr2line-like symbolizer ------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This utility works much like "addr2line". It is capable of transforming
10 // tuples (module name, module offset) to code locations (function name,
11 // file, line number, column number). It is targeted for compiler-rt tools
12 // (especially AddressSanitizer and ThreadSanitizer) that can use it
13 // to symbolize stack traces in their error reports.
15 //===----------------------------------------------------------------------===//
18 #include "llvm/ADT/StringExtras.h"
19 #include "llvm/ADT/StringRef.h"
20 #include "llvm/Config/config.h"
21 #include "llvm/DebugInfo/Symbolize/DIPrinter.h"
22 #include "llvm/DebugInfo/Symbolize/Markup.h"
23 #include "llvm/DebugInfo/Symbolize/MarkupFilter.h"
24 #include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
25 #include "llvm/DebugInfo/Symbolize/Symbolize.h"
26 #include "llvm/Debuginfod/BuildIDFetcher.h"
27 #include "llvm/Debuginfod/Debuginfod.h"
28 #include "llvm/Debuginfod/HTTPClient.h"
29 #include "llvm/Option/Arg.h"
30 #include "llvm/Option/ArgList.h"
31 #include "llvm/Option/Option.h"
32 #include "llvm/Support/COM.h"
33 #include "llvm/Support/CommandLine.h"
34 #include "llvm/Support/Debug.h"
35 #include "llvm/Support/Errc.h"
36 #include "llvm/Support/FileSystem.h"
37 #include "llvm/Support/InitLLVM.h"
38 #include "llvm/Support/LLVMDriver.h"
39 #include "llvm/Support/Path.h"
40 #include "llvm/Support/StringSaver.h"
41 #include "llvm/Support/WithColor.h"
42 #include "llvm/Support/raw_ostream.h"
50 using namespace symbolize
;
54 OPT_INVALID
= 0, // This is not an option ID.
55 #define OPTION(...) LLVM_MAKE_OPT_ID(__VA_ARGS__),
60 #define PREFIX(NAME, VALUE) \
61 static constexpr StringLiteral NAME##_init[] = VALUE; \
62 static constexpr ArrayRef<StringLiteral> NAME(NAME##_init, \
63 std::size(NAME##_init) - 1);
67 using namespace llvm::opt
;
68 static constexpr opt::OptTable::Info InfoTable
[] = {
69 #define OPTION(...) LLVM_CONSTRUCT_OPT_INFO(__VA_ARGS__),
// Option table for this tool: an opt::GenericOptTable built from InfoTable.
74 class SymbolizerOptTable
: public opt::GenericOptTable
{
// The constructor hands InfoTable to the base class and then enables grouped
// short options on the table.
76 SymbolizerOptTable() : GenericOptTable(InfoTable
) {
77 setGroupedShortOptions(true);
// Tool name passed to WithColor::error() by printError(); the main function
// also inspects its path stem to decide whether to act as addr2line.
82 static std::string ToolName
;
// Reports an error on stderr. The message starts with the WithColor::error()
// banner for ToolName, followed by AuxInfo in single quotes and "': ".
// NOTE(review): the statements that log EI itself, and any guard around the
// AuxInfo output, are not visible in this excerpt.
84 static void printError(const ErrorInfoBase
&EI
, StringRef AuxInfo
) {
85 WithColor::error(errs(), ToolName
);
87 errs() << "'" << AuxInfo
<< "': ";
// Prints the outcome of one symbolization request.
//   Request  - the request the result belongs to.
//   ResOrErr - either the symbolization result or an error.
// A successful result is forwarded to Printer.print(); an error is consumed
// with handleAllErrors() and reported through Printer.printError().
// NOTE(review): the template header, the third parameter declaration and the
// branch conditions are not visible in this excerpt.
93 static void print(const Request
&Request
, Expected
<T
> &ResOrErr
,
96 // No error, print the result.
97 Printer
.print(Request
, *ResOrErr
);
// PrintEmpty starts out true and is overwritten with whatever
// Printer.printError() returns for the handled error.
102 bool PrintEmpty
= true;
103 handleAllErrors(std::move(ResOrErr
.takeError()),
104 [&](const ErrorInfoBase
&EI
) {
105 PrintEmpty
= Printer
.printError(Request
, EI
);
// Prints a default-constructed T for the request; presumably only when
// PrintEmpty is set (the guarding condition is not visible here).
109 Printer
.print(Request
, T());
// Output flavour for symbolization results. The main function maps GNU to
// GNUPrinter, JSON to JSONPrinter and anything else to LLVMPrinter.
enum class OutputStyle {
  LLVM,
  GNU,
  JSON,
};
// Configures Symbolizer to fetch debug info by build ID through debuginfod.
// Installs a DebuginfodFetcher built from every value of
// OPT_debug_file_directory_EQ found in Args, then initializes the HTTP client.
120 static void enableDebuginfod(LLVMSymbolizer
&Symbolizer
,
121 const opt::ArgList
&Args
) {
// NOTE(review): presumably a run-once guard; the statements that test and set
// this flag are not visible in this excerpt.
122 static bool IsEnabled
= false;
126 // Look up symbols using the debuginfod client.
127 Symbolizer
.setBuildIDFetcher(std::make_unique
<DebuginfodFetcher
>(
128 Args
.getAllArgValues(OPT_debug_file_directory_EQ
)));
129 // The HTTPClient must be initialized for use by the debuginfod client.
130 HTTPClient::initialize();
// Extracts the next word from Source and rebinds Source to the text that
// remains after it. Leading spaces, newlines and carriage returns are skipped.
// A word that starts with a single or double quote extends up to the matching
// quote character; any other word extends up to the next delimiter.
// NOTE(review): strspn/strchr/strcspn treat Source.data() as a C string, so
// this presumably relies on the underlying buffer being NUL-terminated;
// confirm against callers. Several statements (declarations of Result and
// Quote, cursor advances, the return) are not visible in this excerpt.
133 static StringRef
getSpaceDelimitedWord(StringRef
&Source
) {
134 const char kDelimiters
[] = " \n\r";
135 const char *Pos
= Source
.data();
// Skip leading delimiters.
137 Pos
+= strspn(Pos
, kDelimiters
);
138 if (*Pos
== '"' || *Pos
== '\'') {
// Quoted word: look for the closing quote character.
141 const char *End
= strchr(Pos
, Quote
);
144 Result
= StringRef(Pos
, End
- Pos
);
// Unquoted word: it spans everything up to the next delimiter.
147 int NameLength
= strcspn(Pos
, kDelimiters
);
148 Result
= StringRef(Pos
, NameLength
);
// Hand the unconsumed tail back to the caller.
151 Source
= StringRef(Pos
, Source
.end() - Pos
);
155 static Error
makeStringError(StringRef Msg
) {
156 return make_error
<StringError
>(Msg
, inconvertibleErrorCode());
// Parses one input line into a symbolization command.
//
// Accepted shape: an optional leading "CODE ", "DATA " or "FRAME " keyword, an
// optional "FILE:" or "BUILDID:" prefixed input file, then a module offset.
//
//   BinaryName   - input file given on the command line (may be empty).
//   IsAddr2Line  - selects addr2line-compatible parsing of the offset.
//   InputString  - the line to parse.
//   Cmd          - out: the command kind.
//   ModuleName   - out: starts as BinaryName, may be replaced by a name taken
//                  from InputString.
//   BuildID      - in/out: build ID parsed from a "BUILDID:" specification.
//   Symbol       - out: NOTE(review): the assignment is not visible in this
//                  excerpt; presumably set when the offset is not a number.
//   ModuleOffset - out: the parsed numeric offset.
//
// Returns Error::success(), or a string error describing the malformed input.
159 static Error
parseCommand(StringRef BinaryName
, bool IsAddr2Line
,
160 StringRef InputString
, Command
&Cmd
,
161 std::string
&ModuleName
, object::BuildID
&BuildID
,
162 StringRef
&Symbol
, uint64_t &ModuleOffset
) {
163 ModuleName
= BinaryName
;
164 if (InputString
.consume_front("CODE ")) {
166 } else if (InputString
.consume_front("DATA ")) {
168 } else if (InputString
.consume_front("FRAME ")) {
169 Cmd
= Command::Frame
;
171 // If no cmd, assume it's CODE.
175 // Parse optional input file specification.
// At most one of FILE: / BUILDID: may appear; a second prefix of either kind
// is rejected as a duplicate.
176 bool HasFilePrefix
= false;
177 bool HasBuildIDPrefix
= false;
178 while (!InputString
.empty()) {
179 InputString
= InputString
.ltrim();
180 if (InputString
.consume_front("FILE:")) {
181 if (HasFilePrefix
|| HasBuildIDPrefix
)
182 return makeStringError("duplicate input file specification prefix");
183 HasFilePrefix
= true;
186 if (InputString
.consume_front("BUILDID:")) {
187 if (HasBuildIDPrefix
|| HasFilePrefix
)
188 return makeStringError("duplicate input file specification prefix");
189 HasBuildIDPrefix
= true;
195 // If an input file is not specified on the command line, try to extract it
197 if (HasBuildIDPrefix
|| HasFilePrefix
) {
198 InputString
= InputString
.ltrim();
199 if (InputString
.empty()) {
201 return makeStringError("must be followed by an input file");
203 return makeStringError("must be followed by a hash");
// A prefixed input file conflicts with one that was already supplied.
206 if (!BinaryName
.empty() || !BuildID
.empty())
207 return makeStringError("input file has already been specified");
209 StringRef Name
= getSpaceDelimitedWord(InputString
);
211 return makeStringError("unbalanced quotes in input file name");
212 if (HasBuildIDPrefix
) {
213 BuildID
= parseBuildID(Name
);
215 return makeStringError("wrong format of build-id");
219 } else if (BinaryName
.empty() && BuildID
.empty()) {
220 // No input file has been specified. If the input string contains at least
221 // two items, assume that the first item is a file name.
222 ModuleName
= getSpaceDelimitedWord(InputString
);
223 if (ModuleName
.empty())
224 return makeStringError("no input filename has been specified");
227 // Parse module offset, which can be specified as a number or as a symbol.
228 InputString
= InputString
.ltrim();
229 if (InputString
.empty())
230 return makeStringError("no module offset has been specified");
232 // If input string contains a space, ignore everything after it. This behavior
233 // is consistent with GNU addr2line.
// NOTE(review): find_first_of() reports "not found" with npos, which is
// narrowed to int here and widened again by substr(); presumably this yields
// the whole remaining string. Confirm against the StringRef documentation.
234 int OffsetLength
= InputString
.find_first_of(" \n\r");
235 StringRef Offset
= InputString
.substr(0, OffsetLength
);
237 // GNU addr2line assumes the offset is hexadecimal and allows a redundant
238 // "0x" or "0X" prefix; do the same for compatibility.
240 Offset
.consume_front("0x") || Offset
.consume_front("0X");
242 // If the input is not a number, treat it as a symbol.
// Radix 16 is used in addr2line mode; radix 0 is passed otherwise.
243 if (Offset
.getAsInteger(IsAddr2Line
? 16 : 0, ModuleOffset
)) {
248 return Error::success();
// Runs one symbolization request against ModuleSpec and prints the outcome.
//
//   ModuleName   - name stored in the printed request.
//   ModuleSpec   - module identifier handed to the LLVMSymbolizer calls.
//   Cmd          - command kind; Data and Frame take dedicated paths.
//   Symbol       - when non-empty (and Cmd is neither Data nor Frame), the
//                  symbol is looked up with findSymbol().
//   Offset       - offset as given by the user; stored in the request.
//   AdjustVMA    - subtracted from Offset to form the address looked up.
//   ShouldInline - selects symbolizeInlinedCode() for code lookups.
//   Style        - GNU style prints only the first inlined frame.
//
// The visible branches are tried in this order: Data, Frame, symbol lookup,
// inlined code, GNU style, then plain symbolizeCode().
251 template <typename T
>
252 void executeCommand(StringRef ModuleName
, const T
&ModuleSpec
, Command Cmd
,
253 StringRef Symbol
, uint64_t Offset
, uint64_t AdjustVMA
,
254 bool ShouldInline
, OutputStyle Style
,
255 LLVMSymbolizer
&Symbolizer
, DIPrinter
&Printer
) {
// The lookup address is the adjusted offset with an undefined section index.
256 uint64_t AdjustedOffset
= Offset
- AdjustVMA
;
257 object::SectionedAddress Address
= {AdjustedOffset
,
258 object::SectionedAddress::UndefSection
};
// The request keeps the unadjusted Offset, and only when no symbol was given.
259 Request SymRequest
= {
260 ModuleName
, Symbol
.empty() ? std::make_optional(Offset
) : std::nullopt
,
262 if (Cmd
== Command::Data
) {
263 Expected
<DIGlobal
> ResOrErr
= Symbolizer
.symbolizeData(ModuleSpec
, Address
);
264 print(SymRequest
, ResOrErr
, Printer
);
265 } else if (Cmd
== Command::Frame
) {
266 Expected
<std::vector
<DILocal
>> ResOrErr
=
267 Symbolizer
.symbolizeFrame(ModuleSpec
, Address
);
268 print(SymRequest
, ResOrErr
, Printer
);
269 } else if (!Symbol
.empty()) {
270 Expected
<std::vector
<DILineInfo
>> ResOrErr
=
271 Symbolizer
.findSymbol(ModuleSpec
, Symbol
);
272 print(SymRequest
, ResOrErr
, Printer
);
273 } else if (ShouldInline
) {
274 Expected
<DIInliningInfo
> ResOrErr
=
275 Symbolizer
.symbolizeInlinedCode(ModuleSpec
, Address
);
276 print(SymRequest
, ResOrErr
, Printer
);
277 } else if (Style
== OutputStyle::GNU
) {
278 // With PrintFunctions == FunctionNameKind::LinkageName (default)
279 // and UseSymbolTable == true (also default), Symbolizer.symbolizeCode()
280 // may override the name of an inlined function with the name of the topmost
281 // caller function in the inlining chain. This contradicts the existing
282 // behavior of addr2line. Symbolizer.symbolizeInlinedCode() overrides only
283 // the topmost function, which suits our needs better.
284 Expected
<DIInliningInfo
> ResOrErr
=
285 Symbolizer
.symbolizeInlinedCode(ModuleSpec
, Address
);
// Reduce the inlining info to a single DILineInfo: propagate the error, use
// an empty DILineInfo when there are no frames, otherwise take frame 0.
286 Expected
<DILineInfo
> Res0OrErr
=
288 ? Expected
<DILineInfo
>(ResOrErr
.takeError())
289 : ((ResOrErr
->getNumberOfFrames() == 0) ? DILineInfo()
290 : ResOrErr
->getFrame(0));
291 print(SymRequest
, Res0OrErr
, Printer
);
293 Expected
<DILineInfo
> ResOrErr
=
294 Symbolizer
.symbolizeCode(ModuleSpec
, Address
);
295 print(SymRequest
, ResOrErr
, Printer
);
// Give the symbolizer a chance to trim its cache after the request.
297 Symbolizer
.pruneCache();
300 static void printUnknownLineInfo(std::string ModuleName
, DIPrinter
&Printer
) {
301 Request SymRequest
= {ModuleName
, std::nullopt
, StringRef()};
302 Printer
.print(SymRequest
, DILineInfo());
// Parses one input line with parseCommand() and executes the result.
//
//   Args            - parsed command-line options (OPT_obj_EQ, OPT_inlines,
//                     OPT_no_inlines, OPT_no_debuginfod are consulted here).
//   IncomingBuildID - build ID from the command line; copied into a local
//                     BuildID that parseCommand() may overwrite.
//   AdjustVMA, Style, Symbolizer, Printer - forwarded to executeCommand().
//   InputString     - the line to symbolize.
//
// A parse error is reported with printError() and followed by an unknown-line
// record. NOTE(review): the declarations of Cmd, Symbol and Offset and the
// early return after the error path are not visible in this excerpt.
305 static void symbolizeInput(const opt::InputArgList
&Args
,
306 object::BuildIDRef IncomingBuildID
,
307 uint64_t AdjustVMA
, bool IsAddr2Line
,
308 OutputStyle Style
, StringRef InputString
,
309 LLVMSymbolizer
&Symbolizer
, DIPrinter
&Printer
) {
311 std::string ModuleName
;
312 object::BuildID
BuildID(IncomingBuildID
.begin(), IncomingBuildID
.end());
315 if (Error E
= parseCommand(Args
.getLastArgValue(OPT_obj_EQ
), IsAddr2Line
,
316 StringRef(InputString
), Cmd
, ModuleName
, BuildID
,
318 handleAllErrors(std::move(E
), [&](const StringError
&EI
) {
319 printError(EI
, InputString
);
320 printUnknownLineInfo(ModuleName
, Printer
);
// Inlining defaults to on, except in addr2line mode.
324 bool ShouldInline
= Args
.hasFlag(OPT_inlines
, OPT_no_inlines
, !IsAddr2Line
);
// With a build ID the module is addressed by that ID and its hex string is
// used as the display name; otherwise ModuleName serves as both.
325 if (!BuildID
.empty()) {
326 assert(ModuleName
.empty());
327 if (!Args
.hasArg(OPT_no_debuginfod
))
328 enableDebuginfod(Symbolizer
, Args
);
329 std::string BuildIDStr
= toHex(BuildID
);
330 executeCommand(BuildIDStr
, BuildID
, Cmd
, Symbol
, Offset
, AdjustVMA
,
331 ShouldInline
, Style
, Symbolizer
, Printer
);
333 executeCommand(ModuleName
, ModuleName
, Cmd
, Symbol
, Offset
, AdjustVMA
,
334 ShouldInline
, Style
, Symbolizer
, Printer
);
// Prints the option help for ToolName to OS, using the usage line
// "<ToolName> [options] addresses...", followed by a note about @FILE
// response files.
// NOTE(review): the declaration of the OS parameter is not visible in this
// excerpt.
338 static void printHelp(StringRef ToolName
, const SymbolizerOptTable
&Tbl
,
340 const char HelpText
[] = " [options] addresses...";
341 Tbl
.printHelp(OS
, (ToolName
+ HelpText
).str().c_str(),
342 ToolName
.str().c_str());
343 // TODO Replace this with OptTable API once it adds extrahelp support.
344 OS
<< "\nPass @FILE as argument to read options from FILE.\n";
// Parses the command line into an opt::InputArgList using Tbl.
// Initial options come from LLVM_ADDR2LINE_OPTS or LLVM_SYMBOLIZER_OPTS,
// depending on IsAddr2Line. Parse errors are written to stderr with an
// "error: " prefix. The help and version options print their output to stdout.
// NOTE(review): the Saver parameter declaration, the use of HasError and the
// statements following the help/version output are not visible in this
// excerpt.
347 static opt::InputArgList
parseOptions(int Argc
, char *Argv
[], bool IsAddr2Line
,
349 SymbolizerOptTable
&Tbl
) {
350 StringRef ToolName
= IsAddr2Line
? "llvm-addr2line" : "llvm-symbolizer";
351 // The environment variable specifies initial options which can be overridden
352 // by command line options.
353 Tbl
.setInitialOptionsFromEnvironment(IsAddr2Line
? "LLVM_ADDR2LINE_OPTS"
354 : "LLVM_SYMBOLIZER_OPTS");
355 bool HasError
= false;
356 opt::InputArgList Args
=
357 Tbl
.parseArgs(Argc
, Argv
, OPT_UNKNOWN
, Saver
, [&](StringRef Msg
) {
358 errs() << ("error: " + Msg
+ "\n");
363 if (Args
.hasArg(OPT_help
)) {
364 printHelp(ToolName
, Tbl
, outs());
367 if (Args
.hasArg(OPT_version
)) {
368 outs() << ToolName
<< '\n';
369 cl::PrintVersionMessage();
// Reads the last occurrence of option ID from Args and parses its value into
// Value with llvm::to_integer (base argument 0). When the option is absent,
// the visible code leaves Value untouched. A value that fails to parse
// produces a diagnostic on stderr naming the option spelling and the text.
// NOTE(review): what happens after the diagnostic is not visible in this
// excerpt.
376 template <typename T
>
377 static void parseIntArg(const opt::InputArgList
&Args
, int ID
, T
&Value
) {
378 if (const opt::Arg
*A
= Args
.getLastArg(ID
)) {
379 StringRef
V(A
->getValue());
380 if (!llvm::to_integer(V
, Value
, 0)) {
381 errs() << A
->getSpelling() +
382 ": expected a non-negative integer, but got '" + V
+ "'";
// Chooses how function names are printed.
//   - OPT_functions present: LinkageName.
//   - OPT_functions_EQ present: "none" -> None, "short" -> ShortName, any
//     other value -> LinkageName.
//   - Neither present: None in addr2line mode, LinkageName otherwise.
// NOTE(review): the declaration of the IsAddr2Line parameter is not visible
// in this excerpt.
390 static FunctionNameKind
decideHowToPrintFunctions(const opt::InputArgList
&Args
,
392 if (Args
.hasArg(OPT_functions
))
393 return FunctionNameKind::LinkageName
;
394 if (const opt::Arg
*A
= Args
.getLastArg(OPT_functions_EQ
))
395 return StringSwitch
<FunctionNameKind
>(A
->getValue())
396 .Case("none", FunctionNameKind::None
)
397 .Case("short", FunctionNameKind::ShortName
)
398 .Default(FunctionNameKind::LinkageName
);
399 return IsAddr2Line
? FunctionNameKind::None
: FunctionNameKind::LinkageName
;
// Translates the color options into an optional boolean. For OPT_color_EQ the
// value "always" maps to true, "never" to false and "auto" to std::nullopt.
// NOTE(review): the result for a bare OPT_color, the handling of other
// values and the fall-through return are not visible in this excerpt.
402 static std::optional
<bool> parseColorArg(const opt::InputArgList
&Args
) {
403 if (Args
.hasArg(OPT_color
))
405 if (const opt::Arg
*A
= Args
.getLastArg(OPT_color_EQ
))
406 return StringSwitch
<std::optional
<bool>>(A
->getValue())
407 .Case("always", true)
408 .Case("never", false)
409 .Case("auto", std::nullopt
);
// Parses the value of option ID in Args as a build ID using parseBuildID().
// An empty parse result is reported on stderr together with the option
// spelling and the offending text.
// NOTE(review): the handling of a missing option (A is dereferenced below)
// and the statements after the diagnostic are not visible in this excerpt.
413 static object::BuildID
parseBuildIDArg(const opt::InputArgList
&Args
, int ID
) {
414 const opt::Arg
*A
= Args
.getLastArg(ID
);
418 StringRef
V(A
->getValue());
419 object::BuildID BuildID
= parseBuildID(V
);
420 if (BuildID
.empty()) {
421 errs() << A
->getSpelling() + ": expected a build ID, but got '" + V
+ "'\n";
427 // Symbolize markup from stdin and write the result to stdout.
// The filter is constructed over outs() with the color preference taken from
// parseColorArg(Args); stdin is consumed one line at a time via std::getline.
// NOTE(review): std::getline drops the line terminator; the statements
// between reading and filtering (and after the loop) are not visible here.
428 static void filterMarkup(const opt::InputArgList
&Args
, LLVMSymbolizer
&Symbolizer
) {
429 MarkupFilter
Filter(outs(), Symbolizer
, parseColorArg(Args
));
430 std::string InputString
;
431 while (std::getline(std::cin
, InputString
)) {
433 Filter
.filter(InputString
);
438 int llvm_symbolizer_main(int argc
, char **argv
, const llvm::ToolContext
&) {
439 InitLLVM
X(argc
, argv
);
440 sys::InitializeCOMRAII
COM(sys::COMThreadingMode::MultiThreaded
);
443 bool IsAddr2Line
= sys::path::stem(ToolName
).contains("addr2line");
445 StringSaver
Saver(A
);
446 SymbolizerOptTable Tbl
;
447 opt::InputArgList Args
= parseOptions(argc
, argv
, IsAddr2Line
, Saver
, Tbl
);
449 LLVMSymbolizer::Options Opts
;
451 PrinterConfig Config
;
452 parseIntArg(Args
, OPT_adjust_vma_EQ
, AdjustVMA
);
453 if (const opt::Arg
*A
= Args
.getLastArg(OPT_basenames
, OPT_relativenames
)) {
455 A
->getOption().matches(OPT_basenames
)
456 ? DILineInfoSpecifier::FileLineInfoKind::BaseNameOnly
457 : DILineInfoSpecifier::FileLineInfoKind::RelativeFilePath
;
459 Opts
.PathStyle
= DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath
;
461 Opts
.DebugFileDirectory
= Args
.getAllArgValues(OPT_debug_file_directory_EQ
);
462 Opts
.DefaultArch
= Args
.getLastArgValue(OPT_default_arch_EQ
).str();
463 Opts
.Demangle
= Args
.hasFlag(OPT_demangle
, OPT_no_demangle
, !IsAddr2Line
);
464 Opts
.DWPName
= Args
.getLastArgValue(OPT_dwp_EQ
).str();
465 Opts
.FallbackDebugPath
=
466 Args
.getLastArgValue(OPT_fallback_debug_path_EQ
).str();
467 Opts
.PrintFunctions
= decideHowToPrintFunctions(Args
, IsAddr2Line
);
468 parseIntArg(Args
, OPT_print_source_context_lines_EQ
,
469 Config
.SourceContextLines
);
470 Opts
.RelativeAddresses
= Args
.hasArg(OPT_relative_address
);
471 Opts
.UntagAddresses
=
472 Args
.hasFlag(OPT_untag_addresses
, OPT_no_untag_addresses
, !IsAddr2Line
);
473 Opts
.UseDIA
= Args
.hasArg(OPT_use_dia
);
474 #if !defined(LLVM_ENABLE_DIA_SDK)
476 WithColor::warning() << "DIA not available; using native PDB reader\n";
480 Opts
.UseSymbolTable
= true;
481 if (Args
.hasArg(OPT_cache_size_EQ
))
482 parseIntArg(Args
, OPT_cache_size_EQ
, Opts
.MaxCacheSize
);
483 Config
.PrintAddress
= Args
.hasArg(OPT_addresses
);
484 Config
.PrintFunctions
= Opts
.PrintFunctions
!= FunctionNameKind::None
;
485 Config
.Pretty
= Args
.hasArg(OPT_pretty_print
);
486 Config
.Verbose
= Args
.hasArg(OPT_verbose
);
488 for (const opt::Arg
*A
: Args
.filtered(OPT_dsym_hint_EQ
)) {
489 StringRef
Hint(A
->getValue());
490 if (sys::path::extension(Hint
) == ".dSYM") {
491 Opts
.DsymHints
.emplace_back(Hint
);
493 errs() << "Warning: invalid dSYM hint: \"" << Hint
494 << "\" (must have the '.dSYM' extension).\n";
498 LLVMSymbolizer
Symbolizer(Opts
);
500 if (Args
.hasFlag(OPT_debuginfod
, OPT_no_debuginfod
, canUseDebuginfod()))
501 enableDebuginfod(Symbolizer
, Args
);
503 if (Args
.hasArg(OPT_filter_markup
)) {
504 filterMarkup(Args
, Symbolizer
);
508 auto Style
= IsAddr2Line
? OutputStyle::GNU
: OutputStyle::LLVM
;
509 if (const opt::Arg
*A
= Args
.getLastArg(OPT_output_style_EQ
)) {
510 if (strcmp(A
->getValue(), "GNU") == 0)
511 Style
= OutputStyle::GNU
;
512 else if (strcmp(A
->getValue(), "JSON") == 0)
513 Style
= OutputStyle::JSON
;
515 Style
= OutputStyle::LLVM
;
518 if (Args
.hasArg(OPT_build_id_EQ
) && Args
.hasArg(OPT_obj_EQ
)) {
519 errs() << "error: cannot specify both --build-id and --obj\n";
522 object::BuildID BuildID
= parseBuildIDArg(Args
, OPT_build_id_EQ
);
524 std::unique_ptr
<DIPrinter
> Printer
;
525 if (Style
== OutputStyle::GNU
)
526 Printer
= std::make_unique
<GNUPrinter
>(outs(), printError
, Config
);
527 else if (Style
== OutputStyle::JSON
)
528 Printer
= std::make_unique
<JSONPrinter
>(outs(), Config
);
530 Printer
= std::make_unique
<LLVMPrinter
>(outs(), printError
, Config
);
532 // When an input file is specified, exit immediately if the file cannot be
533 // read. If getOrCreateModuleInfo succeeds, symbolizeInput will reuse the
534 // cached file handle.
535 if (auto *Arg
= Args
.getLastArg(OPT_obj_EQ
); Arg
) {
536 auto Status
= Symbolizer
.getOrCreateModuleInfo(Arg
->getValue());
538 Request SymRequest
= {Arg
->getValue(), 0, StringRef()};
539 handleAllErrors(Status
.takeError(), [&](const ErrorInfoBase
&EI
) {
540 Printer
->printError(SymRequest
, EI
);
546 std::vector
<std::string
> InputAddresses
= Args
.getAllArgValues(OPT_INPUT
);
547 if (InputAddresses
.empty()) {
548 const int kMaxInputStringLength
= 1024;
549 char InputString
[kMaxInputStringLength
];
551 while (fgets(InputString
, sizeof(InputString
), stdin
)) {
552 // Strip newline characters.
553 std::string
StrippedInputString(InputString
);
554 llvm::erase_if(StrippedInputString
,
555 [](char c
) { return c
== '\r' || c
== '\n'; });
556 symbolizeInput(Args
, BuildID
, AdjustVMA
, IsAddr2Line
, Style
,
557 StrippedInputString
, Symbolizer
, *Printer
);
561 Printer
->listBegin();
562 for (StringRef Address
: InputAddresses
)
563 symbolizeInput(Args
, BuildID
, AdjustVMA
, IsAddr2Line
, Style
, Address
,
564 Symbolizer
, *Printer
);