1 //===-- llvm-symbolizer.cpp - Simple addr2line-like symbolizer ------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This utility works much like "addr2line". It is capable of transforming
10 // tuples (module name, module offset) to code locations (function name,
11 // file, line number, column number). It is targeted for compiler-rt tools
12 // (especially AddressSanitizer and ThreadSanitizer) that can use it
13 // to symbolize stack traces in their error reports.
15 //===----------------------------------------------------------------------===//
18 #include "llvm/ADT/StringExtras.h"
19 #include "llvm/ADT/StringRef.h"
20 #include "llvm/Config/config.h"
21 #include "llvm/DebugInfo/Symbolize/DIPrinter.h"
22 #include "llvm/DebugInfo/Symbolize/Markup.h"
23 #include "llvm/DebugInfo/Symbolize/MarkupFilter.h"
24 #include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
25 #include "llvm/DebugInfo/Symbolize/Symbolize.h"
26 #include "llvm/Debuginfod/DIFetcher.h"
27 #include "llvm/Debuginfod/Debuginfod.h"
28 #include "llvm/Debuginfod/HTTPClient.h"
29 #include "llvm/Option/Arg.h"
30 #include "llvm/Option/ArgList.h"
31 #include "llvm/Option/Option.h"
32 #include "llvm/Support/COM.h"
33 #include "llvm/Support/CommandLine.h"
34 #include "llvm/Support/Debug.h"
35 #include "llvm/Support/FileSystem.h"
36 #include "llvm/Support/InitLLVM.h"
37 #include "llvm/Support/Path.h"
38 #include "llvm/Support/StringSaver.h"
39 #include "llvm/Support/raw_ostream.h"
46 using namespace symbolize
;
50 OPT_INVALID
= 0, // This is not an option ID.
51 #define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \
52 HELPTEXT, METAVAR, VALUES) \
58 #define PREFIX(NAME, VALUE) const char *const NAME[] = VALUE;
62 const opt::OptTable::Info InfoTable
[] = {
63 #define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \
64 HELPTEXT, METAVAR, VALUES) \
66 PREFIX, NAME, HELPTEXT, \
67 METAVAR, OPT_##ID, opt::Option::KIND##Class, \
68 PARAM, FLAGS, OPT_##GROUP, \
69 OPT_##ALIAS, ALIASARGS, VALUES},
74 class SymbolizerOptTable
: public opt::OptTable
{
76 SymbolizerOptTable() : OptTable(InfoTable
) {
77 setGroupedShortOptions(true);
83 static void print(const Request
&Request
, Expected
<T
> &ResOrErr
,
86 // No error, print the result.
87 Printer
.print(Request
, *ResOrErr
);
92 bool PrintEmpty
= true;
93 handleAllErrors(std::move(ResOrErr
.takeError()),
94 [&](const ErrorInfoBase
&EI
) {
95 PrintEmpty
= Printer
.printError(
96 Request
, EI
, "LLVMSymbolizer: error reading file: ");
100 Printer
.print(Request
, T());
103 enum class OutputStyle
{ LLVM
, GNU
, JSON
};
111 static void enableDebuginfod(LLVMSymbolizer
&Symbolizer
) {
112 static bool IsEnabled
= false;
116 // Look up symbols using the debuginfod client.
117 Symbolizer
.addDIFetcher(std::make_unique
<DebuginfodDIFetcher
>());
118 // The HTTPClient must be initialized for use by the debuginfod client.
119 HTTPClient::initialize();
122 static SmallVector
<uint8_t> parseBuildID(StringRef Str
) {
124 if (!tryGetFromHex(Str
, Bytes
))
126 ArrayRef
<uint8_t> BuildID(reinterpret_cast<const uint8_t *>(Bytes
.data()),
128 return SmallVector
<uint8_t>(BuildID
.begin(), BuildID
.end());
131 static bool parseCommand(StringRef BinaryName
, bool IsAddr2Line
,
132 StringRef InputString
, Command
&Cmd
,
133 std::string
&ModuleName
,
134 SmallVectorImpl
<uint8_t> &BuildID
,
135 uint64_t &ModuleOffset
) {
136 const char kDelimiters
[] = " \n\r";
138 if (InputString
.consume_front("CODE ")) {
140 } else if (InputString
.consume_front("DATA ")) {
142 } else if (InputString
.consume_front("FRAME ")) {
143 Cmd
= Command::Frame
;
145 // If no cmd, assume it's CODE.
150 // Skip delimiters and parse input filename (if needed).
151 if (BinaryName
.empty() && BuildID
.empty()) {
152 bool HasFilePrefix
= false;
153 bool HasBuildIDPrefix
= false;
155 if (InputString
.consume_front("FILE:")) {
158 HasFilePrefix
= true;
161 if (InputString
.consume_front("BUILDID:")) {
162 if (HasBuildIDPrefix
)
164 HasBuildIDPrefix
= true;
169 if (HasFilePrefix
&& HasBuildIDPrefix
)
172 Pos
= InputString
.data();
173 Pos
+= strspn(Pos
, kDelimiters
);
174 if (*Pos
== '"' || *Pos
== '\'') {
177 const char *End
= strchr(Pos
, Quote
);
180 ModuleName
= std::string(Pos
, End
- Pos
);
183 int NameLength
= strcspn(Pos
, kDelimiters
);
184 ModuleName
= std::string(Pos
, NameLength
);
187 if (HasBuildIDPrefix
) {
188 BuildID
= parseBuildID(ModuleName
);
194 Pos
= InputString
.data();
195 ModuleName
= BinaryName
.str();
197 // Skip delimiters and parse module offset.
198 Pos
+= strspn(Pos
, kDelimiters
);
199 int OffsetLength
= strcspn(Pos
, kDelimiters
);
200 StringRef
Offset(Pos
, OffsetLength
);
201 // GNU addr2line assumes the offset is hexadecimal and allows a redundant
202 // "0x" or "0X" prefix; do the same for compatibility.
204 Offset
.consume_front("0x") || Offset
.consume_front("0X");
205 return !Offset
.getAsInteger(IsAddr2Line
? 16 : 0, ModuleOffset
);
208 template <typename T
>
209 void executeCommand(StringRef ModuleName
, const T
&ModuleSpec
, Command Cmd
,
210 uint64_t Offset
, uint64_t AdjustVMA
, bool ShouldInline
,
211 OutputStyle Style
, LLVMSymbolizer
&Symbolizer
,
212 DIPrinter
&Printer
) {
213 uint64_t AdjustedOffset
= Offset
- AdjustVMA
;
214 object::SectionedAddress Address
= {AdjustedOffset
,
215 object::SectionedAddress::UndefSection
};
216 if (Cmd
== Command::Data
) {
217 Expected
<DIGlobal
> ResOrErr
= Symbolizer
.symbolizeData(ModuleSpec
, Address
);
218 print({ModuleName
, Offset
}, ResOrErr
, Printer
);
219 } else if (Cmd
== Command::Frame
) {
220 Expected
<std::vector
<DILocal
>> ResOrErr
=
221 Symbolizer
.symbolizeFrame(ModuleSpec
, Address
);
222 print({ModuleName
, Offset
}, ResOrErr
, Printer
);
223 } else if (ShouldInline
) {
224 Expected
<DIInliningInfo
> ResOrErr
=
225 Symbolizer
.symbolizeInlinedCode(ModuleSpec
, Address
);
226 print({ModuleName
, Offset
}, ResOrErr
, Printer
);
227 } else if (Style
== OutputStyle::GNU
) {
228 // With PrintFunctions == FunctionNameKind::LinkageName (default)
229 // and UseSymbolTable == true (also default), Symbolizer.symbolizeCode()
230 // may override the name of an inlined function with the name of the topmost
231 // caller function in the inlining chain. This contradicts the existing
232 // behavior of addr2line. Symbolizer.symbolizeInlinedCode() overrides only
233 // the topmost function, which suits our needs better.
234 Expected
<DIInliningInfo
> ResOrErr
=
235 Symbolizer
.symbolizeInlinedCode(ModuleSpec
, Address
);
236 Expected
<DILineInfo
> Res0OrErr
=
238 ? Expected
<DILineInfo
>(ResOrErr
.takeError())
239 : ((ResOrErr
->getNumberOfFrames() == 0) ? DILineInfo()
240 : ResOrErr
->getFrame(0));
241 print({ModuleName
, Offset
}, Res0OrErr
, Printer
);
243 Expected
<DILineInfo
> ResOrErr
=
244 Symbolizer
.symbolizeCode(ModuleSpec
, Address
);
245 print({ModuleName
, Offset
}, ResOrErr
, Printer
);
247 Symbolizer
.pruneCache();
250 static void symbolizeInput(const opt::InputArgList
&Args
,
251 ArrayRef
<uint8_t> IncomingBuildID
,
252 uint64_t AdjustVMA
, bool IsAddr2Line
,
253 OutputStyle Style
, StringRef InputString
,
254 LLVMSymbolizer
&Symbolizer
, DIPrinter
&Printer
) {
256 std::string ModuleName
;
257 SmallVector
<uint8_t> BuildID(IncomingBuildID
.begin(), IncomingBuildID
.end());
259 if (!parseCommand(Args
.getLastArgValue(OPT_obj_EQ
), IsAddr2Line
,
260 StringRef(InputString
), Cmd
, ModuleName
, BuildID
, Offset
)) {
261 Printer
.printInvalidCommand({ModuleName
, None
}, InputString
);
264 bool ShouldInline
= Args
.hasFlag(OPT_inlines
, OPT_no_inlines
, !IsAddr2Line
);
265 if (!BuildID
.empty()) {
266 assert(ModuleName
.empty());
267 if (!Args
.hasArg(OPT_no_debuginfod
))
268 enableDebuginfod(Symbolizer
);
269 std::string BuildIDStr
= toHex(BuildID
);
270 executeCommand(BuildIDStr
, BuildID
, Cmd
, Offset
, AdjustVMA
, ShouldInline
,
271 Style
, Symbolizer
, Printer
);
273 executeCommand(ModuleName
, ModuleName
, Cmd
, Offset
, AdjustVMA
, ShouldInline
,
274 Style
, Symbolizer
, Printer
);
278 static void printHelp(StringRef ToolName
, const SymbolizerOptTable
&Tbl
,
280 const char HelpText
[] = " [options] addresses...";
281 Tbl
.printHelp(OS
, (ToolName
+ HelpText
).str().c_str(),
282 ToolName
.str().c_str());
283 // TODO Replace this with OptTable API once it adds extrahelp support.
284 OS
<< "\nPass @FILE as argument to read options from FILE.\n";
287 static opt::InputArgList
parseOptions(int Argc
, char *Argv
[], bool IsAddr2Line
,
289 SymbolizerOptTable
&Tbl
) {
290 StringRef ToolName
= IsAddr2Line
? "llvm-addr2line" : "llvm-symbolizer";
291 // The environment variable specifies initial options which can be overridden
292 // by commnad line options.
293 Tbl
.setInitialOptionsFromEnvironment(IsAddr2Line
? "LLVM_ADDR2LINE_OPTS"
294 : "LLVM_SYMBOLIZER_OPTS");
295 bool HasError
= false;
296 opt::InputArgList Args
=
297 Tbl
.parseArgs(Argc
, Argv
, OPT_UNKNOWN
, Saver
, [&](StringRef Msg
) {
298 errs() << ("error: " + Msg
+ "\n");
303 if (Args
.hasArg(OPT_help
)) {
304 printHelp(ToolName
, Tbl
, outs());
307 if (Args
.hasArg(OPT_version
)) {
308 outs() << ToolName
<< '\n';
309 cl::PrintVersionMessage();
316 template <typename T
>
317 static void parseIntArg(const opt::InputArgList
&Args
, int ID
, T
&Value
) {
318 if (const opt::Arg
*A
= Args
.getLastArg(ID
)) {
319 StringRef
V(A
->getValue());
320 if (!llvm::to_integer(V
, Value
, 0)) {
321 errs() << A
->getSpelling() +
322 ": expected a non-negative integer, but got '" + V
+ "'";
330 static FunctionNameKind
decideHowToPrintFunctions(const opt::InputArgList
&Args
,
332 if (Args
.hasArg(OPT_functions
))
333 return FunctionNameKind::LinkageName
;
334 if (const opt::Arg
*A
= Args
.getLastArg(OPT_functions_EQ
))
335 return StringSwitch
<FunctionNameKind
>(A
->getValue())
336 .Case("none", FunctionNameKind::None
)
337 .Case("short", FunctionNameKind::ShortName
)
338 .Default(FunctionNameKind::LinkageName
);
339 return IsAddr2Line
? FunctionNameKind::None
: FunctionNameKind::LinkageName
;
342 static Optional
<bool> parseColorArg(const opt::InputArgList
&Args
) {
343 if (Args
.hasArg(OPT_color
))
345 if (const opt::Arg
*A
= Args
.getLastArg(OPT_color_EQ
))
346 return StringSwitch
<Optional
<bool>>(A
->getValue())
347 .Case("always", true)
348 .Case("never", false)
353 static SmallVector
<uint8_t> parseBuildIDArg(const opt::InputArgList
&Args
,
355 const opt::Arg
*A
= Args
.getLastArg(ID
);
359 StringRef
V(A
->getValue());
360 SmallVector
<uint8_t> BuildID
= parseBuildID(V
);
361 if (BuildID
.empty()) {
362 errs() << A
->getSpelling() + ": expected a build ID, but got '" + V
+ "'\n";
368 // Symbolize markup from stdin and write the result to stdout.
369 static void filterMarkup(const opt::InputArgList
&Args
, LLVMSymbolizer
&Symbolizer
) {
370 MarkupFilter
Filter(outs(), Symbolizer
, parseColorArg(Args
));
371 std::string InputString
;
372 while (std::getline(std::cin
, InputString
)) {
374 Filter
.filter(InputString
);
379 ExitOnError ExitOnErr
;
381 int main(int argc
, char **argv
) {
382 InitLLVM
X(argc
, argv
);
383 sys::InitializeCOMRAII
COM(sys::COMThreadingMode::MultiThreaded
);
385 bool IsAddr2Line
= sys::path::stem(argv
[0]).contains("addr2line");
387 StringSaver
Saver(A
);
388 SymbolizerOptTable Tbl
;
389 opt::InputArgList Args
= parseOptions(argc
, argv
, IsAddr2Line
, Saver
, Tbl
);
391 LLVMSymbolizer::Options Opts
;
393 PrinterConfig Config
;
394 parseIntArg(Args
, OPT_adjust_vma_EQ
, AdjustVMA
);
395 if (const opt::Arg
*A
= Args
.getLastArg(OPT_basenames
, OPT_relativenames
)) {
397 A
->getOption().matches(OPT_basenames
)
398 ? DILineInfoSpecifier::FileLineInfoKind::BaseNameOnly
399 : DILineInfoSpecifier::FileLineInfoKind::RelativeFilePath
;
401 Opts
.PathStyle
= DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath
;
403 Opts
.DebugFileDirectory
= Args
.getAllArgValues(OPT_debug_file_directory_EQ
);
404 Opts
.DefaultArch
= Args
.getLastArgValue(OPT_default_arch_EQ
).str();
405 Opts
.Demangle
= Args
.hasFlag(OPT_demangle
, OPT_no_demangle
, !IsAddr2Line
);
406 Opts
.DWPName
= Args
.getLastArgValue(OPT_dwp_EQ
).str();
407 Opts
.FallbackDebugPath
=
408 Args
.getLastArgValue(OPT_fallback_debug_path_EQ
).str();
409 Opts
.PrintFunctions
= decideHowToPrintFunctions(Args
, IsAddr2Line
);
410 parseIntArg(Args
, OPT_print_source_context_lines_EQ
,
411 Config
.SourceContextLines
);
412 Opts
.RelativeAddresses
= Args
.hasArg(OPT_relative_address
);
413 Opts
.UntagAddresses
=
414 Args
.hasFlag(OPT_untag_addresses
, OPT_no_untag_addresses
, !IsAddr2Line
);
415 Opts
.UseDIA
= Args
.hasArg(OPT_use_dia
);
416 #if !defined(LLVM_ENABLE_DIA_SDK)
418 WithColor::warning() << "DIA not available; using native PDB reader\n";
422 Opts
.UseSymbolTable
= true;
423 if (Args
.hasArg(OPT_cache_size_EQ
))
424 parseIntArg(Args
, OPT_cache_size_EQ
, Opts
.MaxCacheSize
);
425 Config
.PrintAddress
= Args
.hasArg(OPT_addresses
);
426 Config
.PrintFunctions
= Opts
.PrintFunctions
!= FunctionNameKind::None
;
427 Config
.Pretty
= Args
.hasArg(OPT_pretty_print
);
428 Config
.Verbose
= Args
.hasArg(OPT_verbose
);
430 for (const opt::Arg
*A
: Args
.filtered(OPT_dsym_hint_EQ
)) {
431 StringRef
Hint(A
->getValue());
432 if (sys::path::extension(Hint
) == ".dSYM") {
433 Opts
.DsymHints
.emplace_back(Hint
);
435 errs() << "Warning: invalid dSYM hint: \"" << Hint
436 << "\" (must have the '.dSYM' extension).\n";
440 LLVMSymbolizer
Symbolizer(Opts
);
442 // A debuginfod lookup could succeed if a HTTP client is available and at
443 // least one backing URL is configured.
444 bool ShouldUseDebuginfodByDefault
=
445 HTTPClient::isAvailable() &&
446 !ExitOnErr(getDefaultDebuginfodUrls()).empty();
447 if (Args
.hasFlag(OPT_debuginfod
, OPT_no_debuginfod
,
448 ShouldUseDebuginfodByDefault
))
449 enableDebuginfod(Symbolizer
);
451 if (Args
.hasArg(OPT_filter_markup
)) {
452 filterMarkup(Args
, Symbolizer
);
456 auto Style
= IsAddr2Line
? OutputStyle::GNU
: OutputStyle::LLVM
;
457 if (const opt::Arg
*A
= Args
.getLastArg(OPT_output_style_EQ
)) {
458 if (strcmp(A
->getValue(), "GNU") == 0)
459 Style
= OutputStyle::GNU
;
460 else if (strcmp(A
->getValue(), "JSON") == 0)
461 Style
= OutputStyle::JSON
;
463 Style
= OutputStyle::LLVM
;
466 if (Args
.hasArg(OPT_build_id_EQ
) && Args
.hasArg(OPT_obj_EQ
)) {
467 errs() << "error: cannot specify both --build-id and --obj\n";
470 SmallVector
<uint8_t> BuildID
= parseBuildIDArg(Args
, OPT_build_id_EQ
);
472 std::unique_ptr
<DIPrinter
> Printer
;
473 if (Style
== OutputStyle::GNU
)
474 Printer
= std::make_unique
<GNUPrinter
>(outs(), errs(), Config
);
475 else if (Style
== OutputStyle::JSON
)
476 Printer
= std::make_unique
<JSONPrinter
>(outs(), Config
);
478 Printer
= std::make_unique
<LLVMPrinter
>(outs(), errs(), Config
);
480 std::vector
<std::string
> InputAddresses
= Args
.getAllArgValues(OPT_INPUT
);
481 if (InputAddresses
.empty()) {
482 const int kMaxInputStringLength
= 1024;
483 char InputString
[kMaxInputStringLength
];
485 while (fgets(InputString
, sizeof(InputString
), stdin
)) {
486 // Strip newline characters.
487 std::string
StrippedInputString(InputString
);
488 llvm::erase_if(StrippedInputString
,
489 [](char c
) { return c
== '\r' || c
== '\n'; });
490 symbolizeInput(Args
, BuildID
, AdjustVMA
, IsAddr2Line
, Style
,
491 StrippedInputString
, Symbolizer
, *Printer
);
495 Printer
->listBegin();
496 for (StringRef Address
: InputAddresses
)
497 symbolizeInput(Args
, BuildID
, AdjustVMA
, IsAddr2Line
, Style
, Address
,
498 Symbolizer
, *Printer
);