Run DCE after a LoopFlatten test to reduce spurious output [nfc]
[llvm-project.git] / llvm / tools / llvm-symbolizer / llvm-symbolizer.cpp
blob447c18abadc1743846bdbc1ea585d3a3123e04a1
1 //===-- llvm-symbolizer.cpp - Simple addr2line-like symbolizer ------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This utility works much like "addr2line". It is able to transform
10 // tuples (module name, module offset) to code locations (function name,
11 // file, line number, column number). It is targeted for compiler-rt tools
12 // (especially AddressSanitizer and ThreadSanitizer) that can use it
13 // to symbolize stack traces in their error reports.
15 //===----------------------------------------------------------------------===//
17 #include "Opts.inc"
18 #include "llvm/ADT/StringExtras.h"
19 #include "llvm/ADT/StringRef.h"
20 #include "llvm/Config/config.h"
21 #include "llvm/DebugInfo/Symbolize/DIPrinter.h"
22 #include "llvm/DebugInfo/Symbolize/Markup.h"
23 #include "llvm/DebugInfo/Symbolize/MarkupFilter.h"
24 #include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
25 #include "llvm/DebugInfo/Symbolize/Symbolize.h"
26 #include "llvm/Debuginfod/BuildIDFetcher.h"
27 #include "llvm/Debuginfod/Debuginfod.h"
28 #include "llvm/Debuginfod/HTTPClient.h"
29 #include "llvm/Option/Arg.h"
30 #include "llvm/Option/ArgList.h"
31 #include "llvm/Option/Option.h"
32 #include "llvm/Support/COM.h"
33 #include "llvm/Support/CommandLine.h"
34 #include "llvm/Support/Debug.h"
35 #include "llvm/Support/Errc.h"
36 #include "llvm/Support/FileSystem.h"
37 #include "llvm/Support/InitLLVM.h"
38 #include "llvm/Support/LLVMDriver.h"
39 #include "llvm/Support/Path.h"
40 #include "llvm/Support/StringSaver.h"
41 #include "llvm/Support/WithColor.h"
42 #include "llvm/Support/raw_ostream.h"
43 #include <algorithm>
44 #include <cstdio>
45 #include <cstring>
46 #include <iostream>
47 #include <string>
49 using namespace llvm;
50 using namespace symbolize;
// File-local option-table plumbing. Opts.inc is included several times below,
// each time with a different macro defined, to expand the same option list
// into different tables.
52 namespace {
// Option identifiers: one enumerator per OPTION entry in Opts.inc, produced
// by LLVM_MAKE_OPT_ID, following the reserved OPT_INVALID value.
53 enum ID {
54 OPT_INVALID = 0, // This is not an option ID.
55 #define OPTION(...) LLVM_MAKE_OPT_ID(__VA_ARGS__),
56 #include "Opts.inc"
57 #undef OPTION
// For every PREFIX entry in Opts.inc, define a constexpr StringLiteral array
// NAME_init plus an ArrayRef NAME that views all but the last array element.
// NOTE(review): presumably the last element is a terminator entry -- confirm
// against the generated Opts.inc.
60 #define PREFIX(NAME, VALUE) \
61 static constexpr StringLiteral NAME##_init[] = VALUE; \
62 static constexpr ArrayRef<StringLiteral> NAME(NAME##_init, \
63 std::size(NAME##_init) - 1);
64 #include "Opts.inc"
65 #undef PREFIX
// Table of option descriptions: one LLVM_CONSTRUCT_OPT_INFO record per OPTION
// entry in Opts.inc.
67 using namespace llvm::opt;
68 static constexpr opt::OptTable::Info InfoTable[] = {
69 #define OPTION(...) LLVM_CONSTRUCT_OPT_INFO(__VA_ARGS__),
70 #include "Opts.inc"
71 #undef OPTION
// Option table for this tool, built over InfoTable, with grouped short
// options switched on in the constructor.
74 class SymbolizerOptTable : public opt::GenericOptTable {
75 public:
76 SymbolizerOptTable() : GenericOptTable(InfoTable) {
77 setGroupedShortOptions(true);
80 } // namespace
82 static std::string ToolName;
84 static void printError(const ErrorInfoBase &EI, StringRef AuxInfo) {
85 WithColor::error(errs(), ToolName);
86 if (!AuxInfo.empty())
87 errs() << "'" << AuxInfo << "': ";
88 EI.log(errs());
89 errs() << '\n';
92 template <typename T>
93 static void print(const Request &Request, Expected<T> &ResOrErr,
94 DIPrinter &Printer) {
95 if (ResOrErr) {
96 // No error, print the result.
97 Printer.print(Request, *ResOrErr);
98 return;
101 // Handle the error.
102 bool PrintEmpty = true;
103 handleAllErrors(std::move(ResOrErr.takeError()),
104 [&](const ErrorInfoBase &EI) {
105 PrintEmpty = Printer.printError(Request, EI);
108 if (PrintEmpty)
109 Printer.print(Request, T());
/// Output format selector; main maps each value to the matching printer
/// (LLVMPrinter, GNUPrinter or JSONPrinter).
enum class OutputStyle {
  LLVM,
  GNU,
  JSON,
};
/// Kind of query carried by one input line, selected by an optional leading
/// "CODE ", "DATA " or "FRAME " keyword (CODE when no keyword is given).
enum class Command {
  Code,
  Data,
  Frame,
};
120 static void enableDebuginfod(LLVMSymbolizer &Symbolizer,
121 const opt::ArgList &Args) {
122 static bool IsEnabled = false;
123 if (IsEnabled)
124 return;
125 IsEnabled = true;
126 // Look up symbols using the debuginfod client.
127 Symbolizer.setBuildIDFetcher(std::make_unique<DebuginfodFetcher>(
128 Args.getAllArgValues(OPT_debug_file_directory_EQ)));
129 // The HTTPClient must be initialized for use by the debuginfod client.
130 HTTPClient::initialize();
133 static StringRef getSpaceDelimitedWord(StringRef &Source) {
134 const char kDelimiters[] = " \n\r";
135 const char *Pos = Source.data();
136 StringRef Result;
137 Pos += strspn(Pos, kDelimiters);
138 if (*Pos == '"' || *Pos == '\'') {
139 char Quote = *Pos;
140 Pos++;
141 const char *End = strchr(Pos, Quote);
142 if (!End)
143 return StringRef();
144 Result = StringRef(Pos, End - Pos);
145 Pos = End + 1;
146 } else {
147 int NameLength = strcspn(Pos, kDelimiters);
148 Result = StringRef(Pos, NameLength);
149 Pos += NameLength;
151 Source = StringRef(Pos, Source.end() - Pos);
152 return Result;
155 static Error makeStringError(StringRef Msg) {
156 return make_error<StringError>(Msg, inconvertibleErrorCode());
159 static Error parseCommand(StringRef BinaryName, bool IsAddr2Line,
160 StringRef InputString, Command &Cmd,
161 std::string &ModuleName, object::BuildID &BuildID,
162 StringRef &Symbol, uint64_t &ModuleOffset) {
163 ModuleName = BinaryName;
164 if (InputString.consume_front("CODE ")) {
165 Cmd = Command::Code;
166 } else if (InputString.consume_front("DATA ")) {
167 Cmd = Command::Data;
168 } else if (InputString.consume_front("FRAME ")) {
169 Cmd = Command::Frame;
170 } else {
171 // If no cmd, assume it's CODE.
172 Cmd = Command::Code;
175 // Parse optional input file specification.
176 bool HasFilePrefix = false;
177 bool HasBuildIDPrefix = false;
178 while (!InputString.empty()) {
179 InputString = InputString.ltrim();
180 if (InputString.consume_front("FILE:")) {
181 if (HasFilePrefix || HasBuildIDPrefix)
182 return makeStringError("duplicate input file specification prefix");
183 HasFilePrefix = true;
184 continue;
186 if (InputString.consume_front("BUILDID:")) {
187 if (HasBuildIDPrefix || HasFilePrefix)
188 return makeStringError("duplicate input file specification prefix");
189 HasBuildIDPrefix = true;
190 continue;
192 break;
195 // If an input file is not specified on the command line, try to extract it
196 // from the command.
197 if (HasBuildIDPrefix || HasFilePrefix) {
198 InputString = InputString.ltrim();
199 if (InputString.empty()) {
200 if (HasFilePrefix)
201 return makeStringError("must be followed by an input file");
202 else
203 return makeStringError("must be followed by a hash");
206 if (!BinaryName.empty() || !BuildID.empty())
207 return makeStringError("input file has already been specified");
209 StringRef Name = getSpaceDelimitedWord(InputString);
210 if (Name.empty())
211 return makeStringError("unbalanced quotes in input file name");
212 if (HasBuildIDPrefix) {
213 BuildID = parseBuildID(Name);
214 if (BuildID.empty())
215 return makeStringError("wrong format of build-id");
216 } else {
217 ModuleName = Name;
219 } else if (BinaryName.empty() && BuildID.empty()) {
220 // No input file has been specified. If the input string contains at least
221 // two items, assume that the first item is a file name.
222 ModuleName = getSpaceDelimitedWord(InputString);
223 if (ModuleName.empty())
224 return makeStringError("no input filename has been specified");
227 // Parse module offset, which can be specified as a number or as a symbol.
228 InputString = InputString.ltrim();
229 if (InputString.empty())
230 return makeStringError("no module offset has been specified");
232 // If input string contains a space, ignore everything after it. This behavior
233 // is consistent with GNU addr2line.
234 int OffsetLength = InputString.find_first_of(" \n\r");
235 StringRef Offset = InputString.substr(0, OffsetLength);
237 // GNU addr2line assumes the offset is hexadecimal and allows a redundant
238 // "0x" or "0X" prefix; do the same for compatibility.
239 if (IsAddr2Line)
240 Offset.consume_front("0x") || Offset.consume_front("0X");
242 // If the input is not a number, treat it is a symbol.
243 if (Offset.getAsInteger(IsAddr2Line ? 16 : 0, ModuleOffset)) {
244 Symbol = Offset;
245 ModuleOffset = 0;
248 return Error::success();
251 template <typename T>
252 void executeCommand(StringRef ModuleName, const T &ModuleSpec, Command Cmd,
253 StringRef Symbol, uint64_t Offset, uint64_t AdjustVMA,
254 bool ShouldInline, OutputStyle Style,
255 LLVMSymbolizer &Symbolizer, DIPrinter &Printer) {
256 uint64_t AdjustedOffset = Offset - AdjustVMA;
257 object::SectionedAddress Address = {AdjustedOffset,
258 object::SectionedAddress::UndefSection};
259 Request SymRequest = {
260 ModuleName, Symbol.empty() ? std::make_optional(Offset) : std::nullopt,
261 Symbol};
262 if (Cmd == Command::Data) {
263 Expected<DIGlobal> ResOrErr = Symbolizer.symbolizeData(ModuleSpec, Address);
264 print(SymRequest, ResOrErr, Printer);
265 } else if (Cmd == Command::Frame) {
266 Expected<std::vector<DILocal>> ResOrErr =
267 Symbolizer.symbolizeFrame(ModuleSpec, Address);
268 print(SymRequest, ResOrErr, Printer);
269 } else if (!Symbol.empty()) {
270 Expected<std::vector<DILineInfo>> ResOrErr =
271 Symbolizer.findSymbol(ModuleSpec, Symbol);
272 print(SymRequest, ResOrErr, Printer);
273 } else if (ShouldInline) {
274 Expected<DIInliningInfo> ResOrErr =
275 Symbolizer.symbolizeInlinedCode(ModuleSpec, Address);
276 print(SymRequest, ResOrErr, Printer);
277 } else if (Style == OutputStyle::GNU) {
278 // With PrintFunctions == FunctionNameKind::LinkageName (default)
279 // and UseSymbolTable == true (also default), Symbolizer.symbolizeCode()
280 // may override the name of an inlined function with the name of the topmost
281 // caller function in the inlining chain. This contradicts the existing
282 // behavior of addr2line. Symbolizer.symbolizeInlinedCode() overrides only
283 // the topmost function, which suits our needs better.
284 Expected<DIInliningInfo> ResOrErr =
285 Symbolizer.symbolizeInlinedCode(ModuleSpec, Address);
286 Expected<DILineInfo> Res0OrErr =
287 !ResOrErr
288 ? Expected<DILineInfo>(ResOrErr.takeError())
289 : ((ResOrErr->getNumberOfFrames() == 0) ? DILineInfo()
290 : ResOrErr->getFrame(0));
291 print(SymRequest, Res0OrErr, Printer);
292 } else {
293 Expected<DILineInfo> ResOrErr =
294 Symbolizer.symbolizeCode(ModuleSpec, Address);
295 print(SymRequest, ResOrErr, Printer);
297 Symbolizer.pruneCache();
300 static void printUnknownLineInfo(std::string ModuleName, DIPrinter &Printer) {
301 Request SymRequest = {ModuleName, std::nullopt, StringRef()};
302 Printer.print(SymRequest, DILineInfo());
305 static void symbolizeInput(const opt::InputArgList &Args,
306 object::BuildIDRef IncomingBuildID,
307 uint64_t AdjustVMA, bool IsAddr2Line,
308 OutputStyle Style, StringRef InputString,
309 LLVMSymbolizer &Symbolizer, DIPrinter &Printer) {
310 Command Cmd;
311 std::string ModuleName;
312 object::BuildID BuildID(IncomingBuildID.begin(), IncomingBuildID.end());
313 uint64_t Offset = 0;
314 StringRef Symbol;
315 if (Error E = parseCommand(Args.getLastArgValue(OPT_obj_EQ), IsAddr2Line,
316 StringRef(InputString), Cmd, ModuleName, BuildID,
317 Symbol, Offset)) {
318 handleAllErrors(std::move(E), [&](const StringError &EI) {
319 printError(EI, InputString);
320 printUnknownLineInfo(ModuleName, Printer);
322 return;
324 bool ShouldInline = Args.hasFlag(OPT_inlines, OPT_no_inlines, !IsAddr2Line);
325 if (!BuildID.empty()) {
326 assert(ModuleName.empty());
327 if (!Args.hasArg(OPT_no_debuginfod))
328 enableDebuginfod(Symbolizer, Args);
329 std::string BuildIDStr = toHex(BuildID);
330 executeCommand(BuildIDStr, BuildID, Cmd, Symbol, Offset, AdjustVMA,
331 ShouldInline, Style, Symbolizer, Printer);
332 } else {
333 executeCommand(ModuleName, ModuleName, Cmd, Symbol, Offset, AdjustVMA,
334 ShouldInline, Style, Symbolizer, Printer);
338 static void printHelp(StringRef ToolName, const SymbolizerOptTable &Tbl,
339 raw_ostream &OS) {
340 const char HelpText[] = " [options] addresses...";
341 Tbl.printHelp(OS, (ToolName + HelpText).str().c_str(),
342 ToolName.str().c_str());
343 // TODO Replace this with OptTable API once it adds extrahelp support.
344 OS << "\nPass @FILE as argument to read options from FILE.\n";
347 static opt::InputArgList parseOptions(int Argc, char *Argv[], bool IsAddr2Line,
348 StringSaver &Saver,
349 SymbolizerOptTable &Tbl) {
350 StringRef ToolName = IsAddr2Line ? "llvm-addr2line" : "llvm-symbolizer";
351 // The environment variable specifies initial options which can be overridden
352 // by commnad line options.
353 Tbl.setInitialOptionsFromEnvironment(IsAddr2Line ? "LLVM_ADDR2LINE_OPTS"
354 : "LLVM_SYMBOLIZER_OPTS");
355 bool HasError = false;
356 opt::InputArgList Args =
357 Tbl.parseArgs(Argc, Argv, OPT_UNKNOWN, Saver, [&](StringRef Msg) {
358 errs() << ("error: " + Msg + "\n");
359 HasError = true;
361 if (HasError)
362 exit(1);
363 if (Args.hasArg(OPT_help)) {
364 printHelp(ToolName, Tbl, outs());
365 exit(0);
367 if (Args.hasArg(OPT_version)) {
368 outs() << ToolName << '\n';
369 cl::PrintVersionMessage();
370 exit(0);
373 return Args;
376 template <typename T>
377 static void parseIntArg(const opt::InputArgList &Args, int ID, T &Value) {
378 if (const opt::Arg *A = Args.getLastArg(ID)) {
379 StringRef V(A->getValue());
380 if (!llvm::to_integer(V, Value, 0)) {
381 errs() << A->getSpelling() +
382 ": expected a non-negative integer, but got '" + V + "'";
383 exit(1);
385 } else {
386 Value = 0;
390 static FunctionNameKind decideHowToPrintFunctions(const opt::InputArgList &Args,
391 bool IsAddr2Line) {
392 if (Args.hasArg(OPT_functions))
393 return FunctionNameKind::LinkageName;
394 if (const opt::Arg *A = Args.getLastArg(OPT_functions_EQ))
395 return StringSwitch<FunctionNameKind>(A->getValue())
396 .Case("none", FunctionNameKind::None)
397 .Case("short", FunctionNameKind::ShortName)
398 .Default(FunctionNameKind::LinkageName);
399 return IsAddr2Line ? FunctionNameKind::None : FunctionNameKind::LinkageName;
402 static std::optional<bool> parseColorArg(const opt::InputArgList &Args) {
403 if (Args.hasArg(OPT_color))
404 return true;
405 if (const opt::Arg *A = Args.getLastArg(OPT_color_EQ))
406 return StringSwitch<std::optional<bool>>(A->getValue())
407 .Case("always", true)
408 .Case("never", false)
409 .Case("auto", std::nullopt);
410 return std::nullopt;
413 static object::BuildID parseBuildIDArg(const opt::InputArgList &Args, int ID) {
414 const opt::Arg *A = Args.getLastArg(ID);
415 if (!A)
416 return {};
418 StringRef V(A->getValue());
419 object::BuildID BuildID = parseBuildID(V);
420 if (BuildID.empty()) {
421 errs() << A->getSpelling() + ": expected a build ID, but got '" + V + "'\n";
422 exit(1);
424 return BuildID;
427 // Symbolize markup from stdin and write the result to stdout.
428 static void filterMarkup(const opt::InputArgList &Args, LLVMSymbolizer &Symbolizer) {
429 MarkupFilter Filter(outs(), Symbolizer, parseColorArg(Args));
430 std::string InputString;
431 while (std::getline(std::cin, InputString)) {
432 InputString += '\n';
433 Filter.filter(InputString);
435 Filter.finish();
438 int llvm_symbolizer_main(int argc, char **argv, const llvm::ToolContext &) {
439 InitLLVM X(argc, argv);
440 sys::InitializeCOMRAII COM(sys::COMThreadingMode::MultiThreaded);
442 ToolName = argv[0];
443 bool IsAddr2Line = sys::path::stem(ToolName).contains("addr2line");
444 BumpPtrAllocator A;
445 StringSaver Saver(A);
446 SymbolizerOptTable Tbl;
447 opt::InputArgList Args = parseOptions(argc, argv, IsAddr2Line, Saver, Tbl);
449 LLVMSymbolizer::Options Opts;
450 uint64_t AdjustVMA;
451 PrinterConfig Config;
452 parseIntArg(Args, OPT_adjust_vma_EQ, AdjustVMA);
453 if (const opt::Arg *A = Args.getLastArg(OPT_basenames, OPT_relativenames)) {
454 Opts.PathStyle =
455 A->getOption().matches(OPT_basenames)
456 ? DILineInfoSpecifier::FileLineInfoKind::BaseNameOnly
457 : DILineInfoSpecifier::FileLineInfoKind::RelativeFilePath;
458 } else {
459 Opts.PathStyle = DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath;
461 Opts.DebugFileDirectory = Args.getAllArgValues(OPT_debug_file_directory_EQ);
462 Opts.DefaultArch = Args.getLastArgValue(OPT_default_arch_EQ).str();
463 Opts.Demangle = Args.hasFlag(OPT_demangle, OPT_no_demangle, !IsAddr2Line);
464 Opts.DWPName = Args.getLastArgValue(OPT_dwp_EQ).str();
465 Opts.FallbackDebugPath =
466 Args.getLastArgValue(OPT_fallback_debug_path_EQ).str();
467 Opts.PrintFunctions = decideHowToPrintFunctions(Args, IsAddr2Line);
468 parseIntArg(Args, OPT_print_source_context_lines_EQ,
469 Config.SourceContextLines);
470 Opts.RelativeAddresses = Args.hasArg(OPT_relative_address);
471 Opts.UntagAddresses =
472 Args.hasFlag(OPT_untag_addresses, OPT_no_untag_addresses, !IsAddr2Line);
473 Opts.UseDIA = Args.hasArg(OPT_use_dia);
474 #if !defined(LLVM_ENABLE_DIA_SDK)
475 if (Opts.UseDIA) {
476 WithColor::warning() << "DIA not available; using native PDB reader\n";
477 Opts.UseDIA = false;
479 #endif
480 Opts.UseSymbolTable = true;
481 if (Args.hasArg(OPT_cache_size_EQ))
482 parseIntArg(Args, OPT_cache_size_EQ, Opts.MaxCacheSize);
483 Config.PrintAddress = Args.hasArg(OPT_addresses);
484 Config.PrintFunctions = Opts.PrintFunctions != FunctionNameKind::None;
485 Config.Pretty = Args.hasArg(OPT_pretty_print);
486 Config.Verbose = Args.hasArg(OPT_verbose);
488 for (const opt::Arg *A : Args.filtered(OPT_dsym_hint_EQ)) {
489 StringRef Hint(A->getValue());
490 if (sys::path::extension(Hint) == ".dSYM") {
491 Opts.DsymHints.emplace_back(Hint);
492 } else {
493 errs() << "Warning: invalid dSYM hint: \"" << Hint
494 << "\" (must have the '.dSYM' extension).\n";
498 LLVMSymbolizer Symbolizer(Opts);
500 if (Args.hasFlag(OPT_debuginfod, OPT_no_debuginfod, canUseDebuginfod()))
501 enableDebuginfod(Symbolizer, Args);
503 if (Args.hasArg(OPT_filter_markup)) {
504 filterMarkup(Args, Symbolizer);
505 return 0;
508 auto Style = IsAddr2Line ? OutputStyle::GNU : OutputStyle::LLVM;
509 if (const opt::Arg *A = Args.getLastArg(OPT_output_style_EQ)) {
510 if (strcmp(A->getValue(), "GNU") == 0)
511 Style = OutputStyle::GNU;
512 else if (strcmp(A->getValue(), "JSON") == 0)
513 Style = OutputStyle::JSON;
514 else
515 Style = OutputStyle::LLVM;
518 if (Args.hasArg(OPT_build_id_EQ) && Args.hasArg(OPT_obj_EQ)) {
519 errs() << "error: cannot specify both --build-id and --obj\n";
520 return EXIT_FAILURE;
522 object::BuildID BuildID = parseBuildIDArg(Args, OPT_build_id_EQ);
524 std::unique_ptr<DIPrinter> Printer;
525 if (Style == OutputStyle::GNU)
526 Printer = std::make_unique<GNUPrinter>(outs(), printError, Config);
527 else if (Style == OutputStyle::JSON)
528 Printer = std::make_unique<JSONPrinter>(outs(), Config);
529 else
530 Printer = std::make_unique<LLVMPrinter>(outs(), printError, Config);
532 // When an input file is specified, exit immediately if the file cannot be
533 // read. If getOrCreateModuleInfo succeeds, symbolizeInput will reuse the
534 // cached file handle.
535 if (auto *Arg = Args.getLastArg(OPT_obj_EQ); Arg) {
536 auto Status = Symbolizer.getOrCreateModuleInfo(Arg->getValue());
537 if (!Status) {
538 Request SymRequest = {Arg->getValue(), 0, StringRef()};
539 handleAllErrors(Status.takeError(), [&](const ErrorInfoBase &EI) {
540 Printer->printError(SymRequest, EI);
542 return EXIT_FAILURE;
546 std::vector<std::string> InputAddresses = Args.getAllArgValues(OPT_INPUT);
547 if (InputAddresses.empty()) {
548 const int kMaxInputStringLength = 1024;
549 char InputString[kMaxInputStringLength];
551 while (fgets(InputString, sizeof(InputString), stdin)) {
552 // Strip newline characters.
553 std::string StrippedInputString(InputString);
554 llvm::erase_if(StrippedInputString,
555 [](char c) { return c == '\r' || c == '\n'; });
556 symbolizeInput(Args, BuildID, AdjustVMA, IsAddr2Line, Style,
557 StrippedInputString, Symbolizer, *Printer);
558 outs().flush();
560 } else {
561 Printer->listBegin();
562 for (StringRef Address : InputAddresses)
563 symbolizeInput(Args, BuildID, AdjustVMA, IsAddr2Line, Style, Address,
564 Symbolizer, *Printer);
565 Printer->listEnd();
568 return 0;