[clang][modules] Don't prevent translation of FW_Private includes when explicitly...
[llvm-project.git] / llvm / tools / llvm-gsymutil / llvm-gsymutil.cpp
blob46ec4bdc28709f18617a2e4fa0c7787ea3a682f2
1 //===-- gsymutil.cpp - GSYM dumping and creation utility for llvm ---------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
9 #include "llvm/ADT/STLExtras.h"
10 #include "llvm/ADT/StringSet.h"
11 #include "llvm/DebugInfo/DIContext.h"
12 #include "llvm/DebugInfo/DWARF/DWARFContext.h"
13 #include "llvm/Object/Archive.h"
14 #include "llvm/Object/ELFObjectFile.h"
15 #include "llvm/Object/MachOUniversal.h"
16 #include "llvm/Object/ObjectFile.h"
17 #include "llvm/Option/ArgList.h"
18 #include "llvm/Option/Option.h"
19 #include "llvm/Support/CommandLine.h"
20 #include "llvm/Support/Debug.h"
21 #include "llvm/Support/Format.h"
22 #include "llvm/Support/LLVMDriver.h"
23 #include "llvm/Support/ManagedStatic.h"
24 #include "llvm/Support/MemoryBuffer.h"
25 #include "llvm/Support/PrettyStackTrace.h"
26 #include "llvm/Support/Regex.h"
27 #include "llvm/Support/Signals.h"
28 #include "llvm/Support/TargetSelect.h"
29 #include "llvm/Support/raw_ostream.h"
30 #include "llvm/TargetParser/Triple.h"
31 #include <algorithm>
32 #include <cstring>
33 #include <inttypes.h>
34 #include <iostream>
35 #include <map>
36 #include <optional>
37 #include <string>
38 #include <system_error>
39 #include <vector>
41 #include "llvm/DebugInfo/GSYM/DwarfTransformer.h"
42 #include "llvm/DebugInfo/GSYM/FunctionInfo.h"
43 #include "llvm/DebugInfo/GSYM/GsymCreator.h"
44 #include "llvm/DebugInfo/GSYM/GsymReader.h"
45 #include "llvm/DebugInfo/GSYM/InlineInfo.h"
46 #include "llvm/DebugInfo/GSYM/LookupResult.h"
47 #include "llvm/DebugInfo/GSYM/ObjectFileTransformer.h"
48 #include <optional>
50 using namespace llvm;
51 using namespace gsym;
52 using namespace object;
54 /// @}
55 /// Command line options.
56 /// @{
58 using namespace llvm::opt;
// Option identifiers for this tool. OPT_INVALID is reserved as "not an
// option"; the remaining enumerators come from expanding every OPTION(...)
// entry of the included "Opts.inc" through LLVM_MAKE_OPT_ID.
59 enum ID {
60 OPT_INVALID = 0, // This is not an option ID.
61 #define OPTION(...) LLVM_MAKE_OPT_ID(__VA_ARGS__),
62 #include "Opts.inc"
63 #undef OPTION
// For each PREFIX(NAME, VALUE) entry in "Opts.inc", define a constexpr
// StringLiteral array NAME_init initialised from VALUE and an ArrayRef NAME
// that views every element of that array except the last one.
66 #define PREFIX(NAME, VALUE) \
67 constexpr llvm::StringLiteral NAME##_init[] = VALUE; \
68 constexpr llvm::ArrayRef<llvm::StringLiteral> NAME( \
69 NAME##_init, std::size(NAME##_init) - 1);
70 #include "Opts.inc"
71 #undef PREFIX
// Table of option descriptions: one opt::OptTable::Info element per
// OPTION(...) entry of "Opts.inc", built with LLVM_CONSTRUCT_OPT_INFO. It is
// the table GSYMUtilOptTable is constructed over.
73 const opt::OptTable::Info InfoTable[] = {
74 #define OPTION(...) LLVM_CONSTRUCT_OPT_INFO(__VA_ARGS__),
75 #include "Opts.inc"
76 #undef OPTION
// Option table for llvm-gsymutil: a GenericOptTable built over InfoTable
// with grouped short options switched on.
79 class GSYMUtilOptTable : public llvm::opt::GenericOptTable {
80 public:
// Construct the table from InfoTable and enable grouped short options.
81 GSYMUtilOptTable() : GenericOptTable(InfoTable) {
82 setGroupedShortOptions(true);
// File-level option state, filled in by parseArgs().

// True when OPT_verbose was given.
86 static bool Verbose;
// Every OPT_INPUT argument, in command-line order.
87 static std::vector<std::string> InputFilenames;
// Value of the last OPT_convert_EQ argument; empty when not given.
88 static std::string ConvertFilename;
// Every OPT_arch_EQ value, in command-line order.
89 static std::vector<std::string> ArchFilters;
// Value of the last OPT_out_file_EQ argument; empty when not given.
90 static std::string OutputFilename;
// True when OPT_verify was given.
91 static bool Verify;
// Integer value of the last OPT_num_threads_EQ argument.
92 static unsigned NumThreads;
// Integer value of the last OPT_segment_size_EQ argument.
93 static uint64_t SegmentSize;
// True when OPT_quiet was given.
94 static bool Quiet;
// One parsed integer for every OPT_address_EQ argument.
95 static std::vector<uint64_t> LookupAddresses;
// True when OPT_addresses_from_stdin was given.
96 static bool LookupAddressesFromStdin;
98 static void parseArgs(int argc, char **argv) {
99 GSYMUtilOptTable Tbl;
100 llvm::StringRef ToolName = argv[0];
101 llvm::BumpPtrAllocator A;
102 llvm::StringSaver Saver{A};
103 llvm::opt::InputArgList Args =
104 Tbl.parseArgs(argc, argv, OPT_UNKNOWN, Saver, [&](StringRef Msg) {
105 llvm::errs() << Msg << '\n';
106 std::exit(1);
108 if (Args.hasArg(OPT_help)) {
109 const char *Overview =
110 "A tool for dumping, searching and creating GSYM files.\n\n"
111 "Specify one or more GSYM paths as arguments to dump all of the "
112 "information in each GSYM file.\n"
113 "Specify a single GSYM file along with one or more --lookup options to "
114 "lookup addresses within that GSYM file.\n"
115 "Use the --convert option to specify a file with option --out-file "
116 "option to convert to GSYM format.\n";
118 Tbl.printHelp(llvm::outs(), "llvm-gsymutil [options] <input GSYM files>",
119 Overview);
120 std::exit(0);
122 if (Args.hasArg(OPT_version)) {
123 llvm::outs() << ToolName << '\n';
124 cl::PrintVersionMessage();
125 std::exit(0);
128 Verbose = Args.hasArg(OPT_verbose);
130 for (const llvm::opt::Arg *A : Args.filtered(OPT_INPUT))
131 InputFilenames.emplace_back(A->getValue());
133 if (const llvm::opt::Arg *A = Args.getLastArg(OPT_convert_EQ))
134 ConvertFilename = A->getValue();
136 for (const llvm::opt::Arg *A : Args.filtered(OPT_arch_EQ))
137 ArchFilters.emplace_back(A->getValue());
139 if (const llvm::opt::Arg *A = Args.getLastArg(OPT_out_file_EQ))
140 OutputFilename = A->getValue();
142 Verify = Args.hasArg(OPT_verify);
144 if (const llvm::opt::Arg *A = Args.getLastArg(OPT_num_threads_EQ)) {
145 StringRef S{A->getValue()};
146 if (!llvm::to_integer(S, NumThreads, 0)) {
147 llvm::errs() << ToolName << ": for the --num-threads option: '" << S
148 << "' value invalid for uint argument!\n";
149 std::exit(1);
153 if (const llvm::opt::Arg *A = Args.getLastArg(OPT_segment_size_EQ)) {
154 StringRef S{A->getValue()};
155 if (!llvm::to_integer(S, SegmentSize, 0)) {
156 llvm::errs() << ToolName << ": for the --segment-size option: '" << S
157 << "' value invalid for uint argument!\n";
158 std::exit(1);
162 Quiet = Args.hasArg(OPT_quiet);
164 for (const llvm::opt::Arg *A : Args.filtered(OPT_address_EQ)) {
165 StringRef S{A->getValue()};
166 if (!llvm::to_integer(S, LookupAddresses.emplace_back(), 0)) {
167 llvm::errs() << ToolName << ": for the --address option: '" << S
168 << "' value invalid for uint argument!\n";
169 std::exit(1);
173 LookupAddressesFromStdin = Args.hasArg(OPT_addresses_from_stdin);
176 /// @}
177 //===----------------------------------------------------------------------===//
179 static void error(Error Err) {
180 if (!Err)
181 return;
182 WithColor::error() << toString(std::move(Err)) << "\n";
183 exit(1);
186 static void error(StringRef Prefix, llvm::Error Err) {
187 if (!Err)
188 return;
189 errs() << Prefix << ": " << Err << "\n";
190 consumeError(std::move(Err));
191 exit(1);
194 static void error(StringRef Prefix, std::error_code EC) {
195 if (!EC)
196 return;
197 errs() << Prefix << ": " << EC.message() << "\n";
198 exit(1);
201 static uint32_t getCPUType(MachOObjectFile &MachO) {
202 if (MachO.is64Bit())
203 return MachO.getHeader64().cputype;
204 else
205 return MachO.getHeader().cputype;
208 /// Return true if the object file has not been filtered by an --arch option.
209 static bool filterArch(MachOObjectFile &Obj) {
210 if (ArchFilters.empty())
211 return true;
213 Triple ObjTriple(Obj.getArchTriple());
214 StringRef ObjArch = ObjTriple.getArchName();
216 for (StringRef Arch : ArchFilters) {
217 // Match name.
218 if (Arch == ObjArch)
219 return true;
221 // Match architecture number.
222 unsigned Value;
223 if (!Arch.getAsInteger(0, Value))
224 if (Value == getCPUType(Obj))
225 return true;
227 return false;
230 /// Determine the virtual address that is considered the base address of an ELF
231 /// object file.
233 /// The base address of an ELF file is the "p_vaddr" of the first program
234 /// header whose "p_type" is PT_LOAD.
236 /// \param ELFFile An ELF object file we will search.
238 /// \returns A valid image base address if we are able to extract one.
239 template <class ELFT>
240 static std::optional<uint64_t>
241 getImageBaseAddress(const object::ELFFile<ELFT> &ELFFile) {
242 auto PhdrRangeOrErr = ELFFile.program_headers();
243 if (!PhdrRangeOrErr) {
244 consumeError(PhdrRangeOrErr.takeError());
245 return std::nullopt;
247 for (const typename ELFT::Phdr &Phdr : *PhdrRangeOrErr)
248 if (Phdr.p_type == ELF::PT_LOAD)
249 return (uint64_t)Phdr.p_vaddr;
250 return std::nullopt;
253 /// Determine the virtual address that is considered the base address of mach-o
254 /// object file.
256 /// The base address of a mach-o file is the vmaddr of the "__TEXT" segment.
258 /// \param MachO A mach-o object file we will search.
260 /// \returns A valid image base address if we are able to extract one.
261 static std::optional<uint64_t>
262 getImageBaseAddress(const object::MachOObjectFile *MachO) {
263 for (const auto &Command : MachO->load_commands()) {
264 if (Command.C.cmd == MachO::LC_SEGMENT) {
265 MachO::segment_command SLC = MachO->getSegmentLoadCommand(Command);
266 StringRef SegName = SLC.segname;
267 if (SegName == "__TEXT")
268 return SLC.vmaddr;
269 } else if (Command.C.cmd == MachO::LC_SEGMENT_64) {
270 MachO::segment_command_64 SLC = MachO->getSegment64LoadCommand(Command);
271 StringRef SegName = SLC.segname;
272 if (SegName == "__TEXT")
273 return SLC.vmaddr;
276 return std::nullopt;
279 /// Determine the virtual address that is considered the base address of an
280 /// object file.
282 /// Since GSYM files are used for symbolication, many clients will need to
283 /// easily adjust addresses they find in stack traces so the lookups happen
284 /// on unslid addresses from the original object file. If the base address of
285 /// a GSYM file is set to the base address of the image, then this address
286 /// adjusting is much easier.
288 /// \param Obj An object file we will search.
290 /// \returns A valid image base address if we are able to extract one.
291 static std::optional<uint64_t> getImageBaseAddress(object::ObjectFile &Obj) {
292 if (const auto *MachO = dyn_cast<object::MachOObjectFile>(&Obj))
293 return getImageBaseAddress(MachO);
294 else if (const auto *ELFObj = dyn_cast<object::ELF32LEObjectFile>(&Obj))
295 return getImageBaseAddress(ELFObj->getELFFile());
296 else if (const auto *ELFObj = dyn_cast<object::ELF32BEObjectFile>(&Obj))
297 return getImageBaseAddress(ELFObj->getELFFile());
298 else if (const auto *ELFObj = dyn_cast<object::ELF64LEObjectFile>(&Obj))
299 return getImageBaseAddress(ELFObj->getELFFile());
300 else if (const auto *ELFObj = dyn_cast<object::ELF64BEObjectFile>(&Obj))
301 return getImageBaseAddress(ELFObj->getELFFile());
302 return std::nullopt;
305 static llvm::Error handleObjectFile(ObjectFile &Obj,
306 const std::string &OutFile) {
307 auto ThreadCount =
308 NumThreads > 0 ? NumThreads : std::thread::hardware_concurrency();
309 auto &OS = outs();
310 // Make a stream refernce that will become a /dev/null log stream if
311 // Quiet is true, or normal output if Quiet is false. This can stop the
312 // errors and warnings from being displayed and producing too much output
313 // when they aren't desired.
314 raw_ostream *LogOS = Quiet ? nullptr : &outs();
316 GsymCreator Gsym(Quiet);
318 // See if we can figure out the base address for a given object file, and if
319 // we can, then set the base address to use to this value. This will ease
320 // symbolication since clients can slide the GSYM lookup addresses by using
321 // the load bias of the shared library.
322 if (auto ImageBaseAddr = getImageBaseAddress(Obj))
323 Gsym.setBaseAddress(*ImageBaseAddr);
325 // We need to know where the valid sections are that contain instructions.
326 // See header documentation for DWARFTransformer::SetValidTextRanges() for
327 // defails.
328 AddressRanges TextRanges;
329 for (const object::SectionRef &Sect : Obj.sections()) {
330 if (!Sect.isText())
331 continue;
332 const uint64_t Size = Sect.getSize();
333 if (Size == 0)
334 continue;
335 const uint64_t StartAddr = Sect.getAddress();
336 TextRanges.insert(AddressRange(StartAddr, StartAddr + Size));
339 // Make sure there is DWARF to convert first.
340 std::unique_ptr<DWARFContext> DICtx = DWARFContext::create(
341 Obj,
342 /*RelocAction=*/DWARFContext::ProcessDebugRelocations::Process,
343 nullptr,
344 /*DWPName=*/"",
345 /*RecoverableErrorHandler=*/WithColor::defaultErrorHandler,
346 /*WarningHandler=*/WithColor::defaultWarningHandler,
347 /*ThreadSafe*/true);
348 if (!DICtx)
349 return createStringError(std::errc::invalid_argument,
350 "unable to create DWARF context");
352 // Make a DWARF transformer object and populate the ranges of the code
353 // so we don't end up adding invalid functions to GSYM data.
354 DwarfTransformer DT(*DICtx, Gsym);
355 if (!TextRanges.empty())
356 Gsym.SetValidTextRanges(TextRanges);
358 // Convert all DWARF to GSYM.
359 if (auto Err = DT.convert(ThreadCount, LogOS))
360 return Err;
362 // Get the UUID and convert symbol table to GSYM.
363 if (auto Err = ObjectFileTransformer::convert(Obj, LogOS, Gsym))
364 return Err;
366 // Finalize the GSYM to make it ready to save to disk. This will remove
367 // duplicate FunctionInfo entries where we might have found an entry from
368 // debug info and also a symbol table entry from the object file.
369 if (auto Err = Gsym.finalize(OS))
370 return Err;
372 // Save the GSYM file to disk.
373 llvm::endianness Endian = Obj.makeTriple().isLittleEndian()
374 ? llvm::endianness::little
375 : llvm::endianness::big;
377 std::optional<uint64_t> OptSegmentSize;
378 if (SegmentSize > 0)
379 OptSegmentSize = SegmentSize;
380 if (auto Err = Gsym.save(OutFile, Endian, OptSegmentSize))
381 return Err;
383 // Verify the DWARF if requested. This will ensure all the info in the DWARF
384 // can be looked up in the GSYM and that all lookups get matching data.
385 if (Verify) {
386 if (auto Err = DT.verify(OutFile, OS))
387 return Err;
390 return Error::success();
393 static llvm::Error handleBuffer(StringRef Filename, MemoryBufferRef Buffer,
394 const std::string &OutFile) {
395 Expected<std::unique_ptr<Binary>> BinOrErr = object::createBinary(Buffer);
396 error(Filename, errorToErrorCode(BinOrErr.takeError()));
398 if (auto *Obj = dyn_cast<ObjectFile>(BinOrErr->get())) {
399 Triple ObjTriple(Obj->makeTriple());
400 auto ArchName = ObjTriple.getArchName();
401 outs() << "Output file (" << ArchName << "): " << OutFile << "\n";
402 if (auto Err = handleObjectFile(*Obj, OutFile))
403 return Err;
404 } else if (auto *Fat = dyn_cast<MachOUniversalBinary>(BinOrErr->get())) {
405 // Iterate over all contained architectures and filter out any that were
406 // not specified with the "--arch <arch>" option. If the --arch option was
407 // not specified on the command line, we will process all architectures.
408 std::vector<std::unique_ptr<MachOObjectFile>> FilterObjs;
409 for (auto &ObjForArch : Fat->objects()) {
410 if (auto MachOOrErr = ObjForArch.getAsObjectFile()) {
411 auto &Obj = **MachOOrErr;
412 if (filterArch(Obj))
413 FilterObjs.emplace_back(MachOOrErr->release());
414 } else {
415 error(Filename, MachOOrErr.takeError());
418 if (FilterObjs.empty())
419 error(Filename, createStringError(std::errc::invalid_argument,
420 "no matching architectures found"));
422 // Now handle each architecture we need to convert.
423 for (auto &Obj : FilterObjs) {
424 Triple ObjTriple(Obj->getArchTriple());
425 auto ArchName = ObjTriple.getArchName();
426 std::string ArchOutFile(OutFile);
427 // If we are only handling a single architecture, then we will use the
428 // normal output file. If we are handling multiple architectures append
429 // the architecture name to the end of the out file path so that we
430 // don't overwrite the previous architecture's gsym file.
431 if (FilterObjs.size() > 1) {
432 ArchOutFile.append(1, '.');
433 ArchOutFile.append(ArchName.str());
435 outs() << "Output file (" << ArchName << "): " << ArchOutFile << "\n";
436 if (auto Err = handleObjectFile(*Obj, ArchOutFile))
437 return Err;
440 return Error::success();
443 static llvm::Error handleFileConversionToGSYM(StringRef Filename,
444 const std::string &OutFile) {
445 ErrorOr<std::unique_ptr<MemoryBuffer>> BuffOrErr =
446 MemoryBuffer::getFileOrSTDIN(Filename);
447 error(Filename, BuffOrErr.getError());
448 std::unique_ptr<MemoryBuffer> Buffer = std::move(BuffOrErr.get());
449 return handleBuffer(Filename, *Buffer, OutFile);
452 static llvm::Error convertFileToGSYM(raw_ostream &OS) {
453 // Expand any .dSYM bundles to the individual object files contained therein.
454 std::vector<std::string> Objects;
455 std::string OutFile = OutputFilename;
456 if (OutFile.empty()) {
457 OutFile = ConvertFilename;
458 OutFile += ".gsym";
461 OS << "Input file: " << ConvertFilename << "\n";
463 if (auto DsymObjectsOrErr =
464 MachOObjectFile::findDsymObjectMembers(ConvertFilename)) {
465 if (DsymObjectsOrErr->empty())
466 Objects.push_back(ConvertFilename);
467 else
468 llvm::append_range(Objects, *DsymObjectsOrErr);
469 } else {
470 error(DsymObjectsOrErr.takeError());
473 for (StringRef Object : Objects)
474 if (Error Err = handleFileConversionToGSYM(Object, OutFile))
475 return Err;
476 return Error::success();
479 static void doLookup(GsymReader &Gsym, uint64_t Addr, raw_ostream &OS) {
480 if (auto Result = Gsym.lookup(Addr)) {
481 // If verbose is enabled dump the full function info for the address.
482 if (Verbose) {
483 if (auto FI = Gsym.getFunctionInfo(Addr)) {
484 OS << "FunctionInfo for " << HEX64(Addr) << ":\n";
485 Gsym.dump(OS, *FI);
486 OS << "\nLookupResult for " << HEX64(Addr) << ":\n";
489 OS << Result.get();
490 } else {
491 if (Verbose)
492 OS << "\nLookupResult for " << HEX64(Addr) << ":\n";
493 OS << HEX64(Addr) << ": ";
494 logAllUnhandledErrors(Result.takeError(), OS, "error: ");
496 if (Verbose)
497 OS << "\n";
500 int llvm_gsymutil_main(int argc, char **argv, const llvm::ToolContext &) {
501 // Print a stack trace if we signal out.
502 sys::PrintStackTraceOnErrorSignal(argv[0]);
503 PrettyStackTraceProgram X(argc, argv);
504 llvm_shutdown_obj Y; // Call llvm_shutdown() on exit.
506 llvm::InitializeAllTargets();
508 parseArgs(argc, argv);
510 raw_ostream &OS = outs();
512 if (!ConvertFilename.empty()) {
513 // Convert DWARF to GSYM
514 if (!InputFilenames.empty()) {
515 OS << "error: no input files can be specified when using the --convert "
516 "option.\n";
517 return 1;
519 // Call error() if we have an error and it will exit with a status of 1
520 if (auto Err = convertFileToGSYM(OS))
521 error("DWARF conversion failed: ", std::move(Err));
522 return 0;
525 if (LookupAddressesFromStdin) {
526 if (!LookupAddresses.empty() || !InputFilenames.empty()) {
527 OS << "error: no input files or addresses can be specified when using "
528 "the --addresses-from-stdin "
529 "option.\n";
530 return 1;
533 std::string InputLine;
534 std::string CurrentGSYMPath;
535 std::optional<Expected<GsymReader>> CurrentGsym;
537 while (std::getline(std::cin, InputLine)) {
538 // Strip newline characters.
539 std::string StrippedInputLine(InputLine);
540 llvm::erase_if(StrippedInputLine,
541 [](char c) { return c == '\r' || c == '\n'; });
543 StringRef AddrStr, GSYMPath;
544 std::tie(AddrStr, GSYMPath) =
545 llvm::StringRef{StrippedInputLine}.split(' ');
547 if (GSYMPath != CurrentGSYMPath) {
548 CurrentGsym = GsymReader::openFile(GSYMPath);
549 if (!*CurrentGsym)
550 error(GSYMPath, CurrentGsym->takeError());
551 CurrentGSYMPath = GSYMPath;
554 uint64_t Addr;
555 if (AddrStr.getAsInteger(0, Addr)) {
556 OS << "error: invalid address " << AddrStr
557 << ", expected: Address GsymFile.\n";
558 return 1;
561 doLookup(**CurrentGsym, Addr, OS);
563 OS << "\n";
564 OS.flush();
567 return EXIT_SUCCESS;
570 // Dump or access data inside GSYM files
571 for (const auto &GSYMPath : InputFilenames) {
572 auto Gsym = GsymReader::openFile(GSYMPath);
573 if (!Gsym)
574 error(GSYMPath, Gsym.takeError());
576 if (LookupAddresses.empty()) {
577 Gsym->dump(outs());
578 continue;
581 // Lookup an address in a GSYM file and print any matches.
582 OS << "Looking up addresses in \"" << GSYMPath << "\":\n";
583 for (auto Addr : LookupAddresses) {
584 doLookup(*Gsym, Addr, OS);
587 return EXIT_SUCCESS;