1 //===-- gsymutil.cpp - GSYM dumping and creation utility for llvm ---------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "llvm/ADT/STLExtras.h"
10 #include "llvm/ADT/StringSet.h"
11 #include "llvm/DebugInfo/DIContext.h"
12 #include "llvm/DebugInfo/DWARF/DWARFContext.h"
13 #include "llvm/Object/Archive.h"
14 #include "llvm/Object/ELFObjectFile.h"
15 #include "llvm/Object/MachOUniversal.h"
16 #include "llvm/Object/ObjectFile.h"
17 #include "llvm/Option/ArgList.h"
18 #include "llvm/Option/Option.h"
19 #include "llvm/Support/CommandLine.h"
20 #include "llvm/Support/Debug.h"
21 #include "llvm/Support/Format.h"
22 #include "llvm/Support/LLVMDriver.h"
23 #include "llvm/Support/ManagedStatic.h"
24 #include "llvm/Support/MemoryBuffer.h"
25 #include "llvm/Support/PrettyStackTrace.h"
26 #include "llvm/Support/Regex.h"
27 #include "llvm/Support/Signals.h"
28 #include "llvm/Support/TargetSelect.h"
29 #include "llvm/Support/raw_ostream.h"
30 #include "llvm/TargetParser/Triple.h"
38 #include <system_error>
41 #include "llvm/DebugInfo/GSYM/DwarfTransformer.h"
42 #include "llvm/DebugInfo/GSYM/FunctionInfo.h"
43 #include "llvm/DebugInfo/GSYM/GsymCreator.h"
44 #include "llvm/DebugInfo/GSYM/GsymReader.h"
45 #include "llvm/DebugInfo/GSYM/InlineInfo.h"
46 #include "llvm/DebugInfo/GSYM/LookupResult.h"
47 #include "llvm/DebugInfo/GSYM/ObjectFileTransformer.h"
52 using namespace object
;
55 /// Command line options.
58 using namespace llvm::opt
;
// Option identifiers. OPT_INVALID takes the value 0 and is not a real option;
// every option record is then turned into an identifier by the OPTION macro.
// NOTE(review): the enclosing enum header and the source of the option
// records are not visible in this excerpt -- confirm against the full file.
60 OPT_INVALID
= 0, // This is not an option ID.
61 #define OPTION(...) LLVM_MAKE_OPT_ID(__VA_ARGS__),
// PREFIX(NAME, VALUE) defines two constants: NAME##_init, a constexpr array of
// StringLiteral initialized from VALUE, and NAME, an ArrayRef over that array
// that leaves out its final element (hence the "- 1").
// NOTE(review): presumably the final element is a terminator entry -- confirm.
66 #define PREFIX(NAME, VALUE) \
67 constexpr llvm::StringLiteral NAME##_init[] = VALUE; \
68 constexpr llvm::ArrayRef<llvm::StringLiteral> NAME( \
69 NAME##_init, std::size(NAME##_init) - 1);
// Table of option descriptions, one entry per option record, each built by
// LLVM_CONSTRUCT_OPT_INFO. GSYMUtilOptTable's constructor is given this table.
// NOTE(review): the option records themselves are not visible in this excerpt.
73 const opt::OptTable::Info InfoTable
[] = {
74 #define OPTION(...) LLVM_CONSTRUCT_OPT_INFO(__VA_ARGS__),
/// Option table for this tool: a GenericOptTable constructed over InfoTable
/// with grouped short options enabled.
79 class GSYMUtilOptTable
: public llvm::opt::GenericOptTable
{
81 GSYMUtilOptTable() : GenericOptTable(InfoTable
) {
82 setGroupedShortOptions(true);
// File-level option state. Every variable below is filled in by parseArgs().

// Values of all positional (OPT_INPUT) arguments.
87 static std::vector
<std::string
> InputFilenames
;
// Value of the last --convert option; llvm_gsymutil_main enters conversion
// mode when this is non-empty.
88 static std::string ConvertFilename
;
// Values of every --arch option (OPT_arch_EQ); consulted by filterArch().
89 static std::vector
<std::string
> ArchFilters
;
// Value of the last --out-file option.
90 static std::string OutputFilename
;
// Value of --num-threads. handleObjectFile() uses
// std::thread::hardware_concurrency() instead when this is not greater than 0.
92 static unsigned NumThreads
;
// Value of --segment-size.
93 static uint64_t SegmentSize
;
// One entry per --address option, parsed as an integer.
95 static std::vector
<uint64_t> LookupAddresses
;
// True when the --addresses-from-stdin option was given.
96 static bool LookupAddressesFromStdin
;
/// Parse the command line with the tool's option table (Tbl) and store the
/// results in the file-level option variables: Verbose, InputFilenames,
/// ConvertFilename, ArchFilters, OutputFilename, Verify, NumThreads,
/// SegmentSize, Quiet, LookupAddresses and LookupAddressesFromStdin.
///
/// Numeric option values are parsed with llvm::to_integer using a base
/// argument of 0; a value that fails to parse is reported on llvm::errs().
/// NOTE(review): what happens after a diagnostic, after --help and after
/// --version (presumably the process exits) is not visible in this excerpt.
///
/// \param argc Number of entries in \p argv.
/// \param argv Argument vector; argv[0] is used as the tool name in messages.
98 static void parseArgs(int argc
, char **argv
) {
100 llvm::StringRef ToolName
= argv
[0];
101 llvm::BumpPtrAllocator A
;
102 llvm::StringSaver Saver
{A
};
// Parse the arguments; the callback prints each parse diagnostic to
// llvm::errs().
103 llvm::opt::InputArgList Args
=
104 Tbl
.parseArgs(argc
, argv
, OPT_UNKNOWN
, Saver
, [&](StringRef Msg
) {
105 llvm::errs() << Msg
<< '\n';
// --help: print the usage line and the overview text through the option
// table.
108 if (Args
.hasArg(OPT_help
)) {
109 const char *Overview
=
110 "A tool for dumping, searching and creating GSYM files.\n\n"
111 "Specify one or more GSYM paths as arguments to dump all of the "
112 "information in each GSYM file.\n"
113 "Specify a single GSYM file along with one or more --lookup options to "
114 "lookup addresses within that GSYM file.\n"
115 "Use the --convert option to specify a file with option --out-file "
116 "option to convert to GSYM format.\n";
118 Tbl
.printHelp(llvm::outs(), "llvm-gsymutil [options] <input GSYM files>",
// --version: print the tool name followed by the LLVM version message.
122 if (Args
.hasArg(OPT_version
)) {
123 llvm::outs() << ToolName
<< '\n';
124 cl::PrintVersionMessage();
128 Verbose
= Args
.hasArg(OPT_verbose
);
// Every positional argument is an input file name.
130 for (const llvm::opt::Arg
*A
: Args
.filtered(OPT_INPUT
))
131 InputFilenames
.emplace_back(A
->getValue());
// Only the last occurrence of --convert is kept.
133 if (const llvm::opt::Arg
*A
= Args
.getLastArg(OPT_convert_EQ
))
134 ConvertFilename
= A
->getValue();
// --arch may be repeated; every value is kept.
136 for (const llvm::opt::Arg
*A
: Args
.filtered(OPT_arch_EQ
))
137 ArchFilters
.emplace_back(A
->getValue());
139 if (const llvm::opt::Arg
*A
= Args
.getLastArg(OPT_out_file_EQ
))
140 OutputFilename
= A
->getValue();
142 Verify
= Args
.hasArg(OPT_verify
);
144 if (const llvm::opt::Arg
*A
= Args
.getLastArg(OPT_num_threads_EQ
)) {
145 StringRef S
{A
->getValue()};
146 if (!llvm::to_integer(S
, NumThreads
, 0)) {
147 llvm::errs() << ToolName
<< ": for the --num-threads option: '" << S
148 << "' value invalid for uint argument!\n";
153 if (const llvm::opt::Arg
*A
= Args
.getLastArg(OPT_segment_size_EQ
)) {
154 StringRef S
{A
->getValue()};
155 if (!llvm::to_integer(S
, SegmentSize
, 0)) {
156 llvm::errs() << ToolName
<< ": for the --segment-size option: '" << S
157 << "' value invalid for uint argument!\n";
162 Quiet
= Args
.hasArg(OPT_quiet
);
// --address may be repeated. Each value is parsed directly into a freshly
// appended element of LookupAddresses.
164 for (const llvm::opt::Arg
*A
: Args
.filtered(OPT_address_EQ
)) {
165 StringRef S
{A
->getValue()};
166 if (!llvm::to_integer(S
, LookupAddresses
.emplace_back(), 0)) {
167 llvm::errs() << ToolName
<< ": for the --address option: '" << S
168 << "' value invalid for uint argument!\n";
173 LookupAddressesFromStdin
= Args
.hasArg(OPT_addresses_from_stdin
);
177 //===----------------------------------------------------------------------===//
/// Report \p Err by writing its string form through WithColor::error().
///
/// NOTE(review): lines of this function are missing from this excerpt. A
/// comment at the call site in llvm_gsymutil_main says error() exits with a
/// status of 1; neither that exit nor any early return for a success value is
/// visible here -- confirm against the full file.
179 static void error(Error Err
) {
182 WithColor::error() << toString(std::move(Err
)) << "\n";
/// Write "<Prefix>: <Err>" and a newline to errs(), then consume \p Err.
///
/// \param Prefix Text printed before the error, e.g. a file name.
/// \param Err    Error to print; it is consumed by this function.
/// NOTE(review): any guard before the print and anything after the consume
/// are not visible in this excerpt.
186 static void error(StringRef Prefix
, llvm::Error Err
) {
189 errs() << Prefix
<< ": " << Err
<< "\n";
190 consumeError(std::move(Err
));
/// Write "<Prefix>: <message of EC>" and a newline to errs().
///
/// \param Prefix Text printed before the message, e.g. a file name.
/// \param EC     Error code whose message() is printed.
/// NOTE(review): handleBuffer() and handleFileConversionToGSYM() call this
/// unconditionally, so presumably a success code makes it return without
/// printing; that guard is not visible in this excerpt -- confirm.
194 static void error(StringRef Prefix
, std::error_code EC
) {
197 errs() << Prefix
<< ": " << EC
.message() << "\n";
/// Return the cputype field of a Mach-O header, read either from the 64-bit
/// header (getHeader64) or from the 32-bit header (getHeader).
///
/// NOTE(review): the condition that selects between the two headers is not
/// visible in this excerpt; presumably it tests whether the file is 64-bit.
201 static uint32_t getCPUType(MachOObjectFile
&MachO
) {
203 return MachO
.getHeader64().cputype
;
205 return MachO
.getHeader().cputype
;
208 /// Return true if the object file has not been filtered by an --arch option.
///
/// The object's architecture name is taken from its arch triple. Each --arch
/// value is also tried as an integer (base argument 0) and compared against
/// the object's Mach-O CPU type.
/// NOTE(review): the return statements and the architecture-name comparison
/// are not visible in this excerpt -- confirm against the full file.
209 static bool filterArch(MachOObjectFile
&Obj
) {
210 if (ArchFilters
.empty())
213 Triple
ObjTriple(Obj
.getArchTriple());
214 StringRef ObjArch
= ObjTriple
.getArchName();
216 for (StringRef Arch
: ArchFilters
) {
221 // Match architecture number.
// getAsInteger() returns false on success, hence the negation.
223 if (!Arch
.getAsInteger(0, Value
))
224 if (Value
== getCPUType(Obj
))
230 /// Determine the virtual address that is considered the base address of an ELF
233 /// The base address of an ELF file is the "p_vaddr" of the first program
234 /// header whose "p_type" is PT_LOAD.
236 /// \param ELFFile An ELF object file we will search.
238 /// \returns A valid image base address if we are able to extract one.
239 template <class ELFT
>
240 static std::optional
<uint64_t>
241 getImageBaseAddress(const object::ELFFile
<ELFT
> &ELFFile
) {
242 auto PhdrRangeOrErr
= ELFFile
.program_headers();
// The program headers could not be read: discard the error instead of
// reporting it.
243 if (!PhdrRangeOrErr
) {
244 consumeError(PhdrRangeOrErr
.takeError());
// Walk the program headers in order and return the p_vaddr of the first
// PT_LOAD entry.
247 for (const typename
ELFT::Phdr
&Phdr
: *PhdrRangeOrErr
)
248 if (Phdr
.p_type
== ELF::PT_LOAD
)
249 return (uint64_t)Phdr
.p_vaddr
;
253 /// Determine the virtual address that is considered the base address of mach-o
256 /// The base address of a mach-o file is the vmaddr of the "__TEXT" segment.
258 /// \param MachO A mach-o object file we will search.
260 /// \returns A valid image base address if we are able to extract one.
261 static std::optional
<uint64_t>
262 getImageBaseAddress(const object::MachOObjectFile
*MachO
) {
// Scan every load command for a segment command named "__TEXT".
263 for (const auto &Command
: MachO
->load_commands()) {
// 32-bit segment load command.
264 if (Command
.C
.cmd
== MachO::LC_SEGMENT
) {
265 MachO::segment_command SLC
= MachO
->getSegmentLoadCommand(Command
);
266 StringRef SegName
= SLC
.segname
;
267 if (SegName
== "__TEXT")
// 64-bit segment load command: same name check on a segment_command_64.
269 } else if (Command
.C
.cmd
== MachO::LC_SEGMENT_64
) {
270 MachO::segment_command_64 SLC
= MachO
->getSegment64LoadCommand(Command
);
271 StringRef SegName
= SLC
.segname
;
272 if (SegName
== "__TEXT")
279 /// Determine the virtual address that is considered the base address of an
282 /// Since GSYM files are used for symbolication, many clients will need to
283 /// easily adjust addresses they find in stack traces so the lookups happen
284 /// on unslid addresses from the original object file. If the base address of
285 /// a GSYM file is set to the base address of the image, then this address
286 /// adjusting is much easier.
288 /// \param Obj An object file we will search.
290 /// \returns A valid image base address if we are able to extract one.
291 static std::optional
<uint64_t> getImageBaseAddress(object::ObjectFile
&Obj
) {
// Dispatch on the concrete object file type: Mach-O first, then each ELF
// combination of 32/64-bit and little/big endian. Each branch forwards to
// the matching getImageBaseAddress() overload.
292 if (const auto *MachO
= dyn_cast
<object::MachOObjectFile
>(&Obj
))
293 return getImageBaseAddress(MachO
);
294 else if (const auto *ELFObj
= dyn_cast
<object::ELF32LEObjectFile
>(&Obj
))
295 return getImageBaseAddress(ELFObj
->getELFFile());
296 else if (const auto *ELFObj
= dyn_cast
<object::ELF32BEObjectFile
>(&Obj
))
297 return getImageBaseAddress(ELFObj
->getELFFile());
298 else if (const auto *ELFObj
= dyn_cast
<object::ELF64LEObjectFile
>(&Obj
))
299 return getImageBaseAddress(ELFObj
->getELFFile());
300 else if (const auto *ELFObj
= dyn_cast
<object::ELF64BEObjectFile
>(&Obj
))
301 return getImageBaseAddress(ELFObj
->getELFFile());
/// Convert the debug info and symbol table of one object file into a GSYM
/// file.
///
/// Steps visible in this function, in order: set the GSYM base address from
/// the image base address when one is found, collect address ranges from the
/// object's sections, create a DWARF context, convert the DWARF with
/// DwarfTransformer, convert the object file's symbol table with
/// ObjectFileTransformer, finalize the GsymCreator, save it to \p OutFile
/// using the object's endianness, and verify the result against the DWARF.
///
/// \param Obj     Object file to convert.
/// \param OutFile Path handed to GsymCreator::save() and
///                DwarfTransformer::verify().
/// \returns Error::success() at the end of the function; a string error with
/// std::errc::invalid_argument ("unable to create DWARF context") on the
/// DWARF-context failure path.
/// NOTE(review): the return statements for failing convert/finalize/save/
/// verify steps, the section filter, and the conditions guarding the segment
/// size and the verify step are not visible in this excerpt.
305 static llvm::Error
handleObjectFile(ObjectFile
&Obj
,
306 const std::string
&OutFile
) {
// Use the --num-threads value when it is greater than 0, otherwise the
// hardware concurrency.
308 NumThreads
> 0 ? NumThreads
: std::thread::hardware_concurrency();
310 // Make a log stream pointer that is null if Quiet is true, or points at
311 // normal output if Quiet is false. This can stop the
312 // errors and warnings from being displayed and producing too much output
313 // when they aren't desired.
314 raw_ostream
*LogOS
= Quiet
? nullptr : &outs();
316 GsymCreator
Gsym(Quiet
);
318 // See if we can figure out the base address for a given object file, and if
319 // we can, then set the base address to use to this value. This will ease
320 // symbolication since clients can slide the GSYM lookup addresses by using
321 // the load bias of the shared library.
322 if (auto ImageBaseAddr
= getImageBaseAddress(Obj
))
323 Gsym
.setBaseAddress(*ImageBaseAddr
);
325 // We need to know where the valid sections are that contain instructions.
326 // See header documentation for DWARFTransformer::SetValidTextRanges() for
328 AddressRanges TextRanges
;
329 for (const object::SectionRef
&Sect
: Obj
.sections()) {
332 const uint64_t Size
= Sect
.getSize();
// Record the half-open range [StartAddr, StartAddr + Size) for this section.
335 const uint64_t StartAddr
= Sect
.getAddress();
336 TextRanges
.insert(AddressRange(StartAddr
, StartAddr
+ Size
));
339 // Make sure there is DWARF to convert first.
340 std::unique_ptr
<DWARFContext
> DICtx
= DWARFContext::create(
342 /*RelocAction=*/DWARFContext::ProcessDebugRelocations::Process
,
345 /*RecoverableErrorHandler=*/WithColor::defaultErrorHandler
,
346 /*WarningHandler=*/WithColor::defaultWarningHandler
,
349 return createStringError(std::errc::invalid_argument
,
350 "unable to create DWARF context");
352 // Make a DWARF transformer object and populate the ranges of the code
353 // so we don't end up adding invalid functions to GSYM data.
354 DwarfTransformer
DT(*DICtx
, Gsym
);
// The valid text ranges are registered on the GsymCreator; this is skipped
// when no ranges were collected.
355 if (!TextRanges
.empty())
356 Gsym
.SetValidTextRanges(TextRanges
);
358 // Convert all DWARF to GSYM.
359 if (auto Err
= DT
.convert(ThreadCount
, LogOS
))
362 // Get the UUID and convert symbol table to GSYM.
363 if (auto Err
= ObjectFileTransformer::convert(Obj
, LogOS
, Gsym
))
366 // Finalize the GSYM to make it ready to save to disk. This will remove
367 // duplicate FunctionInfo entries where we might have found an entry from
368 // debug info and also a symbol table entry from the object file.
369 if (auto Err
= Gsym
.finalize(OS
))
372 // Save the GSYM file to disk.
373 llvm::endianness Endian
= Obj
.makeTriple().isLittleEndian()
374 ? llvm::endianness::little
375 : llvm::endianness::big
;
// The segment size is optional: it stays unset unless it is assigned from
// the --segment-size value below.
377 std::optional
<uint64_t> OptSegmentSize
;
379 OptSegmentSize
= SegmentSize
;
380 if (auto Err
= Gsym
.save(OutFile
, Endian
, OptSegmentSize
))
383 // Verify the DWARF if requested. This will ensure all the info in the DWARF
384 // can be looked up in the GSYM and that all lookups get matching data.
386 if (auto Err
= DT
.verify(OutFile
, OS
))
390 return Error::success();
/// Convert the binary held in \p Buffer to one or more GSYM files.
///
/// The buffer is parsed with object::createBinary(). A plain ObjectFile is
/// converted straight to \p OutFile. A MachOUniversalBinary is split into its
/// per-architecture Mach-O objects; when more than one object is to be
/// converted, ".<arch name>" is appended to \p OutFile for each of them so the
/// outputs do not overwrite each other. The chosen output path is printed to
/// outs() before each conversion.
///
/// \param Filename Name used as the prefix of error messages.
/// \param Buffer   Contents of the binary to convert.
/// \param OutFile  Output path, or base output path for universal binaries.
/// \returns Error::success() at the end of the function.
/// NOTE(review): the returns taken when handleObjectFile() fails and the
/// --arch filtering condition are not visible in this excerpt.
393 static llvm::Error
handleBuffer(StringRef Filename
, MemoryBufferRef Buffer
,
394 const std::string
&OutFile
) {
395 Expected
<std::unique_ptr
<Binary
>> BinOrErr
= object::createBinary(Buffer
);
// Called unconditionally: the error is converted to an error code and handed
// to the error() overload taking a prefix and a std::error_code.
396 error(Filename
, errorToErrorCode(BinOrErr
.takeError()));
398 if (auto *Obj
= dyn_cast
<ObjectFile
>(BinOrErr
->get())) {
399 Triple
ObjTriple(Obj
->makeTriple());
400 auto ArchName
= ObjTriple
.getArchName();
401 outs() << "Output file (" << ArchName
<< "): " << OutFile
<< "\n";
402 if (auto Err
= handleObjectFile(*Obj
, OutFile
))
404 } else if (auto *Fat
= dyn_cast
<MachOUniversalBinary
>(BinOrErr
->get())) {
405 // Iterate over all contained architectures and filter out any that were
406 // not specified with the "--arch <arch>" option. If the --arch option was
407 // not specified on the command line, we will process all architectures.
408 std::vector
<std::unique_ptr
<MachOObjectFile
>> FilterObjs
;
409 for (auto &ObjForArch
: Fat
->objects()) {
410 if (auto MachOOrErr
= ObjForArch
.getAsObjectFile()) {
411 auto &Obj
= **MachOOrErr
;
// Ownership of the Mach-O object moves into FilterObjs.
413 FilterObjs
.emplace_back(MachOOrErr
->release());
415 error(Filename
, MachOOrErr
.takeError());
418 if (FilterObjs
.empty())
419 error(Filename
, createStringError(std::errc::invalid_argument
,
420 "no matching architectures found"));
422 // Now handle each architecture we need to convert.
423 for (auto &Obj
: FilterObjs
) {
424 Triple
ObjTriple(Obj
->getArchTriple());
425 auto ArchName
= ObjTriple
.getArchName();
426 std::string
ArchOutFile(OutFile
);
427 // If we are only handling a single architecture, then we will use the
428 // normal output file. If we are handling multiple architectures append
429 // the architecture name to the end of the out file path so that we
430 // don't overwrite the previous architecture's gsym file.
431 if (FilterObjs
.size() > 1) {
432 ArchOutFile
.append(1, '.');
433 ArchOutFile
.append(ArchName
.str());
435 outs() << "Output file (" << ArchName
<< "): " << ArchOutFile
<< "\n";
436 if (auto Err
= handleObjectFile(*Obj
, ArchOutFile
))
440 return Error::success();
/// Load one input file into memory and convert it to GSYM.
///
/// The file is read with MemoryBuffer::getFileOrSTDIN(). The resulting error
/// code is passed to error() with \p Filename as the prefix, and the buffer is
/// then handed to handleBuffer().
///
/// \param Filename Path passed to MemoryBuffer::getFileOrSTDIN().
/// \param OutFile  Output path forwarded to handleBuffer().
/// \returns Whatever handleBuffer() returns.
443 static llvm::Error
handleFileConversionToGSYM(StringRef Filename
,
444 const std::string
&OutFile
) {
445 ErrorOr
<std::unique_ptr
<MemoryBuffer
>> BuffOrErr
=
446 MemoryBuffer::getFileOrSTDIN(Filename
);
// Called unconditionally with the error code of the load.
447 error(Filename
, BuffOrErr
.getError());
448 std::unique_ptr
<MemoryBuffer
> Buffer
= std::move(BuffOrErr
.get());
449 return handleBuffer(Filename
, *Buffer
, OutFile
);
/// Convert the file named by the --convert option (ConvertFilename) to GSYM.
///
/// The output path is OutputFilename; when that is empty it is derived from
/// ConvertFilename. The input path is printed to \p OS. dSYM bundles are
/// expanded with MachOObjectFile::findDsymObjectMembers(): when no members
/// are found the input path itself is converted, otherwise every member is
/// converted with handleFileConversionToGSYM().
///
/// \param OS Stream that receives the "Input file: ..." line.
/// \returns Error::success() at the end of the function.
/// NOTE(review): how the default output name is completed (presumably by
/// appending a suffix) and the return taken when a conversion fails are not
/// visible in this excerpt.
452 static llvm::Error
convertFileToGSYM(raw_ostream
&OS
) {
453 // Expand any .dSYM bundles to the individual object files contained therein.
454 std::vector
<std::string
> Objects
;
455 std::string OutFile
= OutputFilename
;
456 if (OutFile
.empty()) {
457 OutFile
= ConvertFilename
;
461 OS
<< "Input file: " << ConvertFilename
<< "\n";
463 if (auto DsymObjectsOrErr
=
464 MachOObjectFile::findDsymObjectMembers(ConvertFilename
)) {
// An empty member list means the input is converted as-is.
465 if (DsymObjectsOrErr
->empty())
466 Objects
.push_back(ConvertFilename
);
468 llvm::append_range(Objects
, *DsymObjectsOrErr
);
470 error(DsymObjectsOrErr
.takeError());
// Every object is converted to the same OutFile path.
473 for (StringRef Object
: Objects
)
474 if (Error Err
= handleFileConversionToGSYM(Object
, OutFile
))
476 return Error::success();
/// Look up \p Addr in \p Gsym and write the outcome to \p OS.
///
/// On a successful lookup a "LookupResult for <addr>:" header is printed; the
/// verbose path additionally asks the reader for the full FunctionInfo of the
/// address and prints a "FunctionInfo for <addr>:" header. The failure path
/// prints the address followed by the lookup error, logged with the prefix
/// "error: ".
///
/// \param Gsym Reader for the GSYM file to search.
/// \param Addr Address to look up.
/// \param OS   Stream that receives all output, including errors.
/// NOTE(review): the statements that dump the FunctionInfo and LookupResult
/// values, and the handling of a failed getFunctionInfo(), are not visible in
/// this excerpt.
479 static void doLookup(GsymReader
&Gsym
, uint64_t Addr
, raw_ostream
&OS
) {
480 if (auto Result
= Gsym
.lookup(Addr
)) {
481 // If verbose is enabled dump the full function info for the address.
483 if (auto FI
= Gsym
.getFunctionInfo(Addr
)) {
484 OS
<< "FunctionInfo for " << HEX64(Addr
) << ":\n";
486 OS
<< "\nLookupResult for " << HEX64(Addr
) << ":\n";
492 OS
<< "\nLookupResult for " << HEX64(Addr
) << ":\n";
493 OS
<< HEX64(Addr
) << ": ";
494 logAllUnhandledErrors(Result
.takeError(), OS
, "error: ");
500 int llvm_gsymutil_main(int argc
, char **argv
, const llvm::ToolContext
&) {
501 // Print a stack trace if we signal out.
502 sys::PrintStackTraceOnErrorSignal(argv
[0]);
503 PrettyStackTraceProgram
X(argc
, argv
);
504 llvm_shutdown_obj Y
; // Call llvm_shutdown() on exit.
506 llvm::InitializeAllTargets();
508 parseArgs(argc
, argv
);
510 raw_ostream
&OS
= outs();
512 if (!ConvertFilename
.empty()) {
513 // Convert DWARF to GSYM
514 if (!InputFilenames
.empty()) {
515 OS
<< "error: no input files can be specified when using the --convert "
519 // Call error() if we have an error and it will exit with a status of 1
520 if (auto Err
= convertFileToGSYM(OS
))
521 error("DWARF conversion failed: ", std::move(Err
));
525 if (LookupAddressesFromStdin
) {
526 if (!LookupAddresses
.empty() || !InputFilenames
.empty()) {
527 OS
<< "error: no input files or addresses can be specified when using "
528 "the --addresses-from-stdin "
533 std::string InputLine
;
534 std::string CurrentGSYMPath
;
535 std::optional
<Expected
<GsymReader
>> CurrentGsym
;
537 while (std::getline(std::cin
, InputLine
)) {
538 // Strip newline characters.
539 std::string
StrippedInputLine(InputLine
);
540 llvm::erase_if(StrippedInputLine
,
541 [](char c
) { return c
== '\r' || c
== '\n'; });
543 StringRef AddrStr
, GSYMPath
;
544 std::tie(AddrStr
, GSYMPath
) =
545 llvm::StringRef
{StrippedInputLine
}.split(' ');
547 if (GSYMPath
!= CurrentGSYMPath
) {
548 CurrentGsym
= GsymReader::openFile(GSYMPath
);
550 error(GSYMPath
, CurrentGsym
->takeError());
551 CurrentGSYMPath
= GSYMPath
;
555 if (AddrStr
.getAsInteger(0, Addr
)) {
556 OS
<< "error: invalid address " << AddrStr
557 << ", expected: Address GsymFile.\n";
561 doLookup(**CurrentGsym
, Addr
, OS
);
570 // Dump or access data inside GSYM files
571 for (const auto &GSYMPath
: InputFilenames
) {
572 auto Gsym
= GsymReader::openFile(GSYMPath
);
574 error(GSYMPath
, Gsym
.takeError());
576 if (LookupAddresses
.empty()) {
581 // Lookup an address in a GSYM file and print any matches.
582 OS
<< "Looking up addresses in \"" << GSYMPath
<< "\":\n";
583 for (auto Addr
: LookupAddresses
) {
584 doLookup(*Gsym
, Addr
, OS
);