1 //===-- gsymutil.cpp - GSYM dumping and creation utility for llvm ---------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "llvm/ADT/STLExtras.h"
10 #include "llvm/ADT/StringSet.h"
11 #include "llvm/ADT/Triple.h"
12 #include "llvm/DebugInfo/DIContext.h"
13 #include "llvm/DebugInfo/DWARF/DWARFContext.h"
14 #include "llvm/Object/Archive.h"
15 #include "llvm/Object/ELFObjectFile.h"
16 #include "llvm/Object/MachOUniversal.h"
17 #include "llvm/Object/ObjectFile.h"
18 #include "llvm/Support/CommandLine.h"
19 #include "llvm/Support/Debug.h"
20 #include "llvm/Support/Format.h"
21 #include "llvm/Support/ManagedStatic.h"
22 #include "llvm/Support/MemoryBuffer.h"
23 #include "llvm/Support/PrettyStackTrace.h"
24 #include "llvm/Support/Regex.h"
25 #include "llvm/Support/Signals.h"
26 #include "llvm/Support/TargetSelect.h"
27 #include "llvm/Support/raw_ostream.h"
34 #include <system_error>
37 #include "llvm/DebugInfo/GSYM/DwarfTransformer.h"
38 #include "llvm/DebugInfo/GSYM/FunctionInfo.h"
39 #include "llvm/DebugInfo/GSYM/GsymCreator.h"
40 #include "llvm/DebugInfo/GSYM/GsymReader.h"
41 #include "llvm/DebugInfo/GSYM/InlineInfo.h"
42 #include "llvm/DebugInfo/GSYM/LookupResult.h"
43 #include "llvm/DebugInfo/GSYM/ObjectFileTransformer.h"
47 using namespace object
;
50 /// Command line options.
// The three option categories below are the ones main() hands to
// HideUnrelatedOptions(), so only options attached to them stay visible.
// Category attached to the positional input-file list.
56 OptionCategory
GeneralOptions("Options");
// Category attached to the conversion options (--convert, --out-file, -o,
// --verify, --num-threads, --quiet).
57 OptionCategory
ConversionOptions("Conversion Options");
// Category titled "Lookup Options".
// NOTE(review): no cat(LookupOptions) use is visible in this listing — confirm
// which options are attached to it.
58 OptionCategory
LookupOptions("Lookup Options");
// -h: hidden boolean flag whose description marks it as an alias for -help.
// NOTE(review): the closing arguments of this declaration are missing from
// this listing.
60 static opt
<bool> Help("h", desc("Alias for -help"), Hidden
,
// --verbose: boolean flag described as enabling verbose logging and encoding
// details.
// NOTE(review): the closing arguments of this declaration are missing from
// this listing.
63 static opt
<bool> Verbose("verbose",
64 desc("Enable verbose logging and encoding details."),
// Positional list of input GSYM file paths; ZeroOrMore allows it to be empty.
// main() opens every entry with GsymReader::openFile().
67 static list
<std::string
> InputFilenames(Positional
, desc("<input GSYM files>"),
68 ZeroOrMore
, cat(GeneralOptions
));
// --convert <path>: the file to convert to GSYM. It is initialised to the
// empty string, and main() takes the conversion path only when it is
// non-empty.
70 static opt
<std::string
>
71 ConvertFilename("convert", cl::init(""),
72 cl::desc("Convert the specified file to the GSYM format.\n"
73 "Supported files include ELF and mach-o files "
74 "that will have their debug info (DWARF) and "
75 "symbol table converted."),
76 cl::value_desc("path"), cat(ConversionOptions
));
// String-list conversion option that restricts processing to the given CPU
// architectures (by name or by number); it may be repeated.
// NOTE(review): the line carrying this option's variable and flag name is
// missing from this listing — presumably this is the ArchFilters list read by
// filterArch(); confirm.
78 static list
<std::string
>
80 desc("Process debug information for the specified CPU "
81 "architecture only.\nArchitectures may be specified by "
82 "name or by number.\nThis option can be specified "
83 "multiple times, once for each desired architecture."),
84 cl::value_desc("arch"), cat(ConversionOptions
));
// --out-file <path>: where the converted GSYM file is saved. It is initialised
// to the empty string; convertFileToGSYM() then starts from the --convert
// path instead.
86 static opt
<std::string
>
87 OutputFilename("out-file", cl::init(""),
88 cl::desc("Specify the path where the converted GSYM file "
89 "will be saved.\nWhen not specified, a '.gsym' "
90 "extension will be appended to the file name "
91 "specified in the --convert option."),
92 cl::value_desc("path"), cat(ConversionOptions
));
// -o: alias that forwards to the OutputFilename (--out-file) option.
93 static alias
OutputFilenameAlias("o", desc("Alias for -out-file."),
94 aliasopt(OutputFilename
),
95 cat(ConversionOptions
));
// --verify: boolean flag requesting that the generated GSYM file be checked
// against the information in the converted input file.
97 static opt
<bool> Verify("verify",
98 desc("Verify the generated GSYM file against the "
99 "information in the file that was converted."),
100 cat(ConversionOptions
));
// --num-threads <n>: maximum number of simultaneous threads used during
// conversion. handleObjectFile() uses std::thread::hardware_concurrency()
// instead when the value is not greater than 0.
// NOTE(review): the line declaring this option's type is missing from this
// listing.
103 NumThreads("num-threads",
104 desc("Specify the maximum number (n) of simultaneous threads "
105 "to use when converting files to GSYM.\nDefaults to the "
106 "number of cores on the current machine."),
107 cl::value_desc("n"), cat(ConversionOptions
));
// --quiet: described as suppressing warnings about the debug information; its
// value is passed to the GsymCreator constructor in handleObjectFile().
// NOTE(review): the line declaring this option's type is missing from this
// listing.
110 Quiet("quiet", desc("Do not output warnings about the debug information"),
111 cat(ConversionOptions
));
// --address <addr>: list of uint64_t addresses to look up. main() calls
// doLookup() for each entry against every input GSYM file.
// NOTE(review): the closing arguments of this declaration are missing from
// this listing.
113 static list
<uint64_t> LookupAddresses("address",
114 desc("Lookup an address in a GSYM file"),
115 cl::value_desc("addr"),
// --addresses-from-stdin: boolean flag. When set, main() reads lines from
// std::cin and splits each one at the first space into an address and a GSYM
// path.
// NOTE(review): the closing arguments of this declaration are missing from
// this listing.
118 static opt
<bool> LookupAddressesFromStdin(
119 "addresses-from-stdin",
120 desc("Lookup addresses in a GSYM file that are read from stdin\nEach input "
121 "line is expected to be of the following format: <addr> <gsym-path>"),
126 //===----------------------------------------------------------------------===//
/// Report \p Err: its message is written through WithColor::error(), followed
/// by a newline.
///
/// NOTE(review): parts of this body are missing from this listing. A comment
/// in main() states that error() exits with a status of 1 when it is given an
/// error — confirm against the full source.
128 static void error(Error Err
) {
// toString() consumes the moved-in error and yields its message text.
131 WithColor::error() << toString(std::move(Err
)) << "\n";
/// Report \p Err on errs() as "<Prefix>: <Err>" followed by a newline, then
/// consume the error.
///
/// Callers in this file pass the file name being processed as \p Prefix.
///
/// NOTE(review): parts of this body are missing from this listing; presumably
/// it returns early for a success value and exits otherwise — confirm.
135 static void error(StringRef Prefix
, llvm::Error Err
) {
138 errs() << Prefix
<< ": " << Err
<< "\n";
// Streaming the error above does not consume it, so it is consumed here.
139 consumeError(std::move(Err
));
/// Report \p EC on errs() as "<Prefix>: <message>" followed by a newline.
///
/// Callers in this file invoke this unconditionally right after an operation
/// that may have failed.
///
/// NOTE(review): parts of this body are missing from this listing; presumably
/// it returns early when \p EC holds no error and exits otherwise — confirm.
143 static void error(StringRef Prefix
, std::error_code EC
) {
146 errs() << Prefix
<< ": " << EC
.message() << "\n";
/// Return the cputype field from the header of \p MachO.
///
/// The value is read from either getHeader64() or getHeader().
/// NOTE(review): the condition selecting between the two headers is missing
/// from this listing — presumably a 64-bit check; confirm.
150 static uint32_t getCPUType(MachOObjectFile
&MachO
) {
152 return MachO
.getHeader64().cputype
;
154 return MachO
.getHeader().cputype
;
157 /// Return true if the object file has not been filtered by an --arch option.
///
/// \param Obj The mach-o object whose architecture is checked against every
/// entry of ArchFilters.
///
/// NOTE(review): several lines of this body (the return statements and the
/// architecture-name comparison) are missing from this listing.
158 static bool filterArch(MachOObjectFile
&Obj
) {
// With no filters given there is nothing to compare against.
159 if (ArchFilters
.empty())
// Architecture name as spelled in the object's triple.
162 Triple
ObjTriple(Obj
.getArchTriple());
163 StringRef ObjArch
= ObjTriple
.getArchName();
165 for (auto Arch
: ArchFilters
) {
170 // Match architecture number.
// getAsInteger() reports failure by returning true, so the negation selects
// filters that parsed as a number; those are compared with the CPU type.
172 if (!StringRef(Arch
).getAsInteger(0, Value
))
173 if (Value
== getCPUType(Obj
))
179 /// Determine the virtual address that is considered the base address of an ELF
182 /// The base address of an ELF file is the "p_vaddr" of the first program
183 /// header whose "p_type" is PT_LOAD.
185 /// \param ELFFile An ELF object file we will search.
187 /// \returns A valid image base address if we are able to extract one.
188 template <class ELFT
>
189 static llvm::Optional
<uint64_t>
190 getImageBaseAddress(const object::ELFFile
<ELFT
> &ELFFile
) {
191 auto PhdrRangeOrErr
= ELFFile
.program_headers();
// The program headers could not be read: the error is discarded, not
// reported.
192 if (!PhdrRangeOrErr
) {
193 consumeError(PhdrRangeOrErr
.takeError());
// Headers are visited in order, so the first PT_LOAD entry supplies the
// address.
196 for (const typename
ELFT::Phdr
&Phdr
: *PhdrRangeOrErr
)
197 if (Phdr
.p_type
== ELF::PT_LOAD
)
198 return (uint64_t)Phdr
.p_vaddr
;
202 /// Determine the virtual address that is considered the base address of mach-o
205 /// The base address of a mach-o file is the vmaddr of the "__TEXT" segment.
207 /// \param MachO A mach-o object file we will search.
209 /// \returns A valid image base address if we are able to extract one.
///
/// NOTE(review): the statements executed when a "__TEXT" segment is found, and
/// the fall-through return, are missing from this listing.
210 static llvm::Optional
<uint64_t>
211 getImageBaseAddress(const object::MachOObjectFile
*MachO
) {
212 for (const auto &Command
: MachO
->load_commands()) {
// LC_SEGMENT commands are decoded as segment_command.
213 if (Command
.C
.cmd
== MachO::LC_SEGMENT
) {
214 MachO::segment_command SLC
= MachO
->getSegmentLoadCommand(Command
);
215 StringRef SegName
= SLC
.segname
;
216 if (SegName
== "__TEXT")
// LC_SEGMENT_64 commands are decoded as segment_command_64; the same
// segment-name test applies.
218 } else if (Command
.C
.cmd
== MachO::LC_SEGMENT_64
) {
219 MachO::segment_command_64 SLC
= MachO
->getSegment64LoadCommand(Command
);
220 StringRef SegName
= SLC
.segname
;
221 if (SegName
== "__TEXT")
228 /// Determine the virtual address that is considered the base address of an
231 /// Since GSYM files are used for symbolication, many clients will need to
232 /// easily adjust addresses they find in stack traces so the lookups happen
233 /// on unslid addresses from the original object file. If the base address of
234 /// a GSYM file is set to the base address of the image, then this address
235 /// adjusting is much easier.
237 /// \param Obj An object file we will search.
239 /// \returns A valid image base address if we are able to extract one.
///
/// This overload only dispatches on the dynamic type of \p Obj and forwards
/// to the mach-o or ELF overload.
240 static llvm::Optional
<uint64_t> getImageBaseAddress(object::ObjectFile
&Obj
) {
// Mach-O objects go to the load-command based overload.
241 if (const auto *MachO
= dyn_cast
<object::MachOObjectFile
>(&Obj
))
242 return getImageBaseAddress(MachO
);
// Each of the four ELF object file classes (32/64, LE/BE) forwards its ELFFile
// to the templated overload.
243 else if (const auto *ELFObj
= dyn_cast
<object::ELF32LEObjectFile
>(&Obj
))
244 return getImageBaseAddress(ELFObj
->getELFFile());
245 else if (const auto *ELFObj
= dyn_cast
<object::ELF32BEObjectFile
>(&Obj
))
246 return getImageBaseAddress(ELFObj
->getELFFile());
247 else if (const auto *ELFObj
= dyn_cast
<object::ELF64LEObjectFile
>(&Obj
))
248 return getImageBaseAddress(ELFObj
->getELFFile());
249 else if (const auto *ELFObj
= dyn_cast
<object::ELF64BEObjectFile
>(&Obj
))
250 return getImageBaseAddress(ELFObj
->getELFFile());
/// Convert the DWARF debug info and symbol table of \p Obj to GSYM and save
/// the result to \p OutFile.
///
/// The steps visible here are: set the base address, collect section address
/// ranges, run DwarfTransformer and ObjectFileTransformer, finalize, save, and
/// verify.
///
/// \param Obj The object file to convert.
/// \param OutFile Path the GSYM data is saved to (and verified from).
///
/// \returns Error::success() at the end of the function; an invalid_argument
/// string error is produced for "unable to create DWARF context".
///
/// NOTE(review): a number of lines (guards, early returns and declarations
/// such as the ones introducing OS and ThreadCount) are missing from this
/// listing.
254 static llvm::Error
handleObjectFile(ObjectFile
&Obj
,
255 const std::string
&OutFile
) {
// Thread count: --num-threads when it is greater than 0, otherwise the
// hardware concurrency reported by the standard library.
257 NumThreads
> 0 ? NumThreads
: std::thread::hardware_concurrency();
260 GsymCreator
Gsym(Quiet
);
262 // See if we can figure out the base address for a given object file, and if
263 // we can, then set the base address to use to this value. This will ease
264 // symbolication since clients can slide the GSYM lookup addresses by using
265 // the load bias of the shared library.
266 if (auto ImageBaseAddr
= getImageBaseAddress(Obj
))
267 Gsym
.setBaseAddress(*ImageBaseAddr
);
269 // We need to know where the valid sections are that contain instructions.
270 // See header documentation for DWARFTransformer::SetValidTextRanges() for
272 AddressRanges TextRanges
;
// NOTE(review): the checks deciding which sections are recorded are missing
// from this listing.
273 for (const object::SectionRef
&Sect
: Obj
.sections()) {
276 const uint64_t Size
= Sect
.getSize();
// Each recorded range runs from the section address to address + size.
279 const uint64_t StartAddr
= Sect
.getAddress();
280 TextRanges
.insert(AddressRange(StartAddr
, StartAddr
+ Size
));
283 // Make sure there is DWARF to convert first.
284 std::unique_ptr
<DWARFContext
> DICtx
= DWARFContext::create(Obj
);
286 return createStringError(std::errc::invalid_argument
,
287 "unable to create DWARF context");
// Errors from loading register info are logged to OS, not returned.
288 logAllUnhandledErrors(DICtx
->loadRegisterInfo(Obj
), OS
, "DwarfTransformer: ");
290 // Make a DWARF transformer object and populate the ranges of the code
291 // so we don't end up adding invalid functions to GSYM data.
292 DwarfTransformer
DT(*DICtx
, OS
, Gsym
);
293 if (!TextRanges
.empty())
294 Gsym
.SetValidTextRanges(TextRanges
);
296 // Convert all DWARF to GSYM.
297 if (auto Err
= DT
.convert(ThreadCount
))
300 // Get the UUID and convert symbol table to GSYM.
301 if (auto Err
= ObjectFileTransformer::convert(Obj
, OS
, Gsym
))
304 // Finalize the GSYM to make it ready to save to disk. This will remove
305 // duplicate FunctionInfo entries where we might have found an entry from
306 // debug info and also a symbol table entry from the object file.
307 if (auto Err
= Gsym
.finalize(OS
))
310 // Save the GSYM file to disk.
// The byte order passed to save() follows the endianness of the object's
// triple.
311 support::endianness Endian
=
312 Obj
.makeTriple().isLittleEndian() ? support::little
: support::big
;
313 if (auto Err
= Gsym
.save(OutFile
, Endian
))
316 // Verify the DWARF if requested. This will ensure all the info in the DWARF
317 // can be looked up in the GSYM and that all lookups get matching data.
319 if (auto Err
= DT
.verify(OutFile
))
323 return Error::success();
/// Parse \p Buffer as a binary and convert every object it holds to GSYM.
///
/// A plain ObjectFile is converted straight to \p OutFile. For a
/// MachOUniversalBinary each selected slice is converted; when more than one
/// slice is selected, ".<arch>" is appended to \p OutFile for each of them.
///
/// \param Filename Name used as the prefix of error reports.
/// \param Buffer The in-memory file contents.
/// \param OutFile Base output path.
///
/// \returns Error::success() at the end of the function.
///
/// NOTE(review): the statements following the failed-conversion checks, and a
/// few other lines, are missing from this listing.
326 static llvm::Error
handleBuffer(StringRef Filename
, MemoryBufferRef Buffer
,
327 const std::string
&OutFile
) {
328 Expected
<std::unique_ptr
<Binary
>> BinOrErr
= object::createBinary(Buffer
);
// A createBinary() failure is converted to an error_code and handed to
// error() together with the file name.
329 error(Filename
, errorToErrorCode(BinOrErr
.takeError()));
331 if (auto *Obj
= dyn_cast
<ObjectFile
>(BinOrErr
->get())) {
332 Triple
ObjTriple(Obj
->makeTriple());
333 auto ArchName
= ObjTriple
.getArchName();
334 outs() << "Output file (" << ArchName
<< "): " << OutFile
<< "\n";
335 if (auto Err
= handleObjectFile(*Obj
, OutFile
))
337 } else if (auto *Fat
= dyn_cast
<MachOUniversalBinary
>(BinOrErr
->get())) {
338 // Iterate over all contained architectures and filter out any that were
339 // not specified with the "--arch <arch>" option. If the --arch option was
340 // not specified on the command line, we will process all architectures.
341 std::vector
<std::unique_ptr
<MachOObjectFile
>> FilterObjs
;
342 for (auto &ObjForArch
: Fat
->objects()) {
343 if (auto MachOOrErr
= ObjForArch
.getAsObjectFile()) {
344 auto &Obj
= **MachOOrErr
;
// Ownership of the slice moves out of the Expected into FilterObjs.
// NOTE(review): the line between these two statements is missing from this
// listing — presumably a filterArch(Obj) check; confirm.
346 FilterObjs
.emplace_back(MachOOrErr
->release());
348 error(Filename
, MachOOrErr
.takeError());
351 if (FilterObjs
.empty())
352 error(Filename
, createStringError(std::errc::invalid_argument
,
353 "no matching architectures found"));
355 // Now handle each architecture we need to convert.
356 for (auto &Obj
: FilterObjs
) {
357 Triple
ObjTriple(Obj
->getArchTriple());
358 auto ArchName
= ObjTriple
.getArchName();
359 std::string
ArchOutFile(OutFile
);
360 // If we are only handling a single architecture, then we will use the
361 // normal output file. If we are handling multiple architectures append
362 // the architecture name to the end of the out file path so that we
363 // don't overwrite the previous architecture's gsym file.
364 if (FilterObjs
.size() > 1) {
365 ArchOutFile
.append(1, '.');
366 ArchOutFile
.append(ArchName
.str());
368 outs() << "Output file (" << ArchName
<< "): " << ArchOutFile
<< "\n";
369 if (auto Err
= handleObjectFile(*Obj
, ArchOutFile
))
373 return Error::success();
/// Load \p Filename into memory with MemoryBuffer::getFileOrSTDIN() and pass
/// the contents to handleBuffer().
///
/// \param Filename Path of the file to convert; also used as the prefix of
/// error reports.
/// \param OutFile Output path forwarded to handleBuffer().
///
/// \returns Whatever handleBuffer() returns.
376 static llvm::Error
handleFileConversionToGSYM(StringRef Filename
,
377 const std::string
&OutFile
) {
378 ErrorOr
<std::unique_ptr
<MemoryBuffer
>> BuffOrErr
=
379 MemoryBuffer::getFileOrSTDIN(Filename
);
// The read status is handed to error() before the buffer is used.
380 error(Filename
, BuffOrErr
.getError());
381 std::unique_ptr
<MemoryBuffer
> Buffer
= std::move(BuffOrErr
.get());
382 return handleBuffer(Filename
, *Buffer
, OutFile
);
/// Convert the file named by the --convert option to GSYM.
///
/// The output path is OutputFilename when given, otherwise it is derived from
/// ConvertFilename. Each object found for the input is passed to
/// handleFileConversionToGSYM().
///
/// \param OS Stream that receives the "Input file: ..." line.
///
/// \returns Error::success() at the end of the function.
///
/// NOTE(review): some lines (for example the rest of the default-output-name
/// branch and the failed-conversion return) are missing from this listing.
385 static llvm::Error
convertFileToGSYM(raw_ostream
&OS
) {
386 // Expand any .dSYM bundles to the individual object files contained therein.
387 std::vector
<std::string
> Objects
;
388 std::string OutFile
= OutputFilename
;
// No --out-file given: start from the --convert path.
389 if (OutFile
.empty()) {
390 OutFile
= ConvertFilename
;
394 OS
<< "Input file: " << ConvertFilename
<< "\n";
396 if (auto DsymObjectsOrErr
=
397 MachOObjectFile::findDsymObjectMembers(ConvertFilename
)) {
// An empty member list means the input path itself is the only object to
// convert.
398 if (DsymObjectsOrErr
->empty())
399 Objects
.push_back(ConvertFilename
);
401 llvm::append_range(Objects
, *DsymObjectsOrErr
);
403 error(DsymObjectsOrErr
.takeError());
// Every object is converted with the same OutFile.
406 for (auto Object
: Objects
) {
407 if (auto Err
= handleFileConversionToGSYM(Object
, OutFile
))
410 return Error::success();
/// Look up \p Addr in \p Gsym and write the outcome to \p OS.
///
/// If the lookup fails, the address is printed in hex followed by ": ", and
/// the lookup error is then logged to \p OS with an "error: " banner.
///
/// \param Gsym The GSYM reader to query.
/// \param Addr The address to look up.
/// \param OS Stream that receives all output.
///
/// NOTE(review): the lines that print the successful result and the verbose
/// checks are missing from this listing.
413 static void doLookup(GsymReader
&Gsym
, uint64_t Addr
, raw_ostream
&OS
) {
414 if (auto Result
= Gsym
.lookup(Addr
)) {
415 // If verbose is enabled dump the full function info for the address.
417 if (auto FI
= Gsym
.getFunctionInfo(Addr
)) {
418 OS
<< "FunctionInfo for " << HEX64(Addr
) << ":\n";
420 OS
<< "\nLookupResult for " << HEX64(Addr
) << ":\n";
426 OS
<< "\nLookupResult for " << HEX64(Addr
) << ":\n";
427 OS
<< HEX64(Addr
) << ": ";
428 logAllUnhandledErrors(Result
.takeError(), OS
, "error: ");
434 int main(int argc
, char const *argv
[]) {
435 // Print a stack trace if we signal out.
436 sys::PrintStackTraceOnErrorSignal(argv
[0]);
437 PrettyStackTraceProgram
X(argc
, argv
);
438 llvm_shutdown_obj Y
; // Call llvm_shutdown() on exit.
440 llvm::InitializeAllTargets();
442 const char *Overview
=
443 "A tool for dumping, searching and creating GSYM files.\n\n"
444 "Specify one or more GSYM paths as arguments to dump all of the "
445 "information in each GSYM file.\n"
446 "Specify a single GSYM file along with one or more --lookup options to "
447 "lookup addresses within that GSYM file.\n"
448 "Use the --convert option to specify a file with option --out-file "
449 "option to convert to GSYM format.\n";
450 HideUnrelatedOptions({&GeneralOptions
, &ConversionOptions
, &LookupOptions
});
451 cl::ParseCommandLineOptions(argc
, argv
, Overview
);
454 PrintHelpMessage(/*Hidden =*/false, /*Categorized =*/true);
458 raw_ostream
&OS
= outs();
460 if (!ConvertFilename
.empty()) {
461 // Convert DWARF to GSYM
462 if (!InputFilenames
.empty()) {
463 OS
<< "error: no input files can be specified when using the --convert "
467 // Call error() if we have an error and it will exit with a status of 1
468 if (auto Err
= convertFileToGSYM(OS
))
469 error("DWARF conversion failed: ", std::move(Err
));
473 if (LookupAddressesFromStdin
) {
474 if (!LookupAddresses
.empty() || !InputFilenames
.empty()) {
475 OS
<< "error: no input files or addresses can be specified when using "
476 "the --addresses-from-stdin "
481 std::string InputLine
;
482 std::string CurrentGSYMPath
;
483 llvm::Optional
<Expected
<GsymReader
>> CurrentGsym
;
485 while (std::getline(std::cin
, InputLine
)) {
486 // Strip newline characters.
487 std::string
StrippedInputLine(InputLine
);
488 llvm::erase_if(StrippedInputLine
,
489 [](char c
) { return c
== '\r' || c
== '\n'; });
491 StringRef AddrStr
, GSYMPath
;
492 std::tie(AddrStr
, GSYMPath
) =
493 llvm::StringRef
{StrippedInputLine
}.split(' ');
495 if (GSYMPath
!= CurrentGSYMPath
) {
496 CurrentGsym
= GsymReader::openFile(GSYMPath
);
498 error(GSYMPath
, CurrentGsym
->takeError());
502 if (AddrStr
.getAsInteger(0, Addr
)) {
503 OS
<< "error: invalid address " << AddrStr
504 << ", expected: Address GsymFile.\n";
508 doLookup(**CurrentGsym
, Addr
, OS
);
517 // Dump or access data inside GSYM files
518 for (const auto &GSYMPath
: InputFilenames
) {
519 auto Gsym
= GsymReader::openFile(GSYMPath
);
521 error(GSYMPath
, Gsym
.takeError());
523 if (LookupAddresses
.empty()) {
528 // Lookup an address in a GSYM file and print any matches.
529 OS
<< "Looking up addresses in \"" << GSYMPath
<< "\":\n";
530 for (auto Addr
: LookupAddresses
) {
531 doLookup(*Gsym
, Addr
, OS
);