[sanitizer] Improve FreeBSD ASLR detection
[llvm-project.git] / llvm / tools / llvm-gsymutil / llvm-gsymutil.cpp
blob4e3c06e1e5fd7563464d283c3af2eccceea13d31
1 //===-- gsymutil.cpp - GSYM dumping and creation utility for llvm ---------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
9 #include "llvm/ADT/STLExtras.h"
10 #include "llvm/ADT/StringSet.h"
11 #include "llvm/ADT/Triple.h"
12 #include "llvm/DebugInfo/DIContext.h"
13 #include "llvm/DebugInfo/DWARF/DWARFContext.h"
14 #include "llvm/Object/Archive.h"
15 #include "llvm/Object/ELFObjectFile.h"
16 #include "llvm/Object/MachOUniversal.h"
17 #include "llvm/Object/ObjectFile.h"
18 #include "llvm/Support/CommandLine.h"
19 #include "llvm/Support/Debug.h"
20 #include "llvm/Support/Format.h"
21 #include "llvm/Support/ManagedStatic.h"
22 #include "llvm/Support/MemoryBuffer.h"
23 #include "llvm/Support/PrettyStackTrace.h"
24 #include "llvm/Support/Regex.h"
25 #include "llvm/Support/Signals.h"
26 #include "llvm/Support/TargetSelect.h"
27 #include "llvm/Support/raw_ostream.h"
28 #include <algorithm>
29 #include <cstring>
30 #include <inttypes.h>
31 #include <iostream>
32 #include <map>
33 #include <string>
34 #include <system_error>
35 #include <vector>
37 #include "llvm/DebugInfo/GSYM/DwarfTransformer.h"
38 #include "llvm/DebugInfo/GSYM/FunctionInfo.h"
39 #include "llvm/DebugInfo/GSYM/GsymCreator.h"
40 #include "llvm/DebugInfo/GSYM/GsymReader.h"
41 #include "llvm/DebugInfo/GSYM/InlineInfo.h"
42 #include "llvm/DebugInfo/GSYM/LookupResult.h"
43 #include "llvm/DebugInfo/GSYM/ObjectFileTransformer.h"
45 using namespace llvm;
46 using namespace gsym;
47 using namespace object;
49 /// @}
50 /// Command line options.
51 /// @{
53 namespace {
54 using namespace cl;
56 OptionCategory GeneralOptions("Options");
57 OptionCategory ConversionOptions("Conversion Options");
58 OptionCategory LookupOptions("Lookup Options");
60 static opt<bool> Help("h", desc("Alias for -help"), Hidden,
61 cat(GeneralOptions));
63 static opt<bool> Verbose("verbose",
64 desc("Enable verbose logging and encoding details."),
65 cat(GeneralOptions));
67 static list<std::string> InputFilenames(Positional, desc("<input GSYM files>"),
68 ZeroOrMore, cat(GeneralOptions));
70 static opt<std::string>
71 ConvertFilename("convert", cl::init(""),
72 cl::desc("Convert the specified file to the GSYM format.\n"
73 "Supported files include ELF and mach-o files "
74 "that will have their debug info (DWARF) and "
75 "symbol table converted."),
76 cl::value_desc("path"), cat(ConversionOptions));
78 static list<std::string>
79 ArchFilters("arch",
80 desc("Process debug information for the specified CPU "
81 "architecture only.\nArchitectures may be specified by "
82 "name or by number.\nThis option can be specified "
83 "multiple times, once for each desired architecture."),
84 cl::value_desc("arch"), cat(ConversionOptions));
86 static opt<std::string>
87 OutputFilename("out-file", cl::init(""),
88 cl::desc("Specify the path where the converted GSYM file "
89 "will be saved.\nWhen not specified, a '.gsym' "
90 "extension will be appended to the file name "
91 "specified in the --convert option."),
92 cl::value_desc("path"), cat(ConversionOptions));
93 static alias OutputFilenameAlias("o", desc("Alias for -out-file."),
94 aliasopt(OutputFilename),
95 cat(ConversionOptions));
97 static opt<bool> Verify("verify",
98 desc("Verify the generated GSYM file against the "
99 "information in the file that was converted."),
100 cat(ConversionOptions));
102 static opt<unsigned>
103 NumThreads("num-threads",
104 desc("Specify the maximum number (n) of simultaneous threads "
105 "to use when converting files to GSYM.\nDefaults to the "
106 "number of cores on the current machine."),
107 cl::value_desc("n"), cat(ConversionOptions));
109 static opt<bool>
110 Quiet("quiet", desc("Do not output warnings about the debug information"),
111 cat(ConversionOptions));
113 static list<uint64_t> LookupAddresses("address",
114 desc("Lookup an address in a GSYM file"),
115 cl::value_desc("addr"),
116 cat(LookupOptions));
118 static opt<bool> LookupAddressesFromStdin(
119 "addresses-from-stdin",
120 desc("Lookup addresses in a GSYM file that are read from stdin\nEach input "
121 "line is expected to be of the following format: <addr> <gsym-path>"),
122 cat(LookupOptions));
124 } // namespace
125 /// @}
126 //===----------------------------------------------------------------------===//
128 static void error(Error Err) {
129 if (!Err)
130 return;
131 WithColor::error() << toString(std::move(Err)) << "\n";
132 exit(1);
135 static void error(StringRef Prefix, llvm::Error Err) {
136 if (!Err)
137 return;
138 errs() << Prefix << ": " << Err << "\n";
139 consumeError(std::move(Err));
140 exit(1);
143 static void error(StringRef Prefix, std::error_code EC) {
144 if (!EC)
145 return;
146 errs() << Prefix << ": " << EC.message() << "\n";
147 exit(1);
150 static uint32_t getCPUType(MachOObjectFile &MachO) {
151 if (MachO.is64Bit())
152 return MachO.getHeader64().cputype;
153 else
154 return MachO.getHeader().cputype;
157 /// Return true if the object file has not been filtered by an --arch option.
158 static bool filterArch(MachOObjectFile &Obj) {
159 if (ArchFilters.empty())
160 return true;
162 Triple ObjTriple(Obj.getArchTriple());
163 StringRef ObjArch = ObjTriple.getArchName();
165 for (auto Arch : ArchFilters) {
166 // Match name.
167 if (Arch == ObjArch)
168 return true;
170 // Match architecture number.
171 unsigned Value;
172 if (!StringRef(Arch).getAsInteger(0, Value))
173 if (Value == getCPUType(Obj))
174 return true;
176 return false;
179 /// Determine the virtual address that is considered the base address of an ELF
180 /// object file.
182 /// The base address of an ELF file is the the "p_vaddr" of the first program
183 /// header whose "p_type" is PT_LOAD.
185 /// \param ELFFile An ELF object file we will search.
187 /// \returns A valid image base address if we are able to extract one.
188 template <class ELFT>
189 static llvm::Optional<uint64_t>
190 getImageBaseAddress(const object::ELFFile<ELFT> &ELFFile) {
191 auto PhdrRangeOrErr = ELFFile.program_headers();
192 if (!PhdrRangeOrErr) {
193 consumeError(PhdrRangeOrErr.takeError());
194 return llvm::None;
196 for (const typename ELFT::Phdr &Phdr : *PhdrRangeOrErr)
197 if (Phdr.p_type == ELF::PT_LOAD)
198 return (uint64_t)Phdr.p_vaddr;
199 return llvm::None;
202 /// Determine the virtual address that is considered the base address of mach-o
203 /// object file.
205 /// The base address of a mach-o file is the vmaddr of the "__TEXT" segment.
207 /// \param MachO A mach-o object file we will search.
209 /// \returns A valid image base address if we are able to extract one.
210 static llvm::Optional<uint64_t>
211 getImageBaseAddress(const object::MachOObjectFile *MachO) {
212 for (const auto &Command : MachO->load_commands()) {
213 if (Command.C.cmd == MachO::LC_SEGMENT) {
214 MachO::segment_command SLC = MachO->getSegmentLoadCommand(Command);
215 StringRef SegName = SLC.segname;
216 if (SegName == "__TEXT")
217 return SLC.vmaddr;
218 } else if (Command.C.cmd == MachO::LC_SEGMENT_64) {
219 MachO::segment_command_64 SLC = MachO->getSegment64LoadCommand(Command);
220 StringRef SegName = SLC.segname;
221 if (SegName == "__TEXT")
222 return SLC.vmaddr;
225 return llvm::None;
228 /// Determine the virtual address that is considered the base address of an
229 /// object file.
231 /// Since GSYM files are used for symbolication, many clients will need to
232 /// easily adjust addresses they find in stack traces so the lookups happen
233 /// on unslid addresses from the original object file. If the base address of
234 /// a GSYM file is set to the base address of the image, then this address
235 /// adjusting is much easier.
237 /// \param Obj An object file we will search.
239 /// \returns A valid image base address if we are able to extract one.
240 static llvm::Optional<uint64_t> getImageBaseAddress(object::ObjectFile &Obj) {
241 if (const auto *MachO = dyn_cast<object::MachOObjectFile>(&Obj))
242 return getImageBaseAddress(MachO);
243 else if (const auto *ELFObj = dyn_cast<object::ELF32LEObjectFile>(&Obj))
244 return getImageBaseAddress(ELFObj->getELFFile());
245 else if (const auto *ELFObj = dyn_cast<object::ELF32BEObjectFile>(&Obj))
246 return getImageBaseAddress(ELFObj->getELFFile());
247 else if (const auto *ELFObj = dyn_cast<object::ELF64LEObjectFile>(&Obj))
248 return getImageBaseAddress(ELFObj->getELFFile());
249 else if (const auto *ELFObj = dyn_cast<object::ELF64BEObjectFile>(&Obj))
250 return getImageBaseAddress(ELFObj->getELFFile());
251 return llvm::None;
254 static llvm::Error handleObjectFile(ObjectFile &Obj,
255 const std::string &OutFile) {
256 auto ThreadCount =
257 NumThreads > 0 ? NumThreads : std::thread::hardware_concurrency();
258 auto &OS = outs();
260 GsymCreator Gsym(Quiet);
262 // See if we can figure out the base address for a given object file, and if
263 // we can, then set the base address to use to this value. This will ease
264 // symbolication since clients can slide the GSYM lookup addresses by using
265 // the load bias of the shared library.
266 if (auto ImageBaseAddr = getImageBaseAddress(Obj))
267 Gsym.setBaseAddress(*ImageBaseAddr);
269 // We need to know where the valid sections are that contain instructions.
270 // See header documentation for DWARFTransformer::SetValidTextRanges() for
271 // defails.
272 AddressRanges TextRanges;
273 for (const object::SectionRef &Sect : Obj.sections()) {
274 if (!Sect.isText())
275 continue;
276 const uint64_t Size = Sect.getSize();
277 if (Size == 0)
278 continue;
279 const uint64_t StartAddr = Sect.getAddress();
280 TextRanges.insert(AddressRange(StartAddr, StartAddr + Size));
283 // Make sure there is DWARF to convert first.
284 std::unique_ptr<DWARFContext> DICtx = DWARFContext::create(Obj);
285 if (!DICtx)
286 return createStringError(std::errc::invalid_argument,
287 "unable to create DWARF context");
288 logAllUnhandledErrors(DICtx->loadRegisterInfo(Obj), OS, "DwarfTransformer: ");
290 // Make a DWARF transformer object and populate the ranges of the code
291 // so we don't end up adding invalid functions to GSYM data.
292 DwarfTransformer DT(*DICtx, OS, Gsym);
293 if (!TextRanges.empty())
294 Gsym.SetValidTextRanges(TextRanges);
296 // Convert all DWARF to GSYM.
297 if (auto Err = DT.convert(ThreadCount))
298 return Err;
300 // Get the UUID and convert symbol table to GSYM.
301 if (auto Err = ObjectFileTransformer::convert(Obj, OS, Gsym))
302 return Err;
304 // Finalize the GSYM to make it ready to save to disk. This will remove
305 // duplicate FunctionInfo entries where we might have found an entry from
306 // debug info and also a symbol table entry from the object file.
307 if (auto Err = Gsym.finalize(OS))
308 return Err;
310 // Save the GSYM file to disk.
311 support::endianness Endian =
312 Obj.makeTriple().isLittleEndian() ? support::little : support::big;
313 if (auto Err = Gsym.save(OutFile, Endian))
314 return Err;
316 // Verify the DWARF if requested. This will ensure all the info in the DWARF
317 // can be looked up in the GSYM and that all lookups get matching data.
318 if (Verify) {
319 if (auto Err = DT.verify(OutFile))
320 return Err;
323 return Error::success();
326 static llvm::Error handleBuffer(StringRef Filename, MemoryBufferRef Buffer,
327 const std::string &OutFile) {
328 Expected<std::unique_ptr<Binary>> BinOrErr = object::createBinary(Buffer);
329 error(Filename, errorToErrorCode(BinOrErr.takeError()));
331 if (auto *Obj = dyn_cast<ObjectFile>(BinOrErr->get())) {
332 Triple ObjTriple(Obj->makeTriple());
333 auto ArchName = ObjTriple.getArchName();
334 outs() << "Output file (" << ArchName << "): " << OutFile << "\n";
335 if (auto Err = handleObjectFile(*Obj, OutFile))
336 return Err;
337 } else if (auto *Fat = dyn_cast<MachOUniversalBinary>(BinOrErr->get())) {
338 // Iterate over all contained architectures and filter out any that were
339 // not specified with the "--arch <arch>" option. If the --arch option was
340 // not specified on the command line, we will process all architectures.
341 std::vector<std::unique_ptr<MachOObjectFile>> FilterObjs;
342 for (auto &ObjForArch : Fat->objects()) {
343 if (auto MachOOrErr = ObjForArch.getAsObjectFile()) {
344 auto &Obj = **MachOOrErr;
345 if (filterArch(Obj))
346 FilterObjs.emplace_back(MachOOrErr->release());
347 } else {
348 error(Filename, MachOOrErr.takeError());
351 if (FilterObjs.empty())
352 error(Filename, createStringError(std::errc::invalid_argument,
353 "no matching architectures found"));
355 // Now handle each architecture we need to convert.
356 for (auto &Obj : FilterObjs) {
357 Triple ObjTriple(Obj->getArchTriple());
358 auto ArchName = ObjTriple.getArchName();
359 std::string ArchOutFile(OutFile);
360 // If we are only handling a single architecture, then we will use the
361 // normal output file. If we are handling multiple architectures append
362 // the architecture name to the end of the out file path so that we
363 // don't overwrite the previous architecture's gsym file.
364 if (FilterObjs.size() > 1) {
365 ArchOutFile.append(1, '.');
366 ArchOutFile.append(ArchName.str());
368 outs() << "Output file (" << ArchName << "): " << ArchOutFile << "\n";
369 if (auto Err = handleObjectFile(*Obj, ArchOutFile))
370 return Err;
373 return Error::success();
376 static llvm::Error handleFileConversionToGSYM(StringRef Filename,
377 const std::string &OutFile) {
378 ErrorOr<std::unique_ptr<MemoryBuffer>> BuffOrErr =
379 MemoryBuffer::getFileOrSTDIN(Filename);
380 error(Filename, BuffOrErr.getError());
381 std::unique_ptr<MemoryBuffer> Buffer = std::move(BuffOrErr.get());
382 return handleBuffer(Filename, *Buffer, OutFile);
385 static llvm::Error convertFileToGSYM(raw_ostream &OS) {
386 // Expand any .dSYM bundles to the individual object files contained therein.
387 std::vector<std::string> Objects;
388 std::string OutFile = OutputFilename;
389 if (OutFile.empty()) {
390 OutFile = ConvertFilename;
391 OutFile += ".gsym";
394 OS << "Input file: " << ConvertFilename << "\n";
396 if (auto DsymObjectsOrErr =
397 MachOObjectFile::findDsymObjectMembers(ConvertFilename)) {
398 if (DsymObjectsOrErr->empty())
399 Objects.push_back(ConvertFilename);
400 else
401 llvm::append_range(Objects, *DsymObjectsOrErr);
402 } else {
403 error(DsymObjectsOrErr.takeError());
406 for (auto Object : Objects) {
407 if (auto Err = handleFileConversionToGSYM(Object, OutFile))
408 return Err;
410 return Error::success();
413 static void doLookup(GsymReader &Gsym, uint64_t Addr, raw_ostream &OS) {
414 if (auto Result = Gsym.lookup(Addr)) {
415 // If verbose is enabled dump the full function info for the address.
416 if (Verbose) {
417 if (auto FI = Gsym.getFunctionInfo(Addr)) {
418 OS << "FunctionInfo for " << HEX64(Addr) << ":\n";
419 Gsym.dump(OS, *FI);
420 OS << "\nLookupResult for " << HEX64(Addr) << ":\n";
423 OS << Result.get();
424 } else {
425 if (Verbose)
426 OS << "\nLookupResult for " << HEX64(Addr) << ":\n";
427 OS << HEX64(Addr) << ": ";
428 logAllUnhandledErrors(Result.takeError(), OS, "error: ");
430 if (Verbose)
431 OS << "\n";
434 int main(int argc, char const *argv[]) {
435 // Print a stack trace if we signal out.
436 sys::PrintStackTraceOnErrorSignal(argv[0]);
437 PrettyStackTraceProgram X(argc, argv);
438 llvm_shutdown_obj Y; // Call llvm_shutdown() on exit.
440 llvm::InitializeAllTargets();
442 const char *Overview =
443 "A tool for dumping, searching and creating GSYM files.\n\n"
444 "Specify one or more GSYM paths as arguments to dump all of the "
445 "information in each GSYM file.\n"
446 "Specify a single GSYM file along with one or more --lookup options to "
447 "lookup addresses within that GSYM file.\n"
448 "Use the --convert option to specify a file with option --out-file "
449 "option to convert to GSYM format.\n";
450 HideUnrelatedOptions({&GeneralOptions, &ConversionOptions, &LookupOptions});
451 cl::ParseCommandLineOptions(argc, argv, Overview);
453 if (Help) {
454 PrintHelpMessage(/*Hidden =*/false, /*Categorized =*/true);
455 return 0;
458 raw_ostream &OS = outs();
460 if (!ConvertFilename.empty()) {
461 // Convert DWARF to GSYM
462 if (!InputFilenames.empty()) {
463 OS << "error: no input files can be specified when using the --convert "
464 "option.\n";
465 return 1;
467 // Call error() if we have an error and it will exit with a status of 1
468 if (auto Err = convertFileToGSYM(OS))
469 error("DWARF conversion failed: ", std::move(Err));
470 return 0;
473 if (LookupAddressesFromStdin) {
474 if (!LookupAddresses.empty() || !InputFilenames.empty()) {
475 OS << "error: no input files or addresses can be specified when using "
476 "the --addresses-from-stdin "
477 "option.\n";
478 return 1;
481 std::string InputLine;
482 std::string CurrentGSYMPath;
483 llvm::Optional<Expected<GsymReader>> CurrentGsym;
485 while (std::getline(std::cin, InputLine)) {
486 // Strip newline characters.
487 std::string StrippedInputLine(InputLine);
488 llvm::erase_if(StrippedInputLine,
489 [](char c) { return c == '\r' || c == '\n'; });
491 StringRef AddrStr, GSYMPath;
492 std::tie(AddrStr, GSYMPath) =
493 llvm::StringRef{StrippedInputLine}.split(' ');
495 if (GSYMPath != CurrentGSYMPath) {
496 CurrentGsym = GsymReader::openFile(GSYMPath);
497 if (!*CurrentGsym)
498 error(GSYMPath, CurrentGsym->takeError());
501 uint64_t Addr;
502 if (AddrStr.getAsInteger(0, Addr)) {
503 OS << "error: invalid address " << AddrStr
504 << ", expected: Address GsymFile.\n";
505 return 1;
508 doLookup(**CurrentGsym, Addr, OS);
510 OS << "\n";
511 OS.flush();
514 return EXIT_SUCCESS;
517 // Dump or access data inside GSYM files
518 for (const auto &GSYMPath : InputFilenames) {
519 auto Gsym = GsymReader::openFile(GSYMPath);
520 if (!Gsym)
521 error(GSYMPath, Gsym.takeError());
523 if (LookupAddresses.empty()) {
524 Gsym->dump(outs());
525 continue;
528 // Lookup an address in a GSYM file and print any matches.
529 OS << "Looking up addresses in \"" << GSYMPath << "\":\n";
530 for (auto Addr : LookupAddresses) {
531 doLookup(*Gsym, Addr, OS);
534 return EXIT_SUCCESS;