Backed out changeset b71c8c052463 (bug 1943846) for causing mass failures. CLOSED...
[gecko.git] / mozglue / baseprofiler / lul / LulElf.cpp
blob16866c5a41d5edc3ba6625468e8f85081535f1e2
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
4 // Copyright (c) 2006, 2011, 2012 Google Inc.
5 // All rights reserved.
6 //
7 // Redistribution and use in source and binary forms, with or without
8 // modification, are permitted provided that the following conditions are
9 // met:
11 // * Redistributions of source code must retain the above copyright
12 // notice, this list of conditions and the following disclaimer.
13 // * Redistributions in binary form must reproduce the above
14 // copyright notice, this list of conditions and the following disclaimer
15 // in the documentation and/or other materials provided with the
16 // distribution.
17 // * Neither the name of Google Inc. nor the names of its
18 // contributors may be used to endorse or promote products derived from
19 // this software without specific prior written permission.
21 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 // Restructured in 2009 by: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>
35 // (derived from)
36 // dump_symbols.cc: implement google_breakpad::WriteSymbolFile:
37 // Find all the debugging info in a file and dump it as a Breakpad symbol file.
39 // dump_symbols.h: Read debugging information from an ELF file, and write
40 // it out as a Breakpad symbol file.
42 // This file is derived from the following files in
43 // toolkit/crashreporter/google-breakpad:
44 // src/common/linux/dump_symbols.cc
45 // src/common/linux/elfutils.cc
46 // src/common/linux/file_id.cc
48 #include <errno.h>
49 #include <fcntl.h>
50 #include <libgen.h>
51 #include <stdio.h>
52 #include <string.h>
53 #include <sys/mman.h>
54 #include <sys/stat.h>
55 #include <unistd.h>
56 #include <arpa/inet.h>
58 #include <cstdlib>
59 #include <set>
60 #include <string>
61 #include <vector>
63 #include "mozilla/Assertions.h"
64 #include "mozilla/Sprintf.h"
66 #include "PlatformMacros.h"
67 #include "LulCommonExt.h"
68 #include "LulDwarfExt.h"
69 #include "LulElfInt.h"
70 #include "LulMainInt.h"
72 #if defined(GP_PLAT_arm_android) && !defined(SHT_ARM_EXIDX)
73 // bionic and older glibcs don't define it
74 # define SHT_ARM_EXIDX (SHT_LOPROC + 1)
75 #endif
77 // Old Linux header doesn't define EM_AARCH64
78 #ifndef EM_AARCH64
79 # define EM_AARCH64 183
80 #endif
82 // This namespace contains helper functions.
83 namespace {
85 using lul::DwarfCFIToModule;
86 using lul::FindElfSectionByName;
87 using lul::GetOffset;
88 using lul::IsValidElf;
89 using lul::Summariser;
90 using lul::UniqueStringUniverse;
91 using std::set;
92 using std::string;
93 using std::vector;
// FDWrapper
//
// Owns a file descriptor and closes it on destruction, unless ownership has
// been given up through release().  The value -1 means "no descriptor held".
// The wrapper is non-copyable: a copy would cause the same descriptor to be
// closed twice.
class FDWrapper {
 public:
  explicit FDWrapper(int fd) : fd_(fd) {}
  FDWrapper(const FDWrapper&) = delete;
  FDWrapper& operator=(const FDWrapper&) = delete;
  ~FDWrapper() {
    if (fd_ != -1) close(fd_);
  }

  // Returns the held descriptor; ownership stays with the wrapper.
  int get() const { return fd_; }

  // Returns the held descriptor and forgets it, so that the destructor will
  // not close it.
  int release() {
    const int fd = fd_;
    fd_ = -1;
    return fd;
  }

 private:
  int fd_;
};
118 // MmapWrapper
120 // Wrapper class to make sure mapped regions are unmapped.
122 class MmapWrapper {
123 public:
124 MmapWrapper() : is_set_(false), base_(NULL), size_(0) {}
125 ~MmapWrapper() {
126 if (is_set_ && base_ != NULL) {
127 MOZ_ASSERT(size_ > 0);
128 munmap(base_, size_);
131 void set(void* mapped_address, size_t mapped_size) {
132 is_set_ = true;
133 base_ = mapped_address;
134 size_ = mapped_size;
136 void release() {
137 MOZ_ASSERT(is_set_);
138 is_set_ = false;
139 base_ = NULL;
140 size_ = 0;
143 private:
144 bool is_set_;
145 void* base_;
146 size_t size_;
149 // Set NUM_DW_REGNAMES to be the number of Dwarf register names
150 // appropriate to the machine architecture given in HEADER. Return
151 // true on success, or false if HEADER's machine architecture is not
152 // supported.
153 template <typename ElfClass>
154 bool DwarfCFIRegisterNames(const typename ElfClass::Ehdr* elf_header,
155 unsigned int* num_dw_regnames) {
156 switch (elf_header->e_machine) {
157 case EM_386:
158 *num_dw_regnames = DwarfCFIToModule::RegisterNames::I386();
159 return true;
160 case EM_ARM:
161 *num_dw_regnames = DwarfCFIToModule::RegisterNames::ARM();
162 return true;
163 case EM_X86_64:
164 *num_dw_regnames = DwarfCFIToModule::RegisterNames::X86_64();
165 return true;
166 case EM_MIPS:
167 *num_dw_regnames = DwarfCFIToModule::RegisterNames::MIPS();
168 return true;
169 case EM_AARCH64:
170 *num_dw_regnames = DwarfCFIToModule::RegisterNames::ARM64();
171 return true;
172 default:
173 MOZ_ASSERT(0);
174 return false;
178 template <typename ElfClass>
179 bool LoadDwarfCFI(const string& dwarf_filename,
180 const typename ElfClass::Ehdr* elf_header,
181 const char* section_name,
182 const typename ElfClass::Shdr* section, const bool eh_frame,
183 const typename ElfClass::Shdr* got_section,
184 const typename ElfClass::Shdr* text_section,
185 const bool big_endian, SecMap* smap, uintptr_t text_bias,
186 UniqueStringUniverse* usu, void (*log)(const char*)) {
187 // Find the appropriate set of register names for this file's
188 // architecture.
189 unsigned int num_dw_regs = 0;
190 if (!DwarfCFIRegisterNames<ElfClass>(elf_header, &num_dw_regs)) {
191 fprintf(stderr,
192 "%s: unrecognized ELF machine architecture '%d';"
193 " cannot convert DWARF call frame information\n",
194 dwarf_filename.c_str(), elf_header->e_machine);
195 return false;
198 const lul::Endianness endianness =
199 big_endian ? lul::ENDIANNESS_BIG : lul::ENDIANNESS_LITTLE;
201 // Find the call frame information and its size.
202 const char* cfi = GetOffset<ElfClass, char>(elf_header, section->sh_offset);
203 size_t cfi_size = section->sh_size;
205 // Plug together the parser, handler, and their entourages.
207 // Here's a summariser, which will receive the output of the
208 // parser, create summaries, and add them to |smap|.
209 Summariser summ(smap, text_bias, log);
211 lul::ByteReader reader(endianness);
212 reader.SetAddressSize(ElfClass::kAddrSize);
214 DwarfCFIToModule::Reporter module_reporter(log, dwarf_filename, section_name);
215 DwarfCFIToModule handler(num_dw_regs, &module_reporter, &reader, usu, &summ);
217 // Provide the base addresses for .eh_frame encoded pointers, if
218 // possible.
219 reader.SetCFIDataBase(section->sh_addr, cfi);
220 if (got_section) reader.SetDataBase(got_section->sh_addr);
221 if (text_section) reader.SetTextBase(text_section->sh_addr);
223 lul::CallFrameInfo::Reporter dwarf_reporter(log, dwarf_filename,
224 section_name);
225 lul::CallFrameInfo parser(cfi, cfi_size, &reader, &handler, &dwarf_reporter,
226 eh_frame);
227 parser.Start();
229 return true;
232 bool LoadELF(const string& obj_file, MmapWrapper* map_wrapper,
233 void** elf_header) {
234 int obj_fd = open(obj_file.c_str(), O_RDONLY);
235 if (obj_fd < 0) {
236 fprintf(stderr, "Failed to open ELF file '%s': %s\n", obj_file.c_str(),
237 strerror(errno));
238 return false;
240 FDWrapper obj_fd_wrapper(obj_fd);
241 struct stat st;
242 if (fstat(obj_fd, &st) != 0 && st.st_size <= 0) {
243 fprintf(stderr, "Unable to fstat ELF file '%s': %s\n", obj_file.c_str(),
244 strerror(errno));
245 return false;
247 // Mapping it read-only is good enough. In any case, mapping it
248 // read-write confuses Valgrind's debuginfo acquire/discard
249 // heuristics, making it hard to profile the profiler.
250 void* obj_base = mmap(nullptr, st.st_size, PROT_READ, MAP_PRIVATE, obj_fd, 0);
251 if (obj_base == MAP_FAILED) {
252 fprintf(stderr, "Failed to mmap ELF file '%s': %s\n", obj_file.c_str(),
253 strerror(errno));
254 return false;
256 map_wrapper->set(obj_base, st.st_size);
257 *elf_header = obj_base;
258 if (!IsValidElf(*elf_header)) {
259 fprintf(stderr, "Not a valid ELF file: %s\n", obj_file.c_str());
260 return false;
262 return true;
// Decode the data encoding byte (e_ident[EI_DATA]) of ELF_HEADER.  Sets
// *BIG_ENDIAN and returns true for the two known encodings; for any other
// value prints a diagnostic to stderr, leaves *BIG_ENDIAN alone and returns
// false.
template <typename ElfClass>
bool ElfEndianness(const typename ElfClass::Ehdr* elf_header,
                   bool* big_endian) {
  switch (elf_header->e_ident[EI_DATA]) {
    case ELFDATA2LSB:
      *big_endian = false;
      return true;
    case ELFDATA2MSB:
      *big_endian = true;
      return true;
    default:
      break;
  }

  fprintf(stderr, "bad data encoding in ELF header: %d\n",
          elf_header->e_ident[EI_DATA]);
  return false;
}
// LoadSymbolsInfo
//
// State shared between the two LoadSymbols() calls that may be made for one
// object: the first on the object itself and, if needed, a second on a
// separate debug file reached via the .gnu_debuglink section.
template <typename ElfClass>
class LoadSymbolsInfo {
 public:
  typedef typename ElfClass::Addr Addr;

  explicit LoadSymbolsInfo(const std::vector<std::string>& dbg_dirs)
      : debug_dirs_(dbg_dirs), has_loading_addr_(false) {}

  // Note that SECTION has been loaded.  If it had already been recorded, a
  // message is printed to stderr, so that loading the same section from two
  // different files does not go unnoticed.
  void LoadedSection(const std::string& section) {
    const bool newly_recorded = loaded_sections_.insert(section).second;
    if (!newly_recorded) {
      fprintf(stderr, "Section %s has already been loaded.\n", section.c_str());
    }
  }

  // Full path to the debug ELF file; empty unless something has set it.
  std::string debuglink_file() const { return debuglink_file_; }

 private:
  // Directories in which to search for the debug ELF file.
  const std::vector<std::string>& debug_dirs_;

  // Full path to the debug ELF file.
  std::string debuglink_file_;

  // Indicates whether a loading address is valid.  NOTE(review): no loading
  // address member is visible in this class any more -- confirm whether
  // this flag is still needed.
  bool has_loading_addr_;

  // Names of the ELF sections loaded so far, across calls to LoadSymbols().
  std::set<std::string> loaded_sections_;
};
// Return the preferred loading address of the binary: the p_vaddr of the
// first PT_LOAD entry among the NHEADER entries of PROGRAM_HEADERS, or 0 if
// there is no such entry.
//
// For non-PIC executables (e_type == ET_EXEC), the load address is the
// start address of the first PT_LOAD segment.  (ELF requires the segments
// to be sorted by load address.)  For PIC executables and dynamic libraries
// (e_type == ET_DYN), this address will normally be zero.
template <typename ElfClass>
typename ElfClass::Addr GetLoadingAddress(
    const typename ElfClass::Phdr* program_headers, int nheader) {
  typedef typename ElfClass::Phdr Phdr;

  const Phdr* current = program_headers;
  for (int remaining = nheader; remaining > 0; --remaining, ++current) {
    if (current->p_type == PT_LOAD) {
      return current->p_vaddr;
    }
  }
  return 0;
}
340 template <typename ElfClass>
341 bool LoadSymbols(const string& obj_file, const bool big_endian,
342 const typename ElfClass::Ehdr* elf_header,
343 const bool read_gnu_debug_link,
344 LoadSymbolsInfo<ElfClass>* info, SecMap* smap, void* rx_avma,
345 size_t rx_size, UniqueStringUniverse* usu,
346 void (*log)(const char*)) {
347 typedef typename ElfClass::Phdr Phdr;
348 typedef typename ElfClass::Shdr Shdr;
350 char buf[500];
351 SprintfLiteral(buf, "LoadSymbols: BEGIN %s\n", obj_file.c_str());
352 buf[sizeof(buf) - 1] = 0;
353 log(buf);
355 // This is how the text bias is calculated.
356 // BEGIN CALCULATE BIAS
357 uintptr_t loading_addr = GetLoadingAddress<ElfClass>(
358 GetOffset<ElfClass, Phdr>(elf_header, elf_header->e_phoff),
359 elf_header->e_phnum);
360 uintptr_t text_bias = ((uintptr_t)rx_avma) - loading_addr;
361 SprintfLiteral(buf, "LoadSymbols: rx_avma=%llx, text_bias=%llx",
362 (unsigned long long int)(uintptr_t)rx_avma,
363 (unsigned long long int)text_bias);
364 buf[sizeof(buf) - 1] = 0;
365 log(buf);
366 // END CALCULATE BIAS
368 const Shdr* sections =
369 GetOffset<ElfClass, Shdr>(elf_header, elf_header->e_shoff);
370 const Shdr* section_names = sections + elf_header->e_shstrndx;
371 const char* names =
372 GetOffset<ElfClass, char>(elf_header, section_names->sh_offset);
373 const char* names_end = names + section_names->sh_size;
374 bool found_usable_info = false;
376 // Dwarf Call Frame Information (CFI) is actually independent from
377 // the other DWARF debugging information, and can be used alone.
378 const Shdr* dwarf_cfi_section =
379 FindElfSectionByName<ElfClass>(".debug_frame", SHT_PROGBITS, sections,
380 names, names_end, elf_header->e_shnum);
381 if (dwarf_cfi_section) {
382 // Ignore the return value of this function; even without call frame
383 // information, the other debugging information could be perfectly
384 // useful.
385 info->LoadedSection(".debug_frame");
386 bool result = LoadDwarfCFI<ElfClass>(obj_file, elf_header, ".debug_frame",
387 dwarf_cfi_section, false, 0, 0,
388 big_endian, smap, text_bias, usu, log);
389 found_usable_info = found_usable_info || result;
390 if (result) log("LoadSymbols: read CFI from .debug_frame");
393 // Linux C++ exception handling information can also provide
394 // unwinding data.
395 const Shdr* eh_frame_section =
396 FindElfSectionByName<ElfClass>(".eh_frame", SHT_PROGBITS, sections, names,
397 names_end, elf_header->e_shnum);
398 if (eh_frame_section) {
399 // Pointers in .eh_frame data may be relative to the base addresses of
400 // certain sections. Provide those sections if present.
401 const Shdr* got_section = FindElfSectionByName<ElfClass>(
402 ".got", SHT_PROGBITS, sections, names, names_end, elf_header->e_shnum);
403 const Shdr* text_section = FindElfSectionByName<ElfClass>(
404 ".text", SHT_PROGBITS, sections, names, names_end, elf_header->e_shnum);
405 info->LoadedSection(".eh_frame");
406 // As above, ignore the return value of this function.
407 bool result = LoadDwarfCFI<ElfClass>(
408 obj_file, elf_header, ".eh_frame", eh_frame_section, true, got_section,
409 text_section, big_endian, smap, text_bias, usu, log);
410 found_usable_info = found_usable_info || result;
411 if (result) log("LoadSymbols: read CFI from .eh_frame");
414 SprintfLiteral(buf, "LoadSymbols: END %s\n", obj_file.c_str());
415 buf[sizeof(buf) - 1] = 0;
416 log(buf);
418 return found_usable_info;
// Map ELF_HEADER's e_machine value to the breakpad symbol file identifier
// for that architecture.  Returns NULL for a machine type that is not in the
// table below.
template <typename ElfClass>
const char* ElfArchitecture(const typename ElfClass::Ehdr* elf_header) {
  typedef typename ElfClass::Half Half;

  struct MachineName {
    int machine;
    const char* name;
  };
  static const MachineName kMachineNames[] = {
      {EM_386, "x86"},         {EM_ARM, "arm"},     {EM_AARCH64, "arm64"},
      {EM_MIPS, "mips"},       {EM_PPC64, "ppc64"}, {EM_PPC, "ppc"},
      {EM_S390, "s390"},       {EM_SPARC, "sparc"}, {EM_SPARCV9, "sparcv9"},
      {EM_X86_64, "x86_64"},
  };

  const Half arch = elf_header->e_machine;
  const size_t entry_count = sizeof(kMachineNames) / sizeof(kMachineNames[0]);
  for (size_t i = 0; i < entry_count; ++i) {
    if (kMachineNames[i].machine == arch) {
      return kMachineNames[i].name;
    }
  }
  return NULL;
}
453 // Format the Elf file identifier in IDENTIFIER as a UUID with the
454 // dashes removed.
455 string FormatIdentifier(unsigned char identifier[16]) {
456 char identifier_str[40];
457 lul::FileID::ConvertIdentifierToString(identifier, identifier_str,
458 sizeof(identifier_str));
459 string id_no_dash;
460 for (int i = 0; identifier_str[i] != '\0'; ++i)
461 if (identifier_str[i] != '-') id_no_dash += identifier_str[i];
462 // Add an extra "0" by the end. PDB files on Windows have an 'age'
463 // number appended to the end of the file identifier; this isn't
464 // really used or necessary on other platforms, but be consistent.
465 id_no_dash += '0';
466 return id_no_dash;
// Return the non-directory portion of FILENAME, as computed by basename().
// basename() is given a private, heap-allocated copy of the name rather than
// the caller's string; the copy is freed once the result has been captured.
std::string BaseFileName(const std::string& filename) {
  char* scratch = strdup(filename.c_str());
  const std::string result(basename(scratch));
  free(scratch);
  return result;
}
479 template <typename ElfClass>
480 bool ReadSymbolDataElfClass(const typename ElfClass::Ehdr* elf_header,
481 const string& obj_filename,
482 const vector<string>& debug_dirs, SecMap* smap,
483 void* rx_avma, size_t rx_size,
484 UniqueStringUniverse* usu,
485 void (*log)(const char*)) {
486 typedef typename ElfClass::Ehdr Ehdr;
488 unsigned char identifier[16];
489 if (!lul ::FileID::ElfFileIdentifierFromMappedFile(elf_header, identifier)) {
490 fprintf(stderr, "%s: unable to generate file identifier\n",
491 obj_filename.c_str());
492 return false;
495 const char* architecture = ElfArchitecture<ElfClass>(elf_header);
496 if (!architecture) {
497 fprintf(stderr, "%s: unrecognized ELF machine architecture: %d\n",
498 obj_filename.c_str(), elf_header->e_machine);
499 return false;
502 // Figure out what endianness this file is.
503 bool big_endian;
504 if (!ElfEndianness<ElfClass>(elf_header, &big_endian)) return false;
506 string name = BaseFileName(obj_filename);
507 string os = "Linux";
508 string id = FormatIdentifier(identifier);
510 LoadSymbolsInfo<ElfClass> info(debug_dirs);
511 if (!LoadSymbols<ElfClass>(obj_filename, big_endian, elf_header,
512 !debug_dirs.empty(), &info, smap, rx_avma, rx_size,
513 usu, log)) {
514 const string debuglink_file = info.debuglink_file();
515 if (debuglink_file.empty()) return false;
517 // Load debuglink ELF file.
518 fprintf(stderr, "Found debugging info in %s\n", debuglink_file.c_str());
519 MmapWrapper debug_map_wrapper;
520 Ehdr* debug_elf_header = NULL;
521 if (!LoadELF(debuglink_file, &debug_map_wrapper,
522 reinterpret_cast<void**>(&debug_elf_header)))
523 return false;
524 // Sanity checks to make sure everything matches up.
525 const char* debug_architecture =
526 ElfArchitecture<ElfClass>(debug_elf_header);
527 if (!debug_architecture) {
528 fprintf(stderr, "%s: unrecognized ELF machine architecture: %d\n",
529 debuglink_file.c_str(), debug_elf_header->e_machine);
530 return false;
532 if (strcmp(architecture, debug_architecture)) {
533 fprintf(stderr,
534 "%s with ELF machine architecture %s does not match "
535 "%s with ELF architecture %s\n",
536 debuglink_file.c_str(), debug_architecture, obj_filename.c_str(),
537 architecture);
538 return false;
541 bool debug_big_endian;
542 if (!ElfEndianness<ElfClass>(debug_elf_header, &debug_big_endian))
543 return false;
544 if (debug_big_endian != big_endian) {
545 fprintf(stderr, "%s and %s does not match in endianness\n",
546 obj_filename.c_str(), debuglink_file.c_str());
547 return false;
550 if (!LoadSymbols<ElfClass>(debuglink_file, debug_big_endian,
551 debug_elf_header, false, &info, smap, rx_avma,
552 rx_size, usu, log)) {
553 return false;
557 return true;
560 } // namespace
562 namespace lul {
564 bool ReadSymbolDataInternal(const uint8_t* obj_file, const string& obj_filename,
565 const vector<string>& debug_dirs, SecMap* smap,
566 void* rx_avma, size_t rx_size,
567 UniqueStringUniverse* usu,
568 void (*log)(const char*)) {
569 if (!IsValidElf(obj_file)) {
570 fprintf(stderr, "Not a valid ELF file: %s\n", obj_filename.c_str());
571 return false;
574 int elfclass = ElfClass(obj_file);
575 if (elfclass == ELFCLASS32) {
576 return ReadSymbolDataElfClass<ElfClass32>(
577 reinterpret_cast<const Elf32_Ehdr*>(obj_file), obj_filename, debug_dirs,
578 smap, rx_avma, rx_size, usu, log);
580 if (elfclass == ELFCLASS64) {
581 return ReadSymbolDataElfClass<ElfClass64>(
582 reinterpret_cast<const Elf64_Ehdr*>(obj_file), obj_filename, debug_dirs,
583 smap, rx_avma, rx_size, usu, log);
586 return false;
589 bool ReadSymbolData(const string& obj_file, const vector<string>& debug_dirs,
590 SecMap* smap, void* rx_avma, size_t rx_size,
591 UniqueStringUniverse* usu, void (*log)(const char*)) {
592 MmapWrapper map_wrapper;
593 void* elf_header = NULL;
594 if (!LoadELF(obj_file, &map_wrapper, &elf_header)) return false;
596 return ReadSymbolDataInternal(reinterpret_cast<uint8_t*>(elf_header),
597 obj_file, debug_dirs, smap, rx_avma, rx_size,
598 usu, log);
601 namespace {
603 template <typename ElfClass>
604 void FindElfClassSection(const char* elf_base, const char* section_name,
605 typename ElfClass::Word section_type,
606 const void** section_start, int* section_size) {
607 typedef typename ElfClass::Ehdr Ehdr;
608 typedef typename ElfClass::Shdr Shdr;
610 MOZ_ASSERT(elf_base);
611 MOZ_ASSERT(section_start);
612 MOZ_ASSERT(section_size);
614 MOZ_ASSERT(strncmp(elf_base, ELFMAG, SELFMAG) == 0);
616 const Ehdr* elf_header = reinterpret_cast<const Ehdr*>(elf_base);
617 MOZ_ASSERT(elf_header->e_ident[EI_CLASS] == ElfClass::kClass);
619 const Shdr* sections =
620 GetOffset<ElfClass, Shdr>(elf_header, elf_header->e_shoff);
621 const Shdr* section_names = sections + elf_header->e_shstrndx;
622 const char* names =
623 GetOffset<ElfClass, char>(elf_header, section_names->sh_offset);
624 const char* names_end = names + section_names->sh_size;
626 const Shdr* section =
627 FindElfSectionByName<ElfClass>(section_name, section_type, sections,
628 names, names_end, elf_header->e_shnum);
630 if (section != NULL && section->sh_size > 0) {
631 *section_start = elf_base + section->sh_offset;
632 *section_size = section->sh_size;
636 template <typename ElfClass>
637 void FindElfClassSegment(const char* elf_base,
638 typename ElfClass::Word segment_type,
639 const void** segment_start, int* segment_size) {
640 typedef typename ElfClass::Ehdr Ehdr;
641 typedef typename ElfClass::Phdr Phdr;
643 MOZ_ASSERT(elf_base);
644 MOZ_ASSERT(segment_start);
645 MOZ_ASSERT(segment_size);
647 MOZ_ASSERT(strncmp(elf_base, ELFMAG, SELFMAG) == 0);
649 const Ehdr* elf_header = reinterpret_cast<const Ehdr*>(elf_base);
650 MOZ_ASSERT(elf_header->e_ident[EI_CLASS] == ElfClass::kClass);
652 const Phdr* phdrs =
653 GetOffset<ElfClass, Phdr>(elf_header, elf_header->e_phoff);
655 for (int i = 0; i < elf_header->e_phnum; ++i) {
656 if (phdrs[i].p_type == segment_type) {
657 *segment_start = elf_base + phdrs[i].p_offset;
658 *segment_size = phdrs[i].p_filesz;
659 return;
664 } // namespace
// Returns true if the memory at ELF_BASE starts with the ELF magic bytes.
bool IsValidElf(const void* elf_base) {
  const char* magic = reinterpret_cast<const char*>(elf_base);
  const int difference = strncmp(magic, ELFMAG, SELFMAG);
  return difference == 0;
}
// Returns the class byte (e_ident[EI_CLASS]) of the ELF header at ELF_BASE.
int ElfClass(const void* elf_base) {
  const ElfW(Ehdr)* header = static_cast<const ElfW(Ehdr)*>(elf_base);
  const int elf_class = header->e_ident[EI_CLASS];
  return elf_class;
}
676 bool FindElfSection(const void* elf_mapped_base, const char* section_name,
677 uint32_t section_type, const void** section_start,
678 int* section_size, int* elfclass) {
679 MOZ_ASSERT(elf_mapped_base);
680 MOZ_ASSERT(section_start);
681 MOZ_ASSERT(section_size);
683 *section_start = NULL;
684 *section_size = 0;
686 if (!IsValidElf(elf_mapped_base)) return false;
688 int cls = ElfClass(elf_mapped_base);
689 if (elfclass) {
690 *elfclass = cls;
693 const char* elf_base = static_cast<const char*>(elf_mapped_base);
695 if (cls == ELFCLASS32) {
696 FindElfClassSection<ElfClass32>(elf_base, section_name, section_type,
697 section_start, section_size);
698 return *section_start != NULL;
699 } else if (cls == ELFCLASS64) {
700 FindElfClassSection<ElfClass64>(elf_base, section_name, section_type,
701 section_start, section_size);
702 return *section_start != NULL;
705 return false;
708 bool FindElfSegment(const void* elf_mapped_base, uint32_t segment_type,
709 const void** segment_start, int* segment_size,
710 int* elfclass) {
711 MOZ_ASSERT(elf_mapped_base);
712 MOZ_ASSERT(segment_start);
713 MOZ_ASSERT(segment_size);
715 *segment_start = NULL;
716 *segment_size = 0;
718 if (!IsValidElf(elf_mapped_base)) return false;
720 int cls = ElfClass(elf_mapped_base);
721 if (elfclass) {
722 *elfclass = cls;
725 const char* elf_base = static_cast<const char*>(elf_mapped_base);
727 if (cls == ELFCLASS32) {
728 FindElfClassSegment<ElfClass32>(elf_base, segment_type, segment_start,
729 segment_size);
730 return *segment_start != NULL;
731 } else if (cls == ELFCLASS64) {
732 FindElfClassSegment<ElfClass64>(elf_base, segment_type, segment_start,
733 segment_size);
734 return *segment_start != NULL;
737 return false;
740 // (derived from)
741 // file_id.cc: Return a unique identifier for a file
743 // See file_id.h for documentation
// ELF note name and desc are 32-bits word padded: round |a| up to the next
// multiple of 4.  The argument is parenthesized so that expressions built
// from lower-precedence operators (e.g. `x | y`) expand correctly.
#define NOTE_PADDING(a) (((a) + 3) & ~3)
749 // These functions are also used inside the crashed process, so be safe
750 // and use the syscall/libc wrappers instead of direct syscalls or libc.
752 template <typename ElfClass>
753 static bool ElfClassBuildIDNoteIdentifier(const void* section, int length,
754 uint8_t identifier[kMDGUIDSize]) {
755 typedef typename ElfClass::Nhdr Nhdr;
757 const void* section_end = reinterpret_cast<const char*>(section) + length;
758 const Nhdr* note_header = reinterpret_cast<const Nhdr*>(section);
759 while (reinterpret_cast<const void*>(note_header) < section_end) {
760 if (note_header->n_type == NT_GNU_BUILD_ID) break;
761 note_header = reinterpret_cast<const Nhdr*>(
762 reinterpret_cast<const char*>(note_header) + sizeof(Nhdr) +
763 NOTE_PADDING(note_header->n_namesz) +
764 NOTE_PADDING(note_header->n_descsz));
766 if (reinterpret_cast<const void*>(note_header) >= section_end ||
767 note_header->n_descsz == 0) {
768 return false;
771 const char* build_id = reinterpret_cast<const char*>(note_header) +
772 sizeof(Nhdr) + NOTE_PADDING(note_header->n_namesz);
773 // Copy as many bits of the build ID as will fit
774 // into the GUID space.
775 memset(identifier, 0, kMDGUIDSize);
776 memcpy(identifier, build_id,
777 std::min(kMDGUIDSize, (size_t)note_header->n_descsz));
779 return true;
782 // Attempt to locate a .note.gnu.build-id section in an ELF binary
783 // and copy as many bytes of it as will fit into |identifier|.
784 static bool FindElfBuildIDNote(const void* elf_mapped_base,
785 uint8_t identifier[kMDGUIDSize]) {
786 void* note_section;
787 int note_size, elfclass;
788 if ((!FindElfSegment(elf_mapped_base, PT_NOTE, (const void**)&note_section,
789 &note_size, &elfclass) ||
790 note_size == 0) &&
791 (!FindElfSection(elf_mapped_base, ".note.gnu.build-id", SHT_NOTE,
792 (const void**)&note_section, &note_size, &elfclass) ||
793 note_size == 0)) {
794 return false;
797 if (elfclass == ELFCLASS32) {
798 return ElfClassBuildIDNoteIdentifier<ElfClass32>(note_section, note_size,
799 identifier);
800 } else if (elfclass == ELFCLASS64) {
801 return ElfClassBuildIDNoteIdentifier<ElfClass64>(note_section, note_size,
802 identifier);
805 return false;
808 // Attempt to locate the .text section of an ELF binary and generate
809 // a simple hash by XORing the first page worth of bytes into |identifier|.
810 static bool HashElfTextSection(const void* elf_mapped_base,
811 uint8_t identifier[kMDGUIDSize]) {
812 void* text_section;
813 int text_size;
814 if (!FindElfSection(elf_mapped_base, ".text", SHT_PROGBITS,
815 (const void**)&text_section, &text_size, NULL) ||
816 text_size == 0) {
817 return false;
820 memset(identifier, 0, kMDGUIDSize);
821 const uint8_t* ptr = reinterpret_cast<const uint8_t*>(text_section);
822 const uint8_t* ptr_end = ptr + std::min(text_size, 4096);
823 while (ptr < ptr_end) {
824 for (unsigned i = 0; i < kMDGUIDSize; i++) identifier[i] ^= ptr[i];
825 ptr += kMDGUIDSize;
827 return true;
830 // static
831 bool FileID::ElfFileIdentifierFromMappedFile(const void* base,
832 uint8_t identifier[kMDGUIDSize]) {
833 // Look for a build id note first.
834 if (FindElfBuildIDNote(base, identifier)) return true;
836 // Fall back on hashing the first page of the text section.
837 return HashElfTextSection(base, identifier);
840 // static
841 void FileID::ConvertIdentifierToString(const uint8_t identifier[kMDGUIDSize],
842 char* buffer, int buffer_length) {
843 uint8_t identifier_swapped[kMDGUIDSize];
845 // Endian-ness swap to match dump processor expectation.
846 memcpy(identifier_swapped, identifier, kMDGUIDSize);
847 uint32_t* data1 = reinterpret_cast<uint32_t*>(identifier_swapped);
848 *data1 = htonl(*data1);
849 uint16_t* data2 = reinterpret_cast<uint16_t*>(identifier_swapped + 4);
850 *data2 = htons(*data2);
851 uint16_t* data3 = reinterpret_cast<uint16_t*>(identifier_swapped + 6);
852 *data3 = htons(*data3);
854 int buffer_idx = 0;
855 for (unsigned int idx = 0;
856 (buffer_idx < buffer_length) && (idx < kMDGUIDSize); ++idx) {
857 int hi = (identifier_swapped[idx] >> 4) & 0x0F;
858 int lo = (identifier_swapped[idx]) & 0x0F;
860 if (idx == 4 || idx == 6 || idx == 8 || idx == 10)
861 buffer[buffer_idx++] = '-';
863 buffer[buffer_idx++] = (hi >= 10) ? 'A' + hi - 10 : '0' + hi;
864 buffer[buffer_idx++] = (lo >= 10) ? 'A' + lo - 10 : '0' + lo;
867 // NULL terminate
868 buffer[(buffer_idx < buffer_length) ? buffer_idx : buffer_idx - 1] = 0;
871 } // namespace lul