1 // Copyright (c) 2006, Google Inc.
2 // All rights reserved.
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above
11 // copyright notice, this list of conditions and the following disclaimer
12 // in the documentation and/or other materials provided with the
14 // * Neither the name of Google Inc. nor the names of its
15 // contributors may be used to endorse or promote products derived from
16 // this software without specific prior written permission.
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 // Author: Satoru Takabayashi
31 // Stack-footprint reduction work done by Raksit Ashok
33 // Implementation note:
35 // We don't use heaps but only use stacks. We want to reduce the
36 // stack consumption so that the symbolizer can run on small stacks.
38 // Here are some numbers collected with GCC 4.1.0 on x86:
39 // - sizeof(Elf32_Sym) = 16
40 // - sizeof(Elf32_Shdr) = 40
41 // - sizeof(Elf64_Sym) = 24
42 // - sizeof(Elf64_Shdr) = 64
44 // This implementation is intended to be async-signal-safe but uses
45 // some functions which are not guaranteed to be so, such as memchr()
46 // and memmove(). We assume they are async-signal-safe.
49 #include "build/build_config.h"
50 #include "utilities.h"
52 #if defined(HAVE_SYMBOLIZE)
56 #include "symbolize.h"
59 _START_GOOGLE_NAMESPACE_
// assert() is not guaranteed to be async-signal-safe, so this file
// relies on its own minimal assertion macro instead.  So far there has
// been no need for pretty printing of __FILE__, etc.

// Expression-friendly wrapper around abort(): because it returns int it
// can be used as an operand of the ?: operator in SAFE_ASSERT.
static int AssertFail() {
  abort();
  return 0;  // Never reached; present only to satisfy the return type.
}

#define SAFE_ASSERT(expr) ((expr) ? 0 : AssertFail())
73 static SymbolizeCallback g_symbolize_callback
= NULL
;
74 void InstallSymbolizeCallback(SymbolizeCallback callback
) {
75 g_symbolize_callback
= callback
;
78 // This function wraps the Demangle function to provide an interface
79 // where the input symbol is demangled in-place.
80 // To keep stack consumption low, we would like this function to not
82 static ATTRIBUTE_NOINLINE
void DemangleInplace(char *out
, int out_size
) {
83 char demangled
[256]; // Big enough for sane demangled symbols.
84 if (Demangle(out
, demangled
, sizeof(demangled
))) {
85 // Demangling succeeded. Copy to out if the space allows.
86 int len
= strlen(demangled
);
87 if (len
+ 1 <= out_size
) { // +1 for '\0'.
88 SAFE_ASSERT(len
< sizeof(demangled
));
89 memmove(out
, demangled
, len
+ 1);
94 _END_GOOGLE_NAMESPACE_
99 #if defined(OS_OPENBSD)
100 #include <sys/exec_elf.h>
112 #include <sys/stat.h>
113 #include <sys/types.h>
116 #include "symbolize.h"
118 #include "glog/raw_logging.h"
120 // Re-runs fn until it doesn't cause EINTR.
121 #define NO_INTR(fn) do {} while ((fn) < 0 && errno == EINTR)
123 _START_GOOGLE_NAMESPACE_
125 // Read up to "count" bytes from file descriptor "fd" into the buffer
126 // starting at "buf" while handling short reads and EINTR. On
127 // success, return the number of bytes read. Otherwise, return -1.
128 static ssize_t
ReadPersistent(const int fd
, void *buf
, const size_t count
) {
129 SAFE_ASSERT(fd
>= 0);
130 SAFE_ASSERT(count
<= std::numeric_limits
<ssize_t
>::max());
131 char *buf0
= reinterpret_cast<char *>(buf
);
132 ssize_t num_bytes
= 0;
133 while (num_bytes
< count
) {
135 NO_INTR(len
= read(fd
, buf0
+ num_bytes
, count
- num_bytes
));
136 if (len
< 0) { // There was an error other than EINTR.
139 if (len
== 0) { // Reached EOF.
144 SAFE_ASSERT(num_bytes
<= count
);
148 // Read up to "count" bytes from "offset" in the file pointed by file
149 // descriptor "fd" into the buffer starting at "buf". On success,
150 // return the number of bytes read. Otherwise, return -1.
151 static ssize_t
ReadFromOffset(const int fd
, void *buf
,
152 const size_t count
, const off_t offset
) {
153 off_t off
= lseek(fd
, offset
, SEEK_SET
);
154 if (off
== (off_t
)-1) {
157 return ReadPersistent(fd
, buf
, count
);
160 // Try reading exactly "count" bytes from "offset" bytes in a file
161 // pointed by "fd" into the buffer starting at "buf" while handling
162 // short reads and EINTR. On success, return true. Otherwise, return
164 static bool ReadFromOffsetExact(const int fd
, void *buf
,
165 const size_t count
, const off_t offset
) {
166 ssize_t len
= ReadFromOffset(fd
, buf
, count
, offset
);
170 // Returns elf_header.e_type if the file pointed by fd is an ELF binary.
171 static int FileGetElfType(const int fd
) {
172 ElfW(Ehdr
) elf_header
;
173 if (!ReadFromOffsetExact(fd
, &elf_header
, sizeof(elf_header
), 0)) {
176 if (memcmp(elf_header
.e_ident
, ELFMAG
, SELFMAG
) != 0) {
179 return elf_header
.e_type
;
182 // Read the section headers in the given ELF binary, and if a section
183 // of the specified type is found, set the output to this section header
184 // and return true. Otherwise, return false.
185 // To keep stack consumption low, we would like this function to not get
187 static ATTRIBUTE_NOINLINE
bool
188 GetSectionHeaderByType(const int fd
, ElfW(Half
) sh_num
, const off_t sh_offset
,
189 ElfW(Word
) type
, ElfW(Shdr
) *out
) {
190 // Read at most 16 section headers at a time to save read calls.
192 for (int i
= 0; i
< sh_num
;) {
193 const ssize_t num_bytes_left
= (sh_num
- i
) * sizeof(buf
[0]);
194 const ssize_t num_bytes_to_read
=
195 (sizeof(buf
) > num_bytes_left
) ? num_bytes_left
: sizeof(buf
);
196 const ssize_t len
= ReadFromOffset(fd
, buf
, num_bytes_to_read
,
197 sh_offset
+ i
* sizeof(buf
[0]));
198 SAFE_ASSERT(len
% sizeof(buf
[0]) == 0);
199 const ssize_t num_headers_in_buf
= len
/ sizeof(buf
[0]);
200 SAFE_ASSERT(num_headers_in_buf
<= sizeof(buf
) / sizeof(buf
[0]));
201 for (int j
= 0; j
< num_headers_in_buf
; ++j
) {
202 if (buf
[j
].sh_type
== type
) {
207 i
+= num_headers_in_buf
;
212 // There is no particular reason to limit section name to 63 characters,
213 // but there has (as yet) been no need for anything longer either.
214 const int kMaxSectionNameLen
= 64;
216 // name_len should include terminating '\0'.
217 bool GetSectionHeaderByName(int fd
, const char *name
, size_t name_len
,
219 ElfW(Ehdr
) elf_header
;
220 if (!ReadFromOffsetExact(fd
, &elf_header
, sizeof(elf_header
), 0)) {
225 off_t shstrtab_offset
= (elf_header
.e_shoff
+
226 elf_header
.e_shentsize
* elf_header
.e_shstrndx
);
227 if (!ReadFromOffsetExact(fd
, &shstrtab
, sizeof(shstrtab
), shstrtab_offset
)) {
231 for (int i
= 0; i
< elf_header
.e_shnum
; ++i
) {
232 off_t section_header_offset
= (elf_header
.e_shoff
+
233 elf_header
.e_shentsize
* i
);
234 if (!ReadFromOffsetExact(fd
, out
, sizeof(*out
), section_header_offset
)) {
237 char header_name
[kMaxSectionNameLen
];
238 if (sizeof(header_name
) < name_len
) {
239 RAW_LOG(WARNING
, "Section name '%s' is too long (%"PRIuS
"); "
240 "section will not be found (even if present).", name
, name_len
);
241 // No point in even trying.
244 off_t name_offset
= shstrtab
.sh_offset
+ out
->sh_name
;
245 ssize_t n_read
= ReadFromOffset(fd
, &header_name
, name_len
, name_offset
);
248 } else if (n_read
!= name_len
) {
249 // Short read -- name could be at end of file.
252 if (memcmp(header_name
, name
, name_len
) == 0) {
259 // Read a symbol table and look for the symbol containing the
260 // pc. Iterate over symbols in a symbol table and look for the symbol
261 // containing "pc". On success, return true and write the symbol name
262 // to out. Otherwise, return false.
263 // To keep stack consumption low, we would like this function to not get
265 static ATTRIBUTE_NOINLINE
bool
266 FindSymbol(uint64_t pc
, const int fd
, char *out
, int out_size
,
267 uint64_t symbol_offset
, const ElfW(Shdr
) *strtab
,
268 const ElfW(Shdr
) *symtab
) {
269 if (symtab
== NULL
) {
272 const int num_symbols
= symtab
->sh_size
/ symtab
->sh_entsize
;
273 for (int i
= 0; i
< num_symbols
;) {
274 off_t offset
= symtab
->sh_offset
+ i
* symtab
->sh_entsize
;
276 // If we are reading Elf64_Sym's, we want to limit this array to
277 // 32 elements (to keep stack consumption low), otherwise we can
278 // have a 64 element Elf32_Sym array.
280 #define NUM_SYMBOLS 32
282 #define NUM_SYMBOLS 64
285 // Read at most NUM_SYMBOLS symbols at once to save read() calls.
286 ElfW(Sym
) buf
[NUM_SYMBOLS
];
287 const ssize_t len
= ReadFromOffset(fd
, &buf
, sizeof(buf
), offset
);
288 SAFE_ASSERT(len
% sizeof(buf
[0]) == 0);
289 const ssize_t num_symbols_in_buf
= len
/ sizeof(buf
[0]);
290 SAFE_ASSERT(num_symbols_in_buf
<= sizeof(buf
)/sizeof(buf
[0]));
291 for (int j
= 0; j
< num_symbols_in_buf
; ++j
) {
292 const ElfW(Sym
)& symbol
= buf
[j
];
293 uint64_t start_address
= symbol
.st_value
;
294 start_address
+= symbol_offset
;
295 uint64_t end_address
= start_address
+ symbol
.st_size
;
296 if (symbol
.st_value
!= 0 && // Skip null value symbols.
297 symbol
.st_shndx
!= 0 && // Skip undefined symbols.
298 start_address
<= pc
&& pc
< end_address
) {
299 ssize_t len1
= ReadFromOffset(fd
, out
, out_size
,
300 strtab
->sh_offset
+ symbol
.st_name
);
301 if (len1
<= 0 || memchr(out
, '\0', out_size
) == NULL
) {
304 return true; // Obtained the symbol name.
307 i
+= num_symbols_in_buf
;
312 // Get the symbol name of "pc" from the file pointed by "fd". Process
313 // both regular and dynamic symbol tables if necessary. On success,
314 // write the symbol name to "out" and return true. Otherwise, return
316 static bool GetSymbolFromObjectFile(const int fd
, uint64_t pc
,
317 char *out
, int out_size
,
318 uint64_t map_start_address
) {
319 // Read the ELF header.
320 ElfW(Ehdr
) elf_header
;
321 if (!ReadFromOffsetExact(fd
, &elf_header
, sizeof(elf_header
), 0)) {
325 uint64_t symbol_offset
= 0;
326 if (elf_header
.e_type
== ET_DYN
) { // DSO needs offset adjustment.
327 symbol_offset
= map_start_address
;
330 ElfW(Shdr
) symtab
, strtab
;
332 // Consult a regular symbol table first.
333 if (GetSectionHeaderByType(fd
, elf_header
.e_shnum
, elf_header
.e_shoff
,
334 SHT_SYMTAB
, &symtab
)) {
335 if (!ReadFromOffsetExact(fd
, &strtab
, sizeof(strtab
), elf_header
.e_shoff
+
336 symtab
.sh_link
* sizeof(symtab
))) {
339 if (FindSymbol(pc
, fd
, out
, out_size
, symbol_offset
,
341 return true; // Found the symbol in a regular symbol table.
345 // If the symbol is not found, then consult a dynamic symbol table.
346 if (GetSectionHeaderByType(fd
, elf_header
.e_shnum
, elf_header
.e_shoff
,
347 SHT_DYNSYM
, &symtab
)) {
348 if (!ReadFromOffsetExact(fd
, &strtab
, sizeof(strtab
), elf_header
.e_shoff
+
349 symtab
.sh_link
* sizeof(symtab
))) {
352 if (FindSymbol(pc
, fd
, out
, out_size
, symbol_offset
,
354 return true; // Found the symbol in a dynamic symbol table.
362 // Thin wrapper around a file descriptor so that the file descriptor
363 // gets closed for sure.
364 struct FileDescriptor
{
366 explicit FileDescriptor(int fd
) : fd_(fd
) {}
372 int get() { return fd_
; }
375 explicit FileDescriptor(const FileDescriptor
&);
376 void operator=(const FileDescriptor
&);
379 // Helper class for reading lines from file.
381 // Note: we don't use ProcMapsIterator since the object is big (it has
382 // a 5k array member) and uses async-unsafe functions such as sscanf()
386 explicit LineReader(int fd
, char *buf
, int buf_len
) : fd_(fd
),
387 buf_(buf
), buf_len_(buf_len
), bol_(buf
), eol_(buf
), eod_(buf
) {
390 // Read '\n'-terminated line from file. On success, modify "bol"
391 // and "eol", then return true. Otherwise, return false.
393 // Note: if the last line doesn't end with '\n', the line will be
394 // dropped. It's an intentional behavior to make the code simple.
395 bool ReadLine(const char **bol
, const char **eol
) {
396 if (BufferIsEmpty()) { // First time.
397 const ssize_t num_bytes
= ReadPersistent(fd_
, buf_
, buf_len_
);
398 if (num_bytes
<= 0) { // EOF or error.
401 eod_
= buf_
+ num_bytes
;
404 bol_
= eol_
+ 1; // Advance to the next line in the buffer.
405 SAFE_ASSERT(bol_
<= eod_
); // "bol_" can point to "eod_".
406 if (!HasCompleteLine()) {
407 const int incomplete_line_length
= eod_
- bol_
;
408 // Move the trailing incomplete line to the beginning.
409 memmove(buf_
, bol_
, incomplete_line_length
);
410 // Read text from file and append it.
411 char * const append_pos
= buf_
+ incomplete_line_length
;
412 const int capacity_left
= buf_len_
- incomplete_line_length
;
413 const ssize_t num_bytes
= ReadPersistent(fd_
, append_pos
,
415 if (num_bytes
<= 0) { // EOF or error.
418 eod_
= append_pos
+ num_bytes
;
422 eol_
= FindLineFeed();
423 if (eol_
== NULL
) { // '\n' not found. Malformed line.
426 *eol_
= '\0'; // Replace '\n' with '\0'.
433 // Beginning of line.
444 explicit LineReader(const LineReader
&);
445 void operator=(const LineReader
&);
447 char *FindLineFeed() {
448 return reinterpret_cast<char *>(memchr(bol_
, '\n', eod_
- bol_
));
451 bool BufferIsEmpty() {
455 bool HasCompleteLine() {
456 return !BufferIsEmpty() && FindLineFeed() != NULL
;
464 const char *eod_
; // End of data in "buf_".
468 // Place the hex number read from "start" into "*hex". The pointer to
469 // the first non-hex character or "end" is returned.
470 static char *GetHex(const char *start
, const char *end
, uint64_t *hex
) {
473 for (p
= start
; p
< end
; ++p
) {
475 if ((ch
>= '0' && ch
<= '9') ||
476 (ch
>= 'A' && ch
<= 'F') || (ch
>= 'a' && ch
<= 'f')) {
477 *hex
= (*hex
<< 4) | (ch
< 'A' ? ch
- '0' : (ch
& 0xF) + 9);
478 } else { // Encountered the first non-hex character.
482 SAFE_ASSERT(p
<= end
);
483 return const_cast<char *>(p
);
486 // Search for the object file (from /proc/self/maps) that contains
487 // the specified pc. If found, open this file and return the file handle,
488 // and also set start_address to the start address of where this object
489 // file is mapped to in memory. Otherwise, return -1.
490 static ATTRIBUTE_NOINLINE
int
491 OpenObjectFileContainingPcAndGetStartAddress(uint64_t pc
,
492 uint64_t &start_address
) {
495 // Open /proc/self/maps.
497 NO_INTR(maps_fd
= open("/proc/self/maps", O_RDONLY
));
498 FileDescriptor
wrapped_maps_fd(maps_fd
);
499 if (wrapped_maps_fd
.get() < 0) {
503 // Iterate over maps and look for the map containing the pc. Then
504 // look into the symbol tables inside.
505 char buf
[1024]; // Big enough for line of sane /proc/self/maps
506 LineReader
reader(wrapped_maps_fd
.get(), buf
, sizeof(buf
));
510 if (!reader
.ReadLine(&cursor
, &eol
)) { // EOF or malformed line.
514 // Start parsing line in /proc/self/maps. Here is an example:
516 // 08048000-0804c000 r-xp 00000000 08:01 2142121 /bin/cat
518 // We want start address (08048000), end address (0804c000), flags
519 // (r-xp) and file name (/bin/cat).
521 // Read start address.
522 cursor
= GetHex(cursor
, eol
, &start_address
);
523 if (cursor
== eol
|| *cursor
!= '-') {
524 return -1; // Malformed line.
526 ++cursor
; // Skip '-'.
529 uint64_t end_address
;
530 cursor
= GetHex(cursor
, eol
, &end_address
);
531 if (cursor
== eol
|| *cursor
!= ' ') {
532 return -1; // Malformed line.
534 ++cursor
; // Skip ' '.
536 // Check start and end addresses.
537 if (!(start_address
<= pc
&& pc
< end_address
)) {
538 continue; // We skip this map. PC isn't in this map.
541 // Read flags. Skip flags until we encounter a space or eol.
542 const char * const flags_start
= cursor
;
543 while (cursor
< eol
&& *cursor
!= ' ') {
546 // We expect at least four letters for flags (ex. "r-xp").
547 if (cursor
== eol
|| cursor
< flags_start
+ 4) {
548 return -1; // Malformed line.
551 // Check flags. We are only interested in "r-x" maps.
552 if (memcmp(flags_start
, "r-x", 3) != 0) { // Not a "r-x" map.
553 continue; // We skip this map.
555 ++cursor
; // Skip ' '.
557 // Skip to file name. "cursor" now points to file offset. We need to
558 // skip at least three spaces for file offset, dev, and inode.
560 while (cursor
< eol
) {
561 if (*cursor
== ' ') {
563 } else if (num_spaces
>= 3) {
564 // The first non-space character after skipping three spaces
565 // is the beginning of the file name.
571 return -1; // Malformed line.
574 // Finally, "cursor" now points to file name of our interest.
575 NO_INTR(object_fd
= open(cursor
, O_RDONLY
));
583 // The implementation of our symbolization routine. If it
584 // successfully finds the symbol containing "pc" and obtains the
585 // symbol name, returns true and write the symbol name to "out".
586 // Otherwise, returns false. If Callback function is installed via
587 // InstallSymbolizeCallback(), the function is also called in this function,
588 // and "out" is used as its output.
589 // To keep stack consumption low, we would like this function to not
591 static ATTRIBUTE_NOINLINE
bool SymbolizeAndDemangle(void *pc
, char *out
,
593 uint64_t pc0
= reinterpret_cast<uintptr_t>(pc
);
594 uint64_t start_address
= 0;
596 int object_fd
= OpenObjectFileContainingPcAndGetStartAddress(pc0
,
598 if (object_fd
== -1) {
601 FileDescriptor
wrapped_object_fd(object_fd
);
602 int elf_type
= FileGetElfType(wrapped_object_fd
.get());
603 if (elf_type
== -1) {
606 if (g_symbolize_callback
) {
607 // Run the call back if it's installed.
608 // Note: relocation (and much of the rest of this code) will be
609 // wrong for prelinked shared libraries and PIE executables.
610 uint64 relocation
= (elf_type
== ET_DYN
) ? start_address
: 0;
611 int num_bytes_written
= g_symbolize_callback(wrapped_object_fd
.get(),
614 if (num_bytes_written
> 0) {
615 out
+= num_bytes_written
;
616 out_size
-= num_bytes_written
;
619 if (!GetSymbolFromObjectFile(wrapped_object_fd
.get(), pc0
,
620 out
, out_size
, start_address
)) {
624 // Symbolization succeeded. Now we try to demangle the symbol.
625 DemangleInplace(out
, out_size
);
629 _END_GOOGLE_NAMESPACE_
631 #elif defined(OS_MACOSX) && defined(HAVE_DLADDR)
636 _START_GOOGLE_NAMESPACE_
638 static ATTRIBUTE_NOINLINE
bool SymbolizeAndDemangle(void *pc
, char *out
,
641 if (dladdr(pc
, &info
)) {
642 if (strlen(info
.dli_sname
) < out_size
) {
643 strcpy(out
, info
.dli_sname
);
644 // Symbolization succeeded. Now we try to demangle the symbol.
645 DemangleInplace(out
, out_size
);
652 _END_GOOGLE_NAMESPACE_
655 # error BUG: HAVE_SYMBOLIZE was wrongly set
658 _START_GOOGLE_NAMESPACE_
660 bool Symbolize(void *pc
, char *out
, int out_size
) {
661 SAFE_ASSERT(out_size
>= 0);
662 return SymbolizeAndDemangle(pc
, out
, out_size
);
665 _END_GOOGLE_NAMESPACE_
667 #else /* HAVE_SYMBOLIZE */
673 _START_GOOGLE_NAMESPACE_
// TODO: Support other environments.
// Fallback used when HAVE_SYMBOLIZE is not defined: symbolization is
// unavailable, so this asserts and (if assertions are compiled out)
// reports failure.
bool Symbolize(void *pc, char *out, int out_size) {
  assert(0);
  return false;
}
681 _END_GOOGLE_NAMESPACE_