/*--------------------------------------------------------------------*/
/*--- Reading of syms & debug info from Mach-O files.              ---*/
/*---                                                  readmacho.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2005-2013 Apple Inc.
      Greg Parker  gparker@apple.com

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

32 #if defined(VGO_darwin)
34 #include "pub_core_basics.h"
35 #include "pub_core_vki.h"
36 #include "pub_core_libcbase.h"
37 #include "pub_core_libcprint.h"
38 #include "pub_core_libcassert.h"
39 #include "pub_core_libcfile.h"
40 #include "pub_core_libcproc.h"
41 #include "pub_core_aspacemgr.h" /* for mmaping debuginfo files */
42 #include "pub_core_machine.h" /* VG_ELF_CLASS */
43 #include "pub_core_options.h"
44 #include "pub_core_oset.h"
45 #include "pub_core_tooliface.h" /* VG_(needs) */
46 #include "pub_core_xarray.h"
47 #include "pub_core_clientstate.h"
48 #include "pub_core_debuginfo.h"
50 #include "priv_misc.h"
51 #include "priv_image.h"
52 #include "priv_d3basics.h"
53 #include "priv_tytypes.h"
54 #include "priv_storage.h"
55 #include "priv_readmacho.h"
56 #include "priv_readdwarf.h"
57 #include "priv_readdwarf3.h"
59 /* --- !!! --- EXTERNAL HEADERS start --- !!! --- */
60 #include <mach-o/loader.h>
61 #include <mach-o/nlist.h>
62 #include <mach-o/fat.h>
63 /* --- !!! --- EXTERNAL HEADERS end --- !!! --- */
/* Map the generic names used throughout this file onto the 32-bit or
   64-bit flavour of the Mach-O constants and structures, depending
   on the word size we are being built for. */
#if VG_WORDSIZE == 4
# define MAGIC MH_MAGIC
# define MACH_HEADER mach_header
# define LC_SEGMENT_CMD LC_SEGMENT
# define SEGMENT_COMMAND segment_command
# define SECTION section
# define NLIST nlist
#else
# define MAGIC MH_MAGIC_64
# define MACH_HEADER mach_header_64
# define LC_SEGMENT_CMD LC_SEGMENT_64
# define SEGMENT_COMMAND segment_command_64
# define SECTION section_64
# define NLIST nlist_64
#endif


/*------------------------------------------------------------*/
/*---                                                      ---*/
/*--- Mach-O file mapping/unmapping helpers                ---*/
/*---                                                      ---*/
/*------------------------------------------------------------*/

/* A DiSlice is used to handle the thin/fat distinction for MachO images.
   (1) the entire mapped-in ("primary") image, fat headers, kitchen sink,
       whatnot: the entire file.  This is the DiImage* that is the backing
       for the DiSlice.
   (2) the Mach-O object of interest, which is presumably somewhere inside
       the primary image.  map_image_aboard() below, which generates this
       info, will carefully check that the macho_ fields denote a section of
       memory that falls entirely inside the primary image.
*/

98 Bool
ML_(is_macho_object_file
)( const void* buf
, SizeT szB
)
100 /* (JRS: the Mach-O headers might not be in this mapped data,
101 because we only mapped a page for this initial check,
102 or at least not very much, and what's at the start of the file
103 is in general a so-called fat header. The Mach-O object we're
104 interested in could be arbitrarily far along the image, and so
105 we can't assume its header will fall within this page.) */
107 /* But we can say that either it's a fat object, in which case it
108 begins with a fat header, or it's unadorned Mach-O, in which
109 case it starts with a normal header. At least do what checks we
110 can to establish whether or not we're looking at something
113 const struct fat_header
* fh_be
= buf
;
114 const struct MACH_HEADER
* mh
= buf
;
117 if (szB
< sizeof(struct fat_header
))
119 if (VG_(ntohl
)(fh_be
->magic
) == FAT_MAGIC
)
122 if (szB
< sizeof(struct MACH_HEADER
))
124 if (mh
->magic
== MAGIC
)
131 /* Unmap an image mapped in by map_image_aboard. */
132 static void unmap_image ( /*MOD*/DiSlice
* sli
)
135 if (ML_(sli_is_valid
)(*sli
)) {
136 ML_(img_done
)(sli
->img
);
137 *sli
= DiSlice_INVALID
;
142 /* Open the given file, find the thin part if necessary, do some
143 checks, and return a DiSlice containing details of both the thin
144 part and (implicitly, via the contained DiImage*) the fat part.
145 returns DiSlice_INVALID if it fails. If it succeeds, the returned
146 slice is guaranteed to refer to a valid(ish) Mach-O image. */
147 static DiSlice
map_image_aboard ( DebugInfo
* di
, /* only for err msgs */
148 const HChar
* filename
)
150 DiSlice sli
= DiSlice_INVALID
;
152 /* First off, try to map the thing in. */
153 DiImage
* mimg
= ML_(img_from_local_file
)(filename
);
155 VG_(message
)(Vg_UserMsg
, "warning: connection to image %s failed\n",
157 VG_(message
)(Vg_UserMsg
, " no symbols or debug info loaded\n" );
158 return DiSlice_INVALID
;
161 /* Now we have a viable DiImage* for it. Look for the embedded
162 Mach-O object. If not findable, close the image and fail. */
163 DiOffT fh_be_ioff
= 0;
164 struct fat_header fh_be
;
165 struct fat_header fh
;
167 // Assume initially that we have a thin image, and narrow
168 // the bounds if it turns out to be fat. This stores |mimg| as
169 // |sli.img|, so NULL out |mimg| after this point, for the sake of
171 sli
= ML_(sli_from_img
)(mimg
);
174 // Check for fat header.
175 if (ML_(img_size
)(sli
.img
) < sizeof(struct fat_header
)) {
176 ML_(symerr
)(di
, True
, "Invalid Mach-O file (0 too small).");
180 // Fat header is always BIG-ENDIAN
181 ML_(img_get
)(&fh_be
, sli
.img
, fh_be_ioff
, sizeof(fh_be
));
182 VG_(memset
)(&fh
, 0, sizeof(fh
));
183 fh
.magic
= VG_(ntohl
)(fh_be
.magic
);
184 fh
.nfat_arch
= VG_(ntohl
)(fh_be
.nfat_arch
);
185 if (fh
.magic
== FAT_MAGIC
) {
186 // Look for a good architecture.
187 if (ML_(img_size
)(sli
.img
) < sizeof(struct fat_header
)
188 + fh
.nfat_arch
* sizeof(struct fat_arch
)) {
189 ML_(symerr
)(di
, True
, "Invalid Mach-O file (1 too small).");
194 for (f
= 0, arch_be_ioff
= sizeof(struct fat_header
);
196 f
++, arch_be_ioff
+= sizeof(struct fat_arch
)) {
197 # if defined(VGA_ppc)
198 Int cputype
= CPU_TYPE_POWERPC
;
199 # elif defined(VGA_ppc64be)
200 Int cputype
= CPU_TYPE_POWERPC64BE
;
201 # elif defined(VGA_ppc64le)
202 Int cputype
= CPU_TYPE_POWERPC64LE
;
203 # elif defined(VGA_x86)
204 Int cputype
= CPU_TYPE_X86
;
205 # elif defined(VGA_amd64)
206 Int cputype
= CPU_TYPE_X86_64
;
208 # error "unknown architecture"
210 struct fat_arch arch_be
;
211 struct fat_arch arch
;
212 ML_(img_get
)(&arch_be
, sli
.img
, arch_be_ioff
, sizeof(arch_be
));
213 VG_(memset
)(&arch
, 0, sizeof(arch
));
214 arch
.cputype
= VG_(ntohl
)(arch_be
.cputype
);
215 arch
.cpusubtype
= VG_(ntohl
)(arch_be
.cpusubtype
);
216 arch
.offset
= VG_(ntohl
)(arch_be
.offset
);
217 arch
.size
= VG_(ntohl
)(arch_be
.size
);
218 if (arch
.cputype
== cputype
) {
219 if (ML_(img_size
)(sli
.img
) < arch
.offset
+ arch
.size
) {
220 ML_(symerr
)(di
, True
, "Invalid Mach-O file (2 too small).");
223 /* Found a suitable arch. Narrow down the slice accordingly. */
224 sli
.ioff
= arch
.offset
;
229 if (f
== fh
.nfat_arch
) {
230 ML_(symerr
)(di
, True
,
231 "No acceptable architecture found in fat file.");
236 /* Sanity check what we found. */
238 /* assured by logic above */
239 vg_assert(ML_(img_size
)(sli
.img
) >= sizeof(struct fat_header
));
241 if (sli
.szB
< sizeof(struct MACH_HEADER
)) {
242 ML_(symerr
)(di
, True
, "Invalid Mach-O file (3 too small).");
246 if (sli
.szB
> ML_(img_size
)(sli
.img
)) {
247 ML_(symerr
)(di
, True
, "Invalid Mach-O file (thin bigger than fat).");
251 if (sli
.ioff
>= 0 && sli
.ioff
+ sli
.szB
<= ML_(img_size
)(sli
.img
)) {
252 /* thin entirely within fat, as expected */
254 ML_(symerr
)(di
, True
, "Invalid Mach-O file (thin not inside fat).");
258 /* Peer at the Mach header for the thin object, starting at the
259 beginning of the slice, to check it's at least marginally
261 struct MACH_HEADER mh
;
262 ML_(cur_read_get
)(&mh
, ML_(cur_from_sli
)(sli
), sizeof(mh
));
263 if (mh
.magic
!= MAGIC
) {
264 ML_(symerr
)(di
, True
, "Invalid Mach-O file (bad magic).");
268 if (sli
.szB
< sizeof(struct MACH_HEADER
) + mh
.sizeofcmds
) {
269 ML_(symerr
)(di
, True
, "Invalid Mach-O file (4 too small).");
273 /* "main image is plausible" */
275 vg_assert(ML_(img_size
)(sli
.img
) > 0);
276 /* "thin image exists and is a sub-part (or all) of main image" */
277 vg_assert(sli
.ioff
>= 0);
278 vg_assert(sli
.szB
> 0);
279 vg_assert(sli
.ioff
+ sli
.szB
<= ML_(img_size
)(sli
.img
));
280 return sli
; /* success */
285 return DiSlice_INVALID
; /* bah! */
/*------------------------------------------------------------*/
/*---                                                      ---*/
/*--- Mach-O symbol table reading                          ---*/
/*---                                                      ---*/
/*------------------------------------------------------------*/

295 /* Read a symbol table (nlist). Add the resulting candidate symbols
296 to 'syms'; the caller will post-process them and hand them off to
297 ML_(addSym) itself. */
299 void read_symtab( /*OUT*/XArray
* /* DiSym */ syms
,
300 struct _DebugInfo
* di
,
301 DiCursor symtab_cur
, UInt symtab_count
,
302 DiCursor strtab_cur
, UInt strtab_sz
)
307 // "start_according_to_valgrind"
308 static const HChar
* s_a_t_v
= NULL
; /* do not make non-static */
310 for (i
= 0; i
< symtab_count
; i
++) {
312 ML_(cur_read_get
)(&nl
,
313 ML_(cur_plus
)(symtab_cur
, i
* sizeof(struct NLIST
)),
317 if ((nl
.n_type
& N_TYPE
) == N_SECT
) {
318 sym_addr
= di
->text_bias
+ nl
.n_value
;
319 /*} else if ((nl.n_type & N_TYPE) == N_ABS) {
320 GrP fixme don't ignore absolute symbols?
321 sym_addr = nl.n_value; */
326 if (di
->trace_symtab
) {
327 HChar
* str
= ML_(cur_read_strdup
)(
328 ML_(cur_plus
)(strtab_cur
, nl
.n_un
.n_strx
),
330 VG_(printf
)("nlist raw: avma %010lx %s\n", sym_addr
, str
);
331 ML_(dinfo_free
)(str
);
334 /* If no part of the symbol falls within the mapped range,
336 if (sym_addr
<= di
->text_avma
337 || sym_addr
>= di
->text_avma
+di
->text_size
) {
341 /* skip names which point outside the string table;
342 following these risks segfaulting Valgrind */
343 if (nl
.n_un
.n_strx
< 0 || nl
.n_un
.n_strx
>= strtab_sz
) {
348 = ML_(cur_read_strdup
)( ML_(cur_plus
)(strtab_cur
, nl
.n_un
.n_strx
),
351 /* skip nameless symbols; these appear to be common, but
354 ML_(dinfo_free
)(name
);
358 VG_(bzero_inline
)(&disym
, sizeof(disym
));
359 disym
.avmas
.main
= sym_addr
;
360 SET_TOCPTR_AVMA(disym
, 0);
361 SET_LOCAL_EP_AVMA(disym
, 0);
362 disym
.pri_name
= ML_(addStr
)(di
, name
, -1);
363 disym
.sec_names
= NULL
;
364 disym
.size
= // let canonicalize fix it
365 di
->text_avma
+di
->text_size
- sym_addr
;
367 disym
.isIFunc
= False
;
368 // Lots of user function names get prepended with an underscore. Eg. the
369 // function 'f' becomes the symbol '_f'. And the "below main"
370 // function is called "start". So we skip the leading underscore, and
371 // if we see 'start' and --show-below-main=no, we rename it as
372 // "start_according_to_valgrind", which makes it easy to spot later
373 // and display as "(below main)".
374 if (disym
.pri_name
[0] == '_') {
377 else if (!VG_(clo_show_below_main
) && VG_STREQ(disym
.pri_name
, "start")) {
379 s_a_t_v
= ML_(addStr
)(di
, "start_according_to_valgrind", -1);
381 disym
.pri_name
= s_a_t_v
;
384 vg_assert(disym
.pri_name
);
385 VG_(addToXA
)( syms
, &disym
);
386 ML_(dinfo_free
)(name
);
391 /* Compare DiSyms by their start address, and for equal addresses, use
392 the primary name as a secondary sort key. */
393 static Int
cmp_DiSym_by_start_then_name ( const void* v1
, const void* v2
)
395 const DiSym
* s1
= (DiSym
*)v1
;
396 const DiSym
* s2
= (DiSym
*)v2
;
397 if (s1
->avmas
.main
< s2
->avmas
.main
) return -1;
398 if (s1
->avmas
.main
> s2
->avmas
.main
) return 1;
399 return VG_(strcmp
)(s1
->pri_name
, s2
->pri_name
);
402 /* 'cand' is a bunch of candidate symbols obtained by reading
403 nlist-style symbol table entries. Their ends may overlap, so sort
404 them and truncate them accordingly. The code in this routine is
405 copied almost verbatim from read_symbol_table() in readxcoff.c. */
406 static void tidy_up_cand_syms ( /*MOD*/XArray
* /* of DiSym */ syms
,
409 Word nsyms
, i
, j
, k
, m
;
411 nsyms
= VG_(sizeXA
)(syms
);
413 VG_(setCmpFnXA
)(syms
, cmp_DiSym_by_start_then_name
);
416 /* We only know for sure the start addresses (actual VMAs) of
417 symbols, and an overestimation of their end addresses. So sort
418 by start address, then clip each symbol so that its end address
419 does not overlap with the next one along.
421 There is a small refinement: if a group of symbols have the same
422 address, treat them as a group: find the next symbol along that
423 has a higher start address, and clip all of the group
424 accordingly. This clips the group as a whole so as not to
425 overlap following symbols. This leaves prefersym() in
426 storage.c, which is not nlist-specific, to later decide which of
427 the symbols in the group to keep.
429 Another refinement is that we need to get rid of symbols which,
430 after clipping, have identical starts, ends, and names. So the
431 sorting uses the name as a secondary key.
434 for (i
= 0; i
< nsyms
; i
++) {
437 && ((DiSym
*)VG_(indexXA
)(syms
,i
))->avmas
.main
438 == ((DiSym
*)VG_(indexXA
)(syms
,k
))->avmas
.main
;
441 /* So now [i .. k-1] is a group all with the same start address.
442 Clip their ending addresses so they don't overlap [k]. In
443 the normal case (no overlaps), k == i+1. */
445 DiSym
* next
= (DiSym
*)VG_(indexXA
)(syms
,k
);
446 for (m
= i
; m
< k
; m
++) {
447 DiSym
* here
= (DiSym
*)VG_(indexXA
)(syms
,m
);
448 vg_assert(here
->avmas
.main
< next
->avmas
.main
);
449 if (here
->avmas
.main
+ here
->size
> next
->avmas
.main
)
450 here
->size
= next
->avmas
.main
- here
->avmas
.main
;
454 vg_assert(i
<= nsyms
);
460 for (i
= 1; i
< nsyms
; i
++) {
461 DiSym
*s_j1
, *s_j
, *s_i
;
463 s_j1
= (DiSym
*)VG_(indexXA
)(syms
, j
-1);
464 s_j
= (DiSym
*)VG_(indexXA
)(syms
, j
);
465 s_i
= (DiSym
*)VG_(indexXA
)(syms
, i
);
466 if (s_i
->avmas
.main
!= s_j1
->avmas
.main
467 || s_i
->size
!= s_j1
->size
468 || 0 != VG_(strcmp
)(s_i
->pri_name
, s_j1
->pri_name
)) {
473 VG_(printf
)("nlist cleanup: dump duplicate avma %010lx %s\n",
474 s_i
->avmas
.main
, s_i
->pri_name
);
478 vg_assert(j
>= 0 && j
<= nsyms
);
479 VG_(dropTailXA
)(syms
, nsyms
- j
);
/*------------------------------------------------------------*/
/*---                                                      ---*/
/*--- Mach-O top-level processing                          ---*/
/*---                                                      ---*/
/*------------------------------------------------------------*/

/* Path fragment appended to a file or bundle name to reach the DWARF
   file inside a dSYM directory.  May be overridden by defining it
   before this point. */
#if !defined(APPLE_DSYM_EXT_AND_SUBDIRECTORY)
#define APPLE_DSYM_EXT_AND_SUBDIRECTORY ".dSYM/Contents/Resources/DWARF/"
#endif


494 static Bool
file_exists_p(const HChar
*path
)
497 SysRes res
= VG_(stat
)(path
, &sbuf
);
498 return sr_isError(res
) ? False
: True
;
502 /* Search for an existing dSYM file as a possible separate debug file.
505 find_separate_debug_file (const HChar
*executable_name
)
507 const HChar
*basename_str
;
512 /* Make sure the object file name itself doesn't contain ".dSYM" in it or we
513 will end up with an infinite loop where after we add a dSYM symbol file,
514 it will then enter this function asking if there is a debug file for the
516 if (VG_(strcasestr
) (executable_name
, ".dSYM") == NULL
)
518 /* Check for the existence of a .dSYM file for a given executable. */
519 basename_str
= VG_(basename
) (executable_name
);
520 dsymfile
= ML_(dinfo_zalloc
)("di.readmacho.dsymfile",
521 VG_(strlen
) (executable_name
)
522 + VG_(strlen
) (APPLE_DSYM_EXT_AND_SUBDIRECTORY
)
523 + VG_(strlen
) (basename_str
)
527 /* First try for the dSYM in the same directory as the original file. */
528 VG_(strcpy
) (dsymfile
, executable_name
);
529 VG_(strcat
) (dsymfile
, APPLE_DSYM_EXT_AND_SUBDIRECTORY
);
530 VG_(strcat
) (dsymfile
, basename_str
);
532 if (file_exists_p (dsymfile
))
535 /* Now search for any parent directory that has a '.' in it so we can find
536 Mac OS X applications, bundles, plugins, and any other kinds of files.
537 Mac OS X application bundles wil have their program in
538 "/some/path/MyApp.app/Contents/MacOS/MyApp" (or replace ".app" with
539 ".bundle" or ".plugin" for other types of bundles). So we look for any
540 prior '.' character and try appending the apple dSYM extension and
541 subdirectory and see if we find an existing dSYM file (in the above
542 MyApp example the dSYM would be at either:
543 "/some/path/MyApp.app.dSYM/Contents/Resources/DWARF/MyApp" or
544 "/some/path/MyApp.dSYM/Contents/Resources/DWARF/MyApp". */
545 VG_(strcpy
) (dsymfile
, VG_(dirname
) (executable_name
));
546 while ((dot_ptr
= VG_(strrchr
) (dsymfile
, '.')))
548 /* Find the directory delimiter that follows the '.' character since
549 we now look for a .dSYM that follows any bundle extension. */
550 slash_ptr
= VG_(strchr
) (dot_ptr
, '/');
553 /* NULL terminate the string at the '/' character and append
554 the path down to the dSYM file. */
556 VG_(strcat
) (slash_ptr
, APPLE_DSYM_EXT_AND_SUBDIRECTORY
);
557 VG_(strcat
) (slash_ptr
, basename_str
);
558 if (file_exists_p (dsymfile
))
562 /* NULL terminate the string at the '.' character and append
563 the path down to the dSYM file. */
565 VG_(strcat
) (dot_ptr
, APPLE_DSYM_EXT_AND_SUBDIRECTORY
);
566 VG_(strcat
) (dot_ptr
, basename_str
);
567 if (file_exists_p (dsymfile
))
570 /* NULL terminate the string at the '.' locatated by the strrchr()
574 /* We found a previous extension '.' character and did not find a
575 dSYM file so now find previous directory delimiter so we don't
576 try multiple times on a file name that may have a version number
577 in it such as "/some/path/MyApp.6.0.4.app". */
578 slash_ptr
= VG_(strrchr
) (dsymfile
, '/');
581 /* NULL terminate the string at the previous directory character
591 /* Given a DiSlice covering the entire Mach-O thin image, find the
592 DiSlice for the specified (segname, sectname) pairing, if
593 possible. Also return the section's .addr field in *svma if
595 static DiSlice
getsectdata ( DiSlice img
,
596 const HChar
*segname
, const HChar
*sectname
,
599 DiCursor cur
= ML_(cur_from_sli
)(img
);
601 struct MACH_HEADER mh
;
602 ML_(cur_step_get
)(&mh
, &cur
, sizeof(mh
));
605 for (c
= 0; c
< mh
.ncmds
; c
++) {
606 struct load_command cmd
;
607 ML_(cur_read_get
)(&cmd
, cur
, sizeof(cmd
));
608 if (cmd
.cmd
== LC_SEGMENT_CMD
) {
609 struct SEGMENT_COMMAND seg
;
610 ML_(cur_read_get
)(&seg
, cur
, sizeof(seg
));
611 if (0 == VG_(strncmp(&seg
.segname
[0],
612 segname
, sizeof(seg
.segname
)))) {
613 DiCursor sects_cur
= ML_(cur_plus
)(cur
, sizeof(seg
));
615 for (s
= 0; s
< seg
.nsects
; s
++) {
617 ML_(cur_step_get
)(§
, §s_cur
, sizeof(sect
));
618 if (0 == VG_(strncmp(sect
.sectname
, sectname
,
619 sizeof(sect
.sectname
)))) {
621 res
.ioff
= sect
.offset
;
623 if (svma
) *svma
= (Addr
)sect
.addr
;
630 cur
= ML_(cur_plus
)(cur
, cmd
.cmdsize
);
633 return DiSlice_INVALID
;
637 /* Brute force just simply search for uuid[0..15] in |sli| */
638 static Bool
check_uuid_matches ( DiSlice sli
, UChar
* uuid
)
643 /* Work through the slice in 1 KB chunks. */
644 UChar first
= uuid
[0];
645 DiOffT min_off
= sli
.ioff
;
646 DiOffT max1_off
= sli
.ioff
+ sli
.szB
;
647 DiOffT curr_off
= min_off
;
648 vg_assert(min_off
< max1_off
);
650 vg_assert(curr_off
>= min_off
&& curr_off
<= max1_off
);
651 if (curr_off
== max1_off
) break;
652 DiOffT avail
= max1_off
- curr_off
;
653 vg_assert(avail
> 0 && avail
<= max1_off
);
654 if (avail
> 1024) avail
= 1024;
656 SizeT nGot
= ML_(img_get_some
)(buf
, sli
.img
, curr_off
, avail
);
657 vg_assert(nGot
>= 1 && nGot
<= avail
);
659 /* Scan through the 1K chunk we got, looking for the start char. */
660 for (i
= 0; i
< (UInt
)nGot
; i
++) {
661 if (LIKELY(buf
[i
] != first
))
663 /* first char matches. See if we can get 16 bytes at this
664 offset, and compare. */
665 if (curr_off
+ i
< max1_off
&& max1_off
- (curr_off
+ i
) >= 16) {
667 ML_(img_get
)(&buff16
[0], sli
.img
, curr_off
+ i
, 16);
668 if (0 == VG_(memcmp
)(&buff16
[0], &uuid
[0], 16))
678 /* Heuristic kludge: return True if this looks like an installed
679 standard library; hence we shouldn't consider automagically running
681 static Bool
is_systemish_library_name ( const HChar
* name
)
684 if (0 == VG_(strncasecmp
)(name
, "/usr/", 5)
685 || 0 == VG_(strncasecmp
)(name
, "/bin/", 5)
686 || 0 == VG_(strncasecmp
)(name
, "/sbin/", 6)
687 || 0 == VG_(strncasecmp
)(name
, "/opt/", 5)
688 || 0 == VG_(strncasecmp
)(name
, "/sw/", 4)
689 || 0 == VG_(strncasecmp
)(name
, "/System/", 8)
690 || 0 == VG_(strncasecmp
)(name
, "/Library/", 9)
691 || 0 == VG_(strncasecmp
)(name
, "/Applications/", 14)) {
699 Bool
ML_(read_macho_debug_info
)( struct _DebugInfo
* di
)
701 DiSlice msli
= DiSlice_INVALID
; // the main image
702 DiSlice dsli
= DiSlice_INVALID
; // the debuginfo image
703 DiCursor sym_cur
= DiCursor_INVALID
;
704 DiCursor dysym_cur
= DiCursor_INVALID
;
705 HChar
* dsymfilename
= NULL
;
706 Bool have_uuid
= False
;
709 const DebugInfoMapping
* rx_map
= NULL
;
710 const DebugInfoMapping
* rw_map
= NULL
;
712 /* mmap the object file to look for di->soname and di->text_bias
713 and uuid and nlist */
715 /* This should be ensured by our caller (that we're in the accept
717 vg_assert(di
->fsm
.have_rx_map
);
718 vg_assert(di
->fsm
.have_rw_map
);
720 for (i
= 0; i
< VG_(sizeXA
)(di
->fsm
.maps
); i
++) {
721 const DebugInfoMapping
* map
= VG_(indexXA
)(di
->fsm
.maps
, i
);
722 if (map
->rx
&& !rx_map
)
724 if (map
->rw
&& !rw_map
)
726 if (rx_map
&& rw_map
)
732 if (VG_(clo_verbosity
) > 1)
733 VG_(message
)(Vg_DebugMsg
,
734 "%s (rx at %#lx, rw at %#lx)\n", di
->fsm
.filename
,
735 rx_map
->avma
, rw_map
->avma
);
737 VG_(memset
)(&uuid
, 0, sizeof(uuid
));
739 msli
= map_image_aboard( di
, di
->fsm
.filename
);
740 if (!ML_(sli_is_valid
)(msli
)) {
741 ML_(symerr
)(di
, False
, "Connect to main image failed.");
745 vg_assert(msli
.img
!= NULL
&& msli
.szB
> 0);
747 /* Poke around in the Mach-O header, to find some important
749 // Find LC_SYMTAB and LC_DYSYMTAB, if present.
750 // Read di->soname from LC_ID_DYLIB if present,
751 // or from LC_ID_DYLINKER if present,
753 // Get di->text_bias (aka slide) based on the corresponding LC_SEGMENT
754 // Get uuid for later dsym search
759 DiCursor cmd_cur
= ML_(cur_from_sli
)(msli
);
761 struct MACH_HEADER mh
;
762 ML_(cur_step_get
)(&mh
, &cmd_cur
, sizeof(mh
));
764 /* Now cur_cmd points just after the Mach header, right at the
765 start of the load commands, which is where we need it to start
766 the following loop. */
769 for (c
= 0; c
< mh
.ncmds
; c
++) {
770 struct load_command cmd
;
771 ML_(cur_read_get
)(&cmd
, cmd_cur
, sizeof(cmd
));
773 if (cmd
.cmd
== LC_SYMTAB
) {
776 else if (cmd
.cmd
== LC_DYSYMTAB
) {
779 else if (cmd
.cmd
== LC_ID_DYLIB
&& mh
.filetype
== MH_DYLIB
) {
781 struct dylib_command dcmd
;
782 ML_(cur_read_get
)(&dcmd
, cmd_cur
, sizeof(dcmd
));
783 DiCursor dylibname_cur
784 = ML_(cur_plus
)(cmd_cur
, dcmd
.dylib
.name
.offset
);
786 = ML_(cur_read_strdup
)(dylibname_cur
, "di.rmdi.1");
787 HChar
* soname
= VG_(strrchr
)(dylibname
, '/');
788 if (!soname
) soname
= dylibname
;
790 di
->soname
= ML_(dinfo_strdup
)("di.readmacho.dylibname",
792 ML_(dinfo_free
)(dylibname
);
794 else if (cmd
.cmd
==LC_ID_DYLINKER
&& mh
.filetype
==MH_DYLINKER
) {
795 struct dylinker_command dcmd
;
796 ML_(cur_read_get
)(&dcmd
, cmd_cur
, sizeof(dcmd
));
797 DiCursor dylinkername_cur
798 = ML_(cur_plus
)(cmd_cur
, dcmd
.name
.offset
);
800 = ML_(cur_read_strdup
)(dylinkername_cur
, "di.rmdi.2");
801 HChar
* soname
= VG_(strrchr
)(dylinkername
, '/');
802 if (!soname
) soname
= dylinkername
;
804 di
->soname
= ML_(dinfo_strdup
)("di.readmacho.dylinkername",
806 ML_(dinfo_free
)(dylinkername
);
809 // A comment from Julian about why varinfo[35] fail:
811 // My impression is, from comparing the output of otool -l for these
812 // executables with the logic in ML_(read_macho_debug_info),
813 // specifically the part that begins "else if (cmd->cmd ==
814 // LC_SEGMENT_CMD) {", that it's a complete hack which just happens
815 // to work ok for text symbols. In particular, it appears to assume
816 // that in a "struct load_command" of type LC_SEGMENT_CMD, the first
817 // "struct SEGMENT_COMMAND" inside it is going to contain the info we
818 // need. However, otool -l shows, and also the Apple docs state,
819 // that a struct load_command may contain an arbitrary number of
820 // struct SEGMENT_COMMANDs, so I'm not sure why it's OK to merely
821 // snarf the first. But I'm not sure about this.
823 // The "Try for __DATA" block below simply adds acquisition of data
824 // svma/bias values using the same assumption. It also needs
825 // (probably) to deal with bss sections, but I don't understand how
826 // this all ties together really, so it requires further study.
828 // If you can get your head around the relationship between MachO
829 // segments, sections and load commands, this might be relatively
830 // easy to fix properly.
832 // Basically we need to come up with plausible numbers for di->
833 // {text,data,bss}_{avma,svma}, from which the _bias numbers are
834 // then trivially derived. Then I think the debuginfo reader should
836 else if (cmd
.cmd
== LC_SEGMENT_CMD
) {
837 struct SEGMENT_COMMAND seg
;
838 ML_(cur_read_get
)(&seg
, cmd_cur
, sizeof(seg
));
840 if (!di
->text_present
841 && 0 == VG_(strcmp
)(&seg
.segname
[0], "__TEXT")
842 /* DDD: is the next line a kludge? -- JRS */
843 && seg
.fileoff
== 0 && seg
.filesize
!= 0) {
844 di
->text_present
= True
;
845 di
->text_svma
= (Addr
)seg
.vmaddr
;
846 di
->text_avma
= rx_map
->avma
;
847 di
->text_size
= seg
.vmsize
;
848 di
->text_bias
= di
->text_avma
- di
->text_svma
;
849 /* Make the _debug_ values be the same as the
850 svma/bias for the primary object, since there is
851 no secondary (debuginfo) object, but nevertheless
852 downstream biasing of Dwarf3 relies on the
854 di
->text_debug_svma
= di
->text_svma
;
855 di
->text_debug_bias
= di
->text_bias
;
858 if (!di
->data_present
859 && 0 == VG_(strcmp
)(&seg
.segname
[0], "__DATA")
860 /* && DDD:seg->fileoff == 0 */ && seg
.filesize
!= 0) {
861 di
->data_present
= True
;
862 di
->data_svma
= (Addr
)seg
.vmaddr
;
863 di
->data_avma
= rw_map
->avma
;
864 di
->data_size
= seg
.vmsize
;
865 di
->data_bias
= di
->data_avma
- di
->data_svma
;
866 di
->data_debug_svma
= di
->data_svma
;
867 di
->data_debug_bias
= di
->data_bias
;
870 else if (cmd
.cmd
== LC_UUID
) {
871 ML_(cur_read_get
)(&uuid
, cmd_cur
, sizeof(uuid
));
874 // Move the cursor along
875 cmd_cur
= ML_(cur_plus
)(cmd_cur
, cmd
.cmdsize
);
880 di
->soname
= ML_(dinfo_strdup
)("di.readmacho.noname", "NONE");
883 if (di
->trace_symtab
) {
885 VG_(printf
)("SONAME = %s\n", di
->soname
);
889 /* Now we have the base object to hand. Read symbols from it. */
891 // We already asserted that ..
892 vg_assert(msli
.img
!= NULL
&& msli
.szB
> 0);
894 if (ML_(cur_is_valid
)(sym_cur
) && ML_(cur_is_valid
)(dysym_cur
)) {
896 struct symtab_command symcmd
;
897 struct dysymtab_command dysymcmd
;
899 ML_(cur_read_get
)(&symcmd
, sym_cur
, sizeof(symcmd
));
900 ML_(cur_read_get
)(&dysymcmd
, dysym_cur
, sizeof(dysymcmd
));
902 /* Read nlist symbol table */
903 DiCursor syms
= DiCursor_INVALID
;
904 DiCursor strs
= DiCursor_INVALID
;
905 XArray
* /* DiSym */ candSyms
= NULL
;
908 if (msli
.szB
< symcmd
.stroff
+ symcmd
.strsize
909 || msli
.szB
< symcmd
.symoff
+ symcmd
.nsyms
910 * sizeof(struct NLIST
)) {
911 ML_(symerr
)(di
, False
, "Invalid Mach-O file (5 too small).");
914 if (dysymcmd
.ilocalsym
+ dysymcmd
.nlocalsym
> symcmd
.nsyms
915 || dysymcmd
.iextdefsym
+ dysymcmd
.nextdefsym
> symcmd
.nsyms
) {
916 ML_(symerr
)(di
, False
, "Invalid Mach-O file (bad symbol table).");
920 syms
= ML_(cur_plus
)(ML_(cur_from_sli
)(msli
), symcmd
.symoff
);
921 strs
= ML_(cur_plus
)(ML_(cur_from_sli
)(msli
), symcmd
.stroff
);
923 if (VG_(clo_verbosity
) > 1)
924 VG_(message
)(Vg_DebugMsg
,
925 " reading syms from primary file (%d %d)\n",
926 dysymcmd
.nextdefsym
, dysymcmd
.nlocalsym
);
928 /* Read candidate symbols into 'candSyms', so we can truncate
929 overlapping ends and generally tidy up, before presenting
930 them to ML_(addSym). */
931 candSyms
= VG_(newXA
)(
932 ML_(dinfo_zalloc
), "di.readmacho.candsyms.1",
933 ML_(dinfo_free
), sizeof(DiSym
)
937 read_symtab(candSyms
,
940 dysymcmd
.iextdefsym
* sizeof(struct NLIST
)),
941 dysymcmd
.nextdefsym
, strs
, symcmd
.strsize
);
942 // static and private_extern symbols
943 read_symtab(candSyms
,
946 dysymcmd
.ilocalsym
* sizeof(struct NLIST
)),
947 dysymcmd
.nlocalsym
, strs
, symcmd
.strsize
);
949 /* tidy up the cand syms -- trim overlapping ends. May resize
951 tidy_up_cand_syms( candSyms
, di
->trace_symtab
);
953 /* and finally present them to ML_(addSym) */
954 nCandSyms
= VG_(sizeXA
)( candSyms
);
955 for (i
= 0; i
< nCandSyms
; i
++) {
956 DiSym
* cand
= (DiSym
*) VG_(indexXA
)( candSyms
, i
);
957 vg_assert(cand
->pri_name
!= NULL
);
958 vg_assert(cand
->sec_names
== NULL
);
959 if (di
->trace_symtab
)
960 VG_(printf
)("nlist final: acquire avma %010lx-%010lx %s\n",
961 cand
->avmas
.main
, cand
->avmas
.main
+ cand
->size
- 1,
963 ML_(addSym
)( di
, cand
);
965 VG_(deleteXA
)( candSyms
);
968 /* If there's no UUID in the primary, don't even bother to try and
969 read any DWARF, since we won't be able to verify it matches.
970 Our policy is not to load debug info unless we can verify that
971 it matches the primary. Just declare success at this point.
972 And don't complain to the user, since that would cause us to
973 complain on objects compiled without -g. (Some versions of
974 XCode are observed to omit a UUID entry for object linked(?)
975 without -g. Others don't appear to omit it.) */
979 /* mmap the dSYM file to look for DWARF debug info. If successful,
980 use the .macho_img and .macho_img_szB in dsli. */
982 dsymfilename
= find_separate_debug_file( di
->fsm
.filename
);
984 /* Try to load it. */
988 if (VG_(clo_verbosity
) > 1)
989 VG_(message
)(Vg_DebugMsg
, " dSYM= %s\n", dsymfilename
);
991 dsli
= map_image_aboard( di
, dsymfilename
);
992 if (!ML_(sli_is_valid
)(dsli
)) {
993 ML_(symerr
)(di
, False
, "Connect to debuginfo image failed "
998 /* check it has the right uuid. */
999 vg_assert(have_uuid
);
1000 valid
= dsli
.img
&& dsli
.szB
> 0 && check_uuid_matches( dsli
, uuid
);
1002 goto read_the_dwarf
;
1004 if (VG_(clo_verbosity
) > 1)
1005 VG_(message
)(Vg_DebugMsg
, " dSYM does not have "
1006 "correct UUID (out of date?)\n");
1009 /* There was no dsym file, or it doesn't match. We'll have to try
1010 regenerating it, unless --dsymutil=no, in which case just complain
1013 /* If this looks like a lib that we shouldn't run dsymutil on, just
1014 give up. (possible reasons: is system lib, or in /usr etc, or
1015 the dsym dir would not be writable by the user, or we're running
1017 vg_assert(di
->fsm
.filename
);
1018 if (is_systemish_library_name(di
->fsm
.filename
))
1021 if (!VG_(clo_dsymutil
)) {
1022 if (VG_(clo_verbosity
) == 1) {
1023 VG_(message
)(Vg_DebugMsg
, "%s:\n", di
->fsm
.filename
);
1025 if (VG_(clo_verbosity
) > 0)
1026 VG_(message
)(Vg_DebugMsg
, "%sdSYM directory %s; consider using "
1028 VG_(clo_verbosity
) > 1 ? " " : "",
1029 dsymfilename
? "has wrong UUID" : "is missing");
1036 const HChar
* dsymutil
= "/usr/bin/dsymutil ";
1037 HChar
* cmd
= ML_(dinfo_zalloc
)( "di.readmacho.tmp1",
1038 VG_(strlen
)(dsymutil
)
1039 + VG_(strlen
)(di
->fsm
.filename
)
1041 VG_(strcpy
)(cmd
, dsymutil
);
1042 if (0) VG_(strcat
)(cmd
, "--verbose ");
1043 VG_(strcat
)(cmd
, "\"");
1044 VG_(strcat
)(cmd
, di
->fsm
.filename
);
1045 VG_(strcat
)(cmd
, "\"");
1046 VG_(message
)(Vg_DebugMsg
, "run: %s\n", cmd
);
1047 r
= VG_(system
)( cmd
);
1049 VG_(message
)(Vg_DebugMsg
, "run: %s FAILED\n", dsymutil
);
1050 ML_(dinfo_free
)(cmd
);
1051 dsymfilename
= find_separate_debug_file(di
->fsm
.filename
);
1054 /* Try again to load it. */
1058 if (VG_(clo_verbosity
) > 1)
1059 VG_(message
)(Vg_DebugMsg
, " dsyms= %s\n", dsymfilename
);
1061 dsli
= map_image_aboard( di
, dsymfilename
);
1062 if (!ML_(sli_is_valid
)(dsli
)) {
1063 ML_(symerr
)(di
, False
, "Connect to debuginfo image failed "
1064 "(second attempt).");
1068 /* check it has the right uuid. */
1069 vg_assert(have_uuid
);
1070 vg_assert(have_uuid
);
1071 valid
= dsli
.img
&& dsli
.szB
> 0 && check_uuid_matches( dsli
, uuid
);
1073 if (VG_(clo_verbosity
) > 0) {
1074 VG_(message
)(Vg_DebugMsg
,
1075 "WARNING: did not find expected UUID %02X%02X%02X%02X"
1076 "-%02X%02X-%02X%02X-%02X%02X-%02X%02X%02X%02X%02X%02X"
1078 (UInt
)uuid
[0], (UInt
)uuid
[1], (UInt
)uuid
[2], (UInt
)uuid
[3],
1079 (UInt
)uuid
[4], (UInt
)uuid
[5], (UInt
)uuid
[6], (UInt
)uuid
[7],
1080 (UInt
)uuid
[8], (UInt
)uuid
[9], (UInt
)uuid
[10],
1081 (UInt
)uuid
[11], (UInt
)uuid
[12], (UInt
)uuid
[13],
1082 (UInt
)uuid
[14], (UInt
)uuid
[15] );
1083 VG_(message
)(Vg_DebugMsg
,
1084 "WARNING: for %s\n", di
->fsm
.filename
);
1086 unmap_image( &dsli
);
1087 /* unmap_image zeroes out dsli, so it's safe for "fail:" to
1088 re-try unmap_image. */
1093 /* Right. Finally we have our best try at the dwarf image, so go
1094 on to reading stuff out of it. */
1097 if (ML_(sli_is_valid
)(dsli
) && dsli
.szB
> 0) {
1098 // "_mscn" is "mach-o section"
1099 DiSlice debug_info_mscn
1100 = getsectdata(dsli
, "__DWARF", "__debug_info", NULL
);
1101 DiSlice debug_abbv_mscn
1102 = getsectdata(dsli
, "__DWARF", "__debug_abbrev", NULL
);
1103 DiSlice debug_line_mscn
1104 = getsectdata(dsli
, "__DWARF", "__debug_line", NULL
);
1105 DiSlice debug_str_mscn
1106 = getsectdata(dsli
, "__DWARF", "__debug_str", NULL
);
1107 DiSlice debug_ranges_mscn
1108 = getsectdata(dsli
, "__DWARF", "__debug_ranges", NULL
);
1109 DiSlice debug_loc_mscn
1110 = getsectdata(dsli
, "__DWARF", "__debug_loc", NULL
);
1112 /* It appears (jrs, 2014-oct-19) that section "__eh_frame" in
1113 segment "__TEXT" appears in both the main and dsym files, but
1114 only the main one gives the right results. Since it's in the
1115 __TEXT segment, we calculate the __eh_frame avma using its
1116 svma and the text bias, and that sounds reasonable. */
1117 Addr eh_frame_svma
= 0;
1118 DiSlice eh_frame_mscn
1119 = getsectdata(msli
, "__TEXT", "__eh_frame", &eh_frame_svma
);
1121 if (ML_(sli_is_valid
)(eh_frame_mscn
)) {
1122 vg_assert(di
->text_bias
== di
->text_debug_bias
);
1123 ML_(read_callframe_info_dwarf3
)(di
, eh_frame_mscn
,
1124 eh_frame_svma
+ di
->text_bias
,
1125 True
/*is_ehframe*/);
1128 if (ML_(sli_is_valid
)(debug_info_mscn
)) {
1129 if (VG_(clo_verbosity
) > 1) {
1131 VG_(message
)(Vg_DebugMsg
,
1132 "Reading dwarf3 for %s (%#lx) from %s"
1133 " (%lld %lld %lld %lld %lld %lld)\n",
1134 di
->fsm
.filename
, di
->text_avma
, dsymfilename
,
1135 debug_info_mscn
.szB
, debug_abbv_mscn
.szB
,
1136 debug_line_mscn
.szB
, debug_str_mscn
.szB
,
1137 debug_ranges_mscn
.szB
, debug_loc_mscn
.szB
1139 VG_(message
)(Vg_DebugMsg
,
1140 " reading dwarf3 from dsyms file\n");
1142 /* The old reader: line numbers and unwind info only */
1143 ML_(read_debuginfo_dwarf3
) ( di
,
1145 DiSlice_INVALID
, /* .debug_types */
1149 DiSlice_INVALID
/* ALT .debug_str */ );
1151 /* The new reader: read the DIEs in .debug_info to acquire
1152 information on variable types and locations or inline info.
1153 But only if the tool asks for it, or the user requests it on
1154 the command line. */
1155 if (VG_(clo_read_var_info
) /* the user or tool asked for it */
1156 || VG_(clo_read_inline_info
)) {
1157 ML_(new_dwarf3_reader
)(
1158 di
, debug_info_mscn
,
1159 DiSlice_INVALID
, /* .debug_types */
1165 DiSlice_INVALID
, /* ALT .debug_info */
1166 DiSlice_INVALID
, /* ALT .debug_abbv */
1167 DiSlice_INVALID
, /* ALT .debug_line */
1168 DiSlice_INVALID
/* ALT .debug_str */
1174 if (dsymfilename
) ML_(dinfo_free
)(dsymfilename
);
1184 ML_(symerr
)(di
, True
, "Error reading Mach-O object.");
1190 #endif // defined(VGO_darwin)
1192 /*--------------------------------------------------------------------*/
1194 /*--------------------------------------------------------------------*/