2 /*--------------------------------------------------------------------*/
3 /*--- Reading of syms & debug info from Mach-O files. ---*/
4 /*--- readmacho.c ---*/
5 /*--------------------------------------------------------------------*/
8 This file is part of Valgrind, a dynamic binary instrumentation
11 Copyright (C) 2005-2017 Apple Inc.
12 Greg Parker gparker@apple.com
14 This program is free software; you can redistribute it and/or
15 modify it under the terms of the GNU General Public License as
16 published by the Free Software Foundation; either version 2 of the
17 License, or (at your option) any later version.
19 This program is distributed in the hope that it will be useful, but
20 WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 General Public License for more details.
24 You should have received a copy of the GNU General Public License
25 along with this program; if not, write to the Free Software
26 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
29 The GNU General Public License is contained in the file COPYING.
32 #if defined(VGO_darwin)
34 #include "pub_core_basics.h"
35 #include "pub_core_vki.h"
36 #include "pub_core_libcbase.h"
37 #include "pub_core_libcprint.h"
38 #include "pub_core_libcassert.h"
39 #include "pub_core_libcfile.h"
40 #include "pub_core_libcproc.h"
41 #include "pub_core_aspacemgr.h" /* for mmaping debuginfo files */
42 #include "pub_core_machine.h" /* VG_ELF_CLASS */
43 #include "pub_core_options.h"
44 #include "pub_core_oset.h"
45 #include "pub_core_tooliface.h" /* VG_(needs) */
46 #include "pub_core_xarray.h"
47 #include "pub_core_clientstate.h"
48 #include "pub_core_debuginfo.h"
50 #include "priv_misc.h"
51 #include "priv_image.h"
52 #include "priv_d3basics.h"
53 #include "priv_tytypes.h"
54 #include "priv_storage.h"
55 #include "priv_readmacho.h"
56 #include "priv_readdwarf.h"
57 #include "priv_readdwarf3.h"
59 /* --- !!! --- EXTERNAL HEADERS start --- !!! --- */
60 #include <mach-o/loader.h>
61 #include <mach-o/nlist.h>
62 #include <mach-o/fat.h>
63 /* --- !!! --- EXTERNAL HEADERS end --- !!! --- */
66 # define MAGIC MH_MAGIC
67 # define MACH_HEADER mach_header
68 # define LC_SEGMENT_CMD LC_SEGMENT
69 # define SEGMENT_COMMAND segment_command
70 # define SECTION section
73 # define MAGIC MH_MAGIC_64
74 # define MACH_HEADER mach_header_64
75 # define LC_SEGMENT_CMD LC_SEGMENT_64
76 # define SEGMENT_COMMAND segment_command_64
77 # define SECTION section_64
78 # define NLIST nlist_64
82 /*------------------------------------------------------------*/
84 /*--- Mach-O file mapping/unmapping helpers ---*/
86 /*------------------------------------------------------------*/
/* A DiSlice is used to handle the thin/fat distinction for Mach-O images.
   Two things are of interest:
   (1) the entire mapped-in ("primary") image, fat headers, kitchen sink,
       whatnot: the entire file.  This is the DiImage* that is the backing
       for the DiSlice.
   (2) the Mach-O object of interest, which is presumably somewhere inside
       the primary image.  map_image_aboard() below, which generates this
       info, will carefully check that the slice's offset and size denote
       a range that falls entirely inside the primary image.
*/
98 Bool
ML_(is_macho_object_file
)( const void* buf
, SizeT szB
)
100 /* (JRS: the Mach-O headers might not be in this mapped data,
101 because we only mapped a page for this initial check,
102 or at least not very much, and what's at the start of the file
103 is in general a so-called fat header. The Mach-O object we're
104 interested in could be arbitrarily far along the image, and so
105 we can't assume its header will fall within this page.) */
107 /* But we can say that either it's a fat object, in which case it
108 begins with a fat header, or it's unadorned Mach-O, in which
109 case it starts with a normal header. At least do what checks we
110 can to establish whether or not we're looking at something
113 const struct fat_header
* fh_be
= buf
;
114 const struct MACH_HEADER
* mh
= buf
;
117 if (szB
< sizeof(struct fat_header
))
119 if (VG_(ntohl
)(fh_be
->magic
) == FAT_MAGIC
)
122 if (szB
< sizeof(struct MACH_HEADER
))
124 if (mh
->magic
== MAGIC
)
131 /* Unmap an image mapped in by map_image_aboard. */
132 static void unmap_image ( /*MOD*/DiSlice
* sli
)
135 if (ML_(sli_is_valid
)(*sli
)) {
136 ML_(img_done
)(sli
->img
);
137 *sli
= DiSlice_INVALID
;
142 /* Open the given file, find the thin part if necessary, do some
143 checks, and return a DiSlice containing details of both the thin
144 part and (implicitly, via the contained DiImage*) the fat part.
145 returns DiSlice_INVALID if it fails. If it succeeds, the returned
146 slice is guaranteed to refer to a valid(ish) Mach-O image. */
147 static DiSlice
map_image_aboard ( DebugInfo
* di
, /* only for err msgs */
148 const HChar
* filename
)
150 DiSlice sli
= DiSlice_INVALID
;
152 /* First off, try to map the thing in. */
153 DiImage
* mimg
= ML_(img_from_local_file
)(filename
);
155 VG_(message
)(Vg_UserMsg
, "warning: connection to image %s failed\n",
157 VG_(message
)(Vg_UserMsg
, " no symbols or debug info loaded\n" );
158 return DiSlice_INVALID
;
161 /* Now we have a viable DiImage* for it. Look for the embedded
162 Mach-O object. If not findable, close the image and fail. */
163 DiOffT fh_be_ioff
= 0;
164 struct fat_header fh_be
;
165 struct fat_header fh
;
167 // Assume initially that we have a thin image, and narrow
168 // the bounds if it turns out to be fat. This stores |mimg| as
169 // |sli.img|, so NULL out |mimg| after this point, for the sake of
171 sli
= ML_(sli_from_img
)(mimg
);
174 // Check for fat header.
175 if (ML_(img_size
)(sli
.img
) < sizeof(struct fat_header
)) {
176 ML_(symerr
)(di
, True
, "Invalid Mach-O file (0 too small).");
180 // Fat header is always BIG-ENDIAN
181 ML_(img_get
)(&fh_be
, sli
.img
, fh_be_ioff
, sizeof(fh_be
));
182 VG_(memset
)(&fh
, 0, sizeof(fh
));
183 fh
.magic
= VG_(ntohl
)(fh_be
.magic
);
184 fh
.nfat_arch
= VG_(ntohl
)(fh_be
.nfat_arch
);
185 if (fh
.magic
== FAT_MAGIC
) {
186 // Look for a good architecture.
187 if (ML_(img_size
)(sli
.img
) < sizeof(struct fat_header
)
188 + fh
.nfat_arch
* sizeof(struct fat_arch
)) {
189 ML_(symerr
)(di
, True
, "Invalid Mach-O file (1 too small).");
194 for (f
= 0, arch_be_ioff
= sizeof(struct fat_header
);
196 f
++, arch_be_ioff
+= sizeof(struct fat_arch
)) {
197 # if defined(VGA_ppc)
198 Int cputype
= CPU_TYPE_POWERPC
;
199 # elif defined(VGA_ppc64be)
200 Int cputype
= CPU_TYPE_POWERPC64BE
;
201 # elif defined(VGA_ppc64le)
202 Int cputype
= CPU_TYPE_POWERPC64LE
;
203 # elif defined(VGA_x86)
204 Int cputype
= CPU_TYPE_X86
;
205 # elif defined(VGA_amd64)
206 Int cputype
= CPU_TYPE_X86_64
;
208 # error "unknown architecture"
210 struct fat_arch arch_be
;
211 struct fat_arch arch
;
212 ML_(img_get
)(&arch_be
, sli
.img
, arch_be_ioff
, sizeof(arch_be
));
213 VG_(memset
)(&arch
, 0, sizeof(arch
));
214 arch
.cputype
= VG_(ntohl
)(arch_be
.cputype
);
215 arch
.cpusubtype
= VG_(ntohl
)(arch_be
.cpusubtype
);
216 arch
.offset
= VG_(ntohl
)(arch_be
.offset
);
217 arch
.size
= VG_(ntohl
)(arch_be
.size
);
218 if (arch
.cputype
== cputype
) {
219 if (ML_(img_size
)(sli
.img
) < arch
.offset
+ arch
.size
) {
220 ML_(symerr
)(di
, True
, "Invalid Mach-O file (2 too small).");
223 /* Found a suitable arch. Narrow down the slice accordingly. */
224 sli
.ioff
= arch
.offset
;
229 if (f
== fh
.nfat_arch
) {
230 ML_(symerr
)(di
, True
,
231 "No acceptable architecture found in fat file.");
236 /* Sanity check what we found. */
238 /* assured by logic above */
239 vg_assert(ML_(img_size
)(sli
.img
) >= sizeof(struct fat_header
));
241 if (sli
.szB
< sizeof(struct MACH_HEADER
)) {
242 ML_(symerr
)(di
, True
, "Invalid Mach-O file (3 too small).");
246 if (sli
.szB
> ML_(img_size
)(sli
.img
)) {
247 ML_(symerr
)(di
, True
, "Invalid Mach-O file (thin bigger than fat).");
251 if (sli
.ioff
>= 0 && sli
.ioff
+ sli
.szB
<= ML_(img_size
)(sli
.img
)) {
252 /* thin entirely within fat, as expected */
254 ML_(symerr
)(di
, True
, "Invalid Mach-O file (thin not inside fat).");
258 /* Peer at the Mach header for the thin object, starting at the
259 beginning of the slice, to check it's at least marginally
261 struct MACH_HEADER mh
;
262 ML_(cur_read_get
)(&mh
, ML_(cur_from_sli
)(sli
), sizeof(mh
));
263 if (mh
.magic
!= MAGIC
) {
264 ML_(symerr
)(di
, True
, "Invalid Mach-O file (bad magic).");
268 if (sli
.szB
< sizeof(struct MACH_HEADER
) + mh
.sizeofcmds
) {
269 ML_(symerr
)(di
, True
, "Invalid Mach-O file (4 too small).");
273 /* "main image is plausible" */
275 vg_assert(ML_(img_size
)(sli
.img
) > 0);
276 /* "thin image exists and is a sub-part (or all) of main image" */
277 vg_assert(sli
.ioff
>= 0);
278 vg_assert(sli
.szB
> 0);
279 vg_assert(sli
.ioff
+ sli
.szB
<= ML_(img_size
)(sli
.img
));
280 return sli
; /* success */
285 return DiSlice_INVALID
; /* bah! */
289 /*------------------------------------------------------------*/
291 /*--- Mach-O symbol table reading ---*/
293 /*------------------------------------------------------------*/
295 /* Read a symbol table (nlist). Add the resulting candidate symbols
296 to 'syms'; the caller will post-process them and hand them off to
297 ML_(addSym) itself. */
299 void read_symtab( /*OUT*/XArray
* /* DiSym */ syms
,
300 struct _DebugInfo
* di
,
301 DiCursor symtab_cur
, UInt symtab_count
,
302 DiCursor strtab_cur
, UInt strtab_sz
)
307 // "start_according_to_valgrind"
308 static const HChar
* s_a_t_v
= NULL
; /* do not make non-static */
310 for (i
= 0; i
< symtab_count
; i
++) {
312 ML_(cur_read_get
)(&nl
,
313 ML_(cur_plus
)(symtab_cur
, i
* sizeof(struct NLIST
)),
317 if ((nl
.n_type
& N_TYPE
) == N_SECT
) {
318 sym_addr
= di
->text_bias
+ nl
.n_value
;
319 /*} else if ((nl.n_type & N_TYPE) == N_ABS) {
320 GrP fixme don't ignore absolute symbols?
321 sym_addr = nl.n_value; */
326 if (di
->trace_symtab
) {
327 HChar
* str
= ML_(cur_read_strdup
)(
328 ML_(cur_plus
)(strtab_cur
, nl
.n_un
.n_strx
),
330 VG_(printf
)("nlist raw: avma %010lx %s\n", sym_addr
, str
);
331 ML_(dinfo_free
)(str
);
334 /* If no part of the symbol falls within the mapped range,
336 if (sym_addr
<= di
->text_avma
337 || sym_addr
>= di
->text_avma
+di
->text_size
) {
341 /* skip names which point outside the string table;
342 following these risks segfaulting Valgrind */
343 if (nl
.n_un
.n_strx
< 0 || nl
.n_un
.n_strx
>= strtab_sz
) {
348 = ML_(cur_read_strdup
)( ML_(cur_plus
)(strtab_cur
, nl
.n_un
.n_strx
),
351 /* skip nameless symbols; these appear to be common, but
354 ML_(dinfo_free
)(name
);
358 VG_(bzero_inline
)(&disym
, sizeof(disym
));
359 disym
.avmas
.main
= sym_addr
;
360 SET_TOCPTR_AVMA(disym
, 0);
361 SET_LOCAL_EP_AVMA(disym
, 0);
362 disym
.pri_name
= ML_(addStr
)(di
, name
, -1);
363 disym
.sec_names
= NULL
;
364 disym
.size
= // let canonicalize fix it
365 di
->text_avma
+di
->text_size
- sym_addr
;
367 disym
.isIFunc
= False
;
368 disym
.isGlobal
= False
;
369 // Lots of user function names get prepended with an underscore. Eg. the
370 // function 'f' becomes the symbol '_f'. And the "below main"
371 // function is called "start". So we skip the leading underscore, and
372 // if we see 'start' and --show-below-main=no, we rename it as
373 // "start_according_to_valgrind", which makes it easy to spot later
374 // and display as "(below main)".
375 if (disym
.pri_name
[0] == '_') {
378 else if (!VG_(clo_show_below_main
) && VG_STREQ(disym
.pri_name
, "start")) {
380 s_a_t_v
= ML_(addStr
)(di
, "start_according_to_valgrind", -1);
382 disym
.pri_name
= s_a_t_v
;
385 vg_assert(disym
.pri_name
);
386 VG_(addToXA
)( syms
, &disym
);
387 ML_(dinfo_free
)(name
);
392 /* Compare DiSyms by their start address, and for equal addresses, use
393 the primary name as a secondary sort key. */
394 static Int
cmp_DiSym_by_start_then_name ( const void* v1
, const void* v2
)
396 const DiSym
* s1
= (const DiSym
*)v1
;
397 const DiSym
* s2
= (const DiSym
*)v2
;
398 if (s1
->avmas
.main
< s2
->avmas
.main
) return -1;
399 if (s1
->avmas
.main
> s2
->avmas
.main
) return 1;
400 return VG_(strcmp
)(s1
->pri_name
, s2
->pri_name
);
403 /* 'cand' is a bunch of candidate symbols obtained by reading
404 nlist-style symbol table entries. Their ends may overlap, so sort
405 them and truncate them accordingly. The code in this routine is
406 copied almost verbatim from read_symbol_table() in readxcoff.c. */
407 static void tidy_up_cand_syms ( /*MOD*/XArray
* /* of DiSym */ syms
,
410 Word nsyms
, i
, j
, k
, m
;
412 nsyms
= VG_(sizeXA
)(syms
);
414 VG_(setCmpFnXA
)(syms
, cmp_DiSym_by_start_then_name
);
417 /* We only know for sure the start addresses (actual VMAs) of
418 symbols, and an overestimation of their end addresses. So sort
419 by start address, then clip each symbol so that its end address
420 does not overlap with the next one along.
422 There is a small refinement: if a group of symbols have the same
423 address, treat them as a group: find the next symbol along that
424 has a higher start address, and clip all of the group
425 accordingly. This clips the group as a whole so as not to
426 overlap following symbols. This leaves prefersym() in
427 storage.c, which is not nlist-specific, to later decide which of
428 the symbols in the group to keep.
430 Another refinement is that we need to get rid of symbols which,
431 after clipping, have identical starts, ends, and names. So the
432 sorting uses the name as a secondary key.
435 for (i
= 0; i
< nsyms
; i
++) {
438 && ((DiSym
*)VG_(indexXA
)(syms
,i
))->avmas
.main
439 == ((DiSym
*)VG_(indexXA
)(syms
,k
))->avmas
.main
;
442 /* So now [i .. k-1] is a group all with the same start address.
443 Clip their ending addresses so they don't overlap [k]. In
444 the normal case (no overlaps), k == i+1. */
446 DiSym
* next
= (DiSym
*)VG_(indexXA
)(syms
,k
);
447 for (m
= i
; m
< k
; m
++) {
448 DiSym
* here
= (DiSym
*)VG_(indexXA
)(syms
,m
);
449 vg_assert(here
->avmas
.main
< next
->avmas
.main
);
450 if (here
->avmas
.main
+ here
->size
> next
->avmas
.main
)
451 here
->size
= next
->avmas
.main
- here
->avmas
.main
;
455 vg_assert(i
<= nsyms
);
461 for (i
= 1; i
< nsyms
; i
++) {
462 DiSym
*s_j1
, *s_j
, *s_i
;
464 s_j1
= (DiSym
*)VG_(indexXA
)(syms
, j
-1);
465 s_j
= (DiSym
*)VG_(indexXA
)(syms
, j
);
466 s_i
= (DiSym
*)VG_(indexXA
)(syms
, i
);
467 if (s_i
->avmas
.main
!= s_j1
->avmas
.main
468 || s_i
->size
!= s_j1
->size
469 || 0 != VG_(strcmp
)(s_i
->pri_name
, s_j1
->pri_name
)) {
474 VG_(printf
)("nlist cleanup: dump duplicate avma %010lx %s\n",
475 s_i
->avmas
.main
, s_i
->pri_name
);
479 vg_assert(j
>= 0 && j
<= nsyms
);
480 VG_(dropTailXA
)(syms
, nsyms
- j
);
484 /*------------------------------------------------------------*/
486 /*--- Mach-O top-level processing ---*/
488 /*------------------------------------------------------------*/
490 #if !defined(APPLE_DSYM_EXT_AND_SUBDIRECTORY)
491 #define APPLE_DSYM_EXT_AND_SUBDIRECTORY ".dSYM/Contents/Resources/DWARF/"
495 static Bool
file_exists_p(const HChar
*path
)
498 SysRes res
= VG_(stat
)(path
, &sbuf
);
499 return sr_isError(res
) ? False
: True
;
503 /* Search for an existing dSYM file as a possible separate debug file.
506 find_separate_debug_file (const HChar
*executable_name
)
508 const HChar
*basename_str
;
513 /* Make sure the object file name itself doesn't contain ".dSYM" in it or we
514 will end up with an infinite loop where after we add a dSYM symbol file,
515 it will then enter this function asking if there is a debug file for the
517 if (VG_(strcasestr
) (executable_name
, ".dSYM") == NULL
)
519 /* Check for the existence of a .dSYM file for a given executable. */
520 basename_str
= VG_(basename
) (executable_name
);
521 dsymfile
= ML_(dinfo_zalloc
)("di.readmacho.dsymfile",
522 VG_(strlen
) (executable_name
)
523 + VG_(strlen
) (APPLE_DSYM_EXT_AND_SUBDIRECTORY
)
524 + VG_(strlen
) (basename_str
)
528 /* First try for the dSYM in the same directory as the original file. */
529 VG_(strcpy
) (dsymfile
, executable_name
);
530 VG_(strcat
) (dsymfile
, APPLE_DSYM_EXT_AND_SUBDIRECTORY
);
531 VG_(strcat
) (dsymfile
, basename_str
);
533 if (file_exists_p (dsymfile
))
536 /* Now search for any parent directory that has a '.' in it so we can find
537 Mac OS X applications, bundles, plugins, and any other kinds of files.
538 Mac OS X application bundles wil have their program in
539 "/some/path/MyApp.app/Contents/MacOS/MyApp" (or replace ".app" with
540 ".bundle" or ".plugin" for other types of bundles). So we look for any
541 prior '.' character and try appending the apple dSYM extension and
542 subdirectory and see if we find an existing dSYM file (in the above
543 MyApp example the dSYM would be at either:
544 "/some/path/MyApp.app.dSYM/Contents/Resources/DWARF/MyApp" or
545 "/some/path/MyApp.dSYM/Contents/Resources/DWARF/MyApp". */
546 VG_(strcpy
) (dsymfile
, VG_(dirname
) (executable_name
));
547 while ((dot_ptr
= VG_(strrchr
) (dsymfile
, '.')))
549 /* Find the directory delimiter that follows the '.' character since
550 we now look for a .dSYM that follows any bundle extension. */
551 slash_ptr
= VG_(strchr
) (dot_ptr
, '/');
554 /* NULL terminate the string at the '/' character and append
555 the path down to the dSYM file. */
557 VG_(strcat
) (slash_ptr
, APPLE_DSYM_EXT_AND_SUBDIRECTORY
);
558 VG_(strcat
) (slash_ptr
, basename_str
);
559 if (file_exists_p (dsymfile
))
563 /* NULL terminate the string at the '.' character and append
564 the path down to the dSYM file. */
566 VG_(strcat
) (dot_ptr
, APPLE_DSYM_EXT_AND_SUBDIRECTORY
);
567 VG_(strcat
) (dot_ptr
, basename_str
);
568 if (file_exists_p (dsymfile
))
571 /* NULL terminate the string at the '.' locatated by the strrchr()
575 /* We found a previous extension '.' character and did not find a
576 dSYM file so now find previous directory delimiter so we don't
577 try multiple times on a file name that may have a version number
578 in it such as "/some/path/MyApp.6.0.4.app". */
579 slash_ptr
= VG_(strrchr
) (dsymfile
, '/');
582 /* NULL terminate the string at the previous directory character
592 /* Given a DiSlice covering the entire Mach-O thin image, find the
593 DiSlice for the specified (segname, sectname) pairing, if
594 possible. Also return the section's .addr field in *svma if
596 static DiSlice
getsectdata ( DiSlice img
,
597 const HChar
*segname
, const HChar
*sectname
,
600 DiCursor cur
= ML_(cur_from_sli
)(img
);
602 struct MACH_HEADER mh
;
603 ML_(cur_step_get
)(&mh
, &cur
, sizeof(mh
));
606 for (c
= 0; c
< mh
.ncmds
; c
++) {
607 struct load_command cmd
;
608 ML_(cur_read_get
)(&cmd
, cur
, sizeof(cmd
));
609 if (cmd
.cmd
== LC_SEGMENT_CMD
) {
610 struct SEGMENT_COMMAND seg
;
611 ML_(cur_read_get
)(&seg
, cur
, sizeof(seg
));
612 if (0 == VG_(strncmp
)(&seg
.segname
[0],
613 segname
, sizeof(seg
.segname
))) {
614 DiCursor sects_cur
= ML_(cur_plus
)(cur
, sizeof(seg
));
616 for (s
= 0; s
< seg
.nsects
; s
++) {
618 ML_(cur_step_get
)(§
, §s_cur
, sizeof(sect
));
619 if (0 == VG_(strncmp
)(sect
.sectname
, sectname
,
620 sizeof(sect
.sectname
))) {
622 res
.ioff
= sect
.offset
;
624 if (svma
) *svma
= (Addr
)sect
.addr
;
631 cur
= ML_(cur_plus
)(cur
, cmd
.cmdsize
);
634 return DiSlice_INVALID
;
638 /* Brute force just simply search for uuid[0..15] in |sli| */
639 static Bool
check_uuid_matches ( DiSlice sli
, UChar
* uuid
)
644 /* Work through the slice in 1 KB chunks. */
645 UChar first
= uuid
[0];
646 DiOffT min_off
= sli
.ioff
;
647 DiOffT max1_off
= sli
.ioff
+ sli
.szB
;
648 DiOffT curr_off
= min_off
;
649 vg_assert(min_off
< max1_off
);
651 vg_assert(curr_off
>= min_off
&& curr_off
<= max1_off
);
652 if (curr_off
== max1_off
) break;
653 DiOffT avail
= max1_off
- curr_off
;
654 vg_assert(avail
> 0 && avail
<= max1_off
);
655 if (avail
> 1024) avail
= 1024;
657 SizeT nGot
= ML_(img_get_some
)(buf
, sli
.img
, curr_off
, avail
);
658 vg_assert(nGot
>= 1 && nGot
<= avail
);
660 /* Scan through the 1K chunk we got, looking for the start char. */
661 for (i
= 0; i
< (UInt
)nGot
; i
++) {
662 if (LIKELY(buf
[i
] != first
))
664 /* first char matches. See if we can get 16 bytes at this
665 offset, and compare. */
666 if (curr_off
+ i
< max1_off
&& max1_off
- (curr_off
+ i
) >= 16) {
668 ML_(img_get
)(&buff16
[0], sli
.img
, curr_off
+ i
, 16);
669 if (0 == VG_(memcmp
)(&buff16
[0], &uuid
[0], 16))
679 /* Heuristic kludge: return True if this looks like an installed
680 standard library; hence we shouldn't consider automagically running
682 static Bool
is_systemish_library_name ( const HChar
* name
)
685 if (0 == VG_(strncasecmp
)(name
, "/usr/", 5)
686 || 0 == VG_(strncasecmp
)(name
, "/bin/", 5)
687 || 0 == VG_(strncasecmp
)(name
, "/sbin/", 6)
688 || 0 == VG_(strncasecmp
)(name
, "/opt/", 5)
689 || 0 == VG_(strncasecmp
)(name
, "/sw/", 4)
690 || 0 == VG_(strncasecmp
)(name
, "/System/", 8)
691 || 0 == VG_(strncasecmp
)(name
, "/Library/", 9)
692 || 0 == VG_(strncasecmp
)(name
, "/Applications/", 14)) {
700 Bool
ML_(read_macho_debug_info
)( struct _DebugInfo
* di
)
702 DiSlice msli
= DiSlice_INVALID
; // the main image
703 DiSlice dsli
= DiSlice_INVALID
; // the debuginfo image
704 DiCursor sym_cur
= DiCursor_INVALID
;
705 DiCursor dysym_cur
= DiCursor_INVALID
;
706 HChar
* dsymfilename
= NULL
;
707 Bool have_uuid
= False
;
710 const DebugInfoMapping
* rx_map
= NULL
;
711 const DebugInfoMapping
* rw_map
= NULL
;
713 /* mmap the object file to look for di->soname and di->text_bias
714 and uuid and nlist */
716 /* This should be ensured by our caller (that we're in the accept
718 vg_assert(di
->fsm
.have_rx_map
);
719 vg_assert(di
->fsm
.have_rw_map
);
721 for (i
= 0; i
< VG_(sizeXA
)(di
->fsm
.maps
); i
++) {
722 const DebugInfoMapping
* map
= VG_(indexXA
)(di
->fsm
.maps
, i
);
723 if (map
->rx
&& !rx_map
)
725 if (map
->rw
&& !rw_map
)
727 if (rx_map
&& rw_map
)
733 if (VG_(clo_verbosity
) > 1)
734 VG_(message
)(Vg_DebugMsg
,
735 "%s (rx at %#lx, rw at %#lx)\n", di
->fsm
.filename
,
736 rx_map
->avma
, rw_map
->avma
);
738 VG_(memset
)(&uuid
, 0, sizeof(uuid
));
740 msli
= map_image_aboard( di
, di
->fsm
.filename
);
741 if (!ML_(sli_is_valid
)(msli
)) {
742 ML_(symerr
)(di
, False
, "Connect to main image failed.");
746 vg_assert(msli
.img
!= NULL
&& msli
.szB
> 0);
748 /* Poke around in the Mach-O header, to find some important
750 // Find LC_SYMTAB and LC_DYSYMTAB, if present.
751 // Read di->soname from LC_ID_DYLIB if present,
752 // or from LC_ID_DYLINKER if present,
754 // Get di->text_bias (aka slide) based on the corresponding LC_SEGMENT
755 // Get uuid for later dsym search
760 DiCursor cmd_cur
= ML_(cur_from_sli
)(msli
);
762 struct MACH_HEADER mh
;
763 ML_(cur_step_get
)(&mh
, &cmd_cur
, sizeof(mh
));
765 /* Now cur_cmd points just after the Mach header, right at the
766 start of the load commands, which is where we need it to start
767 the following loop. */
770 for (c
= 0; c
< mh
.ncmds
; c
++) {
771 struct load_command cmd
;
772 ML_(cur_read_get
)(&cmd
, cmd_cur
, sizeof(cmd
));
774 if (cmd
.cmd
== LC_SYMTAB
) {
777 else if (cmd
.cmd
== LC_DYSYMTAB
) {
780 else if (cmd
.cmd
== LC_ID_DYLIB
&& mh
.filetype
== MH_DYLIB
) {
782 struct dylib_command dcmd
;
783 ML_(cur_read_get
)(&dcmd
, cmd_cur
, sizeof(dcmd
));
784 DiCursor dylibname_cur
785 = ML_(cur_plus
)(cmd_cur
, dcmd
.dylib
.name
.offset
);
787 = ML_(cur_read_strdup
)(dylibname_cur
, "di.rmdi.1");
788 HChar
* soname
= VG_(strrchr
)(dylibname
, '/');
789 if (!soname
) soname
= dylibname
;
791 di
->soname
= ML_(dinfo_strdup
)("di.readmacho.dylibname",
793 ML_(dinfo_free
)(dylibname
);
795 else if (cmd
.cmd
==LC_ID_DYLINKER
&& mh
.filetype
==MH_DYLINKER
) {
796 struct dylinker_command dcmd
;
797 ML_(cur_read_get
)(&dcmd
, cmd_cur
, sizeof(dcmd
));
798 DiCursor dylinkername_cur
799 = ML_(cur_plus
)(cmd_cur
, dcmd
.name
.offset
);
801 = ML_(cur_read_strdup
)(dylinkername_cur
, "di.rmdi.2");
802 HChar
* soname
= VG_(strrchr
)(dylinkername
, '/');
803 if (!soname
) soname
= dylinkername
;
805 di
->soname
= ML_(dinfo_strdup
)("di.readmacho.dylinkername",
807 ML_(dinfo_free
)(dylinkername
);
810 // A comment from Julian about why varinfo[35] fail:
812 // My impression is, from comparing the output of otool -l for these
813 // executables with the logic in ML_(read_macho_debug_info),
814 // specifically the part that begins "else if (cmd->cmd ==
815 // LC_SEGMENT_CMD) {", that it's a complete hack which just happens
816 // to work ok for text symbols. In particular, it appears to assume
817 // that in a "struct load_command" of type LC_SEGMENT_CMD, the first
818 // "struct SEGMENT_COMMAND" inside it is going to contain the info we
819 // need. However, otool -l shows, and also the Apple docs state,
820 // that a struct load_command may contain an arbitrary number of
821 // struct SEGMENT_COMMANDs, so I'm not sure why it's OK to merely
822 // snarf the first. But I'm not sure about this.
824 // The "Try for __DATA" block below simply adds acquisition of data
825 // svma/bias values using the same assumption. It also needs
826 // (probably) to deal with bss sections, but I don't understand how
827 // this all ties together really, so it requires further study.
829 // If you can get your head around the relationship between MachO
830 // segments, sections and load commands, this might be relatively
831 // easy to fix properly.
833 // Basically we need to come up with plausible numbers for di->
834 // {text,data,bss}_{avma,svma}, from which the _bias numbers are
835 // then trivially derived. Then I think the debuginfo reader should
837 else if (cmd
.cmd
== LC_SEGMENT_CMD
) {
838 struct SEGMENT_COMMAND seg
;
839 ML_(cur_read_get
)(&seg
, cmd_cur
, sizeof(seg
));
841 if (!di
->text_present
842 && 0 == VG_(strcmp
)(&seg
.segname
[0], "__TEXT")
843 /* DDD: is the next line a kludge? -- JRS */
844 && seg
.fileoff
== 0 && seg
.filesize
!= 0) {
845 di
->text_present
= True
;
846 di
->text_svma
= (Addr
)seg
.vmaddr
;
847 di
->text_avma
= rx_map
->avma
;
848 di
->text_size
= seg
.vmsize
;
849 di
->text_bias
= di
->text_avma
- di
->text_svma
;
850 /* Make the _debug_ values be the same as the
851 svma/bias for the primary object, since there is
852 no secondary (debuginfo) object, but nevertheless
853 downstream biasing of Dwarf3 relies on the
855 di
->text_debug_svma
= di
->text_svma
;
856 di
->text_debug_bias
= di
->text_bias
;
859 if (!di
->data_present
860 && 0 == VG_(strcmp
)(&seg
.segname
[0], "__DATA")
861 /* && DDD:seg->fileoff == 0 */ && seg
.filesize
!= 0) {
862 di
->data_present
= True
;
863 di
->data_svma
= (Addr
)seg
.vmaddr
;
864 di
->data_avma
= rw_map
->avma
;
865 di
->data_size
= seg
.vmsize
;
866 di
->data_bias
= di
->data_avma
- di
->data_svma
;
867 di
->data_debug_svma
= di
->data_svma
;
868 di
->data_debug_bias
= di
->data_bias
;
871 else if (cmd
.cmd
== LC_UUID
) {
872 ML_(cur_read_get
)(&uuid
, cmd_cur
, sizeof(uuid
));
875 // Move the cursor along
876 cmd_cur
= ML_(cur_plus
)(cmd_cur
, cmd
.cmdsize
);
881 di
->soname
= ML_(dinfo_strdup
)("di.readmacho.noname", "NONE");
884 if (di
->trace_symtab
) {
886 VG_(printf
)("SONAME = %s\n", di
->soname
);
890 /* Now we have the base object to hand. Read symbols from it. */
892 // We already asserted that ..
893 vg_assert(msli
.img
!= NULL
&& msli
.szB
> 0);
895 if (ML_(cur_is_valid
)(sym_cur
) && ML_(cur_is_valid
)(dysym_cur
)) {
897 struct symtab_command symcmd
;
898 struct dysymtab_command dysymcmd
;
900 ML_(cur_read_get
)(&symcmd
, sym_cur
, sizeof(symcmd
));
901 ML_(cur_read_get
)(&dysymcmd
, dysym_cur
, sizeof(dysymcmd
));
903 /* Read nlist symbol table */
904 DiCursor syms
= DiCursor_INVALID
;
905 DiCursor strs
= DiCursor_INVALID
;
906 XArray
* /* DiSym */ candSyms
= NULL
;
909 if (msli
.szB
< symcmd
.stroff
+ symcmd
.strsize
910 || msli
.szB
< symcmd
.symoff
+ symcmd
.nsyms
911 * sizeof(struct NLIST
)) {
912 ML_(symerr
)(di
, False
, "Invalid Mach-O file (5 too small).");
915 if (dysymcmd
.ilocalsym
+ dysymcmd
.nlocalsym
> symcmd
.nsyms
916 || dysymcmd
.iextdefsym
+ dysymcmd
.nextdefsym
> symcmd
.nsyms
) {
917 ML_(symerr
)(di
, False
, "Invalid Mach-O file (bad symbol table).");
921 syms
= ML_(cur_plus
)(ML_(cur_from_sli
)(msli
), symcmd
.symoff
);
922 strs
= ML_(cur_plus
)(ML_(cur_from_sli
)(msli
), symcmd
.stroff
);
924 if (VG_(clo_verbosity
) > 1)
925 VG_(message
)(Vg_DebugMsg
,
926 " reading syms from primary file (%d %d)\n",
927 dysymcmd
.nextdefsym
, dysymcmd
.nlocalsym
);
929 /* Read candidate symbols into 'candSyms', so we can truncate
930 overlapping ends and generally tidy up, before presenting
931 them to ML_(addSym). */
932 candSyms
= VG_(newXA
)(
933 ML_(dinfo_zalloc
), "di.readmacho.candsyms.1",
934 ML_(dinfo_free
), sizeof(DiSym
)
938 read_symtab(candSyms
,
941 dysymcmd
.iextdefsym
* sizeof(struct NLIST
)),
942 dysymcmd
.nextdefsym
, strs
, symcmd
.strsize
);
943 // static and private_extern symbols
944 read_symtab(candSyms
,
947 dysymcmd
.ilocalsym
* sizeof(struct NLIST
)),
948 dysymcmd
.nlocalsym
, strs
, symcmd
.strsize
);
950 /* tidy up the cand syms -- trim overlapping ends. May resize
952 tidy_up_cand_syms( candSyms
, di
->trace_symtab
);
954 /* and finally present them to ML_(addSym) */
955 nCandSyms
= VG_(sizeXA
)( candSyms
);
956 for (i
= 0; i
< nCandSyms
; i
++) {
957 DiSym
* cand
= (DiSym
*) VG_(indexXA
)( candSyms
, i
);
958 vg_assert(cand
->pri_name
!= NULL
);
959 vg_assert(cand
->sec_names
== NULL
);
960 if (di
->trace_symtab
)
961 VG_(printf
)("nlist final: acquire avma %010lx-%010lx %s\n",
962 cand
->avmas
.main
, cand
->avmas
.main
+ cand
->size
- 1,
964 ML_(addSym
)( di
, cand
);
966 VG_(deleteXA
)( candSyms
);
969 /* If there's no UUID in the primary, don't even bother to try and
970 read any DWARF, since we won't be able to verify it matches.
971 Our policy is not to load debug info unless we can verify that
972 it matches the primary. Just declare success at this point.
973 And don't complain to the user, since that would cause us to
974 complain on objects compiled without -g. (Some versions of
975 XCode are observed to omit a UUID entry for object linked(?)
976 without -g. Others don't appear to omit it.) */
980 /* mmap the dSYM file to look for DWARF debug info. If successful,
981 use the .macho_img and .macho_img_szB in dsli. */
983 dsymfilename
= find_separate_debug_file( di
->fsm
.filename
);
985 /* Try to load it. */
989 if (VG_(clo_verbosity
) > 1)
990 VG_(message
)(Vg_DebugMsg
, " dSYM= %s\n", dsymfilename
);
992 dsli
= map_image_aboard( di
, dsymfilename
);
993 if (!ML_(sli_is_valid
)(dsli
)) {
994 ML_(symerr
)(di
, False
, "Connect to debuginfo image failed "
999 /* check it has the right uuid. */
1000 vg_assert(have_uuid
);
1001 valid
= dsli
.img
&& dsli
.szB
> 0 && check_uuid_matches( dsli
, uuid
);
1003 goto read_the_dwarf
;
1005 if (VG_(clo_verbosity
) > 1)
1006 VG_(message
)(Vg_DebugMsg
, " dSYM does not have "
1007 "correct UUID (out of date?)\n");
1010 /* There was no dsym file, or it doesn't match. We'll have to try
1011 regenerating it, unless --dsymutil=no, in which case just complain
1014 /* If this looks like a lib that we shouldn't run dsymutil on, just
1015 give up. (possible reasons: is system lib, or in /usr etc, or
1016 the dsym dir would not be writable by the user, or we're running
1018 vg_assert(di
->fsm
.filename
);
1019 if (is_systemish_library_name(di
->fsm
.filename
))
1022 if (!VG_(clo_dsymutil
)) {
1023 if (VG_(clo_verbosity
) == 1) {
1024 VG_(message
)(Vg_DebugMsg
, "%s:\n", di
->fsm
.filename
);
1026 if (VG_(clo_verbosity
) > 0)
1027 VG_(message
)(Vg_DebugMsg
, "%sdSYM directory %s; consider using "
1029 VG_(clo_verbosity
) > 1 ? " " : "",
1030 dsymfilename
? "has wrong UUID" : "is missing");
1037 const HChar
* dsymutil
= "/usr/bin/dsymutil ";
1038 HChar
* cmd
= ML_(dinfo_zalloc
)( "di.readmacho.tmp1",
1039 VG_(strlen
)(dsymutil
)
1040 + VG_(strlen
)(di
->fsm
.filename
)
1042 VG_(strcpy
)(cmd
, dsymutil
);
1043 if (0) VG_(strcat
)(cmd
, "--verbose ");
1044 VG_(strcat
)(cmd
, "\"");
1045 VG_(strcat
)(cmd
, di
->fsm
.filename
);
1046 VG_(strcat
)(cmd
, "\"");
1047 VG_(message
)(Vg_DebugMsg
, "run: %s\n", cmd
);
1048 r
= VG_(system
)( cmd
);
1050 VG_(message
)(Vg_DebugMsg
, "run: %s FAILED\n", dsymutil
);
1051 ML_(dinfo_free
)(cmd
);
1052 dsymfilename
= find_separate_debug_file(di
->fsm
.filename
);
1055 /* Try again to load it. */
1059 if (VG_(clo_verbosity
) > 1)
1060 VG_(message
)(Vg_DebugMsg
, " dsyms= %s\n", dsymfilename
);
1062 dsli
= map_image_aboard( di
, dsymfilename
);
1063 if (!ML_(sli_is_valid
)(dsli
)) {
1064 ML_(symerr
)(di
, False
, "Connect to debuginfo image failed "
1065 "(second attempt).");
1069 /* check it has the right uuid. */
1070 vg_assert(have_uuid
);
1071 vg_assert(have_uuid
);
1072 valid
= dsli
.img
&& dsli
.szB
> 0 && check_uuid_matches( dsli
, uuid
);
1074 if (VG_(clo_verbosity
) > 0) {
1075 VG_(message
)(Vg_DebugMsg
,
1076 "WARNING: did not find expected UUID %02X%02X%02X%02X"
1077 "-%02X%02X-%02X%02X-%02X%02X-%02X%02X%02X%02X%02X%02X"
1079 (UInt
)uuid
[0], (UInt
)uuid
[1], (UInt
)uuid
[2], (UInt
)uuid
[3],
1080 (UInt
)uuid
[4], (UInt
)uuid
[5], (UInt
)uuid
[6], (UInt
)uuid
[7],
1081 (UInt
)uuid
[8], (UInt
)uuid
[9], (UInt
)uuid
[10],
1082 (UInt
)uuid
[11], (UInt
)uuid
[12], (UInt
)uuid
[13],
1083 (UInt
)uuid
[14], (UInt
)uuid
[15] );
1084 VG_(message
)(Vg_DebugMsg
,
1085 "WARNING: for %s\n", di
->fsm
.filename
);
1087 unmap_image( &dsli
);
1088 /* unmap_image zeroes out dsli, so it's safe for "fail:" to
1089 re-try unmap_image. */
1094 /* Right. Finally we have our best try at the dwarf image, so go
1095 on to reading stuff out of it. */
1098 if (ML_(sli_is_valid
)(dsli
) && dsli
.szB
> 0) {
1099 // "_mscn" is "mach-o section"
1100 DiSlice debug_info_mscn
1101 = getsectdata(dsli
, "__DWARF", "__debug_info", NULL
);
1102 DiSlice debug_abbv_mscn
1103 = getsectdata(dsli
, "__DWARF", "__debug_abbrev", NULL
);
1104 DiSlice debug_line_mscn
1105 = getsectdata(dsli
, "__DWARF", "__debug_line", NULL
);
1106 DiSlice debug_str_mscn
1107 = getsectdata(dsli
, "__DWARF", "__debug_str", NULL
);
1108 DiSlice debug_ranges_mscn
1109 = getsectdata(dsli
, "__DWARF", "__debug_ranges", NULL
);
1110 DiSlice debug_loc_mscn
1111 = getsectdata(dsli
, "__DWARF", "__debug_loc", NULL
);
1113 /* It appears (jrs, 2014-oct-19) that section "__eh_frame" in
1114 segment "__TEXT" appears in both the main and dsym files, but
1115 only the main one gives the right results. Since it's in the
1116 __TEXT segment, we calculate the __eh_frame avma using its
1117 svma and the text bias, and that sounds reasonable. */
1118 Addr eh_frame_svma
= 0;
1119 DiSlice eh_frame_mscn
1120 = getsectdata(msli
, "__TEXT", "__eh_frame", &eh_frame_svma
);
1122 if (ML_(sli_is_valid
)(eh_frame_mscn
)) {
1123 vg_assert(di
->text_bias
== di
->text_debug_bias
);
1124 ML_(read_callframe_info_dwarf3
)(di
, eh_frame_mscn
,
1125 eh_frame_svma
+ di
->text_bias
,
1126 True
/*is_ehframe*/);
1129 if (ML_(sli_is_valid
)(debug_info_mscn
)) {
1130 if (VG_(clo_verbosity
) > 1) {
1132 VG_(message
)(Vg_DebugMsg
,
1133 "Reading dwarf3 for %s (%#lx) from %s"
1134 " (%lld %lld %lld %lld %lld %lld)\n",
1135 di
->fsm
.filename
, di
->text_avma
, dsymfilename
,
1136 debug_info_mscn
.szB
, debug_abbv_mscn
.szB
,
1137 debug_line_mscn
.szB
, debug_str_mscn
.szB
,
1138 debug_ranges_mscn
.szB
, debug_loc_mscn
.szB
1140 VG_(message
)(Vg_DebugMsg
,
1141 " reading dwarf3 from dsyms file\n");
1143 /* The old reader: line numbers and unwind info only */
1144 ML_(read_debuginfo_dwarf3
) ( di
,
1146 DiSlice_INVALID
, /* .debug_types */
1150 DiSlice_INVALID
/* ALT .debug_str */ );
1152 /* The new reader: read the DIEs in .debug_info to acquire
1153 information on variable types and locations or inline info.
1154 But only if the tool asks for it, or the user requests it on
1155 the command line. */
1156 if (VG_(clo_read_var_info
) /* the user or tool asked for it */
1157 || VG_(clo_read_inline_info
)) {
1158 ML_(new_dwarf3_reader
)(
1159 di
, debug_info_mscn
,
1160 DiSlice_INVALID
, /* .debug_types */
1166 DiSlice_INVALID
, /* ALT .debug_info */
1167 DiSlice_INVALID
, /* ALT .debug_abbv */
1168 DiSlice_INVALID
, /* ALT .debug_line */
1169 DiSlice_INVALID
/* ALT .debug_str */
1175 if (dsymfilename
) ML_(dinfo_free
)(dsymfilename
);
1185 ML_(symerr
)(di
, True
, "Error reading Mach-O object.");
1191 #endif // defined(VGO_darwin)
1193 /*--------------------------------------------------------------------*/
1195 /*--------------------------------------------------------------------*/