Add DRD suppression patterns for races triggered by std::ostream
[valgrind.git] / coregrind / m_debuginfo / readmacho.c
blob1f6a61fc4ba14c30ff980328d2bf94bc2b6538b1
2 /*--------------------------------------------------------------------*/
3 /*--- Reading of syms & debug info from Mach-O files. ---*/
4 /*--- readmacho.c ---*/
5 /*--------------------------------------------------------------------*/
7 /*
8 This file is part of Valgrind, a dynamic binary instrumentation
9 framework.
11 Copyright (C) 2005-2017 Apple Inc.
12 Greg Parker gparker@apple.com
14 This program is free software; you can redistribute it and/or
15 modify it under the terms of the GNU General Public License as
16 published by the Free Software Foundation; either version 2 of the
17 License, or (at your option) any later version.
19 This program is distributed in the hope that it will be useful, but
20 WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 General Public License for more details.
24 You should have received a copy of the GNU General Public License
25 along with this program; if not, write to the Free Software
26 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
27 02111-1307, USA.
29 The GNU General Public License is contained in the file COPYING.
32 #if defined(VGO_darwin)
34 #include "pub_core_basics.h"
35 #include "pub_core_vki.h"
36 #include "pub_core_libcbase.h"
37 #include "pub_core_libcprint.h"
38 #include "pub_core_libcassert.h"
39 #include "pub_core_libcfile.h"
40 #include "pub_core_libcproc.h"
41 #include "pub_core_aspacemgr.h" /* for mmaping debuginfo files */
42 #include "pub_core_machine.h" /* VG_ELF_CLASS */
43 #include "pub_core_options.h"
44 #include "pub_core_oset.h"
45 #include "pub_core_tooliface.h" /* VG_(needs) */
46 #include "pub_core_xarray.h"
47 #include "pub_core_clientstate.h"
48 #include "pub_core_debuginfo.h"
50 #include "priv_misc.h"
51 #include "priv_image.h"
52 #include "priv_d3basics.h"
53 #include "priv_tytypes.h"
54 #include "priv_storage.h"
55 #include "priv_readmacho.h"
56 #include "priv_readdwarf.h"
57 #include "priv_readdwarf3.h"
59 /* --- !!! --- EXTERNAL HEADERS start --- !!! --- */
60 #include <mach-o/loader.h>
61 #include <mach-o/nlist.h>
62 #include <mach-o/fat.h>
63 /* --- !!! --- EXTERNAL HEADERS end --- !!! --- */
65 #if VG_WORDSIZE == 4
66 # define MAGIC MH_MAGIC
67 # define MACH_HEADER mach_header
68 # define LC_SEGMENT_CMD LC_SEGMENT
69 # define SEGMENT_COMMAND segment_command
70 # define SECTION section
71 # define NLIST nlist
72 #else
73 # define MAGIC MH_MAGIC_64
74 # define MACH_HEADER mach_header_64
75 # define LC_SEGMENT_CMD LC_SEGMENT_64
76 # define SEGMENT_COMMAND segment_command_64
77 # define SECTION section_64
78 # define NLIST nlist_64
79 #endif
82 /*------------------------------------------------------------*/
83 /*--- ---*/
84 /*--- Mach-O file mapping/unmapping helpers ---*/
85 /*--- ---*/
86 /*------------------------------------------------------------*/
88 /* A DiSlice is used to handle the thin/fat distinction for MachO images.
89 (1) the entire mapped-in ("primary") image, fat headers, kitchen sink,
90 whatnot: the entire file. This is the DiImage* that is the backing
91 for the DiSlice.
92 (2) the Mach-O object of interest, which is presumably somewhere inside
93 the primary image. map_image_aboard() below, which generates this
94 info, will carefully check that the macho_ fields denote a section of
95 memory that falls entirely inside the primary image.
98 Bool ML_(is_macho_object_file)( const void* buf, SizeT szB )
100 /* (JRS: the Mach-O headers might not be in this mapped data,
101 because we only mapped a page for this initial check,
102 or at least not very much, and what's at the start of the file
103 is in general a so-called fat header. The Mach-O object we're
104 interested in could be arbitrarily far along the image, and so
105 we can't assume its header will fall within this page.) */
107 /* But we can say that either it's a fat object, in which case it
108 begins with a fat header, or it's unadorned Mach-O, in which
109 case it starts with a normal header. At least do what checks we
110 can to establish whether or not we're looking at something
111 sane. */
113 const struct fat_header* fh_be = buf;
114 const struct MACH_HEADER* mh = buf;
116 vg_assert(buf);
117 if (szB < sizeof(struct fat_header))
118 return False;
119 if (VG_(ntohl)(fh_be->magic) == FAT_MAGIC)
120 return True;
122 if (szB < sizeof(struct MACH_HEADER))
123 return False;
124 if (mh->magic == MAGIC)
125 return True;
127 return False;
131 /* Unmap an image mapped in by map_image_aboard. */
132 static void unmap_image ( /*MOD*/DiSlice* sli )
134 vg_assert(sli);
135 if (ML_(sli_is_valid)(*sli)) {
136 ML_(img_done)(sli->img);
137 *sli = DiSlice_INVALID;
142 /* Open the given file, find the thin part if necessary, do some
143 checks, and return a DiSlice containing details of both the thin
144 part and (implicitly, via the contained DiImage*) the fat part.
145 returns DiSlice_INVALID if it fails. If it succeeds, the returned
146 slice is guaranteed to refer to a valid(ish) Mach-O image. */
147 static DiSlice map_image_aboard ( DebugInfo* di, /* only for err msgs */
148 const HChar* filename )
150 DiSlice sli = DiSlice_INVALID;
152 /* First off, try to map the thing in. */
153 DiImage* mimg = ML_(img_from_local_file)(filename);
154 if (mimg == NULL) {
155 VG_(message)(Vg_UserMsg, "warning: connection to image %s failed\n",
156 filename );
157 VG_(message)(Vg_UserMsg, " no symbols or debug info loaded\n" );
158 return DiSlice_INVALID;
161 /* Now we have a viable DiImage* for it. Look for the embedded
162 Mach-O object. If not findable, close the image and fail. */
163 DiOffT fh_be_ioff = 0;
164 struct fat_header fh_be;
165 struct fat_header fh;
167 // Assume initially that we have a thin image, and narrow
168 // the bounds if it turns out to be fat. This stores |mimg| as
169 // |sli.img|, so NULL out |mimg| after this point, for the sake of
170 // clarity.
171 sli = ML_(sli_from_img)(mimg);
172 mimg = NULL;
174 // Check for fat header.
175 if (ML_(img_size)(sli.img) < sizeof(struct fat_header)) {
176 ML_(symerr)(di, True, "Invalid Mach-O file (0 too small).");
177 goto close_and_fail;
180 // Fat header is always BIG-ENDIAN
181 ML_(img_get)(&fh_be, sli.img, fh_be_ioff, sizeof(fh_be));
182 VG_(memset)(&fh, 0, sizeof(fh));
183 fh.magic = VG_(ntohl)(fh_be.magic);
184 fh.nfat_arch = VG_(ntohl)(fh_be.nfat_arch);
185 if (fh.magic == FAT_MAGIC) {
186 // Look for a good architecture.
187 if (ML_(img_size)(sli.img) < sizeof(struct fat_header)
188 + fh.nfat_arch * sizeof(struct fat_arch)) {
189 ML_(symerr)(di, True, "Invalid Mach-O file (1 too small).");
190 goto close_and_fail;
192 DiOffT arch_be_ioff;
193 Int f;
194 for (f = 0, arch_be_ioff = sizeof(struct fat_header);
195 f < fh.nfat_arch;
196 f++, arch_be_ioff += sizeof(struct fat_arch)) {
197 # if defined(VGA_ppc)
198 Int cputype = CPU_TYPE_POWERPC;
199 # elif defined(VGA_ppc64be)
200 Int cputype = CPU_TYPE_POWERPC64BE;
201 # elif defined(VGA_ppc64le)
202 Int cputype = CPU_TYPE_POWERPC64LE;
203 # elif defined(VGA_x86)
204 Int cputype = CPU_TYPE_X86;
205 # elif defined(VGA_amd64)
206 Int cputype = CPU_TYPE_X86_64;
207 # else
208 # error "unknown architecture"
209 # endif
210 struct fat_arch arch_be;
211 struct fat_arch arch;
212 ML_(img_get)(&arch_be, sli.img, arch_be_ioff, sizeof(arch_be));
213 VG_(memset)(&arch, 0, sizeof(arch));
214 arch.cputype = VG_(ntohl)(arch_be.cputype);
215 arch.cpusubtype = VG_(ntohl)(arch_be.cpusubtype);
216 arch.offset = VG_(ntohl)(arch_be.offset);
217 arch.size = VG_(ntohl)(arch_be.size);
218 if (arch.cputype == cputype) {
219 if (ML_(img_size)(sli.img) < arch.offset + arch.size) {
220 ML_(symerr)(di, True, "Invalid Mach-O file (2 too small).");
221 goto close_and_fail;
223 /* Found a suitable arch. Narrow down the slice accordingly. */
224 sli.ioff = arch.offset;
225 sli.szB = arch.size;
226 break;
229 if (f == fh.nfat_arch) {
230 ML_(symerr)(di, True,
231 "No acceptable architecture found in fat file.");
232 goto close_and_fail;
236 /* Sanity check what we found. */
238 /* assured by logic above */
239 vg_assert(ML_(img_size)(sli.img) >= sizeof(struct fat_header));
241 if (sli.szB < sizeof(struct MACH_HEADER)) {
242 ML_(symerr)(di, True, "Invalid Mach-O file (3 too small).");
243 goto close_and_fail;
246 if (sli.szB > ML_(img_size)(sli.img)) {
247 ML_(symerr)(di, True, "Invalid Mach-O file (thin bigger than fat).");
248 goto close_and_fail;
251 if (sli.ioff >= 0 && sli.ioff + sli.szB <= ML_(img_size)(sli.img)) {
252 /* thin entirely within fat, as expected */
253 } else {
254 ML_(symerr)(di, True, "Invalid Mach-O file (thin not inside fat).");
255 goto close_and_fail;
258 /* Peer at the Mach header for the thin object, starting at the
259 beginning of the slice, to check it's at least marginally
260 sane. */
261 struct MACH_HEADER mh;
262 ML_(cur_read_get)(&mh, ML_(cur_from_sli)(sli), sizeof(mh));
263 if (mh.magic != MAGIC) {
264 ML_(symerr)(di, True, "Invalid Mach-O file (bad magic).");
265 goto close_and_fail;
268 if (sli.szB < sizeof(struct MACH_HEADER) + mh.sizeofcmds) {
269 ML_(symerr)(di, True, "Invalid Mach-O file (4 too small).");
270 goto close_and_fail;
273 /* "main image is plausible" */
274 vg_assert(sli.img);
275 vg_assert(ML_(img_size)(sli.img) > 0);
276 /* "thin image exists and is a sub-part (or all) of main image" */
277 vg_assert(sli.ioff >= 0);
278 vg_assert(sli.szB > 0);
279 vg_assert(sli.ioff + sli.szB <= ML_(img_size)(sli.img));
280 return sli; /* success */
281 /*NOTREACHED*/
283 close_and_fail:
284 unmap_image(&sli);
285 return DiSlice_INVALID; /* bah! */
289 /*------------------------------------------------------------*/
290 /*--- ---*/
291 /*--- Mach-O symbol table reading ---*/
292 /*--- ---*/
293 /*------------------------------------------------------------*/
295 /* Read a symbol table (nlist). Add the resulting candidate symbols
296 to 'syms'; the caller will post-process them and hand them off to
297 ML_(addSym) itself. */
298 static
299 void read_symtab( /*OUT*/XArray* /* DiSym */ syms,
300 struct _DebugInfo* di,
301 DiCursor symtab_cur, UInt symtab_count,
302 DiCursor strtab_cur, UInt strtab_sz )
304 Int i;
305 DiSym disym;
307 // "start_according_to_valgrind"
308 static const HChar* s_a_t_v = NULL; /* do not make non-static */
310 for (i = 0; i < symtab_count; i++) {
311 struct NLIST nl;
312 ML_(cur_read_get)(&nl,
313 ML_(cur_plus)(symtab_cur, i * sizeof(struct NLIST)),
314 sizeof(nl));
316 Addr sym_addr = 0;
317 if ((nl.n_type & N_TYPE) == N_SECT) {
318 sym_addr = di->text_bias + nl.n_value;
319 /*} else if ((nl.n_type & N_TYPE) == N_ABS) {
320 GrP fixme don't ignore absolute symbols?
321 sym_addr = nl.n_value; */
322 } else {
323 continue;
326 if (di->trace_symtab) {
327 HChar* str = ML_(cur_read_strdup)(
328 ML_(cur_plus)(strtab_cur, nl.n_un.n_strx),
329 "di.read_symtab.1");
330 VG_(printf)("nlist raw: avma %010lx %s\n", sym_addr, str );
331 ML_(dinfo_free)(str);
334 /* If no part of the symbol falls within the mapped range,
335 ignore it. */
336 if (sym_addr <= di->text_avma
337 || sym_addr >= di->text_avma+di->text_size) {
338 continue;
341 /* skip names which point outside the string table;
342 following these risks segfaulting Valgrind */
343 if (nl.n_un.n_strx < 0 || nl.n_un.n_strx >= strtab_sz) {
344 continue;
347 HChar* name
348 = ML_(cur_read_strdup)( ML_(cur_plus)(strtab_cur, nl.n_un.n_strx),
349 "di.read_symtab.2");
351 /* skip nameless symbols; these appear to be common, but
352 useless */
353 if (*name == 0) {
354 ML_(dinfo_free)(name);
355 continue;
358 VG_(bzero_inline)(&disym, sizeof(disym));
359 disym.avmas.main = sym_addr;
360 SET_TOCPTR_AVMA(disym, 0);
361 SET_LOCAL_EP_AVMA(disym, 0);
362 disym.pri_name = ML_(addStr)(di, name, -1);
363 disym.sec_names = NULL;
364 disym.size = // let canonicalize fix it
365 di->text_avma+di->text_size - sym_addr;
366 disym.isText = True;
367 disym.isIFunc = False;
368 disym.isGlobal = False;
369 // Lots of user function names get prepended with an underscore. Eg. the
370 // function 'f' becomes the symbol '_f'. And the "below main"
371 // function is called "start". So we skip the leading underscore, and
372 // if we see 'start' and --show-below-main=no, we rename it as
373 // "start_according_to_valgrind", which makes it easy to spot later
374 // and display as "(below main)".
375 if (disym.pri_name[0] == '_') {
376 disym.pri_name++;
378 else if (!VG_(clo_show_below_main) && VG_STREQ(disym.pri_name, "start")) {
379 if (s_a_t_v == NULL)
380 s_a_t_v = ML_(addStr)(di, "start_according_to_valgrind", -1);
381 vg_assert(s_a_t_v);
382 disym.pri_name = s_a_t_v;
385 vg_assert(disym.pri_name);
386 VG_(addToXA)( syms, &disym );
387 ML_(dinfo_free)(name);
392 /* Compare DiSyms by their start address, and for equal addresses, use
393 the primary name as a secondary sort key. */
394 static Int cmp_DiSym_by_start_then_name ( const void* v1, const void* v2 )
396 const DiSym* s1 = (const DiSym*)v1;
397 const DiSym* s2 = (const DiSym*)v2;
398 if (s1->avmas.main < s2->avmas.main) return -1;
399 if (s1->avmas.main > s2->avmas.main) return 1;
400 return VG_(strcmp)(s1->pri_name, s2->pri_name);
403 /* 'cand' is a bunch of candidate symbols obtained by reading
404 nlist-style symbol table entries. Their ends may overlap, so sort
405 them and truncate them accordingly. The code in this routine is
406 copied almost verbatim from read_symbol_table() in readxcoff.c. */
407 static void tidy_up_cand_syms ( /*MOD*/XArray* /* of DiSym */ syms,
408 Bool trace_symtab )
410 Word nsyms, i, j, k, m;
412 nsyms = VG_(sizeXA)(syms);
414 VG_(setCmpFnXA)(syms, cmp_DiSym_by_start_then_name);
415 VG_(sortXA)(syms);
417 /* We only know for sure the start addresses (actual VMAs) of
418 symbols, and an overestimation of their end addresses. So sort
419 by start address, then clip each symbol so that its end address
420 does not overlap with the next one along.
422 There is a small refinement: if a group of symbols have the same
423 address, treat them as a group: find the next symbol along that
424 has a higher start address, and clip all of the group
425 accordingly. This clips the group as a whole so as not to
426 overlap following symbols. This leaves prefersym() in
427 storage.c, which is not nlist-specific, to later decide which of
428 the symbols in the group to keep.
430 Another refinement is that we need to get rid of symbols which,
431 after clipping, have identical starts, ends, and names. So the
432 sorting uses the name as a secondary key.
435 for (i = 0; i < nsyms; i++) {
436 for (k = i+1;
437 k < nsyms
438 && ((DiSym*)VG_(indexXA)(syms,i))->avmas.main
439 == ((DiSym*)VG_(indexXA)(syms,k))->avmas.main;
440 k++)
442 /* So now [i .. k-1] is a group all with the same start address.
443 Clip their ending addresses so they don't overlap [k]. In
444 the normal case (no overlaps), k == i+1. */
445 if (k < nsyms) {
446 DiSym* next = (DiSym*)VG_(indexXA)(syms,k);
447 for (m = i; m < k; m++) {
448 DiSym* here = (DiSym*)VG_(indexXA)(syms,m);
449 vg_assert(here->avmas.main < next->avmas.main);
450 if (here->avmas.main + here->size > next->avmas.main)
451 here->size = next->avmas.main - here->avmas.main;
454 i = k-1;
455 vg_assert(i <= nsyms);
458 j = 0;
459 if (nsyms > 0) {
460 j = 1;
461 for (i = 1; i < nsyms; i++) {
462 DiSym *s_j1, *s_j, *s_i;
463 vg_assert(j <= i);
464 s_j1 = (DiSym*)VG_(indexXA)(syms, j-1);
465 s_j = (DiSym*)VG_(indexXA)(syms, j);
466 s_i = (DiSym*)VG_(indexXA)(syms, i);
467 if (s_i->avmas.main != s_j1->avmas.main
468 || s_i->size != s_j1->size
469 || 0 != VG_(strcmp)(s_i->pri_name, s_j1->pri_name)) {
470 *s_j = *s_i;
471 j++;
472 } else {
473 if (trace_symtab)
474 VG_(printf)("nlist cleanup: dump duplicate avma %010lx %s\n",
475 s_i->avmas.main, s_i->pri_name );
479 vg_assert(j >= 0 && j <= nsyms);
480 VG_(dropTailXA)(syms, nsyms - j);
484 /*------------------------------------------------------------*/
485 /*--- ---*/
486 /*--- Mach-O top-level processing ---*/
487 /*--- ---*/
488 /*------------------------------------------------------------*/
490 #if !defined(APPLE_DSYM_EXT_AND_SUBDIRECTORY)
491 #define APPLE_DSYM_EXT_AND_SUBDIRECTORY ".dSYM/Contents/Resources/DWARF/"
492 #endif
495 static Bool file_exists_p(const HChar *path)
497 struct vg_stat sbuf;
498 SysRes res = VG_(stat)(path, &sbuf);
499 return sr_isError(res) ? False : True;
503 /* Search for an existing dSYM file as a possible separate debug file.
504 Adapted from gdb. */
505 static HChar *
506 find_separate_debug_file (const HChar *executable_name)
508 const HChar *basename_str;
509 HChar *dot_ptr;
510 HChar *slash_ptr;
511 HChar *dsymfile;
513 /* Make sure the object file name itself doesn't contain ".dSYM" in it or we
514 will end up with an infinite loop where after we add a dSYM symbol file,
515 it will then enter this function asking if there is a debug file for the
516 dSYM file itself. */
517 if (VG_(strcasestr) (executable_name, ".dSYM") == NULL)
519 /* Check for the existence of a .dSYM file for a given executable. */
520 basename_str = VG_(basename) (executable_name);
521 dsymfile = ML_(dinfo_zalloc)("di.readmacho.dsymfile",
522 VG_(strlen) (executable_name)
523 + VG_(strlen) (APPLE_DSYM_EXT_AND_SUBDIRECTORY)
524 + VG_(strlen) (basename_str)
528 /* First try for the dSYM in the same directory as the original file. */
529 VG_(strcpy) (dsymfile, executable_name);
530 VG_(strcat) (dsymfile, APPLE_DSYM_EXT_AND_SUBDIRECTORY);
531 VG_(strcat) (dsymfile, basename_str);
533 if (file_exists_p (dsymfile))
534 return dsymfile;
536 /* Now search for any parent directory that has a '.' in it so we can find
537 Mac OS X applications, bundles, plugins, and any other kinds of files.
538 Mac OS X application bundles wil have their program in
539 "/some/path/MyApp.app/Contents/MacOS/MyApp" (or replace ".app" with
540 ".bundle" or ".plugin" for other types of bundles). So we look for any
541 prior '.' character and try appending the apple dSYM extension and
542 subdirectory and see if we find an existing dSYM file (in the above
543 MyApp example the dSYM would be at either:
544 "/some/path/MyApp.app.dSYM/Contents/Resources/DWARF/MyApp" or
545 "/some/path/MyApp.dSYM/Contents/Resources/DWARF/MyApp". */
546 VG_(strcpy) (dsymfile, VG_(dirname) (executable_name));
547 while ((dot_ptr = VG_(strrchr) (dsymfile, '.')))
549 /* Find the directory delimiter that follows the '.' character since
550 we now look for a .dSYM that follows any bundle extension. */
551 slash_ptr = VG_(strchr) (dot_ptr, '/');
552 if (slash_ptr)
554 /* NULL terminate the string at the '/' character and append
555 the path down to the dSYM file. */
556 *slash_ptr = '\0';
557 VG_(strcat) (slash_ptr, APPLE_DSYM_EXT_AND_SUBDIRECTORY);
558 VG_(strcat) (slash_ptr, basename_str);
559 if (file_exists_p (dsymfile))
560 return dsymfile;
563 /* NULL terminate the string at the '.' character and append
564 the path down to the dSYM file. */
565 *dot_ptr = '\0';
566 VG_(strcat) (dot_ptr, APPLE_DSYM_EXT_AND_SUBDIRECTORY);
567 VG_(strcat) (dot_ptr, basename_str);
568 if (file_exists_p (dsymfile))
569 return dsymfile;
571 /* NULL terminate the string at the '.' locatated by the strrchr()
572 function again. */
573 *dot_ptr = '\0';
575 /* We found a previous extension '.' character and did not find a
576 dSYM file so now find previous directory delimiter so we don't
577 try multiple times on a file name that may have a version number
578 in it such as "/some/path/MyApp.6.0.4.app". */
579 slash_ptr = VG_(strrchr) (dsymfile, '/');
580 if (!slash_ptr)
581 break;
582 /* NULL terminate the string at the previous directory character
583 and search again. */
584 *slash_ptr = '\0';
588 return NULL;
592 /* Given a DiSlice covering the entire Mach-O thin image, find the
593 DiSlice for the specified (segname, sectname) pairing, if
594 possible. Also return the section's .addr field in *svma if
595 svma is non-NULL. */
596 static DiSlice getsectdata ( DiSlice img,
597 const HChar *segname, const HChar *sectname,
598 /*OUT*/Addr* svma )
600 DiCursor cur = ML_(cur_from_sli)(img);
602 struct MACH_HEADER mh;
603 ML_(cur_step_get)(&mh, &cur, sizeof(mh));
605 Int c;
606 for (c = 0; c < mh.ncmds; c++) {
607 struct load_command cmd;
608 ML_(cur_read_get)(&cmd, cur, sizeof(cmd));
609 if (cmd.cmd == LC_SEGMENT_CMD) {
610 struct SEGMENT_COMMAND seg;
611 ML_(cur_read_get)(&seg, cur, sizeof(seg));
612 if (0 == VG_(strncmp)(&seg.segname[0],
613 segname, sizeof(seg.segname))) {
614 DiCursor sects_cur = ML_(cur_plus)(cur, sizeof(seg));
615 Int s;
616 for (s = 0; s < seg.nsects; s++) {
617 struct SECTION sect;
618 ML_(cur_step_get)(&sect, &sects_cur, sizeof(sect));
619 if (0 == VG_(strncmp)(sect.sectname, sectname,
620 sizeof(sect.sectname))) {
621 DiSlice res = img;
622 res.ioff = sect.offset;
623 res.szB = sect.size;
624 if (svma) *svma = (Addr)sect.addr;
625 return res;
631 cur = ML_(cur_plus)(cur, cmd.cmdsize);
634 return DiSlice_INVALID;
638 /* Brute force just simply search for uuid[0..15] in |sli| */
639 static Bool check_uuid_matches ( DiSlice sli, UChar* uuid )
641 if (sli.szB < 16)
642 return False;
644 /* Work through the slice in 1 KB chunks. */
645 UChar first = uuid[0];
646 DiOffT min_off = sli.ioff;
647 DiOffT max1_off = sli.ioff + sli.szB;
648 DiOffT curr_off = min_off;
649 vg_assert(min_off < max1_off);
650 while (1) {
651 vg_assert(curr_off >= min_off && curr_off <= max1_off);
652 if (curr_off == max1_off) break;
653 DiOffT avail = max1_off - curr_off;
654 vg_assert(avail > 0 && avail <= max1_off);
655 if (avail > 1024) avail = 1024;
656 UChar buf[1024];
657 SizeT nGot = ML_(img_get_some)(buf, sli.img, curr_off, avail);
658 vg_assert(nGot >= 1 && nGot <= avail);
659 UInt i;
660 /* Scan through the 1K chunk we got, looking for the start char. */
661 for (i = 0; i < (UInt)nGot; i++) {
662 if (LIKELY(buf[i] != first))
663 continue;
664 /* first char matches. See if we can get 16 bytes at this
665 offset, and compare. */
666 if (curr_off + i < max1_off && max1_off - (curr_off + i) >= 16) {
667 UChar buff16[16];
668 ML_(img_get)(&buff16[0], sli.img, curr_off + i, 16);
669 if (0 == VG_(memcmp)(&buff16[0], &uuid[0], 16))
670 return True;
673 curr_off += nGot;
675 return False;
679 /* Heuristic kludge: return True if this looks like an installed
680 standard library; hence we shouldn't consider automagically running
681 dsymutil on it. */
682 static Bool is_systemish_library_name ( const HChar* name )
684 vg_assert(name);
685 if (0 == VG_(strncasecmp)(name, "/usr/", 5)
686 || 0 == VG_(strncasecmp)(name, "/bin/", 5)
687 || 0 == VG_(strncasecmp)(name, "/sbin/", 6)
688 || 0 == VG_(strncasecmp)(name, "/opt/", 5)
689 || 0 == VG_(strncasecmp)(name, "/sw/", 4)
690 || 0 == VG_(strncasecmp)(name, "/System/", 8)
691 || 0 == VG_(strncasecmp)(name, "/Library/", 9)
692 || 0 == VG_(strncasecmp)(name, "/Applications/", 14)) {
693 return True;
694 } else {
695 return False;
700 Bool ML_(read_macho_debug_info)( struct _DebugInfo* di )
702 DiSlice msli = DiSlice_INVALID; // the main image
703 DiSlice dsli = DiSlice_INVALID; // the debuginfo image
704 DiCursor sym_cur = DiCursor_INVALID;
705 DiCursor dysym_cur = DiCursor_INVALID;
706 HChar* dsymfilename = NULL;
707 Bool have_uuid = False;
708 UChar uuid[16];
709 Word i;
710 const DebugInfoMapping* rx_map = NULL;
711 const DebugInfoMapping* rw_map = NULL;
713 /* mmap the object file to look for di->soname and di->text_bias
714 and uuid and nlist */
716 /* This should be ensured by our caller (that we're in the accept
717 state). */
718 vg_assert(di->fsm.have_rx_map);
719 vg_assert(di->fsm.have_rw_map);
721 for (i = 0; i < VG_(sizeXA)(di->fsm.maps); i++) {
722 const DebugInfoMapping* map = VG_(indexXA)(di->fsm.maps, i);
723 if (map->rx && !rx_map)
724 rx_map = map;
725 if (map->rw && !rw_map)
726 rw_map = map;
727 if (rx_map && rw_map)
728 break;
730 vg_assert(rx_map);
731 vg_assert(rw_map);
733 if (VG_(clo_verbosity) > 1)
734 VG_(message)(Vg_DebugMsg,
735 "%s (rx at %#lx, rw at %#lx)\n", di->fsm.filename,
736 rx_map->avma, rw_map->avma );
738 VG_(memset)(&uuid, 0, sizeof(uuid));
740 msli = map_image_aboard( di, di->fsm.filename );
741 if (!ML_(sli_is_valid)(msli)) {
742 ML_(symerr)(di, False, "Connect to main image failed.");
743 goto fail;
746 vg_assert(msli.img != NULL && msli.szB > 0);
748 /* Poke around in the Mach-O header, to find some important
749 stuff. */
750 // Find LC_SYMTAB and LC_DYSYMTAB, if present.
751 // Read di->soname from LC_ID_DYLIB if present,
752 // or from LC_ID_DYLINKER if present,
753 // or use "NONE".
754 // Get di->text_bias (aka slide) based on the corresponding LC_SEGMENT
755 // Get uuid for later dsym search
757 di->text_bias = 0;
760 DiCursor cmd_cur = ML_(cur_from_sli)(msli);
762 struct MACH_HEADER mh;
763 ML_(cur_step_get)(&mh, &cmd_cur, sizeof(mh));
765 /* Now cur_cmd points just after the Mach header, right at the
766 start of the load commands, which is where we need it to start
767 the following loop. */
769 Int c;
770 for (c = 0; c < mh.ncmds; c++) {
771 struct load_command cmd;
772 ML_(cur_read_get)(&cmd, cmd_cur, sizeof(cmd));
774 if (cmd.cmd == LC_SYMTAB) {
775 sym_cur = cmd_cur;
777 else if (cmd.cmd == LC_DYSYMTAB) {
778 dysym_cur = cmd_cur;
780 else if (cmd.cmd == LC_ID_DYLIB && mh.filetype == MH_DYLIB) {
781 // GrP fixme bundle?
782 struct dylib_command dcmd;
783 ML_(cur_read_get)(&dcmd, cmd_cur, sizeof(dcmd));
784 DiCursor dylibname_cur
785 = ML_(cur_plus)(cmd_cur, dcmd.dylib.name.offset);
786 HChar* dylibname
787 = ML_(cur_read_strdup)(dylibname_cur, "di.rmdi.1");
788 HChar* soname = VG_(strrchr)(dylibname, '/');
789 if (!soname) soname = dylibname;
790 else soname++;
791 di->soname = ML_(dinfo_strdup)("di.readmacho.dylibname",
792 soname);
793 ML_(dinfo_free)(dylibname);
795 else if (cmd.cmd==LC_ID_DYLINKER && mh.filetype==MH_DYLINKER) {
796 struct dylinker_command dcmd;
797 ML_(cur_read_get)(&dcmd, cmd_cur, sizeof(dcmd));
798 DiCursor dylinkername_cur
799 = ML_(cur_plus)(cmd_cur, dcmd.name.offset);
800 HChar* dylinkername
801 = ML_(cur_read_strdup)(dylinkername_cur, "di.rmdi.2");
802 HChar* soname = VG_(strrchr)(dylinkername, '/');
803 if (!soname) soname = dylinkername;
804 else soname++;
805 di->soname = ML_(dinfo_strdup)("di.readmacho.dylinkername",
806 soname);
807 ML_(dinfo_free)(dylinkername);
810 // A comment from Julian about why varinfo[35] fail:
812 // My impression is, from comparing the output of otool -l for these
813 // executables with the logic in ML_(read_macho_debug_info),
814 // specifically the part that begins "else if (cmd->cmd ==
815 // LC_SEGMENT_CMD) {", that it's a complete hack which just happens
816 // to work ok for text symbols. In particular, it appears to assume
817 // that in a "struct load_command" of type LC_SEGMENT_CMD, the first
818 // "struct SEGMENT_COMMAND" inside it is going to contain the info we
819 // need. However, otool -l shows, and also the Apple docs state,
820 // that a struct load_command may contain an arbitrary number of
821 // struct SEGMENT_COMMANDs, so I'm not sure why it's OK to merely
822 // snarf the first. But I'm not sure about this.
824 // The "Try for __DATA" block below simply adds acquisition of data
825 // svma/bias values using the same assumption. It also needs
826 // (probably) to deal with bss sections, but I don't understand how
827 // this all ties together really, so it requires further study.
829 // If you can get your head around the relationship between MachO
830 // segments, sections and load commands, this might be relatively
831 // easy to fix properly.
833 // Basically we need to come up with plausible numbers for di->
834 // {text,data,bss}_{avma,svma}, from which the _bias numbers are
835 // then trivially derived. Then I think the debuginfo reader should
836 // work pretty well.
837 else if (cmd.cmd == LC_SEGMENT_CMD) {
838 struct SEGMENT_COMMAND seg;
839 ML_(cur_read_get)(&seg, cmd_cur, sizeof(seg));
840 /* Try for __TEXT */
841 if (!di->text_present
842 && 0 == VG_(strcmp)(&seg.segname[0], "__TEXT")
843 /* DDD: is the next line a kludge? -- JRS */
844 && seg.fileoff == 0 && seg.filesize != 0) {
845 di->text_present = True;
846 di->text_svma = (Addr)seg.vmaddr;
847 di->text_avma = rx_map->avma;
848 di->text_size = seg.vmsize;
849 di->text_bias = di->text_avma - di->text_svma;
850 /* Make the _debug_ values be the same as the
851 svma/bias for the primary object, since there is
852 no secondary (debuginfo) object, but nevertheless
853 downstream biasing of Dwarf3 relies on the
854 _debug_ values. */
855 di->text_debug_svma = di->text_svma;
856 di->text_debug_bias = di->text_bias;
858 /* Try for __DATA */
859 if (!di->data_present
860 && 0 == VG_(strcmp)(&seg.segname[0], "__DATA")
861 /* && DDD:seg->fileoff == 0 */ && seg.filesize != 0) {
862 di->data_present = True;
863 di->data_svma = (Addr)seg.vmaddr;
864 di->data_avma = rw_map->avma;
865 di->data_size = seg.vmsize;
866 di->data_bias = di->data_avma - di->data_svma;
867 di->data_debug_svma = di->data_svma;
868 di->data_debug_bias = di->data_bias;
871 else if (cmd.cmd == LC_UUID) {
872 ML_(cur_read_get)(&uuid, cmd_cur, sizeof(uuid));
873 have_uuid = True;
875 // Move the cursor along
876 cmd_cur = ML_(cur_plus)(cmd_cur, cmd.cmdsize);
880 if (!di->soname) {
881 di->soname = ML_(dinfo_strdup)("di.readmacho.noname", "NONE");
884 if (di->trace_symtab) {
885 VG_(printf)("\n");
886 VG_(printf)("SONAME = %s\n", di->soname);
887 VG_(printf)("\n");
890 /* Now we have the base object to hand. Read symbols from it. */
892 // We already asserted that ..
893 vg_assert(msli.img != NULL && msli.szB > 0);
895 if (ML_(cur_is_valid)(sym_cur) && ML_(cur_is_valid)(dysym_cur)) {
897 struct symtab_command symcmd;
898 struct dysymtab_command dysymcmd;
900 ML_(cur_read_get)(&symcmd, sym_cur, sizeof(symcmd));
901 ML_(cur_read_get)(&dysymcmd, dysym_cur, sizeof(dysymcmd));
903 /* Read nlist symbol table */
904 DiCursor syms = DiCursor_INVALID;
905 DiCursor strs = DiCursor_INVALID;
906 XArray* /* DiSym */ candSyms = NULL;
907 Word nCandSyms;
909 if (msli.szB < symcmd.stroff + symcmd.strsize
910 || msli.szB < symcmd.symoff + symcmd.nsyms
911 * sizeof(struct NLIST)) {
912 ML_(symerr)(di, False, "Invalid Mach-O file (5 too small).");
913 goto fail;
915 if (dysymcmd.ilocalsym + dysymcmd.nlocalsym > symcmd.nsyms
916 || dysymcmd.iextdefsym + dysymcmd.nextdefsym > symcmd.nsyms) {
917 ML_(symerr)(di, False, "Invalid Mach-O file (bad symbol table).");
918 goto fail;
921 syms = ML_(cur_plus)(ML_(cur_from_sli)(msli), symcmd.symoff);
922 strs = ML_(cur_plus)(ML_(cur_from_sli)(msli), symcmd.stroff);
924 if (VG_(clo_verbosity) > 1)
925 VG_(message)(Vg_DebugMsg,
926 " reading syms from primary file (%d %d)\n",
927 dysymcmd.nextdefsym, dysymcmd.nlocalsym );
929 /* Read candidate symbols into 'candSyms', so we can truncate
930 overlapping ends and generally tidy up, before presenting
931 them to ML_(addSym). */
932 candSyms = VG_(newXA)(
933 ML_(dinfo_zalloc), "di.readmacho.candsyms.1",
934 ML_(dinfo_free), sizeof(DiSym)
937 // extern symbols
938 read_symtab(candSyms,
940 ML_(cur_plus)(syms,
941 dysymcmd.iextdefsym * sizeof(struct NLIST)),
942 dysymcmd.nextdefsym, strs, symcmd.strsize);
943 // static and private_extern symbols
944 read_symtab(candSyms,
946 ML_(cur_plus)(syms,
947 dysymcmd.ilocalsym * sizeof(struct NLIST)),
948 dysymcmd.nlocalsym, strs, symcmd.strsize);
950 /* tidy up the cand syms -- trim overlapping ends. May resize
951 candSyms. */
952 tidy_up_cand_syms( candSyms, di->trace_symtab );
954 /* and finally present them to ML_(addSym) */
955 nCandSyms = VG_(sizeXA)( candSyms );
956 for (i = 0; i < nCandSyms; i++) {
957 DiSym* cand = (DiSym*) VG_(indexXA)( candSyms, i );
958 vg_assert(cand->pri_name != NULL);
959 vg_assert(cand->sec_names == NULL);
960 if (di->trace_symtab)
961 VG_(printf)("nlist final: acquire avma %010lx-%010lx %s\n",
962 cand->avmas.main, cand->avmas.main + cand->size - 1,
963 cand->pri_name );
964 ML_(addSym)( di, cand );
966 VG_(deleteXA)( candSyms );
969 /* If there's no UUID in the primary, don't even bother to try and
970 read any DWARF, since we won't be able to verify it matches.
971 Our policy is not to load debug info unless we can verify that
972 it matches the primary. Just declare success at this point.
973 And don't complain to the user, since that would cause us to
974 complain on objects compiled without -g. (Some versions of
975 XCode are observed to omit a UUID entry for object linked(?)
976 without -g. Others don't appear to omit it.) */
977 if (!have_uuid)
978 goto success;
980 /* mmap the dSYM file to look for DWARF debug info. If successful,
981 use the .macho_img and .macho_img_szB in dsli. */
983 dsymfilename = find_separate_debug_file( di->fsm.filename );
985 /* Try to load it. */
986 if (dsymfilename) {
987 Bool valid;
989 if (VG_(clo_verbosity) > 1)
990 VG_(message)(Vg_DebugMsg, " dSYM= %s\n", dsymfilename);
992 dsli = map_image_aboard( di, dsymfilename );
993 if (!ML_(sli_is_valid)(dsli)) {
994 ML_(symerr)(di, False, "Connect to debuginfo image failed "
995 "(first attempt).");
996 goto fail;
999 /* check it has the right uuid. */
1000 vg_assert(have_uuid);
1001 valid = dsli.img && dsli.szB > 0 && check_uuid_matches( dsli, uuid );
1002 if (valid)
1003 goto read_the_dwarf;
1005 if (VG_(clo_verbosity) > 1)
1006 VG_(message)(Vg_DebugMsg, " dSYM does not have "
1007 "correct UUID (out of date?)\n");
1010 /* There was no dsym file, or it doesn't match. We'll have to try
1011 regenerating it, unless --dsymutil=no, in which case just complain
1012 instead. */
1014 /* If this looks like a lib that we shouldn't run dsymutil on, just
1015 give up. (possible reasons: is system lib, or in /usr etc, or
1016 the dsym dir would not be writable by the user, or we're running
1017 as root) */
1018 vg_assert(di->fsm.filename);
1019 if (is_systemish_library_name(di->fsm.filename))
1020 goto success;
1022 if (!VG_(clo_dsymutil)) {
1023 if (VG_(clo_verbosity) == 1) {
1024 VG_(message)(Vg_DebugMsg, "%s:\n", di->fsm.filename);
1026 if (VG_(clo_verbosity) > 0)
1027 VG_(message)(Vg_DebugMsg, "%sdSYM directory %s; consider using "
1028 "--dsymutil=yes\n",
1029 VG_(clo_verbosity) > 1 ? " " : "",
1030 dsymfilename ? "has wrong UUID" : "is missing");
1031 goto success;
1034 /* Run dsymutil */
1036 { Int r;
1037 const HChar* dsymutil = "/usr/bin/dsymutil ";
1038 HChar* cmd = ML_(dinfo_zalloc)( "di.readmacho.tmp1",
1039 VG_(strlen)(dsymutil)
1040 + VG_(strlen)(di->fsm.filename)
1041 + 32 /* misc */ );
1042 VG_(strcpy)(cmd, dsymutil);
1043 if (0) VG_(strcat)(cmd, "--verbose ");
1044 VG_(strcat)(cmd, "\"");
1045 VG_(strcat)(cmd, di->fsm.filename);
1046 VG_(strcat)(cmd, "\"");
1047 VG_(message)(Vg_DebugMsg, "run: %s\n", cmd);
1048 r = VG_(system)( cmd );
1049 if (r)
1050 VG_(message)(Vg_DebugMsg, "run: %s FAILED\n", dsymutil);
1051 ML_(dinfo_free)(cmd);
1052 dsymfilename = find_separate_debug_file(di->fsm.filename);
1055 /* Try again to load it. */
1056 if (dsymfilename) {
1057 Bool valid;
1059 if (VG_(clo_verbosity) > 1)
1060 VG_(message)(Vg_DebugMsg, " dsyms= %s\n", dsymfilename);
1062 dsli = map_image_aboard( di, dsymfilename );
1063 if (!ML_(sli_is_valid)(dsli)) {
1064 ML_(symerr)(di, False, "Connect to debuginfo image failed "
1065 "(second attempt).");
1066 goto fail;
1069 /* check it has the right uuid. */
1070 vg_assert(have_uuid);
1071 vg_assert(have_uuid);
1072 valid = dsli.img && dsli.szB > 0 && check_uuid_matches( dsli, uuid );
1073 if (!valid) {
1074 if (VG_(clo_verbosity) > 0) {
1075 VG_(message)(Vg_DebugMsg,
1076 "WARNING: did not find expected UUID %02X%02X%02X%02X"
1077 "-%02X%02X-%02X%02X-%02X%02X-%02X%02X%02X%02X%02X%02X"
1078 " in dSYM dir\n",
1079 (UInt)uuid[0], (UInt)uuid[1], (UInt)uuid[2], (UInt)uuid[3],
1080 (UInt)uuid[4], (UInt)uuid[5], (UInt)uuid[6], (UInt)uuid[7],
1081 (UInt)uuid[8], (UInt)uuid[9], (UInt)uuid[10],
1082 (UInt)uuid[11], (UInt)uuid[12], (UInt)uuid[13],
1083 (UInt)uuid[14], (UInt)uuid[15] );
1084 VG_(message)(Vg_DebugMsg,
1085 "WARNING: for %s\n", di->fsm.filename);
1087 unmap_image( &dsli );
1088 /* unmap_image zeroes out dsli, so it's safe for "fail:" to
1089 re-try unmap_image. */
1090 goto fail;
1094 /* Right. Finally we have our best try at the dwarf image, so go
1095 on to reading stuff out of it. */
1097 read_the_dwarf:
1098 if (ML_(sli_is_valid)(dsli) && dsli.szB > 0) {
1099 // "_mscn" is "mach-o section"
1100 DiSlice debug_info_mscn
1101 = getsectdata(dsli, "__DWARF", "__debug_info", NULL);
1102 DiSlice debug_abbv_mscn
1103 = getsectdata(dsli, "__DWARF", "__debug_abbrev", NULL);
1104 DiSlice debug_line_mscn
1105 = getsectdata(dsli, "__DWARF", "__debug_line", NULL);
1106 DiSlice debug_str_mscn
1107 = getsectdata(dsli, "__DWARF", "__debug_str", NULL);
1108 DiSlice debug_ranges_mscn
1109 = getsectdata(dsli, "__DWARF", "__debug_ranges", NULL);
1110 DiSlice debug_loc_mscn
1111 = getsectdata(dsli, "__DWARF", "__debug_loc", NULL);
1113 /* It appears (jrs, 2014-oct-19) that section "__eh_frame" in
1114 segment "__TEXT" appears in both the main and dsym files, but
1115 only the main one gives the right results. Since it's in the
1116 __TEXT segment, we calculate the __eh_frame avma using its
1117 svma and the text bias, and that sounds reasonable. */
1118 Addr eh_frame_svma = 0;
1119 DiSlice eh_frame_mscn
1120 = getsectdata(msli, "__TEXT", "__eh_frame", &eh_frame_svma);
1122 if (ML_(sli_is_valid)(eh_frame_mscn)) {
1123 vg_assert(di->text_bias == di->text_debug_bias);
1124 ML_(read_callframe_info_dwarf3)(di, eh_frame_mscn,
1125 eh_frame_svma + di->text_bias,
1126 True/*is_ehframe*/);
1129 if (ML_(sli_is_valid)(debug_info_mscn)) {
1130 if (VG_(clo_verbosity) > 1) {
1131 if (0)
1132 VG_(message)(Vg_DebugMsg,
1133 "Reading dwarf3 for %s (%#lx) from %s"
1134 " (%lld %lld %lld %lld %lld %lld)\n",
1135 di->fsm.filename, di->text_avma, dsymfilename,
1136 debug_info_mscn.szB, debug_abbv_mscn.szB,
1137 debug_line_mscn.szB, debug_str_mscn.szB,
1138 debug_ranges_mscn.szB, debug_loc_mscn.szB
1140 VG_(message)(Vg_DebugMsg,
1141 " reading dwarf3 from dsyms file\n");
1143 /* The old reader: line numbers and unwind info only */
1144 ML_(read_debuginfo_dwarf3) ( di,
1145 debug_info_mscn,
1146 DiSlice_INVALID, /* .debug_types */
1147 debug_abbv_mscn,
1148 debug_line_mscn,
1149 debug_str_mscn,
1150 DiSlice_INVALID /* ALT .debug_str */ );
1152 /* The new reader: read the DIEs in .debug_info to acquire
1153 information on variable types and locations or inline info.
1154 But only if the tool asks for it, or the user requests it on
1155 the command line. */
1156 if (VG_(clo_read_var_info) /* the user or tool asked for it */
1157 || VG_(clo_read_inline_info)) {
1158 ML_(new_dwarf3_reader)(
1159 di, debug_info_mscn,
1160 DiSlice_INVALID, /* .debug_types */
1161 debug_abbv_mscn,
1162 debug_line_mscn,
1163 debug_str_mscn,
1164 debug_ranges_mscn,
1165 debug_loc_mscn,
1166 DiSlice_INVALID, /* ALT .debug_info */
1167 DiSlice_INVALID, /* ALT .debug_abbv */
1168 DiSlice_INVALID, /* ALT .debug_line */
1169 DiSlice_INVALID /* ALT .debug_str */
1175 if (dsymfilename) ML_(dinfo_free)(dsymfilename);
1177 success:
1178 unmap_image(&msli);
1179 unmap_image(&dsli);
1180 return True;
1182 /* NOTREACHED */
1184 fail:
1185 ML_(symerr)(di, True, "Error reading Mach-O object.");
1186 unmap_image(&msli);
1187 unmap_image(&dsli);
1188 return False;
1191 #endif // defined(VGO_darwin)
1193 /*--------------------------------------------------------------------*/
1194 /*--- end ---*/
1195 /*--------------------------------------------------------------------*/