drd: Add a consistency check
[valgrind.git] / coregrind / m_debuginfo / readmacho.c
blobf359d575bbc589e31e62417cc17241bef87c07c0
2 /*--------------------------------------------------------------------*/
3 /*--- Reading of syms & debug info from Mach-O files. ---*/
4 /*--- readmacho.c ---*/
5 /*--------------------------------------------------------------------*/
7 /*
8 This file is part of Valgrind, a dynamic binary instrumentation
9 framework.
11 Copyright (C) 2005-2013 Apple Inc.
12 Greg Parker gparker@apple.com
14 This program is free software; you can redistribute it and/or
15 modify it under the terms of the GNU General Public License as
16 published by the Free Software Foundation; either version 2 of the
17 License, or (at your option) any later version.
19 This program is distributed in the hope that it will be useful, but
20 WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 General Public License for more details.
24 You should have received a copy of the GNU General Public License
25 along with this program; if not, write to the Free Software
26 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
27 02111-1307, USA.
29 The GNU General Public License is contained in the file COPYING.
32 #if defined(VGO_darwin)
34 #include "pub_core_basics.h"
35 #include "pub_core_vki.h"
36 #include "pub_core_libcbase.h"
37 #include "pub_core_libcprint.h"
38 #include "pub_core_libcassert.h"
39 #include "pub_core_libcfile.h"
40 #include "pub_core_libcproc.h"
41 #include "pub_core_aspacemgr.h" /* for mmaping debuginfo files */
42 #include "pub_core_machine.h" /* VG_ELF_CLASS */
43 #include "pub_core_options.h"
44 #include "pub_core_oset.h"
45 #include "pub_core_tooliface.h" /* VG_(needs) */
46 #include "pub_core_xarray.h"
47 #include "pub_core_clientstate.h"
48 #include "pub_core_debuginfo.h"
50 #include "priv_misc.h"
51 #include "priv_image.h"
52 #include "priv_d3basics.h"
53 #include "priv_tytypes.h"
54 #include "priv_storage.h"
55 #include "priv_readmacho.h"
56 #include "priv_readdwarf.h"
57 #include "priv_readdwarf3.h"
59 /* --- !!! --- EXTERNAL HEADERS start --- !!! --- */
60 #include <mach-o/loader.h>
61 #include <mach-o/nlist.h>
62 #include <mach-o/fat.h>
63 /* --- !!! --- EXTERNAL HEADERS end --- !!! --- */
/* Pick the Mach-O constant and structure names that match the host
   word size, so the code below can be written once against the
   generic names MAGIC, MACH_HEADER, LC_SEGMENT_CMD, SEGMENT_COMMAND,
   SECTION and NLIST. */
#if VG_WORDSIZE != 4
   /* Not a 4-byte word: use the _64 flavours. */
#  define MAGIC            MH_MAGIC_64
#  define MACH_HEADER      mach_header_64
#  define LC_SEGMENT_CMD   LC_SEGMENT_64
#  define SEGMENT_COMMAND  segment_command_64
#  define SECTION          section_64
#  define NLIST            nlist_64
#else
   /* 4-byte word: use the plain (32-bit) flavours. */
#  define MAGIC            MH_MAGIC
#  define MACH_HEADER      mach_header
#  define LC_SEGMENT_CMD   LC_SEGMENT
#  define SEGMENT_COMMAND  segment_command
#  define SECTION          section
#  define NLIST            nlist
#endif
82 /*------------------------------------------------------------*/
83 /*--- ---*/
84 /*--- Mach-O file mapping/unmapping helpers ---*/
85 /*--- ---*/
86 /*------------------------------------------------------------*/
/* A DiSlice is used to handle the thin/fat distinction for Mach-O
   images.  It describes two things:
   (1) the entire mapped-in ("primary") image -- fat headers, kitchen
       sink, whatnot: the entire file.  This is the DiImage* that is
       the backing for the DiSlice.
   (2) the Mach-O object of interest, which is presumably somewhere
       inside the primary image.  map_image_aboard() below, which
       generates this info, will carefully check that the macho_
       fields denote a section of memory that falls entirely inside
       the primary image.
*/
98 Bool ML_(is_macho_object_file)( const void* buf, SizeT szB )
100 /* (JRS: the Mach-O headers might not be in this mapped data,
101 because we only mapped a page for this initial check,
102 or at least not very much, and what's at the start of the file
103 is in general a so-called fat header. The Mach-O object we're
104 interested in could be arbitrarily far along the image, and so
105 we can't assume its header will fall within this page.) */
107 /* But we can say that either it's a fat object, in which case it
108 begins with a fat header, or it's unadorned Mach-O, in which
109 case it starts with a normal header. At least do what checks we
110 can to establish whether or not we're looking at something
111 sane. */
113 const struct fat_header* fh_be = buf;
114 const struct MACH_HEADER* mh = buf;
116 vg_assert(buf);
117 if (szB < sizeof(struct fat_header))
118 return False;
119 if (VG_(ntohl)(fh_be->magic) == FAT_MAGIC)
120 return True;
122 if (szB < sizeof(struct MACH_HEADER))
123 return False;
124 if (mh->magic == MAGIC)
125 return True;
127 return False;
131 /* Unmap an image mapped in by map_image_aboard. */
132 static void unmap_image ( /*MOD*/DiSlice* sli )
134 vg_assert(sli);
135 if (ML_(sli_is_valid)(*sli)) {
136 ML_(img_done)(sli->img);
137 *sli = DiSlice_INVALID;
142 /* Open the given file, find the thin part if necessary, do some
143 checks, and return a DiSlice containing details of both the thin
144 part and (implicitly, via the contained DiImage*) the fat part.
145 returns DiSlice_INVALID if it fails. If it succeeds, the returned
146 slice is guaranteed to refer to a valid(ish) Mach-O image. */
147 static DiSlice map_image_aboard ( DebugInfo* di, /* only for err msgs */
148 const HChar* filename )
150 DiSlice sli = DiSlice_INVALID;
152 /* First off, try to map the thing in. */
153 DiImage* mimg = ML_(img_from_local_file)(filename);
154 if (mimg == NULL) {
155 VG_(message)(Vg_UserMsg, "warning: connection to image %s failed\n",
156 filename );
157 VG_(message)(Vg_UserMsg, " no symbols or debug info loaded\n" );
158 return DiSlice_INVALID;
161 /* Now we have a viable DiImage* for it. Look for the embedded
162 Mach-O object. If not findable, close the image and fail. */
163 DiOffT fh_be_ioff = 0;
164 struct fat_header fh_be;
165 struct fat_header fh;
167 // Assume initially that we have a thin image, and narrow
168 // the bounds if it turns out to be fat. This stores |mimg| as
169 // |sli.img|, so NULL out |mimg| after this point, for the sake of
170 // clarity.
171 sli = ML_(sli_from_img)(mimg);
172 mimg = NULL;
174 // Check for fat header.
175 if (ML_(img_size)(sli.img) < sizeof(struct fat_header)) {
176 ML_(symerr)(di, True, "Invalid Mach-O file (0 too small).");
177 goto close_and_fail;
180 // Fat header is always BIG-ENDIAN
181 ML_(img_get)(&fh_be, sli.img, fh_be_ioff, sizeof(fh_be));
182 VG_(memset)(&fh, 0, sizeof(fh));
183 fh.magic = VG_(ntohl)(fh_be.magic);
184 fh.nfat_arch = VG_(ntohl)(fh_be.nfat_arch);
185 if (fh.magic == FAT_MAGIC) {
186 // Look for a good architecture.
187 if (ML_(img_size)(sli.img) < sizeof(struct fat_header)
188 + fh.nfat_arch * sizeof(struct fat_arch)) {
189 ML_(symerr)(di, True, "Invalid Mach-O file (1 too small).");
190 goto close_and_fail;
192 DiOffT arch_be_ioff;
193 Int f;
194 for (f = 0, arch_be_ioff = sizeof(struct fat_header);
195 f < fh.nfat_arch;
196 f++, arch_be_ioff += sizeof(struct fat_arch)) {
197 # if defined(VGA_ppc)
198 Int cputype = CPU_TYPE_POWERPC;
199 # elif defined(VGA_ppc64be)
200 Int cputype = CPU_TYPE_POWERPC64BE;
201 # elif defined(VGA_ppc64le)
202 Int cputype = CPU_TYPE_POWERPC64LE;
203 # elif defined(VGA_x86)
204 Int cputype = CPU_TYPE_X86;
205 # elif defined(VGA_amd64)
206 Int cputype = CPU_TYPE_X86_64;
207 # else
208 # error "unknown architecture"
209 # endif
210 struct fat_arch arch_be;
211 struct fat_arch arch;
212 ML_(img_get)(&arch_be, sli.img, arch_be_ioff, sizeof(arch_be));
213 VG_(memset)(&arch, 0, sizeof(arch));
214 arch.cputype = VG_(ntohl)(arch_be.cputype);
215 arch.cpusubtype = VG_(ntohl)(arch_be.cpusubtype);
216 arch.offset = VG_(ntohl)(arch_be.offset);
217 arch.size = VG_(ntohl)(arch_be.size);
218 if (arch.cputype == cputype) {
219 if (ML_(img_size)(sli.img) < arch.offset + arch.size) {
220 ML_(symerr)(di, True, "Invalid Mach-O file (2 too small).");
221 goto close_and_fail;
223 /* Found a suitable arch. Narrow down the slice accordingly. */
224 sli.ioff = arch.offset;
225 sli.szB = arch.size;
226 break;
229 if (f == fh.nfat_arch) {
230 ML_(symerr)(di, True,
231 "No acceptable architecture found in fat file.");
232 goto close_and_fail;
236 /* Sanity check what we found. */
238 /* assured by logic above */
239 vg_assert(ML_(img_size)(sli.img) >= sizeof(struct fat_header));
241 if (sli.szB < sizeof(struct MACH_HEADER)) {
242 ML_(symerr)(di, True, "Invalid Mach-O file (3 too small).");
243 goto close_and_fail;
246 if (sli.szB > ML_(img_size)(sli.img)) {
247 ML_(symerr)(di, True, "Invalid Mach-O file (thin bigger than fat).");
248 goto close_and_fail;
251 if (sli.ioff >= 0 && sli.ioff + sli.szB <= ML_(img_size)(sli.img)) {
252 /* thin entirely within fat, as expected */
253 } else {
254 ML_(symerr)(di, True, "Invalid Mach-O file (thin not inside fat).");
255 goto close_and_fail;
258 /* Peer at the Mach header for the thin object, starting at the
259 beginning of the slice, to check it's at least marginally
260 sane. */
261 struct MACH_HEADER mh;
262 ML_(cur_read_get)(&mh, ML_(cur_from_sli)(sli), sizeof(mh));
263 if (mh.magic != MAGIC) {
264 ML_(symerr)(di, True, "Invalid Mach-O file (bad magic).");
265 goto close_and_fail;
268 if (sli.szB < sizeof(struct MACH_HEADER) + mh.sizeofcmds) {
269 ML_(symerr)(di, True, "Invalid Mach-O file (4 too small).");
270 goto close_and_fail;
273 /* "main image is plausible" */
274 vg_assert(sli.img);
275 vg_assert(ML_(img_size)(sli.img) > 0);
276 /* "thin image exists and is a sub-part (or all) of main image" */
277 vg_assert(sli.ioff >= 0);
278 vg_assert(sli.szB > 0);
279 vg_assert(sli.ioff + sli.szB <= ML_(img_size)(sli.img));
280 return sli; /* success */
281 /*NOTREACHED*/
283 close_and_fail:
284 unmap_image(&sli);
285 return DiSlice_INVALID; /* bah! */
289 /*------------------------------------------------------------*/
290 /*--- ---*/
291 /*--- Mach-O symbol table reading ---*/
292 /*--- ---*/
293 /*------------------------------------------------------------*/
295 /* Read a symbol table (nlist). Add the resulting candidate symbols
296 to 'syms'; the caller will post-process them and hand them off to
297 ML_(addSym) itself. */
298 static
299 void read_symtab( /*OUT*/XArray* /* DiSym */ syms,
300 struct _DebugInfo* di,
301 DiCursor symtab_cur, UInt symtab_count,
302 DiCursor strtab_cur, UInt strtab_sz )
304 Int i;
305 DiSym disym;
307 // "start_according_to_valgrind"
308 static const HChar* s_a_t_v = NULL; /* do not make non-static */
310 for (i = 0; i < symtab_count; i++) {
311 struct NLIST nl;
312 ML_(cur_read_get)(&nl,
313 ML_(cur_plus)(symtab_cur, i * sizeof(struct NLIST)),
314 sizeof(nl));
316 Addr sym_addr = 0;
317 if ((nl.n_type & N_TYPE) == N_SECT) {
318 sym_addr = di->text_bias + nl.n_value;
319 /*} else if ((nl.n_type & N_TYPE) == N_ABS) {
320 GrP fixme don't ignore absolute symbols?
321 sym_addr = nl.n_value; */
322 } else {
323 continue;
326 if (di->trace_symtab) {
327 HChar* str = ML_(cur_read_strdup)(
328 ML_(cur_plus)(strtab_cur, nl.n_un.n_strx),
329 "di.read_symtab.1");
330 VG_(printf)("nlist raw: avma %010lx %s\n", sym_addr, str );
331 ML_(dinfo_free)(str);
334 /* If no part of the symbol falls within the mapped range,
335 ignore it. */
336 if (sym_addr <= di->text_avma
337 || sym_addr >= di->text_avma+di->text_size) {
338 continue;
341 /* skip names which point outside the string table;
342 following these risks segfaulting Valgrind */
343 if (nl.n_un.n_strx < 0 || nl.n_un.n_strx >= strtab_sz) {
344 continue;
347 HChar* name
348 = ML_(cur_read_strdup)( ML_(cur_plus)(strtab_cur, nl.n_un.n_strx),
349 "di.read_symtab.2");
351 /* skip nameless symbols; these appear to be common, but
352 useless */
353 if (*name == 0) {
354 ML_(dinfo_free)(name);
355 continue;
358 VG_(bzero_inline)(&disym, sizeof(disym));
359 disym.avmas.main = sym_addr;
360 SET_TOCPTR_AVMA(disym, 0);
361 SET_LOCAL_EP_AVMA(disym, 0);
362 disym.pri_name = ML_(addStr)(di, name, -1);
363 disym.sec_names = NULL;
364 disym.size = // let canonicalize fix it
365 di->text_avma+di->text_size - sym_addr;
366 disym.isText = True;
367 disym.isIFunc = False;
368 // Lots of user function names get prepended with an underscore. Eg. the
369 // function 'f' becomes the symbol '_f'. And the "below main"
370 // function is called "start". So we skip the leading underscore, and
371 // if we see 'start' and --show-below-main=no, we rename it as
372 // "start_according_to_valgrind", which makes it easy to spot later
373 // and display as "(below main)".
374 if (disym.pri_name[0] == '_') {
375 disym.pri_name++;
377 else if (!VG_(clo_show_below_main) && VG_STREQ(disym.pri_name, "start")) {
378 if (s_a_t_v == NULL)
379 s_a_t_v = ML_(addStr)(di, "start_according_to_valgrind", -1);
380 vg_assert(s_a_t_v);
381 disym.pri_name = s_a_t_v;
384 vg_assert(disym.pri_name);
385 VG_(addToXA)( syms, &disym );
386 ML_(dinfo_free)(name);
391 /* Compare DiSyms by their start address, and for equal addresses, use
392 the primary name as a secondary sort key. */
393 static Int cmp_DiSym_by_start_then_name ( const void* v1, const void* v2 )
395 const DiSym* s1 = (DiSym*)v1;
396 const DiSym* s2 = (DiSym*)v2;
397 if (s1->avmas.main < s2->avmas.main) return -1;
398 if (s1->avmas.main > s2->avmas.main) return 1;
399 return VG_(strcmp)(s1->pri_name, s2->pri_name);
402 /* 'cand' is a bunch of candidate symbols obtained by reading
403 nlist-style symbol table entries. Their ends may overlap, so sort
404 them and truncate them accordingly. The code in this routine is
405 copied almost verbatim from read_symbol_table() in readxcoff.c. */
406 static void tidy_up_cand_syms ( /*MOD*/XArray* /* of DiSym */ syms,
407 Bool trace_symtab )
409 Word nsyms, i, j, k, m;
411 nsyms = VG_(sizeXA)(syms);
413 VG_(setCmpFnXA)(syms, cmp_DiSym_by_start_then_name);
414 VG_(sortXA)(syms);
416 /* We only know for sure the start addresses (actual VMAs) of
417 symbols, and an overestimation of their end addresses. So sort
418 by start address, then clip each symbol so that its end address
419 does not overlap with the next one along.
421 There is a small refinement: if a group of symbols have the same
422 address, treat them as a group: find the next symbol along that
423 has a higher start address, and clip all of the group
424 accordingly. This clips the group as a whole so as not to
425 overlap following symbols. This leaves prefersym() in
426 storage.c, which is not nlist-specific, to later decide which of
427 the symbols in the group to keep.
429 Another refinement is that we need to get rid of symbols which,
430 after clipping, have identical starts, ends, and names. So the
431 sorting uses the name as a secondary key.
434 for (i = 0; i < nsyms; i++) {
435 for (k = i+1;
436 k < nsyms
437 && ((DiSym*)VG_(indexXA)(syms,i))->avmas.main
438 == ((DiSym*)VG_(indexXA)(syms,k))->avmas.main;
439 k++)
441 /* So now [i .. k-1] is a group all with the same start address.
442 Clip their ending addresses so they don't overlap [k]. In
443 the normal case (no overlaps), k == i+1. */
444 if (k < nsyms) {
445 DiSym* next = (DiSym*)VG_(indexXA)(syms,k);
446 for (m = i; m < k; m++) {
447 DiSym* here = (DiSym*)VG_(indexXA)(syms,m);
448 vg_assert(here->avmas.main < next->avmas.main);
449 if (here->avmas.main + here->size > next->avmas.main)
450 here->size = next->avmas.main - here->avmas.main;
453 i = k-1;
454 vg_assert(i <= nsyms);
457 j = 0;
458 if (nsyms > 0) {
459 j = 1;
460 for (i = 1; i < nsyms; i++) {
461 DiSym *s_j1, *s_j, *s_i;
462 vg_assert(j <= i);
463 s_j1 = (DiSym*)VG_(indexXA)(syms, j-1);
464 s_j = (DiSym*)VG_(indexXA)(syms, j);
465 s_i = (DiSym*)VG_(indexXA)(syms, i);
466 if (s_i->avmas.main != s_j1->avmas.main
467 || s_i->size != s_j1->size
468 || 0 != VG_(strcmp)(s_i->pri_name, s_j1->pri_name)) {
469 *s_j = *s_i;
470 j++;
471 } else {
472 if (trace_symtab)
473 VG_(printf)("nlist cleanup: dump duplicate avma %010lx %s\n",
474 s_i->avmas.main, s_i->pri_name );
478 vg_assert(j >= 0 && j <= nsyms);
479 VG_(dropTailXA)(syms, nsyms - j);
483 /*------------------------------------------------------------*/
484 /*--- ---*/
485 /*--- Mach-O top-level processing ---*/
486 /*--- ---*/
487 /*------------------------------------------------------------*/
/* Suffix appended to an executable or bundle path to reach the
   directory inside a dSYM bundle that holds the DWARF file.  Only
   defined here if nothing defined it earlier. */
#ifndef APPLE_DSYM_EXT_AND_SUBDIRECTORY
#  define APPLE_DSYM_EXT_AND_SUBDIRECTORY ".dSYM/Contents/Resources/DWARF/"
#endif
494 static Bool file_exists_p(const HChar *path)
496 struct vg_stat sbuf;
497 SysRes res = VG_(stat)(path, &sbuf);
498 return sr_isError(res) ? False : True;
502 /* Search for an existing dSYM file as a possible separate debug file.
503 Adapted from gdb. */
504 static HChar *
505 find_separate_debug_file (const HChar *executable_name)
507 const HChar *basename_str;
508 HChar *dot_ptr;
509 HChar *slash_ptr;
510 HChar *dsymfile;
512 /* Make sure the object file name itself doesn't contain ".dSYM" in it or we
513 will end up with an infinite loop where after we add a dSYM symbol file,
514 it will then enter this function asking if there is a debug file for the
515 dSYM file itself. */
516 if (VG_(strcasestr) (executable_name, ".dSYM") == NULL)
518 /* Check for the existence of a .dSYM file for a given executable. */
519 basename_str = VG_(basename) (executable_name);
520 dsymfile = ML_(dinfo_zalloc)("di.readmacho.dsymfile",
521 VG_(strlen) (executable_name)
522 + VG_(strlen) (APPLE_DSYM_EXT_AND_SUBDIRECTORY)
523 + VG_(strlen) (basename_str)
527 /* First try for the dSYM in the same directory as the original file. */
528 VG_(strcpy) (dsymfile, executable_name);
529 VG_(strcat) (dsymfile, APPLE_DSYM_EXT_AND_SUBDIRECTORY);
530 VG_(strcat) (dsymfile, basename_str);
532 if (file_exists_p (dsymfile))
533 return dsymfile;
535 /* Now search for any parent directory that has a '.' in it so we can find
536 Mac OS X applications, bundles, plugins, and any other kinds of files.
537 Mac OS X application bundles wil have their program in
538 "/some/path/MyApp.app/Contents/MacOS/MyApp" (or replace ".app" with
539 ".bundle" or ".plugin" for other types of bundles). So we look for any
540 prior '.' character and try appending the apple dSYM extension and
541 subdirectory and see if we find an existing dSYM file (in the above
542 MyApp example the dSYM would be at either:
543 "/some/path/MyApp.app.dSYM/Contents/Resources/DWARF/MyApp" or
544 "/some/path/MyApp.dSYM/Contents/Resources/DWARF/MyApp". */
545 VG_(strcpy) (dsymfile, VG_(dirname) (executable_name));
546 while ((dot_ptr = VG_(strrchr) (dsymfile, '.')))
548 /* Find the directory delimiter that follows the '.' character since
549 we now look for a .dSYM that follows any bundle extension. */
550 slash_ptr = VG_(strchr) (dot_ptr, '/');
551 if (slash_ptr)
553 /* NULL terminate the string at the '/' character and append
554 the path down to the dSYM file. */
555 *slash_ptr = '\0';
556 VG_(strcat) (slash_ptr, APPLE_DSYM_EXT_AND_SUBDIRECTORY);
557 VG_(strcat) (slash_ptr, basename_str);
558 if (file_exists_p (dsymfile))
559 return dsymfile;
562 /* NULL terminate the string at the '.' character and append
563 the path down to the dSYM file. */
564 *dot_ptr = '\0';
565 VG_(strcat) (dot_ptr, APPLE_DSYM_EXT_AND_SUBDIRECTORY);
566 VG_(strcat) (dot_ptr, basename_str);
567 if (file_exists_p (dsymfile))
568 return dsymfile;
570 /* NULL terminate the string at the '.' locatated by the strrchr()
571 function again. */
572 *dot_ptr = '\0';
574 /* We found a previous extension '.' character and did not find a
575 dSYM file so now find previous directory delimiter so we don't
576 try multiple times on a file name that may have a version number
577 in it such as "/some/path/MyApp.6.0.4.app". */
578 slash_ptr = VG_(strrchr) (dsymfile, '/');
579 if (!slash_ptr)
580 break;
581 /* NULL terminate the string at the previous directory character
582 and search again. */
583 *slash_ptr = '\0';
587 return NULL;
591 /* Given a DiSlice covering the entire Mach-O thin image, find the
592 DiSlice for the specified (segname, sectname) pairing, if
593 possible. Also return the section's .addr field in *svma if
594 svma is non-NULL. */
595 static DiSlice getsectdata ( DiSlice img,
596 const HChar *segname, const HChar *sectname,
597 /*OUT*/Addr* svma )
599 DiCursor cur = ML_(cur_from_sli)(img);
601 struct MACH_HEADER mh;
602 ML_(cur_step_get)(&mh, &cur, sizeof(mh));
604 Int c;
605 for (c = 0; c < mh.ncmds; c++) {
606 struct load_command cmd;
607 ML_(cur_read_get)(&cmd, cur, sizeof(cmd));
608 if (cmd.cmd == LC_SEGMENT_CMD) {
609 struct SEGMENT_COMMAND seg;
610 ML_(cur_read_get)(&seg, cur, sizeof(seg));
611 if (0 == VG_(strncmp(&seg.segname[0],
612 segname, sizeof(seg.segname)))) {
613 DiCursor sects_cur = ML_(cur_plus)(cur, sizeof(seg));
614 Int s;
615 for (s = 0; s < seg.nsects; s++) {
616 struct SECTION sect;
617 ML_(cur_step_get)(&sect, &sects_cur, sizeof(sect));
618 if (0 == VG_(strncmp(sect.sectname, sectname,
619 sizeof(sect.sectname)))) {
620 DiSlice res = img;
621 res.ioff = sect.offset;
622 res.szB = sect.size;
623 if (svma) *svma = (Addr)sect.addr;
624 return res;
630 cur = ML_(cur_plus)(cur, cmd.cmdsize);
633 return DiSlice_INVALID;
637 /* Brute force just simply search for uuid[0..15] in |sli| */
638 static Bool check_uuid_matches ( DiSlice sli, UChar* uuid )
640 if (sli.szB < 16)
641 return False;
643 /* Work through the slice in 1 KB chunks. */
644 UChar first = uuid[0];
645 DiOffT min_off = sli.ioff;
646 DiOffT max1_off = sli.ioff + sli.szB;
647 DiOffT curr_off = min_off;
648 vg_assert(min_off < max1_off);
649 while (1) {
650 vg_assert(curr_off >= min_off && curr_off <= max1_off);
651 if (curr_off == max1_off) break;
652 DiOffT avail = max1_off - curr_off;
653 vg_assert(avail > 0 && avail <= max1_off);
654 if (avail > 1024) avail = 1024;
655 UChar buf[1024];
656 SizeT nGot = ML_(img_get_some)(buf, sli.img, curr_off, avail);
657 vg_assert(nGot >= 1 && nGot <= avail);
658 UInt i;
659 /* Scan through the 1K chunk we got, looking for the start char. */
660 for (i = 0; i < (UInt)nGot; i++) {
661 if (LIKELY(buf[i] != first))
662 continue;
663 /* first char matches. See if we can get 16 bytes at this
664 offset, and compare. */
665 if (curr_off + i < max1_off && max1_off - (curr_off + i) >= 16) {
666 UChar buff16[16];
667 ML_(img_get)(&buff16[0], sli.img, curr_off + i, 16);
668 if (0 == VG_(memcmp)(&buff16[0], &uuid[0], 16))
669 return True;
672 curr_off += nGot;
674 return False;
678 /* Heuristic kludge: return True if this looks like an installed
679 standard library; hence we shouldn't consider automagically running
680 dsymutil on it. */
681 static Bool is_systemish_library_name ( const HChar* name )
683 vg_assert(name);
684 if (0 == VG_(strncasecmp)(name, "/usr/", 5)
685 || 0 == VG_(strncasecmp)(name, "/bin/", 5)
686 || 0 == VG_(strncasecmp)(name, "/sbin/", 6)
687 || 0 == VG_(strncasecmp)(name, "/opt/", 5)
688 || 0 == VG_(strncasecmp)(name, "/sw/", 4)
689 || 0 == VG_(strncasecmp)(name, "/System/", 8)
690 || 0 == VG_(strncasecmp)(name, "/Library/", 9)
691 || 0 == VG_(strncasecmp)(name, "/Applications/", 14)) {
692 return True;
693 } else {
694 return False;
699 Bool ML_(read_macho_debug_info)( struct _DebugInfo* di )
701 DiSlice msli = DiSlice_INVALID; // the main image
702 DiSlice dsli = DiSlice_INVALID; // the debuginfo image
703 DiCursor sym_cur = DiCursor_INVALID;
704 DiCursor dysym_cur = DiCursor_INVALID;
705 HChar* dsymfilename = NULL;
706 Bool have_uuid = False;
707 UChar uuid[16];
708 Word i;
709 const DebugInfoMapping* rx_map = NULL;
710 const DebugInfoMapping* rw_map = NULL;
712 /* mmap the object file to look for di->soname and di->text_bias
713 and uuid and nlist */
715 /* This should be ensured by our caller (that we're in the accept
716 state). */
717 vg_assert(di->fsm.have_rx_map);
718 vg_assert(di->fsm.have_rw_map);
720 for (i = 0; i < VG_(sizeXA)(di->fsm.maps); i++) {
721 const DebugInfoMapping* map = VG_(indexXA)(di->fsm.maps, i);
722 if (map->rx && !rx_map)
723 rx_map = map;
724 if (map->rw && !rw_map)
725 rw_map = map;
726 if (rx_map && rw_map)
727 break;
729 vg_assert(rx_map);
730 vg_assert(rw_map);
732 if (VG_(clo_verbosity) > 1)
733 VG_(message)(Vg_DebugMsg,
734 "%s (rx at %#lx, rw at %#lx)\n", di->fsm.filename,
735 rx_map->avma, rw_map->avma );
737 VG_(memset)(&uuid, 0, sizeof(uuid));
739 msli = map_image_aboard( di, di->fsm.filename );
740 if (!ML_(sli_is_valid)(msli)) {
741 ML_(symerr)(di, False, "Connect to main image failed.");
742 goto fail;
745 vg_assert(msli.img != NULL && msli.szB > 0);
747 /* Poke around in the Mach-O header, to find some important
748 stuff. */
749 // Find LC_SYMTAB and LC_DYSYMTAB, if present.
750 // Read di->soname from LC_ID_DYLIB if present,
751 // or from LC_ID_DYLINKER if present,
752 // or use "NONE".
753 // Get di->text_bias (aka slide) based on the corresponding LC_SEGMENT
754 // Get uuid for later dsym search
756 di->text_bias = 0;
759 DiCursor cmd_cur = ML_(cur_from_sli)(msli);
761 struct MACH_HEADER mh;
762 ML_(cur_step_get)(&mh, &cmd_cur, sizeof(mh));
764 /* Now cur_cmd points just after the Mach header, right at the
765 start of the load commands, which is where we need it to start
766 the following loop. */
768 Int c;
769 for (c = 0; c < mh.ncmds; c++) {
770 struct load_command cmd;
771 ML_(cur_read_get)(&cmd, cmd_cur, sizeof(cmd));
773 if (cmd.cmd == LC_SYMTAB) {
774 sym_cur = cmd_cur;
776 else if (cmd.cmd == LC_DYSYMTAB) {
777 dysym_cur = cmd_cur;
779 else if (cmd.cmd == LC_ID_DYLIB && mh.filetype == MH_DYLIB) {
780 // GrP fixme bundle?
781 struct dylib_command dcmd;
782 ML_(cur_read_get)(&dcmd, cmd_cur, sizeof(dcmd));
783 DiCursor dylibname_cur
784 = ML_(cur_plus)(cmd_cur, dcmd.dylib.name.offset);
785 HChar* dylibname
786 = ML_(cur_read_strdup)(dylibname_cur, "di.rmdi.1");
787 HChar* soname = VG_(strrchr)(dylibname, '/');
788 if (!soname) soname = dylibname;
789 else soname++;
790 di->soname = ML_(dinfo_strdup)("di.readmacho.dylibname",
791 soname);
792 ML_(dinfo_free)(dylibname);
794 else if (cmd.cmd==LC_ID_DYLINKER && mh.filetype==MH_DYLINKER) {
795 struct dylinker_command dcmd;
796 ML_(cur_read_get)(&dcmd, cmd_cur, sizeof(dcmd));
797 DiCursor dylinkername_cur
798 = ML_(cur_plus)(cmd_cur, dcmd.name.offset);
799 HChar* dylinkername
800 = ML_(cur_read_strdup)(dylinkername_cur, "di.rmdi.2");
801 HChar* soname = VG_(strrchr)(dylinkername, '/');
802 if (!soname) soname = dylinkername;
803 else soname++;
804 di->soname = ML_(dinfo_strdup)("di.readmacho.dylinkername",
805 soname);
806 ML_(dinfo_free)(dylinkername);
809 // A comment from Julian about why varinfo[35] fail:
811 // My impression is, from comparing the output of otool -l for these
812 // executables with the logic in ML_(read_macho_debug_info),
813 // specifically the part that begins "else if (cmd->cmd ==
814 // LC_SEGMENT_CMD) {", that it's a complete hack which just happens
815 // to work ok for text symbols. In particular, it appears to assume
816 // that in a "struct load_command" of type LC_SEGMENT_CMD, the first
817 // "struct SEGMENT_COMMAND" inside it is going to contain the info we
818 // need. However, otool -l shows, and also the Apple docs state,
819 // that a struct load_command may contain an arbitrary number of
820 // struct SEGMENT_COMMANDs, so I'm not sure why it's OK to merely
821 // snarf the first. But I'm not sure about this.
823 // The "Try for __DATA" block below simply adds acquisition of data
824 // svma/bias values using the same assumption. It also needs
825 // (probably) to deal with bss sections, but I don't understand how
826 // this all ties together really, so it requires further study.
828 // If you can get your head around the relationship between MachO
829 // segments, sections and load commands, this might be relatively
830 // easy to fix properly.
832 // Basically we need to come up with plausible numbers for di->
833 // {text,data,bss}_{avma,svma}, from which the _bias numbers are
834 // then trivially derived. Then I think the debuginfo reader should
835 // work pretty well.
836 else if (cmd.cmd == LC_SEGMENT_CMD) {
837 struct SEGMENT_COMMAND seg;
838 ML_(cur_read_get)(&seg, cmd_cur, sizeof(seg));
839 /* Try for __TEXT */
840 if (!di->text_present
841 && 0 == VG_(strcmp)(&seg.segname[0], "__TEXT")
842 /* DDD: is the next line a kludge? -- JRS */
843 && seg.fileoff == 0 && seg.filesize != 0) {
844 di->text_present = True;
845 di->text_svma = (Addr)seg.vmaddr;
846 di->text_avma = rx_map->avma;
847 di->text_size = seg.vmsize;
848 di->text_bias = di->text_avma - di->text_svma;
849 /* Make the _debug_ values be the same as the
850 svma/bias for the primary object, since there is
851 no secondary (debuginfo) object, but nevertheless
852 downstream biasing of Dwarf3 relies on the
853 _debug_ values. */
854 di->text_debug_svma = di->text_svma;
855 di->text_debug_bias = di->text_bias;
857 /* Try for __DATA */
858 if (!di->data_present
859 && 0 == VG_(strcmp)(&seg.segname[0], "__DATA")
860 /* && DDD:seg->fileoff == 0 */ && seg.filesize != 0) {
861 di->data_present = True;
862 di->data_svma = (Addr)seg.vmaddr;
863 di->data_avma = rw_map->avma;
864 di->data_size = seg.vmsize;
865 di->data_bias = di->data_avma - di->data_svma;
866 di->data_debug_svma = di->data_svma;
867 di->data_debug_bias = di->data_bias;
870 else if (cmd.cmd == LC_UUID) {
871 ML_(cur_read_get)(&uuid, cmd_cur, sizeof(uuid));
872 have_uuid = True;
874 // Move the cursor along
875 cmd_cur = ML_(cur_plus)(cmd_cur, cmd.cmdsize);
879 if (!di->soname) {
880 di->soname = ML_(dinfo_strdup)("di.readmacho.noname", "NONE");
883 if (di->trace_symtab) {
884 VG_(printf)("\n");
885 VG_(printf)("SONAME = %s\n", di->soname);
886 VG_(printf)("\n");
889 /* Now we have the base object to hand. Read symbols from it. */
891 // We already asserted that ..
892 vg_assert(msli.img != NULL && msli.szB > 0);
894 if (ML_(cur_is_valid)(sym_cur) && ML_(cur_is_valid)(dysym_cur)) {
896 struct symtab_command symcmd;
897 struct dysymtab_command dysymcmd;
899 ML_(cur_read_get)(&symcmd, sym_cur, sizeof(symcmd));
900 ML_(cur_read_get)(&dysymcmd, dysym_cur, sizeof(dysymcmd));
902 /* Read nlist symbol table */
903 DiCursor syms = DiCursor_INVALID;
904 DiCursor strs = DiCursor_INVALID;
905 XArray* /* DiSym */ candSyms = NULL;
906 Word nCandSyms;
908 if (msli.szB < symcmd.stroff + symcmd.strsize
909 || msli.szB < symcmd.symoff + symcmd.nsyms
910 * sizeof(struct NLIST)) {
911 ML_(symerr)(di, False, "Invalid Mach-O file (5 too small).");
912 goto fail;
914 if (dysymcmd.ilocalsym + dysymcmd.nlocalsym > symcmd.nsyms
915 || dysymcmd.iextdefsym + dysymcmd.nextdefsym > symcmd.nsyms) {
916 ML_(symerr)(di, False, "Invalid Mach-O file (bad symbol table).");
917 goto fail;
920 syms = ML_(cur_plus)(ML_(cur_from_sli)(msli), symcmd.symoff);
921 strs = ML_(cur_plus)(ML_(cur_from_sli)(msli), symcmd.stroff);
923 if (VG_(clo_verbosity) > 1)
924 VG_(message)(Vg_DebugMsg,
925 " reading syms from primary file (%d %d)\n",
926 dysymcmd.nextdefsym, dysymcmd.nlocalsym );
928 /* Read candidate symbols into 'candSyms', so we can truncate
929 overlapping ends and generally tidy up, before presenting
930 them to ML_(addSym). */
931 candSyms = VG_(newXA)(
932 ML_(dinfo_zalloc), "di.readmacho.candsyms.1",
933 ML_(dinfo_free), sizeof(DiSym)
936 // extern symbols
937 read_symtab(candSyms,
939 ML_(cur_plus)(syms,
940 dysymcmd.iextdefsym * sizeof(struct NLIST)),
941 dysymcmd.nextdefsym, strs, symcmd.strsize);
942 // static and private_extern symbols
943 read_symtab(candSyms,
945 ML_(cur_plus)(syms,
946 dysymcmd.ilocalsym * sizeof(struct NLIST)),
947 dysymcmd.nlocalsym, strs, symcmd.strsize);
949 /* tidy up the cand syms -- trim overlapping ends. May resize
950 candSyms. */
951 tidy_up_cand_syms( candSyms, di->trace_symtab );
953 /* and finally present them to ML_(addSym) */
954 nCandSyms = VG_(sizeXA)( candSyms );
955 for (i = 0; i < nCandSyms; i++) {
956 DiSym* cand = (DiSym*) VG_(indexXA)( candSyms, i );
957 vg_assert(cand->pri_name != NULL);
958 vg_assert(cand->sec_names == NULL);
959 if (di->trace_symtab)
960 VG_(printf)("nlist final: acquire avma %010lx-%010lx %s\n",
961 cand->avmas.main, cand->avmas.main + cand->size - 1,
962 cand->pri_name );
963 ML_(addSym)( di, cand );
965 VG_(deleteXA)( candSyms );
968 /* If there's no UUID in the primary, don't even bother to try and
969 read any DWARF, since we won't be able to verify it matches.
970 Our policy is not to load debug info unless we can verify that
971 it matches the primary. Just declare success at this point.
972 And don't complain to the user, since that would cause us to
973 complain on objects compiled without -g. (Some versions of
974 Xcode are observed to omit a UUID entry for objects linked(?)
975 without -g. Others don't appear to omit it.) */
976 if (!have_uuid)
977 goto success;
979 /* mmap the dSYM file to look for DWARF debug info. If successful,
980 use the .img and .szB fields of dsli. */
982 dsymfilename = find_separate_debug_file( di->fsm.filename );
984 /* Try to load it. */
985 if (dsymfilename) {
986 Bool valid;
988 if (VG_(clo_verbosity) > 1)
989 VG_(message)(Vg_DebugMsg, " dSYM= %s\n", dsymfilename);
991 dsli = map_image_aboard( di, dsymfilename );
992 if (!ML_(sli_is_valid)(dsli)) {
993 ML_(symerr)(di, False, "Connect to debuginfo image failed "
994 "(first attempt).");
995 goto fail;
998 /* check it has the right uuid. */
999 vg_assert(have_uuid);
1000 valid = dsli.img && dsli.szB > 0 && check_uuid_matches( dsli, uuid );
1001 if (valid)
1002 goto read_the_dwarf;
1004 if (VG_(clo_verbosity) > 1)
1005 VG_(message)(Vg_DebugMsg, " dSYM does not have "
1006 "correct UUID (out of date?)\n");
1009 /* There was no dsym file, or it doesn't match. We'll have to try
1010 regenerating it, unless --dsymutil=no, in which case just complain
1011 instead. */
1013 /* If this looks like a lib that we shouldn't run dsymutil on, just
1014 give up. (possible reasons: is system lib, or in /usr etc, or
1015 the dsym dir would not be writable by the user, or we're running
1016 as root) */
1017 vg_assert(di->fsm.filename);
1018 if (is_systemish_library_name(di->fsm.filename))
1019 goto success;
1021 if (!VG_(clo_dsymutil)) {
1022 if (VG_(clo_verbosity) == 1) {
1023 VG_(message)(Vg_DebugMsg, "%s:\n", di->fsm.filename);
1025 if (VG_(clo_verbosity) > 0)
1026 VG_(message)(Vg_DebugMsg, "%sdSYM directory %s; consider using "
1027 "--dsymutil=yes\n",
1028 VG_(clo_verbosity) > 1 ? " " : "",
1029 dsymfilename ? "has wrong UUID" : "is missing");
1030 goto success;
1033 /* Run dsymutil */
1035 { Int r;
1036 const HChar* dsymutil = "/usr/bin/dsymutil ";
1037 HChar* cmd = ML_(dinfo_zalloc)( "di.readmacho.tmp1",
1038 VG_(strlen)(dsymutil)
1039 + VG_(strlen)(di->fsm.filename)
1040 + 32 /* misc */ );
1041 VG_(strcpy)(cmd, dsymutil);
1042 if (0) VG_(strcat)(cmd, "--verbose ");
1043 VG_(strcat)(cmd, "\"");
1044 VG_(strcat)(cmd, di->fsm.filename);
1045 VG_(strcat)(cmd, "\"");
1046 VG_(message)(Vg_DebugMsg, "run: %s\n", cmd);
1047 r = VG_(system)( cmd );
1048 if (r)
1049 VG_(message)(Vg_DebugMsg, "run: %s FAILED\n", dsymutil);
1050 ML_(dinfo_free)(cmd);
1051 dsymfilename = find_separate_debug_file(di->fsm.filename);
1054 /* Try again to load it. */
1055 if (dsymfilename) {
1056 Bool valid;
1058 if (VG_(clo_verbosity) > 1)
1059 VG_(message)(Vg_DebugMsg, " dsyms= %s\n", dsymfilename);
1061 dsli = map_image_aboard( di, dsymfilename );
1062 if (!ML_(sli_is_valid)(dsli)) {
1063 ML_(symerr)(di, False, "Connect to debuginfo image failed "
1064 "(second attempt).");
1065 goto fail;
1068 /* check it has the right uuid. */
1069 vg_assert(have_uuid);
1070 vg_assert(have_uuid);
1071 valid = dsli.img && dsli.szB > 0 && check_uuid_matches( dsli, uuid );
1072 if (!valid) {
1073 if (VG_(clo_verbosity) > 0) {
1074 VG_(message)(Vg_DebugMsg,
1075 "WARNING: did not find expected UUID %02X%02X%02X%02X"
1076 "-%02X%02X-%02X%02X-%02X%02X-%02X%02X%02X%02X%02X%02X"
1077 " in dSYM dir\n",
1078 (UInt)uuid[0], (UInt)uuid[1], (UInt)uuid[2], (UInt)uuid[3],
1079 (UInt)uuid[4], (UInt)uuid[5], (UInt)uuid[6], (UInt)uuid[7],
1080 (UInt)uuid[8], (UInt)uuid[9], (UInt)uuid[10],
1081 (UInt)uuid[11], (UInt)uuid[12], (UInt)uuid[13],
1082 (UInt)uuid[14], (UInt)uuid[15] );
1083 VG_(message)(Vg_DebugMsg,
1084 "WARNING: for %s\n", di->fsm.filename);
1086 unmap_image( &dsli );
1087 /* unmap_image zeroes out dsli, so it's safe for "fail:" to
1088 re-try unmap_image. */
1089 goto fail;
1093 /* Right. Finally we have our best try at the dwarf image, so go
1094 on to reading stuff out of it. */
1096 read_the_dwarf:
1097 if (ML_(sli_is_valid)(dsli) && dsli.szB > 0) {
1098 // "_mscn" is "mach-o section"
1099 DiSlice debug_info_mscn
1100 = getsectdata(dsli, "__DWARF", "__debug_info", NULL);
1101 DiSlice debug_abbv_mscn
1102 = getsectdata(dsli, "__DWARF", "__debug_abbrev", NULL);
1103 DiSlice debug_line_mscn
1104 = getsectdata(dsli, "__DWARF", "__debug_line", NULL);
1105 DiSlice debug_str_mscn
1106 = getsectdata(dsli, "__DWARF", "__debug_str", NULL);
1107 DiSlice debug_ranges_mscn
1108 = getsectdata(dsli, "__DWARF", "__debug_ranges", NULL);
1109 DiSlice debug_loc_mscn
1110 = getsectdata(dsli, "__DWARF", "__debug_loc", NULL);
1112 /* It appears (jrs, 2014-oct-19) that section "__eh_frame" in
1113 segment "__TEXT" appears in both the main and dsym files, but
1114 only the main one gives the right results. Since it's in the
1115 __TEXT segment, we calculate the __eh_frame avma using its
1116 svma and the text bias, and that sounds reasonable. */
1117 Addr eh_frame_svma = 0;
1118 DiSlice eh_frame_mscn
1119 = getsectdata(msli, "__TEXT", "__eh_frame", &eh_frame_svma);
1121 if (ML_(sli_is_valid)(eh_frame_mscn)) {
1122 vg_assert(di->text_bias == di->text_debug_bias);
1123 ML_(read_callframe_info_dwarf3)(di, eh_frame_mscn,
1124 eh_frame_svma + di->text_bias,
1125 True/*is_ehframe*/);
1128 if (ML_(sli_is_valid)(debug_info_mscn)) {
1129 if (VG_(clo_verbosity) > 1) {
1130 if (0)
1131 VG_(message)(Vg_DebugMsg,
1132 "Reading dwarf3 for %s (%#lx) from %s"
1133 " (%lld %lld %lld %lld %lld %lld)\n",
1134 di->fsm.filename, di->text_avma, dsymfilename,
1135 debug_info_mscn.szB, debug_abbv_mscn.szB,
1136 debug_line_mscn.szB, debug_str_mscn.szB,
1137 debug_ranges_mscn.szB, debug_loc_mscn.szB
1139 VG_(message)(Vg_DebugMsg,
1140 " reading dwarf3 from dsyms file\n");
1142 /* The old reader: line numbers and unwind info only */
1143 ML_(read_debuginfo_dwarf3) ( di,
1144 debug_info_mscn,
1145 DiSlice_INVALID, /* .debug_types */
1146 debug_abbv_mscn,
1147 debug_line_mscn,
1148 debug_str_mscn,
1149 DiSlice_INVALID /* ALT .debug_str */ );
1151 /* The new reader: read the DIEs in .debug_info to acquire
1152 information on variable types and locations or inline info.
1153 But only if the tool asks for it, or the user requests it on
1154 the command line. */
1155 if (VG_(clo_read_var_info) /* the user or tool asked for it */
1156 || VG_(clo_read_inline_info)) {
1157 ML_(new_dwarf3_reader)(
1158 di, debug_info_mscn,
1159 DiSlice_INVALID, /* .debug_types */
1160 debug_abbv_mscn,
1161 debug_line_mscn,
1162 debug_str_mscn,
1163 debug_ranges_mscn,
1164 debug_loc_mscn,
1165 DiSlice_INVALID, /* ALT .debug_info */
1166 DiSlice_INVALID, /* ALT .debug_abbv */
1167 DiSlice_INVALID, /* ALT .debug_line */
1168 DiSlice_INVALID /* ALT .debug_str */
1174 if (dsymfilename) ML_(dinfo_free)(dsymfilename);
1176 success:
1177 unmap_image(&msli);
1178 unmap_image(&dsli);
1179 return True;
1181 /* NOTREACHED */
1183 fail:
1184 ML_(symerr)(di, True, "Error reading Mach-O object.");
1185 unmap_image(&msli);
1186 unmap_image(&dsli);
1187 return False;
1190 #endif // defined(VGO_darwin)
1192 /*--------------------------------------------------------------------*/
1193 /*--- end ---*/
1194 /*--------------------------------------------------------------------*/