2 /* Derived from Valgrind sources, coregrind/m_debuginfo/readmacho.c.
5 Can be compiled as either a 32- or 64-bit program (doesn't matter).
8 /* What does this program do? In short it postprocesses tool
9 executables on MacOSX, after linking using /usr/bin/ld.
11 This is to deal with two separate and entirely unrelated problems.
12 Problem (1) is a bug in the linker in Xcode 4.0.0. Problem (2) is
13 much newer and concerns linking 64-bit tool executables for
16 --- Problem (1) ------------------------------------------------
18 This is a bug in the linker on Xcode 4.0.0 and Xcode 4.0.1. Xcode
19 versions prior to 4.0.0 are unaffected.
21 The tracking bug is https://bugs.kde.org/show_bug.cgi?id=267997
23 The bug causes 64-bit tool executables to segfault at startup,
26 Comparing the MachO load commands vs a (working) tool executable
27 that was created by Xcode 3.2.x, it appears that the new linker has
28 partially ignored the build system's request to place the tool
29 executable's stack at a non standard location. The build system
30 tells the linker "-stack_addr 0x134000000 -stack_size 0x800000".
32 With the Xcode 3.2 linker those flags produce two results:
34 (1) A load command to allocate the stack at the said location:
39 vmaddr 0x0000000133800000
40 vmsize 0x0000000000800000
48 (2) A request (in LC_UNIXTHREAD) to set %rsp to the correct value
49 at process startup, 0x134000000.
51 With Xcode 4.0.1, (1) is missing but (2) is still present. The
52 tool executable therefore starts up with %rsp pointing to unmapped
53 memory and faults almost instantly.
55 The workaround implemented by this program is documented in comment
58 One really sick workaround is to observe that the executables
59 contain a redundant MachO load command:
65 vmaddr 0x0000000138dea000
66 vmsize 0x00000000000ad000
74 The described section presumably contains information intended for
75 the dynamic linker, but is irrelevant because this is a statically
76 linked executable. Hence it might be possible to postprocess the
77 executables after linking, to overwrite this entry with the
78 information that would have been in the missing __UNIXSTACK entry.
79 I tried this by hand (with a binary editor) earlier and got
80 something that worked.
82 --- Problem (2) ------------------------------------------------
84 On MacOSX 10.10 (Yosemite), the kernel requires all valid
85 executables to have a __PAGEZERO segment with SVMA of zero and size
86 of at least one page. However, our tool executables have a
87 __PAGEZERO segment with SVMA set to the requested Valgrind load
88 address (typically 0x1'3800'0000). And the kernel won't start
89 those. So we take the opportunity to "fix" this by setting the
90 SVMA to zero. Seems to work and have no obvious bad side effects.
93 #define DEBUGPRINTING 0
100 #include <sys/stat.h>
104 #undef PLAT_x86_darwin
105 #undef PLAT_amd64_darwin
107 #if defined(__APPLE__) && defined(__i386__)
108 # define PLAT_x86_darwin 1
109 #elif defined(__APPLE__) && defined(__x86_64__)
110 # define PLAT_amd64_darwin 1
112 # error "Can't be compiled on this platform"
115 #include <mach-o/loader.h>
116 #include <mach-o/nlist.h>
117 #include <mach-o/fat.h>
118 #include <mach/i386/thread_status.h>
120 /* Get hold of DARWIN_VERS, and check it has a sane value. */
122 #if DARWIN_VERS != DARWIN_10_5 && DARWIN_VERS != DARWIN_10_6 \
123 && DARWIN_VERS != DARWIN_10_7 && DARWIN_VERS != DARWIN_10_8 \
124 && DARWIN_VERS != DARWIN_10_9 && DARWIN_VERS != DARWIN_10_10 \
125 && DARWIN_VERS != DARWIN_10_11 && DARWIN_VERS != DARWIN_10_12 \
126 && DARWIN_VERS != DARWIN_10_13
127 # error "Unknown DARWIN_VERS value. This file only compiles on Darwin."
/* Scalar type aliases used throughout this file.  UChar/Char are the
   explicitly unsigned/signed 8-bit character types, HChar is plain
   char for host strings, and UInt/Int are the plain int types. */
131 typedef unsigned char UChar
;
132 typedef signed char Char
;
133 typedef char HChar
; /* signfulness depends on host */
135 typedef unsigned int UInt
;
136 typedef signed int Int
;
/* Boolean type (one unsigned char) and its two canonical values. */
138 typedef unsigned char Bool
;
139 #define True ((Bool)1)
140 #define False ((Bool)0)
/* Unsigned integer with the width of unsigned long on the host. */
142 typedef unsigned long UWord
;
/* Unsigned and signed long long aliases; ULong is the type printed
   with the llx conversions elsewhere in this file. */
147 typedef unsigned long long int ULong
;
148 typedef signed long long int Long
;
/* Report a fatal error.  Writes MSG to stderr behind the fixed
   prefix shown in the format string below.  The function is declared
   noreturn, so callers rely on it never coming back.
   NOTE(review): the statement that actually terminates the process
   is not visible in this listing -- confirm it is present. */
152 __attribute__((noreturn
))
153 void fail ( HChar
* msg
)
155 fprintf(stderr
, "fixup_macho_loadcmds: fail: %s\n", msg
);
160 /*------------------------------------------------------------*/
162 /*--- Mach-O file mapping/unmapping helpers ---*/
164 /*------------------------------------------------------------*/
168 /* These two describe the entire mapped-in ("primary") image,
169 fat headers, kitchen sink, whatnot: the entire file. The
170 image is mapped into img[0 .. img_szB-1]. */
173 /* These two describe the Mach-O object of interest, which is
174 presumably somewhere inside the primary image.
175 map_image_aboard() below, which generates this info, will
176 carefully check that the macho_ fields denote a section of
177 memory that falls entirely inside img[0 .. img_szB-1]. */
/* Quick plausibility test on the first szB bytes of a file held in
   buf.  The visible checks are, in order:
     - szB is compared against sizeof(struct fat_header), then the
       magic word, converted with ntohl, is compared with FAT_MAGIC;
     - szB is compared against sizeof(struct mach_header_64), then
       the magic word, read as-is, is compared with MH_MAGIC_64.
   NOTE(review): the return statements are not visible in this
   listing; presumably a recognised magic yields True and a short
   buffer or unknown magic yields False -- confirm. */
184 Bool
is_macho_object_file( const void* buf
, SizeT szB
)
186 /* (JRS: the Mach-O headers might not be in this mapped data,
187 because we only mapped a page for this initial check,
188 or at least not very much, and what's at the start of the file
189 is in general a so-called fat header. The Mach-O object we're
190 interested in could be arbitrarily far along the image, and so
191 we can't assume its header will fall within this page.) */
193 /* But we can say that either it's a fat object, in which case it
194 begins with a fat header, or it's unadorned Mach-O, in which
195 case it starts with a normal header. At least do what checks we
196 can to establish whether or not we're looking at something
199 const struct fat_header
* fh_be
= buf
;
200 const struct mach_header_64
* mh
= buf
;
203 if (szB
< sizeof(struct fat_header
))
205 if (ntohl(fh_be
->magic
) == FAT_MAGIC
)
208 if (szB
< sizeof(struct mach_header_64
))
210 if (mh
->magic
== MH_MAGIC_64
)
217 /* Unmap an image mapped in by map_image_aboard. */
/* ii is both input and output: the mapping ii->img of ii->img_szB
   bytes is released with munmap, after which the whole descriptor is
   zeroed.  The recorded size is asserted to be non-zero first.
   NOTE(review): the check of the munmap result r is not visible in
   this listing -- confirm it is tested. */
218 static void unmap_image ( /*MOD*/ImageInfo
* ii
)
222 assert(ii
->img_szB
> 0);
223 r
= munmap( ii
->img
, ii
->img_szB
);
224 /* Do we care if this fails? I suppose so; it would indicate
225 some fairly serious snafu with the mapping of the file. */
/* Clear every field so the descriptor no longer refers to the
   region that was just unmapped. */
227 memset(ii
, 0, sizeof(*ii
));
231 /* Map a given fat or thin object aboard, find the thin part if
232 necessary, do some checks, and write details of both the fat and
233 thin parts into *ii. Returns 32 (and leaves the file unmapped) if
234 the thin part is a 32 bit file. Returns 64 if it's a 64 bit file.
235 Does not return on failure. Guarantees to return pointers to a
236 valid(ish) Mach-O image if it succeeds. */
/* Parameters:
     ii       - output descriptor; zeroed first, then filled in with
                the whole-file mapping and with the selected Mach-O
                slice (macho_img, macho_img_szB).
     filename - path of the executable.  It is passed to stat for its
                size, opened O_RDWR and mapped readable and writable
                with MAP_SHARED, so stores through the mapping change
                the file itself.
   Every validation failure is reported through fail. */
237 static Int
map_image_aboard ( /*OUT*/ImageInfo
* ii
, HChar
* filename
)
239 memset(ii
, 0, sizeof(*ii
));
241 /* First off, try to map the thing in. */
244 struct stat stat_buf
;
246 r
= stat(filename
, &stat_buf
);
248 fail("Can't stat image (to determine its size)?!");
249 size
= stat_buf
.st_size
;
251 fd
= open(filename
, O_RDWR
, 0);
253 fail("Can't open image for possible modification!");
255 printf("size %lu fd %d\n", size
, fd
);
/* Shared, writable mapping of the entire file, starting at offset 0. */
256 void* v
= mmap ( NULL
, size
, PROT_READ
|PROT_WRITE
,
257 MAP_FILE
|MAP_SHARED
, fd
, 0 );
258 if (v
== MAP_FAILED
) {
259 perror("mmap failed");
260 fail("Can't mmap image for possible modification!");
269 /* Now it's mapped in and we have .img and .img_szB set. Look for
270 the embedded Mach-O object. If not findable, unmap and fail. */
271 { struct fat_header
* fh_be
;
272 struct fat_header fh
;
273 struct mach_header_64
* mh
;
275 // Assume initially that we have a thin image, and update
276 // these if it turns out to be fat.
277 ii
->macho_img
= ii
->img
;
278 ii
->macho_img_szB
= ii
->img_szB
;
280 // Check for fat header.
281 if (ii
->img_szB
< sizeof(struct fat_header
))
282 fail("Invalid Mach-O file (0 too small).");
284 // Fat header is always BIG-ENDIAN
/* fh is a host-byte-order copy of the two fat header fields. */
285 fh_be
= (struct fat_header
*)ii
->img
;
286 fh
.magic
= ntohl(fh_be
->magic
);
287 fh
.nfat_arch
= ntohl(fh_be
->nfat_arch
);
288 if (fh
.magic
== FAT_MAGIC
) {
289 // Look for a good architecture.
290 struct fat_arch
*arch_be
;
291 struct fat_arch arch
;
/* The table of nfat_arch entries that follows the fat header must
   fit inside the mapped file. */
293 if (ii
->img_szB
< sizeof(struct fat_header
)
294 + fh
.nfat_arch
* sizeof(struct fat_arch
))
295 fail("Invalid Mach-O file (1 too small).");
/* Walk the fat_arch entries, which start right after the header. */
297 for (f
= 0, arch_be
= (struct fat_arch
*)(fh_be
+1);
/* The wanted CPU type is fixed at compile time by the PLAT_ macro. */
301 # if defined(PLAT_x86_darwin)
302 cputype
= CPU_TYPE_X86
;
303 # elif defined(PLAT_amd64_darwin)
304 cputype
= CPU_TYPE_X86_64
;
306 # error "unknown architecture"
/* Convert this big-endian fat_arch entry to host byte order. */
308 arch
.cputype
= ntohl(arch_be
->cputype
);
309 arch
.cpusubtype
= ntohl(arch_be
->cpusubtype
);
310 arch
.offset
= ntohl(arch_be
->offset
);
311 arch
.size
= ntohl(arch_be
->size
);
/* A slice with the wanted CPU type becomes the Mach-O image of
   interest, provided offset + size lies inside the file.
   NOTE(review): offset and size are presumably 32-bit fields, in
   which case their sum could wrap before the comparison -- confirm
   and consider widening the arithmetic. */
312 if (arch
.cputype
== cputype
) {
313 if (ii
->img_szB
< arch
.offset
+ arch
.size
)
314 fail("Invalid Mach-O file (2 too small).");
315 ii
->macho_img
= ii
->img
+ arch
.offset
;
316 ii
->macho_img_szB
= arch
.size
;
/* The loop index reaching nfat_arch means no entry matched. */
320 if (f
== fh
.nfat_arch
)
321 fail("No acceptable architecture found in fat file.");
324 /* Sanity check what we found. */
326 /* assured by logic above */
327 assert(ii
->img_szB
>= sizeof(struct fat_header
));
329 if (ii
->macho_img_szB
< sizeof(struct mach_header_64
))
330 fail("Invalid Mach-O file (3 too small).");
332 if (ii
->macho_img_szB
> ii
->img_szB
)
333 fail("Invalid Mach-O file (thin bigger than fat).");
335 if (ii
->macho_img
>= ii
->img
336 && ii
->macho_img
+ ii
->macho_img_szB
<= ii
->img
+ ii
->img_szB
) {
337 /* thin entirely within fat, as expected */
339 fail("Invalid Mach-O file (thin not inside fat).");
/* MH_MAGIC marks a 32-bit image; per the function comment this is
   the path that reports 32 to the caller. */
342 mh
= (struct mach_header_64
*)ii
->macho_img
;
343 if (mh
->magic
== MH_MAGIC
) {
345 assert(ii
->macho_img
);
346 assert(ii
->img_szB
> 0);
347 assert(ii
->macho_img_szB
> 0);
348 assert(ii
->macho_img
>= ii
->img
);
349 assert(ii
->macho_img
+ ii
->macho_img_szB
<= ii
->img
+ ii
->img_szB
);
/* Anything other than a 64-bit Mach-O header is rejected here. */
352 if (mh
->magic
!= MH_MAGIC_64
)
353 fail("Invalid Mach-O file (bad magic).");
/* The load commands occupy sizeofcmds bytes after the header and
   must fit inside the slice. */
355 if (ii
->macho_img_szB
< sizeof(struct mach_header_64
) + mh
->sizeofcmds
)
356 fail("Invalid Mach-O file (4 too small).");
/* Final invariants: the slice is non-empty and lies inside the
   whole-file mapping. */
360 assert(ii
->macho_img
);
361 assert(ii
->img_szB
> 0);
362 assert(ii
->macho_img_szB
> 0);
363 assert(ii
->macho_img
>= ii
->img
);
364 assert(ii
->macho_img
+ ii
->macho_img_szB
<= ii
->img
+ ii
->img_szB
);
369 /*------------------------------------------------------------*/
371 /*--- Mach-O top-level processing ---*/
373 /*------------------------------------------------------------*/
/* Top-level driver.  Maps FILENAME with map_image_aboard, scans the
   Mach-O load commands and applies the two fixes described in the
   comment at the top of this file:
     (1) checks that the initial RSP recorded in LC_UNIXTHREAD equals
         expected_stack_start + expected_stack_size, then either
         validates an existing __UNIXSTACK segment command or
         rewrites the __LINKEDIT segment command into one;
     (2) when DARWIN_VERS >= DARWIN_10_10, sets the vmaddr of the
         __PAGEZERO segment command to zero.
   Parameters:
     filename             - tool executable to examine and patch.
     expected_stack_start - lowest address of the requested stack;
                            compared with the segment vmaddr.
     expected_stack_size  - size in bytes of the requested stack;
                            compared with the segment vmsize.
   Any inconsistency is reported through fail. */
375 void modify_macho_loadcmds ( HChar
* filename
,
376 ULong expected_stack_start
,
377 ULong expected_stack_size
)
380 memset(&ii
, 0, sizeof(ii
));
/* map_image_aboard reports the word size (32 or 64) of the image. */
382 Int size
= map_image_aboard( &ii
, filename
);
384 fprintf(stderr
, "fixup_macho_loadcmds: Is 32-bit MachO file;"
385 " no modifications needed.\n");
391 assert(ii
.macho_img
!= NULL
&& ii
.macho_img_szB
> 0);
393 /* Poke around in the Mach-O header, to find some important
395 * the location of the __UNIXSTACK load command, if any
396 * the location of the __LINKEDIT load command, if any
397 * the initial RSP value as stated in the LC_UNIXTHREAD
400 /* The collected data */
402 Bool have_rsp
= False
;
403 struct segment_command_64
* seg__unixstack
= NULL
;
404 struct segment_command_64
* seg__linkedit
= NULL
;
405 struct segment_command_64
* seg__pagezero
= NULL
;
407 /* Loop over the load commands and fill in the above 4 variables. */
409 { struct mach_header_64
*mh
= (struct mach_header_64
*)ii
.macho_img
;
410 struct load_command
*cmd
;
/* The first load command sits directly after the Mach-O header;
   each following one is found by adding cmdsize to the address of
   the current command. */
413 for (c
= 0, cmd
= (struct load_command
*)(mh
+1);
415 c
++, cmd
= (struct load_command
*)(cmd
->cmdsize
416 + (unsigned long)cmd
)) {
418 printf("load cmd: offset %4lu size %3d kind %2d = ",
419 (unsigned long)((UChar
*)cmd
- (UChar
*)ii
.macho_img
),
420 cmd
->cmdsize
, cmd
->cmd
);
425 printf("LC_SEGMENT_64");
433 printf("LC_DYSYMTAB");
441 printf("LC_UNIXTHREAD");
446 fail("unexpected load command in Mach header");
452 /* Note what the stated initial RSP value is, so we can
453 check it is as expected. */
/* The thread state follows the thread_command header as 32-bit
   words: w32s[0] is the flavor, and the 64-bit register state is
   read starting at w32s[2].  Only the first x86_THREAD_STATE64
   entry is used, since have_rsp must still be False. */
454 if (cmd
->cmd
== LC_UNIXTHREAD
) {
455 struct thread_command
* tcmd
= (struct thread_command
*)cmd
;
456 UInt
* w32s
= (UInt
*)( (UChar
*)tcmd
+ sizeof(*tcmd
) );
458 printf("UnixThread: flavor %u = ", w32s
[0]);
459 if (w32s
[0] == x86_THREAD_STATE64
&& !have_rsp
) {
461 printf("x86_THREAD_STATE64\n");
462 x86_thread_state64_t
* state64
463 = (x86_thread_state64_t
*)(&w32s
[2]);
465 init_rsp
= state64
->__rsp
;
467 printf("rsp = 0x%llx\n", init_rsp
);
/* Segment commands of interest are recognised by their segname. */
476 if (cmd
->cmd
== LC_SEGMENT_64
) {
477 struct segment_command_64
*seg
= (struct segment_command_64
*)cmd
;
478 if (0 == strcmp(seg
->segname
, "__LINKEDIT"))
480 if (0 == strcmp(seg
->segname
, "__UNIXSTACK"))
481 seg__unixstack
= seg
;
482 if (0 == strcmp(seg
->segname
, "__PAGEZERO"))
490 Actions are then as follows:
492 * (always) check the RSP value is as expected, and abort if not
494 * if there's a UNIXSTACK load command, check it is as expected.
495 If not abort, if yes, do nothing more.
497 * (so there's no UNIXSTACK load command). if there's a LINKEDIT
498 load command, check if it is minimally usable (has 0 for
499 nsects and flags). If yes, convert it to a UNIXSTACK load
500 command. If there is none, or is unusable, then we're out of
501 options and have to abort.
504 fail("Can't find / check initial RSP setting");
505 if (init_rsp
!= expected_stack_start
+ expected_stack_size
)
506 fail("Initial RSP value not as expected");
508 fprintf(stderr
, "fixup_macho_loadcmds: "
509 "initial RSP is as expected (0x%llx)\n",
510 expected_stack_start
+ expected_stack_size
);
512 if (seg__unixstack
) {
513 struct segment_command_64
*seg
= seg__unixstack
;
514 if (seg
->vmaddr
!= expected_stack_start
)
515 fail("has __UNIXSTACK, but wrong ::vmaddr");
516 if (seg
->vmsize
!= expected_stack_size
)
517 fail("has __UNIXSTACK, but wrong ::vmsize");
518 if (seg
->maxprot
!= 7)
519 fail("has __UNIXSTACK, but wrong ::maxprot (should be 7)");
520 if (seg
->initprot
!= 3)
521 fail("has __UNIXSTACK, but wrong ::initprot (should be 3)");
522 if (seg
->nsects
!= 0)
523 fail("has __UNIXSTACK, but wrong ::nsects (should be 0)");
525 fail("has __UNIXSTACK, but wrong ::flags (should be 0)");
527 fprintf(stderr
, "fixup_macho_loadcmds: "
528 "acceptable __UNIXSTACK present; no modifications.\n" );
529 goto maybe_mash_pagezero
;
533 struct segment_command_64
*seg
= seg__linkedit
;
534 if (seg
->nsects
!= 0)
535 fail("has __LINKEDIT, but wrong ::nsects (should be 0)");
537 fail("has __LINKEDIT, but wrong ::flags (should be 0)");
538 fprintf(stderr
, "fixup_macho_loadcmds: "
539 "no __UNIXSTACK present.\n" );
540 fprintf(stderr
, "fixup_macho_loadcmds: "
541 "converting __LINKEDIT to __UNIXSTACK.\n" );
542 strcpy(seg
->segname
, "__UNIXSTACK");
543 seg
->vmaddr
= expected_stack_start
;
544 seg
->vmsize
= expected_stack_size
;
550 goto maybe_mash_pagezero
;
554 fail("no __UNIXSTACK found and no usable __LINKEDIT found; "
559 /* Deal with Problem (2) as documented above. */
560 # if DARWIN_VERS >= DARWIN_10_10
562 if (!seg__pagezero
) {
563 fail("Can't find __PAGEZERO to modify; can't continue.");
565 fprintf(stderr
, "fixup_macho_loadcmds: "
566 "changing __PAGEZERO.vmaddr from %p to 0x0.\n",
567 (void*)seg__pagezero
->vmaddr
);
568 seg__pagezero
->vmaddr
= 0;
577 static Bool
is_plausible_tool_exe_name ( HChar
* nm
)
583 // Does it end with this string?
584 p
= strstr(nm
, "-x86-darwin");
585 if (p
&& 0 == strcmp(p
, "-x86-darwin"))
588 p
= strstr(nm
, "-amd64-darwin");
589 if (p
&& 0 == strcmp(p
, "-amd64-darwin"))
/* Entry point.  Expected arguments, as spelled out by the usage
   message below:
     argv[1]  stack address, hexadecimal with a leading 0x
     argv[2]  stack size, hexadecimal with a leading 0x
     argv[3]  name of the tool executable to modify
   Both numbers are parsed with sscanf; a parse failure or an
   implausible executable name is reported through fail. */
596 int main ( int argc
, char** argv
)
599 ULong req_stack_addr
= 0;
600 ULong req_stack_size
= 0;
603 fail("args: -stack_addr-arg -stack_size-arg "
604 "name-of-tool-executable-to-modify");
/* sscanf must convert exactly one item for the argument to count
   as valid. */
606 r
= sscanf(argv
[1], "0x%llx", &req_stack_addr
);
607 if (r
!= 1) fail("invalid stack_addr arg");
609 r
= sscanf(argv
[2], "0x%llx", &req_stack_size
);
610 if (r
!= 1) fail("invalid stack_size arg");
612 fprintf(stderr
, "fixup_macho_loadcmds: "
613 "requested stack_addr (top) 0x%llx, "
614 "stack_size 0x%llx\n", req_stack_addr
, req_stack_size
);
616 if (!is_plausible_tool_exe_name(argv
[3]))
617 fail("implausible tool exe name -- not of the form *-{x86,amd64}-darwin");
619 fprintf(stderr
, "fixup_macho_loadcmds: examining tool exe: %s\n",
/* The stack address argument is the top of the stack (see the
   message printed above), so the start address handed on is
   addr - size.
   NOTE(review): no check that size <= addr is visible here; the
   subtraction would wrap for an oversized size argument. */
621 modify_macho_loadcmds( argv
[3], req_stack_addr
- req_stack_size
,
631 vmaddr 0x0000000138dea000
632 vmsize 0x00000000000ad000
645 vmaddr 0x0000000133800000
646 vmsize 0x0000000000800000