Add vbit-test-sec.vgtest and vbit-test-sec.stderr.exp to EXTRA_DIST.
[valgrind.git] / coregrind / fixup_macho_loadcmds.c
blobcdb3622a5066c6240d8d0271b389c52316820420
/* Derived from Valgrind sources, coregrind/m_debuginfo/readmacho.c.
   GPL 2+ therefore.

   Can be compiled as either a 32- or 64-bit program (doesn't matter).
*/
8 /* What does this program do? In short it postprocesses tool
9 executables on MacOSX, after linking using /usr/bin/ld.
11 This is to deal with two separate and entirely unrelated problems.
12 Problem (1) is a bug in the linker in Xcode 4.0.0. Problem (2) is
13 much newer and concerns linking 64-bit tool executables for
14 Yosemite (10.10).
16 --- Problem (1) ------------------------------------------------
18 This is a bug in the linker on Xcode 4.0.0 and Xcode 4.0.1. Xcode
19 versions prior to 4.0.0 are unaffected.
21 The tracking bug is https://bugs.kde.org/show_bug.cgi?id=267997
23 The bug causes 64-bit tool executables to segfault at startup,
24 because:
26 Comparing the MachO load commands vs a (working) tool executable
27 that was created by Xcode 3.2.x, it appears that the new linker has
28 partially ignored the build system's request to place the tool
29 executable's stack at a non standard location. The build system
30 tells the linker "-stack_addr 0x134000000 -stack_size 0x800000".
32 With the Xcode 3.2 linker those flags produce two results:
34 (1) A load command to allocate the stack at the said location:
35 Load command 3
36 cmd LC_SEGMENT_64
37 cmdsize 72
38 segname __UNIXSTACK
39 vmaddr 0x0000000133800000
40 vmsize 0x0000000000800000
41 fileoff 2285568
42 filesize 0
43 maxprot 0x00000007
44 initprot 0x00000003
45 nsects 0
46 flags 0x0
48 (2) A request (in LC_UNIXTHREAD) to set %rsp to the correct value
49 at process startup, 0x134000000.
51 With Xcode 4.0.1, (1) is missing but (2) is still present. The
52 tool executable therefore starts up with %rsp pointing to unmapped
53 memory and faults almost instantly.
55 The workaround implemented by this program is documented in comment
56 8 of bug 267997, viz:
58 One really sick workaround is to observe that the executables
59 contain a redundant MachO load command:
61 Load command 2
62 cmd LC_SEGMENT_64
63 cmdsize 72
64 segname __LINKEDIT
65 vmaddr 0x0000000138dea000
66 vmsize 0x00000000000ad000
67 fileoff 2658304
68 filesize 705632
69 maxprot 0x00000007
70 initprot 0x00000001
71 nsects 0
72 flags 0x0
74 The described section presumably contains information intended for
75 the dynamic linker, but is irrelevant because this is a statically
76 linked executable. Hence it might be possible to postprocess the
77 executables after linking, to overwrite this entry with the
78 information that would have been in the missing __UNIXSTACK entry.
79 I tried this by hand (with a binary editor) earlier and got
80 something that worked.
82 --- Problem (2) ------------------------------------------------
   On MacOSX 10.10 (Yosemite), the kernel requires all valid
   executables to have a __PAGEZERO segment with SVMA of zero and size
   of at least one page.  However, our tool executables have a
   __PAGEZERO segment with SVMA set to the requested Valgrind load
   address (typically 0x1'3800'0000).  And the kernel won't start
   those.  So we take the opportunity to "fix" this by setting the
   SVMA to zero.  Seems to work and have no obvious bad side effects.
*/
93 #define DEBUGPRINTING 0
95 #include <assert.h>
96 #include <stdlib.h>
97 #include <stdio.h>
98 #include <string.h>
99 #include <sys/mman.h>
100 #include <sys/stat.h>
101 #include <unistd.h>
102 #include <fcntl.h>
104 #undef PLAT_x86_darwin
105 #undef PLAT_amd64_darwin
107 #if defined(__APPLE__) && defined(__i386__)
108 # define PLAT_x86_darwin 1
109 #elif defined(__APPLE__) && defined(__x86_64__)
110 # define PLAT_amd64_darwin 1
111 #else
112 # error "Can't be compiled on this platform"
113 #endif
115 #include <mach-o/loader.h>
116 #include <mach-o/nlist.h>
117 #include <mach-o/fat.h>
118 #include <mach/i386/thread_status.h>
120 /* Get hold of DARWIN_VERS, and check it has a sane value. */
121 #include "config.h"
122 #if DARWIN_VERS != DARWIN_10_5 && DARWIN_VERS != DARWIN_10_6 \
123 && DARWIN_VERS != DARWIN_10_7 && DARWIN_VERS != DARWIN_10_8 \
124 && DARWIN_VERS != DARWIN_10_9 && DARWIN_VERS != DARWIN_10_10 \
125 && DARWIN_VERS != DARWIN_10_11 && DARWIN_VERS != DARWIN_10_12 \
126 && DARWIN_VERS != DARWIN_10_13
127 # error "Unknown DARWIN_VERS value. This file only compiles on Darwin."
128 #endif
/* Basic scalar types used throughout this file. */

/* 8-bit quantities. */
typedef  unsigned char  UChar;
typedef    signed char  Char;
typedef           char  HChar; /* signedness depends on host */

/* Plain ints. */
typedef  unsigned int   UInt;
typedef    signed int   Int;

/* Boolean, stored in one byte. */
typedef  unsigned char  Bool;
#define  True   ((Bool)1)
#define  False  ((Bool)0)

/* Unsigned long, and the size/address types built on it. */
typedef  unsigned long  UWord;

typedef  UWord  SizeT;
typedef  UWord  Addr;

/* long long quantities. */
typedef  unsigned long long int  ULong;
typedef    signed long long int  Long;
152 __attribute__((noreturn))
153 void fail ( HChar* msg )
155 fprintf(stderr, "fixup_macho_loadcmds: fail: %s\n", msg);
156 exit(1);
160 /*------------------------------------------------------------*/
161 /*--- ---*/
162 /*--- Mach-O file mapping/unmapping helpers ---*/
163 /*--- ---*/
164 /*------------------------------------------------------------*/
166 typedef
167 struct {
168 /* These two describe the entire mapped-in ("primary") image,
169 fat headers, kitchen sink, whatnot: the entire file. The
170 image is mapped into img[0 .. img_szB-1]. */
171 UChar* img;
172 SizeT img_szB;
173 /* These two describe the Mach-O object of interest, which is
174 presumably somewhere inside the primary image.
175 map_image_aboard() below, which generates this info, will
176 carefully check that the macho_ fields denote a section of
177 memory that falls entirely inside img[0 .. img_szB-1]. */
178 UChar* macho_img;
179 SizeT macho_img_szB;
181 ImageInfo;
184 Bool is_macho_object_file( const void* buf, SizeT szB )
186 /* (JRS: the Mach-O headers might not be in this mapped data,
187 because we only mapped a page for this initial check,
188 or at least not very much, and what's at the start of the file
189 is in general a so-called fat header. The Mach-O object we're
190 interested in could be arbitrarily far along the image, and so
191 we can't assume its header will fall within this page.) */
193 /* But we can say that either it's a fat object, in which case it
194 begins with a fat header, or it's unadorned Mach-O, in which
195 case it starts with a normal header. At least do what checks we
196 can to establish whether or not we're looking at something
197 sane. */
199 const struct fat_header* fh_be = buf;
200 const struct mach_header_64* mh = buf;
202 assert(buf);
203 if (szB < sizeof(struct fat_header))
204 return False;
205 if (ntohl(fh_be->magic) == FAT_MAGIC)
206 return True;
208 if (szB < sizeof(struct mach_header_64))
209 return False;
210 if (mh->magic == MH_MAGIC_64)
211 return True;
213 return False;
217 /* Unmap an image mapped in by map_image_aboard. */
218 static void unmap_image ( /*MOD*/ImageInfo* ii )
220 Int r;
221 assert(ii->img);
222 assert(ii->img_szB > 0);
223 r = munmap( ii->img, ii->img_szB );
224 /* Do we care if this fails? I suppose so; it would indicate
225 some fairly serious snafu with the mapping of the file. */
226 assert( !r );
227 memset(ii, 0, sizeof(*ii));
231 /* Map a given fat or thin object aboard, find the thin part if
232 necessary, do some checks, and write details of both the fat and
233 thin parts into *ii. Returns 32 (and leaves the file unmapped) if
234 the thin part is a 32 bit file. Returns 64 if it's a 64 bit file.
235 Does not return on failure. Guarantees to return pointers to a
236 valid(ish) Mach-O image if it succeeds. */
237 static Int map_image_aboard ( /*OUT*/ImageInfo* ii, HChar* filename )
239 memset(ii, 0, sizeof(*ii));
241 /* First off, try to map the thing in. */
242 { SizeT size;
243 Int r, fd;
244 struct stat stat_buf;
246 r = stat(filename, &stat_buf);
247 if (r)
248 fail("Can't stat image (to determine its size)?!");
249 size = stat_buf.st_size;
251 fd = open(filename, O_RDWR, 0);
252 if (fd == -1)
253 fail("Can't open image for possible modification!");
254 if (DEBUGPRINTING)
255 printf("size %lu fd %d\n", size, fd);
256 void* v = mmap ( NULL, size, PROT_READ|PROT_WRITE,
257 MAP_FILE|MAP_SHARED, fd, 0 );
258 if (v == MAP_FAILED) {
259 perror("mmap failed");
260 fail("Can't mmap image for possible modification!");
263 close(fd);
265 ii->img = (UChar*)v;
266 ii->img_szB = size;
269 /* Now it's mapped in and we have .img and .img_szB set. Look for
270 the embedded Mach-O object. If not findable, unmap and fail. */
271 { struct fat_header* fh_be;
272 struct fat_header fh;
273 struct mach_header_64* mh;
275 // Assume initially that we have a thin image, and update
276 // these if it turns out to be fat.
277 ii->macho_img = ii->img;
278 ii->macho_img_szB = ii->img_szB;
280 // Check for fat header.
281 if (ii->img_szB < sizeof(struct fat_header))
282 fail("Invalid Mach-O file (0 too small).");
284 // Fat header is always BIG-ENDIAN
285 fh_be = (struct fat_header *)ii->img;
286 fh.magic = ntohl(fh_be->magic);
287 fh.nfat_arch = ntohl(fh_be->nfat_arch);
288 if (fh.magic == FAT_MAGIC) {
289 // Look for a good architecture.
290 struct fat_arch *arch_be;
291 struct fat_arch arch;
292 Int f;
293 if (ii->img_szB < sizeof(struct fat_header)
294 + fh.nfat_arch * sizeof(struct fat_arch))
295 fail("Invalid Mach-O file (1 too small).");
297 for (f = 0, arch_be = (struct fat_arch *)(fh_be+1);
298 f < fh.nfat_arch;
299 f++, arch_be++) {
300 Int cputype;
301 # if defined(PLAT_x86_darwin)
302 cputype = CPU_TYPE_X86;
303 # elif defined(PLAT_amd64_darwin)
304 cputype = CPU_TYPE_X86_64;
305 # else
306 # error "unknown architecture"
307 # endif
308 arch.cputype = ntohl(arch_be->cputype);
309 arch.cpusubtype = ntohl(arch_be->cpusubtype);
310 arch.offset = ntohl(arch_be->offset);
311 arch.size = ntohl(arch_be->size);
312 if (arch.cputype == cputype) {
313 if (ii->img_szB < arch.offset + arch.size)
314 fail("Invalid Mach-O file (2 too small).");
315 ii->macho_img = ii->img + arch.offset;
316 ii->macho_img_szB = arch.size;
317 break;
320 if (f == fh.nfat_arch)
321 fail("No acceptable architecture found in fat file.");
324 /* Sanity check what we found. */
326 /* assured by logic above */
327 assert(ii->img_szB >= sizeof(struct fat_header));
329 if (ii->macho_img_szB < sizeof(struct mach_header_64))
330 fail("Invalid Mach-O file (3 too small).");
332 if (ii->macho_img_szB > ii->img_szB)
333 fail("Invalid Mach-O file (thin bigger than fat).");
335 if (ii->macho_img >= ii->img
336 && ii->macho_img + ii->macho_img_szB <= ii->img + ii->img_szB) {
337 /* thin entirely within fat, as expected */
338 } else {
339 fail("Invalid Mach-O file (thin not inside fat).");
342 mh = (struct mach_header_64 *)ii->macho_img;
343 if (mh->magic == MH_MAGIC) {
344 assert(ii->img);
345 assert(ii->macho_img);
346 assert(ii->img_szB > 0);
347 assert(ii->macho_img_szB > 0);
348 assert(ii->macho_img >= ii->img);
349 assert(ii->macho_img + ii->macho_img_szB <= ii->img + ii->img_szB);
350 return 32;
352 if (mh->magic != MH_MAGIC_64)
353 fail("Invalid Mach-O file (bad magic).");
355 if (ii->macho_img_szB < sizeof(struct mach_header_64) + mh->sizeofcmds)
356 fail("Invalid Mach-O file (4 too small).");
359 assert(ii->img);
360 assert(ii->macho_img);
361 assert(ii->img_szB > 0);
362 assert(ii->macho_img_szB > 0);
363 assert(ii->macho_img >= ii->img);
364 assert(ii->macho_img + ii->macho_img_szB <= ii->img + ii->img_szB);
365 return 64;
369 /*------------------------------------------------------------*/
370 /*--- ---*/
371 /*--- Mach-O top-level processing ---*/
372 /*--- ---*/
373 /*------------------------------------------------------------*/
375 void modify_macho_loadcmds ( HChar* filename,
376 ULong expected_stack_start,
377 ULong expected_stack_size )
379 ImageInfo ii;
380 memset(&ii, 0, sizeof(ii));
382 Int size = map_image_aboard( &ii, filename );
383 if (size == 32) {
384 fprintf(stderr, "fixup_macho_loadcmds: Is 32-bit MachO file;"
385 " no modifications needed.\n");
386 goto out;
389 assert(size == 64);
391 assert(ii.macho_img != NULL && ii.macho_img_szB > 0);
393 /* Poke around in the Mach-O header, to find some important
394 stuff.
395 * the location of the __UNIXSTACK load command, if any
396 * the location of the __LINKEDIT load command, if any
397 * the initial RSP value as stated in the LC_UNIXTHREAD
400 /* The collected data */
401 ULong init_rsp = 0;
402 Bool have_rsp = False;
403 struct segment_command_64* seg__unixstack = NULL;
404 struct segment_command_64* seg__linkedit = NULL;
405 struct segment_command_64* seg__pagezero = NULL;
407 /* Loop over the load commands and fill in the above 4 variables. */
409 { struct mach_header_64 *mh = (struct mach_header_64 *)ii.macho_img;
410 struct load_command *cmd;
411 Int c;
413 for (c = 0, cmd = (struct load_command *)(mh+1);
414 c < mh->ncmds;
415 c++, cmd = (struct load_command *)(cmd->cmdsize
416 + (unsigned long)cmd)) {
417 if (DEBUGPRINTING)
418 printf("load cmd: offset %4lu size %3d kind %2d = ",
419 (unsigned long)((UChar*)cmd - (UChar*)ii.macho_img),
420 cmd->cmdsize, cmd->cmd);
422 switch (cmd->cmd) {
423 case LC_SEGMENT_64:
424 if (DEBUGPRINTING)
425 printf("LC_SEGMENT_64");
426 break;
427 case LC_SYMTAB:
428 if (DEBUGPRINTING)
429 printf("LC_SYMTAB");
430 break;
431 case LC_DYSYMTAB:
432 if (DEBUGPRINTING)
433 printf("LC_DYSYMTAB");
434 break;
435 case LC_UUID:
436 if (DEBUGPRINTING)
437 printf("LC_UUID");
438 break;
439 case LC_UNIXTHREAD:
440 if (DEBUGPRINTING)
441 printf("LC_UNIXTHREAD");
442 break;
443 default:
444 if (DEBUGPRINTING)
445 printf("???");
446 fail("unexpected load command in Mach header");
447 break;
449 if (DEBUGPRINTING)
450 printf("\n");
452 /* Note what the stated initial RSP value is, so we can
453 check it is as expected. */
454 if (cmd->cmd == LC_UNIXTHREAD) {
455 struct thread_command* tcmd = (struct thread_command*)cmd;
456 UInt* w32s = (UInt*)( (UChar*)tcmd + sizeof(*tcmd) );
457 if (DEBUGPRINTING)
458 printf("UnixThread: flavor %u = ", w32s[0]);
459 if (w32s[0] == x86_THREAD_STATE64 && !have_rsp) {
460 if (DEBUGPRINTING)
461 printf("x86_THREAD_STATE64\n");
462 x86_thread_state64_t* state64
463 = (x86_thread_state64_t*)(&w32s[2]);
464 have_rsp = True;
465 init_rsp = state64->__rsp;
466 if (DEBUGPRINTING)
467 printf("rsp = 0x%llx\n", init_rsp);
468 } else {
469 if (DEBUGPRINTING)
470 printf("???");
472 if (DEBUGPRINTING)
473 printf("\n");
476 if (cmd->cmd == LC_SEGMENT_64) {
477 struct segment_command_64 *seg = (struct segment_command_64 *)cmd;
478 if (0 == strcmp(seg->segname, "__LINKEDIT"))
479 seg__linkedit = seg;
480 if (0 == strcmp(seg->segname, "__UNIXSTACK"))
481 seg__unixstack = seg;
482 if (0 == strcmp(seg->segname, "__PAGEZERO"))
483 seg__pagezero = seg;
490 Actions are then as follows:
492 * (always) check the RSP value is as expected, and abort if not
494 * if there's a UNIXSTACK load command, check it is as expected.
495 If not abort, if yes, do nothing more.
497 * (so there's no UNIXSTACK load command). if there's a LINKEDIT
498 load command, check if it is minimally usable (has 0 for
499 nsects and flags). If yes, convert it to a UNIXSTACK load
500 command. If there is none, or is unusable, then we're out of
501 options and have to abort.
503 if (!have_rsp)
504 fail("Can't find / check initial RSP setting");
505 if (init_rsp != expected_stack_start + expected_stack_size)
506 fail("Initial RSP value not as expected");
508 fprintf(stderr, "fixup_macho_loadcmds: "
509 "initial RSP is as expected (0x%llx)\n",
510 expected_stack_start + expected_stack_size );
512 if (seg__unixstack) {
513 struct segment_command_64 *seg = seg__unixstack;
514 if (seg->vmaddr != expected_stack_start)
515 fail("has __UNIXSTACK, but wrong ::vmaddr");
516 if (seg->vmsize != expected_stack_size)
517 fail("has __UNIXSTACK, but wrong ::vmsize");
518 if (seg->maxprot != 7)
519 fail("has __UNIXSTACK, but wrong ::maxprot (should be 7)");
520 if (seg->initprot != 3)
521 fail("has __UNIXSTACK, but wrong ::initprot (should be 3)");
522 if (seg->nsects != 0)
523 fail("has __UNIXSTACK, but wrong ::nsects (should be 0)");
524 if (seg->flags != 0)
525 fail("has __UNIXSTACK, but wrong ::flags (should be 0)");
526 /* looks ok */
527 fprintf(stderr, "fixup_macho_loadcmds: "
528 "acceptable __UNIXSTACK present; no modifications.\n" );
529 goto maybe_mash_pagezero;
532 if (seg__linkedit) {
533 struct segment_command_64 *seg = seg__linkedit;
534 if (seg->nsects != 0)
535 fail("has __LINKEDIT, but wrong ::nsects (should be 0)");
536 if (seg->flags != 0)
537 fail("has __LINKEDIT, but wrong ::flags (should be 0)");
538 fprintf(stderr, "fixup_macho_loadcmds: "
539 "no __UNIXSTACK present.\n" );
540 fprintf(stderr, "fixup_macho_loadcmds: "
541 "converting __LINKEDIT to __UNIXSTACK.\n" );
542 strcpy(seg->segname, "__UNIXSTACK");
543 seg->vmaddr = expected_stack_start;
544 seg->vmsize = expected_stack_size;
545 seg->fileoff = 0;
546 seg->filesize = 0;
547 seg->maxprot = 7;
548 seg->initprot = 3;
549 /* success */
550 goto maybe_mash_pagezero;
553 /* out of options */
554 fail("no __UNIXSTACK found and no usable __LINKEDIT found; "
555 "out of options.");
556 /* NOTREACHED */
558 maybe_mash_pagezero:
559 /* Deal with Problem (2) as documented above. */
560 # if DARWIN_VERS >= DARWIN_10_10
561 assert(size == 64);
562 if (!seg__pagezero) {
563 fail("Can't find __PAGEZERO to modify; can't continue.");
565 fprintf(stderr, "fixup_macho_loadcmds: "
566 "changing __PAGEZERO.vmaddr from %p to 0x0.\n",
567 (void*)seg__pagezero->vmaddr);
568 seg__pagezero->vmaddr = 0;
569 # endif
571 out:
572 if (ii.img)
573 unmap_image(&ii);
577 static Bool is_plausible_tool_exe_name ( HChar* nm )
579 HChar* p;
580 if (!nm)
581 return False;
583 // Does it end with this string?
584 p = strstr(nm, "-x86-darwin");
585 if (p && 0 == strcmp(p, "-x86-darwin"))
586 return True;
588 p = strstr(nm, "-amd64-darwin");
589 if (p && 0 == strcmp(p, "-amd64-darwin"))
590 return True;
592 return False;
596 int main ( int argc, char** argv )
598 Int r;
599 ULong req_stack_addr = 0;
600 ULong req_stack_size = 0;
602 if (argc != 4)
603 fail("args: -stack_addr-arg -stack_size-arg "
604 "name-of-tool-executable-to-modify");
606 r= sscanf(argv[1], "0x%llx", &req_stack_addr);
607 if (r != 1) fail("invalid stack_addr arg");
609 r= sscanf(argv[2], "0x%llx", &req_stack_size);
610 if (r != 1) fail("invalid stack_size arg");
612 fprintf(stderr, "fixup_macho_loadcmds: "
613 "requested stack_addr (top) 0x%llx, "
614 "stack_size 0x%llx\n", req_stack_addr, req_stack_size );
616 if (!is_plausible_tool_exe_name(argv[3]))
617 fail("implausible tool exe name -- not of the form *-{x86,amd64}-darwin");
619 fprintf(stderr, "fixup_macho_loadcmds: examining tool exe: %s\n",
620 argv[3] );
621 modify_macho_loadcmds( argv[3], req_stack_addr - req_stack_size,
622 req_stack_size );
624 return 0;
628 cmd LC_SEGMENT_64
629 cmdsize 72
630 segname __LINKEDIT
631 vmaddr 0x0000000138dea000
632 vmsize 0x00000000000ad000
633 fileoff 2658304
634 filesize 705632
635 maxprot 0x00000007
636 initprot 0x00000001
637 nsects 0
638 flags 0x0
642 cmd LC_SEGMENT_64
643 cmdsize 72
644 segname __UNIXSTACK
645 vmaddr 0x0000000133800000
646 vmsize 0x0000000000800000
647 fileoff 2498560
648 filesize 0
649 maxprot 0x00000007
650 initprot 0x00000003
651 nsects 0
652 flags 0x0