[AMDGPU][AsmParser][NFC] Translate parsed MIMG instructions to MCInsts automatically.
[llvm-project.git] / lldb / examples / python / mach_o.py
blob8c280689f845d3624071c76b52b3a7e59adb004d
1 #!/usr/bin/env python
3 import cmd
4 import dict_utils
5 import file_extract
6 import optparse
7 import re
8 import struct
9 import string
10 import io
11 import sys
12 import uuid
14 # Mach header "magic" constants
15 MH_MAGIC = 0xFEEDFACE
16 MH_CIGAM = 0xCEFAEDFE
17 MH_MAGIC_64 = 0xFEEDFACF
18 MH_CIGAM_64 = 0xCFFAEDFE
19 FAT_MAGIC = 0xCAFEBABE
20 FAT_CIGAM = 0xBEBAFECA
22 # Mach header "filetype" constants
23 MH_OBJECT = 0x00000001
24 MH_EXECUTE = 0x00000002
25 MH_FVMLIB = 0x00000003
26 MH_CORE = 0x00000004
27 MH_PRELOAD = 0x00000005
28 MH_DYLIB = 0x00000006
29 MH_DYLINKER = 0x00000007
30 MH_BUNDLE = 0x00000008
31 MH_DYLIB_STUB = 0x00000009
32 MH_DSYM = 0x0000000A
33 MH_KEXT_BUNDLE = 0x0000000B
35 # Mach header "flag" constant bits
36 MH_NOUNDEFS = 0x00000001
37 MH_INCRLINK = 0x00000002
38 MH_DYLDLINK = 0x00000004
39 MH_BINDATLOAD = 0x00000008
40 MH_PREBOUND = 0x00000010
41 MH_SPLIT_SEGS = 0x00000020
42 MH_LAZY_INIT = 0x00000040
43 MH_TWOLEVEL = 0x00000080
44 MH_FORCE_FLAT = 0x00000100
45 MH_NOMULTIDEFS = 0x00000200
46 MH_NOFIXPREBINDING = 0x00000400
47 MH_PREBINDABLE = 0x00000800
48 MH_ALLMODSBOUND = 0x00001000
49 MH_SUBSECTIONS_VIA_SYMBOLS = 0x00002000
50 MH_CANONICAL = 0x00004000
51 MH_WEAK_DEFINES = 0x00008000
52 MH_BINDS_TO_WEAK = 0x00010000
53 MH_ALLOW_STACK_EXECUTION = 0x00020000
54 MH_ROOT_SAFE = 0x00040000
55 MH_SETUID_SAFE = 0x00080000
56 MH_NO_REEXPORTED_DYLIBS = 0x00100000
57 MH_PIE = 0x00200000
58 MH_DEAD_STRIPPABLE_DYLIB = 0x00400000
59 MH_HAS_TLV_DESCRIPTORS = 0x00800000
60 MH_NO_HEAP_EXECUTION = 0x01000000
62 # Mach load command constants
63 LC_REQ_DYLD = 0x80000000
64 LC_SEGMENT = 0x00000001
65 LC_SYMTAB = 0x00000002
66 LC_SYMSEG = 0x00000003
67 LC_THREAD = 0x00000004
68 LC_UNIXTHREAD = 0x00000005
69 LC_LOADFVMLIB = 0x00000006
70 LC_IDFVMLIB = 0x00000007
71 LC_IDENT = 0x00000008
72 LC_FVMFILE = 0x00000009
73 LC_PREPAGE = 0x0000000A
74 LC_DYSYMTAB = 0x0000000B
75 LC_LOAD_DYLIB = 0x0000000C
76 LC_ID_DYLIB = 0x0000000D
77 LC_LOAD_DYLINKER = 0x0000000E
78 LC_ID_DYLINKER = 0x0000000F
79 LC_PREBOUND_DYLIB = 0x00000010
80 LC_ROUTINES = 0x00000011
81 LC_SUB_FRAMEWORK = 0x00000012
82 LC_SUB_UMBRELLA = 0x00000013
83 LC_SUB_CLIENT = 0x00000014
84 LC_SUB_LIBRARY = 0x00000015
85 LC_TWOLEVEL_HINTS = 0x00000016
86 LC_PREBIND_CKSUM = 0x00000017
87 LC_LOAD_WEAK_DYLIB = 0x00000018 | LC_REQ_DYLD
88 LC_SEGMENT_64 = 0x00000019
89 LC_ROUTINES_64 = 0x0000001A
90 LC_UUID = 0x0000001B
91 LC_RPATH = 0x0000001C | LC_REQ_DYLD
92 LC_CODE_SIGNATURE = 0x0000001D
93 LC_SEGMENT_SPLIT_INFO = 0x0000001E
94 LC_REEXPORT_DYLIB = 0x0000001F | LC_REQ_DYLD
95 LC_LAZY_LOAD_DYLIB = 0x00000020
96 LC_ENCRYPTION_INFO = 0x00000021
97 LC_DYLD_INFO = 0x00000022
98 LC_DYLD_INFO_ONLY = 0x00000022 | LC_REQ_DYLD
99 LC_LOAD_UPWARD_DYLIB = 0x00000023 | LC_REQ_DYLD
100 LC_VERSION_MIN_MACOSX = 0x00000024
101 LC_VERSION_MIN_IPHONEOS = 0x00000025
102 LC_FUNCTION_STARTS = 0x00000026
103 LC_DYLD_ENVIRONMENT = 0x00000027
105 # Mach CPU constants
106 CPU_ARCH_MASK = 0xFF000000
107 CPU_ARCH_ABI64 = 0x01000000
108 CPU_TYPE_ANY = 0xFFFFFFFF
109 CPU_TYPE_VAX = 1
110 CPU_TYPE_MC680x0 = 6
111 CPU_TYPE_I386 = 7
112 CPU_TYPE_X86_64 = CPU_TYPE_I386 | CPU_ARCH_ABI64
113 CPU_TYPE_MIPS = 8
114 CPU_TYPE_MC98000 = 10
115 CPU_TYPE_HPPA = 11
116 CPU_TYPE_ARM = 12
117 CPU_TYPE_MC88000 = 13
118 CPU_TYPE_SPARC = 14
119 CPU_TYPE_I860 = 15
120 CPU_TYPE_ALPHA = 16
121 CPU_TYPE_POWERPC = 18
122 CPU_TYPE_POWERPC64 = CPU_TYPE_POWERPC | CPU_ARCH_ABI64
124 # VM protection constants
125 VM_PROT_READ = 1
126 VM_PROT_WRITE = 2
127 VM_PROT_EXECUTE = 4
129 # Bit masks for the "Mach.NList.type" field
130 N_STAB = 0xE0
131 N_PEXT = 0x10
132 N_TYPE = 0x0E
133 N_EXT = 0x01
135 # Values for nlist N_TYPE bits of the "Mach.NList.type" field.
136 N_UNDF = 0x0
137 N_ABS = 0x2
138 N_SECT = 0xE
139 N_PBUD = 0xC
140 N_INDR = 0xA
142 # Section indexes for the "Mach.NList.sect_idx" fields
143 NO_SECT = 0
144 MAX_SECT = 255
146 # Stab defines
147 N_GSYM = 0x20
148 N_FNAME = 0x22
149 N_FUN = 0x24
150 N_STSYM = 0x26
151 N_LCSYM = 0x28
152 N_BNSYM = 0x2E
153 N_OPT = 0x3C
154 N_RSYM = 0x40
155 N_SLINE = 0x44
156 N_ENSYM = 0x4E
157 N_SSYM = 0x60
158 N_SO = 0x64
159 N_OSO = 0x66
160 N_LSYM = 0x80
161 N_BINCL = 0x82
162 N_SOL = 0x84
163 N_PARAMS = 0x86
164 N_VERSION = 0x88
165 N_OLEVEL = 0x8A
166 N_PSYM = 0xA0
167 N_EINCL = 0xA2
168 N_ENTRY = 0xA4
169 N_LBRAC = 0xC0
170 N_EXCL = 0xC2
171 N_RBRAC = 0xE0
172 N_BCOMM = 0xE2
173 N_ECOMM = 0xE4
174 N_ECOML = 0xE8
175 N_LENG = 0xFE
177 vm_prot_names = ["---", "r--", "-w-", "rw-", "--x", "r-x", "-wx", "rwx"]
def dump_memory(base_addr, data, hex_bytes_len, num_per_line):
    """Print a hex + ASCII dump of `data` to stdout.

    Args:
        base_addr: Address shown for the first byte of `data`.
        data: Bytes-like object to dump; a str is treated as one byte per
            character (latin-1).
        hex_bytes_len: Number of hex digits (two per byte) to dump, or -1 to
            dump everything. Values past the end of `data` are clamped.
        num_per_line: Number of bytes shown on each output row.
    """
    if isinstance(data, str):
        data = data.encode("latin-1")
    hex_bytes = data.hex()
    if hex_bytes_len == -1 or hex_bytes_len > len(hex_bytes):
        hex_bytes_len = len(hex_bytes)
    ascii_str = ""
    i = 0  # index into hex_bytes; advances two digits per byte
    while i < hex_bytes_len:
        byte_idx = i // 2
        if byte_idx % num_per_line == 0:
            if i > 0:
                # Finish the previous row with its ASCII column.
                print(" %s" % ascii_str)
                ascii_str = ""
            # Label the row with the byte address, not the hex-digit index.
            print("0x%8.8x:" % (base_addr + byte_idx), end=" ")
        hex_byte = hex_bytes[i : i + 2]
        print(hex_byte, end=" ")
        int_byte = int(hex_byte, 16)
        ascii_str += chr(int_byte) if 32 <= int_byte < 127 else "."
        i += 2
    if ascii_str:
        # Pad a short final row so its ASCII column lines up with full rows.
        remainder = (i // 2) % num_per_line
        padding = num_per_line - remainder if remainder else 0
        print("%*s%s" % (padding * 3 + 1, "", ascii_str))
    print()
class TerminalColors:
    """Simple terminal colors class.

    Every method returns an ANSI escape sequence as a string, or an empty
    string when the instance was created with enabled=False.
    """

    def __init__(self, enabled=True):
        # TODO: discover terminal type from "file" and disable if
        # it can't handle the color codes
        self.enabled = enabled

    def _sgr(self, first, second, use_first):
        """Return the escape for code `first` if `use_first` tests True,
        otherwise for code `second`; return "" when colors are disabled."""
        if not self.enabled:
            return ""
        return "\x1b[%dm" % (first if use_first else second)

    def reset(self):
        """Reset all terminal colors and formatting."""
        if self.enabled:
            return "\x1b[0m"
        return ""

    def bold(self, on=True):
        """Enable or disable bold depending on the "on" parameter."""
        return self._sgr(1, 22, on)

    def italics(self, on=True):
        """Enable or disable italics depending on the "on" parameter."""
        return self._sgr(3, 23, on)

    def underline(self, on=True):
        """Enable or disable underline depending on the "on" parameter."""
        return self._sgr(4, 24, on)

    def inverse(self, on=True):
        """Enable or disable inverse depending on the "on" parameter."""
        return self._sgr(7, 27, on)

    def strike(self, on=True):
        """Enable or disable strike through depending on the "on" parameter."""
        return self._sgr(9, 29, on)

    def black(self, fg=True):
        """Set the foreground or background color to black.
        The foreground color will be set if "fg" tests True. The background color will be set if "fg" tests False.
        """
        return self._sgr(30, 40, fg)

    def red(self, fg=True):
        """Set the foreground or background color to red.
        The foreground color will be set if "fg" tests True. The background color will be set if "fg" tests False.
        """
        return self._sgr(31, 41, fg)

    def green(self, fg=True):
        """Set the foreground or background color to green.
        The foreground color will be set if "fg" tests True. The background color will be set if "fg" tests False.
        """
        return self._sgr(32, 42, fg)

    def yellow(self, fg=True):
        """Set the foreground or background color to yellow.
        The foreground color will be set if "fg" tests True. The background color will be set if "fg" tests False.
        """
        # 33 is the foreground code and 43 the background code, following the
        # same 3x/4x pairing as every other color in this class.
        return self._sgr(33, 43, fg)

    def blue(self, fg=True):
        """Set the foreground or background color to blue.
        The foreground color will be set if "fg" tests True. The background color will be set if "fg" tests False.
        """
        return self._sgr(34, 44, fg)

    def magenta(self, fg=True):
        """Set the foreground or background color to magenta.
        The foreground color will be set if "fg" tests True. The background color will be set if "fg" tests False.
        """
        return self._sgr(35, 45, fg)

    def cyan(self, fg=True):
        """Set the foreground or background color to cyan.
        The foreground color will be set if "fg" tests True. The background color will be set if "fg" tests False.
        """
        return self._sgr(36, 46, fg)

    def white(self, fg=True):
        """Set the foreground or background color to white.
        The foreground color will be set if "fg" tests True. The background color will be set if "fg" tests False.
        """
        return self._sgr(37, 47, fg)

    def default(self, fg=True):
        """Set the foreground or background color to the default.
        The foreground color will be set if "fg" tests True. The background color will be set if "fg" tests False.
        """
        return self._sgr(39, 49, fg)
def swap_unpack_char():
    """Return the struct byte-order prefix for the non-native endianness."""
    # A natively packed 16-bit 1 starts with a zero byte only on a big-endian
    # host, where the swapped order is little-endian ("<").
    if struct.pack("H", 1).startswith(b"\x00"):
        return "<"
    return ">"
def dump_hex_bytes(addr, s, bytes_per_line=16):
    """Print `s` as rows of uppercase hex bytes, each row prefixed by its address.

    Args:
        addr: Address of the first byte; each row is labeled addr + offset.
        s: bytes/bytearray, or a str treated as one byte per character.
        bytes_per_line: Number of bytes printed per row.
    """
    line = ""
    for i, ch in enumerate(s):
        if i % bytes_per_line == 0:
            if line:
                print(line)
            line = "%#8.8x: " % (addr + i)
        # Iterating bytes yields ints on Python 3; iterating str yields chars.
        line += "%02X " % (ch if isinstance(ch, int) else ord(ch))
    print(line)
def dump_hex_byte_string_diff(addr, a, b, bytes_per_line=16):
    """Print a hex dump of `a`, coloring in red each byte that differs from `b`.

    Positions past the end of `a` show the byte from `b`; positions past the
    end of either input always count as a mismatch.

    Args:
        addr: Address of the first byte; each row is labeled addr + offset.
        a: bytes/bytearray or str whose bytes are displayed.
        b: bytes/bytearray or str to compare against.
        bytes_per_line: Number of bytes printed per row.
    """
    tty_colors = TerminalColors(True)
    a_len = len(a)
    b_len = len(b)
    line = ""
    for i in range(max(a_len, b_len)):
        ch_a = a[i] if i < a_len else None
        ch_b = b[i] if i < b_len else None
        # Compare against None explicitly: a zero byte is a real value.
        ch = ch_a if ch_a is not None else ch_b
        mismatch = ch_a != ch_b
        if i % bytes_per_line == 0:
            if line:
                print(line)
            line = "%#8.8x: " % (addr + i)
        if mismatch:
            line += tty_colors.red()
        # Indexing bytes yields ints on Python 3; indexing str yields chars.
        line += "%02X " % (ch if isinstance(ch, int) else ord(ch))
        if mismatch:
            line += tty_colors.default()
    print(line)
428 class Mach:
429 """Class that does everything mach-o related"""
class Arch:
    """A mach-o architecture, stored as a CPU type and CPU subtype pair."""

    # Each row is [name, cpu type, cpu subtype]; __str__ scans rows in order
    # and returns the name of the first row that matches.
    cpu_infos = [
        ["arm", CPU_TYPE_ARM, CPU_TYPE_ANY],
        ["arm", CPU_TYPE_ARM, 0],
        ["armv4", CPU_TYPE_ARM, 5],
        ["armv6", CPU_TYPE_ARM, 6],
        ["armv5", CPU_TYPE_ARM, 7],
        ["xscale", CPU_TYPE_ARM, 8],
        ["armv7", CPU_TYPE_ARM, 9],
        ["armv7f", CPU_TYPE_ARM, 10],
        ["armv7s", CPU_TYPE_ARM, 11],
        ["armv7k", CPU_TYPE_ARM, 12],
        ["armv7m", CPU_TYPE_ARM, 15],
        ["armv7em", CPU_TYPE_ARM, 16],
        ["ppc", CPU_TYPE_POWERPC, CPU_TYPE_ANY],
        ["ppc", CPU_TYPE_POWERPC, 0],
        ["ppc601", CPU_TYPE_POWERPC, 1],
        ["ppc602", CPU_TYPE_POWERPC, 2],
        ["ppc603", CPU_TYPE_POWERPC, 3],
        ["ppc603e", CPU_TYPE_POWERPC, 4],
        ["ppc603ev", CPU_TYPE_POWERPC, 5],
        ["ppc604", CPU_TYPE_POWERPC, 6],
        ["ppc604e", CPU_TYPE_POWERPC, 7],
        ["ppc620", CPU_TYPE_POWERPC, 8],
        ["ppc750", CPU_TYPE_POWERPC, 9],
        ["ppc7400", CPU_TYPE_POWERPC, 10],
        ["ppc7450", CPU_TYPE_POWERPC, 11],
        ["ppc970", CPU_TYPE_POWERPC, 100],
        ["ppc64", CPU_TYPE_POWERPC64, 0],
        ["ppc970-64", CPU_TYPE_POWERPC64, 100],
        ["i386", CPU_TYPE_I386, 3],
        ["i486", CPU_TYPE_I386, 4],
        ["i486sx", CPU_TYPE_I386, 0x84],
        ["i386", CPU_TYPE_I386, CPU_TYPE_ANY],
        ["x86_64", CPU_TYPE_X86_64, 3],
        ["x86_64", CPU_TYPE_X86_64, CPU_TYPE_ANY],
    ]

    def __init__(self, c=0, s=0):
        self.cpu, self.sub = c, s

    def set_cpu_type(self, c):
        """Replace the CPU type."""
        self.cpu = c

    def set_cpu_subtype(self, s):
        """Replace the CPU subtype."""
        self.sub = s

    def set_arch(self, c, s):
        """Replace both the CPU type and the CPU subtype."""
        self.cpu, self.sub = c, s

    def is_64_bit(self):
        """Return True when the CPU type has the CPU_ARCH_ABI64 bit set."""
        return bool(self.cpu & CPU_ARCH_ABI64)

    def __str__(self):
        """Return the matching name from cpu_infos, else "<cpu>.<sub>"."""
        for name, cpu_type, cpu_subtype in self.cpu_infos:
            # Only the low 24 bits of the subtype take part in the match.
            if cpu_type == self.cpu and cpu_subtype == (self.sub & 0x00FFFFFF):
                return name
        return "{0}.{1}".format(self.cpu, self.sub)
class Magic(dict_utils.Enum):
    """Enumeration of the skinny and universal ("fat") mach-o magic values."""

    enum = {
        "MH_MAGIC": MH_MAGIC,
        "MH_CIGAM": MH_CIGAM,
        "MH_MAGIC_64": MH_MAGIC_64,
        "MH_CIGAM_64": MH_CIGAM_64,
        "FAT_MAGIC": FAT_MAGIC,
        "FAT_CIGAM": FAT_CIGAM,
    }

    def __init__(self, initial_value=0):
        dict_utils.Enum.__init__(self, initial_value, self.enum)

    def is_skinny_mach_file(self):
        """Return True for any of the four single-architecture magic values."""
        return self.value in (MH_MAGIC, MH_CIGAM, MH_MAGIC_64, MH_CIGAM_64)

    def is_universal_mach_file(self):
        """Return True for either of the universal-file magic values."""
        return self.value in (FAT_MAGIC, FAT_CIGAM)

    def unpack(self, data):
        """Read the magic from `data` as a native-byte-order uint32."""
        data.set_byte_order("native")
        self.value = data.get_uint32()

    def get_byte_order(self):
        """Return the byte-order prefix implied by the magic.

        The byte-swapped ("CIGAM") magic values give the non-native prefix;
        everything else gives "=".
        """
        is_swapped = self.value in (MH_CIGAM, MH_CIGAM_64, FAT_CIGAM)
        return swap_unpack_char() if is_swapped else "="

    def is_64_bit(self):
        """Return True for the 64-bit skinny magic values."""
        return self.value in (MH_MAGIC_64, MH_CIGAM_64)
def __init__(self):
    """Start with no path, no parsed content and a default magic value."""
    self.path = None
    self.content = None
    self.magic = Mach.Magic()
def extract(self, path, extractor):
    """Record `path`, then unpack the mach-o data read through `extractor`."""
    self.path = path
    self.unpack(extractor)
def parse(self, path):
    """Open the file at `path` and unpack it as a mach-o file.

    I/O errors and ValueErrors are reported on stdout and swallowed; any
    other exception is reported on stdout and re-raised.
    """
    self.path = path
    try:
        # Mach-O files are binary; text mode would try to decode the bytes.
        f = open(self.path, "rb")
        # The file is left open on purpose: the extractor wraps it and the
        # unpacked content keeps the extractor for later reads.
        file_extractor = file_extract.FileExtract(f, "=")
        self.unpack(file_extractor)
    except IOError as err:
        # Use the attributes rather than unpacking err.args, which is not
        # guaranteed to hold exactly two items.
        print("I/O error({0}): {1}".format(err.errno, err.strerror))
    except ValueError:
        print("Could not convert data to an integer.")
    except BaseException:
        # Same scope as a bare except: report, then let it propagate.
        print("Unexpected error:", sys.exc_info()[0])
        raise
def compare(self, rhs):
    """Compare this file's parsed content with the content of `rhs`."""
    other_content = rhs.content
    self.content.compare(other_content)
def dump(self, options=None):
    """Forward a full dump request to the parsed content."""
    content = self.content
    content.dump(options)
def dump_header(self, dump_description=True, options=None):
    """Forward a header dump request to the parsed content."""
    content = self.content
    content.dump_header(dump_description, options)
def dump_load_commands(self, dump_description=True, options=None):
    """Forward a load-command dump request to the parsed content."""
    content = self.content
    content.dump_load_commands(dump_description, options)
def dump_sections(self, dump_description=True, options=None):
    """Forward a section-list dump request to the parsed content."""
    content = self.content
    content.dump_sections(dump_description, options)
def dump_section_contents(self, options):
    """Forward a section-contents dump request to the parsed content."""
    content = self.content
    content.dump_section_contents(options)
def dump_symtab(self, dump_description=True, options=None):
    """Forward a symbol-table dump request to the parsed content."""
    content = self.content
    content.dump_symtab(dump_description, options)
def dump_symbol_names_matching_regex(self, regex, file=None):
    """Forward a regex-filtered symbol-name dump to the parsed content."""
    content = self.content
    content.dump_symbol_names_matching_regex(regex, file)
def description(self):
    """Return the description string produced by the parsed content."""
    text = self.content.description()
    return text
def unpack(self, data):
    """Read the magic from `data` and unpack the matching content object.

    self.content becomes a Mach.Skinny or Mach.Universal, or None when the
    magic matches neither kind of file.
    """
    magic = self.magic
    magic.unpack(data)
    if magic.is_skinny_mach_file():
        content = Mach.Skinny(self.path)
    elif magic.is_universal_mach_file():
        content = Mach.Universal(self.path)
    else:
        # Unrecognized magic: nothing to unpack.
        self.content = None
        return
    self.content = content
    content.unpack(data, magic)
def is_valid(self):
    """Return True once content has been set to a non-None value."""
    return not (self.content is None)
class Universal:
    """A universal ("fat") mach-o file: a table of per-architecture slices."""

    def __init__(self, path):
        self.path = path
        self.type = "universal"
        self.file_off = 0
        self.magic = None
        self.nfat_arch = 0
        self.archs = list()

    def _slices(self):
        """Yield the unpacked mach object of each of the nfat_arch entries."""
        return (self.archs[idx].mach for idx in range(self.nfat_arch))

    def description(self):
        """Return "<file offset>: <path> (<comma separated arch names>)"."""
        arch_names = ", ".join("%s" % info.arch for info in self.archs)
        return "%#8.8x: %s (%s)" % (self.file_off, self.path, arch_names)

    def unpack(self, data, magic=None):
        """Unpack the fat header, the arch table, and every slice it names.

        When `magic` is supplied it is taken to have been read from `data`
        already, so the recorded file offset is moved back by its 4 bytes.
        """
        start = data.tell()
        if magic is not None:
            self.magic = magic
            self.file_off = start - 4
        else:
            self.file_off = start
            self.magic = Mach.Magic()
            self.magic.unpack(data)
        # Universal headers are always in big endian
        data.set_byte_order("big")
        self.nfat_arch = data.get_uint32()
        for idx in range(self.nfat_arch):
            self.archs.append(Mach.Universal.ArchInfo())
            self.archs[idx].unpack(data)
        for idx in range(self.nfat_arch):
            info = self.archs[idx]
            info.mach = Mach.Skinny(self.path)
            data.seek(info.offset, 0)
            slice_magic = Mach.Magic()
            slice_magic.unpack(data)
            info.mach.unpack(data, slice_magic)

    def compare(self, rhs):
        """Report that universal comparison is unsupported; return False."""
        print("error: comparing two universal files is not supported yet")
        return False

    def dump(self, options):
        """Dump the fat header (if requested) and then every slice."""
        want_header = options.dump_header
        if want_header:
            print()
            print(
                "Universal Mach File: magic = %s, nfat_arch = %u"
                % (self.magic, self.nfat_arch)
            )
            print()
        if self.nfat_arch > 0:
            if options.dump_header:
                self.archs[0].dump_header(True, options)
                for idx in range(self.nfat_arch):
                    self.archs[idx].dump_flat(options)
                print()
            for mach in self._slices():
                mach.dump(options)

    def dump_header(self, dump_description=True, options=None):
        """Dump the mach header of every slice, each followed by a blank line."""
        if dump_description:
            print(self.description())
        for mach in self._slices():
            mach.dump_header(True, options)
            print()

    def dump_load_commands(self, dump_description=True, options=None):
        """Dump the load commands of every slice, each followed by a blank line."""
        if dump_description:
            print(self.description())
        for mach in self._slices():
            mach.dump_load_commands(True, options)
            print()

    def dump_sections(self, dump_description=True, options=None):
        """Dump the sections of every slice, each followed by a blank line."""
        if dump_description:
            print(self.description())
        for mach in self._slices():
            mach.dump_sections(True, options)
            print()

    def dump_section_contents(self, options):
        """Dump the requested section contents of every slice."""
        for mach in self._slices():
            mach.dump_section_contents(options)
            print()

    def dump_symtab(self, dump_description=True, options=None):
        """Dump the symbol table of every slice, each followed by a blank line."""
        if dump_description:
            print(self.description())
        for mach in self._slices():
            mach.dump_symtab(True, options)
            print()

    def dump_symbol_names_matching_regex(self, regex, file=None):
        """Dump the symbol names matching `regex` for every slice."""
        for mach in self._slices():
            mach.dump_symbol_names_matching_regex(regex, file)

    class ArchInfo:
        """One entry of the fat arch table: arch, offset, size and alignment."""

        def __init__(self):
            self.arch = Mach.Arch(0, 0)
            self.offset = 0
            self.size = 0
            self.align = 0
            self.mach = None

        def _fields(self):
            """Return the five integer fields in table order."""
            return (
                self.arch.cpu,
                self.arch.sub,
                self.offset,
                self.size,
                self.align,
            )

        def unpack(self, data):
            """Read the five uint32 fields of this entry from `data`."""
            # Universal headers are always in big endian
            data.set_byte_order("big")
            values = data.get_n_uint32(5)
            (
                self.arch.cpu,
                self.arch.sub,
                self.offset,
                self.size,
                self.align,
            ) = values

        def dump_header(self, dump_description=True, options=None):
            """Print the column headings that go with dump_flat()."""
            if options.verbose:
                headings = (
                    "CPU SUBTYPE OFFSET SIZE ALIGN",
                    "---------- ---------- ---------- ---------- ----------",
                )
            else:
                headings = (
                    "ARCH FILEOFFSET FILESIZE ALIGN",
                    "---------- ---------- ---------- ----------",
                )
            for heading in headings:
                print(heading)

        def dump_flat(self, options):
            """Print this entry as one row; verbose shows raw cpu/subtype."""
            if options.verbose:
                row = "%#8.8x %#8.8x %#8.8x %#8.8x %#8.8x" % self._fields()
            else:
                row = "%-10s %#8.8x %#8.8x %#8.8x" % (
                    self.arch,
                    self.offset,
                    self.size,
                    self.align,
                )
            print(row)

        def dump(self):
            """Print each field on its own labeled line."""
            print(" cputype: %#8.8x" % self.arch.cpu)
            print("cpusubtype: %#8.8x" % self.arch.sub)
            print(" offset: %#8.8x" % self.offset)
            print(" size: %#8.8x" % self.size)
            print(" align: %#8.8x" % self.align)

        def __str__(self):
            return (
                "Mach.Universal.ArchInfo: %#8.8x %#8.8x %#8.8x %#8.8x %#8.8x"
                % self._fields()
            )

        def __repr__(self):
            return (
                "Mach.Universal.ArchInfo: %#8.8x %#8.8x %#8.8x %#8.8x %#8.8x"
                % self._fields()
            )
class Flags:
    """The mach header "flags" bit field, printable as a list of MH_* names."""

    def __init__(self, b):
        self.bits = b

    def __str__(self):
        """Return the names of all set flags joined by " | ".

        Returns an empty string when no known flag is set. Bits that have no
        entry in the table below are ignored.
        """
        flag_names = (
            (MH_NOUNDEFS, "MH_NOUNDEFS"),
            (MH_INCRLINK, "MH_INCRLINK"),
            (MH_DYLDLINK, "MH_DYLDLINK"),
            (MH_BINDATLOAD, "MH_BINDATLOAD"),
            (MH_PREBOUND, "MH_PREBOUND"),
            (MH_SPLIT_SEGS, "MH_SPLIT_SEGS"),
            (MH_LAZY_INIT, "MH_LAZY_INIT"),
            (MH_TWOLEVEL, "MH_TWOLEVEL"),
            (MH_FORCE_FLAT, "MH_FORCE_FLAT"),
            (MH_NOMULTIDEFS, "MH_NOMULTIDEFS"),
            (MH_NOFIXPREBINDING, "MH_NOFIXPREBINDING"),
            (MH_PREBINDABLE, "MH_PREBINDABLE"),
            (MH_ALLMODSBOUND, "MH_ALLMODSBOUND"),
            (MH_SUBSECTIONS_VIA_SYMBOLS, "MH_SUBSECTIONS_VIA_SYMBOLS"),
            (MH_CANONICAL, "MH_CANONICAL"),
            (MH_WEAK_DEFINES, "MH_WEAK_DEFINES"),
            (MH_BINDS_TO_WEAK, "MH_BINDS_TO_WEAK"),
            (MH_ALLOW_STACK_EXECUTION, "MH_ALLOW_STACK_EXECUTION"),
            (MH_ROOT_SAFE, "MH_ROOT_SAFE"),
            (MH_SETUID_SAFE, "MH_SETUID_SAFE"),
            (MH_NO_REEXPORTED_DYLIBS, "MH_NO_REEXPORTED_DYLIBS"),
            (MH_PIE, "MH_PIE"),
            (MH_DEAD_STRIPPABLE_DYLIB, "MH_DEAD_STRIPPABLE_DYLIB"),
            (MH_HAS_TLV_DESCRIPTORS, "MH_HAS_TLV_DESCRIPTORS"),
            (MH_NO_HEAP_EXECUTION, "MH_NO_HEAP_EXECUTION"),
        )
        # Joining avoids the separator trimming that used to leave a
        # trailing space after the last flag name.
        return " | ".join(name for bit, name in flag_names if self.bits & bit)
class FileType(dict_utils.Enum):
    """Enumeration of the mach header "filetype" values."""

    enum = {
        "MH_OBJECT": MH_OBJECT,
        "MH_EXECUTE": MH_EXECUTE,
        "MH_FVMLIB": MH_FVMLIB,
        "MH_CORE": MH_CORE,
        "MH_PRELOAD": MH_PRELOAD,
        "MH_DYLIB": MH_DYLIB,
        "MH_DYLINKER": MH_DYLINKER,
        "MH_BUNDLE": MH_BUNDLE,
        "MH_DYLIB_STUB": MH_DYLIB_STUB,
        "MH_DSYM": MH_DSYM,
        "MH_KEXT_BUNDLE": MH_KEXT_BUNDLE,
    }

    def __init__(self, initial_value=0):
        # Hand the name -> value table to the shared Enum base.
        name_table = self.enum
        dict_utils.Enum.__init__(self, initial_value, name_table)
851 class Skinny:
def __init__(self, path):
    """Create an empty, not-yet-unpacked description of the file at `path`."""
    self.path = path
    self.type = "skinny"
    self.data = None
    self.file_off = 0
    # Mach header fields.
    self.magic = 0
    self.arch = Mach.Arch(0, 0)
    self.filetype = Mach.FileType(0)
    self.ncmds = 0
    self.sizeofcmds = 0
    self.flags = Mach.Flags(0)
    self.uuid = None
    # Collections start out empty.
    self.commands = []
    self.segments = []
    self.symbols = []
    # Index 0 is occupied by a default-constructed section; the dump and
    # compare code in this class starts from index 1.
    self.sections = [Mach.Section()]
def description(self):
    """Return "<file offset>: <path> (<arch>)" for this image."""
    fields = (self.file_off, self.path, self.arch)
    return "%#8.8x: %s (%s)" % fields
def unpack(self, data, magic=None):
    """Unpack the mach header and all of its load commands from `data`.

    When `magic` is supplied it is taken to have been read from `data`
    already, so the recorded file offset is moved back by its 4 bytes.
    """
    self.data = data
    start = data.tell()
    self.file_off = start
    if magic is not None:
        self.magic = magic
        self.file_off = start - 4
    else:
        self.magic = Mach.Magic()
        self.magic.unpack(data)
    data.set_byte_order(self.magic.get_byte_order())
    header_words = data.get_n_uint32(6)
    (
        self.arch.cpu,
        self.arch.sub,
        self.filetype.value,
        self.ncmds,
        self.sizeofcmds,
        self.flags.bits,
    ) = header_words

    if self.is_64_bit():
        data.get_uint32()  # Skip reserved word in mach_header_64

    for _ in range(self.ncmds):
        self.commands.append(self.unpack_load_command(data))
def get_data(self):
    """Return the data extractor set to this image's byte order, or None."""
    extractor = self.data
    if not extractor:
        return None
    extractor.set_byte_order(self.magic.get_byte_order())
    return extractor
def unpack_load_command(self, data):
    """Unpack one load command from `data` and return it.

    The generic header is unpacked first; when the command type has a
    dedicated class the command is re-wrapped in it and unpacked again.
    Whatever the type, skip() is called on the result before returning.
    """
    lc = Mach.LoadCommand()
    lc.unpack(self, data)
    kind = lc.command.get_enum_value()
    if kind in (LC_SEGMENT, LC_SEGMENT_64):
        specialized = Mach.SegmentLoadCommand
    elif kind in (
        LC_LOAD_DYLIB,
        LC_ID_DYLIB,
        LC_LOAD_WEAK_DYLIB,
        LC_REEXPORT_DYLIB,
    ):
        specialized = Mach.DylibLoadCommand
    elif kind in (
        LC_LOAD_DYLINKER,
        LC_SUB_FRAMEWORK,
        LC_SUB_CLIENT,
        LC_SUB_UMBRELLA,
        LC_SUB_LIBRARY,
        LC_ID_DYLINKER,
        LC_RPATH,
    ):
        specialized = Mach.LoadDYLDLoadCommand
    elif kind == LC_DYLD_INFO_ONLY:
        specialized = Mach.DYLDInfoOnlyLoadCommand
    elif kind == LC_SYMTAB:
        specialized = Mach.SymtabLoadCommand
    elif kind == LC_DYSYMTAB:
        specialized = Mach.DYLDSymtabLoadCommand
    elif kind == LC_UUID:
        specialized = Mach.UUIDLoadCommand
    elif kind in (LC_CODE_SIGNATURE, LC_SEGMENT_SPLIT_INFO, LC_FUNCTION_STARTS):
        specialized = Mach.DataBlobLoadCommand
    elif kind == LC_UNIXTHREAD:
        specialized = Mach.UnixThreadLoadCommand
    elif kind == LC_ENCRYPTION_INFO:
        specialized = Mach.EncryptionInfoLoadCommand
    else:
        # No dedicated class: keep the generic load command.
        specialized = None
    if specialized is not None:
        lc = specialized(lc)
        lc.unpack(self, data)
    lc.skip(data)
    return lc
def compare(self, rhs):
    """Compare section contents with `rhs`, reporting progress on stdout.

    Every section after index 0 is looked up in `rhs` by segment and section
    name and its contents compared. Returns True when nothing differs and
    False otherwise (including when `rhs` is a different kind of file).
    """
    print("\nComparing:")
    print("a) %s %s" % (self.arch, self.path))
    print("b) %s %s" % (rhs.arch, rhs.path))
    if self.type != rhs.type:
        print(
            "error: comparing a %s mach-o file with a %s mach-o file is not supported"
            % (self.type, rhs.type)
        )
        print("error: mach files differ")
        return False

    result = True
    for lhs_section in self.sections[1:]:
        rhs_section = rhs.get_section_by_section(lhs_section)
        if not rhs_section:
            result = False
            print(
                "error: section %s is missing in %s"
                % (lhs_section.sectname, rhs.path)
            )
            continue
        print(
            "comparing %s.%s..." % (lhs_section.segname, lhs_section.sectname),
            end=" ",
        )
        # Flush so the progress text is visible before the contents are read.
        sys.stdout.flush()
        lhs_data = lhs_section.get_contents(self)
        rhs_data = rhs_section.get_contents(rhs)
        if lhs_data and rhs_data:
            if lhs_data == rhs_data:
                print("ok")
            else:
                result = False
                print("error: sections differ")
            continue
        if not lhs_data and not rhs_data:
            if not (lhs_section.offset or rhs_section.offset):
                # Neither side has data and neither claims a file offset.
                print("ok")
                continue
            print("error: section data missing for both a and b:")
        elif lhs_data:
            print("error: section data missing from b:")
        else:
            print("error: section data missing from a:")
        print("a) %s" % (lhs_section))
        print("b) %s" % (rhs_section))
        result = False
    if not result:
        print("error: mach files differ")
    return result
def dump_header(self, dump_description=True, options=None):
    """Print the column headings that go with dump_flat().

    NOTE(review): a second dump_header is defined further down in what
    appears to be the same class; if so, that later definition replaces
    this one at class-creation time - confirm which is intended.
    """
    if options.verbose:
        headings = (
            "MAGIC CPU SUBTYPE FILETYPE NUM CMDS SIZE CMDS FLAGS",
            "---------- ---------- ---------- ---------- -------- ---------- ----------",
        )
    else:
        headings = (
            "MAGIC ARCH FILETYPE NUM CMDS SIZE CMDS FLAGS",
            "------------ ---------- -------------- -------- ---------- ----------",
        )
    for heading in headings:
        print(heading)
def dump_flat(self, options):
    """Print the mach header as a single row.

    With options.verbose every field is shown as a raw number; otherwise the
    magic, arch, filetype and flags are shown through their string forms.
    """
    if options.verbose:
        # self.magic is a plain int until unpack() replaces it with a magic
        # object; hex-format the numeric value in either case.
        magic_value = getattr(self.magic, "value", self.magic)
        print(
            "%#8.8x %#8.8x %#8.8x %#8.8x %#8u %#8.8x %#8.8x"
            % (
                magic_value,
                self.arch.cpu,
                self.arch.sub,
                self.filetype.value,
                self.ncmds,
                self.sizeofcmds,
                self.flags.bits,
            )
        )
    else:
        print(
            "%-12s %-10s %-14s %#8u %#8.8x %s"
            % (
                self.magic,
                self.arch,
                self.filetype,
                self.ncmds,
                self.sizeofcmds,
                self.flags,
            )
        )
def dump(self, options):
    """Dump every part of this image that `options` asks for.

    Reads options.dump_header, dump_load_commands, dump_sections,
    section_names, dump_symtab and find_mangled.
    """
    if options.dump_header:
        self.dump_header(True, options)
    if options.dump_load_commands:
        self.dump_load_commands(False, options)
    if options.dump_sections:
        self.dump_sections(False, options)
    if options.section_names:
        self.dump_section_contents(options)
    if options.dump_symtab:
        self.get_symtab()
        if self.symbols:
            # Dump the symbols that were just loaded (this used to dump the
            # section list a second time instead).
            self.dump_symtab(False, options)
        else:
            print("No symbols")
    if options.find_mangled:
        self.dump_symbol_names_matching_regex(re.compile("^_?_Z"))
def dump_header(self, dump_description=True, options=None):
    """Print the mach header one labeled field per line.

    The description line is printed first when `dump_description` is true;
    `options` is accepted but not read here.
    """
    if dump_description:
        print(self.description())
    rows = [
        "Mach Header",
        " magic: %#8.8x %s" % (self.magic.value, self.magic),
        " cputype: %#8.8x %s" % (self.arch.cpu, self.arch),
        " cpusubtype: %#8.8x" % self.arch.sub,
        " filetype: %#8.8x %s"
        % (self.filetype.get_enum_value(), self.filetype.get_enum_name()),
        " ncmds: %#8.8x %u" % (self.ncmds, self.ncmds),
        " sizeofcmds: %#8.8x" % self.sizeofcmds,
        " flags: %#8.8x %s" % (self.flags.bits, self.flags),
    ]
    print("\n".join(rows))
def dump_load_commands(self, dump_description=True, options=None):
    """Print every unpacked load command, one per line.

    The description line is printed first when `dump_description` is true;
    `options` is accepted but not read here.
    """
    if dump_description:
        print(self.description())
    for command in self.commands:
        print(command)
def get_section_by_name(self, name):
    """Return the first section whose non-empty sectname equals `name`, else None."""
    matches = (
        candidate
        for candidate in self.sections
        if candidate.sectname and candidate.sectname == name
    )
    return next(matches, None)
def get_section_by_section(self, other_section):
    """Return the first section with the same sectname and segname as
    `other_section`, or None when there is no such section."""
    wanted_sect = other_section.sectname
    wanted_seg = other_section.segname
    for candidate in self.sections:
        if candidate.sectname != wanted_sect:
            continue
        if candidate.segname == wanted_seg:
            return candidate
    return None
def dump_sections(self, dump_description=True, options=None):
    """Print a heading row followed by every section after index 0.

    The description line is printed first when `dump_description` is true;
    `options` is accepted but not read here.
    """
    if dump_description:
        print(self.description())
    listed = self.sections[1:]
    if listed:
        # The first listed section prints the shared column headings.
        listed[0].dump_header()
    for section in listed:
        print("%s" % section)
def dump_section_contents(self, options):
    """Dump, or save to disk, the contents of each section in options.section_names.

    Without options.outfile each section is printed as a hex dump. With
    options.outfile the first section found is written to that file (or,
    with options.extract_modules, the modules it contains are); any further
    sections are skipped with an error message.
    """
    saved_section_to_disk = False
    for sectname in options.section_names:
        section = self.get_section_by_name(sectname)
        if not section:
            print('error: no section named "%s" was found' % (sectname))
            continue
        sect_bytes = section.get_contents(self)
        if not options.outfile:
            print("section %s:\n" % (sectname))
            section.dump_header()
            print("%s\n" % (section))
            dump_memory(0, sect_bytes, options.max_count, 16)
            continue
        if saved_section_to_disk:
            print(
                "error: you can only save a single section to disk at a time, skipping section '%s'"
                % (sectname)
            )
            continue
        # Section contents are bytes, so the file must be opened in binary
        # mode; the with-block also closes it if a write fails.
        with open(options.outfile, "wb") as outfile:
            if options.extract_modules:
                data = file_extract.FileExtract(
                    io.BytesIO(sect_bytes), self.data.byte_order
                )
                data.get_uint32()  # version (unused)
                num_modules = data.get_uint32()
                # NOTE(review): the read position is not restored after the
                # seeks below, so later table entries are read from wherever
                # the previous module's data ended - confirm this is intended.
                for _ in range(num_modules):
                    data_offset = data.get_uint64()
                    data_size = data.get_uint64()
                    name_offset = data.get_uint32()
                    data.get_uint32()  # language (unused)
                    data.get_uint32()  # flags (unused)
                    data.seek(name_offset)
                    data.get_c_string()  # module name (unused)
                    data.seek(data_offset)
                    outfile.write(data.read_size(data_size))
            else:
                print("Saving section %s to '%s'" % (sectname, options.outfile))
                outfile.write(sect_bytes)
        saved_section_to_disk = True
def get_segment(self, segname):
    """Return the segment named ``segname``, or ``None`` if there is none.

    Special case: when the file has exactly one segment and that segment's
    name is the empty string, it is returned whatever ``segname`` is.
    """
    known_segments = self.segments
    if len(known_segments) == 1:
        sole_segment = known_segments[0]
        if sole_segment.segname == "":
            return sole_segment
    return next(
        (seg for seg in known_segments if seg.segname == segname), None
    )
def get_first_load_command(self, lc_enum_value):
    """Return the first load command whose id equals ``lc_enum_value``.

    The comparison is made against ``command.value`` of each entry in
    ``self.commands``. Returns ``None`` when no command matches.
    """
    return next(
        (
            load_cmd
            for load_cmd in self.commands
            if load_cmd.command.value == lc_enum_value
        ),
        None,
    )
def get_symtab(self):
    """Parse the symbol table into ``self.symbols`` if not done already.

    Does nothing when there is no data source or when ``self.symbols`` is
    already populated. Prints "no LC_SYMTAB" when the file has no
    LC_SYMTAB load command. Otherwise seeks to the first nlist entry and
    unpacks ``nsyms`` entries.

    The position of the nlist array depends on the data source:

    * file-backed: ``self.file_off + symoff``.
    * in-memory: ``symoff`` is rebased onto the ``__LINKEDIT`` segment
      (``vmaddr + symoff - fileoff``). If there is no ``__LINKEDIT``
      segment the offset stays at ``self.file_off``.
    """
    if not self.data or self.symbols:
        return

    lc_symtab = self.get_first_load_command(LC_SYMTAB)
    if not lc_symtab:
        print("no LC_SYMTAB")
        return

    symtab_offset = self.file_off
    if self.data.is_in_memory():
        linkedit_segment = self.get_segment("__LINKEDIT")
        if linkedit_segment:
            # Only the symbol-table offset belongs here. The previous code
            # immediately overwrote it with the string-table offset
            # (stroff), so parsing started inside the string table.
            symtab_offset = (
                linkedit_segment.vmaddr
                + lc_symtab.symoff
                - linkedit_segment.fileoff
            )
    else:
        symtab_offset += lc_symtab.symoff

    self.data.seek(symtab_offset)
    for _ in range(lc_symtab.nsyms):
        nlist = Mach.NList()
        nlist.unpack(self, self.data, lc_symtab)
        self.symbols.append(nlist)
def dump_symtab(self, dump_description=True, options=None):
    """Print every symbol, one per line, prefixed by its index.

    The symbol table is loaded first via ``get_symtab``. When
    ``dump_description`` is true, ``self.description()`` is printed before
    the symbols. ``options`` is accepted but not used here.
    """
    self.get_symtab()
    if dump_description:
        print(self.description())
    for numbered_symbol in enumerate(self.symbols):
        # numbered_symbol is an (index, symbol) pair, matching the format.
        print("[%5u] %s" % numbered_symbol)
def dump_symbol_names_matching_regex(self, regex, file=None):
    """Print the name of every symbol that ``regex`` finds a match in.

    The symbol table is loaded first via ``get_symtab``. Symbols without a
    name are skipped. When ``file`` is truthy, each matching name is also
    written to it followed by a newline.
    """
    self.get_symtab()
    for symbol in self.symbols:
        symbol_name = symbol.name
        if not symbol_name or not regex.search(symbol_name):
            continue
        print(symbol_name)
        if file:
            file.write("%s\n" % (symbol_name))
def is_64_bit(self):
    """Return the result of ``self.magic.is_64_bit()``."""
    header_magic = self.magic
    return header_magic.is_64_bit()
class LoadCommand:
    """Generic load command: command id, byte length and starting offset."""

    class Command(dict_utils.Enum):
        """Enum wrapper that maps load command names to their LC_* values."""

        enum = {
            "LC_SEGMENT": LC_SEGMENT,
            "LC_SYMTAB": LC_SYMTAB,
            "LC_SYMSEG": LC_SYMSEG,
            "LC_THREAD": LC_THREAD,
            "LC_UNIXTHREAD": LC_UNIXTHREAD,
            "LC_LOADFVMLIB": LC_LOADFVMLIB,
            "LC_IDFVMLIB": LC_IDFVMLIB,
            "LC_IDENT": LC_IDENT,
            "LC_FVMFILE": LC_FVMFILE,
            "LC_PREPAGE": LC_PREPAGE,
            "LC_DYSYMTAB": LC_DYSYMTAB,
            "LC_LOAD_DYLIB": LC_LOAD_DYLIB,
            "LC_ID_DYLIB": LC_ID_DYLIB,
            "LC_LOAD_DYLINKER": LC_LOAD_DYLINKER,
            "LC_ID_DYLINKER": LC_ID_DYLINKER,
            "LC_PREBOUND_DYLIB": LC_PREBOUND_DYLIB,
            "LC_ROUTINES": LC_ROUTINES,
            "LC_SUB_FRAMEWORK": LC_SUB_FRAMEWORK,
            "LC_SUB_UMBRELLA": LC_SUB_UMBRELLA,
            "LC_SUB_CLIENT": LC_SUB_CLIENT,
            "LC_SUB_LIBRARY": LC_SUB_LIBRARY,
            "LC_TWOLEVEL_HINTS": LC_TWOLEVEL_HINTS,
            "LC_PREBIND_CKSUM": LC_PREBIND_CKSUM,
            "LC_LOAD_WEAK_DYLIB": LC_LOAD_WEAK_DYLIB,
            "LC_SEGMENT_64": LC_SEGMENT_64,
            "LC_ROUTINES_64": LC_ROUTINES_64,
            "LC_UUID": LC_UUID,
            "LC_RPATH": LC_RPATH,
            "LC_CODE_SIGNATURE": LC_CODE_SIGNATURE,
            "LC_SEGMENT_SPLIT_INFO": LC_SEGMENT_SPLIT_INFO,
            "LC_REEXPORT_DYLIB": LC_REEXPORT_DYLIB,
            "LC_LAZY_LOAD_DYLIB": LC_LAZY_LOAD_DYLIB,
            "LC_ENCRYPTION_INFO": LC_ENCRYPTION_INFO,
            "LC_DYLD_INFO": LC_DYLD_INFO,
            "LC_DYLD_INFO_ONLY": LC_DYLD_INFO_ONLY,
            "LC_LOAD_UPWARD_DYLIB": LC_LOAD_UPWARD_DYLIB,
            "LC_VERSION_MIN_MACOSX": LC_VERSION_MIN_MACOSX,
            "LC_VERSION_MIN_IPHONEOS": LC_VERSION_MIN_IPHONEOS,
            "LC_FUNCTION_STARTS": LC_FUNCTION_STARTS,
            "LC_DYLD_ENVIRONMENT": LC_DYLD_ENVIRONMENT,
        }

        def __init__(self, initial_value=0):
            """Create the enum holding ``initial_value``."""
            dict_utils.Enum.__init__(self, initial_value, self.enum)

    def __init__(self, c=None, l=0, o=0):
        """Store command ``c`` (a fresh zero-valued one if None), length ``l``
        and file offset ``o``."""
        self.command = Mach.LoadCommand.Command(0) if c is None else c
        self.length = l
        self.file_off = o

    def unpack(self, mach_file, data):
        """Record the current offset, then read the command id and length.

        ``mach_file`` is not used by this base implementation.
        """
        self.file_off = data.tell()
        command_value, command_length = data.get_n_uint32(2)
        self.command.value = command_value
        self.length = command_length

    def skip(self, data):
        """Seek ``data`` to the first byte after this load command."""
        end_of_command = self.file_off + self.length
        data.seek(end_of_command, 0)

    def __str__(self):
        """Format as "<file offset>: <length> <command name>"."""
        return "%#8.8x: <%#4.4x> %-24s" % (
            self.file_off,
            self.length,
            self.command.get_enum_name(),
        )
class Section:
    """One section header, in either its 32-bit or 64-bit layout."""

    def __init__(self):
        """Create an empty 32-bit section with every numeric field zeroed."""
        self.index = 0
        self.is_64 = False
        self.sectname = None
        self.segname = None
        self.addr = 0
        self.size = 0
        self.offset = 0
        self.align = 0
        self.reloff = 0
        self.nreloc = 0
        self.flags = 0
        self.reserved1 = 0
        self.reserved2 = 0
        self.reserved3 = 0

    def unpack(self, is_64, data):
        """Read the section header from ``data`` at its current position.

        Two 16-byte names come first. The 64-bit layout then has 64-bit
        ``addr``/``size`` followed by eight 32-bit fields; the 32-bit
        layout has 32-bit ``addr``/``size`` followed by seven 32-bit
        fields (``reserved3`` is not read and keeps its previous value).
        """
        self.is_64 = is_64
        self.sectname = data.get_fixed_length_c_string(16, "", True)
        self.segname = data.get_fixed_length_c_string(16, "", True)
        if self.is_64:
            self.addr, self.size = data.get_n_uint64(2)
            (
                self.offset,
                self.align,
                self.reloff,
                self.nreloc,
                self.flags,
                self.reserved1,
                self.reserved2,
                self.reserved3,
            ) = data.get_n_uint32(8)
        else:
            self.addr, self.size = data.get_n_uint32(2)
            (
                self.offset,
                self.align,
                self.reloff,
                self.nreloc,
                self.flags,
                self.reserved1,
                self.reserved2,
            ) = data.get_n_uint32(7)

    def dump_header(self):
        """Print the two-line column header that matches ``__str__``."""
        if self.is_64:
            print(
                "INDEX ADDRESS SIZE OFFSET ALIGN RELOFF NRELOC FLAGS RESERVED1 RESERVED2 RESERVED3 NAME"
            )
            print(
                "===== ------------------ ------------------ ---------- ---------- ---------- ---------- ---------- ---------- ---------- ---------- ----------------------"
            )
        else:
            print(
                "INDEX ADDRESS SIZE OFFSET ALIGN RELOFF NRELOC FLAGS RESERVED1 RESERVED2 NAME"
            )
            print(
                "===== ---------- ---------- ---------- ---------- ---------- ---------- ---------- ---------- ---------- ----------------------"
            )

    def __str__(self):
        """Format the section as one table row ending in "segname.sectname"."""
        if self.is_64:
            return (
                "[%3u] %#16.16x %#16.16x %#8.8x %#8.8x %#8.8x %#8.8x %#8.8x %#8.8x %#8.8x %#8.8x %s.%s"
                % (
                    self.index,
                    self.addr,
                    self.size,
                    self.offset,
                    self.align,
                    self.reloff,
                    self.nreloc,
                    self.flags,
                    self.reserved1,
                    self.reserved2,
                    self.reserved3,
                    self.segname,
                    self.sectname,
                )
            )
        return (
            "[%3u] %#8.8x %#8.8x %#8.8x %#8.8x %#8.8x %#8.8x %#8.8x %#8.8x %#8.8x %s.%s"
            % (
                self.index,
                self.addr,
                self.size,
                self.offset,
                self.align,
                self.reloff,
                self.nreloc,
                self.flags,
                self.reserved1,
                self.reserved2,
                self.segname,
                self.sectname,
            )
        )

    def get_contents(self, mach_file):
        """Return the section's raw contents, or ``None`` if unavailable.

        ``None`` is returned when the section is empty, when
        ``mach_file`` has no segment named ``self.segname``, when that
        segment has no file content, or when ``mach_file.get_data()``
        yields nothing. Otherwise ``self.size`` bytes are read at
        ``mach_file.file_off + self.offset``; the data cursor is restored
        afterwards.
        """
        if not self.size > 0:
            return None
        segment = mach_file.get_segment(self.segname)
        # get_segment() returns None for an unknown name; treat that like a
        # segment without file content instead of raising AttributeError.
        if segment is None or not segment.filesize > 0:
            return None
        data = mach_file.get_data()
        if not data:
            return None
        data.push_offset_and_seek(mach_file.file_off + self.offset)
        try:
            return data.read_size(self.size)
        finally:
            # Restore the caller's position even if the read fails.
            data.pop_offset_and_seek()
class DylibLoadCommand(LoadCommand):
    """Load command that names a dynamic library plus its version info."""

    def __init__(self, lc):
        """Copy the generic header fields from the already-parsed ``lc``."""
        Mach.LoadCommand.__init__(self, lc.command, lc.length, lc.file_off)
        self.name = None
        self.timestamp = 0
        self.current_version = 0
        self.compatibility_version = 0

    def unpack(self, mach_file, data):
        """Read the name offset, timestamp and versions, then the name.

        The name starts ``name_offset`` bytes from the start of the command
        and runs to the end of the command, so its length is derived from
        the offset actually read rather than from a fixed header size.
        ``mach_file`` is not used.
        """
        (
            name_offset,
            self.timestamp,
            self.current_version,
            self.compatibility_version,
        ) = data.get_n_uint32(4)
        data.seek(self.file_off + name_offset, 0)
        self.name = data.get_fixed_length_c_string(self.length - name_offset)

    def __str__(self):
        """Format the generic header, then timestamp, versions and name."""
        s = Mach.LoadCommand.__str__(self)
        s += "%#8.8x %#8.8x %#8.8x " % (
            self.timestamp,
            self.current_version,
            self.compatibility_version,
        )
        # "%s" keeps this printable before unpack(), when name is None.
        s += "%s" % self.name
        return s
class LoadDYLDLoadCommand(LoadCommand):
    """Load command whose payload is a single name string."""

    def __init__(self, lc):
        """Copy the generic header fields from the already-parsed ``lc``."""
        Mach.LoadCommand.__init__(self, lc.command, lc.length, lc.file_off)
        self.name = None

    def unpack(self, mach_file, data):
        """Discard one 32-bit word, then read the rest of the command as
        the name (the command length minus 12 bytes).

        ``mach_file`` is not used.
        """
        data.get_uint32()
        name_length = self.length - 12
        self.name = data.get_fixed_length_c_string(name_length)

    def __str__(self):
        """Format the generic header followed by the name."""
        return Mach.LoadCommand.__str__(self) + "%s" % self.name
class UnixThreadLoadCommand(LoadCommand):
    """Load command carrying a thread's register state."""

    class ThreadState:
        """One register set: a flavor, a count and that many 32-bit words."""

        def __init__(self):
            """Create an empty register set."""
            self.flavor = 0
            self.count = 0
            self.register_values = list()

        def unpack(self, data):
            """Read flavor and count, then ``count`` 32-bit register words."""
            flavor, count = data.get_n_uint32(2)
            self.flavor = flavor
            self.count = count
            self.register_values = data.get_n_uint32(self.count)

        def __str__(self):
            """Format the flavor, count and registers, eight per row."""
            pieces = [
                "flavor = %u, count = %u, regs =" % (self.flavor, self.count)
            ]
            for position, register_value in enumerate(self.register_values):
                if position % 8 == 0:
                    pieces.append("\n ")
                pieces.append(" %#8.8x" % register_value)
            return "".join(pieces)

    def __init__(self, lc):
        """Copy the generic header fields from the already-parsed ``lc``."""
        Mach.LoadCommand.__init__(self, lc.command, lc.length, lc.file_off)
        self.reg_sets = list()

    def unpack(self, mach_file, data):
        """Read a single register set from ``data`` and store it.

        ``mach_file`` is not used.
        """
        thread_state = Mach.UnixThreadLoadCommand.ThreadState()
        thread_state.unpack(data)
        self.reg_sets.append(thread_state)

    def __str__(self):
        """Format the generic header followed by every register set."""
        header = Mach.LoadCommand.__str__(self)
        return header + "".join("%s" % reg_set for reg_set in self.reg_sets)
class DYLDInfoOnlyLoadCommand(LoadCommand):
    """Load command holding offset/size pairs for the rebase, bind,
    weak-bind, lazy-bind and export data."""

    def __init__(self, lc):
        """Copy the generic header from ``lc`` and zero every pair."""
        Mach.LoadCommand.__init__(self, lc.command, lc.length, lc.file_off)
        self.rebase_off = self.rebase_size = 0
        self.bind_off = self.bind_size = 0
        self.weak_bind_off = self.weak_bind_size = 0
        self.lazy_bind_off = self.lazy_bind_size = 0
        self.export_off = self.export_size = 0

    def unpack(self, mach_file, data):
        """Read the ten 32-bit fields in on-disk order."""
        # The result is unused; the call is kept only so this rewrite
        # behaves exactly like the code it replaces.
        mach_file.magic.get_byte_order()
        words = data.get_n_uint32(10)
        (
            self.rebase_off,
            self.rebase_size,
            self.bind_off,
            self.bind_size,
            self.weak_bind_off,
            self.weak_bind_size,
            self.lazy_bind_off,
            self.lazy_bind_size,
            self.export_off,
            self.export_size,
        ) = words

    def __str__(self):
        """Format the generic header followed by each offset/size pair."""
        pairs = (
            (
                "rebase_off = %#8.8x, rebase_size = %u, ",
                self.rebase_off,
                self.rebase_size,
            ),
            (
                "bind_off = %#8.8x, bind_size = %u, ",
                self.bind_off,
                self.bind_size,
            ),
            (
                "weak_bind_off = %#8.8x, weak_bind_size = %u, ",
                self.weak_bind_off,
                self.weak_bind_size,
            ),
            (
                "lazy_bind_off = %#8.8x, lazy_bind_size = %u, ",
                self.lazy_bind_off,
                self.lazy_bind_size,
            ),
            (
                "export_off = %#8.8x, export_size = %u, ",
                self.export_off,
                self.export_size,
            ),
        )
        header = Mach.LoadCommand.__str__(self)
        return header + "".join(
            template % (offset, size) for template, offset, size in pairs
        )
class DYLDSymtabLoadCommand(LoadCommand):
    """Load command holding the eighteen dynamic symbol table fields."""

    def __init__(self, lc):
        """Copy the generic header from ``lc`` and zero every field."""
        Mach.LoadCommand.__init__(self, lc.command, lc.length, lc.file_off)
        self.ilocalsym = self.nlocalsym = 0
        self.iextdefsym = self.nextdefsym = 0
        self.iundefsym = self.nundefsym = 0
        self.tocoff = self.ntoc = 0
        self.modtaboff = self.nmodtab = 0
        self.extrefsymoff = self.nextrefsyms = 0
        self.indirectsymoff = self.nindirectsyms = 0
        self.extreloff = self.nextrel = 0
        self.locreloff = self.nlocrel = 0

    def unpack(self, mach_file, data):
        """Read the eighteen 32-bit fields in on-disk order."""
        # The result is unused; the call is kept only so this rewrite
        # behaves exactly like the code it replaces.
        mach_file.magic.get_byte_order()
        words = data.get_n_uint32(18)
        (
            self.ilocalsym,
            self.nlocalsym,
            self.iextdefsym,
            self.nextdefsym,
            self.iundefsym,
            self.nundefsym,
            self.tocoff,
            self.ntoc,
            self.modtaboff,
            self.nmodtab,
            self.extrefsymoff,
            self.nextrefsyms,
            self.indirectsymoff,
            self.nindirectsyms,
            self.extreloff,
            self.nextrel,
            self.locreloff,
            self.nlocrel,
        ) = words

    def __str__(self):
        """Format the generic header followed by one row per field pair."""
        rows = (
            (
                "ilocalsym = %-10u, nlocalsym = %u\n",
                self.ilocalsym,
                self.nlocalsym,
            ),
            (
                " iextdefsym = %-10u, nextdefsym = %u\n",
                self.iextdefsym,
                self.nextdefsym,
            ),
            (
                " iundefsym = %-10u, nundefsym = %u\n",
                self.iundefsym,
                self.nundefsym,
            ),
            (" tocoff = %#8.8x, ntoc = %u\n", self.tocoff, self.ntoc),
            (
                " modtaboff = %#8.8x, nmodtab = %u\n",
                self.modtaboff,
                self.nmodtab,
            ),
            (
                " extrefsymoff = %#8.8x, nextrefsyms = %u\n",
                self.extrefsymoff,
                self.nextrefsyms,
            ),
            (
                " indirectsymoff = %#8.8x, nindirectsyms = %u\n",
                self.indirectsymoff,
                self.nindirectsyms,
            ),
            (
                " extreloff = %#8.8x, nextrel = %u\n",
                self.extreloff,
                self.nextrel,
            ),
            (
                " locreloff = %#8.8x, nlocrel = %u",
                self.locreloff,
                self.nlocrel,
            ),
        )
        header = Mach.LoadCommand.__str__(self)
        return header + "".join(
            template % (first, second) for template, first, second in rows
        )
class SymtabLoadCommand(LoadCommand):
    """Load command locating the symbol table and its string table."""

    def __init__(self, lc):
        """Copy the generic header from ``lc`` and zero the four fields."""
        Mach.LoadCommand.__init__(self, lc.command, lc.length, lc.file_off)
        self.symoff = self.nsyms = 0
        self.stroff = self.strsize = 0

    def unpack(self, mach_file, data):
        """Read symoff, nsyms, stroff and strsize as four 32-bit values."""
        # The result is unused; the call is kept only so this rewrite
        # behaves exactly like the code it replaces.
        mach_file.magic.get_byte_order()
        words = data.get_n_uint32(4)
        self.symoff, self.nsyms, self.stroff, self.strsize = words

    def __str__(self):
        """Format the generic header followed by the four fields."""
        details = "symoff = %#8.8x, nsyms = %u, stroff = %#8.8x, strsize = %u" % (
            self.symoff,
            self.nsyms,
            self.stroff,
            self.strsize,
        )
        return Mach.LoadCommand.__str__(self) + details
class UUIDLoadCommand(LoadCommand):
    """Load command carrying the file's 16-byte UUID."""

    def __init__(self, lc):
        """Copy the generic header fields from the already-parsed ``lc``."""
        Mach.LoadCommand.__init__(self, lc.command, lc.length, lc.file_off)
        self.uuid = None

    def unpack(self, mach_file, data):
        """Read 16 bytes, build a ``uuid.UUID`` from their hex form and
        store it on both this command and ``mach_file``."""
        raw_bytes = data.get_n_uint8(16)
        hex_digits = "".join("%2.2x" % byte for byte in raw_bytes)
        self.uuid = uuid.UUID(hex_digits)
        mach_file.uuid = self.uuid

    def __str__(self):
        """Format the generic header followed by the UUID."""
        return Mach.LoadCommand.__str__(self) + self.uuid.__str__()
class DataBlobLoadCommand(LoadCommand):
    """Load command that points at a blob of data by offset and size."""

    def __init__(self, lc):
        """Copy the generic header from ``lc`` and zero the blob range."""
        Mach.LoadCommand.__init__(self, lc.command, lc.length, lc.file_off)
        self.dataoff = self.datasize = 0

    def unpack(self, mach_file, data):
        """Read dataoff and datasize as two 32-bit values."""
        # The result is unused; the call is kept only so this rewrite
        # behaves exactly like the code it replaces.
        mach_file.magic.get_byte_order()
        blob_offset, blob_size = data.get_n_uint32(2)
        self.dataoff = blob_offset
        self.datasize = blob_size

    def __str__(self):
        """Format the generic header followed by the blob offset and size."""
        details = "dataoff = %#8.8x, datasize = %u" % (self.dataoff, self.datasize)
        return Mach.LoadCommand.__str__(self) + details
class EncryptionInfoLoadCommand(LoadCommand):
    """Load command describing an encrypted file range."""

    def __init__(self, lc):
        """Copy the generic header from ``lc`` and zero the crypt fields."""
        Mach.LoadCommand.__init__(self, lc.command, lc.length, lc.file_off)
        self.cryptoff = self.cryptsize = self.cryptid = 0

    def unpack(self, mach_file, data):
        """Read cryptoff, cryptsize and cryptid as three 32-bit values."""
        # The result is unused; the call is kept only so this rewrite
        # behaves exactly like the code it replaces.
        mach_file.magic.get_byte_order()
        words = data.get_n_uint32(3)
        self.cryptoff, self.cryptsize, self.cryptid = words

    def __str__(self):
        """Format the generic header, the half-open encrypted file range,
        its size and the crypt id."""
        range_end = self.cryptoff + self.cryptsize
        details = "file-range = [%#8.8x - %#8.8x), cryptsize = %u, cryptid = %u" % (
            self.cryptoff,
            range_end,
            self.cryptsize,
            self.cryptid,
        )
        return Mach.LoadCommand.__str__(self) + details
class SegmentLoadCommand(LoadCommand):
    """Segment load command (32- or 64-bit) together with its sections."""

    def __init__(self, lc):
        """Copy the generic header from ``lc`` and clear the segment fields."""
        Mach.LoadCommand.__init__(self, lc.command, lc.length, lc.file_off)
        self.segname = None
        self.vmaddr = self.vmsize = 0
        self.fileoff = self.filesize = 0
        self.maxprot = self.initprot = 0
        self.nsects = 0
        self.flags = 0

    def unpack(self, mach_file, data):
        """Read the segment fields and every section that follows.

        The address and size fields are 64-bit for LC_SEGMENT_64 and 32-bit
        otherwise. The segment is appended to ``mach_file.segments`` and
        each section to ``mach_file.sections``, with the section's index
        set to its position in that list.
        """
        is_64 = self.command.get_enum_value() == LC_SEGMENT_64
        self.segname = data.get_fixed_length_c_string(16, "", True)
        read_ranges = data.get_n_uint64 if is_64 else data.get_n_uint32
        self.vmaddr, self.vmsize, self.fileoff, self.filesize = read_ranges(4)
        trailer = data.get_n_uint32(4)
        self.maxprot, self.initprot, self.nsects, self.flags = trailer
        mach_file.segments.append(self)
        for _ in range(self.nsects):
            new_section = Mach.Section()
            new_section.unpack(is_64, data)
            new_section.index = len(mach_file.sections)
            mach_file.sections.append(new_section)

    def __str__(self):
        """Format the generic header, address/file ranges, protections,
        section count, flags and segment name."""
        header = Mach.LoadCommand.__str__(self)
        if self.command.get_enum_value() == LC_SEGMENT:
            range_template = "%#8.8x %#8.8x %#8.8x %#8.8x "
        else:
            range_template = "%#16.16x %#16.16x %#16.16x %#16.16x "
        ranges = range_template % (
            self.vmaddr,
            self.vmsize,
            self.fileoff,
            self.filesize,
        )
        protections = "%s %s %3u %#8.8x" % (
            vm_prot_names[self.maxprot],
            vm_prot_names[self.initprot],
            self.nsects,
            self.flags,
        )
        return header + ranges + protections + " " + self.segname
class NList:
    """One symbol table (nlist) entry."""

    class Type:
        """The entry's type byte, with a readable string form."""

        class Stab(dict_utils.Enum):
            """Enum wrapper that maps stab names to their N_* values."""

            enum = {
                "N_GSYM": N_GSYM,
                "N_FNAME": N_FNAME,
                "N_FUN": N_FUN,
                "N_STSYM": N_STSYM,
                "N_LCSYM": N_LCSYM,
                "N_BNSYM": N_BNSYM,
                "N_OPT": N_OPT,
                "N_RSYM": N_RSYM,
                "N_SLINE": N_SLINE,
                "N_ENSYM": N_ENSYM,
                "N_SSYM": N_SSYM,
                "N_SO": N_SO,
                "N_OSO": N_OSO,
                "N_LSYM": N_LSYM,
                "N_BINCL": N_BINCL,
                "N_SOL": N_SOL,
                "N_PARAMS": N_PARAMS,
                "N_VERSION": N_VERSION,
                "N_OLEVEL": N_OLEVEL,
                "N_PSYM": N_PSYM,
                "N_EINCL": N_EINCL,
                "N_ENTRY": N_ENTRY,
                "N_LBRAC": N_LBRAC,
                "N_EXCL": N_EXCL,
                "N_RBRAC": N_RBRAC,
                "N_BCOMM": N_BCOMM,
                "N_ECOMM": N_ECOMM,
                "N_ECOML": N_ECOML,
                "N_LENG": N_LENG,
            }

            def __init__(self, magic=0):
                """Create the enum holding ``magic``."""
                dict_utils.Enum.__init__(self, magic, self.enum)

        def __init__(self, t=0):
            """Store the raw type byte ``t``."""
            self.value = t

        def __str__(self):
            """Describe the type byte.

            If any N_STAB bit is set the stab name is returned. Otherwise
            the N_TYPE bits are named, followed by " | PEXT" and/or
            " | EXT " when the N_PEXT / N_EXT bits are set.
            """
            n_type = self.value
            if n_type & N_STAB:
                return "%s" % Mach.NList.Type.Stab(self.value)

            masked_type = n_type & N_TYPE
            if masked_type == N_UNDF:
                type_str = "N_UNDF"
            elif masked_type == N_ABS:
                type_str = "N_ABS "
            elif masked_type == N_SECT:
                type_str = "N_SECT"
            elif masked_type == N_PBUD:
                type_str = "N_PBUD"
            elif masked_type == N_INDR:
                type_str = "N_INDR"
            else:
                type_str = "??? (%#2.2x)" % masked_type
            if n_type & N_PEXT:
                type_str += " | PEXT"
            if n_type & N_EXT:
                type_str += " | EXT "
            return type_str

    def __init__(self):
        """Create an empty entry; ``name`` stays None until ``unpack``."""
        self.index = 0
        self.name_offset = 0
        self.name = None
        self.type = Mach.NList.Type()
        self.sect_idx = 0
        self.desc = 0
        self.value = 0

    def unpack(self, mach_file, data, symtab_lc):
        """Read one entry from ``data`` and resolve its name.

        The entry's index is the number of symbols ``mach_file`` already
        holds. ``value`` is 64-bit when ``mach_file.is_64_bit()`` and
        32-bit otherwise. The name is read from the string table at
        ``mach_file.file_off + symtab_lc.stroff + name_offset``; the data
        cursor is then restored so the next entry can be read.
        """
        self.index = len(mach_file.symbols)
        self.name_offset = data.get_uint32()
        self.type.value, self.sect_idx = data.get_n_uint8(2)
        self.desc = data.get_uint16()
        if mach_file.is_64_bit():
            self.value = data.get_uint64()
        else:
            self.value = data.get_uint32()
        data.push_offset_and_seek(
            mach_file.file_off + symtab_lc.stroff + self.name_offset
        )
        self.name = data.get_c_string()
        data.pop_offset_and_seek()

    def __str__(self):
        """Format the entry as one row, with the quoted name last if any."""
        # Truthiness rather than len(): the name may be None (entry not
        # unpacked, or no string found), which len() cannot handle.
        name_display = ' "%s"' % self.name if self.name else ""
        return "%#8.8x %#2.2x (%-20s) %#2.2x %#4.4x %16.16x%s" % (
            self.name_offset,
            self.type.value,
            self.type,
            self.sect_idx,
            self.desc,
            self.value,
            name_display,
        )
class Interactive(cmd.Cmd):
    """Interactive command interpreter to mach-o files."""

    def __init__(self, mach, options):
        """Bind the interpreter to the parsed file ``mach``.

        ``options`` is passed on to every dump command.
        """
        cmd.Cmd.__init__(self)
        self.intro = "Interactive mach-o command interpreter"
        self.prompt = "mach-o: %s %% " % mach.path
        self.mach = mach
        self.options = options

    def default(self, line):
        """Catch all for unknown command, which will exit the interpreter."""
        print("unknown command: %s" % line)
        return True

    def do_EOF(self, line):
        """Exit on end of input (e.g. Ctrl-D)"""
        # cmd.Cmd turns end-of-file into the command "EOF"; without this
        # handler it would be reported as an unknown command.
        return True

    def do_q(self, line):
        """Quit command"""
        return True

    def do_quit(self, line):
        """Quit command"""
        return True

    def do_header(self, line):
        """Dump mach-o file headers"""
        self.mach.dump_header(True, self.options)
        return False

    def do_load(self, line):
        """Dump all mach-o load commands"""
        self.mach.dump_load_commands(True, self.options)
        return False

    def do_sections(self, line):
        """Dump all mach-o sections"""
        self.mach.dump_sections(True, self.options)
        return False

    def do_symtab(self, line):
        """Dump all mach-o symbols in the symbol table"""
        self.mach.dump_symtab(True, self.options)
        return False
if __name__ == "__main__":
    # Command-line driver: parse the options, then either compare two
    # mach-o files or dump / interactively inspect each file given.
    parser = optparse.OptionParser(
        description="A script that parses skinny and universal mach-o files."
    )
    parser.add_option(
        "--arch",
        "-a",
        type="string",
        metavar="arch",
        dest="archs",
        action="append",
        help="specify one or more architectures by name",
    )
    parser.add_option(
        "-v",
        "--verbose",
        action="store_true",
        dest="verbose",
        help="display verbose debug info",
        default=False,
    )
    parser.add_option(
        "-H",
        "--header",
        action="store_true",
        dest="dump_header",
        help="dump the mach-o file header",
        default=False,
    )
    parser.add_option(
        "-l",
        "--load-commands",
        action="store_true",
        dest="dump_load_commands",
        help="dump the mach-o load commands",
        default=False,
    )
    parser.add_option(
        "-s",
        "--symtab",
        action="store_true",
        dest="dump_symtab",
        help="dump the mach-o symbol table",
        default=False,
    )
    parser.add_option(
        "-S",
        "--sections",
        action="store_true",
        dest="dump_sections",
        help="dump the mach-o sections",
        default=False,
    )
    parser.add_option(
        "--section",
        type="string",
        metavar="sectname",
        dest="section_names",
        action="append",
        help="Specify one or more section names to dump",
        default=[],
    )
    parser.add_option(
        "-o",
        "--out",
        type="string",
        dest="outfile",
        help="Used in conjunction with the --section=NAME option to save a single section's data to disk.",
        default=False,
    )
    parser.add_option(
        "-i",
        "--interactive",
        action="store_true",
        dest="interactive",
        help="enable interactive mode",
        default=False,
    )
    parser.add_option(
        "-m",
        "--mangled",
        action="store_true",
        dest="find_mangled",
        help="dump all mangled names in a mach file",
        default=False,
    )
    parser.add_option(
        "-c",
        "--compare",
        action="store_true",
        dest="compare",
        help="compare two mach files",
        default=False,
    )
    parser.add_option(
        "-M",
        "--extract-modules",
        action="store_true",
        dest="extract_modules",
        help="Extract modules from file",
        default=False,
    )
    parser.add_option(
        "-C",
        "--count",
        type="int",
        dest="max_count",
        help="Sets the max byte count when dumping section data",
        default=-1,
    )

    (options, mach_files) = parser.parse_args()

    if options.extract_modules:
        # Module extraction always reads the "__apple_ast" section and
        # needs somewhere to write, so validate both before continuing.
        if options.section_names:
            print("error: can't use --section option with the --extract-modules option")
            # sys.exit() rather than the bare exit() helper, which only
            # exists when the site module has been loaded.
            sys.exit(1)
        if not options.outfile:
            # The option is spelled --out (see above), not --output.
            print(
                "error: the --out=FILE option must be specified with the --extract-modules option"
            )
            sys.exit(1)
        options.section_names.append("__apple_ast")

    if options.compare:
        if len(mach_files) != 2:
            print("error: --compare takes two mach files as arguments")
            # Report the usage error through the exit status as well, like
            # the other argument errors above.
            sys.exit(1)
        mach_a = Mach()
        mach_b = Mach()
        mach_a.parse(mach_files[0])
        mach_b.parse(mach_files[1])
        mach_a.compare(mach_b)
    else:
        # With no explicit dump request, default to header + load commands.
        if not (
            options.dump_header
            or options.dump_load_commands
            or options.dump_symtab
            or options.dump_sections
            or options.find_mangled
            or options.section_names
        ):
            options.dump_header = True
            options.dump_load_commands = True
        if options.verbose:
            print("options", options)
            print("mach_files", mach_files)
        for path in mach_files:
            mach = Mach()
            mach.parse(path)
            if options.interactive:
                interpreter = Mach.Interactive(mach, options)
                interpreter.cmdloop()
            else:
                mach.dump(options)