[docs] Update HowToReleaseLLVM documentation.
[llvm-project.git] / lldb / examples / python / mach_o.py
blobd9e1841363b7a388921f0fb8eb655c3fe0326e48
1 #!/usr/bin/env python
3 import cmd
4 import dict_utils
5 import file_extract
6 import optparse
7 import re
8 import struct
9 import string
10 import io
11 import sys
12 import uuid
# Mach header "magic" constants
MH_MAGIC = 0xfeedface
MH_CIGAM = 0xcefaedfe
MH_MAGIC_64 = 0xfeedfacf
MH_CIGAM_64 = 0xcffaedfe
FAT_MAGIC = 0xcafebabe
FAT_CIGAM = 0xbebafeca

# Mach header "filetype" constants
MH_OBJECT = 0x00000001
MH_EXECUTE = 0x00000002
MH_FVMLIB = 0x00000003
MH_CORE = 0x00000004
MH_PRELOAD = 0x00000005
MH_DYLIB = 0x00000006
MH_DYLINKER = 0x00000007
MH_BUNDLE = 0x00000008
MH_DYLIB_STUB = 0x00000009
MH_DSYM = 0x0000000a
MH_KEXT_BUNDLE = 0x0000000b

# Mach header "flag" constant bits
MH_NOUNDEFS = 0x00000001
MH_INCRLINK = 0x00000002
MH_DYLDLINK = 0x00000004
MH_BINDATLOAD = 0x00000008
MH_PREBOUND = 0x00000010
MH_SPLIT_SEGS = 0x00000020
MH_LAZY_INIT = 0x00000040
MH_TWOLEVEL = 0x00000080
MH_FORCE_FLAT = 0x00000100
MH_NOMULTIDEFS = 0x00000200
MH_NOFIXPREBINDING = 0x00000400
MH_PREBINDABLE = 0x00000800
MH_ALLMODSBOUND = 0x00001000
MH_SUBSECTIONS_VIA_SYMBOLS = 0x00002000
MH_CANONICAL = 0x00004000
MH_WEAK_DEFINES = 0x00008000
MH_BINDS_TO_WEAK = 0x00010000
MH_ALLOW_STACK_EXECUTION = 0x00020000
MH_ROOT_SAFE = 0x00040000
MH_SETUID_SAFE = 0x00080000
MH_NO_REEXPORTED_DYLIBS = 0x00100000
MH_PIE = 0x00200000
MH_DEAD_STRIPPABLE_DYLIB = 0x00400000
MH_HAS_TLV_DESCRIPTORS = 0x00800000
MH_NO_HEAP_EXECUTION = 0x01000000

# Mach load command constants
# LC_REQ_DYLD is OR'ed into several of the command values below.
LC_REQ_DYLD = 0x80000000
LC_SEGMENT = 0x00000001
LC_SYMTAB = 0x00000002
LC_SYMSEG = 0x00000003
LC_THREAD = 0x00000004
LC_UNIXTHREAD = 0x00000005
LC_LOADFVMLIB = 0x00000006
LC_IDFVMLIB = 0x00000007
LC_IDENT = 0x00000008
LC_FVMFILE = 0x00000009
LC_PREPAGE = 0x0000000a
LC_DYSYMTAB = 0x0000000b
LC_LOAD_DYLIB = 0x0000000c
LC_ID_DYLIB = 0x0000000d
LC_LOAD_DYLINKER = 0x0000000e
LC_ID_DYLINKER = 0x0000000f
LC_PREBOUND_DYLIB = 0x00000010
LC_ROUTINES = 0x00000011
LC_SUB_FRAMEWORK = 0x00000012
LC_SUB_UMBRELLA = 0x00000013
LC_SUB_CLIENT = 0x00000014
LC_SUB_LIBRARY = 0x00000015
LC_TWOLEVEL_HINTS = 0x00000016
LC_PREBIND_CKSUM = 0x00000017
LC_LOAD_WEAK_DYLIB = 0x00000018 | LC_REQ_DYLD
LC_SEGMENT_64 = 0x00000019
LC_ROUTINES_64 = 0x0000001a
LC_UUID = 0x0000001b
LC_RPATH = 0x0000001c | LC_REQ_DYLD
LC_CODE_SIGNATURE = 0x0000001d
LC_SEGMENT_SPLIT_INFO = 0x0000001e
LC_REEXPORT_DYLIB = 0x0000001f | LC_REQ_DYLD
LC_LAZY_LOAD_DYLIB = 0x00000020
LC_ENCRYPTION_INFO = 0x00000021
LC_DYLD_INFO = 0x00000022
LC_DYLD_INFO_ONLY = 0x00000022 | LC_REQ_DYLD
LC_LOAD_UPWARD_DYLIB = 0x00000023 | LC_REQ_DYLD
LC_VERSION_MIN_MACOSX = 0x00000024
LC_VERSION_MIN_IPHONEOS = 0x00000025
LC_FUNCTION_STARTS = 0x00000026
LC_DYLD_ENVIRONMENT = 0x00000027

# Mach CPU constants
CPU_ARCH_MASK = 0xff000000
CPU_ARCH_ABI64 = 0x01000000
CPU_TYPE_ANY = 0xffffffff
CPU_TYPE_VAX = 1
CPU_TYPE_MC680x0 = 6
CPU_TYPE_I386 = 7
CPU_TYPE_X86_64 = CPU_TYPE_I386 | CPU_ARCH_ABI64
CPU_TYPE_MIPS = 8
CPU_TYPE_MC98000 = 10
CPU_TYPE_HPPA = 11
CPU_TYPE_ARM = 12
CPU_TYPE_MC88000 = 13
CPU_TYPE_SPARC = 14
CPU_TYPE_I860 = 15
CPU_TYPE_ALPHA = 16
CPU_TYPE_POWERPC = 18
CPU_TYPE_POWERPC64 = CPU_TYPE_POWERPC | CPU_ARCH_ABI64

# VM protection constants
VM_PROT_READ = 1
VM_PROT_WRITE = 2
VM_PROT_EXECUTE = 4

# Bit masks for the nlist "Mach.NList.type" field
# (NOTE(review): this group was previously labelled "VM protection
# constants", which looks like a copy/paste slip -- confirm).
N_STAB = 0xe0
N_PEXT = 0x10
N_TYPE = 0x0e
N_EXT = 0x01

# Values for nlist N_TYPE bits of the "Mach.NList.type" field.
N_UNDF = 0x0
N_ABS = 0x2
N_SECT = 0xe
N_PBUD = 0xc
N_INDR = 0xa

# Section indexes for the "Mach.NList.sect_idx" fields
NO_SECT = 0
MAX_SECT = 255

# Stab defines
N_GSYM = 0x20
N_FNAME = 0x22
N_FUN = 0x24
N_STSYM = 0x26
N_LCSYM = 0x28
N_BNSYM = 0x2e
N_OPT = 0x3c
N_RSYM = 0x40
N_SLINE = 0x44
N_ENSYM = 0x4e
N_SSYM = 0x60
N_SO = 0x64
N_OSO = 0x66
N_LSYM = 0x80
N_BINCL = 0x82
N_SOL = 0x84
N_PARAMS = 0x86
N_VERSION = 0x88
N_OLEVEL = 0x8A
N_PSYM = 0xa0
N_EINCL = 0xa2
N_ENTRY = 0xa4
N_LBRAC = 0xc0
N_EXCL = 0xc2
N_RBRAC = 0xe0
N_BCOMM = 0xe2
N_ECOMM = 0xe4
N_ECOML = 0xe8
N_LENG = 0xfe

# Display strings indexed by the OR of the VM_PROT_* bits above.
vm_prot_names = ['---', 'r--', '-w-', 'rw-', '--x', 'r-x', '-wx', 'rwx']
def dump_memory(base_addr, data, hex_bytes_len, num_per_line):
    """Print a hex and ASCII dump of "data" to stdout.

    Args:
        base_addr: Address displayed for the first byte of "data".
        data: The bytes to dump. A text string is encoded as latin-1 first.
        hex_bytes_len: Number of hex digits (two per byte) to dump, or -1
            to dump everything. Values past the end of the data are clamped.
        num_per_line: Number of bytes shown on each output line.
    """
    import binascii  # local import: keeps this block self-contained

    if not isinstance(data, (bytes, bytearray)):
        data = data.encode('latin-1')
    # str.encode('hex') only exists on Python 2; hexlify works everywhere.
    hex_bytes = binascii.hexlify(data).decode('ascii')
    if hex_bytes_len == -1 or hex_bytes_len > len(hex_bytes):
        hex_bytes_len = len(hex_bytes)
    ascii_str = ''
    i = 0
    while i < hex_bytes_len:
        # "i" counts hex digits; integer-divide to get the byte index so
        # that the modulo and the "%*s" width below stay integers.
        byte_idx = i // 2
        if byte_idx % num_per_line == 0:
            if i > 0:
                print(' %s' % (ascii_str))
                ascii_str = ''
            # The address advances by bytes, not by hex digits.
            print('0x%8.8x:' % (base_addr + byte_idx), end=' ')
        hex_byte = hex_bytes[i:i + 2]
        print(hex_byte, end=' ')
        int_byte = int(hex_byte, 16)
        # Only printable ASCII is echoed; everything else becomes '.'.
        ascii_str += chr(int_byte) if 32 <= int_byte < 127 else '.'
        i += 2
    if ascii_str:
        # Pad a short final line so the ASCII column lines up.
        remainder = (i // 2) % num_per_line
        padding = num_per_line - remainder if remainder else 0
        print('%*s%s' % (padding * 3 + 1, '', ascii_str))
    print()
class TerminalColors:
    '''Simple terminal colors class.

    Every method returns an ANSI escape sequence string, or an empty
    string when the instance was created with enabled=False.
    '''

    def __init__(self, enabled=True):
        # TODO: discover terminal type from "file" and disable if
        # it can't handle the color codes
        self.enabled = enabled

    def _sgr(self, flag, code_if_true, code_if_false):
        '''Return the escape sequence for one of two numeric codes.

        "code_if_true" is used when "flag" tests True, otherwise
        "code_if_false". Returns '' when colors are disabled.'''
        if not self.enabled:
            return ''
        return "\x1b[%dm" % (code_if_true if flag else code_if_false)

    def reset(self):
        '''Reset all terminal colors and formatting.'''
        return self._sgr(True, 0, 0)

    def bold(self, on=True):
        '''Enable or disable bold depending on the "on" parameter.'''
        return self._sgr(on, 1, 22)

    def italics(self, on=True):
        '''Enable or disable italics depending on the "on" parameter.'''
        return self._sgr(on, 3, 23)

    def underline(self, on=True):
        '''Enable or disable underline depending on the "on" parameter.'''
        return self._sgr(on, 4, 24)

    def inverse(self, on=True):
        '''Enable or disable inverse depending on the "on" parameter.'''
        return self._sgr(on, 7, 27)

    def strike(self, on=True):
        '''Enable or disable strike through depending on the "on" parameter.'''
        return self._sgr(on, 9, 29)

    def black(self, fg=True):
        '''Set the foreground or background color to black.
        The foreground color will be set if "fg" tests True. The background color will be set if "fg" tests False.'''
        return self._sgr(fg, 30, 40)

    def red(self, fg=True):
        '''Set the foreground or background color to red.
        The foreground color will be set if "fg" tests True. The background color will be set if "fg" tests False.'''
        return self._sgr(fg, 31, 41)

    def green(self, fg=True):
        '''Set the foreground or background color to green.
        The foreground color will be set if "fg" tests True. The background color will be set if "fg" tests False.'''
        return self._sgr(fg, 32, 42)

    def yellow(self, fg=True):
        '''Set the foreground or background color to yellow.
        The foreground color will be set if "fg" tests True. The background color will be set if "fg" tests False.'''
        # 33 is the foreground code and 43 the background code, matching
        # the 3x/4x pattern of every other color in this class.
        return self._sgr(fg, 33, 43)

    def blue(self, fg=True):
        '''Set the foreground or background color to blue.
        The foreground color will be set if "fg" tests True. The background color will be set if "fg" tests False.'''
        return self._sgr(fg, 34, 44)

    def magenta(self, fg=True):
        '''Set the foreground or background color to magenta.
        The foreground color will be set if "fg" tests True. The background color will be set if "fg" tests False.'''
        return self._sgr(fg, 35, 45)

    def cyan(self, fg=True):
        '''Set the foreground or background color to cyan.
        The foreground color will be set if "fg" tests True. The background color will be set if "fg" tests False.'''
        return self._sgr(fg, 36, 46)

    def white(self, fg=True):
        '''Set the foreground or background color to white.
        The foreground color will be set if "fg" tests True. The background color will be set if "fg" tests False.'''
        return self._sgr(fg, 37, 47)

    def default(self, fg=True):
        '''Set the foreground or background color to the default.
        The foreground color will be set if "fg" tests True. The background color will be set if "fg" tests False.'''
        return self._sgr(fg, 39, 49)
def swap_unpack_char():
    """Return the struct byte-order prefix for the non-native endianness."""
    # struct.pack() returns bytes, so compare against a bytes literal
    # (a str prefix raises TypeError on Python 3). A host that packs the
    # 16-bit value 1 with a leading zero byte is big-endian, so the
    # swapped order is little-endian ('<'), and vice versa.
    if struct.pack('H', 1).startswith(b"\x00"):
        return '<'
    return '>'
def dump_hex_bytes(addr, s, bytes_per_line=16):
    """Print "s" as rows of upper-case hex bytes, each prefixed by an address.

    Args:
        addr: Address displayed for the first byte of "s".
        s: A bytes object or a string of single-byte characters.
        bytes_per_line: Number of bytes printed per output line.
    """
    line = ''
    for i, ch in enumerate(s):
        if (i % bytes_per_line) == 0:
            if line:
                print(line)
            line = '%#8.8x: ' % (addr + i)
        # Iterating bytes yields ints on Python 3 and 1-char strings on
        # Python 2 (or for str input); accept both.
        line += "%02X " % (ch if isinstance(ch, int) else ord(ch))
    print(line)
def dump_hex_byte_string_diff(addr, a, b, bytes_per_line=16):
    """Print a hex dump of "a", highlighting in red the bytes that differ from "b".

    When one input is shorter than the other, the bytes of the longer one
    are printed for the missing positions and are always highlighted.

    Args:
        addr: Address displayed for the first byte.
        a: First bytes object (or string of single-byte characters).
        b: Second bytes object (or string of single-byte characters).
        bytes_per_line: Number of bytes printed per output line.
    """
    line = ''
    a_len = len(a)
    b_len = len(b)
    tty_colors = TerminalColors(True)
    for i in range(max(a_len, b_len)):
        ch_a = a[i] if i < a_len else None
        ch_b = b[i] if i < b_len else None
        # Prefer the byte from "a"; test against None explicitly so that a
        # zero byte (int 0 on Python 3) is not mistaken for "missing".
        ch = ch_a if ch_a is not None else ch_b
        mismatch = ch_a != ch_b
        if (i % bytes_per_line) == 0:
            if line:
                print(line)
            line = '%#8.8x: ' % (addr + i)
        if mismatch:
            line += tty_colors.red()
        # Indexing bytes yields ints on Python 3, 1-char strings otherwise.
        line += "%02X " % (ch if isinstance(ch, int) else ord(ch))
        if mismatch:
            line += tty_colors.default()
    print(line)
419 class Mach:
420 """Class that does everything mach-o related"""
class Arch:
    """Class that implements mach-o architectures"""

    def __init__(self, c=0, s=0):
        """Store the cpu type "c" and the cpu subtype "s"."""
        self.cpu = c
        self.sub = s

    def set_cpu_type(self, c):
        """Replace the cpu type."""
        self.cpu = c

    def set_cpu_subtype(self, s):
        """Replace the cpu subtype."""
        self.sub = s

    def set_arch(self, c, s):
        """Replace both the cpu type and the cpu subtype."""
        self.cpu = c
        self.sub = s

    def is_64_bit(self):
        """Return True when the CPU_ARCH_ABI64 bit is set in the cpu type."""
        abi64_bits = self.cpu & CPU_ARCH_ABI64
        return abi64_bits != 0

    # Each entry is [name, cpu type, cpu subtype].
    # NOTE(review): __str__ masks the subtype with 0x00ffffff before
    # comparing, so entries whose subtype is CPU_TYPE_ANY (0xffffffff)
    # can never match there.
    cpu_infos = [
        ["arm", CPU_TYPE_ARM, CPU_TYPE_ANY],
        ["arm", CPU_TYPE_ARM, 0],
        ["armv4", CPU_TYPE_ARM, 5],
        ["armv6", CPU_TYPE_ARM, 6],
        ["armv5", CPU_TYPE_ARM, 7],
        ["xscale", CPU_TYPE_ARM, 8],
        ["armv7", CPU_TYPE_ARM, 9],
        ["armv7f", CPU_TYPE_ARM, 10],
        ["armv7s", CPU_TYPE_ARM, 11],
        ["armv7k", CPU_TYPE_ARM, 12],
        ["armv7m", CPU_TYPE_ARM, 15],
        ["armv7em", CPU_TYPE_ARM, 16],
        ["ppc", CPU_TYPE_POWERPC, CPU_TYPE_ANY],
        ["ppc", CPU_TYPE_POWERPC, 0],
        ["ppc601", CPU_TYPE_POWERPC, 1],
        ["ppc602", CPU_TYPE_POWERPC, 2],
        ["ppc603", CPU_TYPE_POWERPC, 3],
        ["ppc603e", CPU_TYPE_POWERPC, 4],
        ["ppc603ev", CPU_TYPE_POWERPC, 5],
        ["ppc604", CPU_TYPE_POWERPC, 6],
        ["ppc604e", CPU_TYPE_POWERPC, 7],
        ["ppc620", CPU_TYPE_POWERPC, 8],
        ["ppc750", CPU_TYPE_POWERPC, 9],
        ["ppc7400", CPU_TYPE_POWERPC, 10],
        ["ppc7450", CPU_TYPE_POWERPC, 11],
        ["ppc970", CPU_TYPE_POWERPC, 100],
        ["ppc64", CPU_TYPE_POWERPC64, 0],
        ["ppc970-64", CPU_TYPE_POWERPC64, 100],
        ["i386", CPU_TYPE_I386, 3],
        ["i486", CPU_TYPE_I386, 4],
        ["i486sx", CPU_TYPE_I386, 0x84],
        ["i386", CPU_TYPE_I386, CPU_TYPE_ANY],
        ["x86_64", CPU_TYPE_X86_64, 3],
        ["x86_64", CPU_TYPE_X86_64, CPU_TYPE_ANY],
    ]

    def __str__(self):
        """Return the table name for this cpu/subtype, else "<cpu>.<sub>"."""
        masked_sub = self.sub & 0x00ffffff
        for name, cpu_type, cpu_subtype in self.cpu_infos:
            if self.cpu == cpu_type and masked_sub == cpu_subtype:
                return name
        return "{0}.{1}".format(self.cpu, self.sub)
class Magic(dict_utils.Enum):
    """Enum wrapper for the 32-bit "magic" value of a mach-o or universal file."""

    enum = {
        'MH_MAGIC': MH_MAGIC,
        'MH_CIGAM': MH_CIGAM,
        'MH_MAGIC_64': MH_MAGIC_64,
        'MH_CIGAM_64': MH_CIGAM_64,
        'FAT_MAGIC': FAT_MAGIC,
        'FAT_CIGAM': FAT_CIGAM
    }

    def __init__(self, initial_value=0):
        dict_utils.Enum.__init__(self, initial_value, self.enum)

    def is_skinny_mach_file(self):
        """Return True for any of the four single-architecture magic values."""
        return self.value in (MH_MAGIC, MH_CIGAM, MH_MAGIC_64, MH_CIGAM_64)

    def is_universal_mach_file(self):
        """Return True for either universal (fat) magic value."""
        return self.value in (FAT_MAGIC, FAT_CIGAM)

    def unpack(self, data):
        """Read the magic as a native-endian uint32 from "data"."""
        data.set_byte_order('native')
        self.value = data.get_uint32()

    def get_byte_order(self):
        """Return the byte-order character to use for the rest of the file.

        A byte-swapped ("CIGAM") magic selects the non-native order,
        anything else selects '='.
        """
        if self.value in (MH_CIGAM, MH_CIGAM_64, FAT_CIGAM):
            return swap_unpack_char()
        return '='

    def is_64_bit(self):
        """Return True for the 64-bit mach header magic values."""
        return self.value in (MH_MAGIC_64, MH_CIGAM_64)
def __init__(self):
    """Create an empty Mach object; content stays None until unpack() succeeds."""
    self.magic = Mach.Magic()
    self.content = None
    self.path = None
def extract(self, path, extractor):
    """Record "path" and unpack mach-o content from an existing extractor."""
    self.path = path
    self.unpack(extractor)
def parse(self, path):
    """Open the file at "path" and unpack its mach-o contents.

    I/O errors and integer conversion errors are reported on stdout and
    swallowed; any other error is reported and re-raised.
    """
    self.path = path
    try:
        # Mach-O files are binary data, so open in binary mode.
        # The file is deliberately not closed here: Mach.Skinny.unpack()
        # stores the extractor and seeks in it later (e.g. get_symtab()).
        f = open(self.path, 'rb')
        file_extractor = file_extract.FileExtract(f, '=')
        self.unpack(file_extractor)
    except IOError as err:
        # Use the named attributes rather than unpacking err.args, which
        # is not guaranteed to hold exactly two items.
        print("I/O error({0}): {1}".format(err.errno, err.strerror))
    except ValueError:
        print("Could not convert data to an integer.")
    except:
        print("Unexpected error:", sys.exc_info()[0])
        raise
def compare(self, rhs):
    """Compare this file's content with the content of "rhs" (delegates to the content object)."""
    self.content.compare(rhs.content)
def dump(self, options=None):
    """Dump the parsed content as selected by "options" (delegates to the content object)."""
    self.content.dump(options)
def dump_header(self, dump_description=True, options=None):
    """Dump the header(s) of the parsed content (delegates to the content object)."""
    self.content.dump_header(dump_description, options)
def dump_load_commands(self, dump_description=True, options=None):
    """Dump the load commands of the parsed content (delegates to the content object)."""
    self.content.dump_load_commands(dump_description, options)
def dump_sections(self, dump_description=True, options=None):
    """Dump the sections of the parsed content (delegates to the content object)."""
    self.content.dump_sections(dump_description, options)
def dump_section_contents(self, options):
    """Dump or save the section contents selected by "options" (delegates to the content object)."""
    self.content.dump_section_contents(options)
def dump_symtab(self, dump_description=True, options=None):
    """Dump the symbol table of the parsed content (delegates to the content object)."""
    self.content.dump_symtab(dump_description, options)
def dump_symbol_names_matching_regex(self, regex, file=None):
    """Print symbol names matching "regex", passing the optional "file" through to the content object."""
    self.content.dump_symbol_names_matching_regex(regex, file)
def description(self):
    """Return the description string of the parsed content."""
    return self.content.description()
def unpack(self, data):
    """Read the magic from "data" and unpack the matching content type.

    self.content becomes a Mach.Skinny, a Mach.Universal, or None when
    the magic is not recognized.
    """
    self.magic.unpack(data)
    content = None
    if self.magic.is_skinny_mach_file():
        content = Mach.Skinny(self.path)
    elif self.magic.is_universal_mach_file():
        content = Mach.Universal(self.path)
    self.content = content

    if content is not None:
        # The magic has already been consumed, so hand it over.
        content.unpack(data, self.magic)
def is_valid(self):
    """Return True when unpack() recognized the file and created a content object."""
    return self.content is not None
585 class Universal:
def __init__(self, path):
    """Create an empty universal-file object for "path"; unpack() fills it in."""
    self.path = path
    self.type = 'universal'
    self.file_off = 0
    self.magic = None
    self.nfat_arch = 0
    # One Mach.Universal.ArchInfo per architecture, appended by unpack().
    self.archs = list()
def description(self):
    """Return "<file offset>: <path> (<arch>, <arch>, ...)"."""
    arch_names = ', '.join('%s' % entry.arch for entry in self.archs)
    prefix = '%#8.8x: %s (' % (self.file_off, self.path)
    return prefix + arch_names + ')'
def unpack(self, data, magic=None):
    """Unpack the universal header and every architecture slice it lists.

    Args:
        data: Extractor positioned at the header, or just past the magic
            when "magic" is supplied.
        magic: An already-read Mach.Magic, or None to read it here.
    """
    self.file_off = data.tell()
    if magic is not None:
        self.magic = magic
        # The caller already consumed the 4 magic bytes.
        self.file_off = self.file_off - 4
    else:
        self.magic = Mach.Magic()
        self.magic.unpack(data)
    # Universal headers are always in big endian
    data.set_byte_order('big')
    self.nfat_arch = data.get_uint32()
    # First pass: read all the architecture table entries in sequence.
    for i in range(self.nfat_arch):
        self.archs.append(Mach.Universal.ArchInfo())
        self.archs[i].unpack(data)
    # Second pass: seek to each slice and unpack it as a skinny file.
    for i in range(self.nfat_arch):
        arch_info = self.archs[i]
        arch_info.mach = Mach.Skinny(self.path)
        data.seek(arch_info.offset, 0)
        skinny_magic = Mach.Magic()
        skinny_magic.unpack(data)
        arch_info.mach.unpack(data, skinny_magic)
def compare(self, rhs):
    """Report that universal files cannot be compared; always returns False."""
    print('error: comparing two universal files is not supported yet')
    return False
def dump(self, options):
    """Dump the universal header table (when requested) and then every slice."""
    show_header = options.dump_header
    if show_header:
        print()
        print("Universal Mach File: magic = %s, nfat_arch = %u" % (self.magic, self.nfat_arch))
        print()
    if self.nfat_arch > 0:
        if show_header:
            # Column titles come from the first entry, then one row each.
            self.archs[0].dump_header(True, options)
            for i in range(self.nfat_arch):
                self.archs[i].dump_flat(options)
            print()
        for i in range(self.nfat_arch):
            self.archs[i].mach.dump(options)
def dump_header(self, dump_description=True, options=None):
    """Dump the mach header of every architecture slice.

    Prints this file's description first when "dump_description" is True.
    """
    if dump_description:
        print(self.description())
    for i in range(self.nfat_arch):
        self.archs[i].mach.dump_header(True, options)
        print()
def dump_load_commands(self, dump_description=True, options=None):
    """Dump the load commands of every architecture slice.

    Prints this file's description first when "dump_description" is True.
    """
    if dump_description:
        print(self.description())
    for i in range(self.nfat_arch):
        self.archs[i].mach.dump_load_commands(True, options)
        print()
def dump_sections(self, dump_description=True, options=None):
    """Dump the sections of every architecture slice.

    Prints this file's description first when "dump_description" is True.
    """
    if dump_description:
        print(self.description())
    for i in range(self.nfat_arch):
        self.archs[i].mach.dump_sections(True, options)
        print()
def dump_section_contents(self, options):
    """Dump the section contents selected by "options" for every architecture slice."""
    for i in range(self.nfat_arch):
        self.archs[i].mach.dump_section_contents(options)
        print()
def dump_symtab(self, dump_description=True, options=None):
    """Dump the symbol table of every architecture slice.

    Prints this file's description first when "dump_description" is True.
    """
    if dump_description:
        print(self.description())
    for i in range(self.nfat_arch):
        self.archs[i].mach.dump_symtab(True, options)
        print()
def dump_symbol_names_matching_regex(self, regex, file=None):
    """Run the regex symbol-name dump on every architecture slice."""
    for i in range(self.nfat_arch):
        self.archs[i].mach.dump_symbol_names_matching_regex(
            regex, file)
class ArchInfo:
    """One architecture table entry of a universal file header."""

    def __init__(self):
        self.arch = Mach.Arch(0, 0)
        self.offset = 0
        self.size = 0
        self.align = 0
        # Set by Mach.Universal.unpack() to the Mach.Skinny for this slice.
        self.mach = None

    def _raw_fields(self):
        """Return (cpu, sub, offset, size, align) for the formatters below."""
        return (self.arch.cpu, self.arch.sub, self.offset, self.size, self.align)

    def unpack(self, data):
        """Read the five uint32 fields of this entry from "data"."""
        # Universal headers are always in big endian
        data.set_byte_order('big')
        # NOTE(review): the argument of this call is cut off in the
        # reviewed text; 5 is inferred from the five values unpacked.
        fields = data.get_n_uint32(5)
        self.arch.cpu, self.arch.sub, self.offset, self.size, self.align = fields

    def dump_header(self, dump_description=True, options=None):
        """Print the column titles matching the rows printed by dump_flat()."""
        if options.verbose:
            print("CPU SUBTYPE OFFSET SIZE ALIGN")
            print("---------- ---------- ---------- ---------- ----------")
        else:
            print("ARCH FILEOFFSET FILESIZE ALIGN")
            print("---------- ---------- ---------- ----------")

    def dump_flat(self, options):
        """Print this entry as a single table row."""
        if options.verbose:
            print("%#8.8x %#8.8x %#8.8x %#8.8x %#8.8x" % self._raw_fields())
        else:
            print("%-10s %#8.8x %#8.8x %#8.8x" % (self.arch, self.offset, self.size, self.align))

    def dump(self):
        """Print this entry as one labelled field per line."""
        print(" cputype: %#8.8x" % self.arch.cpu)
        print("cpusubtype: %#8.8x" % self.arch.sub)
        print(" offset: %#8.8x" % self.offset)
        print(" size: %#8.8x" % self.size)
        print(" align: %#8.8x" % self.align)

    def __str__(self):
        return "Mach.Universal.ArchInfo: %#8.8x %#8.8x %#8.8x %#8.8x %#8.8x" % self._raw_fields()

    def __repr__(self):
        return "Mach.Universal.ArchInfo: %#8.8x %#8.8x %#8.8x %#8.8x %#8.8x" % self._raw_fields()
class Flags:
    """Wrapper around the mach header "flags" bit field."""

    def __init__(self, b):
        self.bits = b

    def __str__(self):
        """Return the names of all set flag bits joined by " | ".

        Returns an empty string when no known bit is set.
        """
        flag_table = (
            ('MH_NOUNDEFS', MH_NOUNDEFS),
            ('MH_INCRLINK', MH_INCRLINK),
            ('MH_DYLDLINK', MH_DYLDLINK),
            ('MH_BINDATLOAD', MH_BINDATLOAD),
            ('MH_PREBOUND', MH_PREBOUND),
            ('MH_SPLIT_SEGS', MH_SPLIT_SEGS),
            ('MH_LAZY_INIT', MH_LAZY_INIT),
            ('MH_TWOLEVEL', MH_TWOLEVEL),
            ('MH_FORCE_FLAT', MH_FORCE_FLAT),
            ('MH_NOMULTIDEFS', MH_NOMULTIDEFS),
            ('MH_NOFIXPREBINDING', MH_NOFIXPREBINDING),
            ('MH_PREBINDABLE', MH_PREBINDABLE),
            ('MH_ALLMODSBOUND', MH_ALLMODSBOUND),
            ('MH_SUBSECTIONS_VIA_SYMBOLS', MH_SUBSECTIONS_VIA_SYMBOLS),
            ('MH_CANONICAL', MH_CANONICAL),
            ('MH_WEAK_DEFINES', MH_WEAK_DEFINES),
            ('MH_BINDS_TO_WEAK', MH_BINDS_TO_WEAK),
            ('MH_ALLOW_STACK_EXECUTION', MH_ALLOW_STACK_EXECUTION),
            ('MH_ROOT_SAFE', MH_ROOT_SAFE),
            ('MH_SETUID_SAFE', MH_SETUID_SAFE),
            ('MH_NO_REEXPORTED_DYLIBS', MH_NO_REEXPORTED_DYLIBS),
            ('MH_PIE', MH_PIE),
            ('MH_DEAD_STRIPPABLE_DYLIB', MH_DEAD_STRIPPABLE_DYLIB),
            ('MH_HAS_TLV_DESCRIPTORS', MH_HAS_TLV_DESCRIPTORS),
            ('MH_NO_HEAP_EXECUTION', MH_NO_HEAP_EXECUTION),
        )
        # Joining avoids the trailing separator that the previous
        # append-then-slice approach only partly removed (it left a
        # trailing space behind).
        return ' | '.join(name for name, bit in flag_table if self.bits & bit)
class FileType(dict_utils.Enum):
    """Enum wrapper for the mach header "filetype" field."""

    # Maps each filetype name to its MH_* constant.
    enum = {
        'MH_OBJECT': MH_OBJECT,
        'MH_EXECUTE': MH_EXECUTE,
        'MH_FVMLIB': MH_FVMLIB,
        'MH_CORE': MH_CORE,
        'MH_PRELOAD': MH_PRELOAD,
        'MH_DYLIB': MH_DYLIB,
        'MH_DYLINKER': MH_DYLINKER,
        'MH_BUNDLE': MH_BUNDLE,
        'MH_DYLIB_STUB': MH_DYLIB_STUB,
        'MH_DSYM': MH_DSYM,
        'MH_KEXT_BUNDLE': MH_KEXT_BUNDLE
    }

    def __init__(self, initial_value=0):
        dict_utils.Enum.__init__(self, initial_value, self.enum)
809 class Skinny:
def __init__(self, path):
    """Create an empty single-architecture mach-o object for "path"."""
    self.path = path
    self.type = 'skinny'
    self.data = None
    self.file_off = 0
    # Replaced by a Mach.Magic object in unpack().
    self.magic = 0
    self.arch = Mach.Arch(0, 0)
    self.filetype = Mach.FileType(0)
    self.ncmds = 0
    self.sizeofcmds = 0
    self.flags = Mach.Flags(0)
    self.uuid = None
    self.commands = list()
    self.segments = list()
    self.sections = list()
    self.symbols = list()
    # Index 0 holds a placeholder so real sections start at index 1
    # (compare() and dump_sections() both skip index 0).
    self.sections.append(Mach.Section())
def description(self):
    """Return "<file offset>: <path> (<arch>)"."""
    fields = (self.file_off, self.path, self.arch)
    return '%#8.8x: %s (%s)' % fields
def unpack(self, data, magic=None):
    """Unpack the mach header and all of its load commands from "data".

    Args:
        data: Extractor positioned at the header, or just past the magic
            when "magic" is supplied. It is kept in self.data.
        magic: An already-read Mach.Magic, or None to read it here.
    """
    self.data = data
    self.file_off = data.tell()
    if magic is not None:
        self.magic = magic
        # The caller already consumed the 4 magic bytes.
        self.file_off = self.file_off - 4
    else:
        self.magic = Mach.Magic()
        self.magic.unpack(data)
    data.set_byte_order(self.magic.get_byte_order())
    # NOTE(review): the argument of this call is cut off in the reviewed
    # text; 6 is inferred from the six values unpacked.
    header_words = data.get_n_uint32(6)
    (self.arch.cpu, self.arch.sub, self.filetype.value,
     self.ncmds, self.sizeofcmds, bits) = header_words
    self.flags.bits = bits

    if self.is_64_bit():
        data.get_uint32()  # Skip reserved word in mach_header_64

    for _ in range(self.ncmds):
        self.commands.append(self.unpack_load_command(data))
def get_data(self):
    """Return the stored extractor with this file's byte order applied, or None."""
    extractor = self.data
    if not extractor:
        return None
    extractor.set_byte_order(self.magic.get_byte_order())
    return extractor
def unpack_load_command(self, data):
    """Unpack one load command from "data" and return it.

    The generic Mach.LoadCommand is read first; for recognized command
    values it is then wrapped in the matching specialized class, which
    unpacks its own fields. Finally lc.skip(data) is called on the result.
    """
    lc = Mach.LoadCommand()
    lc.unpack(self, data)
    lc_command = lc.command.get_enum_value()
    if lc_command in (LC_SEGMENT, LC_SEGMENT_64):
        lc = Mach.SegmentLoadCommand(lc)
        lc.unpack(self, data)
    elif lc_command in (LC_LOAD_DYLIB, LC_ID_DYLIB,
                        LC_LOAD_WEAK_DYLIB, LC_REEXPORT_DYLIB):
        lc = Mach.DylibLoadCommand(lc)
        lc.unpack(self, data)
    elif lc_command in (LC_LOAD_DYLINKER, LC_SUB_FRAMEWORK, LC_SUB_CLIENT,
                        LC_SUB_UMBRELLA, LC_SUB_LIBRARY, LC_ID_DYLINKER,
                        LC_RPATH):
        lc = Mach.LoadDYLDLoadCommand(lc)
        lc.unpack(self, data)
    elif lc_command == LC_DYLD_INFO_ONLY:
        lc = Mach.DYLDInfoOnlyLoadCommand(lc)
        lc.unpack(self, data)
    elif lc_command == LC_SYMTAB:
        lc = Mach.SymtabLoadCommand(lc)
        lc.unpack(self, data)
    elif lc_command == LC_DYSYMTAB:
        lc = Mach.DYLDSymtabLoadCommand(lc)
        lc.unpack(self, data)
    elif lc_command == LC_UUID:
        lc = Mach.UUIDLoadCommand(lc)
        lc.unpack(self, data)
    elif lc_command in (LC_CODE_SIGNATURE, LC_SEGMENT_SPLIT_INFO,
                        LC_FUNCTION_STARTS):
        lc = Mach.DataBlobLoadCommand(lc)
        lc.unpack(self, data)
    elif lc_command == LC_UNIXTHREAD:
        lc = Mach.UnixThreadLoadCommand(lc)
        lc.unpack(self, data)
    elif lc_command == LC_ENCRYPTION_INFO:
        lc = Mach.EncryptionInfoLoadCommand(lc)
        lc.unpack(self, data)
    lc.skip(data)
    return lc
def compare(self, rhs):
    """Compare the section contents of this file with those of "rhs".

    Progress and errors are printed to stdout. Returns True when every
    section of this file (index 0 is skipped) exists in "rhs" with equal
    contents, False otherwise.
    """
    print("\nComparing:")
    print("a) %s %s" % (self.arch, self.path))
    print("b) %s %s" % (rhs.arch, rhs.path))
    result = True
    if self.type != rhs.type:
        print('error: comparing a %s mach-o file with a %s mach-o file is not supported' % (self.type, rhs.type))
        result = False
    else:
        for lhs_section in self.sections[1:]:
            rhs_section = rhs.get_section_by_section(lhs_section)
            if not rhs_section:
                result = False
                print('error: section %s is missing in %s' % (lhs_section.sectname, rhs.path))
                continue
            print('comparing %s.%s...' % (lhs_section.segname, lhs_section.sectname), end=' ')
            sys.stdout.flush()
            lhs_data = lhs_section.get_contents(self)
            rhs_data = rhs_section.get_contents(rhs)
            if lhs_data and rhs_data:
                if lhs_data == rhs_data:
                    print('ok')
                else:
                    result = False
                    print('error: sections differ')
                continue
            # At least one side has no data from here on.
            if lhs_data:
                problem = 'error: section data missing from b:'
            elif rhs_data:
                problem = 'error: section data missing from a:'
            elif lhs_section.offset or rhs_section.offset:
                problem = 'error: section data missing for both a and b:'
            else:
                # Neither side has data nor a file offset: nothing to compare.
                print('ok')
                continue
            print(problem)
            print('a) %s' % (lhs_section))
            print('b) %s' % (rhs_section))
            result = False
    if not result:
        print('error: mach files differ')
    return result
def dump_header(self, dump_description=True, options=None):
    """Print the column titles for the one-line rows produced by dump_flat().

    NOTE(review): a second dump_header() with the same signature is
    defined later in this class; in Python the later definition replaces
    this one, so this body looks unreachable as written -- confirm and
    consider renaming. "options" must not be None because
    "options.verbose" is read unconditionally.
    """
    if options.verbose:
        print("MAGIC CPU SUBTYPE FILETYPE NUM CMDS SIZE CMDS FLAGS")
        print("---------- ---------- ---------- ---------- -------- ---------- ----------")
    else:
        print("MAGIC ARCH FILETYPE NUM CMDS SIZE CMDS FLAGS")
        print("------------ ---------- -------------- -------- ---------- ----------")
def dump_flat(self, options):
    """Print the mach header as a single table row.

    With options.verbose every field is printed as a raw number;
    otherwise the magic, arch, filetype and flags are printed through
    their string conversions.
    """
    if options.verbose:
        # "%x" needs an integer, so format the numeric magic value rather
        # than the Magic object itself (the multi-line header dump in this
        # class formats self.magic.value the same way).
        print("%#8.8x %#8.8x %#8.8x %#8.8x %#8u %#8.8x %#8.8x" % (self.magic.value, self.arch.cpu, self.arch.sub, self.filetype.value, self.ncmds, self.sizeofcmds, self.flags.bits))
    else:
        print("%-12s %-10s %-14s %#8u %#8.8x %s" % (self.magic, self.arch, self.filetype, self.ncmds, self.sizeofcmds, self.flags))
def dump(self, options):
    """Dump every part of this mach-o file that "options" selects.

    Reads the attributes dump_header, dump_load_commands, dump_sections,
    section_names, dump_symtab and find_mangled from "options".
    """
    if options.dump_header:
        self.dump_header(True, options)
    if options.dump_load_commands:
        self.dump_load_commands(False, options)
    if options.dump_sections:
        self.dump_sections(False, options)
    if options.section_names:
        self.dump_section_contents(options)
    if options.dump_symtab:
        self.get_symtab()
        if self.symbols:
            # The symbol table was requested, so dump the symbols; this
            # previously dumped the section list a second time instead.
            self.dump_symtab(False, options)
        else:
            print("No symbols")
    if options.find_mangled:
        self.dump_symbol_names_matching_regex(re.compile('^_?_Z'))
def dump_header(self, dump_description=True, options=None):
    """Print the mach header, one labelled field per line.

    Prints this file's description first when "dump_description" is
    True. "options" is accepted but not used here.
    """
    if dump_description:
        print(self.description())
    magic = self.magic
    arch = self.arch
    filetype = self.filetype
    flags = self.flags
    print("Mach Header")
    print(" magic: %#8.8x %s" % (magic.value, magic))
    print(" cputype: %#8.8x %s" % (arch.cpu, arch))
    print(" cpusubtype: %#8.8x" % arch.sub)
    print(" filetype: %#8.8x %s" % (filetype.get_enum_value(), filetype.get_enum_name()))
    print(" ncmds: %#8.8x %u" % (self.ncmds, self.ncmds))
    print(" sizeofcmds: %#8.8x" % self.sizeofcmds)
    print(" flags: %#8.8x %s" % (flags.bits, flags))
def dump_load_commands(self, dump_description=True, options=None):
    """Print every unpacked load command, one per print() call.

    Prints this file's description first when "dump_description" is
    True. "options" is accepted but not used here.
    """
    if dump_description:
        print(self.description())
    for lc in self.commands:
        print(lc)
def get_section_by_name(self, name):
    """Return the first section whose non-empty sectname equals "name", or None."""
    matches = (candidate for candidate in self.sections
               if candidate.sectname and candidate.sectname == name)
    return next(matches, None)
def get_section_by_section(self, other_section):
    """Return the first section with the same sectname and segname as "other_section", or None."""
    wanted_sect = other_section.sectname
    matches = (candidate for candidate in self.sections
               if candidate.sectname == wanted_sect
               and candidate.segname == other_section.segname)
    return next(matches, None)
def dump_sections(self, dump_description=True, options=None):
    """Print the section table: column titles, then one row per section.

    Index 0 of self.sections is skipped. Prints this file's description
    first when "dump_description" is True. "options" is not used here.
    """
    if dump_description:
        print(self.description())
    real_sections = self.sections[1:]
    if real_sections:
        # Any section can print the column titles; use the first real one.
        real_sections[0].dump_header()
    for section in real_sections:
        print("%s" % section)
def dump_section_contents(self, options):
    """Dump, or save to a file, the sections named in options.section_names.

    Without options.outfile each section is hex-dumped to stdout. With
    options.outfile only the first section found is written to that file
    (further sections are reported as skipped); when
    options.extract_modules is set, the section is read as a module
    table and only each module's data is written.
    """
    saved_section_to_disk = False
    for sectname in options.section_names:
        section = self.get_section_by_name(sectname)
        if not section:
            print('error: no section named "%s" was found' % (sectname))
            continue
        sect_bytes = section.get_contents(self)
        if not options.outfile:
            print('section %s:\n' % (sectname))
            section.dump_header()
            print('%s\n' % (section))
            dump_memory(0, sect_bytes, options.max_count, 16)
            continue
        if saved_section_to_disk:
            print("error: you can only save a single section to disk at a time, skipping section '%s'" % (sectname))
            continue
        # Section contents are bytes, so the file must be opened in
        # binary mode; "with" also closes it if a write fails.
        with open(options.outfile, 'wb') as outfile:
            if options.extract_modules:
                data = file_extract.FileExtract(
                    io.BytesIO(sect_bytes), self.data.byte_order)
                data.get_uint32()  # version (not used)
                num_modules = data.get_uint32()
                # NOTE(review): the seeks below move the read position
                # away from the module table, and nothing restores it
                # before the next entry is read -- confirm this is the
                # intended layout.
                for _ in range(num_modules):
                    data_offset = data.get_uint64()
                    data_size = data.get_uint64()
                    name_offset = data.get_uint32()
                    data.get_uint32()  # language (not used)
                    data.get_uint32()  # flags (not used)
                    data.seek(name_offset)
                    data.get_c_string()  # module name (not used)
                    data.seek(data_offset)
                    outfile.write(data.read_size(data_size))
            else:
                print("Saving section %s to '%s'" % (sectname, options.outfile))
                outfile.write(sect_bytes)
        saved_section_to_disk = True
def get_segment(self, segname):
    '''Return the segment named segname, or None when there is no match.

    If the image has exactly one segment and that segment's name is the
    empty string, it is returned regardless of segname.
    '''
    candidates = self.segments
    if len(candidates) == 1 and candidates[0].segname == '':
        return candidates[0]
    return next(
        (seg for seg in candidates if seg.segname == segname), None)
def get_first_load_command(self, lc_enum_value):
    '''Return the first entry of self.commands whose command.value equals
    lc_enum_value, or None if no load command matches.'''
    matches = (load_cmd for load_cmd in self.commands
               if load_cmd.command.value == lc_enum_value)
    return next(matches, None)
def get_symtab(self):
    '''Parse the symbol table into self.symbols.

    Does nothing when there is no data to read from or when self.symbols
    is already populated.  Prints "no LC_SYMTAB" when the image has no
    LC_SYMTAB load command.  Otherwise seeks to the start of the nlist
    array and unpacks lc_symtab.nsyms entries, appending each one to
    self.symbols.
    '''
    if not self.data or self.symbols:
        return
    lc_symtab = self.get_first_load_command(LC_SYMTAB)
    if not lc_symtab:
        print("no LC_SYMTAB")
        return

    symtab_offset = self.file_off
    if self.data.is_in_memory():
        linkedit_segment = self.get_segment('__LINKEDIT')
        if linkedit_segment:
            # Translate the file offset of the nlist array into an
            # address relative to where __LINKEDIT is mapped.  Only
            # symoff belongs here: the previous code overwrote this
            # value with the stroff-based one, which made the nlist
            # parse start at the string table.
            symtab_offset = (linkedit_segment.vmaddr +
                             lc_symtab.symoff -
                             linkedit_segment.fileoff)
    else:
        symtab_offset += lc_symtab.symoff

    self.data.seek(symtab_offset)
    for _ in range(lc_symtab.nsyms):
        nlist = Mach.NList()
        nlist.unpack(self, self.data, lc_symtab)
        self.symbols.append(nlist)
def dump_symtab(self, dump_description=True, options=None):
    '''Print every symbol in the symbol table, one per line.

    dump_description -- when true, print self.description() before the
                        symbols.
    options          -- accepted for call compatibility; not used here.

    self.get_symtab() is called first so the table is loaded on demand.
    '''
    self.get_symtab()
    if dump_description:
        print(self.description())
    for position, entry in enumerate(self.symbols):
        line = '[%5u] %s' % (position, entry)
        print(line)
def dump_symbol_names_matching_regex(self, regex, file=None):
    '''Print the name of every symbol that regex.search() matches.

    regex -- object with a search() method (e.g. a compiled pattern).
    file  -- optional object with a write() method; when truthy, each
             matching name is also written to it followed by a newline.

    Symbols with an empty or missing name are ignored.
    '''
    self.get_symtab()
    for symbol in self.symbols:
        name = symbol.name
        if not name or not regex.search(name):
            continue
        print(name)
        if file:
            file.write('%s\n' % (name))
def is_64_bit(self):
    '''Return whatever self.magic.is_64_bit() reports for this image.'''
    magic = self.magic
    return magic.is_64_bit()
class LoadCommand:
    '''Generic load command: command id, byte length and file offset.'''

    class Command(dict_utils.Enum):
        '''Enumeration of the LC_* load command identifiers.'''

        enum = {
            'LC_SEGMENT': LC_SEGMENT,
            'LC_SYMTAB': LC_SYMTAB,
            'LC_SYMSEG': LC_SYMSEG,
            'LC_THREAD': LC_THREAD,
            'LC_UNIXTHREAD': LC_UNIXTHREAD,
            'LC_LOADFVMLIB': LC_LOADFVMLIB,
            'LC_IDFVMLIB': LC_IDFVMLIB,
            'LC_IDENT': LC_IDENT,
            'LC_FVMFILE': LC_FVMFILE,
            'LC_PREPAGE': LC_PREPAGE,
            'LC_DYSYMTAB': LC_DYSYMTAB,
            'LC_LOAD_DYLIB': LC_LOAD_DYLIB,
            'LC_ID_DYLIB': LC_ID_DYLIB,
            'LC_LOAD_DYLINKER': LC_LOAD_DYLINKER,
            'LC_ID_DYLINKER': LC_ID_DYLINKER,
            'LC_PREBOUND_DYLIB': LC_PREBOUND_DYLIB,
            'LC_ROUTINES': LC_ROUTINES,
            'LC_SUB_FRAMEWORK': LC_SUB_FRAMEWORK,
            'LC_SUB_UMBRELLA': LC_SUB_UMBRELLA,
            'LC_SUB_CLIENT': LC_SUB_CLIENT,
            'LC_SUB_LIBRARY': LC_SUB_LIBRARY,
            'LC_TWOLEVEL_HINTS': LC_TWOLEVEL_HINTS,
            'LC_PREBIND_CKSUM': LC_PREBIND_CKSUM,
            'LC_LOAD_WEAK_DYLIB': LC_LOAD_WEAK_DYLIB,
            'LC_SEGMENT_64': LC_SEGMENT_64,
            'LC_ROUTINES_64': LC_ROUTINES_64,
            'LC_UUID': LC_UUID,
            'LC_RPATH': LC_RPATH,
            'LC_CODE_SIGNATURE': LC_CODE_SIGNATURE,
            'LC_SEGMENT_SPLIT_INFO': LC_SEGMENT_SPLIT_INFO,
            'LC_REEXPORT_DYLIB': LC_REEXPORT_DYLIB,
            'LC_LAZY_LOAD_DYLIB': LC_LAZY_LOAD_DYLIB,
            'LC_ENCRYPTION_INFO': LC_ENCRYPTION_INFO,
            'LC_DYLD_INFO': LC_DYLD_INFO,
            'LC_DYLD_INFO_ONLY': LC_DYLD_INFO_ONLY,
            'LC_LOAD_UPWARD_DYLIB': LC_LOAD_UPWARD_DYLIB,
            'LC_VERSION_MIN_MACOSX': LC_VERSION_MIN_MACOSX,
            'LC_VERSION_MIN_IPHONEOS': LC_VERSION_MIN_IPHONEOS,
            'LC_FUNCTION_STARTS': LC_FUNCTION_STARTS,
            'LC_DYLD_ENVIRONMENT': LC_DYLD_ENVIRONMENT,
        }

        def __init__(self, initial_value=0):
            dict_utils.Enum.__init__(self, initial_value, self.enum)

    def __init__(self, c=None, l=0, o=0):
        '''c is the Command enum (a fresh zero-valued one is created when
        omitted), l the command length and o the command's file offset.'''
        self.command = Mach.LoadCommand.Command(0) if c is None else c
        self.length = l
        self.file_off = o

    def unpack(self, mach_file, data):
        '''Record the current offset of data, then read the command id
        and the command length as two 32-bit unsigned values.'''
        self.file_off = data.tell()
        cmd_value, cmd_length = data.get_n_uint32(2)
        self.command.value = cmd_value
        self.length = cmd_length

    def skip(self, data):
        '''Position data just past the end of this load command.'''
        end_of_command = self.file_off + self.length
        data.seek(end_of_command, 0)

    def __str__(self):
        fields = (self.file_off, self.length, self.command.get_enum_name())
        return '%#8.8x: <%#4.4x> %-24s' % fields
class Section:
    '''One section record, in either its 32-bit or 64-bit form.'''

    def __init__(self):
        self.index = 0
        self.is_64 = False
        self.sectname = None
        self.segname = None
        self.addr = self.size = 0
        self.offset = self.align = 0
        self.reloff = self.nreloc = 0
        self.flags = 0
        self.reserved1 = self.reserved2 = self.reserved3 = 0

    def unpack(self, is_64, data):
        '''Read this section's fields from data.

        Both names are 16-byte fixed-length strings.  The 64-bit form
        reads addr/size as 64-bit values followed by eight 32-bit fields
        (including reserved3); the 32-bit form reads addr/size as 32-bit
        values followed by seven 32-bit fields.
        '''
        self.is_64 = is_64
        self.sectname = data.get_fixed_length_c_string(16, '', True)
        self.segname = data.get_fixed_length_c_string(16, '', True)
        if not self.is_64:
            self.addr, self.size = data.get_n_uint32(2)
            (self.offset, self.align, self.reloff, self.nreloc,
             self.flags, self.reserved1,
             self.reserved2) = data.get_n_uint32(7)
            return
        self.addr, self.size = data.get_n_uint64(2)
        (self.offset, self.align, self.reloff, self.nreloc,
         self.flags, self.reserved1, self.reserved2,
         self.reserved3) = data.get_n_uint32(8)

    def dump_header(self):
        '''Print the two column-header lines that match __str__().'''
        if self.is_64:
            titles = "INDEX ADDRESS SIZE OFFSET ALIGN RELOFF NRELOC FLAGS RESERVED1 RESERVED2 RESERVED3 NAME"
            rule = "===== ------------------ ------------------ ---------- ---------- ---------- ---------- ---------- ---------- ---------- ---------- ----------------------"
        else:
            titles = "INDEX ADDRESS SIZE OFFSET ALIGN RELOFF NRELOC FLAGS RESERVED1 RESERVED2 NAME"
            rule = "===== ---------- ---------- ---------- ---------- ---------- ---------- ---------- ---------- ---------- ----------------------"
        print(titles)
        print(rule)

    def __str__(self):
        leading = (self.index, self.addr, self.size, self.offset,
                   self.align, self.reloff, self.nreloc, self.flags,
                   self.reserved1, self.reserved2)
        names = (self.segname, self.sectname)
        if self.is_64:
            return "[%3u] %#16.16x %#16.16x %#8.8x %#8.8x %#8.8x %#8.8x %#8.8x %#8.8x %#8.8x %#8.8x %s.%s" % (
                leading + (self.reserved3,) + names)
        return "[%3u] %#8.8x %#8.8x %#8.8x %#8.8x %#8.8x %#8.8x %#8.8x %#8.8x %#8.8x %s.%s" % (
            leading + names)

    def get_contents(self, mach_file):
        '''Return the section contents read from mach_file's data.

        Returns None when the section size is zero, when the owning
        segment has no file-backed bytes, or when mach_file.get_data()
        yields nothing.  The data object's offset is saved before the
        read and restored afterwards.
        '''
        if self.size <= 0 or mach_file.get_segment(self.segname).filesize <= 0:
            return None
        data = mach_file.get_data()
        if not data:
            return None
        data.push_offset_and_seek(mach_file.file_off + self.offset)
        contents = data.read_size(self.size)
        data.pop_offset_and_seek()
        return contents
class DylibLoadCommand(LoadCommand):
    '''Load command carrying a dylib name, timestamp and two versions.'''

    def __init__(self, lc):
        Mach.LoadCommand.__init__(self, lc.command, lc.length, lc.file_off)
        self.name = None
        self.timestamp = 0
        self.current_version = 0
        self.compatibility_version = 0

    def unpack(self, mach_file, data):
        '''Read the four 32-bit fields, then the name string located at
        name_offset bytes from the start of this command.'''
        # Return value is not used; the call is retained as-is.
        mach_file.magic.get_byte_order()
        (name_offset, self.timestamp, self.current_version,
         self.compatibility_version) = data.get_n_uint32(4)
        data.seek(self.file_off + name_offset, 0)
        name_length = self.length - 24
        self.name = data.get_fixed_length_c_string(name_length)

    def __str__(self):
        header = Mach.LoadCommand.__str__(self)
        versions = "%#8.8x %#8.8x %#8.8x " % (self.timestamp,
                                              self.current_version,
                                              self.compatibility_version)
        return header + versions + self.name
class LoadDYLDLoadCommand(LoadCommand):
    '''Load command whose payload is a single name string.'''

    def __init__(self, lc):
        Mach.LoadCommand.__init__(self, lc.command, lc.length, lc.file_off)
        self.name = None

    def unpack(self, mach_file, data):
        '''Skip one 32-bit value, then read the name from the remaining
        self.length - 12 bytes.'''
        data.get_uint32()
        name_length = self.length - 12
        self.name = data.get_fixed_length_c_string(name_length)

    def __str__(self):
        return Mach.LoadCommand.__str__(self) + "%s" % self.name
class UnixThreadLoadCommand(LoadCommand):
    '''Load command holding thread register state.'''

    class ThreadState:
        '''One register set: a flavor, a count and count 32-bit values.'''

        def __init__(self):
            self.flavor = 0
            self.count = 0
            self.register_values = list()

        def unpack(self, data):
            '''Read flavor and count, then count 32-bit register values.'''
            self.flavor, self.count = data.get_n_uint32(2)
            self.register_values = data.get_n_uint32(self.count)

        def __str__(self):
            pieces = ["flavor = %u, count = %u, regs =" % (
                self.flavor, self.count)]
            for position, register_value in enumerate(self.register_values):
                # Start a new output row every eight registers.
                if position % 8 == 0:
                    pieces.append("\n ")
                pieces.append(" %#8.8x" % register_value)
            return "".join(pieces)

    def __init__(self, lc):
        Mach.LoadCommand.__init__(self, lc.command, lc.length, lc.file_off)
        self.reg_sets = list()

    def unpack(self, mach_file, data):
        '''Read a single ThreadState from data and append it to
        self.reg_sets.'''
        state = Mach.UnixThreadLoadCommand.ThreadState()
        state.unpack(data)
        self.reg_sets.append(state)

    def __str__(self):
        header = Mach.LoadCommand.__str__(self)
        return header + "".join("%s" % reg_set for reg_set in self.reg_sets)
class DYLDInfoOnlyLoadCommand(LoadCommand):
    '''Load command holding five (offset, size) pairs: rebase, bind,
    weak bind, lazy bind and export.'''

    def __init__(self, lc):
        Mach.LoadCommand.__init__(self, lc.command, lc.length, lc.file_off)
        self.rebase_off = self.rebase_size = 0
        self.bind_off = self.bind_size = 0
        self.weak_bind_off = self.weak_bind_size = 0
        self.lazy_bind_off = self.lazy_bind_size = 0
        self.export_off = self.export_size = 0

    def unpack(self, mach_file, data):
        '''Read the ten 32-bit fields in declaration order.'''
        # Return value is not used; the call is retained as-is.
        mach_file.magic.get_byte_order()
        (self.rebase_off, self.rebase_size,
         self.bind_off, self.bind_size,
         self.weak_bind_off, self.weak_bind_size,
         self.lazy_bind_off, self.lazy_bind_size,
         self.export_off, self.export_size) = data.get_n_uint32(10)

    def __str__(self):
        parts = [
            Mach.LoadCommand.__str__(self),
            "rebase_off = %#8.8x, rebase_size = %u, " % (
                self.rebase_off, self.rebase_size),
            "bind_off = %#8.8x, bind_size = %u, " % (
                self.bind_off, self.bind_size),
            "weak_bind_off = %#8.8x, weak_bind_size = %u, " % (
                self.weak_bind_off, self.weak_bind_size),
            "lazy_bind_off = %#8.8x, lazy_bind_size = %u, " % (
                self.lazy_bind_off, self.lazy_bind_size),
            "export_off = %#8.8x, export_size = %u, " % (
                self.export_off, self.export_size),
        ]
        return "".join(parts)
class DYLDSymtabLoadCommand(LoadCommand):
    '''Load command holding the eighteen dynamic symbol table fields.'''

    def __init__(self, lc):
        Mach.LoadCommand.__init__(self, lc.command, lc.length, lc.file_off)
        self.ilocalsym = self.nlocalsym = 0
        self.iextdefsym = self.nextdefsym = 0
        self.iundefsym = self.nundefsym = 0
        self.tocoff = self.ntoc = 0
        self.modtaboff = self.nmodtab = 0
        self.extrefsymoff = self.nextrefsyms = 0
        self.indirectsymoff = self.nindirectsyms = 0
        self.extreloff = self.nextrel = 0
        self.locreloff = self.nlocrel = 0

    def unpack(self, mach_file, data):
        '''Read the eighteen 32-bit fields in declaration order.'''
        # Return value is not used; the call is retained as-is.
        mach_file.magic.get_byte_order()
        (self.ilocalsym, self.nlocalsym,
         self.iextdefsym, self.nextdefsym,
         self.iundefsym, self.nundefsym,
         self.tocoff, self.ntoc,
         self.modtaboff, self.nmodtab,
         self.extrefsymoff, self.nextrefsyms,
         self.indirectsymoff, self.nindirectsyms,
         self.extreloff, self.nextrel,
         self.locreloff, self.nlocrel) = data.get_n_uint32(18)

    def __str__(self):
        # One output row per (index/offset, count) pair.
        rows = (
            "ilocalsym = %-10u, nlocalsym = %u\n" % (
                self.ilocalsym, self.nlocalsym),
            " iextdefsym = %-10u, nextdefsym = %u\n" % (
                self.iextdefsym, self.nextdefsym),
            " iundefsym = %-10u, nundefsym = %u\n" % (
                self.iundefsym, self.nundefsym),
            " tocoff = %#8.8x, ntoc = %u\n" % (
                self.tocoff, self.ntoc),
            " modtaboff = %#8.8x, nmodtab = %u\n" % (
                self.modtaboff, self.nmodtab),
            " extrefsymoff = %#8.8x, nextrefsyms = %u\n" % (
                self.extrefsymoff, self.nextrefsyms),
            " indirectsymoff = %#8.8x, nindirectsyms = %u\n" % (
                self.indirectsymoff, self.nindirectsyms),
            " extreloff = %#8.8x, nextrel = %u\n" % (
                self.extreloff, self.nextrel),
            " locreloff = %#8.8x, nlocrel = %u" % (
                self.locreloff, self.nlocrel),
        )
        return Mach.LoadCommand.__str__(self) + "".join(rows)
class SymtabLoadCommand(LoadCommand):
    '''Load command giving the symbol table and string table locations.'''

    def __init__(self, lc):
        Mach.LoadCommand.__init__(self, lc.command, lc.length, lc.file_off)
        self.symoff = self.nsyms = 0
        self.stroff = self.strsize = 0

    def unpack(self, mach_file, data):
        '''Read symoff, nsyms, stroff and strsize as 32-bit values.'''
        # Return value is not used; the call is retained as-is.
        mach_file.magic.get_byte_order()
        (self.symoff, self.nsyms,
         self.stroff, self.strsize) = data.get_n_uint32(4)

    def __str__(self):
        details = "symoff = %#8.8x, nsyms = %u, stroff = %#8.8x, strsize = %u" % (
            self.symoff, self.nsyms, self.stroff, self.strsize)
        return Mach.LoadCommand.__str__(self) + details
class UUIDLoadCommand(LoadCommand):
    '''Load command carrying the image's 16-byte UUID.'''

    def __init__(self, lc):
        Mach.LoadCommand.__init__(self, lc.command, lc.length, lc.file_off)
        self.uuid = None

    def unpack(self, mach_file, data):
        '''Read sixteen bytes, build a uuid.UUID from their hex form and
        store it on both this command and mach_file.'''
        raw = data.get_n_uint8(16)
        hex_digits = ''.join('%2.2x' % octet for octet in raw)
        self.uuid = uuid.UUID(hex_digits)
        mach_file.uuid = self.uuid

    def __str__(self):
        return Mach.LoadCommand.__str__(self) + str(self.uuid)
class DataBlobLoadCommand(LoadCommand):
    '''Load command that points at a data blob by offset and size.'''

    def __init__(self, lc):
        Mach.LoadCommand.__init__(self, lc.command, lc.length, lc.file_off)
        self.dataoff = self.datasize = 0

    def unpack(self, mach_file, data):
        '''Read dataoff and datasize as two 32-bit values.'''
        # Return value is not used; the call is retained as-is.
        mach_file.magic.get_byte_order()
        blob_offset, blob_size = data.get_n_uint32(2)
        self.dataoff = blob_offset
        self.datasize = blob_size

    def __str__(self):
        details = "dataoff = %#8.8x, datasize = %u" % (
            self.dataoff, self.datasize)
        return Mach.LoadCommand.__str__(self) + details
class EncryptionInfoLoadCommand(LoadCommand):
    '''Load command describing an encrypted file range and its id.'''

    def __init__(self, lc):
        Mach.LoadCommand.__init__(self, lc.command, lc.length, lc.file_off)
        self.cryptoff = self.cryptsize = self.cryptid = 0

    def unpack(self, mach_file, data):
        '''Read cryptoff, cryptsize and cryptid as 32-bit values.'''
        # Return value is not used; the call is retained as-is.
        mach_file.magic.get_byte_order()
        (self.cryptoff, self.cryptsize,
         self.cryptid) = data.get_n_uint32(3)

    def __str__(self):
        range_end = self.cryptoff + self.cryptsize
        details = "file-range = [%#8.8x - %#8.8x), cryptsize = %u, cryptid = %u" % (
            self.cryptoff, range_end, self.cryptsize, self.cryptid)
        return Mach.LoadCommand.__str__(self) + details
class SegmentLoadCommand(LoadCommand):
    '''Segment load command (LC_SEGMENT or LC_SEGMENT_64) together with
    the sections that follow it.'''

    def __init__(self, lc):
        Mach.LoadCommand.__init__(self, lc.command, lc.length, lc.file_off)
        self.segname = None
        self.vmaddr = self.vmsize = 0
        self.fileoff = self.filesize = 0
        self.maxprot = self.initprot = 0
        self.nsects = 0
        self.flags = 0

    def unpack(self, mach_file, data):
        '''Read the segment fields, register this segment on mach_file
        and unpack each of its nsects sections.

        Address and size fields are 64-bit wide for LC_SEGMENT_64 and
        32-bit wide otherwise.  Every section is given the index it will
        occupy in mach_file.sections before being appended there.
        '''
        is_64 = self.command.get_enum_value() == LC_SEGMENT_64
        self.segname = data.get_fixed_length_c_string(16, '', True)
        read_addresses = data.get_n_uint64 if is_64 else data.get_n_uint32
        (self.vmaddr, self.vmsize,
         self.fileoff, self.filesize) = read_addresses(4)
        (self.maxprot, self.initprot,
         self.nsects, self.flags) = data.get_n_uint32(4)
        mach_file.segments.append(self)
        for _ in range(self.nsects):
            section = Mach.Section()
            section.unpack(is_64, data)
            section.index = len(mach_file.sections)
            mach_file.sections.append(section)

    def __str__(self):
        text = Mach.LoadCommand.__str__(self)
        if self.command.get_enum_value() == LC_SEGMENT:
            address_format = "%#8.8x %#8.8x %#8.8x %#8.8x "
        else:
            address_format = "%#16.16x %#16.16x %#16.16x %#16.16x "
        text += address_format % (
            self.vmaddr, self.vmsize, self.fileoff, self.filesize)
        text += "%s %s %3u %#8.8x" % (
            vm_prot_names[self.maxprot],
            vm_prot_names[self.initprot],
            self.nsects,
            self.flags)
        return text + ' ' + self.segname
class NList:
    '''One symbol table (nlist) entry.'''

    class Type:
        '''Wrapper around the n_type byte that knows how to describe it.'''

        class Stab(dict_utils.Enum):
            '''Enumeration of the N_* debugging (stab) symbol types.'''

            enum = {
                'N_GSYM': N_GSYM,
                'N_FNAME': N_FNAME,
                'N_FUN': N_FUN,
                'N_STSYM': N_STSYM,
                'N_LCSYM': N_LCSYM,
                'N_BNSYM': N_BNSYM,
                'N_OPT': N_OPT,
                'N_RSYM': N_RSYM,
                'N_SLINE': N_SLINE,
                'N_ENSYM': N_ENSYM,
                'N_SSYM': N_SSYM,
                'N_SO': N_SO,
                'N_OSO': N_OSO,
                'N_LSYM': N_LSYM,
                'N_BINCL': N_BINCL,
                'N_SOL': N_SOL,
                'N_PARAMS': N_PARAMS,
                'N_VERSION': N_VERSION,
                'N_OLEVEL': N_OLEVEL,
                'N_PSYM': N_PSYM,
                'N_EINCL': N_EINCL,
                'N_ENTRY': N_ENTRY,
                'N_LBRAC': N_LBRAC,
                'N_EXCL': N_EXCL,
                'N_RBRAC': N_RBRAC,
                'N_BCOMM': N_BCOMM,
                'N_ECOMM': N_ECOMM,
                'N_ECOML': N_ECOML,
                'N_LENG': N_LENG,
            }

            def __init__(self, magic=0):
                dict_utils.Enum.__init__(self, magic, self.enum)

        def __init__(self, t=0):
            self.value = t

        def __str__(self):
            n_type = self.value
            # Any N_STAB bit set means the whole byte is a stab code.
            if n_type & N_STAB:
                return '%s' % Mach.NList.Type.Stab(self.value)

            kind = n_type & N_TYPE
            known_kinds = (
                (N_UNDF, 'N_UNDF'),
                (N_ABS, 'N_ABS '),
                (N_SECT, 'N_SECT'),
                (N_PBUD, 'N_PBUD'),
                (N_INDR, 'N_INDR'),
            )
            for code, label in known_kinds:
                if kind == code:
                    type_str = label
                    break
            else:
                type_str = "??? (%#2.2x)" % kind
            if n_type & N_PEXT:
                type_str += ' | PEXT'
            if n_type & N_EXT:
                type_str += ' | EXT '
            return type_str

    def __init__(self):
        self.index = 0
        self.name_offset = 0
        self.name = 0
        self.type = Mach.NList.Type()
        self.sect_idx = 0
        self.desc = 0
        self.value = 0

    def unpack(self, mach_file, data, symtab_lc):
        '''Read one nlist entry from data and resolve its name.

        The entry's index is the number of symbols mach_file already
        holds.  The value field is 64-bit when mach_file.is_64_bit() and
        32-bit otherwise.  The name is read from the string table at
        mach_file.file_off + symtab_lc.stroff + name_offset, after which
        the data offset is restored.
        '''
        self.index = len(mach_file.symbols)
        self.name_offset = data.get_uint32()
        self.type.value, self.sect_idx = data.get_n_uint8(2)
        self.desc = data.get_uint16()
        if mach_file.is_64_bit():
            self.value = data.get_uint64()
        else:
            self.value = data.get_uint32()
        string_offset = (mach_file.file_off +
                         symtab_lc.stroff +
                         self.name_offset)
        data.push_offset_and_seek(string_offset)
        self.name = data.get_c_string()
        data.pop_offset_and_seek()

    def __str__(self):
        name_display = ' "%s"' % self.name if len(self.name) else ''
        fields = (self.name_offset, self.type.value, self.type,
                  self.sect_idx, self.desc, self.value, name_display)
        return '%#8.8x %#2.2x (%-20s) %#2.2x %#4.4x %16.16x%s' % fields
class Interactive(cmd.Cmd):
    '''Interactive command interpreter to mach-o files.

    Each do_* method implements one command.  A command handler that
    returns True ends the command loop; one that returns False keeps it
    running.
    '''

    def __init__(self, mach, options):
        '''mach is the parsed mach-o object the commands operate on (its
        path is shown in the prompt); options is passed through to every
        dump call.'''
        cmd.Cmd.__init__(self)
        self.intro = 'Interactive mach-o command interpreter'
        self.prompt = 'mach-o: %s %% ' % mach.path
        self.mach = mach
        self.options = options

    def default(self, line):
        '''Catch all for unknown command, which will exit the interpreter.'''
        print("unknown command: %s" % line)
        return True

    def do_q(self, line):
        '''Quit command'''
        return True

    def do_quit(self, line):
        '''Quit command'''
        return True

    def do_header(self, line):
        '''Dump mach-o file headers'''
        self.mach.dump_header(True, self.options)
        return False

    def do_load(self, line):
        '''Dump all mach-o load commands'''
        self.mach.dump_load_commands(True, self.options)
        return False

    def do_sections(self, line):
        '''Dump all mach-o sections'''
        self.mach.dump_sections(True, self.options)
        return False

    def do_symtab(self, line):
        '''Dump all mach-o symbols in the symbol table'''
        self.mach.dump_symtab(True, self.options)
        return False
# Command line entry point: parse the options, then either compare two
# mach-o files or dump / interactively inspect each file that was given.
if __name__ == '__main__':
    parser = optparse.OptionParser(
        description='A script that parses skinny and universal mach-o files.')
    parser.add_option(
        '--arch',
        '-a',
        type='string',
        metavar='arch',
        dest='archs',
        action='append',
        help='specify one or more architectures by name')
    parser.add_option(
        '-v',
        '--verbose',
        action='store_true',
        dest='verbose',
        help='display verbose debug info',
        default=False)
    parser.add_option(
        '-H',
        '--header',
        action='store_true',
        dest='dump_header',
        help='dump the mach-o file header',
        default=False)
    parser.add_option(
        '-l',
        '--load-commands',
        action='store_true',
        dest='dump_load_commands',
        help='dump the mach-o load commands',
        default=False)
    parser.add_option(
        '-s',
        '--symtab',
        action='store_true',
        dest='dump_symtab',
        help='dump the mach-o symbol table',
        default=False)
    parser.add_option(
        '-S',
        '--sections',
        action='store_true',
        dest='dump_sections',
        help='dump the mach-o sections',
        default=False)
    parser.add_option(
        '--section',
        type='string',
        metavar='sectname',
        dest='section_names',
        action='append',
        help='Specify one or more section names to dump',
        default=[])
    parser.add_option(
        '-o',
        '--out',
        type='string',
        dest='outfile',
        help='Used in conjunction with the --section=NAME option to save a single section\'s data to disk.',
        default=False)
    parser.add_option(
        '-i',
        '--interactive',
        action='store_true',
        dest='interactive',
        help='enable interactive mode',
        default=False)
    parser.add_option(
        '-m',
        '--mangled',
        action='store_true',
        dest='find_mangled',
        help='dump all mangled names in a mach file',
        default=False)
    parser.add_option(
        '-c',
        '--compare',
        action='store_true',
        dest='compare',
        help='compare two mach files',
        default=False)
    parser.add_option(
        '-M',
        '--extract-modules',
        action='store_true',
        dest='extract_modules',
        help='Extract modules from file',
        default=False)
    parser.add_option(
        '-C',
        '--count',
        type='int',
        dest='max_count',
        help='Sets the max byte count when dumping section data',
        default=-1)

    (options, mach_files) = parser.parse_args()

    if options.extract_modules:
        # Module extraction picks its own section and needs a file to
        # write to, so it cannot be combined with --section and requires
        # the -o/--out option.
        if options.section_names:
            print("error: can't use --section option with the --extract-modules option")
            sys.exit(1)
        if not options.outfile:
            # The option is spelled --out; the message previously named a
            # non-existent --output option.
            print("error: the --out=FILE option must be specified with the --extract-modules option")
            sys.exit(1)
        options.section_names.append("__apple_ast")

    if options.compare:
        if len(mach_files) == 2:
            mach_a = Mach()
            mach_b = Mach()
            mach_a.parse(mach_files[0])
            mach_b.parse(mach_files[1])
            mach_a.compare(mach_b)
        else:
            print('error: --compare takes two mach files as arguments')
    else:
        # With no explicit dump request, default to showing the header
        # and the load commands.
        if not (options.dump_header or options.dump_load_commands or options.dump_symtab or options.dump_sections or options.find_mangled or options.section_names):
            options.dump_header = True
            options.dump_load_commands = True
        if options.verbose:
            print('options', options)
            print('mach_files', mach_files)
        for path in mach_files:
            mach = Mach()
            mach.parse(path)
            if options.interactive:
                interpreter = Mach.Interactive(mach, options)
                interpreter.cmdloop()
            else:
                mach.dump(options)