16 print(msg
, file=sys
.stderr
)
21 from colorama
import Fore
, Style
, Back
24 except ModuleNotFoundError
as e
:
25 fail(f
"Missing prerequisite python module {e.name}. "
26 "Run `python3 -m pip install --user colorama ansiwrap attrs watchdog` to install prerequisites.")
28 # Prefer to use diff_settings.py from the current working directory
29 sys
.path
.insert(0, '.')
32 except ModuleNotFoundError
:
33 fail("Unable to find diff_settings.py in the same directory.")
# Command-line interface. Arguments are registered in the order they should
# appear in --help.
parser = argparse.ArgumentParser(description="Diff MIPS assembly.")

# Positional arguments: what to diff.
parser.add_argument(
    "start",
    help="Function name or address to start diffing from.",
)
parser.add_argument(
    "end",
    nargs="?",
    help="Address to end diff at.",
)

# Input selection.
parser.add_argument(
    "-o",
    dest="diff_obj",
    action="store_true",
    help="Diff .o files rather than a whole binary. This makes it possible to see symbol names. (Recommended)",
)
parser.add_argument(
    "--base-asm",
    dest="base_asm",
    metavar="FILE",
    help="Read assembly from given file instead of configured base img.",
)
parser.add_argument(
    "--write-asm",
    dest="write_asm",
    metavar="FILE",
    help="Write the current assembly output to file, e.g. for use with --base-asm.",
)
parser.add_argument(
    "-m",
    "--make",
    dest="make",
    action="store_true",
    help="Automatically run 'make' on the .o file or binary before diffing.",
)

# Output shaping and comparison tweaks.
parser.add_argument(
    "-l",
    "--skip-lines",
    dest="skip_lines",
    type=int,
    default=0,
    help="Skip the first N lines of output.",
)
parser.add_argument(
    "-f",
    "--stop-jr-ra",
    dest="stop_jrra",
    action="store_true",
    help="Stop disassembling at the first 'jr ra'. Some functions have multiple return points, so use with care!",
)
parser.add_argument(
    "-i",
    "--ignore-large-imms",
    dest="ignore_large_imms",
    action="store_true",
    help="Pretend all large enough immediates are the same.",
)
# Note the inverted sense: the flag *clears* show_branches, which is on by
# default.
parser.add_argument(
    "-B",
    "--no-show-branches",
    dest="show_branches",
    action="store_false",
    help="Don't visualize branches/branch targets.",
)
# Kept as a string here; it is turned into an integer after parsing.
parser.add_argument(
    "-S",
    "--base-shift",
    dest="base_shift",
    type=str,
    default="0",
    help=(
        "Diff position X in our img against position X + shift in the base img. "
        'Arithmetic is allowed, so e.g. |-S "0x1234 - 0x4321"| is a reasonable '
        "flag to pass if it is known that position 0x1234 in the base img syncs "
        "up with position 0x4321 in our img. Not supported together with -o."
    ),
)

# Interactive behaviour and layout.
parser.add_argument(
    "-w",
    "--watch",
    dest="watch",
    action="store_true",
    help=(
        "Automatically update when source/object files change. "
        "Recommended in combination with -m."
    ),
)
parser.add_argument(
    "--width",
    dest="column_width",
    type=int,
    default=50,
    help="Sets the width of the left and right view column.",
)
70 # Project-specific flags, e.g. different versions/make arguments.
71 if hasattr(diff_settings
, "add_custom_arguments"):
72 diff_settings
.add_custom_arguments(parser
)
74 args
= parser
.parse_args()
76 # Set imgs, map file and make flags in a project-specific manner.
78 diff_settings
.apply(config
, args
)
80 baseimg
= config
.get('baseimg', None)
81 myimg
= config
.get('myimg', None)
82 mapfile
= config
.get('mapfile', None)
83 makeflags
= config
.get('makeflags', [])
84 source_directories
= config
.get('source_directories', None)
86 MAX_FUNCTION_SIZE_LINES
= 1024
87 MAX_FUNCTION_SIZE_BYTES
= 1024 * 4
101 BUFFER_CMD
= ["tail", "-c", str(10**9)]
102 LESS_CMD
= ["less", "-Ric"]
105 FS_WATCH_EXTENSIONS
= ['.c', '.h']
109 binutils_prefix
= None
111 for binutils_cand
in ['mips-linux-gnu-', 'mips64-elf-']:
113 subprocess
.check_call([binutils_cand
+ "objdump", "--version"], stdout
=subprocess
.DEVNULL
, stderr
=subprocess
.DEVNULL
)
114 binutils_prefix
= binutils_cand
116 except subprocess
.CalledProcessError
:
118 except FileNotFoundError
:
121 if not binutils_prefix
:
122 fail("Missing binutils; please ensure mips-linux-gnu-objdump or mips64-elf-objdump exist.")
124 def eval_int(expr
, emsg
=None):
126 ret
= ast
.literal_eval(expr
)
127 if not isinstance(ret
, int):
128 raise Exception("not an integer")
def run_make(target, capture_output=False):
    """Run ``make`` on *target* using the configured ``makeflags``.

    Args:
        target: The make target, appended after ``makeflags`` on the
            command line.
        capture_output: When true, stdout and stderr are piped and the
            finished process is returned without raising on a non-zero exit
            status, so the caller can inspect ``returncode``/``stderr``/
            ``stdout`` itself. When false, make inherits the terminal.

    Returns:
        The ``subprocess.CompletedProcess`` when ``capture_output`` is true,
        otherwise ``None``.

    Raises:
        subprocess.CalledProcessError: only in the non-capturing mode, when
            make exits with a non-zero status.
    """
    command = ["make"] + makeflags + [target]
    # The flag must select between the two invocations; without this branch
    # the capturing call would always return first and the check_call below
    # could never run.
    if capture_output:
        return subprocess.run(command, stderr=subprocess.PIPE, stdout=subprocess.PIPE)
    subprocess.check_call(command)
141 def restrict_to_function(dump
, fn_name
):
143 search
= f
'<{fn_name}>:'
145 for line
in dump
.split('\n'):
147 if len(out
) >= MAX_FUNCTION_SIZE_LINES
:
152 return '\n'.join(out
)
def run_objdump(cmd):
    """Run objdump as described by *cmd* and return its textual output.

    Args:
        cmd: A 3-tuple ``(flags, target, restrict)``. ``flags`` is a list of
            objdump arguments, ``target`` is the file to disassemble, and
            ``restrict`` is either ``None`` or a function name to cut the
            dump down to via ``restrict_to_function``.

    Returns:
        The dump as a string: the whole objdump output when ``restrict`` is
        ``None``, otherwise only the part selected by
        ``restrict_to_function``.

    Raises:
        subprocess.CalledProcessError: if objdump exits with a non-zero
            status.
    """
    flags, target, restrict = cmd
    argv = [binutils_prefix + "objdump"] + flags + [target]
    out = subprocess.check_output(argv, universal_newlines=True)
    if restrict is None:
        # Callers that pass restrict=None still need the dump text back;
        # falling off the end here would hand them None.
        return out
    return restrict_to_function(out, restrict)
161 base_shift
= eval_int(args
.base_shift
, "Failed to parse --base-shift (-S) argument as an integer.")
163 def search_map_file(fn_name
):
165 fail(f
"No map file configured; cannot find function {fn_name}.")
168 with
open(mapfile
) as f
:
169 lines
= f
.read().split('\n')
171 fail(f
"Failed to open map file {mapfile} for reading.")
179 if line
.startswith(' .text'):
180 cur_objfile
= line
.split()[3]
181 if 'load address' in line
:
182 tokens
= last_line
.split() + line
.split()
183 ram
= int(tokens
[1], 0)
184 rom
= int(tokens
[5], 0)
185 ram_to_rom
= rom
- ram
186 if line
.endswith(' ' + fn_name
):
187 ram
= int(line
.split()[0], 0)
188 if cur_objfile
is not None and ram_to_rom
is not None:
189 cands
.append((cur_objfile
, ram
+ ram_to_rom
))
191 except Exception as e
:
193 traceback
.print_exc()
194 fail(f
"Internal error while parsing map file")
197 fail(f
"Found multiple occurrences of function {fn_name} in map file.")
204 fail("--base-shift not compatible with -o")
205 if args
.end
is not None:
206 fail("end address not supported together with -o")
207 if args
.start
.startswith('0'):
208 fail("numerical start address not supported with -o; pass a function name")
210 objfile
, _
= search_map_file(args
.start
)
212 fail("Not able to find .o file for function.")
217 if not os
.path
.isfile(objfile
):
218 fail("Not able to find .o file for function.")
220 refobjfile
= "expected/" + objfile
221 if not os
.path
.isfile(refobjfile
):
222 fail(f
'Please ensure an OK .o file exists at "{refobjfile}".')
224 objdump_flags
= ["-drz"]
227 (objdump_flags
, refobjfile
, args
.start
),
228 (objdump_flags
, objfile
, args
.start
)
232 if not baseimg
or not myimg
:
233 fail("Missing myimg/baseimg in config.")
236 start_addr
= eval_int(args
.start
)
237 if start_addr
is None:
238 _
, start_addr
= search_map_file(args
.start
)
239 if start_addr
is None:
240 fail("Not able to find function in map file.")
241 if args
.end
is not None:
242 end_addr
= eval_int(args
.end
, "End address must be an integer expression.")
244 end_addr
= start_addr
+ MAX_FUNCTION_SIZE_BYTES
245 objdump_flags
= ['-Dz', '-bbinary', '-mmips', '-EB']
246 flags1
= [f
"--start-address={start_addr + base_shift}", f
"--stop-address={end_addr + base_shift}"]
247 flags2
= [f
"--start-address={start_addr}", f
"--stop-address={end_addr}"]
250 (objdump_flags
+ flags1
, baseimg
, None),
251 (objdump_flags
+ flags2
, myimg
, None)
254 # Alignment with ANSI colors is broken, let's fix it.
def ansi_ljust(s, width):
    """Right-pad *s* with spaces until it measures *width* columns.

    The length of *s* is taken from ``ansiwrap.ansilen`` rather than
    ``len`` (presumably so colour escape sequences are not counted; confirm
    against the ansiwrap documentation). A string that already measures
    *width* or more is returned unchanged.
    """
    shortfall = width - ansiwrap.ansilen(s)
    padding = ' ' * max(shortfall, 0)
    return s + padding
262 re_int
= re
.compile(r
'[0-9]+')
263 re_comments
= re
.compile(r
'<.*?>')
264 re_regs
= re
.compile(r
'\b(a[0-3]|t[0-9]|s[0-7]|at|v[01]|f[12]?[0-9]|f3[01]|fp)\b')
265 re_sprel
= re
.compile(r
',([1-9][0-9]*|0x[1-9a-f][0-9a-f]*)\(sp\)')
266 re_large_imm
= re
.compile(r
'-?[1-9][0-9]{2,}|-?0x[0-9a-f]{3,}')
267 forbidden
= set(string
.ascii_letters
+ '_')
268 branch_likely_instructions
= set([
269 'beql', 'bnel', 'beqzl', 'bnezl', 'bgezl', 'bgtzl', 'blezl', 'bltzl',
272 branch_instructions
= set([
273 'b', 'beq', 'bne', 'beqz', 'bnez', 'bgez', 'bgtz', 'blez', 'bltz',
275 ] + list(branch_likely_instructions
))
277 def hexify_int(row
, pat
):
280 # leave one-digit ints alone
282 start
, end
= pat
.span()
283 if start
and row
[start
- 1] in forbidden
:
285 if end
< len(row
) and row
[end
] in forbidden
:
287 return hex(int(full
))
289 def parse_relocated_line(line
):
291 ind2
= line
.rindex(',')
293 ind2
= line
.rindex('\t')
294 before
= line
[:ind2
+1]
295 after
= line
[ind2
+1:]
296 ind2
= after
.find('(')
298 imm
, after
= after
, ''
300 imm
, after
= after
[:ind2
], after
[ind2
:]
303 return before
, imm
, after
305 def process_reloc(row
, prev
):
306 before
, imm
, after
= parse_relocated_line(prev
)
307 repl
= row
.split()[-1]
309 if before
.strip() == 'jal' and not imm
.startswith('0x'):
311 repl
+= '+' + imm
if int(imm
,0) > 0 else imm
312 if 'R_MIPS_LO16' in row
:
313 repl
= f
'%lo({repl})'
314 elif 'R_MIPS_HI16' in row
:
315 # Ideally we'd pair up R_MIPS_LO16 and R_MIPS_HI16 to generate a
316 # correct addend for each, but objdump doesn't give us the order of
317 # the relocations, so we can't find the right LO16. :(
318 repl
= f
'%hi({repl})'
320 assert 'R_MIPS_26' in row
, f
"unknown relocation type '{row}'"
321 return before
+ repl
+ after
330 if not args
.diff_obj
:
332 if lines
and not lines
[-1]:
336 if args
.diff_obj
and ('>:' in row
or not row
):
340 if diff_rows
[-1] != '<delay-slot>':
341 diff_rows
[-1] = process_reloc(row
, diff_rows
[-1])
342 originals
[-1] = process_reloc(row
, originals
[-1])
345 row
= re
.sub(re_comments
, '', row
)
347 tabs
= row
.split('\t')
348 row
= '\t'.join(tabs
[2:])
349 line_num
= tabs
[0].strip()
350 row_parts
= row
.split('\t', 1)
351 mnemonic
= row_parts
[0].strip()
352 if mnemonic
not in branch_instructions
:
353 row
= re
.sub(re_int
, lambda s
: hexify_int(row
, s
), row
)
358 mnemonic
= '<delay-slot>'
359 if mnemonic
in branch_likely_instructions
:
361 row
= re
.sub(re_regs
, '<reg>', row
)
362 row
= re
.sub(re_sprel
, ',addr(sp)', row
)
363 if args
.ignore_large_imms
:
364 row
= re
.sub(re_large_imm
, '<imm>', row
)
366 # Replace tabs with spaces
367 mnemonics
.append(mnemonic
)
368 diff_rows
.append(row
)
369 originals
.append(original
)
370 line_nums
.append(line_num
)
371 if mnemonic
in branch_instructions
:
372 target
= row_parts
[1].strip().split(',')[-1]
373 if mnemonic
in branch_likely_instructions
:
374 target
= hex(int(target
, 16) - 4)[2:]
375 branch_targets
.append(target
)
377 branch_targets
.append(None)
378 if args
.stop_jrra
and mnemonic
== 'jr' and row_parts
[1].strip() == 'ra':
382 originals
= [original
.strip() for original
in originals
]
383 originals
= [''.join(f
'{o:<8s}' for o
in original
.split('\t')) for original
in originals
]
384 # return diff_rows, diff_rows, line_nums
385 return mnemonics
, diff_rows
, originals
, line_nums
, branch_targets
def format_single_line_diff(line1, line2, column_width):
    """Lay out one diff row as two side-by-side columns.

    Each of *line1* and *line2* is passed through ``ansi_ljust`` with
    *column_width*, and the two results are concatenated with no separator.
    """
    left_column = ansi_ljust(line1, column_width)
    right_column = ansi_ljust(line2, column_width)
    return f"{left_column}{right_column}"
391 def __init__(self
, base_index
):
392 self
.color_index
= base_index
393 self
.symbol_colors
= {}
395 def color_symbol(self
, s
, t
=None):
397 color
= self
.symbol_colors
[s
]
399 color
= COLOR_ROTATION
[self
.color_index
% len(COLOR_ROTATION
)]
400 self
.color_index
+= 1
401 self
.symbol_colors
[s
] = color
403 return f
'{color}{t}{Fore.RESET}'
def normalize_large_imms(row):
    """Return *row* in the form used when comparing two lines for equality.

    When ``args.ignore_large_imms`` is set, every match of ``re_large_imm``
    in *row* is replaced by the placeholder ``<imm>``, so two lines that
    differ only in such immediates compare equal. Otherwise *row* is
    returned unchanged.
    """
    if args.ignore_large_imms:
        row = re.sub(re_large_imm, '<imm>', row)
    # The result must be handed back: callers compare the return values of
    # two calls, and an implicit None would make every pair look equal.
    return row
410 def do_diff(basedump
, mydump
):
411 asm_lines1
= basedump
.split('\n')
412 asm_lines2
= mydump
.split('\n')
417 # output.append(sha1sum(mydump))
419 mnemonics1
, asm_lines1
, originals1
, line_nums1
, branch_targets1
= process(asm_lines1
)
420 mnemonics2
, asm_lines2
, originals2
, line_nums2
, branch_targets2
= process(asm_lines2
)
422 sc1
= SymbolColorer(0)
423 sc2
= SymbolColorer(0)
424 sc3
= SymbolColorer(4)
425 sc4
= SymbolColorer(4)
426 sc5
= SymbolColorer(0)
427 sc6
= SymbolColorer(0)
431 if args
.show_branches
:
432 for (bts
, btset
, sc
) in [(branch_targets1
, bts1
, sc5
), (branch_targets2
, bts2
, sc6
)]:
436 sc
.color_symbol(bt
+ ":")
438 differ
: difflib
.SequenceMatcher
= difflib
.SequenceMatcher(a
=mnemonics1
, b
=mnemonics2
, autojunk
=False)
439 for (tag
, i1
, i2
, j1
, j2
) in differ
.get_opcodes():
440 lines1
= asm_lines1
[i1
:i2
]
441 lines2
= asm_lines2
[j1
:j2
]
443 for k
, (line1
, line2
) in enumerate(itertools
.zip_longest(lines1
, lines2
)):
451 original1
= originals1
[i1
+k
]
452 line_num1
= line_nums1
[i1
+k
]
457 original2
= originals2
[j1
+k
]
458 line_num2
= line_nums2
[j1
+k
]
463 line_color
= Fore
.RESET
466 if normalize_large_imms(original1
) == normalize_large_imms(original2
):
467 out1
= f
'{original1}'
468 out2
= f
'{original2}'
469 elif line1
== '<delay-slot>':
470 out1
= f
'{Style.DIM}{original1}'
471 out2
= f
'{Style.DIM}{original2}'
473 line_color
= Fore
.YELLOW
475 out1
= f
'{Fore.YELLOW}{original1}{Style.RESET_ALL}'
476 out2
= f
'{Fore.YELLOW}{original2}{Style.RESET_ALL}'
477 out1
= re
.sub(re_regs
, lambda s
: sc1
.color_symbol(s
.group()), out1
)
478 out2
= re
.sub(re_regs
, lambda s
: sc2
.color_symbol(s
.group()), out2
)
479 out1
= re
.sub(re_sprel
, lambda s
: sc3
.color_symbol(s
.group()), out1
)
480 out2
= re
.sub(re_sprel
, lambda s
: sc4
.color_symbol(s
.group()), out2
)
481 elif tag
in ['replace', 'equal']:
483 line_color
= Fore
.BLUE
484 out1
= f
"{Fore.BLUE}{original1}{Style.RESET_ALL}"
485 out2
= f
"{Fore.BLUE}{original2}{Style.RESET_ALL}"
486 elif tag
== 'delete':
488 line_color
= Fore
.RED
489 out1
= f
"{Fore.RED}{original1}{Style.RESET_ALL}"
491 elif tag
== 'insert':
493 line_color
= Fore
.GREEN
495 out2
= f
"{Fore.GREEN}{original2}{Style.RESET_ALL}"
501 line_num1
= line_num1
if out1
else ''
502 line_num2
= line_num2
if out2
else ''
504 if args
.show_branches
and out1
:
505 if line_num1
in bts1
:
506 in_arrow1
= sc5
.color_symbol(line_num1
, '~>')
507 if branch_targets1
[i1
+k
] is not None:
508 out_arrow1
= ' ' + sc5
.color_symbol(branch_targets1
[i1
+k
] + ":", '~>')
509 if args
.show_branches
and out2
:
510 if line_num2
in bts2
:
511 in_arrow2
= sc6
.color_symbol(line_num2
, '~>')
512 if branch_targets2
[j1
+k
] is not None:
513 out_arrow2
= ' ' + sc6
.color_symbol(branch_targets2
[j1
+k
] + ":", '~>')
515 out1
= f
"{line_color}{line_num1} {in_arrow1} {out1}{Style.RESET_ALL}{out_arrow1}"
516 out2
= f
"{line_color}{line_prefix} {line_num2} {in_arrow2} {out2}{Style.RESET_ALL}{out_arrow2}"
517 output
.append(format_single_line_diff(out1
, out2
, args
.column_width
))
519 return output
[args
.skip_lines
:]
522 def debounced_fs_watch(targets
, outq
, debounce_delay
):
523 import watchdog
.events
524 import watchdog
.observers
526 class WatchEventHandler(watchdog
.events
.FileSystemEventHandler
):
527 def __init__(self
, queue
, file_targets
):
529 self
.file_targets
= file_targets
def on_modified(self, ev):
    """Forward a file-modification event to ``changed``.

    Only ``watchdog.events.FileModifiedEvent`` instances are forwarded,
    using the event's ``src_path``; any other event type is ignored.
    """
    if not isinstance(ev, watchdog.events.FileModifiedEvent):
        return
    self.changed(ev.src_path)
def on_moved(self, ev):
    """Forward a file-move event to ``changed``.

    Only ``watchdog.events.FileMovedEvent`` instances are forwarded, and it
    is the destination path (``dest_path``) that is reported; any other
    event type is ignored.
    """
    if not isinstance(ev, watchdog.events.FileMovedEvent):
        return
    self.changed(ev.dest_path)
539 def should_notify(self
, path
):
540 for target
in self
.file_targets
:
543 if args
.make
and any(path
.endswith(suffix
) for suffix
in FS_WATCH_EXTENSIONS
):
def changed(self, path):
    """Record a change to *path* if it is one we care about.

    When ``self.should_notify(path)`` is true, the current ``time.time()``
    value is put on ``self.queue``; otherwise nothing happens.
    """
    if not self.should_notify(path):
        return
    self.queue.put(time.time())
551 def debounce_thread():
552 listenq
= queue
.Queue()
554 event_handler
= WatchEventHandler(listenq
, file_targets
)
555 observer
= watchdog
.observers
.Observer()
557 for target
in targets
:
558 if os
.path
.isdir(target
):
559 observer
.schedule(event_handler
, target
, recursive
=True)
561 file_targets
.append(target
)
562 target
= os
.path
.dirname(target
)
563 if target
not in observed
:
565 observer
.schedule(event_handler
, target
)
571 delay
= t
+ debounce_delay
- time
.time()
574 # consume entire queue
578 t
= listenq
.get(block
=False)
583 th
= threading
.Thread(target
=debounce_thread
, daemon
=True)
588 def __init__(self
, basedump
, mydump
):
589 self
.basedump
= basedump
594 if self
.emsg
is not None:
597 output
= '\n'.join(do_diff(self
.basedump
, self
.mydump
))
599 # Pipe the output through 'tail' and only then to less, to ensure the
600 # write call doesn't block. ('tail' has to buffer all its input before
601 # it starts writing.) This also means we don't have to deal with pipe
603 buffer_proc
= subprocess
.Popen(BUFFER_CMD
, stdin
=subprocess
.PIPE
,
604 stdout
=subprocess
.PIPE
)
605 less_proc
= subprocess
.Popen(LESS_CMD
, stdin
=buffer_proc
.stdout
)
606 buffer_proc
.stdin
.write(output
.encode())
607 buffer_proc
.stdin
.close()
608 buffer_proc
.stdout
.close()
609 return (buffer_proc
, less_proc
)
612 proca
, procb
= self
.run_less()
616 def run_async(self
, watch_queue
):
617 self
.watch_queue
= watch_queue
618 self
.ready_queue
= queue
.Queue()
619 self
.pending_update
= None
620 dthread
= threading
.Thread(target
=self
.display_thread
)
622 self
.ready_queue
.get()
624 def display_thread(self
):
625 proca
, procb
= self
.run_less()
626 self
.less_proc
= procb
627 self
.ready_queue
.put(0)
631 self
.less_proc
= None
634 os
.system("tput reset")
635 if ret
!= 0 and self
.pending_update
is not None:
636 # killed by program with the intent to refresh
637 msg
, error
= self
.pending_update
638 self
.pending_update
= None
644 proca
, procb
= self
.run_less()
645 self
.less_proc
= procb
646 self
.ready_queue
.put(0)
648 # terminated by user, or killed
649 self
.watch_queue
.put(None)
650 self
.ready_queue
.put(0)
653 def progress(self
, msg
):
654 # Write message to top-left corner
655 sys
.stdout
.write("\x1b7\x1b[1;1f{}\x1b8".format(msg
+ " "))
658 def update(self
, text
, error
):
659 if not error
and not self
.emsg
and text
== self
.mydump
:
660 self
.progress("Unchanged. ")
662 self
.pending_update
= (text
, error
)
663 if not self
.less_proc
:
665 self
.less_proc
.kill()
666 self
.ready_queue
.get()
669 if not self
.less_proc
:
671 self
.less_proc
.kill()
672 self
.ready_queue
.get()
677 make_target
, basecmd
, mycmd
= dump_objfile()
679 make_target
, basecmd
, mycmd
= dump_binary()
681 if args
.write_asm
is not None:
682 mydump
= run_objdump(mycmd
)
683 with
open(args
.write_asm
) as f
:
685 print(f
"Wrote assembly to {args.write_asm}.")
688 if args
.base_asm
is not None:
689 with
open(args
.base_asm
) as f
:
692 basedump
= run_objdump(basecmd
)
694 mydump
= run_objdump(mycmd
)
696 display
= Display(basedump
, mydump
)
702 yn
= input("Warning: watch-mode (-w) enabled without auto-make (-m). You will have to run make manually. Ok? (Y/n) ")
703 if yn
.lower() == 'n':
707 if hasattr(diff_settings
, "watch_sources_for_target"):
708 watch_sources
= diff_settings
.watch_sources_for_target(make_target
)
709 watch_sources
= watch_sources
or source_directories
710 if not watch_sources
:
711 fail("Missing source_directories config, don't know what to watch.")
713 watch_sources
= [make_target
]
715 debounced_fs_watch(watch_sources
, q
, DEBOUNCE_DELAY
)
725 last_build
= time
.time()
727 display
.progress("Building...")
728 ret
= run_make(make_target
, capture_output
=True)
729 if ret
.returncode
!= 0:
730 display
.update(ret
.stderr
.decode() or ret
.stdout
.decode(), error
=True)
732 mydump
= run_objdump(mycmd
)
733 display
.update(mydump
, error
=False)
734 except KeyboardInterrupt: