2 # ScratchABit - interactive disassembler
4 # Copyright (c) 2015 Paul Sokolovsky
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation, either version 3 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program. If not, see <http://www.gnu.org/licenses/>.
31 from picotui
.widgets
import *
32 from picotui
import editorext
as editor
33 from picotui
.editorext
import Viewer
44 print("%08x %s" % (p
.cmd
.ea
, p
.cmd
.disasm
))
45 p
.cmd
.ea
+= p
.cmd
.size
50 def __init__(self
, f
, ctrl
):
54 def add_line(self
, addr
, line
):
55 line
= ("%08x " % addr
) + line
.indent
+ line
.render() + "\n"
57 if self
.cnt
% 256 == 0:
58 self
.ctrl
.show_status("Writing: 0x%x" % addr
)
62 class Editor(editor
.EditorExt
):
64 def __init__(self
, *args
):
65 super().__init
__(*args
)
70 def set_model(self
, model
):
72 self
.set_lines(model
.lines())
73 # Invalidate top_line. Assuming goto_*() will be called
75 self
.top_line
= sys
.maxsize
77 def show_line(self
, l
, i
):
79 if not isinstance(l
, str):
84 b
= self
.model
.AS
.get_bytes(l
.ea
, l
.size
)
85 bin
= str(binascii
.hexlify(b
[:show_bytes
]), "ascii")
86 if l
.size
> show_bytes
:
88 res
+= idaapi
.fillstr(bin
, show_bytes
* 2 + 1)
89 res
+= l
.indent
+ l
.render()
90 super().show_line(res
, i
)
92 def goto_addr(self
, to_addr
, from_addr
=None):
94 self
.show_status("No address-like value to go to")
97 if isinstance(to_addr
, tuple):
98 to_addr
, subno
= to_addr
99 adj_addr
= self
.model
.AS
.adjust_addr_reverse(to_addr
)
101 self
.show_status("Unknown address: 0x%x" % to_addr
)
105 # If we can position cursor within current screen, do that,
107 no
= self
.model
.addr2line_no(to_addr
, subno
)
109 if self
.line_visible(no
):
111 if from_addr
is not None:
112 self
.addr_stack
.append(from_addr
)
115 # Otherwise, re-render model around needed address, and redraw screen
117 model
= engine
.render_partial_around(to_addr
, 0, HEIGHT
* 2)
118 self
.show_status("Rendering time: %fs" % (time
.time() - t
))
120 self
.show_status("Unknown address: 0x%x" % to_addr
)
122 self
.set_model(model
)
124 no
= self
.model
.addr2line_no(to_addr
, subno
)
126 if from_addr
is not None:
127 self
.addr_stack
.append(from_addr
)
128 if not self
.goto_line(no
):
129 # Need to redraw always, because we changed underlying model
132 self
.show_status("Unknown address: %x" % to_addr
)
134 def update_model(self
, stay_on_real
=False):
135 """Re-render model and update screen in such way that cursor stayed
136 on the same line (as far as possible).
137 stay_on_real == False - try to stay on same relative line no. for
139 stay_on_real == True - try to stay on the line which contains real
140 bytes for the current address (use this if you know that cursor
141 stayed on such line before the update).
143 addr
, subno
= self
.cur_addr_subno()
145 model
= engine
.render_partial_around(addr
, subno
, HEIGHT
* 2)
146 self
.show_status("Rendering time: %fs" % (time
.time() - t
))
147 self
.set_model(model
)
149 self
.cur_line
= model
.target_addr_lineno_real
151 self
.cur_line
= model
.target_addr_lineno
152 self
.top_line
= self
.cur_line
- self
.row
153 #log.debug("update_model: addr=%x, row=%d, cur_line=%d, top_line=%d" % (addr, self.row, self.cur_line, self.top_line))
156 def handle_cursor_keys(self
, key
):
158 if super().handle_cursor_keys(key
):
159 if self
.cur_line
== cl
:
161 #log.debug("handle_cursor_keys: cur: %d, total: %d", self.cur_line, self.total_lines)
162 if self
.cur_line
<= HEIGHT
or self
.total_lines
- self
.cur_line
<= HEIGHT
:
163 log
.debug("handle_cursor_keys: triggering update")
171 line
= self
.get_cur_line()
174 # Address of the next line. It may be the same address as the
175 # current line, as several lines may "belong" to the same address,
176 # (virtual lines like headers, etc.)
177 def next_line_addr(self
):
179 return self
.content
[self
.cur_line
+ 1].ea
183 # Return next address following the current line. May need to skip
186 addr
= self
.cur_addr()
187 n
= self
.cur_line
+ 1
189 while self
.content
[n
].ea
== addr
:
191 return self
.content
[n
].ea
def cur_addr_subno(self):
    """Return the ``(ea, subno)`` pair of the current line.

    The current line is whatever ``self.get_cur_line()`` returns; its
    ``ea`` and ``subno`` attributes are read and handed back as a 2-tuple.
    """
    current = self.get_cur_line()
    ea, subno = current.ea, current.subno
    return ea, subno
199 def cur_operand_no(self
, line
):
200 col
= self
.col
- engine
.DisasmObj
.LEADER_SIZE
- len(line
.indent
)
201 #self.show_status("Enter pressed: %s, %s" % (col, line))
202 for i
, pos
in enumerate(line
.arg_pos
):
203 if pos
[0] <= col
<= pos
[1]:
def analyze_status(self, cnt):
    """Show the running instruction count on the status line.

    Passed as the progress callback to ``engine.analyze()`` by the key
    handler; ``cnt`` is formatted with ``%d`` into the status message.
    """
    message = "Analyzing (%d insts so far)" % cnt
    self.show_status(message)
210 def write_func(self
, addr
):
211 func
= self
.model
.AS
.lookup_func(addr
)
213 funcname
= self
.model
.AS
.get_label(func
.start
)
214 outfile
= funcname
+ ".lst"
215 with
open(outfile
, "w") as f
:
216 model
= TextSaveModel(f
, self
)
217 for start
, end
in func
.get_ranges():
219 start
= engine
.render_from(model
, start
, 1)
223 def handle_edit_key(self
, key
):
225 return self
.handle_key_unprotected(key
)
226 except Exception as e
:
227 log
.exception("Exception processing user command")
232 self
.dialog_box(L
, T
, W
, H
)
233 v
= Viewer(L
+ 1, T
+ 1, W
- 2, H
- 2)
236 "Exception occured processing the command. Press Esc to continue.",
237 "Recommended action is saving database, quitting and comparing",
238 "database files with backup copies for possibility of data loss",
239 "or corruption. The exception was also logged to scratchabit.log.",
240 "Please report way to reproduce it to",
241 "https://github.com/pfalcon/ScratchABit/issues",
243 ] + traceback
.format_exc().splitlines())
248 def handle_key_unprotected(self
, key
):
249 line
= self
.get_cur_line()
250 if key
== editor
.KEY_ENTER
:
251 line
= self
.get_cur_line()
252 log
.info("Enter pressed: %s" % line
)
253 op_no
= self
.cur_operand_no(line
)
254 self
.show_status("Enter pressed: %s, %s" % (self
.col
, op_no
))
256 # No longer try to jump only to addresses in args, parse
257 # textual representation below
258 if False and isinstance(line
, engine
.DisasmObj
):
261 to_addr
= o
.get_addr()
263 o
= line
.get_operand_addr()
265 to_addr
= o
.get_addr()
267 pos
= self
.col
- line
.LEADER_SIZE
- len(line
.indent
)
268 word
= utils
.get_word_at_pos(line
.cache
, pos
)
270 if word
[0].isdigit():
271 to_addr
= int(word
, 0)
273 to_addr
= self
.model
.AS
.resolve_label(word
)
275 self
.show_status("Unknown address: %s" % word
)
277 self
.goto_addr(to_addr
, from_addr
=self
.cur_addr_subno())
278 elif key
== editor
.KEY_ESC
:
280 self
.show_status("Returning")
281 self
.goto_addr(self
.addr_stack
.pop())
283 return editor
.KEY_QUIT
284 elif key
== b
"\x1b[5;5~": # Ctrl+PgUp
285 self
.goto_addr(self
.model
.AS
.min_addr(), from_addr
=line
.ea
)
286 elif key
== b
"\x1b[6;5~": # Ctrl+PgDn
287 self
.goto_addr(self
.model
.AS
.max_addr(), from_addr
=line
.ea
)
289 addr
= self
.cur_addr()
290 self
.show_status("Analyzing at %x" % addr
)
291 engine
.add_entrypoint(addr
, False)
292 engine
.analyze(self
.analyze_status
)
295 addr
= self
.cur_addr()
296 fl
= self
.model
.AS
.get_flags(addr
)
297 if fl
not in (self
.model
.AS
.DATA
, self
.model
.AS
.UNK
):
298 self
.show_status("Undefine first")
300 if fl
== self
.model
.AS
.UNK
:
301 self
.model
.AS
.set_flags(addr
, 1, self
.model
.AS
.DATA
, self
.model
.AS
.DATA_CONT
)
303 sz
= self
.model
.AS
.get_unit_size(addr
)
304 self
.model
.undefine_unit(addr
)
307 self
.model
.AS
.set_flags(addr
, sz
, self
.model
.AS
.DATA
, self
.model
.AS
.DATA_CONT
)
310 addr
= self
.cur_addr()
311 fl
= self
.model
.AS
.get_flags(addr
)
312 if fl
not in (self
.model
.AS
.UNK
, self
.model
.AS
.DATA
):
313 self
.show_status("Undefine first")
318 b
= self
.model
.AS
.get_byte(addr
)
319 fl
= self
.model
.AS
.get_flags(addr
)
320 if not (0x20 <= b
<= 0x7e or b
in (0x0a, 0x0d)):
324 if fl
not in (self
.model
.AS
.UNK
, self
.model
.AS
.DATA
, self
.model
.AS
.DATA_CONT
):
327 if c
< '0' or c
in string
.punctuation
:
333 self
.model
.AS
.set_flags(self
.cur_addr(), sz
, self
.model
.AS
.STR
, self
.model
.AS
.DATA_CONT
)
334 self
.model
.AS
.make_unique_label(self
.cur_addr(), label
)
337 addr
= self
.cur_addr()
338 self
.model
.undefine_unit(addr
)
342 op_no
= self
.cur_operand_no(self
.get_cur_line())
344 addr
= self
.cur_addr()
345 subtype
= self
.model
.AS
.get_arg_prop(addr
, op_no
, "subtype")
346 if subtype
!= engine
.IMM_ADDR
:
348 engine
.IMM_UHEX
: engine
.IMM_UDEC
,
349 engine
.IMM_UDEC
: engine
.IMM_UHEX
,
351 self
.model
.AS
.set_arg_prop(addr
, op_no
, "subtype", next_subtype
[subtype
])
353 self
.show_status("Changed arg #%d to %s" % (op_no
, next_subtype
[subtype
]))
355 addr
= self
.cur_addr()
356 line
= self
.get_cur_line()
357 o
= line
.get_operand_addr()
359 self
.show_status("Cannot convert operand to offset")
361 if o
.type != idaapi
.o_imm
or not self
.model
.AS
.is_valid_addr(o
.get_addr()):
362 self
.show_status("Cannot convert operand to offset: #%s: %s" % (o
.n
, o
.type))
365 if self
.model
.AS
.get_arg_prop(addr
, o
.n
, "subtype") == engine
.IMM_ADDR
:
366 self
.model
.AS
.unmake_arg_offset(addr
, o
.n
, o
.get_addr())
368 self
.model
.AS
.make_arg_offset(addr
, o
.n
, o
.get_addr())
369 self
.update_model(True)
371 addr
= self
.cur_addr()
372 comment
= self
.model
.AS
.get_comment(addr
) or ""
373 res
= self
.dialog_edit_line(line
=comment
, width
=60)
375 self
.model
.AS
.set_comment(addr
, res
)
380 addr
= self
.cur_addr()
381 label
= self
.model
.AS
.get_label(addr
)
382 def_label
= self
.model
.AS
.get_default_label(addr
)
383 s
= label
or def_label
385 res
= self
.dialog_edit_line(line
=s
)
391 if self
.model
.AS
.label_exists(res
):
393 self
.show_status("Duplicate label")
395 self
.model
.AS
.set_label(addr
, res
)
397 # If it's new label, we need to add it to model
403 d
= Dialog(4, 4, title
="Go to")
404 d
.add(1, 1, WLabel("Label/addr:"))
405 entry
= WAutoComplete(20, "", self
.model
.AS
.get_label_list())
407 entry
.finish_dialog
= ACTION_OK
409 d
.add(1, 2, WLabel("Press Down to auto-complete"))
414 value
= entry
.get_text()
415 if '0' <= value
[0] <= '9':
418 addr
= self
.model
.AS
.resolve_label(value
)
419 self
.goto_addr(addr
, from_addr
=self
.cur_addr())
421 elif key
== editor
.KEY_F1
:
425 saveload
.save_state(project_dir
)
426 self
.show_status("Saved.")
427 elif key
== b
"\x11": # ^Q
428 class IssueList(WListBox
):
429 def render_line(self
, l
):
431 d
= Dialog(4, 4, title
="Problems list")
432 lw
= IssueList(40, 16, self
.model
.AS
.get_issues())
433 lw
.finish_dialog
= ACTION_OK
438 val
= lw
.get_cur_line()
440 self
.goto_addr(val
[0], from_addr
=self
.cur_addr())
443 off
, area
= self
.model
.AS
.addr2area(self
.cur_addr())
444 props
= area
[engine
.PROPS
]
445 percent
= 100 * off
/ (area
[engine
.END
] - area
[engine
.START
] + 1)
446 func
= self
.model
.AS
.lookup_func(self
.cur_addr())
447 func
= self
.model
.AS
.get_label(func
.start
) if func
else None
448 self
.show_status("Area: 0x%x %s (%s): %.1f%%, func: %s" % (
449 area
[engine
.START
], props
.get("name", "noname"), props
["access"], percent
, func
456 self
.dialog_box(L
, T
, W
, H
)
457 v
= Viewer(L
+ 1, T
+ 1, W
- 2, H
- 2)
459 for area
in self
.model
.AS
.get_areas():
460 props
= area
[engine
.PROPS
]
461 lines
.append("%08x-%08x %s:" % (area
[engine
.START
], area
[engine
.END
], props
.get("name", "noname")))
462 flags
= area
[engine
.FLAGS
]
465 for i
in range(len(flags
)):
466 if i
% 64 == 0 and l
:
469 c
= engine
.flag2char(flags
[i
])
470 # For "function's instructions", make continuation byte be
471 # clearly distinguishable too.
472 if c
== "c" and last_capital
== "F":
483 out_fname
= "out.lst"
484 with
open(out_fname
, "w") as f
:
485 engine
.render_partial(TextSaveModel(f
, self
), 0, 0, 10000000)
486 self
.show_status("Disassembly listing written: " + out_fname
)
487 elif key
== b
"\x17": # Ctrl+W
488 outfile
= self
.write_func(self
.cur_addr())
490 self
.show_status("Wrote file: %s" % outfile
)
491 elif key
== b
"\x15": # Ctrl+U
493 addr
= self
.next_addr()
495 flags
= self
.model
.AS
.get_flags(addr
)
496 if flags
== self
.model
.AS
.UNK
:
497 self
.goto_addr(addr
, from_addr
=self
.cur_addr())
500 elif key
in (b
"/", b
"?"): # "/" and Shift+"/"
501 class FoundException(Exception): pass
502 class TextSearchModel
:
503 def __init__(self
, substr
, ctrl
):
507 def add_line(self
, addr
, line
):
509 if self
.search
in line
:
510 raise FoundException(addr
)
511 if self
.cnt
% 256 == 0:
512 self
.ctrl
.show_status("Searching: 0x%x" % addr
)
515 d
= Dialog(4, 4, title
="Text Search")
516 d
.add(1, 1, WLabel("Search for:"))
517 entry
= WTextEntry(20, self
.search_str
)
518 entry
.finish_dialog
= ACTION_OK
522 self
.search_str
= entry
.get_text()
523 if res
!= ACTION_OK
or not self
.search_str
:
525 addr
= self
.cur_addr()
527 addr
= self
.next_line_addr()
530 engine
.render_from(TextSearchModel(self
.search_str
, self
), addr
, 10000000)
531 except FoundException
as res
:
532 self
.goto_addr(res
.args
[0], from_addr
=self
.cur_addr())
534 self
.show_status("Not found: " + self
.search_str
)
537 self
.show_status("Unbound key: " + repr(key
))
544 def filter_config_line(l
):
545 l
= re
.sub(r
"#.*$", "", l
)
549 def load_symbols(fname
):
550 with
open(fname
) as f
:
552 l
= filter_config_line(l
)
555 m
= re
.search(r
"\b([A-Za-z_$.][A-Za-z0-9_$.]*)\s*=\s*((0x)?[0-9A-Fa-f]+)", l
)
558 ENTRYPOINTS
.append((m
.group(1), int(m
.group(2), 0)))
560 print("Warning: cannot parse entrypoint info from: %r" % l
)
562 def parse_entrypoints(f
):
564 l
= filter_config_line(l
)
569 m
= re
.match(r
'load "(.+?)"', l
)
571 load_symbols(m
.group(1))
573 label
, addr
= [v
.strip() for v
in l
.split("=")]
574 ENTRYPOINTS
.append((label
, int(addr
, 0)))
def load_target_file(loader, fname):
    """Load ``fname`` through ``loader`` and record its entry point, if any.

    ``loader.load()`` is called with ``engine.ADDRESS_SPACE`` and the file
    name. The outcome is logged, and when the loader returns a non-None
    entry point it is appended to ``ENTRYPOINTS`` under the label
    ``"_ENTRY_"``.
    """
    entry = loader.load(engine.ADDRESS_SPACE, fname)
    if entry is None:
        # Loader reported no entry point: log that and leave ENTRYPOINTS alone.
        log.info("Loaded %s, entrypoint: %s", fname, None)
        return
    log.info("Loaded %s, entrypoint: %s", fname, hex(entry))
    ENTRYPOINTS.append(("_ENTRY_", entry))
585 def parse_disasm_def(fname
):
588 with
open(fname
) as f
:
590 l
= filter_config_line(l
)
600 print("Processing section: %s" % section
)
601 if section
== "entrypoints":
602 l
= parse_entrypoints(f
)
604 assert 0, "Unknown section: " + section
611 if l
.startswith("load"):
613 if args
[2][0] in string
.digits
:
614 addr
= int(args
[2], 0)
615 print("Loading %s @0x%x" % (args
[1], addr
))
616 engine
.ADDRESS_SPACE
.load_content(open(args
[1], "rb"), addr
)
618 print("Loading %s (%s plugin)" % (args
[1], args
[2]))
619 loader
= __import__(args
[2])
620 load_target_file(loader
, args
[1])
621 elif l
.startswith("cpu "):
623 CPU_PLUGIN
= __import__(args
[1])
624 print("Loading CPU plugin %s" % (args
[1]))
625 elif l
.startswith("show bytes "):
627 show_bytes
= int(args
[2])
628 elif l
.startswith("area "):
630 assert len(args
) == 4
632 # Allow underscores to separate digit groups
634 return int(s
.replace("_", ""), 0)
637 m
= re
.match(r
"(.+?)\s*\(\s*(.+?)\s*\)", args
[2])
638 start
= str2int(m
.group(1))
639 end
= start
+ str2int(m
.group(2)) - 1
641 m
= re
.match(r
"(.+)\s*-\s*(.+)", args
[2])
642 start
= str2int(m
.group(1))
643 end
= str2int(m
.group(2))
645 a
= engine
.ADDRESS_SPACE
.add_area(start
, end
, {"name": args
[1], "access": args
[3].upper()})
646 print("Adding area: %s" % engine
.str_area(a
))
648 assert 0, "Unknown directive: " + l
651 if __name__
== "__main__":
652 # Plugin dirs are relative to the dir where scratchabit.py resides.
653 # sys.path[0] below provide absolute path of this dir, resolved for
655 plugin_dirs
= ["plugins", "plugins/cpu", "plugins/loader"]
656 for d
in plugin_dirs
:
657 sys
.path
.append(os
.path
.join(sys
.path
[0], d
))
658 log
.basicConfig(filename
="scratchabit.log", format
='%(asctime)s %(message)s', level
=log
.DEBUG
)
661 if sys
.argv
[1].endswith(".def"):
662 parse_disasm_def(sys
.argv
[1])
663 project_name
= sys
.argv
[1].rsplit(".", 1)[0]
665 import default_plugins
666 for loader_id
in default_plugins
.loaders
:
667 loader
= __import__(loader_id
)
668 arch_id
= loader
.detect(sys
.argv
[1])
672 print("Error: file '%s' not recognized by default loaders" % sys
.argv
[1])
674 if arch_id
not in default_plugins
.cpus
:
675 print("Error: no plugin for CPU '%s' as detected for file '%s'" % (arch_id
, sys
.argv
[1]))
677 load_target_file(loader
, sys
.argv
[1])
678 CPU_PLUGIN
= __import__(default_plugins
.cpus
[arch_id
])
679 project_name
= sys
.argv
[1]
681 p
= CPU_PLUGIN
.PROCESSOR_ENTRY()
682 engine
.set_processor(p
)
683 if hasattr(p
, "help_text"):
684 help.set_cpu_help(p
.help_text
)
686 engine
.DisasmObj
.LEADER_SIZE
= 8 + 1
688 engine
.DisasmObj
.LEADER_SIZE
+= show_bytes
* 2 + 1
690 # Strip suffix if any from def filename
691 project_dir
= project_name
+ ".scratchabit"
693 if saveload
.save_exists(project_dir
):
694 saveload
.load_state(project_dir
)
696 for label
, addr
in ENTRYPOINTS
:
697 if engine
.ADDRESS_SPACE
.is_exec(addr
):
698 engine
.add_entrypoint(addr
)
699 engine
.ADDRESS_SPACE
.make_unique_label(addr
, label
)
701 sys
.stdout
.write("Performing initial analysis... %d\r" % cnt
)
702 engine
.analyze(_progress
)
705 #engine.print_address_map()
708 if os
.path
.exists(project_dir
+ "/session.addr_stack"):
709 addr_stack
= saveload
.load_addr_stack(project_dir
)
711 show_addr
= addr_stack
.pop()
714 show_addr
= ENTRYPOINTS
[0][1]
716 show_addr
= engine
.ADDRESS_SPACE
.min_addr()
719 #_model = engine.render()
720 _model
= engine
.render_partial_around(show_addr
, 0, HEIGHT
* 2)
721 print("Rendering time: %fs" % (time
.time() - t
))
722 #print(_model.lines())
727 screen_size
= Editor
.screen_size()
728 e
= Editor(1, 1, screen_size
[0] - 2, screen_size
[1] - 3)
731 e
.draw_box(0, 0, screen_size
[0], screen_size
[1] - 1)
733 e
.addr_stack
= addr_stack
734 e
.goto_addr(show_addr
)
737 log
.exception("Unhandled exception")
743 saveload
.save_session(project_dir
, e
)