scratchabit: Factor out call_script(), call main() if exists with APP object.
[ScratchABit.git] / engine.py
blob3a2298844376bdf721bd8746a1db5871452de356
1 # ScratchABit - interactive disassembler
3 # Copyright (c) 2015 Paul Sokolovsky
5 # This program is free software; you can redistribute it and/or modify
6 # it under the terms of the GNU General Public License as published by
7 # the Free Software Foundation, either version 3 of the License, or
8 # (at your option) any later version.
10 # This program is distributed in the hope that it will be useful, but
11 # WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 # General Public License for more details.
15 # You should have received a copy of the GNU General Public License
16 # along with this program. If not, see <http://www.gnu.org/licenses/>.
17 import sys
18 import binascii
19 import json
20 import logging as log
22 from rangeset import RangeSet
24 import idaapi
25 import idc
28 # ScratchABit API and code
# Indexes into an area tuple, which AddressSpace.add_area() builds as
# (start, end, props, bytes, flags).
START = 0
END = 1
PROPS = 2
BYTES = 3
FLAGS = 4

# Values of the "subtype" argument property (see AddressSpace.set_arg_prop()).
# IMM_ADDR marks an argument as an address/offset (see make_arg_offset()).
IMM_UHEX = None
IMM_SHEX = "shex"
IMM_UDEC = "udec"
IMM_SDEC = "sdec"
IMM_CHAR = "chr"
IMM_ADDR = "addr"
def str_area(area):
    """Return a short human-readable description of an area tuple.

    A falsy *area* (e.g. None) is rendered as "Area(None)".
    """
    if area:
        fields = (area[START], area[END], area[PROPS])
        return "Area(0x%x-0x%x, %s)" % fields
    return "Area(None)"
def area_props(area):
    """Return the properties element of an area tuple."""
    props = area[PROPS]
    return props
class InvalidAddrException(Exception):
    """Thrown when dereferencing address which doesn't exist in AddressSpace."""

    def __init__(self, addr):
        # Carry the address both as a number and as a hex string.
        as_hex = hex(addr)
        self.args = (addr, as_hex)
class Function:
    """A function: a start address, an optional explicit end, and code ranges.

    ``end`` is the beyond-end address when it was supplied explicitly
    (otherwise None).  ``ranges`` is a RangeSet of ``(start, end)`` spans
    accumulated while instructions are attributed to the function.
    """

    def __init__(self, start, end=None):
        self.ranges = RangeSet()
        self.start = start
        self.end = end

    def add_insn(self, addr, sz):
        """Record an instruction of *sz* bytes at *addr* as part of the function."""
        self.ranges.add((addr, addr + sz))

    def add_range(self, start, end):
        """Record the span ``(start, end)`` as part of the function."""
        self.ranges.add((start, end))

    def get_ranges(self):
        """Return the recorded ranges as a list."""
        return self.ranges.to_list()

    def get_end(self):
        """Return the function end address, or None if it is not known.

        An explicitly set end wins; otherwise the upper bound of the
        recorded ranges is used.
        """
        if self.end is not None:
            return self.end
        bounds = self.ranges.bounds()
        if bounds:
            return bounds[1]
        return None

    def get_end_method(self):
        """Return a textual note describing how the end address was obtained."""
        if self.end is None:
            return "as detected"
        bounds = self.ranges.bounds()
        if not bounds:
            # End was set explicitly but no ranges were recorded, so there
            # is no detected end to report; get_end() guards the same case.
            return "as set by loader (no ranges detected)"
        return "as set by loader (detected: 0x%x)" % (bounds[1] - 1)
class AddressSpace:
    """Memory areas of the analyzed target plus per-address annotations.

    Each area is a tuple ``(start, end, props, bytes, flags)`` with an
    inclusive ``end``; ``bytes`` holds the content and ``flags`` holds one
    flag byte (the constants below) per content byte.
    """

    UNK = 0
    CODE = 0x01
    CODE_CONT = 0x02
    DATA = 0x04
    DATA_CONT = 0x08
    STR = 0x10  # Continuation is DATA_CONT
    FILL = 0x40  # Filler/alignment bytes
    FUNC = 0x80  # Can appear with CODE, meaning this instruction belongs to a function

    def __init__(self):
        self.area_list = []
        # Map from referenced addresses to their properties. Among them:
        # "args":
        #   Properties of instruction's args; at the very least, this should
        #   differentiate between literal numeric values and addresses/offsets/pointers
        #   to other objects
        # "comm":
        #   Comment
        # "label"
        #   Label
        # "xrefs":
        #   Cross-reference records
        # "fun_s", "fun_e"
        #   Function start and beyond-end addresses, map to Function object
        self.addr_map = {}
        # Map from label to its address
        self.labels_rev = {}
        # Problem spots which automatic control/data flow couldn't resolve
        self.issues = {}
        # Cached last accessed area
        self.last_area = None

    # Memory Area API

    def add_area(self, start, end, props):
        """Create a zero-filled area covering start..end (inclusive) and return it."""
        log.debug("add_area(%x, %x, %s)", start, end, props)
        sz = end - start + 1
        content = bytearray(sz)
        flags = bytearray(sz)
        a = (start, end, props, content, flags)
        self.area_list.append(a)
        # Area list should be sorted. Assume it's short and just resort it each time.
        self.area_list.sort()
        return a

    def get_areas(self):
        """Return the (sorted) list of areas."""
        return self.area_list

    def area_no(self, area):
        """Return the index of *area* in the area list."""
        return self.area_list.index(area)

    def addr2area(self, addr):
        """Return ``(offset, area)`` for *addr*, or ``(None, None)`` if unmapped."""
        a = self.last_area
        if a and a[START] <= addr <= a[END]:
            return (addr - a[START], a)
        for a in self.area_list:
            if a[START] <= addr <= a[END]:
                self.last_area = a
                return (addr - a[START], a)
        return (None, None)

    def min_addr(self):
        """Return the lowest address of the address space."""
        return self.area_list[0][START]

    def max_addr(self):
        """Return the end address of the last area."""
        return self.area_list[-1][END]

    # Return next address in the address space, or None
    def next_addr(self, addr):
        """Return the address following *addr*, or None at the very end.

        Raises InvalidAddrException if *addr* is not inside any area.
        """
        offset, area = self.addr2area(addr)
        if area is None:
            raise InvalidAddrException(addr)
        if addr != area[END]:
            return addr + 1
        i = self.area_no(area) + 1
        if i == len(self.area_list):
            return None
        return self.area_list[i][START]

    def is_exec(self, addr):
        """Return True if *addr* lies in an area whose "access" property has "X"."""
        off, area = self.addr2area(addr)
        if not area:
            return False
        # Areas autocreated by set_label() carry only a "name" property.
        return "X" in area[PROPS].get("access", "")

    # Binary Data API

    def load_content(self, file, addr, sz=None):
        """Read area content from *file* starting at *addr* (*sz* bytes, or to area end)."""
        off, area = self.addr2area(addr)
        if area is None:
            raise InvalidAddrException(addr)
        to = off + sz if sz else None
        file.readinto(memoryview(area[BYTES])[off:to])

    def is_valid_addr(self, addr):
        """Return True if *addr* belongs to some area."""
        off, area = self.addr2area(addr)
        return area is not None

    def get_byte(self, addr):
        """Return the byte at *addr*; raise InvalidAddrException if unmapped."""
        off, area = self.addr2area(addr)
        if area is None:
            raise InvalidAddrException(addr)
        return area[BYTES][off]

    def set_byte(self, addr, val):
        """Store the low 8 bits of *val* at *addr*."""
        off, area = self.addr2area(addr)
        if area is None:
            raise InvalidAddrException(addr)
        area[BYTES][off] = val & 0xff

    def get_bytes(self, addr, sz):
        """Return up to *sz* bytes starting at *addr* (a slice, so it may be shorter)."""
        off, area = self.addr2area(addr)
        if area is None:
            raise InvalidAddrException(addr)
        return area[BYTES][off:off + sz]

    def get_data(self, addr, sz):
        """Return the *sz*-byte little-endian value at *addr*.

        For 4-byte reads a "sym" property set on the address (a symbolic
        value) is returned instead of the numeric content.
        """
        # TODO: address size
        if sz == 4:
            sym = self.get_addr_prop(addr, "sym")
            if sym is not None:
                return sym

        off, area = self.addr2area(addr)
        if area is None:
            raise InvalidAddrException(addr)
        content = area[BYTES]
        val = 0
        for i in range(sz):
            val = val | (content[off + i] << 8 * i)
        return val

    def set_data(self, addr, data, sz):
        """Store *data* as *sz* little-endian bytes at *addr*."""
        off, area = self.addr2area(addr)
        if area is None:
            raise InvalidAddrException(addr)
        content = area[BYTES]
        for i in range(sz):
            content[off + i] = data & 0xff
            data >>= 8

    # Binary Data Flags API

    def get_flags(self, addr, mask=0x7f):
        """Return the flag byte at *addr* masked with *mask* (default strips FUNC)."""
        off, area = self.addr2area(addr)
        if area is None:
            raise InvalidAddrException(addr)
        return area[FLAGS][off] & mask

    def get_unit_size(self, addr):
        """Return the size in bytes of the code/data/filler unit starting at *addr*.

        Anything that is not a recognised unit head counts as 1 byte.
        """
        off, area = self.addr2area(addr)
        if area is None:
            raise InvalidAddrException(addr)
        flags = area[FLAGS]
        head = flags[off]
        if head & 0x7f == self.CODE:
            cont = self.CODE_CONT
        elif head in (self.DATA, self.STR):
            cont = self.DATA_CONT
        elif head == self.FILL:
            cont = self.FILL
        else:
            return 1
        end = off + 1
        limit = len(flags)
        # A unit may reach the last byte of the area, so stop at its end
        # rather than indexing past it.
        while end < limit and flags[end] == cont:
            end += 1
        return end - off

    # Taking an offset inside unit, return offset to the beginning of unit
    @classmethod
    def adjust_offset_reverse(cls, off, area):
        """Return the offset of the first byte of the unit containing *off*."""
        flags = area[FLAGS]
        if flags[off] == cls.FILL:
            # Look at the preceding byte instead of stepping onto it, so a
            # filler run starting at offset 1 isn't extended to offset 0.
            while off > 0 and flags[off - 1] == cls.FILL:
                off -= 1
            return off

        while off > 0 and flags[off] in (cls.CODE_CONT, cls.DATA_CONT):
            off -= 1
        return off

    def adjust_addr_reverse(self, addr):
        """Return the address of the unit containing *addr*, or None if unmapped."""
        off, area = self.addr2area(addr)
        if area is None:
            return None
        return self.adjust_offset_reverse(off, area) + area[START]

    def set_flags(self, addr, sz, head_fl, rest_fl=0):
        """Overwrite flags of *sz* bytes at *addr*: *head_fl* first, then *rest_fl*."""
        off, area = self.addr2area(addr)
        if area is None:
            raise InvalidAddrException(addr)
        flags = area[FLAGS]
        flags[off] = head_fl
        off += 1
        for i in range(sz - 1):
            flags[off + i] = rest_fl

    def make_undefined(self, addr, sz):
        """Mark *sz* bytes at *addr* as unknown."""
        self.set_flags(addr, sz, self.UNK, self.UNK)

    def make_code(self, addr, sz, extra_flags=0):
        """OR code flags into *sz* bytes at *addr*; *extra_flags* go on the head byte."""
        off, area = self.addr2area(addr)
        if area is None:
            raise InvalidAddrException(addr)
        area_byte_flags = area[FLAGS]
        area_byte_flags[off] |= self.CODE | extra_flags
        for i in range(sz - 1):
            area_byte_flags[off + 1 + i] |= self.CODE_CONT

    def make_data(self, addr, sz):
        """OR data flags into *sz* bytes at *addr*."""
        off, area = self.addr2area(addr)
        if area is None:
            raise InvalidAddrException(addr)
        area_byte_flags = area[FLAGS]
        area_byte_flags[off] |= self.DATA
        for i in range(sz - 1):
            area_byte_flags[off + 1 + i] |= self.DATA_CONT

    def make_data_array(self, addr, sz, num_items, prefix=""):
        """Mark *num_items* consecutive data units of *sz* bytes and comment the first."""
        # Make a data array. First-class arrays are not supported so far,
        # so just mark data units sequentially
        self.append_comment(addr, "%sArray, num %s: %d" % (prefix, "bytes" if sz == 1 else "items", num_items))
        for i in range(num_items):
            self.make_data(addr, sz)
            addr += sz

    def make_filler(self, addr, sz):
        """Mark *sz* bytes at *addr* as filler."""
        self.set_flags(addr, sz, self.FILL, self.FILL)

    # Address properties API

    def set_addr_prop(self, addr, prop, val):
        """Set property *prop* of *addr* to *val*."""
        self.addr_map.setdefault(addr, {})[prop] = val

    def get_addr_prop(self, addr, prop, default=None):
        """Return property *prop* of *addr*, or *default* if absent."""
        return self.addr_map.get(addr, {}).get(prop, default)

    def get_addr_prop_dict(self, addr):
        """Return the property dict of *addr* (a fresh empty dict if none)."""
        return self.addr_map.get(addr, {})

    # Label API

    def get_default_label_prefix(self, ea):
        """Return "loc_", "dat_" or "unk_" depending on the flags at *ea*."""
        fl = self.get_flags(ea)
        if fl == self.CODE:
            prefix = "loc_"
        elif fl & self.DATA:
            prefix = "dat_"
        else:
            prefix = "unk_"
        return prefix

    def get_default_label(self, ea):
        """Return the default (prefix + 8 hex digits) label text for *ea*."""
        prefix = self.get_default_label_prefix(ea)
        return "%s%08x" % (prefix, ea)

    def make_label(self, prefix, ea):
        """Give *ea* a "<prefix><address>" label unless it already has a string label."""
        l = self.get_addr_prop(ea, "label")
        if isinstance(l, str):
            # If it's real label, don't change it
            return
        if not prefix:
            prefix = self.get_default_label_prefix(ea)
        l = "%s%08x" % (prefix, ea)
        self.set_addr_prop(ea, "label", l)
        self.labels_rev[l] = ea

    # auto_label will change its prefix automatically based on
    # type of data it points.
    def make_auto_label(self, ea):
        """Give *ea* an automatic label (stored as the address itself)."""
        if self.get_addr_prop(ea, "label"):
            return
        self.set_addr_prop(ea, "label", ea)
        self.labels_rev[ea] = ea

    # Delete a label, only if it's auto
    def del_auto_label(self, ea):
        """Remove the label of *ea* if it is an automatic (non-string) one."""
        label = self.get_addr_prop(ea, "label")
        # Compare against None explicitly: the auto label of address 0 is
        # the integer 0, which is falsy.
        if label is None or isinstance(label, str):
            return
        self.set_addr_prop(ea, "label", None)
        del self.labels_rev[ea]

    def get_label(self, ea):
        """Return the label text of *ea* (auto labels are rendered), or None."""
        label = self.get_addr_prop(ea, "label")
        if isinstance(label, int):
            return "%s%08x" % (self.get_default_label_prefix(ea), label)
        return label

    def set_label(self, ea, label):
        """Set the label of *ea*, creating a one-byte area if *ea* is unmapped."""
        # Make sure the label can be actually visible - create an area for it if none
        off, area = self.addr2area(ea)
        if area is None:
            self.add_area(ea, ea, {"name": "autocreated to host %s label" % label})
        self.set_addr_prop(ea, "label", label)
        self.labels_rev[label] = ea

    def make_unique_label(self, ea, label):
        """Label *ea* with *label*, adding a "__N" suffix if needed; return the label used."""
        existing = self.get_label(ea)
        if existing == label:
            return label
        cnt = 0
        while True:
            l = label
            if cnt > 0:
                l += "__%d" % cnt
            if l not in self.labels_rev:
                self.set_label(ea, l)
                return l
            cnt += 1

    def get_label_list(self):
        """Return all labels as sorted text (auto labels rendered in default form)."""
        return sorted([x if isinstance(x, str) else self.get_default_label(x) for x in self.labels_rev.keys()])

    def resolve_label(self, label):
        """Return the address for *label*, or None if it cannot be resolved.

        Besides exact matches, the default text form of an automatic label
        ("<prefix>_<hex address>") is recognised.
        """
        if label in self.labels_rev:
            return self.labels_rev[label]
        try:
            ea = int(label.split("_", 1)[1], 16)
        except (AttributeError, IndexError, ValueError):
            # Not a string, no "_" separator, or a non-hex suffix.
            return None
        if ea in self.labels_rev and self.get_default_label(ea) == label:
            return ea
        return None

    def label_exists(self, label):
        """Return True if *label* is a known label key."""
        return label in self.labels_rev

    # Comment API

    def get_comment(self, ea):
        """Return the comment at *ea*, or None."""
        comm = self.get_addr_prop(ea, "comm")
        return comm

    def set_comment(self, ea, comm):
        """Set the comment at *ea*."""
        self.set_addr_prop(ea, "comm", comm)

    def append_comment(self, ea, comm):
        """Append *comm* as a new line to the comment at *ea*."""
        existing = self.get_comment(ea)
        if existing is not None:
            comm = existing + "\n" + comm
        self.set_addr_prop(ea, "comm", comm)

    # (Pseudo)instruction Argument Properties API

    def set_arg_prop(self, ea, arg_no, prop, prop_val):
        """Set property *prop* of argument *arg_no* of the unit at *ea*."""
        arg_props = self.get_addr_prop(ea, "args", {})
        if arg_no not in arg_props:
            arg_props[arg_no] = {}
        props = arg_props[arg_no]
        props[prop] = prop_val
        self.set_addr_prop(ea, "args", arg_props)

    def get_arg_prop(self, ea, arg_no, prop):
        """Return property *prop* of argument *arg_no* at *ea*, or None."""
        arg_props = self.get_addr_prop(ea, "args", {})
        return arg_props.get(arg_no, {}).get(prop)

    def get_arg_prop_dict(self, ea, arg_no):
        """Return the property dict of argument *arg_no* at *ea*."""
        arg_props = self.get_addr_prop(ea, "args", {})
        return arg_props.get(arg_no, {})

    def make_arg_offset(self, insn_addr, arg_no, ref_addr):
        """Convert an immediate argument into an offset (address) one."""
        # Convert an immediate argument to an offset one
        # insn_addr - address of (pseudo)instruction
        # arg_no - argument no. of instruction
        # ref_addr - value of the argument (i.e. address it refers to)
        old_subtype = self.get_arg_prop(insn_addr, arg_no, "subtype")
        if old_subtype and old_subtype != IMM_ADDR:
            # Preserve old numeric value subtype to unconvert back to it
            # if need.
            self.set_arg_prop(insn_addr, arg_no, "num_subtype", old_subtype)

        self.set_arg_prop(insn_addr, arg_no, "subtype", IMM_ADDR)

        if isinstance(ref_addr, str):
            # Symbolic address
            # TODO: this works only for "dd" virtual instruction
            self.set_addr_prop(insn_addr, "sym", ref_addr)
            return

        label = self.get_label(ref_addr)
        if not label:
            self.make_auto_label(ref_addr)
        self.add_xref(insn_addr, ref_addr, idaapi.dr_O)

    def unmake_arg_offset(self, insn_addr, arg_no, ref_addr):
        """Convert an offset argument back to a plain immediate value."""
        # Convert offset argument to normal immediate value
        old_subtype = self.get_arg_prop(insn_addr, arg_no, "num_subtype")
        self.set_arg_prop(insn_addr, arg_no, "subtype", old_subtype)
        self.del_xref(insn_addr, ref_addr, idaapi.dr_O)
        # If this was last xref, and label is automatic, kill it too
        if not self.get_xrefs(ref_addr):
            self.del_auto_label(ref_addr)

    # Xref API

    def add_xref(self, from_ea, to_ea, type):
        """Record a cross-reference of *type* from *from_ea* to *to_ea*."""
        xrefs = self.get_addr_prop(to_ea, "xrefs", {})
        xrefs[from_ea] = type
        self.set_addr_prop(to_ea, "xrefs", xrefs)

    def del_xref(self, from_ea, to_ea, type):
        """Remove the cross-reference from *from_ea* to *to_ea* (KeyError if absent)."""
        xrefs = self.get_addr_prop(to_ea, "xrefs", {})
        del xrefs[from_ea]
        self.set_addr_prop(to_ea, "xrefs", xrefs)

    def get_xrefs(self, ea):
        """Return the ``{from_ea: type}`` dict of references to *ea*, or None."""
        xrefs = self.get_addr_prop(ea, "xrefs", None)
        return xrefs

    # Functions API

    def make_func(self, from_ea, to_ea_excl=None):
        """Return the Function starting at *from_ea*, creating it if needed."""
        f = self.get_addr_prop(from_ea, "fun_s")
        if f is not None:
            return f
        f = Function(from_ea, to_ea_excl)
        self.set_addr_prop(from_ea, "fun_s", f)

        if to_ea_excl is not None:
            self.set_addr_prop(to_ea_excl, "fun_e", f)
        return f

    def is_func(self, ea):
        """Return True if a function starts at *ea*."""
        return self.get_addr_prop(ea, "fun_s") is not None

    # If ea is start of function, return Function object
    def get_func_start(self, ea):
        return self.get_addr_prop(ea, "fun_s")

    # If ea is end of function, return Function object
    def get_func_end(self, ea):
        return self.get_addr_prop(ea, "fun_e")

    def set_func_end(self, func, ea):
        """Record *ea* as the beyond-end address of *func*."""
        self.set_addr_prop(ea, "fun_e", func)

    # Look up function containing address
    def lookup_func(self, ea):
        """Return the Function whose start..end span contains *ea*, or None."""
        # TODO: cache func ranges, use binary search instead
        for start, props in self.addr_map.items():
            func = props.get("fun_s")
            if func and ea >= start:
                end = func.get_end()
                if end is not None and ea < end:
                    return func
        return None

    # Issues API

    def add_issue(self, ea, descr):
        """Record a problem description for *ea*."""
        self.issues[ea] = descr

    def get_issues(self):
        """Return the issues as a list of ``(ea, descr)`` sorted by address."""
        return [(ea, self.issues[ea]) for ea in sorted(self.issues)]

    # Persistence API

    def save_area(self, stream, area):
        """Write the bounds of *area* and its flags as hex, 32 bytes per line."""
        stream.write("%08x %08x\n" % (area[START], area[END]))
        flags = area[FLAGS]
        i = 0
        while True:
            chunk = flags[i:i + 32]
            if not chunk:
                break
            stream.write(str(binascii.hexlify(chunk), 'utf-8') + "\n")
            i += 32
        stream.write("\n")

    def save_areas(self, stream):
        """Write all areas with save_area()."""
        for a in self.area_list:
            self.save_area(stream, a)

    @staticmethod
    def _open_props_stream(prefix, area):
        # Open the per-area properties file and write its header.
        stream = open(prefix + ".%08x" % area[START], "w")
        stream.write("header:\n")
        stream.write(" version: 1.0\n")
        return stream

    def save_addr_props(self, prefix):
        """Write address properties to one "<prefix>.<area start>" file per area.

        NOTE(review): arg entries are written with a two-space prefix so
        load_addr_props() can tell them from one-space property lines; in
        the reviewed text both used a single space, which made the loader
        swallow the properties following "args". Confirm against existing
        saved files.
        """
        areas = self.area_list
        area_i = 0
        stream = self._open_props_stream(prefix, areas[area_i])
        try:
            area_end = areas[area_i][END]
            for addr, props in sorted(self.addr_map.items()):
                if addr > area_end:
                    stream.close()
                    area_i += 1
                    while addr > areas[area_i][END]:
                        area_i += 1
                    assert addr >= areas[area_i][START]
                    stream = self._open_props_stream(prefix, areas[area_i])
                    area_end = areas[area_i][END]
                # If entry has just fun_e data, skip it
                if len(props) == 1 and "fun_e" in props:
                    continue
                stream.write("0x%08x:\n" % addr)
                fl = self.get_flags(addr)
                stream.write(" f: %s %02x\n" % (flag2char(fl), fl))
                label = props.get("label")
                arg_props = props.get("args")
                comm = props.get("comm")
                xrefs = props.get("xrefs")
                func = props.get("fun_s")
                if label is not None:
                    if label == addr:
                        stream.write(" l:\n")
                    else:
                        stream.write(" l: %s\n" % label)
                if arg_props is not None:
                    arg_props_header = False
                    for arg_no, data in sorted(arg_props.items()):
                        data = {k: v for k, v in data.items() if v is not None}
                        if data:
                            if not arg_props_header:
                                stream.write(" args:\n")
                                arg_props_header = True
                            stream.write("  %s: %r\n" % (arg_no, data))
                if comm is not None:
                    stream.write(" cmnt: %r\n" % comm)

                if func is not None:
                    if func.end is not None:
                        stream.write(" fn_end: 0x%08x\n" % func.end)
                    else:
                        stream.write(" fn_end: '?'\n")
                    stream.write(" fn_ranges: [")
                    stream.write(", ".join("[0x%08x,0x%08x]" % r for r in func.get_ranges()))
                    stream.write("]\n")

                if xrefs:
                    stream.write(" x:\n")
                    for from_addr in sorted(xrefs.keys()):
                        stream.write(" - 0x%08x: %s\n" % (from_addr, xrefs[from_addr]))
        finally:
            # Close the last opened file, including on error.
            stream.close()

    def load_addr_props(self, stream):
        """Parse a properties file written by save_addr_props() into this object."""
        l = stream.readline()
        assert l == "header:\n"
        l = stream.readline()
        assert l == " version: 1.0\n"
        l = stream.readline()
        while l:
            assert l.endswith(":\n")
            addr = int(l[:-2], 0)
            props = self.addr_map.get(addr, {})
            l = stream.readline()
            while l and l[0] == " ":
                key, val = [x.strip() for x in l.split(":", 1)]
                # None means "current line consumed"; multi-line branches
                # leave their look-ahead line in l instead.
                l = None

                if key == "l":
                    if not val:
                        val = addr
                    props["label"] = val
                    self.labels_rev[val] = addr
                elif key == "cmnt":
                    props["comm"] = val[1:-1].replace("\\n", "\n")
                elif key == "fn_end":
                    if val == "'?'":
                        end = None
                    else:
                        end = int(val, 0)
                    f = Function(addr, end)
                    props["fun_s"] = f
                    # Function end is registered by finish_func() below
                elif key == "fn_ranges":
                    # Look the function up here instead of relying on a
                    # local left over from the "fn_end" branch.
                    f = props["fun_s"]
                    if val != "[]":
                        assert val.startswith("[[") and val.endswith("]]"), val
                        val = val[2:-2]
                        for r in val.split("], ["):
                            r = [int(x, 0) for x in r.split(",")]
                            f.add_range(*r)
                    # Now, call finish func to set func end address, either from
                    # fn_end or fn_ranges
                    finish_func(f)

                elif key == "args":
                    arg_props = {}
                    while True:
                        l = stream.readline()
                        # Arg entries are indented one level deeper than
                        # properties (see save_addr_props()).
                        if not l or not l.startswith("  "):
                            break
                        arg_no, data = [x.strip() for x in l.split(":", 1)]
                        assert data[0] == "{" and data[-1] == "}"
                        data = data[1:-1]
                        vals = {}
                        for pair in data.split(","):
                            seq = [x.strip() for x in pair.split(":", 1)]
                            for x in seq:
                                assert x[0] == "'" and x[-1] == "'", x
                            k, v = [x[1:-1] for x in seq]
                            vals[k] = v
                        arg_props[int(arg_no)] = vals
                    props["args"] = arg_props

                elif key == "x":
                    xrefs = {}
                    while True:
                        l = stream.readline()
                        if not l or not l.startswith(" - "):
                            break
                        key, val = [x.strip() for x in l[3:].split(":", 1)]
                        xrefs[int(key, 0)] = val
                    assert xrefs
                    props["xrefs"] = xrefs

                if l is None:
                    l = stream.readline()

            self.addr_map[addr] = props

    def load_area(self, stream, area):
        """Read flags for *area* as written by save_area()."""
        l = stream.readline()
        vals = [int(v, 16) for v in l.split()]
        assert area[START] == vals[0] and area[END] == vals[1]
        flags = area[FLAGS]
        i = 0
        while True:
            l = stream.readline().rstrip()
            if not l:
                break
            l = binascii.unhexlify(l)
            flags[i:i + len(l)] = l
            i += len(l)

    def load_areas(self, stream):
        """Read flags for all areas with load_area()."""
        for a in self.area_list:
            self.load_area(stream, a)

    # Hack for idaapi interfacing
    # TODO: should go to "Analysis" object
    def analisys_stack_push(self, ea, is_call=True):
        """Queue *ea* for analysis on the module-level call or branch stack."""
        global analisys_stack_branches, analisys_stack_calls
        # If we know something is func (e.g. from loader), jump
        # to it means tail-call.
        if is_call or self.is_func(ea):
            analisys_stack_calls.append(ea)
        else:
            analisys_stack_branches.append(ea)
# The single module-wide address space used by the functions below.
ADDRESS_SPACE = AddressSpace()
# Current processor object; installed with set_processor().
_processor = None
def set_processor(p):
    """Install *p* as the processor used by this module and by idaapi."""
    global _processor
    _processor = p
    idaapi.set_processor(p)
# Work stacks consumed by analyze(): addresses queued as calls (function
# entries) and addresses queued as plain branches.
analisys_stack_calls = []
analisys_stack_branches = []
def add_entrypoint(ea, as_func=True):
    """Queue *ea* for analysis, as a function entry (default) or a branch."""
    if not as_func:
        analisys_stack_branches.append(ea)
        return
    # Register the function first, then queue it on the call stack.
    ADDRESS_SPACE.make_func(ea, None)
    analisys_stack_calls.append(ea)
def init_cmd(ea):
    """Reset the processor's current command to an empty one at *ea*."""
    cmd = _processor.cmd
    cmd.ea = ea
    cmd.size = 0
    cmd.disasm = None
def finish_func(f):
    """Log the ranges of function *f* and register its end address, if known.

    Does nothing when *f* is falsy (e.g. None).
    """
    if not f:
        return
    name = ADDRESS_SPACE.get_label(f.start)
    log.info("Function %s (0x%x) ranges: %s" % (name, f.start, f.ranges.str(hex)))
    func_end = f.get_end()
    if func_end is None:
        return
    ADDRESS_SPACE.set_func_end(f, func_end)
def analyze(callback=lambda cnt:None):
    """Run code-flow analysis over the queued branch and call addresses.

    Pending branches are drained before the next call target is taken.
    Each successfully analysed instruction is marked as code (with the
    FUNC flag when inside a function) and counted; *callback* is invoked
    with the running count every 1000 instructions.  Analysis stops when
    both stacks are empty or after 1000000 instructions.
    """
    cnt = 0
    limit = 1000000
    current_func = None
    while limit:
        if analisys_stack_branches:
            ea = analisys_stack_branches.pop()
            fl = ADDRESS_SPACE.get_flags(ea, 0xff)
            if current_func:
                # Already analysed as part of a function - nothing to do.
                if fl == ADDRESS_SPACE.CODE | ADDRESS_SPACE.FUNC:
                    continue
                assert fl in (ADDRESS_SPACE.CODE, ADDRESS_SPACE.UNK)
            else:
                if fl != ADDRESS_SPACE.UNK:
                    continue
        elif analisys_stack_calls:
            finish_func(current_func)
            ea = analisys_stack_calls.pop()
            fun = ADDRESS_SPACE.get_func_start(ea)
            # A call target queued via AddressSpace.analisys_stack_push()
            # may not have a Function yet; make_func() below creates it.
            if fun is not None and fun.get_ranges():
                # Function already has code ranges, i.e. was analysed.
                continue
            log.info("Starting analysis of function 0x%x" % ea)
            current_func = ADDRESS_SPACE.make_func(ea)
        else:
            finish_func(current_func)
            break
        init_cmd(ea)
        try:
            insn_sz = _processor.ana()
        except InvalidAddrException:
            # Ran out of memory area, just continue
            # with the rest of paths
            continue
        if insn_sz:
            if not _processor.emu():
                assert False
            if current_func:
                current_func.add_insn(ea, insn_sz)
                ADDRESS_SPACE.make_code(ea, insn_sz, ADDRESS_SPACE.FUNC)
            else:
                ADDRESS_SPACE.make_code(ea, insn_sz)
            _processor.out()
            limit -= 1
            cnt += 1
            if cnt % 1000 == 0:
                callback(cnt)
class Model:
    """Ordered list of rendered lines with an (address, subno) -> line index.

    While lines are added, the model also records where the lines of
    ``target_addr`` ended up (first line, line no. ``target_subno`` and the
    non-virtual line).
    """

    def __init__(self, target_addr=0, target_subno=0):
        self.AS = None
        self.target_addr = target_addr
        self.target_subno = target_subno
        # Line numbers for target_addr; -1 until the line is seen.
        self.target_addr_lineno_0 = -1
        self.target_addr_lineno = -1
        self.target_addr_lineno_real = -1
        self._lines = []
        self._addr2line = {}
        self._cnt = 0
        self._subcnt = 0
        self._last_addr = -1

    def lines(self):
        return self._lines

    def add_line(self, addr, line):
        # A new address restarts the per-address sub-line counter.
        if self._last_addr != addr:
            self._last_addr = addr
            self._subcnt = 0

        lineno = self._cnt
        subno = self._subcnt
        is_real = not line.virtual

        if addr == self.target_addr:
            if subno == 0:
                # Contains first line related to the given addr
                self.target_addr_lineno_0 = lineno
            if subno == self.target_subno:
                # Contains line no. target_subno related to the given addr
                self.target_addr_lineno = lineno
            if is_real:
                # Contains line where actual instr/data/unknown bytes are
                # rendered (vs labels/xrefs/etc.)
                self.target_addr_lineno_real = lineno

        self._lines.append(line)
        self._addr2line[(addr, subno)] = lineno
        line.subno = subno
        if is_real:
            # Line of "real" disasm object
            self._addr2line[(addr, -1)] = lineno

        self._cnt = lineno + 1
        self._subcnt = subno + 1

    def addr2line_no(self, addr, subno=-1):
        key = (addr, subno)
        return self._addr2line.get(key)

    def undefine_unit(self, addr):
        unit_size = self.AS.get_unit_size(addr)
        self.AS.make_undefined(addr, unit_size)
def data_sz2mnem(sz):
    """Return the padded data mnemonic for a unit of *sz* bytes (1, 2 or 4).

    Raises KeyError for any other size.
    """
    mnemonics = {1: "db", 2: "dw", 4: "dd"}
    return idaapi.fillstr(mnemonics[sz], idaapi.DEFAULT_WIDTH)
class DisasmObj:
    """Base class of everything that occupies a line in the disassembly view."""

    # Size of "leader fields" in disasm window - address, raw bytes, etc.
    # May be set by MVC controller
    LEADER_SIZE = 9

    # Default indent for a line
    indent = " " * idaapi.DEFAULT_INDENT

    # Default operand positions list is empty and set on class level
    # to save memory. To be overriden on object level.
    arg_pos = ()

    # If False, this object corresponds to real bytes in input binary stream
    # If True, doesn't correspond to bytes in memory: labels, etc.
    virtual = True

    # Textual comment to append
    comment = ""

    # Instance variable expected to be set on each instance:
    # ea =
    # size =
    # subno =  # relative no. of several lines corresponding to the same ea

    def render(self):
        # Render object as a string, set it as .cache, and return it
        pass

    def get_operand_addr(self):
        # Get "the most addressful" operand
        # This for example will be called when Enter is pressed
        # not on a specific instruction operand, so this should
        # return value of the operand which contains an address
        # (or the "most suitable" of them if there're few).
        return None

    def __len__(self):
        # Each object should return real character len as display on the screen.
        # Should be fast - called on each cursor movement.
        try:
            text = self.cache
        except AttributeError:
            # Not rendered yet - render now.
            text = self.render()
        return self.LEADER_SIZE + len(self.indent) + len(text)

    def content_len(self):
        # Length of the rendered text alone, without leader and indent.
        leader = self.LEADER_SIZE + len(self.indent)
        return len(self) - leader
class Instruction(idaapi.insn_t, DisasmObj):
    """Disassembly line for a machine instruction."""

    virtual = False

    def render(self):
        # Let the processor module produce the text for this instruction.
        _processor.cmd = self
        _processor.out()
        self.cache = self.disasm + self.comment
        return self.cache

    def get_operand_addr(self):
        # Assumes RISC design where only one operand can be address
        mem = None
        imm = None
        for o in self._operands:
            if not (o.flags & idaapi.OF_SHOW):
                continue
            if o.type == idaapi.o_near:
                # Jumps have priority
                return o
            if o.type == idaapi.o_mem:
                mem = o
            elif o.type == idaapi.o_imm:
                imm = o
        return mem if mem else imm
class Data(DisasmObj):
    """Disassembly line for a data unit (db/dw/dd)."""

    virtual = False

    def __init__(self, ea, sz, val):
        self.ea, self.size, self.val = ea, sz, val

    def render(self):
        mnem = data_sz2mnem(self.size)
        if ADDRESS_SPACE.get_arg_prop(self.ea, 0, "subtype") == IMM_ADDR:
            # Value is an address: show a symbolic name as is, otherwise
            # look up the label of the address.
            label = self.val
            if not isinstance(label, str):
                label = ADDRESS_SPACE.get_label(label)
            text = "%s%s" % (mnem, label)
        else:
            text = "%s0x%x" % (mnem, self.val)
        text += self.comment
        self.cache = text
        return text

    def get_operand_addr(self):
        op = idaapi.op_t(0)
        op.value = self.val
        op.addr = self.val
        op.type = idaapi.o_imm
        return op
class String(DisasmObj):
    """Disassembly line for a string, rendered as a "db" with a quoted value."""

    virtual = False

    def __init__(self, ea, sz, val):
        self.ea, self.size, self.val = ea, sz, val

    def render(self):
        # Show NUL bytes in the short "\0" form.
        quoted = repr(self.val).replace("\\x00", "\\0")
        text = "%s%s" % (data_sz2mnem(1), quoted)
        text += self.comment
        self.cache = text
        return text
class Fill(DisasmObj):
    """Disassembly line for a run of filler bytes; text is fixed at creation."""

    virtual = False

    def __init__(self, ea, sz):
        self.ea, self.size = ea, sz
        mnem = idaapi.fillstr(".fill", idaapi.DEFAULT_WIDTH)
        self.cache = mnem + str(sz)

    def render(self):
        return self.cache
class Unknown(DisasmObj):
    """Disassembly line for a single byte of unknown type."""

    virtual = False
    size = 1

    def __init__(self, ea, val):
        self.ea, self.val = ea, val

    def render(self):
        # Printable ASCII values get a character hint appended.
        if 0x20 <= self.val <= 0x7e:
            hint = " ; '%s'" % chr(self.val)
        else:
            hint = ""
        mnem = idaapi.fillstr("unk", idaapi.DEFAULT_WIDTH)
        text = "%s0x%02x%s" % (mnem, self.val, hint)
        text += self.comment
        self.cache = text
        return text
class Label(DisasmObj):
    """Virtual line showing the label of an address."""

    indent = ""

    def __init__(self, ea):
        self.ea = ea

    def render(self):
        # Look the label up at render time so the current text is shown.
        self.cache = "%s:" % ADDRESS_SPACE.get_label(self.ea)
        return self.cache
class Xref(DisasmObj):
    """Virtual line showing one cross-reference to an address."""

    indent = ""

    def __init__(self, ea, from_addr, type):
        self.ea, self.from_addr, self.type = ea, from_addr, type

    def render(self):
        padding = " " * idaapi.DEFAULT_XREF_INDENT
        self.cache = padding + "; xref: 0x%x %s" % (self.from_addr, self.type)
        return self.cache

    def get_operand_addr(self):
        # The "operand" of an xref line is the referencing address.
        op = idaapi.op_t(0)
        op.addr = self.from_addr
        return op
class Literal(DisasmObj):
    """Virtual line with fixed text."""

    indent = ""

    def __init__(self, ea, str):
        self.ea, self.cache = ea, str

    def render(self):
        return self.cache
def render():
    """Render from the start of the first area (up to 1000000 units) into a new Model."""
    model = Model()
    render_partial(model, 0, 0, 1000000)
    return model
# How many bytes a single disasm object (i.e. a line) may occupy
MAX_UNIT_SIZE = 4
def render_partial_around(addr, subno, context_lines):
    """Render a Model around *addr* with about *context_lines* lines each side.

    Returns None if *addr* is not inside any area.
    """
    log.debug("render_partial_around(%x, %d)", addr, subno)
    off, area = ADDRESS_SPACE.addr2area(addr)
    if area is None:
        return None

    # Step back far enough for context_lines units of maximal size,
    # crossing into preceding areas when needed.
    off -= context_lines * MAX_UNIT_SIZE
    if off < 0:
        for idx in range(ADDRESS_SPACE.area_no(area) - 1, -1, -1):
            area = ADDRESS_SPACE.area_list[idx]
            off += area[1] - area[0] + 1
            if off >= 0:
                break
        if off < 0:
            # Reached beginning of address space, just set as such
            off = 0
    assert off >= 0

    log.debug("render_partial_around: off=0x%x, %s", off, str_area(area))
    off = ADDRESS_SPACE.adjust_offset_reverse(off, area)
    log.debug("render_partial_around adjusted: off=0x%x, %s", off, str_area(area))

    model = Model(addr, subno)
    start_area_no = ADDRESS_SPACE.area_list.index(area)
    render_partial(model, start_area_no, off, context_lines, addr)
    log.debug("render_partial_around model done, lines: %d", len(model.lines()))

    assert model.target_addr_lineno_0 >= 0
    if model.target_addr_lineno == -1:
        # If we couldn't find exact subno, use 0th subno of that addr
        # TODO: maybe should be last subno, because if we couldn't find
        # exact one, it was ~ last and removed, so current last is "closer"
        # to it.
        model.target_addr_lineno = model.target_addr_lineno_0
    return model
def render_from(model, addr, num_lines):
    """Render *num_lines* units starting at *addr* into *model*.

    Returns what render_partial() returns, or None if *addr* is unmapped.
    """
    off, area = ADDRESS_SPACE.addr2area(addr)
    if area is None:
        return None
    start_area_no = ADDRESS_SPACE.area_list.index(area)
    return render_partial(model, start_area_no, off, num_lines)
def render_partial(model, area_no, offset, num_lines, target_addr=-1):
    """Append rendered lines to *model*, starting at *offset* in area *area_no*.

    Rendering proceeds through consecutive areas.  *num_lines* counts units
    (instruction/data/etc.), not the virtual lines around them; units below
    *target_addr* (when it is >= 0) are not counted.  Returns the address
    following the last rendered unit when the count is exhausted, or None
    when the end of the address space is reached first.
    """
    model.AS = ADDRESS_SPACE
    start = True
    #for a in ADDRESS_SPACE.area_list:
    while area_no < len(ADDRESS_SPACE.area_list):
        a = ADDRESS_SPACE.area_list[area_no]
        area_no += 1
        i = 0
        # Only the first area is entered at the requested offset.
        if start:
            i = offset
            start = False
        if i == 0:
            model.add_line(a[START], Literal(a[START], "; Start of 0x%x area (%s)" % (a[START], a[PROPS].get("name", "noname"))))
        bytes = a[BYTES]
        flags = a[FLAGS]
        areasize = len(bytes)
        while i < areasize:
            addr = a[START] + i
            # If we didn't yet reach target address, compensate for
            # the following decrement of num_lines. The logic is:
            # render all lines up to target_addr, and then num_lines past it.
            if target_addr >= 0 and addr < target_addr:
                num_lines += 1

            props = ADDRESS_SPACE.get_addr_prop_dict(addr)
            func = props.get("fun_s")
            if func:
                model.add_line(addr, Literal(addr, "; Start of function '%s'" % ADDRESS_SPACE.get_label(func.start)))

            xrefs = props.get("xrefs")
            if xrefs:
                for from_addr in sorted(xrefs.keys()):
                    model.add_line(addr, Xref(addr, from_addr, xrefs[from_addr]))

            # NOTE(review): an auto label is stored as the address itself,
            # so at address 0 it is falsy and no Label line is added - confirm
            # whether that is intended.
            label = props.get("label")
            if label:
                model.add_line(addr, Label(addr))

            # Dispatch on the unit type, ignoring the FUNC bit.
            f = flags[i] & 0x7f
            if f == AddressSpace.UNK:
                out = Unknown(addr, bytes[i])
                sz = 1
                i += 1
            elif f & AddressSpace.DATA:
                sz = 1
                j = i + 1
                while j < areasize and flags[j] & AddressSpace.DATA_CONT:
                    sz += 1
                    j += 1
                assert sz <= 4
                out = Data(addr, sz, ADDRESS_SPACE.get_data(addr, sz))
                i += sz
            elif f == AddressSpace.STR:
                str = chr(bytes[i])
                sz = 1
                j = i + 1
                while j < areasize and flags[j] == AddressSpace.DATA_CONT:
                    str += chr(bytes[j])
                    sz += 1
                    j += 1
                out = String(addr, sz, str)
                i += sz
            elif f == AddressSpace.FILL:
                sz = 1
                j = i + 1
                while j < areasize and flags[j] == AddressSpace.FILL:
                    sz += 1
                    j += 1
                out = Fill(addr, sz)
                i += sz
            elif f == AddressSpace.CODE:
                out = Instruction(addr)
                _processor.cmd = out
                sz = _processor.ana()
                _processor.out()
                i += sz
            else:
                # Continuation flag without a head, or an unknown flag value.
                model.add_line(addr, Literal(addr, "; UNEXPECTED value: %02x flags: %02x" % (bytes[i], f)))
                sz = 1
                i += 1
                assert 0, "@%08x flags=%x" % (addr, f)

            # First comment line goes on the unit's own line; the indent for
            # the remaining lines is computed before the comment is attached.
            comm = props.get("comm")
            if comm:
                comm_indent = " " * (out.content_len() + len(out.indent) + 2)
                out.comment = " ; " + comm.split("\n", 1)[0]

            model.add_line(addr, out)
            #sys.stdout.write(out + "\n")

            if comm:
                for comm_l in comm.split("\n")[1:]:
                    comm_obj = Literal(addr, "; " + comm_l)
                    comm_obj.indent = comm_indent
                    model.add_line(addr, comm_obj)

            # "fun_e" is stored at the beyond-end address, i.e. on the unit
            # following the function's last one.
            next_addr = addr + sz
            next_props = ADDRESS_SPACE.get_addr_prop_dict(next_addr)
            func_end = next_props.get("fun_e")
            if func_end:
                model.add_line(addr, Literal(addr, "; End of function '%s' (%s)" % (
                    ADDRESS_SPACE.get_label(func_end.start), func_end.get_end_method()
                )))

            num_lines -= 1
            if not num_lines:
                return next_addr

        model.add_line(a[END], Literal(a[END], "; End of 0x%x area (%s)" % (a[START], a[PROPS].get("name", "noname"))))
def flag2char(f):
    """Return the one-character code for flag byte *f* ("X" if unrecognised)."""
    codes = {
        AddressSpace.UNK: ".",
        AddressSpace.CODE: "C",
        AddressSpace.CODE | AddressSpace.FUNC: "F",
        AddressSpace.CODE_CONT: "c",
        AddressSpace.DATA: "D",
        AddressSpace.DATA_CONT: "d",
        AddressSpace.STR: "A",
        AddressSpace.FILL: "-",
    }
    return codes.get(f, "X")
def print_address_map():
    """Dump the flag character of every byte to stdout, 128 per row."""
    write = sys.stdout.write
    for area in ADDRESS_SPACE.area_list:
        base = area[START]
        for idx, fl in enumerate(area[FLAGS]):
            if idx % 128 == 0:
                # Start a new row, prefixed with its address.
                write("\n")
                write("%08x " % (base + idx))
            write(flag2char(fl))
        write("\n")
# Hand the module-wide address space to the idaapi and idc modules.
idaapi.set_address_space(ADDRESS_SPACE)
idc.set_address_space(ADDRESS_SPACE)