1 # ScratchABit - interactive disassembler
3 # Copyright (c) 2015 Paul Sokolovsky
5 # This program is free software; you can redistribute it and/or modify
6 # it under the terms of the GNU General Public License as published by
7 # the Free Software Foundation, either version 3 of the License, or
8 # (at your option) any later version.
10 # This program is distributed in the hope that it will be useful, but
11 # WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 # General Public License for more details.
15 # You should have received a copy of the GNU General Public License
16 # along with this program. If not, see <http://www.gnu.org/licenses/>.
23 from rangeset
import RangeSet
28 # ScratchABit API and code
41 # Some architectures, e.g. ARM, use the lowest bit(s) of a code address to
42 # encode additional information (e.g. Thumb mode for ARM). To get the
43 # real instruction address from such an encoded address, we need to
44 # AND it with this value.
51 return "Area(0x%x-0x%x, %s)" % (area
[START
], area
[END
], area
[PROPS
])
59 def __init__(self
, start
, end
=None):
60 self
.ranges
= RangeSet()
def add_insn(self, addr, sz):
    """Add the span of one instruction to this object's ``ranges``.

    addr -- start address of the instruction
    sz   -- value added to ``addr`` to obtain the end of the span
            (presumably the instruction size in bytes)

    The span is handed to ``self.ranges.add()`` as an ``(addr, addr + sz)``
    tuple.
    """
    insn_end = addr + sz
    span = (addr, insn_end)
    self.ranges.add(span)
def add_range(self, start, end):
    """Add an explicit ``(start, end)`` span to this object's ``ranges``.

    Both values are passed through unchanged as a 2-tuple to
    ``self.ranges.add()``.
    """
    span = (start, end)
    self.ranges.add(span)
71 return self
.ranges
.to_list()
74 if self
.end
is not None:
76 bounds
= self
.ranges
.bounds()
80 def get_end_method(self
):
81 if self
.end
is not None:
82 bounds
= self
.ranges
.bounds()
85 addr
= "0x%x" % (self
.ranges
.bounds()[1] - 1)
86 return "as set by loader (detected: %s)" % addr
95 STR
= 0x10 # Continuation is DATA_CONT
96 FILL
= 0x40 # Filler/alignment bytes
97 FUNC
= 0x80 # Can appear with CODE, meaning this instruction belongs to a function
101 # List of subareas and binary search index for it
102 self
.subarea_list
= []
103 self
.subarea_search
= []
104 # Map from referenced addresses to their properties. Among them:
106 # Properties of instruction's args; at the very least, this should
107 # differentiate between literal numeric values and addresses/offsets/pointers
114 # Cross-reference records
116 # Function start and beyond-end addresses, map to Function object
118 # Map from label to its address
120 # Problem spots which automatic control/data flow couldn't resolve
122 # Cached last accessed area
123 self
.last_area
= None
124 # Cached function start addresses
125 self
.func_starts
= None
126 # Map from func_starts's indexes to function objects
127 self
.func_starts_arr
= []
128 # True during loading stage, False during UI interaction stage
129 self
.is_loading
= False
130 # Was area flags/content changed (and thus require saving)?
135 def add_area(self
, start
, end
, props
):
136 log
.debug("add_area(%x, %x, %s)", start
, end
, props
)
138 bytes
= bytearray(sz
)
139 flags
= bytearray(sz
)
140 a
= (start
, end
, props
, bytes
, flags
)
141 self
.area_list
.append(a
)
142 # Area list should be sorted. Assume it's short and just resort it each time.
143 self
.area_list
.sort()
147 return self
.area_list
def area_no(self, area):
    """Return the position of ``area`` within ``self.area_list``.

    The lookup is done with ``list.index()``, so it raises ``ValueError``
    when the area is not in the list.
    """
    areas = self.area_list
    position = areas.index(area)
    return position
152 def addr2area(self
, addr
):
155 if a
[0] <= addr
<= a
[1]:
156 return (addr
- a
[0], a
)
157 for a
in self
.area_list
:
158 if a
[0] <= addr
<= a
[1]:
160 return (addr
- a
[0], a
)
164 return self
.area_list
[0][START
]
167 return self
.area_list
[-1][END
]
169 # Return next address in the address space, or None
170 def next_addr(self
, addr
):
171 offset
, area
= self
.addr2area(addr
)
172 if addr
!= area
[END
]:
174 i
= self
.area_no(area
) + 1
175 if i
== len(self
.area_list
):
177 return self
.area_list
[i
][START
]
179 def is_exec(self
, addr
):
180 off
, area
= self
.addr2area(addr
)
183 return "X" in area
[PROPS
]["access"]
def load_content(self, file, addr, sz=None):
    """Read raw content from ``file`` into the area that contains ``addr``.

    file -- object providing ``readinto()`` (e.g. a binary file)
    addr -- address at which the loaded bytes start
    sz   -- number of bytes to load; ``None`` (the default) means
            "up to the end of the area"

    Raises InvalidAddrException if ``addr`` does not belong to any area.
    """
    off, area = self.addr2area(addr)
    if area is None:
        # Report an address outside every area the same way the other
        # accessors do, instead of failing on ``None[BYTES]`` below.
        raise InvalidAddrException(addr)
    # Only a missing size means "to the end of the area". An explicit
    # size of 0 must load nothing; a plain truthiness test would treat it
    # like None and fill the rest of the area.
    to = None if sz is None else off + sz
    # Slicing a memoryview makes readinto() write straight into the
    # area's byte buffer.
    file.readinto(memoryview(area[BYTES])[off:to])
def is_valid_addr(self, addr):
    """Tell whether ``addr`` resolves to an area.

    Returns True when the area part of ``self.addr2area(addr)`` is not
    None, False otherwise.
    """
    _offset, containing_area = self.addr2area(addr)
    if containing_area is None:
        return False
    return True
196 def get_byte(self
, addr
):
197 off
, area
= self
.addr2area(addr
)
199 raise InvalidAddrException(addr
)
200 return area
[BYTES
][off
]
202 def set_byte(self
, addr
, val
):
204 off
, area
= self
.addr2area(addr
)
206 raise InvalidAddrException(addr
)
207 area
[BYTES
][off
] = val
& 0xff
209 def get_bytes(self
, addr
, sz
):
210 off
, area
= self
.addr2area(addr
)
212 raise InvalidAddrException(addr
)
213 return area
[BYTES
][off
:off
+ sz
]
215 def get_data(self
, addr
, sz
):
218 sym
= self
.get_addr_prop(addr
, "sym")
222 off
, area
= self
.addr2area(addr
)
225 val
= val |
(area
[BYTES
][off
+ i
] << 8 * i
)
228 def set_data(self
, addr
, data
, sz
):
230 off
, area
= self
.addr2area(addr
)
233 area
[BYTES
][off
+ i
] = data
& 0xff
236 # Convenience function for plugins
237 def memcpy(self
, dst
, src
, sz
):
239 b
= self
.get_byte(src
)
240 self
.set_byte(dst
, b
)
244 # Binary Data Flags API
246 def get_flags(self
, addr
, mask
=0x7f):
247 off
, area
= self
.addr2area(addr
)
249 raise InvalidAddrException(addr
)
250 return area
[FLAGS
][off
] & mask
252 def get_unit_size(self
, addr
):
253 off
, area
= self
.addr2area(addr
)
256 if flags
[off
] & 0x7f == self
.CODE
:
258 elif flags
[off
] in (self
.DATA
, self
.STR
):
260 elif flags
[off
] == self
.FILL
:
267 while flags
[off
] == f
:
276 # Taking an offset inside unit, return offset to the beginning of unit
278 def adjust_offset_reverse(cls
, off
, area
):
280 if flags
[off
] == cls
.FILL
:
282 if flags
[off
] != cls
.FILL
:
289 if flags
[off
] in (cls
.CODE_CONT
, cls
.DATA_CONT
):
295 def adjust_addr_reverse(self
, addr
):
296 off
, area
= self
.addr2area(addr
)
299 return self
.adjust_offset_reverse(off
, area
) + area
[START
]
301 def set_flags(self
, addr
, sz
, head_fl
, rest_fl
=0):
303 off
, area
= self
.addr2area(addr
)
307 for i
in range(sz
- 1):
308 flags
[off
+ i
] = rest_fl
def make_undefined(self, addr, sz):
    """Reset the flags of ``sz`` units starting at ``addr`` to ``self.UNK``.

    Delegates to ``self.set_flags()`` with ``self.UNK`` as both the head
    flag and the flag for the remaining units.
    """
    unknown = self.UNK
    self.set_flags(addr, sz, unknown, unknown)
313 def make_code(self
, addr
, sz
, extra_flags
=0):
315 off
, area
= self
.addr2area(addr
)
316 area_byte_flags
= area
[FLAGS
]
317 area_byte_flags
[off
] |
= self
.CODE | extra_flags
318 for i
in range(sz
- 1):
319 area_byte_flags
[off
+ 1 + i
] |
= self
.CODE_CONT
321 # Mark instructions in given range as belonging to function
322 def mark_func_bytes(self
, addr
, sz
):
324 off
, area
= self
.addr2area(addr
)
325 area_byte_flags
= area
[FLAGS
]
327 fl
= area_byte_flags
[off
+ i
]
328 assert fl
in (self
.CODE
, self
.CODE_CONT
)
330 area_byte_flags
[off
+ i
] |
= self
.FUNC
332 def make_data(self
, addr
, sz
):
334 off
, area
= self
.addr2area(addr
)
335 area_byte_flags
= area
[FLAGS
]
336 area_byte_flags
[off
] |
= self
.DATA
337 for i
in range(sz
- 1):
338 area_byte_flags
[off
+ 1 + i
] |
= self
.DATA_CONT
340 def make_data_array(self
, addr
, sz
, num_items
, prefix
=""):
341 # Make a data array. First-class arrays are not supported so far,
342 # so just mark data units sequentially
343 self
.append_comment(addr
, "%sArray, num %s: %d" % (prefix
, "bytes" if sz
== 1 else "items", num_items
))
344 for i
in range(num_items
):
345 self
.make_data(addr
, sz
)
def make_filler(self, addr, sz):
    """Mark ``sz`` units starting at ``addr`` as filler/alignment bytes.

    Delegates to ``self.set_flags()`` with ``self.FILL`` as both the head
    flag and the flag for the remaining units.
    """
    filler = self.FILL
    self.set_flags(addr, sz, filler, filler)
351 # Address properties API
353 def set_addr_prop(self
, addr
, prop
, val
):
355 self
.addr_map
.setdefault(addr
, {})[prop
] = val
def get_addr_prop(self, addr, prop, default=None):
    """Look up a single property of an address.

    addr    -- address used as the key into ``self.addr_map``
    prop    -- property name to fetch from that address's property dict
    default -- value returned when the address has no entry or the
               entry has no such property
    """
    props_at_addr = self.addr_map.get(addr, {})
    return props_at_addr.get(prop, default)
def get_addr_prop_dict(self, addr):
    """Return the whole property dict stored for ``addr``.

    For an address with no entry in ``self.addr_map`` a fresh empty dict
    is returned; it is not inserted into the map.
    """
    no_props = {}
    return self.addr_map.get(addr, no_props)
365 def get_default_label_prefix(self
, ea
):
366 fl
= self
.get_flags(ea
)
def get_default_label(self, ea):
    """Build the default (automatic) label text for address ``ea``.

    The result is the prefix from ``self.get_default_label_prefix(ea)``
    followed by the address as at least 8 zero-padded lowercase hex digits.
    """
    return "%s%08x" % (self.get_default_label_prefix(ea), ea)
379 def make_label(self
, prefix
, ea
):
380 l
= self
.get_addr_prop(ea
, "label")
381 if isinstance(l
, str):
382 # If it's real label, don't change it
385 prefix
= self
.get_default_label_prefix(ea
)
386 l
= "%s%08x" % (prefix
, ea
)
387 self
.set_addr_prop(ea
, "label", l
)
388 self
.labels_rev
[l
] = ea
390 # auto_label will change its prefix automatically based on
391 # type of data it points.
392 def make_auto_label(self
, ea
):
393 if self
.get_addr_prop(ea
, "label"):
395 self
.set_addr_prop(ea
, "label", ea
)
396 self
.labels_rev
[ea
] = ea
398 # Delete a label, only if it's auto
399 def del_auto_label(self
, ea
):
400 label
= self
.get_addr_prop(ea
, "label")
401 if not label
or isinstance(label
, str):
403 self
.set_addr_prop(ea
, "label", None)
404 del self
.labels_rev
[ea
]
406 def get_label(self
, ea
):
407 label
= self
.get_addr_prop(ea
, "label")
408 if isinstance(label
, int):
409 return "%s%08x" % (self
.get_default_label_prefix(ea
), label
)
412 def set_label(self
, ea
, label
):
413 # Make sure the label can be actually visible - create an area for it if none
414 off
, area
= self
.addr2area(ea
)
416 self
.add_area(ea
, ea
, {"name": "autocreated to host %s label" % label
})
418 existing
= self
.get_addr_prop(ea
, "label")
419 if existing
is not None and not isinstance(existing
, int):
420 log
.warn("Duplicate label for %x: %s (existing: %s)" % (ea
, label
, existing
))
421 self
.append_comment(ea
, "Another label: " + label
)
423 self
.set_addr_prop(ea
, "label", label
)
424 self
.labels_rev
[label
] = ea
426 def make_unique_label(self
, ea
, label
):
427 existing
= self
.get_label(ea
)
428 if existing
== label
:
435 if l
not in self
.labels_rev
:
436 self
.set_label(ea
, l
)
437 if self
.is_loading
and cnt
> 0:
438 self
.append_comment(ea
, "Original label: " + label
)
def get_label_list(self):
    """Return a sorted list of the textual names of all known labels.

    Keys of ``self.labels_rev`` that are strings are used as they are;
    any other key is converted with ``self.get_default_label()``.
    """
    names = []
    for key in self.labels_rev.keys():
        if isinstance(key, str):
            names.append(key)
        else:
            names.append(self.get_default_label(key))
    names.sort()
    return names
445 def resolve_label(self
, label
):
446 if label
in self
.labels_rev
:
447 return self
.labels_rev
[label
]
449 ea
= int(label
.split("_", 1)[1], 16)
452 if ea
in self
.labels_rev
and self
.get_default_label(ea
) == label
:
def label_exists(self, label):
    """Return True if ``label`` is a key of ``self.labels_rev``."""
    known_labels = self.labels_rev
    return label in known_labels
460 def get_comment(self
, ea
):
461 comm
= self
.get_addr_prop(ea
, "comm")
def set_comment(self, ea, comm):
    """Store ``comm`` as the "comm" property of address ``ea``.

    Any previous value of that property is replaced.
    """
    self.set_addr_prop(ea, "comm", comm)
def append_comment(self, ea, comm):
    """Add ``comm`` to the comment of address ``ea``.

    If ``self.get_comment(ea)`` yields an existing comment, the new text
    is appended after a newline; otherwise ``comm`` becomes the comment.
    The result is stored as the "comm" address property.
    """
    previous = self.get_comment(ea)
    combined = comm if previous is None else previous + "\n" + comm
    self.set_addr_prop(ea, "comm", combined)
473 # (Pseudo)instruction Argument Properties API
def set_arg_prop(self, ea, arg_no, prop, prop_val):
    """Set one property of one argument of the (pseudo)instruction at ``ea``.

    ea       -- address whose "args" property is updated
    arg_no   -- argument number, used as key in the "args" dict
    prop     -- name of the argument property
    prop_val -- value to store

    The "args" dict (created empty if absent) is written back with
    ``self.set_addr_prop()`` after the update.
    """
    all_args = self.get_addr_prop(ea, "args", {})
    # Create the per-argument dict on first use, then update it in place.
    all_args.setdefault(arg_no, {})[prop] = prop_val
    self.set_addr_prop(ea, "args", all_args)
def get_arg_prop(self, ea, arg_no, prop):
    """Fetch one property of one argument of the (pseudo)instruction at ``ea``.

    Returns None when the address has no "args" property, the argument
    has no entry, or the entry lacks ``prop``.
    """
    all_args = self.get_addr_prop(ea, "args", {})
    this_arg = all_args.get(arg_no, {})
    return this_arg.get(prop)
def get_arg_prop_dict(self, ea, arg_no):
    """Return the property dict of argument ``arg_no`` at address ``ea``.

    An empty dict is returned when the address has no "args" property or
    the argument has no entry.
    """
    all_args = self.get_addr_prop(ea, "args", {})
    no_props = {}
    return all_args.get(arg_no, no_props)
491 def make_arg_offset(self
, insn_addr
, arg_no
, ref_addr
):
492 # Convert an immediate argument to an offset one
493 # insn_addr - address of (pseudo)instruction
494 # arg_no - argument no. of instruction
495 # ref_addr - value of the argument (i.e. address it refers to)
496 old_subtype
= self
.get_arg_prop(insn_addr
, arg_no
, "subtype")
497 if old_subtype
and old_subtype
!= IMM_ADDR
:
498 # Preserve old numeric value subtype to unconvert back to it
500 self
.set_arg_prop(insn_addr
, arg_no
, "num_subtype", old_subtype
)
502 self
.set_arg_prop(insn_addr
, arg_no
, "subtype", IMM_ADDR
)
504 if isinstance(ref_addr
, str):
506 # TODO: this works only for "dd" virtual instruction
507 self
.set_addr_prop(insn_addr
, "sym", ref_addr
)
510 label
= self
.get_label(ref_addr
)
512 self
.make_auto_label(ref_addr
)
513 self
.add_xref(insn_addr
, ref_addr
, idaapi
.dr_O
)
def unmake_arg_offset(self, insn_addr, arg_no, ref_addr):
    """Turn an offset argument back into a plain immediate value.

    insn_addr -- address of the (pseudo)instruction
    arg_no    -- argument number within that instruction
    ref_addr  -- address the argument referred to
    """
    # The argument's "subtype" is set to whatever is stored in
    # "num_subtype" (None if nothing is stored).
    restored_subtype = self.get_arg_prop(insn_addr, arg_no, "num_subtype")
    self.set_arg_prop(insn_addr, arg_no, "subtype", restored_subtype)
    self.del_xref(insn_addr, ref_addr, idaapi.dr_O)
    if self.get_xrefs(ref_addr):
        # Other cross-references remain, so the label stays.
        return
    # That was the last xref: drop the label too, if it is automatic.
    self.del_auto_label(ref_addr)
def is_arg_offset(self, insn_addr, arg_no):
    """Return True if the argument's "subtype" property equals IMM_ADDR.

    insn_addr -- address of the (pseudo)instruction
    arg_no    -- argument number within that instruction
    """
    subtype = self.get_arg_prop(insn_addr, arg_no, "subtype")
    return subtype == IMM_ADDR
def add_xref(self, from_ea, to_ea, type):
    """Record a cross-reference from ``from_ea`` to ``to_ea``.

    The "xrefs" property of ``to_ea`` maps each referencing address to
    its reference type; an existing entry for ``from_ea`` is overwritten.
    (``type`` shadows the builtin; the name is kept for callers that pass
    it by keyword.)
    """
    incoming = self.get_addr_prop(to_ea, "xrefs", {})
    incoming.update({from_ea: type})
    self.set_addr_prop(to_ea, "xrefs", incoming)
535 def del_xref(self
, from_ea
, to_ea
, type):
536 xrefs
= self
.get_addr_prop(to_ea
, "xrefs", {})
538 self
.set_addr_prop(to_ea
, "xrefs", xrefs
)
540 def get_xrefs(self
, ea
):
541 xrefs
= self
.get_addr_prop(ea
, "xrefs", None)
546 def make_func(self
, from_ea
, to_ea_excl
=None):
547 f
= self
.get_addr_prop(from_ea
, "fun_s")
550 f
= Function(from_ea
, to_ea_excl
)
551 self
.set_addr_prop(from_ea
, "fun_s", f
)
553 if to_ea_excl
is not None:
554 self
.set_addr_prop(to_ea_excl
, "fun_e", f
)
556 self
.func_starts
= None
def is_func(self, ea):
    """Return True if address ``ea`` has a "fun_s" property set."""
    func_at_start = self.get_addr_prop(ea, "fun_s")
    return func_at_start is not None
562 # If ea is start of function, return Function object
def get_func_start(self, ea):
    """Return the "fun_s" property of ``ea`` (the function starting there).

    Returns None when no such property is set for the address.
    """
    started_here = self.get_addr_prop(ea, "fun_s")
    return started_here
566 # If ea is end of function, return Function object
def get_func_end(self, ea):
    """Return the "fun_e" property of ``ea`` (the function ending there).

    Returns None when no such property is set for the address.
    """
    ended_here = self.get_addr_prop(ea, "fun_e")
    return ended_here
def set_func_end(self, func, ea):
    """Store ``func`` as the "fun_e" property of address ``ea``.

    func -- function object to associate with the end address
    ea   -- address that marks the function end
    """
    self.set_addr_prop(ea, "fun_e", func)
573 # Look up function containing address
574 def lookup_func(self
, ea
):
575 # TODO: cache func ranges, use binary search instead
576 if self
.func_starts
is None:
577 self
.func_starts
= []
578 self
.func_starts_arr
= []
579 for start
, props
in sorted(self
.addr_map
.items()):
580 func
= props
.get("fun_s")
582 self
.func_starts
.append(start
)
583 self
.func_starts_arr
.append(func
)
585 i
= bisect
.bisect_right(self
.func_starts
, ea
)
587 func
= self
.func_starts_arr
[i
- 1]
589 if end
and func
.start
<= ea
< end
:
594 def iter_funcs(self
):
595 for addr
, props
in self
.addr_map
.items():
596 func
= props
.get("fun_s")
def get_func_list(self):
    """Return the sorted labels of all functions.

    Each ``(addr, func)`` pair produced by ``self.iter_funcs()`` is
    represented by ``self.get_label(addr)``.
    """
    labels = []
    for addr, _func in self.iter_funcs():
        labels.append(self.get_label(addr))
    labels.sort()
    return labels
def add_subarea(self, start, end, name):
    """Register a named subarea spanning ``start`` to ``end``.

    The ``(start, end, name)`` record goes into ``self.subarea_list`` and
    its start address into ``self.subarea_search``. Neither list is
    re-sorted here.
    """
    log.debug("add_subarea(%x, %x, %s)", start, end, name)
    record = (start, end, name)
    self.subarea_list.append(record)
    self.subarea_search.append(start)
610 # Call this once all add_subarea() calls were made
def finish_subareas(self):
    """Sort the subarea list and its search index in place.

    Meant to be called once all add_subarea() calls have been made.
    """
    for collection in (self.subarea_list, self.subarea_search):
        collection.sort()
615 # Look up subarea containing address
616 def lookup_subarea(self
, ea
):
617 i
= bisect
.bisect_right(self
.subarea_search
, ea
)
619 area
= self
.subarea_list
[i
- 1]
620 if area
[0] <= ea
<= area
[1]:
def add_issue(self, ea, descr):
    """Record a problem description for address ``ea``.

    ``self.issues`` keeps one description per address, so a later call
    for the same address replaces the earlier description.
    """
    issue_map = self.issues
    issue_map[ea] = descr
629 def get_issues(self
):
631 for ea
in sorted(self
.issues
.keys()):
632 res
.append((ea
, self
.issues
[ea
]))
637 def save_area(self
, stream
, area
):
638 stream
.write("%08x %08x\n" % (area
[START
], area
[END
]))
642 chunk
= flags
[i
:i
+ 32]
645 stream
.write(str(binascii
.hexlify(chunk
), 'utf-8') + "\n")
def save_areas(self, stream):
    """Write every area to ``stream``.

    Areas are written in ``self.area_list`` order, each through
    ``self.save_area()``.
    """
    for each_area in self.area_list:
        self.save_area(stream, each_area)
655 def save_addr_props(self
, prefix
):
656 areas
= self
.area_list
658 stream
= open(prefix
+ ".%08x" % areas
[area_i
][START
], "w")
659 area_end
= areas
[area_i
][END
]
660 stream
.write("header:\n")
661 stream
.write(" version: 1.0\n")
662 for addr
, props
in sorted(self
.addr_map
.items()):
663 # If entry has just fun_e data, skip it. As fun_e is set
664 # on an address past the last byte of func, this address
665 # also may not belong to any section, so skipping it
666 # to start with is helpful.
667 if len(props
) == 1 and "fun_e" in props
:
673 while addr
> areas
[area_i
][END
]:
675 assert addr
>= areas
[area_i
][START
]
676 stream
= open(prefix
+ ".%08x" % areas
[area_i
][START
], "w")
677 #stream.write("addr=%x area_end=%x\n" % (addr, area_end))
678 area_end
= areas
[area_i
][END
]
679 stream
.write("header:\n")
680 stream
.write(" version: 1.0\n")
681 stream
.write("0x%08x:\n" % addr
)
682 fl
= self
.get_flags(addr
)
683 stream
.write(" f: %s %02x\n" % (flag2char(fl
), fl
))
684 label
= props
.get("label")
685 arg_props
= props
.get("args")
686 comm
= props
.get("comm")
687 xrefs
= props
.get("xrefs")
688 func
= props
.get("fun_s")
689 if label
is not None:
691 stream
.write(" l:\n")
693 stream
.write(" l: %s\n" % label
)
694 if arg_props
is not None:
695 arg_props_header
= False
696 for arg_no
, data
in sorted(arg_props
.items()):
697 data
= {k
: v
for k
, v
in data
.items() if v
is not None}
699 if not arg_props_header
:
700 stream
.write(" args:\n")
701 arg_props_header
= True
702 stream
.write(" %s: %r\n" % (arg_no
, data
))
703 #for k, v in sorted(data.items()):
704 # stream.write(" %s: %s\n" % (k, v))
706 stream
.write(" cmnt: %r\n" % comm
)
709 if func
.end
is not None:
710 stream
.write(" fn_end: 0x%08x\n" % func
.end
)
712 stream
.write(" fn_end: '?'\n")
713 stream
.write(" fn_ranges: [")
715 for r
in func
.get_ranges():
718 stream
.write("[0x%08x,0x%08x]" % r
)
723 stream
.write(" x:\n" % xrefs
)
724 for from_addr
in sorted(xrefs
.keys()):
725 stream
.write(" - 0x%08x: %s\n" % (from_addr
, xrefs
[from_addr
]))
727 def load_addr_props(self
, stream
):
728 l
= stream
.readline()
729 assert l
== "header:\n"
730 l
= stream
.readline()
731 assert l
== " version: 1.0\n"
732 l
= stream
.readline()
734 assert l
.endswith(":\n")
735 addr
= int(l
[:-2], 0)
736 props
= self
.addr_map
.get(addr
, {})
737 l
= stream
.readline()
738 while l
and l
[0] == " ":
739 key
, val
= [x
.strip() for x
in l
.split(":", 1)]
746 self
.labels_rev
[val
] = addr
748 props
["comm"] = val
[1:-1].replace("\\n", "\n")
749 elif key
== "fn_end":
754 f
= Function(addr
, end
)
756 # Handled by finish_func() below
758 # self.addr_map[end] = {"fun_e": f}
759 elif key
== "fn_ranges":
761 assert val
.startswith("[[") and val
.endswith("]]"), val
764 for r
in val
.split("], ["):
765 r
= [int(x
, 0) for x
in r
.split(",")]
767 # Now, call finish func to set func end address, either from
768 # fn_end or fn_ranges
774 l
= stream
.readline()
775 if not l
or not l
.startswith(" "):
777 arg_no
, data
= [x
.strip() for x
in l
.split(":", 1)]
778 assert data
[0] == "{" and data
[-1] == "}"
781 for pair
in data
.split(","):
782 seq
= [x
.strip() for x
in pair
.split(":", 1)]
784 assert x
[0] == "'" and x
[-1] == "'", x
785 k
, v
= [x
[1:-1] for x
in seq
]
787 arg_props
[int(arg_no
)] = vals
788 props
["args"] = arg_props
793 l
= stream
.readline()
794 if not l
or not l
.startswith(" - "):
796 key
, val
= [x
.strip() for x
in l
[3:].split(":", 1)]
797 xrefs
[int(key
, 0)] = val
799 props
["xrefs"] = xrefs
802 l
= stream
.readline()
804 self
.addr_map
[addr
] = props
806 def load_area(self
, stream
, area
):
807 l
= stream
.readline()
808 vals
= [int(v
, 16) for v
in l
.split()]
809 assert area
[START
] == vals
[0] and area
[END
] == vals
[1]
813 l
= stream
.readline().rstrip()
816 l
= binascii
.unhexlify(l
)
817 flags
[i
:i
+ len(l
)] = l
def load_areas(self, stream):
    """Load every area from ``stream``.

    Areas are read in ``self.area_list`` order, each through
    ``self.load_area()``.
    """
    for each_area in self.area_list:
        self.load_area(stream, each_area)
825 # Hack for idaapi interfacing
826 # TODO: should go to "Analysis" object
827 def analisys_stack_push(self
, ea
, flow_flag
=idaapi
.fl_JN
):
828 global analisys_stack_branches
, analisys_stack_calls
829 global analisys_stack_returns
, analysis_current_func
830 # If we know something is func (e.g. from loader), jump
831 # to it means tail-call.
832 if flow_flag
== idaapi
.fl_RET_FROM_CALL
:
833 analisys_stack_returns
.append((ea
, analysis_current_func
))
834 elif flow_flag
== idaapi
.fl_CN
or self
.is_func(ea
):
835 analisys_stack_calls
.append(ea
)
837 analisys_stack_branches
.append(ea
)
840 ADDRESS_SPACE
= AddressSpace()
842 def set_processor(p
):
845 idaapi
.set_processor(p
)
849 global arch_id
, code_addr_mask
851 if arch_id
== "arm_32_thumb":
855 analisys_stack_calls
= []
856 analisys_stack_returns
= []
857 analisys_stack_branches
= []
858 analysis_current_func
= None
860 def add_entrypoint(ea
, as_func
=True):
862 ADDRESS_SPACE
.make_func(ea
, None)
863 analisys_stack_calls
.append(ea
)
865 analisys_stack_branches
.append(ea
)
868 _processor
.cmd
.ea
= ea
869 _processor
.cmd
.size
= 0
870 _processor
.cmd
.disasm
= None
874 log
.info("Function %s (0x%x) ranges: %s" % (ADDRESS_SPACE
.get_label(f
.start
), f
.start
, f
.ranges
.str(hex)))
877 ADDRESS_SPACE
.set_func_end(f
, end
)
879 def analyze(callback
=lambda cnt
:None):
880 global analysis_current_func
883 analysis_current_func
= None
885 if analisys_stack_branches
:
886 ea
= analisys_stack_branches
.pop()
888 fl
= ADDRESS_SPACE
.get_flags(ea
, 0xff)
889 except InvalidAddrException
:
890 log
.warn("Branch outside address space detected: 0x%x" % ea
)
893 if fl
== ADDRESS_SPACE
.CODE | ADDRESS_SPACE
.FUNC
:
894 fun
= ADDRESS_SPACE
.get_func_start(ea
)
896 log
.warn("Jump to (or flow into) a function at 0x%x detected" % ea
)
898 if analysis_current_func
:
899 if fl
== ADDRESS_SPACE
.CODE | ADDRESS_SPACE
.FUNC
:
901 if fl
not in (ADDRESS_SPACE
.CODE
, ADDRESS_SPACE
.UNK
):
902 log
.warn("Unexpected flags 0x%x at 0x%x while tracing code branch, skipping it", fl
, ea
)
903 ADDRESS_SPACE
.add_issue(ea
, "Jump/flow into non-code")
906 if fl
!= ADDRESS_SPACE
.UNK
:
907 if fl
!= ADDRESS_SPACE
.CODE
:
908 ADDRESS_SPACE
.add_issue(ea
, "Jump/flow into non-code")
910 elif analisys_stack_calls
:
911 finish_func(analysis_current_func
)
912 analysis_current_func
= None
913 ea
= analisys_stack_calls
.pop()
914 fun
= ADDRESS_SPACE
.get_func_start(ea
)
917 log
.info("Starting analysis of function 0x%x" % ea
)
918 analysis_current_func
= ADDRESS_SPACE
.make_func(ea
)
919 elif analisys_stack_returns
:
920 ea
, analysis_current_func
= analisys_stack_returns
.pop()
921 #log.debug("Restarting analysis of call return at 0x%x (fl=%x)", ea, ADDRESS_SPACE.get_flags(ea, 0xff))
922 analisys_stack_branches
.append(ea
)
925 finish_func(analysis_current_func
)
929 insn_sz
= _processor
.ana()
930 except InvalidAddrException
:
931 # Ran out of memory area, just continue
932 # with the rest of paths
934 # print("size: %d" % insn_sz, _processor.cmd)
936 if not _processor
.emu():
938 if analysis_current_func
:
939 analysis_current_func
.add_insn(ea
, insn_sz
)
940 ADDRESS_SPACE
.make_code(ea
, insn_sz
, ADDRESS_SPACE
.FUNC
)
942 ADDRESS_SPACE
.make_code(ea
, insn_sz
)
944 # print("%08x %s" % (_processor.cmd.ea, _processor.cmd.disasm))
950 # if not analisys_stack:
951 # print("Analisys finished")
957 def __init__(self
, target_addr
=0, target_subno
=0):
964 self
.target_addr
= target_addr
965 self
.target_subno
= target_subno
966 self
.target_addr_lineno_0
= -1
967 self
.target_addr_lineno
= -1
968 self
.target_addr_lineno_real
= -1
973 def add_object(self
, addr
, line
):
974 if addr
!= self
._last
_addr
:
975 self
._last
_addr
= addr
977 if addr
== self
.target_addr
:
978 if self
._subcnt
== 0:
979 # Contains first line related to the given addr
980 self
.target_addr_lineno_0
= self
._cnt
981 if self
._subcnt
== self
.target_subno
:
982 # Contains line no. target_subno related to the given addr
983 self
.target_addr_lineno
= self
._cnt
985 # Contains line where actual instr/data/unknown bytes are
986 # rendered (vs labels/xrefs/etc.)
987 self
.target_addr_lineno_real
= self
._cnt
988 self
._lines
.append(line
)
989 self
._addr
2line
[(addr
, self
._subcnt
)] = self
._cnt
990 line
.subno
= self
._subcnt
992 # Line of "real" disasm object
993 self
._addr
2line
[(addr
, -1)] = self
._cnt
def addr2line_no(self, addr, subno=-1):
    """Map an ``(addr, subno)`` pair to its recorded line number.

    addr  -- address of the object
    subno -- sub-line number for that address; the default -1 is the key
             used for the line of the "real" disasm object

    Returns None when the pair is not present in ``self._addr2line``.
    """
    lookup_key = (addr, subno)
    return self._addr2line.get(lookup_key)
def undefine_unit(self, addr):
    """Mark the whole unit at ``addr`` as undefined.

    The unit size is taken from ``self.AS.get_unit_size(addr)`` and that
    many units are passed to ``self.AS.make_undefined()``.
    """
    unit_len = self.AS.get_unit_size(addr)
    self.AS.make_undefined(addr, unit_len)
def data_sz2mnem(sz):
    """Return the data mnemonic for a unit of size ``sz``.

    Sizes 1, 2 and 4 map to "db", "dw" and "dd"; any other size raises
    KeyError. The mnemonic is passed through
    ``idaapi.fillstr(..., idaapi.DEFAULT_WIDTH)`` before being returned.
    """
    mnemonics = {1: "db", 2: "dw", 4: "dd"}
    mnemonic = mnemonics[sz]
    return idaapi.fillstr(mnemonic, idaapi.DEFAULT_WIDTH)
1012 # Size of "leader fields" in disasm window - address, raw bytes, etc.
1013 # May be set by MVC controller
1016 # Default indent for a line
1017 indent
= " " * idaapi
.DEFAULT_INDENT
1019 # Default operand positions list is empty and set on class level
1020 # to save memory. To be overridden on object level.
1023 # If False, this object corresponds to real bytes in input binary stream
1024 # If True, doesn't correspond to bytes in memory: labels, etc.
1027 # Textual comment to append
1030 # Instance variable expected to be set on each instance:
1033 # subno = # relative no. of several lines corresponding to the same ea
1036 # Render object as a string, set it as .cache, and return it
1039 def get_operand_addr(self
):
1040 # Get "the most addressful" operand
1041 # This for example will be called when Enter is pressed
1042 # not on a specific instruction operand, so this should
1043 # return value of the operand which contains an address
1044 # (or the "most suitable" of them if there're few).
1048 # Each object should return real character len as display on the screen.
1049 # Should be fast - called on each cursor movement.
1051 return self
.LEADER_SIZE
+ len(self
.indent
) + len(self
.cache
)
1052 except AttributeError:
1053 return self
.LEADER_SIZE
+ len(self
.indent
) + len(self
.render())
def content_len(self):
    """Return the length of the object excluding leader fields and indent.

    Computed as ``len(self)`` minus ``self.LEADER_SIZE`` and the length
    of ``self.indent``.
    """
    leading = self.LEADER_SIZE + len(self.indent)
    total = len(self)
    return total - leading
1059 class Instruction(idaapi
.insn_t
, DisasmObj
):
1064 _processor
.cmd
= self
1066 s
= self
.disasm
+ self
.comment
1070 def get_operand_addr(self
):
1071 # Assumes RISC design where only one operand can be address
1073 for o
in self
._operands
:
1074 if o
.flags
& idaapi
.OF_SHOW
:
1075 if o
.type == idaapi
.o_near
:
1076 # Jumps have priority
1078 if o
.type == idaapi
.o_mem
:
1080 elif o
.type == idaapi
.o_imm
:
1087 class Data(DisasmObj
):
1091 def __init__(self
, ea
, sz
, val
):
1097 subtype
= ADDRESS_SPACE
.get_arg_prop(self
.ea
, 0, "subtype")
1098 if subtype
== IMM_ADDR
:
1100 if not isinstance(label
, str):
1101 label
= ADDRESS_SPACE
.get_label(label
)
1102 s
= "%s%s" % (data_sz2mnem(self
.size
), label
)
1104 s
= "%s0x%x" % (data_sz2mnem(self
.size
), self
.val
)
1109 def get_operand_addr(self
):
1113 o
.type = idaapi
.o_imm
1117 class String(DisasmObj
):
1121 def __init__(self
, ea
, sz
, val
):
1127 s
= "%s%s" % (data_sz2mnem(1), repr(self
.val
).replace("\\x00", "\\0"))
1133 class Fill(DisasmObj
):
1137 def __init__(self
, ea
, sz
):
1140 self
.cache
= idaapi
.fillstr(".fill", idaapi
.DEFAULT_WIDTH
) + str(sz
)
1146 class Unknown(DisasmObj
):
1151 def __init__(self
, ea
, val
):
1157 if 0x20 <= self
.val
<= 0x7e:
1158 ch
= " ; '%s'" % chr(self
.val
)
1159 s
= "%s0x%02x%s" % (idaapi
.fillstr("unk", idaapi
.DEFAULT_WIDTH
), self
.val
, ch
)
1165 class Label(DisasmObj
):
1169 def __init__(self
, ea
):
1173 label
= ADDRESS_SPACE
.get_label(self
.ea
)
1179 class Xref(DisasmObj
):
1183 def __init__(self
, ea
, from_addr
, type):
1185 self
.from_addr
= from_addr
1189 func
= ADDRESS_SPACE
.lookup_func(self
.from_addr
)
1192 extra
= ADDRESS_SPACE
.get_label(func
.start
)
1193 off
= self
.from_addr
- func
.start
1195 extra
+= "+0x%x" % off
1196 extra
= " (%s)" % extra
1197 s
= (" " * idaapi
.DEFAULT_XREF_INDENT
) + "; xref: %s 0x%x" % (self
.type, self
.from_addr
) + extra
1201 def get_operand_addr(self
):
1203 o
.addr
= self
.from_addr
1207 class Literal(DisasmObj
):
1211 def __init__(self
, ea
, str):
1219 # Separate types to differentiate content
1220 class AreaWrapper(Literal
):
1223 # Separate types to differentiate content
1224 class FunctionWrapper(Literal
):
1230 render_partial(model
, 0, 0, 1000000)
1233 # How many bytes may a single disasm object (i.e. a line) occupy
1236 def render_partial_around(addr
, subno
, context_lines
):
1237 log
.debug("render_partial_around(%x, %d)", addr
, subno
)
1238 off
, area
= ADDRESS_SPACE
.addr2area(addr
)
1241 back
= context_lines
* MAX_UNIT_SIZE
1244 area_no
= ADDRESS_SPACE
.area_no(area
) - 1
1246 area
= ADDRESS_SPACE
.area_list
[area_no
]
1247 sz
= area
[1] - area
[0] + 1
1253 # Reached beginning of address space, just set as such
1256 log
.debug("render_partial_around: off=0x%x, %s", off
, str_area(area
))
1257 off
= ADDRESS_SPACE
.adjust_offset_reverse(off
, area
)
1258 log
.debug("render_partial_around adjusted: off=0x%x, %s", off
, str_area(area
))
1259 model
= Model(addr
, subno
)
1260 render_partial(model
, ADDRESS_SPACE
.area_list
.index(area
), off
, context_lines
, addr
)
1261 log
.debug("render_partial_around model done, lines: %d", len(model
.lines()))
1262 assert model
.target_addr_lineno_0
>= 0
1263 if model
.target_addr_lineno
== -1:
1264 # If we couldn't find exact subno, use 0th subno of that addr
1265 # TODO: maybe should be last subno, because if we couldn't find
1266 # exact one, it was ~ last and removed, so current last is "closer"
1268 model
.target_addr_lineno
= model
.target_addr_lineno_0
1272 def render_from(model
, addr
, num_lines
):
1273 off
, area
= ADDRESS_SPACE
.addr2area(addr
)
1276 return render_partial(model
, ADDRESS_SPACE
.area_list
.index(area
), off
, num_lines
)
1279 def render_partial(model
, area_no
, offset
, num_lines
, target_addr
=-1):
1280 model
.AS
= ADDRESS_SPACE
1282 #for a in ADDRESS_SPACE.area_list:
1283 while area_no
< len(ADDRESS_SPACE
.area_list
):
1284 a
= ADDRESS_SPACE
.area_list
[area_no
]
1291 model
.add_object(a
[START
], AreaWrapper(a
[START
], "; Start of 0x%x area (%s)" % (a
[START
], a
[PROPS
].get("name", "noname"))))
1294 areasize
= len(bytes
)
1297 # If we didn't yet reach target address, compensate for
1298 # the following decrement of num_lines. The logic is:
1299 # render all lines up to target_addr, and then num_lines past it.
1300 if target_addr
>= 0 and addr
< target_addr
:
1303 props
= ADDRESS_SPACE
.get_addr_prop_dict(addr
)
1304 func
= props
.get("fun_s")
1306 model
.add_object(addr
, FunctionWrapper(addr
, "; Start of function '%s'" % ADDRESS_SPACE
.get_label(func
.start
)))
1308 xrefs
= props
.get("xrefs")
1310 for from_addr
in sorted(xrefs
.keys()):
1311 model
.add_object(addr
, Xref(addr
, from_addr
, xrefs
[from_addr
]))
1313 label
= props
.get("label")
1315 model
.add_object(addr
, Label(addr
))
1318 if f
== AddressSpace
.UNK
:
1319 out
= Unknown(addr
, bytes
[i
])
1322 elif f
& AddressSpace
.DATA
:
1325 while j
< areasize
and flags
[j
] & AddressSpace
.DATA_CONT
:
1329 out
= Data(addr
, sz
, ADDRESS_SPACE
.get_data(addr
, sz
))
1331 elif f
== AddressSpace
.STR
:
1335 while j
< areasize
and flags
[j
] == AddressSpace
.DATA_CONT
:
1336 str += chr(bytes
[j
])
1339 out
= String(addr
, sz
, str)
1341 elif f
== AddressSpace
.FILL
:
1344 while j
< areasize
and flags
[j
] == AddressSpace
.FILL
:
1347 out
= Fill(addr
, sz
)
1349 elif f
== AddressSpace
.CODE
:
1350 out
= Instruction(addr
)
1351 _processor
.cmd
= out
1352 sz
= _processor
.ana()
1356 out
= Literal(addr
, "; UNEXPECTED value: %02x flags: %02x" % (bytes
[i
], f
))
1359 assert 0, "@%08x flags=%x" % (addr
, f
)
1361 comm
= props
.get("comm")
1363 comm_indent
= " " * (out
.content_len() + len(out
.indent
) + 2)
1364 out
.comment
= " ; " + comm
.split("\n", 1)[0]
1366 model
.add_object(addr
, out
)
1367 #sys.stdout.write(out + "\n")
1370 for comm_l
in comm
.split("\n")[1:]:
1371 comm_obj
= Literal(addr
, "; " + comm_l
)
1372 comm_obj
.indent
= comm_indent
1373 model
.add_object(addr
, comm_obj
)
1375 next_addr
= addr
+ sz
1376 next_props
= ADDRESS_SPACE
.get_addr_prop_dict(next_addr
)
1377 func_end
= next_props
.get("fun_e")
1379 model
.add_object(addr
, FunctionWrapper(addr
, "; End of function '%s' (%s)" % (
1380 ADDRESS_SPACE
.get_label(func_end
.start
), func_end
.get_end_method()
1387 model
.add_object(a
[END
], AreaWrapper(a
[END
], "; End of 0x%x area (%s)" % (a
[START
], a
[PROPS
].get("name", "noname"))))
1391 if f
== AddressSpace
.UNK
:
1393 elif f
== AddressSpace
.CODE
:
1395 elif f
== AddressSpace
.CODE | AddressSpace
.FUNC
:
1397 elif f
== AddressSpace
.CODE_CONT
:
1399 elif f
== AddressSpace
.DATA
:
1401 elif f
== AddressSpace
.DATA_CONT
:
1403 elif f
== AddressSpace
.STR
:
1405 elif f
== AddressSpace
.FILL
:
1410 def print_address_map():
1411 for a
in ADDRESS_SPACE
.area_list
:
1412 for i
in range(len(a
[FLAGS
])):
1414 sys
.stdout
.write("\n")
1415 sys
.stdout
.write("%08x " % (a
[START
] + i
))
1416 sys
.stdout
.write(flag2char(a
[FLAGS
][i
]))
1417 sys
.stdout
.write("\n")
1420 idaapi
.set_address_space(ADDRESS_SPACE
)