1 # ScratchABit - interactive disassembler
3 # Copyright (c) 2015 Paul Sokolovsky
5 # This program is free software; you can redistribute it and/or modify
6 # it under the terms of the GNU General Public License as published by
7 # the Free Software Foundation, either version 3 of the License, or
8 # (at your option) any later version.
10 # This program is distributed in the hope that it will be useful, but
11 # WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 # General Public License for more details.
15 # You should have received a copy of the GNU General Public License
16 # along with this program. If not, see <http://www.gnu.org/licenses/>.
22 from rangeset
import RangeSet
28 # ScratchABit API and code
47 return "Area(0x%x-0x%x, %s)" % (area
[START
], area
[END
], area
[PROPS
])
class InvalidAddrException(Exception):
    """Thrown when dereferencing address which doesn't exist in AddressSpace.

    The exception's ``args`` carry the offending address twice: first as
    the value passed in, then as its ``hex()`` string.
    """

    def __init__(self, addr):
        # Same resulting .args as assigning the tuple directly.
        super().__init__(addr, hex(addr))
61 def __init__(self
, start
, end
=None):
62 self
.ranges
= RangeSet()
def add_insn(self, addr, sz):
    """Record an instruction of ``sz`` bytes located at ``addr``.

    The span is added to ``self.ranges`` as the pair ``(addr, addr + sz)``.
    """
    insn_end = addr + sz
    self.ranges.add((addr, insn_end))
def add_range(self, start, end):
    """Record the address span ``(start, end)`` in ``self.ranges``."""
    span = (start, end)
    self.ranges.add(span)
73 return self
.ranges
.to_list()
76 if self
.end
is not None:
78 bounds
= self
.ranges
.bounds()
82 def get_end_method(self
):
83 if self
.end
is not None:
84 return "as set by loader (detected: 0x%x)" % (self
.ranges
.bounds()[1] - 1)
93 STR
= 0x10 # Continuation is DATA_CONT
94 FILL
= 0x40 # Filler/alignment bytes
95 FUNC
= 0x80 # Can appear with CODE, meaning this instruction belongs to a function
99 # Map from referenced addresses to their properties. Among them:
101 # Properties of instruction's args; at the very least, this should
102 # differentiate between literal numeric values and addresses/offsets/pointers
109 # Cross-reference records
111 # Function start and beyond-end addresses, map to Function object
113 # Map from label to its address
115 # Problem spots which automatic control/data flow couldn't resolve
117 # Cached last accessed area
118 self
.last_area
= None
122 def add_area(self
, start
, end
, props
):
123 log
.debug("add_area(%x, %x, %s)", start
, end
, props
)
125 bytes
= bytearray(sz
)
126 flags
= bytearray(sz
)
127 a
= (start
, end
, props
, bytes
, flags
)
128 self
.area_list
.append(a
)
129 # Area list should be sorted. Assume it's short and just resort it each time.
130 self
.area_list
.sort()
134 return self
.area_list
def area_no(self, area):
    """Return the index of ``area`` within ``self.area_list``.

    The lookup is delegated to the list's ``index()``, so an area that is
    not in the list raises ValueError.
    """
    areas = self.area_list
    return areas.index(area)
139 def addr2area(self
, addr
):
142 if a
[0] <= addr
<= a
[1]:
143 return (addr
- a
[0], a
)
144 for a
in self
.area_list
:
145 if a
[0] <= addr
<= a
[1]:
147 return (addr
- a
[0], a
)
151 return self
.area_list
[0][START
]
154 return self
.area_list
[-1][END
]
156 # Return next address in the address space, or None
157 def next_addr(self
, addr
):
158 offset
, area
= self
.addr2area(addr
)
159 if addr
!= area
[END
]:
161 i
= self
.area_no(area
) + 1
162 if i
== len(self
.area_list
):
164 return self
.area_list
[i
][START
]
166 def is_exec(self
, addr
):
167 off
, area
= self
.addr2area(addr
)
170 return "X" in area
[PROPS
]["access"]
def load_content(self, file, addr, sz=None):
    """Read raw content from ``file`` into the area that contains ``addr``.

    file -- binary file-like object providing ``readinto()``
    addr -- address at which the loaded content is placed
    sz   -- number of bytes to load; ``None`` (the default) means
            "up to the end of the area"

    Raises InvalidAddrException if ``addr`` does not belong to any area,
    matching the behaviour of the byte accessors of this class.
    """
    off, area = self.addr2area(addr)
    if area is None:
        # Previously this fell through to an opaque TypeError on area[BYTES].
        raise InvalidAddrException(addr)
    # Only None selects "rest of the area". An explicit size of 0 (e.g. a
    # segment with no file content) must load nothing rather than
    # everything, which is what a plain truthiness test on sz did.
    to = None if sz is None else off + sz
    # memoryview gives a zero-copy window so readinto() fills the area's
    # own bytearray in place.
    file.readinto(memoryview(area[BYTES])[off:to])
def is_valid_addr(self, addr):
    """Tell whether ``addr`` resolves to an area of this address space.

    Returns True when ``addr2area()`` yields an area, False when the area
    part of its result is None.
    """
    _off, area = self.addr2area(addr)
    found = area is not None
    return found
183 def get_byte(self
, addr
):
184 off
, area
= self
.addr2area(addr
)
186 raise InvalidAddrException(addr
)
187 return area
[BYTES
][off
]
189 def set_byte(self
, addr
, val
):
190 off
, area
= self
.addr2area(addr
)
192 raise InvalidAddrException(addr
)
193 area
[BYTES
][off
] = val
& 0xff
195 def get_bytes(self
, addr
, sz
):
196 off
, area
= self
.addr2area(addr
)
198 raise InvalidAddrException(addr
)
199 return area
[BYTES
][off
:off
+ sz
]
201 def get_data(self
, addr
, sz
):
204 sym
= self
.get_addr_prop(addr
, "sym")
208 off
, area
= self
.addr2area(addr
)
211 val
= val |
(area
[BYTES
][off
+ i
] << 8 * i
)
214 def set_data(self
, addr
, data
, sz
):
215 off
, area
= self
.addr2area(addr
)
218 area
[BYTES
][off
+ i
] = data
& 0xff
221 # Binary Data Flags API
223 def get_flags(self
, addr
, mask
=0x7f):
224 off
, area
= self
.addr2area(addr
)
226 raise InvalidAddrException(addr
)
227 return area
[FLAGS
][off
] & mask
229 def get_unit_size(self
, addr
):
230 off
, area
= self
.addr2area(addr
)
233 if flags
[off
] & 0x7f == self
.CODE
:
235 elif flags
[off
] in (self
.DATA
, self
.STR
):
237 elif flags
[off
] == self
.FILL
:
242 while flags
[off
] == f
:
247 # Taking an offset inside unit, return offset to the beginning of unit
249 def adjust_offset_reverse(cls
, off
, area
):
251 if flags
[off
] == cls
.FILL
:
253 if flags
[off
] != cls
.FILL
:
260 if flags
[off
] in (cls
.CODE_CONT
, cls
.DATA_CONT
):
266 def adjust_addr_reverse(self
, addr
):
267 off
, area
= self
.addr2area(addr
)
270 return self
.adjust_offset_reverse(off
, area
) + area
[START
]
272 def set_flags(self
, addr
, sz
, head_fl
, rest_fl
=0):
273 off
, area
= self
.addr2area(addr
)
277 for i
in range(sz
- 1):
278 flags
[off
+ i
] = rest_fl
def make_undefined(self, addr, sz):
    """Mark ``sz`` bytes starting at ``addr`` as undefined.

    Delegates to ``set_flags()`` with UNK as both the head flag and the
    flag for the remaining bytes.
    """
    unk = self.UNK
    self.set_flags(addr, sz, unk, unk)
def make_code(self, addr, sz, extra_flags=0):
    """Flag ``sz`` bytes at ``addr`` as a single instruction.

    addr        -- address of the first byte of the instruction
    sz          -- instruction size in bytes
    extra_flags -- additional bits OR-ed into the first byte's flags

    The first byte gets ``CODE | extra_flags`` OR-ed into its flags; every
    following byte of the instruction gets ``CODE_CONT`` OR-ed in.
    Existing flag bits are kept, not overwritten.
    """
    off, area = self.addr2area(addr)
    flags = area[FLAGS]
    flags[off] |= self.CODE | extra_flags
    # Continuation bytes: offsets off+1 .. off+sz-1 (none when sz <= 1).
    for pos in range(off + 1, off + sz):
        flags[pos] |= self.CODE_CONT
def make_data(self, addr, sz):
    """Flag ``sz`` bytes at ``addr`` as a single data unit.

    The first byte gets ``DATA`` OR-ed into its flags; every following
    byte of the unit gets ``DATA_CONT`` OR-ed in. Existing flag bits are
    kept, not overwritten.
    """
    off, area = self.addr2area(addr)
    flags = area[FLAGS]
    flags[off] |= self.DATA
    # Continuation bytes: offsets off+1 .. off+sz-1 (none when sz <= 1).
    for pos in range(off + 1, off + sz):
        flags[pos] |= self.DATA_CONT
297 def make_data_array(self
, addr
, sz
, num_items
, prefix
=""):
298 # Make a data array. First-class arrays are not supported so far,
299 # so just mark data units sequentially
300 self
.append_comment(addr
, "%sArray, num %s: %d" % (prefix
, "bytes" if sz
== 1 else "items", num_items
))
301 for i
in range(num_items
):
302 self
.make_data(addr
, sz
)
def make_filler(self, addr, sz):
    """Mark ``sz`` bytes starting at ``addr`` as filler/alignment bytes.

    Delegates to ``set_flags()`` with FILL as both the head flag and the
    flag for the remaining bytes.
    """
    fill = self.FILL
    self.set_flags(addr, sz, fill, fill)
308 # Address properties API
def set_addr_prop(self, addr, prop, val):
    """Store ``val`` under key ``prop`` in the property dict of ``addr``.

    The per-address dict is created in ``self.addr_map`` on first use.
    """
    props = self.addr_map.setdefault(addr, {})
    props[prop] = val
def get_addr_prop(self, addr, prop, default=None):
    """Return property ``prop`` of ``addr``, or ``default`` if it is unset.

    ``default`` is returned both when the address has no properties at all
    and when it has some but not ``prop``. Nothing is stored in
    ``self.addr_map`` by this lookup.
    """
    props = self.addr_map.get(addr, {})
    return props.get(prop, default)
def get_addr_prop_dict(self, addr):
    """Return the whole property dict of ``addr``.

    For an address without properties a fresh empty dict is returned; it
    is not stored in ``self.addr_map``, so changes to it are not kept.
    """
    no_props = {}
    return self.addr_map.get(addr, no_props)
321 def get_default_label_prefix(self
, ea
):
322 fl
= self
.get_flags(ea
)
def get_default_label(self, ea):
    """Build the default label for address ``ea``.

    The label is the prefix from ``get_default_label_prefix(ea)`` followed
    by the address as 8 zero-padded lowercase hex digits.
    """
    return "%s%08x" % (self.get_default_label_prefix(ea), ea)
335 def make_label(self
, prefix
, ea
):
336 l
= self
.get_addr_prop(ea
, "label")
337 if isinstance(l
, str):
338 # If it's real label, don't change it
341 prefix
= self
.get_default_label_prefix(ea
)
342 l
= "%s%08x" % (prefix
, ea
)
343 self
.set_addr_prop(ea
, "label", l
)
344 self
.labels_rev
[l
] = ea
346 # auto_label will change its prefix automatically based on
347 # type of data it points.
348 def make_auto_label(self
, ea
):
349 if self
.get_addr_prop(ea
, "label"):
351 self
.set_addr_prop(ea
, "label", ea
)
352 self
.labels_rev
[ea
] = ea
354 # Delete a label, only if it's auto
355 def del_auto_label(self
, ea
):
356 label
= self
.get_addr_prop(ea
, "label")
357 if not label
or isinstance(label
, str):
359 self
.set_addr_prop(ea
, "label", None)
360 del self
.labels_rev
[ea
]
362 def get_label(self
, ea
):
363 label
= self
.get_addr_prop(ea
, "label")
364 if isinstance(label
, int):
365 return "%s%08x" % (self
.get_default_label_prefix(ea
), label
)
368 def set_label(self
, ea
, label
):
369 # Make sure the label can be actually visible - create an area for it if none
370 off
, area
= self
.addr2area(ea
)
372 self
.add_area(ea
, ea
, {"name": "autocreated to host %s label" % label
})
373 self
.set_addr_prop(ea
, "label", label
)
374 self
.labels_rev
[label
] = ea
376 def make_unique_label(self
, ea
, label
):
377 existing
= self
.get_label(ea
)
378 if existing
== label
:
385 if l
not in self
.labels_rev
:
386 self
.set_label(ea
, l
)
def get_label_list(self):
    """Return all known labels as a sorted list of strings.

    Keys of ``self.labels_rev`` that are already strings are used as is;
    any other key is turned into its default label via
    ``get_default_label()``.
    """
    names = []
    for key in self.labels_rev:
        if not isinstance(key, str):
            key = self.get_default_label(key)
        names.append(key)
    names.sort()
    return names
393 def resolve_label(self
, label
):
394 if label
in self
.labels_rev
:
395 return self
.labels_rev
[label
]
397 ea
= int(label
.split("_", 1)[1], 16)
400 if ea
in self
.labels_rev
and self
.get_default_label(ea
) == label
:
def label_exists(self, label):
    """Tell whether ``label`` is a key of ``self.labels_rev``."""
    known = self.labels_rev
    return label in known
408 def get_comment(self
, ea
):
409 comm
= self
.get_addr_prop(ea
, "comm")
def set_comment(self, ea, comm):
    """Set the comment of address ``ea`` to ``comm``.

    The text is stored as the "comm" address property, replacing any
    previous value.
    """
    self.set_addr_prop(ea, "comm", comm)
def append_comment(self, ea, comm):
    """Add ``comm`` to the comment of address ``ea``.

    If ``get_comment(ea)`` returns an existing comment, the new text is
    appended to it on a new line; otherwise ``comm`` becomes the comment.
    The result is stored as the "comm" address property.
    """
    previous = self.get_comment(ea)
    text = comm if previous is None else previous + "\n" + comm
    self.set_addr_prop(ea, "comm", text)
421 # (Pseudo)instruction Argument Properties API
def set_arg_prop(self, ea, arg_no, prop, prop_val):
    """Set property ``prop`` of argument ``arg_no`` of the unit at ``ea``.

    Argument properties live in the "args" address property, a dict keyed
    by argument number whose values are per-argument property dicts.
    """
    arg_props = self.get_addr_prop(ea, "args", {})
    arg_props.setdefault(arg_no, {})[prop] = prop_val
    # Write back explicitly: when the address had no "args" yet, the dict
    # obtained above is a fresh default that is not stored anywhere.
    self.set_addr_prop(ea, "args", arg_props)
def get_arg_prop(self, ea, arg_no, prop):
    """Return property ``prop`` of argument ``arg_no`` of the unit at ``ea``.

    Returns None when the address has no argument properties, the argument
    has none, or ``prop`` is not set for it.
    """
    all_args = self.get_addr_prop(ea, "args", {})
    one_arg = all_args.get(arg_no, {})
    return one_arg.get(prop)
def get_arg_prop_dict(self, ea, arg_no):
    """Return the property dict of argument ``arg_no`` of the unit at ``ea``.

    An empty dict is returned when nothing is recorded for the argument.
    """
    all_args = self.get_addr_prop(ea, "args", {})
    nothing = {}
    return all_args.get(arg_no, nothing)
439 def make_arg_offset(self
, insn_addr
, arg_no
, ref_addr
):
440 # Convert an immediate argument to an offset one
441 # insn_addr - address of (pseudo)instruction
442 # arg_no - argument no. of instruction
443 # ref_addr - value of the argument (i.e. address it refers to)
444 old_subtype
= self
.get_arg_prop(insn_addr
, arg_no
, "subtype")
445 if old_subtype
and old_subtype
!= IMM_ADDR
:
446 # Preserve old numeric value subtype to unconvert back to it
448 self
.set_arg_prop(insn_addr
, arg_no
, "num_subtype", old_subtype
)
450 self
.set_arg_prop(insn_addr
, arg_no
, "subtype", IMM_ADDR
)
452 if isinstance(ref_addr
, str):
454 # TODO: this works only for "dd" virtual instruction
455 self
.set_addr_prop(insn_addr
, "sym", ref_addr
)
458 label
= self
.get_label(ref_addr
)
460 self
.make_auto_label(ref_addr
)
461 self
.add_xref(insn_addr
, ref_addr
, idaapi
.dr_O
)
def unmake_arg_offset(self, insn_addr, arg_no, ref_addr):
    """Convert an offset argument back to a normal immediate value.

    insn_addr -- address of the (pseudo)instruction
    arg_no    -- argument no. of the instruction
    ref_addr  -- address the argument referred to

    The argument's "subtype" is reset to the value saved under
    "num_subtype" (None if nothing was saved), and the offset
    cross-reference from ``insn_addr`` to ``ref_addr`` is removed.
    """
    saved_subtype = self.get_arg_prop(insn_addr, arg_no, "num_subtype")
    self.set_arg_prop(insn_addr, arg_no, "subtype", saved_subtype)
    self.del_xref(insn_addr, ref_addr, idaapi.dr_O)
    remaining = self.get_xrefs(ref_addr)
    if remaining:
        return
    # That was the last xref; drop the label too if it is an automatic one.
    self.del_auto_label(ref_addr)
def add_xref(self, from_ea, to_ea, type):
    """Record a cross-reference from ``from_ea`` to ``to_ea``.

    Cross-references are kept on the target address, in its "xrefs"
    property: a dict mapping the referencing address to the xref ``type``.
    An existing entry for ``from_ea`` is overwritten.
    """
    incoming = self.get_addr_prop(to_ea, "xrefs", {})
    incoming[from_ea] = type
    # Write back: the dict is a fresh default if the target had no xrefs.
    self.set_addr_prop(to_ea, "xrefs", incoming)
480 def del_xref(self
, from_ea
, to_ea
, type):
481 xrefs
= self
.get_addr_prop(to_ea
, "xrefs", {})
483 self
.set_addr_prop(to_ea
, "xrefs", xrefs
)
485 def get_xrefs(self
, ea
):
486 xrefs
= self
.get_addr_prop(ea
, "xrefs", None)
491 def make_func(self
, from_ea
, to_ea_excl
=None):
492 f
= self
.get_addr_prop(from_ea
, "fun_s")
495 f
= Function(from_ea
, to_ea_excl
)
496 self
.set_addr_prop(from_ea
, "fun_s", f
)
498 if to_ea_excl
is not None:
499 self
.set_addr_prop(to_ea_excl
, "fun_e", f
)
def is_func(self, ea):
    """Tell whether ``ea`` has a "fun_s" (function start) property set."""
    func = self.get_addr_prop(ea, "fun_s")
    return func is not None
505 # If ea is start of function, return Function object
def get_func_start(self, ea):
    """Return the value of the "fun_s" property of ``ea``, or None if unset."""
    func = self.get_addr_prop(ea, "fun_s")
    return func
509 # If ea is end of function, return Function object
def get_func_end(self, ea):
    """Return the value of the "fun_e" property of ``ea``, or None if unset."""
    func = self.get_addr_prop(ea, "fun_e")
    return func
def set_func_end(self, func, ea):
    """Store ``func`` as the "fun_e" property of address ``ea``.

    Note the argument order: the function object comes first, the
    address second.
    """
    self.set_addr_prop(ea, "fun_e", func)
516 # Look up function containing address
517 def lookup_func(self
, ea
):
518 # TODO: cache func ranges, use binary search instead
519 for start
, props
in self
.addr_map
.items():
520 func
= props
.get("fun_s")
521 if func
and ea
>= start
:
523 if end
is not None and ea
< end
:
def add_issue(self, ea, descr):
    """Record ``descr`` as the issue for address ``ea``.

    Issues are kept in ``self.issues`` keyed by address, so a later issue
    for the same address replaces the earlier one.
    """
    issues = self.issues
    issues[ea] = descr
531 def get_issues(self
):
533 for ea
in sorted(self
.issues
.keys()):
534 res
.append((ea
, self
.issues
[ea
]))
539 def save_area(self
, stream
, area
):
540 stream
.write("%08x %08x\n" % (area
[START
], area
[END
]))
544 chunk
= flags
[i
:i
+ 32]
547 stream
.write(str(binascii
.hexlify(chunk
), 'utf-8') + "\n")
def save_areas(self, stream):
    """Write every area of ``self.area_list`` to ``stream``.

    Areas are written one after another, in list order, each via
    ``save_area()``.
    """
    for area in self.area_list:
        self.save_area(stream, area)
557 def save_addr_props(self
, prefix
):
558 areas
= self
.area_list
560 stream
= open(prefix
+ ".%08x" % areas
[area_i
][START
], "w")
561 area_end
= areas
[area_i
][END
]
562 stream
.write("header:\n")
563 stream
.write(" version: 1.0\n")
564 for addr
, props
in sorted(self
.addr_map
.items()):
568 while addr
> areas
[area_i
][END
]:
570 assert addr
>= areas
[area_i
][START
]
571 stream
= open(prefix
+ ".%08x" % areas
[area_i
][START
], "w")
572 #stream.write("addr=%x area_end=%x\n" % (addr, area_end))
573 area_end
= areas
[area_i
][END
]
574 stream
.write("header:\n")
575 stream
.write(" version: 1.0\n")
576 # If entry has just fun_e data, skip it
577 if len(props
) == 1 and "fun_e" in props
:
579 stream
.write("0x%08x:\n" % addr
)
580 fl
= self
.get_flags(addr
)
581 stream
.write(" f: %s %02x\n" % (flag2char(fl
), fl
))
582 label
= props
.get("label")
583 arg_props
= props
.get("args")
584 comm
= props
.get("comm")
585 xrefs
= props
.get("xrefs")
586 func
= props
.get("fun_s")
587 if label
is not None:
589 stream
.write(" l:\n")
591 stream
.write(" l: %s\n" % label
)
592 if arg_props
is not None:
593 arg_props_header
= False
594 for arg_no
, data
in sorted(arg_props
.items()):
595 data
= {k
: v
for k
, v
in data
.items() if v
is not None}
597 if not arg_props_header
:
598 stream
.write(" args:\n")
599 arg_props_header
= True
600 stream
.write(" %s: %r\n" % (arg_no
, data
))
601 #for k, v in sorted(data.items()):
602 # stream.write(" %s: %s\n" % (k, v))
604 stream
.write(" cmnt: %r\n" % comm
)
607 if func
.end
is not None:
608 stream
.write(" fn_end: 0x%08x\n" % func
.end
)
610 stream
.write(" fn_end: '?'\n")
611 stream
.write(" fn_ranges: [")
613 for r
in func
.get_ranges():
616 stream
.write("[0x%08x,0x%08x]" % r
)
621 stream
.write(" x:\n" % xrefs
)
622 for from_addr
in sorted(xrefs
.keys()):
623 stream
.write(" - 0x%08x: %s\n" % (from_addr
, xrefs
[from_addr
]))
625 def load_addr_props(self
, stream
):
626 l
= stream
.readline()
627 assert l
== "header:\n"
628 l
= stream
.readline()
629 assert l
== " version: 1.0\n"
630 l
= stream
.readline()
632 assert l
.endswith(":\n")
633 addr
= int(l
[:-2], 0)
634 props
= self
.addr_map
.get(addr
, {})
635 l
= stream
.readline()
636 while l
and l
[0] == " ":
637 key
, val
= [x
.strip() for x
in l
.split(":", 1)]
644 self
.labels_rev
[val
] = addr
646 props
["comm"] = val
[1:-1].replace("\\n", "\n")
647 elif key
== "fn_end":
652 f
= Function(addr
, end
)
654 # Handled by finish_func() below
656 # self.addr_map[end] = {"fun_e": f}
657 elif key
== "fn_ranges":
659 assert val
.startswith("[[") and val
.endswith("]]"), val
662 for r
in val
.split("], ["):
663 r
= [int(x
, 0) for x
in r
.split(",")]
665 # Now, call finish func to set func end address, either from
666 # fn_end or fn_ranges
672 l
= stream
.readline()
673 if not l
or not l
.startswith(" "):
675 arg_no
, data
= [x
.strip() for x
in l
.split(":", 1)]
676 assert data
[0] == "{" and data
[-1] == "}"
679 for pair
in data
.split(","):
680 seq
= [x
.strip() for x
in pair
.split(":", 1)]
682 assert x
[0] == "'" and x
[-1] == "'", x
683 k
, v
= [x
[1:-1] for x
in seq
]
685 arg_props
[int(arg_no
)] = vals
686 props
["args"] = arg_props
691 l
= stream
.readline()
692 if not l
or not l
.startswith(" - "):
694 key
, val
= [x
.strip() for x
in l
[3:].split(":", 1)]
695 xrefs
[int(key
, 0)] = val
697 props
["xrefs"] = xrefs
700 l
= stream
.readline()
702 self
.addr_map
[addr
] = props
704 def load_area(self
, stream
, area
):
705 l
= stream
.readline()
706 vals
= [int(v
, 16) for v
in l
.split()]
707 assert area
[START
] == vals
[0] and area
[END
] == vals
[1]
711 l
= stream
.readline().rstrip()
714 l
= binascii
.unhexlify(l
)
715 flags
[i
:i
+ len(l
)] = l
def load_areas(self, stream):
    """Load every area of ``self.area_list`` from ``stream``.

    Areas are read one after another, in list order, each via
    ``load_area()``.
    """
    for area in self.area_list:
        self.load_area(stream, area)
723 # Hack for idaapi interfacing
724 # TODO: should go to "Analysis" object
725 def analisys_stack_push(self
, ea
, is_call
=True):
726 global analisys_stack_branches
, analisys_stack_calls
727 # If we know something is func (e.g. from loader), jump
728 # to it means tail-call.
729 if is_call
or self
.is_func(ea
):
730 analisys_stack_calls
.append(ea
)
732 analisys_stack_branches
.append(ea
)
735 ADDRESS_SPACE
= AddressSpace()
737 def set_processor(p
):
740 idaapi
.set_processor(p
)
743 analisys_stack_calls
= []
744 analisys_stack_branches
= []
746 def add_entrypoint(ea
, as_func
=True):
748 ADDRESS_SPACE
.make_func(ea
, None)
749 analisys_stack_calls
.append(ea
)
751 analisys_stack_branches
.append(ea
)
754 _processor
.cmd
.ea
= ea
755 _processor
.cmd
.size
= 0
756 _processor
.cmd
.disasm
= None
760 log
.info("Function %s (0x%x) ranges: %s" % (ADDRESS_SPACE
.get_label(f
.start
), f
.start
, f
.ranges
.str(hex)))
763 ADDRESS_SPACE
.set_func_end(f
, end
)
765 def analyze(callback
=lambda cnt
:None):
770 if analisys_stack_branches
:
771 ea
= analisys_stack_branches
.pop()
772 fl
= ADDRESS_SPACE
.get_flags(ea
, 0xff)
774 if fl
== ADDRESS_SPACE
.CODE | ADDRESS_SPACE
.FUNC
:
776 assert fl
in (ADDRESS_SPACE
.CODE
, ADDRESS_SPACE
.UNK
)
778 if fl
!= ADDRESS_SPACE
.UNK
:
780 elif analisys_stack_calls
:
781 finish_func(current_func
)
782 ea
= analisys_stack_calls
.pop()
783 fun
= ADDRESS_SPACE
.get_func_start(ea
)
786 log
.info("Starting analysis of function 0x%x" % ea
)
787 current_func
= ADDRESS_SPACE
.make_func(ea
)
789 finish_func(current_func
)
793 insn_sz
= _processor
.ana()
794 except InvalidAddrException
:
795 # Ran out of memory area, just continue
796 # with the rest of paths
798 # print("size: %d" % insn_sz, _processor.cmd)
800 if not _processor
.emu():
803 current_func
.add_insn(ea
, insn_sz
)
804 ADDRESS_SPACE
.make_code(ea
, insn_sz
, ADDRESS_SPACE
.FUNC
)
806 ADDRESS_SPACE
.make_code(ea
, insn_sz
)
808 # print("%08x %s" % (_processor.cmd.ea, _processor.cmd.disasm))
814 # if not analisys_stack:
815 # print("Analisys finished")
821 def __init__(self
, target_addr
=0, target_subno
=0):
828 self
.target_addr
= target_addr
829 self
.target_subno
= target_subno
830 self
.target_addr_lineno_0
= -1
831 self
.target_addr_lineno
= -1
832 self
.target_addr_lineno_real
= -1
837 def add_line(self
, addr
, line
):
838 if addr
!= self
._last
_addr
:
839 self
._last
_addr
= addr
841 if addr
== self
.target_addr
:
842 if self
._subcnt
== 0:
843 # Contains first line related to the given addr
844 self
.target_addr_lineno_0
= self
._cnt
845 if self
._subcnt
== self
.target_subno
:
846 # Contains line no. target_subno related to the given addr
847 self
.target_addr_lineno
= self
._cnt
849 # Contains line where actual instr/data/unknown bytes are
850 # rendered (vs labels/xrefs/etc.)
851 self
.target_addr_lineno_real
= self
._cnt
852 self
._lines
.append(line
)
853 self
._addr
2line
[(addr
, self
._subcnt
)] = self
._cnt
854 line
.subno
= self
._subcnt
856 # Line of "real" disasm object
857 self
._addr
2line
[(addr
, -1)] = self
._cnt
def addr2line_no(self, addr, subno=-1):
    """Return the line number recorded for ``(addr, subno)``, or None.

    The default ``subno`` of -1 is the key under which ``add_line()``
    records the line of the "real" disasm object of an address.
    """
    key = (addr, subno)
    return self._addr2line.get(key)
def undefine_unit(self, addr):
    """Turn the unit at ``addr`` back into undefined bytes.

    The unit's size is queried from the address space (``self.AS``) and
    that many bytes are then marked undefined.
    """
    aspace = self.AS
    unit_sz = aspace.get_unit_size(addr)
    self.AS.make_undefined(addr, unit_sz)
def data_sz2mnem(sz):
    """Return the data-definition mnemonic field for a unit of ``sz`` bytes.

    Sizes 1, 2 and 4 map to "db", "dw" and "dd" respectively; any other
    size raises KeyError. The mnemonic is passed through
    ``idaapi.fillstr()`` with ``idaapi.DEFAULT_WIDTH`` (presumably padding
    it to the mnemonic column width -- confirm against idaapi).
    """
    mnemonics = {1: "db", 2: "dw", 4: "dd"}
    mnem = mnemonics[sz]
    return idaapi.fillstr(mnem, idaapi.DEFAULT_WIDTH)
876 # Size of "leader fields" in disasm window - address, raw bytes, etc.
877 # May be set by MVC controller
880 # Default indent for a line
881 indent
= " " * idaapi
.DEFAULT_INDENT
883 # Default operand positions list is empty and set on class level
# to save memory. To be overridden on object level.
887 # If False, this object corresponds to real bytes in input binary stream
888 # If True, doesn't correspond to bytes in memory: labels, etc.
891 # Textual comment to append
894 # Instance variable expected to be set on each instance:
897 # subno = # relative no. of several lines corresponding to the same ea
900 # Render object as a string, set it as .cache, and return it
903 def get_operand_addr(self
):
904 # Get "the most addressful" operand
905 # This for example will be called when Enter is pressed
906 # not on a specific instruction operand, so this should
907 # return value of the operand which contains an address
908 # (or the "most suitable" of them if there're few).
# Each object should return its real character length as displayed on the screen.
913 # Should be fast - called on each cursor movement.
915 return self
.LEADER_SIZE
+ len(self
.indent
) + len(self
.cache
)
916 except AttributeError:
917 return self
.LEADER_SIZE
+ len(self
.indent
) + len(self
.render())
def content_len(self):
    """Return the length of the object's own text.

    This is ``len(self)`` minus the leader fields (``LEADER_SIZE``) and
    the indent.
    """
    # Take the full length first, as it may trigger rendering.
    total = len(self)
    prefix = self.LEADER_SIZE + len(self.indent)
    return total - prefix
923 class Instruction(idaapi
.insn_t
, DisasmObj
):
928 _processor
.cmd
= self
930 s
= self
.disasm
+ self
.comment
934 def get_operand_addr(self
):
935 # Assumes RISC design where only one operand can be address
937 for o
in self
._operands
:
938 if o
.flags
& idaapi
.OF_SHOW
:
939 if o
.type == idaapi
.o_near
:
940 # Jumps have priority
942 if o
.type == idaapi
.o_mem
:
944 elif o
.type == idaapi
.o_imm
:
951 class Data(DisasmObj
):
955 def __init__(self
, ea
, sz
, val
):
961 subtype
= ADDRESS_SPACE
.get_arg_prop(self
.ea
, 0, "subtype")
962 if subtype
== IMM_ADDR
:
964 if not isinstance(label
, str):
965 label
= ADDRESS_SPACE
.get_label(label
)
966 s
= "%s%s" % (data_sz2mnem(self
.size
), label
)
968 s
= "%s0x%x" % (data_sz2mnem(self
.size
), self
.val
)
973 def get_operand_addr(self
):
977 o
.type = idaapi
.o_imm
981 class String(DisasmObj
):
985 def __init__(self
, ea
, sz
, val
):
991 s
= "%s%s" % (data_sz2mnem(1), repr(self
.val
).replace("\\x00", "\\0"))
997 class Fill(DisasmObj
):
1001 def __init__(self
, ea
, sz
):
1004 self
.cache
= idaapi
.fillstr(".fill", idaapi
.DEFAULT_WIDTH
) + str(sz
)
1010 class Unknown(DisasmObj
):
1015 def __init__(self
, ea
, val
):
1021 if 0x20 <= self
.val
<= 0x7e:
1022 ch
= " ; '%s'" % chr(self
.val
)
1023 s
= "%s0x%02x%s" % (idaapi
.fillstr("unk", idaapi
.DEFAULT_WIDTH
), self
.val
, ch
)
1029 class Label(DisasmObj
):
1033 def __init__(self
, ea
):
1037 label
= ADDRESS_SPACE
.get_label(self
.ea
)
1043 class Xref(DisasmObj
):
1047 def __init__(self
, ea
, from_addr
, type):
1049 self
.from_addr
= from_addr
1053 s
= (" " * idaapi
.DEFAULT_XREF_INDENT
) + "; xref: 0x%x %s" % (self
.from_addr
, self
.type)
1057 def get_operand_addr(self
):
1059 o
.addr
= self
.from_addr
1063 class Literal(DisasmObj
):
1067 def __init__(self
, ea
, str):
1077 render_partial(model
, 0, 0, 1000000)
# How many bytes a single disasm object (i.e. a line) may occupy
1083 def render_partial_around(addr
, subno
, context_lines
):
1084 log
.debug("render_partial_around(%x, %d)", addr
, subno
)
1085 off
, area
= ADDRESS_SPACE
.addr2area(addr
)
1088 back
= context_lines
* MAX_UNIT_SIZE
1091 area_no
= ADDRESS_SPACE
.area_no(area
) - 1
1093 area
= ADDRESS_SPACE
.area_list
[area_no
]
1094 sz
= area
[1] - area
[0] + 1
1100 # Reached beginning of address space, just set as such
1103 log
.debug("render_partial_around: off=0x%x, %s", off
, str_area(area
))
1104 off
= ADDRESS_SPACE
.adjust_offset_reverse(off
, area
)
1105 log
.debug("render_partial_around adjusted: off=0x%x, %s", off
, str_area(area
))
1106 model
= Model(addr
, subno
)
1107 render_partial(model
, ADDRESS_SPACE
.area_list
.index(area
), off
, context_lines
, addr
)
1108 log
.debug("render_partial_around model done, lines: %d", len(model
.lines()))
1109 assert model
.target_addr_lineno_0
>= 0
1110 if model
.target_addr_lineno
== -1:
1111 # If we couldn't find exact subno, use 0th subno of that addr
1112 # TODO: maybe should be last subno, because if we couldn't find
1113 # exact one, it was ~ last and removed, so current last is "closer"
1115 model
.target_addr_lineno
= model
.target_addr_lineno_0
1119 def render_from(model
, addr
, num_lines
):
1120 off
, area
= ADDRESS_SPACE
.addr2area(addr
)
1123 return render_partial(model
, ADDRESS_SPACE
.area_list
.index(area
), off
, num_lines
)
1126 def render_partial(model
, area_no
, offset
, num_lines
, target_addr
=-1):
1127 model
.AS
= ADDRESS_SPACE
1129 #for a in ADDRESS_SPACE.area_list:
1130 while area_no
< len(ADDRESS_SPACE
.area_list
):
1131 a
= ADDRESS_SPACE
.area_list
[area_no
]
1138 model
.add_line(a
[START
], Literal(a
[START
], "; Start of 0x%x area (%s)" % (a
[START
], a
[PROPS
].get("name", "noname"))))
1141 areasize
= len(bytes
)
1144 # If we didn't yet reach target address, compensate for
1145 # the following decrement of num_lines. The logic is:
1146 # render all lines up to target_addr, and then num_lines past it.
1147 if target_addr
>= 0 and addr
< target_addr
:
1150 props
= ADDRESS_SPACE
.get_addr_prop_dict(addr
)
1151 func
= props
.get("fun_s")
1153 model
.add_line(addr
, Literal(addr
, "; Start of function '%s'" % ADDRESS_SPACE
.get_label(func
.start
)))
1155 xrefs
= props
.get("xrefs")
1157 for from_addr
in sorted(xrefs
.keys()):
1158 model
.add_line(addr
, Xref(addr
, from_addr
, xrefs
[from_addr
]))
1160 label
= props
.get("label")
1162 model
.add_line(addr
, Label(addr
))
1165 if f
== AddressSpace
.UNK
:
1166 out
= Unknown(addr
, bytes
[i
])
1169 elif f
& AddressSpace
.DATA
:
1172 while j
< areasize
and flags
[j
] & AddressSpace
.DATA_CONT
:
1176 out
= Data(addr
, sz
, ADDRESS_SPACE
.get_data(addr
, sz
))
1178 elif f
== AddressSpace
.STR
:
1182 while j
< areasize
and flags
[j
] == AddressSpace
.DATA_CONT
:
1183 str += chr(bytes
[j
])
1186 out
= String(addr
, sz
, str)
1188 elif f
== AddressSpace
.FILL
:
1191 while j
< areasize
and flags
[j
] == AddressSpace
.FILL
:
1194 out
= Fill(addr
, sz
)
1196 elif f
== AddressSpace
.CODE
:
1197 out
= Instruction(addr
)
1198 _processor
.cmd
= out
1199 sz
= _processor
.ana()
1203 model
.add_line(addr
, Literal(addr
, "; UNEXPECTED value: %02x flags: %02x" % (bytes
[i
], f
)))
1206 assert 0, "@%08x flags=%x" % (addr
, f
)
1208 comm
= props
.get("comm")
1210 comm_indent
= " " * (out
.content_len() + len(out
.indent
) + 2)
1211 out
.comment
= " ; " + comm
.split("\n", 1)[0]
1213 model
.add_line(addr
, out
)
1214 #sys.stdout.write(out + "\n")
1217 for comm_l
in comm
.split("\n")[1:]:
1218 comm_obj
= Literal(addr
, "; " + comm_l
)
1219 comm_obj
.indent
= comm_indent
1220 model
.add_line(addr
, comm_obj
)
1222 next_addr
= addr
+ sz
1223 next_props
= ADDRESS_SPACE
.get_addr_prop_dict(next_addr
)
1224 func_end
= next_props
.get("fun_e")
1226 model
.add_line(addr
, Literal(addr
, "; End of function '%s' (%s)" % (
1227 ADDRESS_SPACE
.get_label(func_end
.start
), func_end
.get_end_method()
1234 model
.add_line(a
[END
], Literal(a
[END
], "; End of 0x%x area (%s)" % (a
[START
], a
[PROPS
].get("name", "noname"))))
1238 if f
== AddressSpace
.UNK
:
1240 elif f
== AddressSpace
.CODE
:
1242 elif f
== AddressSpace
.CODE | AddressSpace
.FUNC
:
1244 elif f
== AddressSpace
.CODE_CONT
:
1246 elif f
== AddressSpace
.DATA
:
1248 elif f
== AddressSpace
.DATA_CONT
:
1250 elif f
== AddressSpace
.STR
:
1252 elif f
== AddressSpace
.FILL
:
1257 def print_address_map():
1258 for a
in ADDRESS_SPACE
.area_list
:
1259 for i
in range(len(a
[FLAGS
])):
1261 sys
.stdout
.write("\n")
1262 sys
.stdout
.write("%08x " % (a
[START
] + i
))
1263 sys
.stdout
.write(flag2char(a
[FLAGS
][i
]))
1264 sys
.stdout
.write("\n")
1267 idaapi
.set_address_space(ADDRESS_SPACE
)
1268 idc
.set_address_space(ADDRESS_SPACE
)