1 # ScratchABit - interactive disassembler
3 # Copyright (c) 2015 Paul Sokolovsky
5 # This program is free software; you can redistribute it and/or modify
6 # it under the terms of the GNU General Public License as published by
7 # the Free Software Foundation, either version 3 of the License, or
8 # (at your option) any later version.
10 # This program is distributed in the hope that it will be useful, but
11 # WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 # General Public License for more details.
15 # You should have received a copy of the GNU General Public License
16 # along with this program. If not, see <http://www.gnu.org/licenses/>.
18 from io
import StringIO
21 from scratchabit
import defs
22 from scratchabit
.defs
import InvalidAddrException
# Maps each data type constant to its item size (1, 2 or 4), as passed to
# ADDRESS_SPACE.make_data() by ua_dodata2().
DATA_SIZE = {
    dt_byte: 1,
    dt_word: 2,
    dt_dword: 4,
}
36 # Immediate value, can be either numeric value, or address of memory
37 # ("offset"), further differentiated by value subtype (offset, hex, dec, etc.)
# Location in memory. Should be used only if the instruction is guaranteed
# to access memory at the given address with the given size (direct
# addressing mode). Should not be mixed up with o_imm of offset subtype.
# Symbolic tag whose value is its own name.
# NOTE(review): presumably a processor-specific operand type -- confirm.
o_idpspec0 = "o_idpspec0"
59 # Assembler syntax flags
68 # Operand flags (op_t.flags)
# If not set, operand is not shown
OF_SHOW = 0x08
71 # Operand/value output flags (OutValue, etc.)
83 # Basic instruction semantics ("features" is IDA-speak)
# Control flow stops here, e.g. jump, ret
CF_STOP = 0x2
# Not just a jump, indirect jump (or call)!
CF_JUMP = 0x4
88 # Code references (i.e. control flow flags)
# "call near"
fl_CN = 1
# "jump near"
fl_JN = 2
# "ordinary flow"
fl_F = 3
92 # ScratchABit extensions:
# Return address from a call. The next instruction after a call should,
# whenever possible, use this flag instead of fl_F. This is because there's
# no guarantee that a call will return, so such code paths need to be treated
# with different priority than "next instruction" and "jump" code paths.
# "Offset" reference, address of an item is taken
dr_O = "o"
107 # Segment permissions
118 def __init__(self
, no
):
124 if hasattr(self
, "addr"):
126 if hasattr(self
, "value"):
131 #return str(self.__dict__)
132 return "op_t(#%d, t=%s, addr/val=%s)" % (self
.n
, self
.type, self
.get_addr())
136 def __init__(self
, ea
=0):
140 self
._operands
= [op_t(i
) for i
in range(UA_MAXOP
)]
def get_canon_feature(self):
    """Return the "feature" entry for this instruction.

    The entry is read from the active processor's ``instruc`` table,
    indexed by ``self.itype``.
    """
    descriptor = _processor.instruc[self.itype]
    return descriptor["feature"]
def __getitem__(self, i):
    """Return operand *i* of this instruction, enabling ``insn[i]`` access."""
    operands = self._operands
    return operands[i]
149 # ScratchABit extension
150 def num_operands(self
):
151 for i
, op
in enumerate(self
._operands
):
152 if op
.type == o_void
:
157 #return "insn_t(ea=%x, sz=%d, id=%d, %r, %s)" % (self.ea, self.size, self.itype, self.disasm, self._operands)
158 used_operands
= self
._operands
[0:self
.num_operands()]
159 return "insn_t(ea=%x, sz=%d, id=%d, %r, %s)" % (self
.ea
, self
.size
, self
.itype
, self
.disasm
, used_operands
)
168 # Instruction rendition API ("out()" in IDA-speak)
172 # Non-IDAPython symbols
173 # Default instruction field width, 8 is IDA standard
175 # Default indentation of instructions
# Default indentation of xref comments
DEFAULT_XREF_INDENT = 13
182 def init_output_buffer(n
):
187 def term_output_buffer():
190 def fillstr(s
, width
):
192 s
+= " " * (width
- len(s
))
def OutMnem(width=DEFAULT_WIDTH):
    """Write the current instruction's mnemonic to the output line.

    The mnemonic is the "name" entry of the processor's ``instruc`` table
    for ``_processor.cmd.itype``. It is passed through ``fillstr`` with
    *width* and then written to ``u_line``.
    """
    global _processor, u_line
    insn_desc = _processor.instruc[_processor.cmd.itype]
    mnemonic = fillstr(insn_desc["name"], width)
    u_line.write(mnemonic)
205 # // This call to out_symbol() is another helper function in the
206 # // IDA kernel. It writes the specified character to the current
207 # // buffer, using the user-configurable 'symbol' color.
216 def out_one_operand(op_no
):
217 global _processor
, u_line
220 # Init array of this operand's positions in output line
221 if not hasattr(cmd
, "arg_pos") or not cmd
.arg_pos
:
222 cmd
.arg_pos
= [[0, 0] for i
in range(UA_MAXOP
)]
225 op
.props
= ADDRESS_SPACE
.get_arg_prop_dict(cmd
.ea
, op_no
)
227 # Record start position of this operand in output line
228 cmd
.arg_pos
[op_no
][0] = len(u_line
.getvalue())
232 # Record end position of this operand in output line
233 cmd
.arg_pos
[op_no
][1] = len(u_line
.getvalue())
236 def OutValue(op
, flags
):
243 # Undefined symbol value
244 if isinstance(val
, str):
247 subtype
= op
.props
.get("subtype")
248 if subtype
== defs
.IMM_ADDR
:
249 out_name_expr(op
, val
, BADADDR
)
250 elif subtype
== defs
.IMM_UDEC
:
251 u_line
.write(str(val
))
253 u_line
.write(hex(val
))
def OutLong(val, base):
    """Write *val* to the output line, formatted in the given *base*.

    Bases 2, 8, 10 and 16 are rendered with ``bin``, ``oct``, ``str`` and
    ``hex`` respectively and written to the module-level ``u_line`` buffer.

    NOTE(review): the ``base`` comparison lines were not visible in the
    reviewed excerpt; the 2/8/10/16 mapping is reconstructed from the
    formatter calls -- confirm against the full file.

    Raises:
        NotImplementedError: if *base* is not one of the supported values.
    """
    global u_line
    if base == 2:
        u_line.write(bin(val))
    elif base == 8:
        u_line.write(oct(val))
    elif base == 10:
        u_line.write(str(val))
    elif base == 16:
        u_line.write(hex(val))
    else:
        # The original raised the misspelled name ``NotImplementetError``,
        # which surfaced as a NameError instead of the intended exception.
        raise NotImplementedError("OutLong: unsupported base %r" % (base,))
268 def out_name_expr(op
, ea
, offset
):
270 # print(op, ea, offset)
271 assert offset
== BADADDR
273 label
= ADDRESS_SPACE
.get_label(ea
)
274 except InvalidAddrException
:
275 log
.warn("out_name_expr: Error getting label for 0x%x", ea
)
280 u_line
.write(hex(ea
))
289 def out_register(reg
):
292 def MakeLine(output_buffer
):
295 _processor
.cmd
.disasm
= output_buffer
.getvalue().rstrip()
298 # End of instruction rendition API
302 # Address space access API
def get_full_byte(ea):
    """Return the result of ``ADDRESS_SPACE.get_byte`` for address *ea*."""
    byte_at_ea = ADDRESS_SPACE.get_byte(ea)
    return byte_at_ea
def get_bytes(ea, sz):
    """Return the result of ``ADDRESS_SPACE.get_bytes`` for *ea* and *sz*."""
    chunk = ADDRESS_SPACE.get_bytes(ea, sz)
    return chunk
def get_full_val(ea, val_sz):
    """Return the result of ``ADDRESS_SPACE.get_data`` for *ea* and *val_sz*."""
    value = ADDRESS_SPACE.get_data(ea, val_sz)
    return value
316 def ua_add_cref(opoff
, ea
, flags
):
317 ADDRESS_SPACE
.analisys_stack_push(ea
, flags
)
319 ADDRESS_SPACE
.make_auto_label(ea
)
320 ADDRESS_SPACE
.add_xref(_processor
.cmd
.ea
, ea
, "j")
322 ADDRESS_SPACE
.make_label("fun_", ea
)
323 ADDRESS_SPACE
.add_xref(_processor
.cmd
.ea
, ea
, "c")
325 fl
= ADDRESS_SPACE
.get_flags(ea
, 0xff)
326 if fl
& ADDRESS_SPACE
.FUNC
:
327 if not ADDRESS_SPACE
.is_func(ea
):
328 log
.warn("Address 0x%x calls inside another function: 0x%x", _processor
.cmd
.ea
, ea
)
329 except InvalidAddrException
:
330 log
.exception("Could not check 0x%x as being a call target inside another function", ea
)
331 ADDRESS_SPACE
.make_func(ea
, None)
def ua_dodata2(opoff, ea, dtype):
    """Make a data item at *ea* and give it an auto-generated label.

    The size passed to ``ADDRESS_SPACE.make_data`` is ``DATA_SIZE[dtype]``.
    *opoff* is not used by this function.
    """
    item_size = DATA_SIZE[dtype]
    ADDRESS_SPACE.make_data(ea, item_size)
    ADDRESS_SPACE.make_auto_label(ea)
def ua_add_dref(opoff, ea, access):
    """Register a data cross-reference for the current instruction.

    Calls ``ADDRESS_SPACE.add_xref`` with the current instruction's address
    (``_processor.cmd.ea``), *ea* and *access*. *opoff* is not used.
    """
    insn_ea = _processor.cmd.ea
    ADDRESS_SPACE.add_xref(insn_ea, ea, access)
347 def QueueMark(type, ea
):
349 ADDRESS_SPACE
.add_issue(ea
, "Indirect jump")
350 elif type == Q_noName
:
351 ADDRESS_SPACE
.add_issue(ea
, "Ref to address outside address space")
356 # End of Address space access API
360 # Instruction operands API
365 # TODO: ref_addr is extension
def op_offset(ea, op_no, reftype, ref_addr):
    """Mark operand *op_no* of the instruction at *ea* as an offset.

    Delegates to ``ADDRESS_SPACE.make_arg_offset(ea, op_no, ref_addr)``.
    *reftype* is accepted but not used by this function.
    """
    make_offset = ADDRESS_SPACE.make_arg_offset
    make_offset(ea, op_no, ref_addr)
def is_offset(ea, op_no):
    """Return True if the "subtype" property of operand *op_no* at *ea*
    equals ``defs.IMM_ADDR``."""
    subtype = ADDRESS_SPACE.get_arg_prop(ea, op_no, "subtype")
    return subtype == defs.IMM_ADDR
374 # End of Instruction operands API
379 # Note that repeating comments are not supported, so the "repeating" argument
def set_cmt(ea, cmt, repeating):
    """Set comment *cmt* at address *ea* via ``ADDRESS_SPACE.set_comment``.

    *repeating* is accepted but ignored by this function.
    """
    store_comment = ADDRESS_SPACE.set_comment
    store_comment(ea, cmt)
def get_cmt(ea, repeating):
    """Return the comment at address *ea* via ``ADDRESS_SPACE.get_comment``.

    *repeating* is accepted but ignored by this function.
    """
    comment = ADDRESS_SPACE.get_comment(ea)
    return comment
397 # "cmd is a global variable of type insn_t. It is contains information
398 # about the last decoded instruction. This variable is also filled by
399 # processor modules when they decode instructions."
404 def set_processor(p
):
410 def set_address_space(aspace
):
412 ADDRESS_SPACE
= aspace