1 # ScratchABit - interactive disassembler
3 # Copyright (c) 2015 Paul Sokolovsky
5 # This program is free software; you can redistribute it and/or modify
6 # it under the terms of the GNU General Public License as published by
7 # the Free Software Foundation, either version 3 of the License, or
8 # (at your option) any later version.
10 # This program is distributed in the hope that it will be useful, but
11 # WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 # General Public License for more details.
15 # You should have received a copy of the GNU General Public License
16 # along with this program. If not, see <http://www.gnu.org/licenses/>.
18 from io
import StringIO
21 from scratchabit
import defs
# Maps a data type code to the size passed to make_data() for an item of
# that type: byte -> 1, word -> 2, dword -> 4.
DATA_SIZE = {
    dt_byte: 1,
    dt_word: 2,
    dt_dword: 4,
}
35 # Immediate value, can be either numeric value, or address of memory
36 # ("offset"), further differentiated by value subtype (offset, hex, dec, etc.)
# Location in memory. Should be used only if the instruction is guaranteed
# to access memory at the given address with the given size (direct
# addressing mode). Should not be mixed up with o_imm of offset subtype.
# Operand type tag; the value is simply the tag's own name as a string.
# NOTE(review): presumably the first processor-specific operand type, as in
# the IDA naming scheme - confirm against the processor modules.
o_idpspec0 = "o_idpspec0"
58 # Assembler syntax flags
67 # Operand flags (op_t.flags)
# If not set, operand is not shown
OF_SHOW = 0x08
70 # Operand/value output flags (OutValue, etc.)
82 # Basic instruction semantics ("features" is IDA-speak)
# Control flow stops here, e.g. jump, ret
CF_STOP = 2
# Not just a jump, indirect jump (or call)!
CF_JUMP = 4
87 # Code references (i.e. control flow flags)
# "call near"
fl_CN = 1
# "jump near"
fl_JN = 2
# "ordinary flow"
fl_F = 3
91 # ScratchABit extensions:
# Return address from a call. The next instruction after a call should,
# whenever possible, use this flag instead of fl_F. This is because there
# is no guarantee that a call will return, so such code paths need to be
# treated with a different priority than "next instruction" and "jump"
# code paths.
# "Offset" reference, address of an item is taken
dr_O = "o"
106 # Segment permissions
117 def __init__(self
, no
):
123 if hasattr(self
, "addr"):
125 if hasattr(self
, "value"):
130 #return str(self.__dict__)
131 return "op_t(#%d, t=%s, addr/val=%s)" % (self
.n
, self
.type, self
.get_addr())
135 def __init__(self
, ea
=0):
139 self
._operands
= [op_t(i
) for i
in range(UA_MAXOP
)]
def get_canon_feature(self):
    """Return the "feature" entry of this instruction's descriptor.

    The descriptor is taken from the active processor's ``instruc`` table,
    indexed by ``self.itype``.
    """
    descriptor = _processor.instruc[self.itype]
    return descriptor["feature"]
def __getitem__(self, i):
    """Return entry *i* of this instruction's operand list (``insn[i]``)."""
    operands = self._operands
    return operands[i]
148 # ScratchABit extension
149 def num_operands(self
):
150 for i
, op
in enumerate(self
._operands
):
151 if op
.type == o_void
:
156 #return "insn_t(ea=%x, sz=%d, id=%d, %r, %s)" % (self.ea, self.size, self.itype, self.disasm, self._operands)
157 used_operands
= self
._operands
[0:self
.num_operands()]
158 return "insn_t(ea=%x, sz=%d, id=%d, %r, %s)" % (self
.ea
, self
.size
, self
.itype
, self
.disasm
, used_operands
)
167 # Instruction rendition API ("out()" in IDA-speak)
171 # Non-IDAPython symbols
172 # Default instruction field width, 8 is IDA standard
174 # Default indentation of instructions
176 # Default indentation of xref comments
# Column count used by default when indenting xref comments.
DEFAULT_XREF_INDENT = 13
181 def init_output_buffer(n
):
186 def term_output_buffer():
def fillstr(s, width):
    """Return *s* padded on the right with spaces up to *width* characters.

    A string that is already *width* characters or longer comes back
    unchanged: multiplying " " by a non-positive count yields "".
    """
    padding = " " * (width - len(s))
    return s + padding
def OutMnem(width=DEFAULT_WIDTH):
    """Write the mnemonic of the current instruction to the output line.

    The mnemonic is the "name" entry of the processor's ``instruc`` table
    for ``_processor.cmd.itype``. It is passed through fillstr() with
    *width* and the result is written to ``u_line``.
    """
    insn_desc = _processor.instruc[_processor.cmd.itype]
    padded = fillstr(insn_desc["name"], width)
    u_line.write(padded)
204 # // This call to out_symbol() is another helper function in the
205 # // IDA kernel. It writes the specified character to the current
206 # // buffer, using the user-configurable 'symbol' color.
215 def out_one_operand(op_no
):
216 global _processor
, u_line
219 # Init array of this operand's positions in output line
220 if not hasattr(cmd
, "arg_pos") or not cmd
.arg_pos
:
221 cmd
.arg_pos
= [[0, 0] for i
in range(UA_MAXOP
)]
224 op
.props
= ADDRESS_SPACE
.get_arg_prop_dict(cmd
.ea
, op_no
)
226 # Record start position of this operand in output line
227 cmd
.arg_pos
[op_no
][0] = len(u_line
.getvalue())
231 # Record end position of this operand in output line
232 cmd
.arg_pos
[op_no
][1] = len(u_line
.getvalue())
235 def OutValue(op
, flags
):
242 # Undefined symbol value
243 if isinstance(val
, str):
246 subtype
= op
.props
.get("subtype")
247 if subtype
== defs
.IMM_ADDR
:
248 out_name_expr(op
, val
, BADADDR
)
249 elif subtype
== defs
.IMM_UDEC
:
250 u_line
.write(str(val
))
252 u_line
.write(hex(val
))
def OutLong(val, base):
    """Write integer *val* to the output line ``u_line`` in the given *base*.

    The text comes from the builtins bin(), oct(), str() and hex(), so
    binary, octal and hexadecimal output carries Python's "0b" / "0o" /
    "0x" prefix.

    Args:
        val: integer value to render.
        base: numeric base; 2, 8, 10 and 16 are supported.

    Raises:
        NotImplementedError: if *base* is not one of the supported values.
    """
    # NOTE(review): the base tested by each branch is inferred from the
    # formatter that branch uses (bin/oct/str/hex) - confirm.
    if base == 2:
        text = bin(val)
    elif base == 8:
        text = oct(val)
    elif base == 10:
        text = str(val)
    elif base == 16:
        text = hex(val)
    else:
        # The exception name used to be misspelled ("NotImplementetError"),
        # so this path failed with NameError instead of the intended error.
        raise NotImplementedError("unsupported base: %r" % (base,))
    u_line.write(text)
267 def out_name_expr(op
, ea
, offset
):
269 # print(op, ea, offset)
270 assert offset
== BADADDR
271 label
= ADDRESS_SPACE
.get_label(ea
)
275 u_line
.write(hex(ea
))
284 def out_register(reg
):
287 def MakeLine(output_buffer
):
290 _processor
.cmd
.disasm
= output_buffer
.getvalue().rstrip()
293 # End of instruction rendition API
297 # Address space access API
def get_full_byte(ea):
    """Return what ADDRESS_SPACE.get_byte() yields for address *ea*."""
    value = ADDRESS_SPACE.get_byte(ea)
    return value
def get_full_val(ea, val_sz):
    """Return what ADDRESS_SPACE.get_data() yields for *ea* and *val_sz*."""
    value = ADDRESS_SPACE.get_data(ea, val_sz)
    return value
307 def ua_add_cref(opoff
, ea
, flags
):
308 ADDRESS_SPACE
.analisys_stack_push(ea
, flags
)
310 ADDRESS_SPACE
.make_auto_label(ea
)
311 ADDRESS_SPACE
.add_xref(_processor
.cmd
.ea
, ea
, "j")
313 ADDRESS_SPACE
.make_label("fun_", ea
)
314 ADDRESS_SPACE
.add_xref(_processor
.cmd
.ea
, ea
, "c")
315 fl
= ADDRESS_SPACE
.get_flags(ea
, 0xff)
316 if fl
& ADDRESS_SPACE
.FUNC
:
317 if not ADDRESS_SPACE
.is_func(ea
):
318 log
.warn("Address 0x%x calls inside another function: 0x%x", _processor
.cmd
.ea
, ea
)
319 ADDRESS_SPACE
.make_func(ea
, None)
def ua_dodata2(opoff, ea, dtype):
    """Mark address *ea* as a data item of type *dtype* and auto-label it.

    The item size is looked up in DATA_SIZE. *opoff* is accepted but not
    used by this implementation.
    """
    item_size = DATA_SIZE[dtype]
    ADDRESS_SPACE.make_data(ea, item_size)
    ADDRESS_SPACE.make_auto_label(ea)
def ua_add_dref(opoff, ea, access):
    """Record a cross-reference from the current instruction to *ea*.

    The source address is ``_processor.cmd.ea``; *access* is forwarded to
    ADDRESS_SPACE.add_xref() unchanged. *opoff* is accepted but not used.
    """
    from_ea = _processor.cmd.ea
    ADDRESS_SPACE.add_xref(from_ea, ea, access)
335 def QueueMark(type, ea
):
337 ADDRESS_SPACE
.add_issue(ea
, "Indirect jump")
338 elif type == Q_noName
:
339 ADDRESS_SPACE
.add_issue(ea
, "Ref to address outside address space")
344 # End of Address space access API
348 # Instruction operands API
353 # TODO: ref_addr is extension
def op_offset(ea, op_no, reftype, ref_addr):
    """Mark operand *op_no* of the instruction at *ea* as an offset.

    *ref_addr* is handed to ADDRESS_SPACE.make_arg_offset(); *reftype* is
    accepted but not used by this implementation.
    """
    mark_offset = ADDRESS_SPACE.make_arg_offset
    mark_offset(ea, op_no, ref_addr)
def is_offset(ea, op_no):
    """Tell whether operand *op_no* at *ea* has subtype ``defs.IMM_ADDR``."""
    subtype = ADDRESS_SPACE.get_arg_prop(ea, op_no, "subtype")
    return subtype == defs.IMM_ADDR
362 # End of Instruction operands API
367 # Note that repeating comments are not supported, so the "repeating" argument
def set_cmt(ea, cmt, repeating):
    """Attach comment *cmt* to address *ea*.

    *repeating* is accepted but not forwarded to the address space.
    """
    store_comment = ADDRESS_SPACE.set_comment
    store_comment(ea, cmt)
def get_cmt(ea, repeating):
    """Return the comment the address space holds for *ea*.

    *repeating* is accepted but not forwarded to the address space.
    """
    comment = ADDRESS_SPACE.get_comment(ea)
    return comment
# "cmd is a global variable of type insn_t. It contains information
# about the last decoded instruction. This variable is also filled by
# processor modules when they decode instructions."
392 def set_processor(p
):
def set_address_space(aspace):
    """Install *aspace* as the module-level ADDRESS_SPACE object.

    The other helpers in this module reach the address space through that
    global name.
    """
    # NOTE(review): the ``global`` declaration is inferred from the
    # module-level use of ADDRESS_SPACE elsewhere - confirm.
    global ADDRESS_SPACE
    ADDRESS_SPACE = aspace