engine: Add AddressSpace.memcpy() function.
[ScratchABit.git] / idaapi.py
blob34fcd0d4795d9c9f02012de6f05183b802593a8d
1 # ScratchABit - interactive disassembler
3 # Copyright (c) 2015 Paul Sokolovsky
5 # This program is free software; you can redistribute it and/or modify
6 # it under the terms of the GNU General Public License as published by
7 # the Free Software Foundation, either version 3 of the License, or
8 # (at your option) any later version.
10 # This program is distributed in the hope that it will be useful, but
11 # WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 # General Public License for more details.
15 # You should have received a copy of the GNU General Public License
16 # along with this program. If not, see <http://www.gnu.org/licenses/>.
17 import sys
18 from io import StringIO
19 import logging as log
21 import engine
# Data types
# Symbolic (string) identifiers for the data item kinds this module knows.
dt_byte = "dt_byte"
dt_word = "dt_word"
dt_dword = "dt_dword"
# Size of each data type; ua_dodata2() passes this value to
# ADDRESS_SPACE.make_data(). Presumably the unit is bytes -- confirm.
DATA_SIZE = {dt_byte: 1, dt_word: 2, dt_dword: 4}

# Maximum number of operands per instruction (6 is the IDA standard).
# insn_t allocates exactly this many op_t slots.
UA_MAXOP = 6
# Operand types
# o_void marks an unused operand slot: insn_t.num_operands() stops counting
# at the first operand whose type equals it.
o_void = "-"
# Immediate value, can be either numeric value, or address of memory
# ("offset"), further differentiated by value subtype (offset, hex, dec, etc.)
o_imm = "o_imm"
o_reg = "o_reg"
# Location in memory. Should be used only if the instruction is guaranteed
# to access memory at the given address with the given size (direct
# addressing mode). Should not be mixed up with o_imm of offset subtype.
o_mem = "o_mem"
o_near = "o_near"
o_phrase = "o_phrase"
o_displ = "o_displ"
o_idpspec0 = "o_idpspec0"
# Marker class used as a sentinel value: OutValue() passes the class itself
# as the "offset" argument of out_name_expr(), which asserts equality with it.
class BADADDR:
    pass
# Processor flags
# Each value is a distinct power of two, so the flags can be OR-ed together.
PR_SEGS = 1
PR_DEFSEG32 = 2
PR_RNAMESOK = 4
PR_ADJSEGS = 8
PRN_HEX = 16
PR_USE32 = 32

# Assembler syntax flags
# Also distinct powers of two.
ASH_HEXF3 = 1
ASD_DECF0 = 2
ASO_OCTF1 = 4
ASB_BINF3 = 8
AS_NOTAB = 16
AS_ASCIIC = 32
AS_ASCIIZ = 64
# Operand flags (op_t.flags)
# op_t.__init__() initialises every operand's flags to OF_SHOW.
OF_SHOW = 0x08  # If not set, operand is not shown

# Operand/value output flags (OutValue, etc.)
# Of these, the code in this file only tests OOF_ADDR (see OutValue()).
OOFS_IFSIGN = 0
OOFS_NOSIGN = 1
OOFS_NEEDSIGN = 2
OOF_SIGNED = 4
OOF_NUMBER = 8
OOFW_IMM = 0
OOFW_16 = 0x10
OOFW_32 = 0x20
OOFW_8 = 0x30
# When set, OutValue() renders op.addr instead of op.value.
OOF_ADDR = 0x40
# Basic instruction semantics ("features" is IDA-speak)
CF_CALL = 1
CF_STOP = 2  # Control flow stops here, e.g. jump, ret
CF_JUMP = 4  # Not just a jump, indirect jump (or call)!

# Code references (i.e. control flow flags)
# ua_add_cref() records an xref of kind "c" for fl_CN and "j" for fl_JN.
fl_CN = 1  # "call near"
fl_JN = 2  # "jump near"
fl_F = 3  # "ordinary flow"
# ScratchABit extensions:
# Return address from a call. The next instruction after a call should,
# whenever possible, use this flag instead of fl_F. This is because there's
# no guarantee that a call will return, so such code paths need to be treated
# with different priority than "next instruction" and "jump" code paths.
fl_RET_FROM_CALL = 10
# Sane names
fl_CALL = fl_CN
fl_JUMP = fl_JN

# Data references
dr_R = "r"
dr_W = "w"
dr_O = "o"  # "Offset" reference, address of an item is taken

# Segment permissions
# Distinct powers of two, so they can be combined with bitwise OR.
SEGPERM_EXEC = 1
SEGPERM_WRITE = 2
SEGPERM_READ = 4
# Empty placeholder class; nothing in the code shown here reads or writes it.
# NOTE(review): presumably mirrors IDAPython's `idaapi.cvar` so processor
# modules can reference the name -- confirm against callers.
class cvar:
    pass
class op_t:
    """A single operand slot of an instruction.

    Attributes set on construction: ``n`` (slot index), ``type`` (None until
    assigned) and ``flags`` (starts as OF_SHOW). ``addr``, ``value`` and
    ``props`` are attached later by other code and may be absent.
    """

    def __init__(self, no):
        self.n = no
        self.type = None
        self.flags = OF_SHOW

    def get_addr(self):
        """Return ``addr`` if present, otherwise ``value``, otherwise None."""
        # "addr" takes precedence over "value" when both attributes exist.
        for field in ("addr", "value"):
            if hasattr(self, field):
                return getattr(self, field)
        return None

    def __repr__(self):
        details = (self.n, self.type, self.get_addr())
        return "op_t(#%d, t=%s, addr/val=%s)" % details
class insn_t:
    """A decoded instruction: address, size, type id and operand slots.

    Indexing an instance (``insn[i]``) yields the i-th operand slot.
    """

    def __init__(self, ea=0):
        self.ea = ea
        self.size = 0
        self.itype = 0
        # Always allocate the full set of UA_MAXOP operand slots.
        self._operands = list(map(op_t, range(UA_MAXOP)))
        self.disasm = None

    def get_canon_feature(self):
        """Return the "feature" entry of this instruction's descriptor."""
        descriptor = _processor.instruc[self.itype]
        return descriptor["feature"]

    def __getitem__(self, i):
        return self._operands[i]

    # ScratchABit extension
    def num_operands(self):
        """Return the index of the first o_void operand, or UA_MAXOP if none."""
        void_positions = (
            pos for pos, operand in enumerate(self._operands)
            if operand.type == o_void
        )
        return next(void_positions, UA_MAXOP)

    def __repr__(self):
        # Only the operands before the first o_void slot are shown.
        shown = self._operands[:self.num_operands()]
        fields = (self.ea, self.size, self.itype, self.disasm, shown)
        return "insn_t(ea=%x, sz=%d, id=%d, %r, %s)" % fields
class processor_t:
    """Base object for processor modules; exposes the shared instruction as ``self.cmd``."""

    def __init__(self):
        # Binds whatever the module-level global `cmd` refers to at
        # construction time.
        self.cmd = cmd
# Instruction rendition API ("out()" in IDA-speak)

COLOR_ERROR = "*"
# Non-IDAPython symbols
# Default instruction field width, 8 is IDA standard
DEFAULT_WIDTH = 8
# Default indentation of instructions
DEFAULT_INDENT = 4
# Default indentation of xref comments
DEFAULT_XREF_INDENT = 13

# Current output buffer. None until init_output_buffer() installs a StringIO;
# the Out*() helpers below write into it.
u_line = None
def init_output_buffer(n):
    """Install a fresh, empty StringIO as the current output buffer and return it.

    The ``n`` argument is accepted but not used by this implementation.
    """
    global u_line
    fresh = StringIO()
    u_line = fresh
    return fresh
def term_output_buffer():
    """No-op; the current output buffer is left untouched."""
    return None
def fillstr(s, width):
    """Right-pad string ``s`` with spaces to at least ``width`` characters.

    Strings already ``width`` or longer are returned unchanged (never
    truncated).
    """
    return s.ljust(width)
def OutMnem(width=DEFAULT_WIDTH):
    """Write the current instruction's mnemonic, space-padded to ``width``.

    The name is looked up in the processor's ``instruc`` table using the
    ``itype`` of the processor's current ``cmd``.
    """
    descriptor = _processor.instruc[_processor.cmd.itype]
    mnemonic = descriptor["name"]
    u_line.write(fillstr(mnemonic, width))
def OutChar(c):
    """Append ``c`` to the current output buffer."""
    # Read-only access to the module-level buffer; no `global` needed.
    u_line.write(c)
# // This call to out_symbol() is another helper function in the
# // IDA kernel. It writes the specified character to the current
# // buffer, using the user-configurable 'symbol' color.
def out_symbol(c):
    """Write ``c`` to the current output buffer by delegating to OutChar().

    No colouring is applied here; the call is forwarded unchanged.
    """
    OutChar(c)
# Append string
def OutLine(s):
    """Append the string ``s`` to the current output buffer."""
    # Read-only access to the module-level buffer; no `global` needed.
    u_line.write(s)
def out_one_operand(op_no):
    """Render operand ``op_no`` of the current instruction into the output buffer.

    Attaches the operand's argument properties (from the address space) as
    ``op.props``, calls the processor's ``outop()``, and stores the
    operand's start/end offsets within the output line in
    ``cmd.arg_pos[op_no]``.
    """
    insn = _processor.cmd

    # Create the per-operand [start, end] table when it is missing or empty.
    if not getattr(insn, "arg_pos", None):
        insn.arg_pos = [[0, 0] for _ in range(UA_MAXOP)]

    operand = insn[op_no]
    operand.props = ADDRESS_SPACE.get_arg_prop_dict(insn.ea, op_no)

    # Start offset: length of the line before the operand is emitted.
    insn.arg_pos[op_no][0] = len(u_line.getvalue())

    _processor.outop(operand)

    # End offset: length of the line after the operand is emitted.
    insn.arg_pos[op_no][1] = len(u_line.getvalue())
def OutValue(op, flags):
    """Write an operand's value (or address) to the current output buffer.

    ``op.addr`` is used when ``flags`` has OOF_ADDR set, ``op.value``
    otherwise. String values are written verbatim. Numeric values are
    rendered according to ``op.props["subtype"]``: as a label/address for
    engine.IMM_ADDR, in decimal for engine.IMM_UDEC, and in hex otherwise.
    """
    val = op.addr if flags & OOF_ADDR else op.value

    # Undefined symbol value: already textual, emit as-is.
    if isinstance(val, str):
        u_line.write(val)
        return

    subtype = op.props.get("subtype")
    if subtype == engine.IMM_ADDR:
        out_name_expr(op, val, BADADDR)
        return

    rendered = str(val) if subtype == engine.IMM_UDEC else hex(val)
    u_line.write(rendered)
def OutLong(val, base):
    """Write integer ``val`` to the current output buffer in the given base.

    Supported bases are 2, 8, 10 and 16, rendered with ``bin()``, ``oct()``,
    ``str()`` and ``hex()`` respectively (so non-decimal output carries the
    usual ``0b``/``0o``/``0x`` prefix).

    Raises:
        NotImplementedError: if ``base`` is not one of the supported bases.
    """
    formatters = {2: bin, 8: oct, 10: str, 16: hex}
    render = formatters.get(base)
    if render is None:
        # The original raised a misspelled name here, which surfaced as a
        # NameError instead of the intended NotImplementedError.
        raise NotImplementedError("OutLong: unsupported base %r" % (base,))
    u_line.write(render(val))
def out_name_expr(op, ea, offset):
    """Write the label at address ``ea``, or ``hex(ea)`` if it has none.

    ``offset`` must equal BADADDR (enforced with an assert); ``op`` is not
    used. Always returns True.
    """
    assert offset == BADADDR
    label = ADDRESS_SPACE.get_label(ea)
    # Fall back to the raw hex address when no (non-empty) label exists.
    u_line.write(label if label else hex(ea))
    return True
def out_tagon(tag):
    """No-op: the ``tag`` argument is ignored and nothing is written."""
    return None
def out_register(reg):
    """Write the register name ``reg`` to the output buffer via OutLine()."""
    OutLine(reg)
def MakeLine(output_buffer):
    """Store the buffer's text as the current instruction's ``disasm``.

    Trailing whitespace is stripped from the contents of ``output_buffer``
    before it is assigned to ``_processor.cmd.disasm``.
    """
    rendered = output_buffer.getvalue()
    _processor.cmd.disasm = rendered.rstrip()
290 # End of instruction rendition API
294 # Address space access API
def get_full_byte(ea):
    """Return ``ADDRESS_SPACE.get_byte(ea)`` for the address ``ea``."""
    return ADDRESS_SPACE.get_byte(ea)
# Extension
def get_full_val(ea, val_sz):
    """Return ``ADDRESS_SPACE.get_data(ea, val_sz)``.

    NOTE(review): ``val_sz`` is presumably the value size in bytes --
    confirm against AddressSpace.get_data().
    """
    return ADDRESS_SPACE.get_data(ea, val_sz)
def ua_add_cref(opoff, ea, flags):
    """Record a code reference from the current instruction to ``ea``.

    The target is always pushed onto the address space's analysis stack.
    For fl_JN the target gets an auto label and a "j" xref. For fl_CN it
    gets a "fun_" label and a "c" xref, a warning is logged if the target
    is flagged FUNC but is not itself a function start, and the target is
    then made a function. ``opoff`` is not used.
    """
    # NOTE(review): the nesting below was reconstructed from a listing that
    # had lost its indentation -- confirm the placement of the FUNC check
    # and make_func() against the repository copy.
    ADDRESS_SPACE.analisys_stack_push(ea, flags)
    if flags == fl_JN:
        ADDRESS_SPACE.make_auto_label(ea)
        ADDRESS_SPACE.add_xref(_processor.cmd.ea, ea, "j")
    elif flags == fl_CN:
        ADDRESS_SPACE.make_label("fun_", ea)
        ADDRESS_SPACE.add_xref(_processor.cmd.ea, ea, "c")
        fl = ADDRESS_SPACE.get_flags(ea, 0xff)
        if fl & ADDRESS_SPACE.FUNC:
            if not ADDRESS_SPACE.is_func(ea):
                # logging.warn() is a deprecated alias; use warning().
                log.warning("Address 0x%x calls inside another function: 0x%x", _processor.cmd.ea, ea)
        ADDRESS_SPACE.make_func(ea, None)
def ua_dodata2(opoff, ea, dtype):
    """Mark ``ea`` as a data item of type ``dtype`` and give it an auto label.

    The item size is taken from DATA_SIZE[dtype]; ``opoff`` is not used.
    """
    item_size = DATA_SIZE[dtype]
    ADDRESS_SPACE.make_data(ea, item_size)
    ADDRESS_SPACE.make_auto_label(ea)
def ua_add_dref(opoff, ea, access):
    """Add a data xref of kind ``access`` from the current instruction to ``ea``.

    ``opoff`` is not used.
    """
    source_ea = _processor.cmd.ea
    ADDRESS_SPACE.add_xref(source_ea, ea, access)
# Issue queue kind: the only one QueueMark() accepts.
Q_jumps = 1


def QueueMark(type, ea):
    """Record an "Indirect jump" issue at ``ea`` when ``type`` is Q_jumps.

    Any other ``type`` trips an assertion.
    """
    if type != Q_jumps:
        # Unsupported queue kind.
        assert 0
    else:
        ADDRESS_SPACE.add_issue(ea, "Indirect jump")
338 # End of Address space access API
342 # Instruction operands API
# Reference type constant; not read by any code shown in this file.
REF_OFF32 = 2

# TODO: ref_addr is extension
def op_offset(ea, op_no, reftype, ref_addr):
    """Mark operand ``op_no`` of the instruction at ``ea`` as an offset to ``ref_addr``.

    Delegates to ADDRESS_SPACE.make_arg_offset(); ``reftype`` is accepted
    but not used.
    """
    ADDRESS_SPACE.make_arg_offset(ea, op_no, ref_addr)
def is_offset(ea, op_no):
    """Return True if operand ``op_no`` at ``ea`` has the engine.IMM_ADDR subtype."""
    subtype = ADDRESS_SPACE.get_arg_prop(ea, op_no, "subtype")
    return subtype == engine.IMM_ADDR
356 # End of Instruction operands API
361 # Interfacing
# "cmd is a global variable of type insn_t. It is contains information
# about the last decoded instruction. This variable is also filled by
# processor modules when they decode instructions."
# Created at import time with the default address (ea=0); processor_t
# instances pick this object up as self.cmd.
cmd = insn_t()
# Active processor module object; None until set_processor() is called.
_processor = None

def set_processor(p):
    """Install ``p`` as the module-wide active processor (``_processor``)."""
    global _processor
    _processor = p
# Active address space object; None until set_address_space() is called.
ADDRESS_SPACE = None

def set_address_space(aspace):
    """Install ``aspace`` as the module-wide address space (``ADDRESS_SPACE``)."""
    global ADDRESS_SPACE
    ADDRESS_SPACE = aspace