README: Update for 2.0 release.
[ScratchABit.git] / idaapi.py
blob06ff7c3f0eac24d6e6527cd326985bf34d050d15
1 # ScratchABit - interactive disassembler
3 # Copyright (c) 2015 Paul Sokolovsky
5 # This program is free software; you can redistribute it and/or modify
6 # it under the terms of the GNU General Public License as published by
7 # the Free Software Foundation, either version 3 of the License, or
8 # (at your option) any later version.
10 # This program is distributed in the hope that it will be useful, but
11 # WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 # General Public License for more details.
15 # You should have received a copy of the GNU General Public License
16 # along with this program. If not, see <http://www.gnu.org/licenses/>.
17 import sys
18 from io import StringIO
19 import logging as log
21 from scratchabit import defs
22 from scratchabit.defs import InvalidAddrException
# Data types
dt_byte = "dt_byte"
dt_word = "dt_word"
dt_dword = "dt_dword"
# Size associated with each data type (ua_dodata2() passes it to make_data())
DATA_SIZE = {
    dt_byte: 1,
    dt_word: 2,
    dt_dword: 4,
}

# Number of operand slots allocated per insn_t. IDA standard is 6.
UA_MAXOP = 16

# Operand types
o_void = "-"
# Immediate value: either a plain numeric value or the address of memory
# ("offset"); the two are further told apart by the value subtype
# (offset, hex, dec, etc.)
o_imm = "o_imm"
o_reg = "o_reg"
# Location in memory. Use only when the instruction is guaranteed to access
# memory at the given address with the given size (direct addressing mode).
# Not to be confused with o_imm of offset subtype.
o_mem = "o_mem"
o_near = "o_near"
o_phrase = "o_phrase"
o_displ = "o_displ"
o_idpspec0 = "o_idpspec0"
class BADADDR:
    """Marker class; the class object itself is passed around as a sentinel
    (e.g. as the ``offset`` argument of out_name_expr())."""
# Processor flags (each occupies its own bit)
PR_SEGS = 1 << 0
PR_DEFSEG32 = 1 << 1
PR_RNAMESOK = 1 << 2
PR_ADJSEGS = 1 << 3
PRN_HEX = 1 << 4
PR_USE32 = 1 << 5

# Assembler syntax flags (each occupies its own bit)
ASH_HEXF3 = 1 << 0
ASD_DECF0 = 1 << 1
ASO_OCTF1 = 1 << 2
ASB_BINF3 = 1 << 3
AS_NOTAB = 1 << 4
AS_ASCIIC = 1 << 5
AS_ASCIIZ = 1 << 6
# Operand flags (op_t.flags)
OF_SHOW = 1 << 3  # If not set, operand is not shown

# Operand/value output flags (OutValue, etc.)
OOFS_IFSIGN = 0x00
OOFS_NOSIGN = 0x01
OOFS_NEEDSIGN = 0x02
OOF_SIGNED = 0x04
OOF_NUMBER = 0x08
OOFW_IMM = 0x00
OOFW_16 = 0x10
OOFW_32 = 0x20
OOFW_8 = 0x30
OOF_ADDR = 0x40  # OutValue() renders op.addr instead of op.value when set
# Basic instruction semantics ("features" is IDA-speak)
CF_CALL = 1 << 0
CF_STOP = 1 << 1  # Control flow stops here, e.g. jump, ret
CF_JUMP = 1 << 2  # Not just a jump, indirect jump (or call)!

# Code references (i.e. control flow flags)
fl_CN = 1  # "call near"
fl_JN = 2  # "jump near"
fl_F = 3  # "ordinary flow"
# ScratchABit extensions:
# Return address from a call. The instruction following a call should,
# whenever possible, use this flag instead of fl_F: there is no guarantee
# that a call will return, so such code paths need to be treated with a
# different priority than "next instruction" and "jump" code paths.
fl_RET_FROM_CALL = 10
# Sane names
fl_CALL = fl_CN
fl_JUMP = fl_JN

# Data references
dr_R = "r"
dr_W = "w"
dr_O = "o"  # "Offset" reference, address of an item is taken

# Segment permissions
SEGPERM_EXEC = 1 << 0
SEGPERM_WRITE = 1 << 1
SEGPERM_READ = 1 << 2
class cvar:
    """Empty namespace class; no attributes are defined on it here."""
class op_t:
    """One operand slot of an instruction.

    The constructor sets ``n`` (operand index), ``type`` (None) and
    ``flags`` (OF_SHOW). ``addr`` and ``value`` are not created here;
    get_addr() copes with either, both, or neither being present.
    """

    def __init__(self, no):
        self.n = no
        self.type = None
        self.flags = OF_SHOW

    def get_addr(self):
        """Return ``addr`` if set, otherwise ``value`` if set, otherwise None."""
        for attr_name in ("addr", "value"):
            if hasattr(self, attr_name):
                return getattr(self, attr_name)
        return None

    def __repr__(self):
        fields = (self.n, self.type, self.get_addr())
        return "op_t(#%d, t=%s, addr/val=%s)" % fields
class insn_t:
    """A decoded instruction: address, size, type id, operand slots and
    rendered text.

    UA_MAXOP op_t slots are always allocated; indexing the instruction
    (``insn[i]``) returns slot *i*.
    """

    def __init__(self, ea=0):
        self.ea = ea
        self.size = 0
        self.itype = 0
        self._operands = [op_t(slot) for slot in range(UA_MAXOP)]
        self.disasm = None

    def get_canon_feature(self):
        """Return the "feature" entry of this instruction's descriptor in
        the active processor's ``instruc`` table."""
        descriptor = _processor.instruc[self.itype]
        return descriptor["feature"]

    def __getitem__(self, i):
        return self._operands[i]

    # ScratchABit extension
    def num_operands(self):
        """Return the index of the first o_void slot, or UA_MAXOP if none."""
        void_positions = (
            pos for pos, op in enumerate(self._operands) if op.type == o_void
        )
        return next(void_positions, UA_MAXOP)

    def __repr__(self):
        # Only show the slots in front of the first o_void operand
        shown = self._operands[:self.num_operands()]
        fields = (self.ea, self.size, self.itype, self.disasm, shown)
        return "insn_t(ea=%x, sz=%d, id=%d, %r, %s)" % fields
class processor_t:
    """Processor object whose ``cmd`` attribute is the module-level ``cmd``."""

    def __init__(self):
        # Reference the shared global instruction object; no copy is made.
        self.cmd = cmd
# Instruction rendition API ("out()" in IDA-speak)

COLOR_ERROR = "*"

# Non-IDAPython symbols
DEFAULT_WIDTH = 8  # default instruction field width; 8 is IDA standard
DEFAULT_INDENT = 4  # default indentation of instructions
DEFAULT_XREF_INDENT = 13  # default indentation of xref comments

# Current output buffer; stays None until init_output_buffer() is called
u_line = None
def init_output_buffer(n):
    """Start a fresh output line and return its buffer.

    A new StringIO replaces the module-level ``u_line``. The *n* argument
    is accepted but not used.
    """
    global u_line
    fresh = StringIO()
    u_line = fresh
    return fresh
def term_output_buffer():
    """Do nothing; the output buffer needs no finalisation here."""
    return None
def fillstr(s, width):
    """Right-pad *s* with spaces up to *width* characters.

    Strings already *width* or longer are returned unchanged (never
    truncated).
    """
    return s.ljust(width)
def OutMnem(width=DEFAULT_WIDTH):
    """Write the current instruction's mnemonic, space-padded to *width*.

    The mnemonic is the "name" entry of the processor's ``instruc`` table
    for ``_processor.cmd.itype``.
    """
    insn = _processor.cmd
    mnemonic = _processor.instruc[insn.itype]["name"]
    u_line.write(fillstr(mnemonic, width))
def OutChar(c):
    """Append *c* to the current output buffer."""
    buf = u_line
    buf.write(c)
def out_symbol(c):
    """Write the symbol character *c* to the current output buffer.

    In the IDA kernel this helper uses the user-configurable 'symbol'
    color; here it simply forwards to OutChar().
    """
    OutChar(c)
def OutLine(s):
    """Append the string *s* to the current output buffer."""
    buf = u_line
    buf.write(s)
def out_one_operand(op_no):
    """Render operand *op_no* of the current instruction via the processor.

    Before calling ``_processor.outop()`` the operand's ``props`` are
    loaded from ADDRESS_SPACE. The start and end offsets of the rendered
    text within the output line are stored in ``cmd.arg_pos[op_no]``.
    """
    insn = _processor.cmd

    # Create the per-operand [start, end] table on first use
    if not getattr(insn, "arg_pos", None):
        insn.arg_pos = [[0, 0] for _ in range(UA_MAXOP)]

    operand = insn[op_no]
    operand.props = ADDRESS_SPACE.get_arg_prop_dict(insn.ea, op_no)

    # Offset in the output line where this operand starts
    start = len(u_line.getvalue())
    insn.arg_pos[op_no][0] = start

    _processor.outop(operand)

    # Offset in the output line just past this operand
    end = len(u_line.getvalue())
    insn.arg_pos[op_no][1] = end
def OutValue(op, flags):
    """Write the value (or address) of operand *op* to the output buffer.

    ``op.addr`` is used when OOF_ADDR is set in *flags*, ``op.value``
    otherwise; no other flag bits are examined here. A string value is
    written verbatim. Otherwise the "subtype" entry of ``op.props``
    selects the rendition: a name expression for defs.IMM_ADDR, decimal
    for defs.IMM_UDEC, hex for anything else.
    """
    val = op.addr if flags & OOF_ADDR else op.value

    # Undefined symbol value
    if isinstance(val, str):
        u_line.write(val)
        return

    subtype = op.props.get("subtype")
    if subtype == defs.IMM_ADDR:
        out_name_expr(op, val, BADADDR)
        return

    text = str(val) if subtype == defs.IMM_UDEC else hex(val)
    u_line.write(text)
def OutLong(val, base):
    """Write the integer *val* to the output buffer in the given *base*.

    Supported bases are 2, 8, 10 and 16. The text comes from bin(), oct(),
    str() and hex() respectively, so non-decimal output carries Python's
    "0b" / "0o" / "0x" prefix.

    Raises:
        NotImplementedError: if *base* is not one of the supported bases.
    """
    formatters = {2: bin, 8: oct, 10: str, 16: hex}
    render = formatters.get(base)
    if render is None:
        # The original spelled the exception name wrong, which surfaced as
        # a NameError instead of the intended NotImplementedError.
        raise NotImplementedError("unsupported base: %r" % (base,))
    u_line.write(render(val))
def out_name_expr(op, ea, offset):
    """Write the label of address *ea*, or its hex form if it has none.

    Args:
        op: operand being rendered (not used here).
        ea: address looked up with ADDRESS_SPACE.get_label().
        offset: must equal BADADDR; nothing else is supported.

    Returns:
        True once text has been written; False if get_label() raised
        InvalidAddrException (a warning is logged and nothing is written).
    """
    assert offset == BADADDR
    try:
        label = ADDRESS_SPACE.get_label(ea)
    except InvalidAddrException:
        # warning() is the supported spelling; warn() is a deprecated alias
        log.warning("out_name_expr: Error getting label for 0x%x", ea)
        return False
    u_line.write(label if label else hex(ea))
    return True
def out_tagon(tag):
    """Do nothing; *tag* is ignored."""
    return None
def out_tagoff(tag):
    """Do nothing; *tag* is ignored."""
    return None
def out_register(reg):
    """Write the register name *reg* to the output line via OutLine()."""
    OutLine(reg)
def MakeLine(output_buffer):
    """Store the rendered line on the current instruction.

    The text of *output_buffer*, with trailing whitespace removed, becomes
    ``_processor.cmd.disasm``.
    """
    rendered = output_buffer.getvalue()
    _processor.cmd.disasm = rendered.rstrip()
298 # End of instruction rendition API
302 # Address space access API
def get_full_byte(ea):
    """Return ``ADDRESS_SPACE.get_byte(ea)``."""
    byte = ADDRESS_SPACE.get_byte(ea)
    return byte
# Extension
def get_bytes(ea, sz):
    """Return ``ADDRESS_SPACE.get_bytes(ea, sz)``."""
    data = ADDRESS_SPACE.get_bytes(ea, sz)
    return data
# Extension
def get_full_val(ea, val_sz):
    """Return ``ADDRESS_SPACE.get_data(ea, val_sz)``."""
    value = ADDRESS_SPACE.get_data(ea, val_sz)
    return value
def ua_add_cref(opoff, ea, flags):
    """Record a code reference from the current instruction to *ea*.

    The target is always pushed onto the address space's analysis stack.
    Then, depending on *flags*:

    * fl_JN: an auto label is made at *ea* and a "j" xref is added.
    * fl_CN: a "fun_" label is made, a "c" xref is added, and *ea* is
      turned into a function. A warning is logged first if *ea* already
      carries the FUNC flag without being a function start.

    Other flag values only push *ea* onto the analysis stack. *opoff* is
    not used.
    """
    ADDRESS_SPACE.analisys_stack_push(ea, flags)
    if flags == fl_JN:
        ADDRESS_SPACE.make_auto_label(ea)
        ADDRESS_SPACE.add_xref(_processor.cmd.ea, ea, "j")
    elif flags == fl_CN:
        ADDRESS_SPACE.make_label("fun_", ea)
        ADDRESS_SPACE.add_xref(_processor.cmd.ea, ea, "c")
        try:
            fl = ADDRESS_SPACE.get_flags(ea, 0xff)
            if fl & ADDRESS_SPACE.FUNC and not ADDRESS_SPACE.is_func(ea):
                # warning() is the supported spelling; warn() is deprecated
                log.warning(
                    "Address 0x%x calls inside another function: 0x%x",
                    _processor.cmd.ea, ea,
                )
        except InvalidAddrException:
            # Best effort: the check is only diagnostic, so keep going
            log.exception(
                "Could not check 0x%x as being a call target inside another function",
                ea,
            )
        ADDRESS_SPACE.make_func(ea, None)
def ua_dodata2(opoff, ea, dtype):
    """Mark *ea* as a data item of type *dtype* and give it an auto label.

    The item size is taken from DATA_SIZE[dtype]. *opoff* is not used.
    """
    space = ADDRESS_SPACE
    space.make_data(ea, DATA_SIZE[dtype])
    space.make_auto_label(ea)
def ua_add_dref(opoff, ea, access):
    """Add a data xref of kind *access* from the current instruction to *ea*.

    *opoff* is not used.
    """
    source_ea = _processor.cmd.ea
    ADDRESS_SPACE.add_xref(source_ea, ea, access)
Q_jumps = 1
Q_noName = 2


def QueueMark(type, ea):
    """Record an analysis issue of kind *type* at address *ea*.

    Q_jumps is recorded as "Indirect jump" and Q_noName as "Ref to address
    outside address space", both through ADDRESS_SPACE.add_issue().

    Raises:
        AssertionError: if *type* is neither Q_jumps nor Q_noName.
    """
    if type == Q_jumps:
        ADDRESS_SPACE.add_issue(ea, "Indirect jump")
    elif type == Q_noName:
        ADDRESS_SPACE.add_issue(ea, "Ref to address outside address space")
    else:
        # Raised explicitly instead of `assert 0`, which is stripped under
        # -O and would let unknown types pass silently.
        raise AssertionError("unknown QueueMark type: %r" % (type,))
356 # End of Address space access API
360 # Instruction operands API
REF_OFF32 = 2


# TODO: ref_addr is extension
def op_offset(ea, op_no, reftype, ref_addr):
    """Mark operand *op_no* of the instruction at *ea* as an offset to
    *ref_addr*.

    Forwards to ADDRESS_SPACE.make_arg_offset(); *reftype* is accepted but
    not used.
    """
    space = ADDRESS_SPACE
    space.make_arg_offset(ea, op_no, ref_addr)
def is_offset(ea, op_no):
    """Return True if the "subtype" property of operand *op_no* at *ea*
    equals defs.IMM_ADDR."""
    subtype = ADDRESS_SPACE.get_arg_prop(ea, op_no, "subtype")
    return subtype == defs.IMM_ADDR
374 # End of Instruction operands API
378 # Comment API
379 # Note that repeating comments are not supported, so the "repeating" argument
380 # is ignored
def set_cmt(ea, cmt, repeating):
    """Set the comment at *ea* to *cmt*; *repeating* is ignored."""
    space = ADDRESS_SPACE
    space.set_comment(ea, cmt)
def get_cmt(ea, repeating):
    """Return the comment stored at *ea*; *repeating* is ignored."""
    comment = ADDRESS_SPACE.get_comment(ea)
    return comment
390 # End of Comment API
395 # Interfacing
# "cmd is a global variable of type insn_t. It contains information
# about the last decoded instruction. This variable is also filled by
# processor modules when they decode instructions."
cmd = insn_t()
# Active processor object; stays None until set_processor() is called
_processor = None


def set_processor(p):
    """Install *p* as the module-wide processor object (``_processor``)."""
    global _processor
    _processor = p
# Active address space object; stays None until set_address_space() is called
ADDRESS_SPACE = None


def set_address_space(aspace):
    """Install *aspace* as the module-wide ADDRESS_SPACE object."""
    global ADDRESS_SPACE
    ADDRESS_SPACE = aspace