/* Copyright (c) 2007, Google Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 *     * Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above
 * copyright notice, this list of conditions and the following disclaimer
 * in the documentation and/or other materials provided with the
 * distribution.
 *     * Neither the name of Google Inc. nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Author: Joi Sigurdsson
 *
 * Implementation of MiniDisassembler.
 */
36 #include "mini_disassembler.h"
40 MiniDisassembler::MiniDisassembler(bool operand_default_is_32_bits
,
41 bool address_default_is_32_bits
)
42 : operand_default_is_32_bits_(operand_default_is_32_bits
),
43 address_default_is_32_bits_(address_default_is_32_bits
) {
47 MiniDisassembler::MiniDisassembler()
48 : operand_default_is_32_bits_(true),
49 address_default_is_32_bits_(true) {
53 InstructionType
MiniDisassembler::Disassemble(
54 unsigned char* start_byte
,
55 unsigned int& instruction_bytes
) {
56 // Clean up any state from previous invocations.
59 // Start by processing any prefixes.
60 unsigned char* current_byte
= start_byte
;
61 unsigned int size
= 0;
62 InstructionType instruction_type
= ProcessPrefixes(current_byte
, size
);
64 if (IT_UNKNOWN
== instruction_type
)
65 return instruction_type
;
70 // Invariant: We have stripped all prefixes, and the operand_is_32_bits_
71 // and address_is_32_bits_ flags are correctly set.
73 instruction_type
= ProcessOpcode(current_byte
, 0, size
);
75 // Check for error processing instruction
76 if ((IT_UNKNOWN
== instruction_type_
) || (IT_UNUSED
== instruction_type_
)) {
82 // Invariant: operand_bytes_ indicates the total size of operands
83 // specified by the opcode and/or ModR/M byte and/or SIB byte.
84 // pCurrentByte points to the first byte after the ModR/M byte, or after
85 // the SIB byte if it is present (i.e. the first byte of any operands
86 // encoded in the instruction).
88 // We get the total length of any prefixes, the opcode, and the ModR/M and
89 // SIB bytes if present, by taking the difference of the original starting
90 // address and the current byte (which points to the first byte of the
91 // operands if present, or to the first byte of the next instruction if
92 // they are not). Adding the count of bytes in the operands encoded in
93 // the instruction gives us the full length of the instruction in bytes.
94 instruction_bytes
+= operand_bytes_
+ (current_byte
- start_byte
);
96 // Return the instruction type, which was set by ProcessOpcode().
97 return instruction_type_
;
100 void MiniDisassembler::Initialize() {
101 operand_is_32_bits_
= operand_default_is_32_bits_
;
102 address_is_32_bits_
= address_default_is_32_bits_
;
104 operand_default_support_64_bits_
= true;
106 operand_default_support_64_bits_
= false;
108 operand_is_64_bits_
= false;
111 should_decode_modrm_
= false;
112 instruction_type_
= IT_UNKNOWN
;
113 got_f2_prefix_
= false;
114 got_f3_prefix_
= false;
115 got_66_prefix_
= false;
// Examines the byte at start_byte and, if the first opcode table classifies
// it as a prefix, records its effect on the decoder state and then recurses
// on the following byte.
//
// start_byte: byte to classify.
// size: running count shared by reference with the recursive call.
//
// Returns the local instruction_type: IT_GENERIC unless the prefix path
// below replaces it with the table type of this byte. The result of the
// recursive call is discarded, so only the first byte's type is reported.
//
// NOTE(review): this extract appears to be missing lines -- the target of
// `goto nochangeoperand`, the statement that advances `size`, and the end of
// the switch are not visible here. Confirm against the complete file before
// changing any logic.
118 InstructionType
MiniDisassembler::ProcessPrefixes(unsigned char* start_byte
,
119 unsigned int& size
) {
120 InstructionType instruction_type
= IT_GENERIC
;
// Classify the byte through table 0 of the opcode map.
121 const Opcode
& opcode
= s_ia32_opcode_map_
[0].table_
[*start_byte
];
123 switch (opcode
.type_
) {
124 case IT_PREFIX_ADDRESS
:
// An address-size prefix selects the opposite of the default address size.
125 address_is_32_bits_
= !address_default_is_32_bits_
;
126 goto nochangeoperand
;
127 case IT_PREFIX_OPERAND
:
// An operand-size prefix selects the opposite of the default operand size.
128 operand_is_32_bits_
= !operand_default_is_32_bits_
;
// Remember which of the F2 / F3 / 66 prefix bytes was seen; ProcessOpcode()
// consults these flags for prefix-dependent opcodes.
132 if (0xF2 == (*start_byte
))
133 got_f2_prefix_
= true;
134 else if (0xF3 == (*start_byte
))
135 got_f3_prefix_
= true;
136 else if (0x66 == (*start_byte
))
137 got_66_prefix_
= true;
// `&` binds tighter than `&&`, so this reads as
// support_64 && ((*start_byte) & 0x48), which is true whenever bit 6 or
// bit 3 of the byte is set.
// NOTE(review): if an exact bit pattern was intended, an equality test
// would be needed -- confirm.
138 else if (operand_default_support_64_bits_
&& (*start_byte
) & 0x48)
139 operand_is_64_bits_
= true;
141 instruction_type
= opcode
.type_
;
143 // we got a prefix, so add one and check next byte
144 ProcessPrefixes(start_byte
+ 1, size
);
146 break; // not a prefix byte
149 return instruction_type
;
// Looks up the opcode byte at start_byte in opcode table number table_index
// and records the resulting instruction type in instruction_type_.
//
// start_byte: byte to look up.
// table_index: index into s_ia32_opcode_map_ of the table to use.
// size: running byte count, passed on by reference to the recursive call and
//   to ProcessModrm().
//
// Visible flow: the byte is shifted and masked per the table; an index
// outside [min_lim_, max_lim_] yields IT_UNKNOWN; an IT_UNUSED entry yields
// IT_UNUSED; an IT_REFERENCE entry recurses on the next byte with the
// entry's table_index_. Otherwise a prefix-specific variant may be selected,
// its type is stored, and its three operand flags go to ProcessOperand().
//
// NOTE(review): this extract appears to be missing lines -- several closing
// braces, the conditions that choose between the ModR/M branches at the end,
// and the final return statements are not visible. Confirm against the
// complete file before changing any logic.
152 InstructionType
MiniDisassembler::ProcessOpcode(unsigned char* start_byte
,
153 unsigned int table_index
,
154 unsigned int& size
) {
155 const OpcodeTable
& table
= s_ia32_opcode_map_
[table_index
]; // Get our table
156 unsigned char current_byte
= (*start_byte
) >> table
.shift_
;
157 current_byte
= current_byte
& table
.mask_
; // Mask out the bits we will use
159 // Check whether the byte we have is inside the table we have.
160 if (current_byte
< table
.min_lim_
|| current_byte
> table
.max_lim_
) {
161 instruction_type_
= IT_UNKNOWN
;
162 return instruction_type_
;
165 const Opcode
& opcode
= table
.table_
[current_byte
];
166 if (IT_UNUSED
== opcode
.type_
) {
167 // This instruction is not used by the IA-32 ISA, so we indicate
168 // this to the user. Probably means that we were pointed to
169 // a byte in memory that was not the start of an instruction.
170 instruction_type_
= IT_UNUSED
;
171 return instruction_type_
;
172 } else if (IT_REFERENCE
== opcode
.type_
) {
173 // We are looking at an opcode that has more bytes (or is continued
174 // in the ModR/M byte). Recursively find the opcode definition in
175 // the table for the opcode's next byte.
// The recursive call's return value is ignored; its outcome is read back
// from instruction_type_.
177 ProcessOpcode(start_byte
+ 1, opcode
.table_index_
, size
);
178 return instruction_type_
;
// Start from the entry itself viewed as a SpecificOpcode.
// NOTE(review): the C-style cast assumes an Opcode can be viewed as a
// SpecificOpcode and silently drops const before it is re-added by the
// target type -- confirm the type relationship in the header.
181 const SpecificOpcode
* specific_opcode
= (SpecificOpcode
*)&opcode
;
// For prefix-dependent opcodes, prefer the variant matching a prefix that
// was seen, checked in the order F2, F3, 66, and only when that variant has
// a non-zero mnemonic_.
182 if (opcode
.is_prefix_dependent_
) {
183 if (got_f2_prefix_
&& opcode
.opcode_if_f2_prefix_
.mnemonic_
!= 0) {
184 specific_opcode
= &opcode
.opcode_if_f2_prefix_
;
185 } else if (got_f3_prefix_
&& opcode
.opcode_if_f3_prefix_
.mnemonic_
!= 0) {
186 specific_opcode
= &opcode
.opcode_if_f3_prefix_
;
187 } else if (got_66_prefix_
&& opcode
.opcode_if_66_prefix_
.mnemonic_
!= 0) {
188 specific_opcode
= &opcode
.opcode_if_66_prefix_
;
192 // Inv: The opcode type is known.
193 instruction_type_
= specific_opcode
->type_
;
195 // Let's process the operand types to see if we have any immediate
196 // operands, and/or a ModR/M byte.
// The boolean results of ProcessOperand() are not checked here.
198 ProcessOperand(specific_opcode
->flag_dest_
);
199 ProcessOperand(specific_opcode
->flag_source_
);
200 ProcessOperand(specific_opcode
->flag_aux_
);
202 // Inv: We have processed the opcode and incremented operand_bytes_
203 // by the number of bytes of any operands specified by the opcode
204 // that are stored in the instruction (not registers etc.). Now
205 // we need to return the total number of bytes for the opcode and
206 // for the ModR/M or SIB bytes if they are present.
// A mask other than 0xff means this table indexes on only part of the byte.
// In that case ProcessModrm() is given start_byte itself, not start_byte + 1.
208 if (table
.mask_
!= 0xff) {
210 // we're looking at a ModR/M byte so we're not going to
211 // count that into the opcode size
212 ProcessModrm(start_byte
, size
);
215 // need to count the ModR/M byte even if it's just being
216 // used for opcode extension
222 // The ModR/M byte is the next byte.
224 ProcessModrm(start_byte
+ 1, size
);
// Examines one operand descriptor of the current opcode and updates the
// decoder state accordingly.
//
// flag_operand: combined operand flags. The addressing mode is extracted
//   with AM_MASK and the operand type with OT_MASK.
//
// Visible effects: the AM_E / AM_M / AM_Q / AM_W group sets
// should_decode_modrm_; for the AM_I / AM_J / AM_O group the size implied by
// the operand type is added to operand_bytes_.
//
// NOTE(review): this extract appears to be missing lines. As shown, case
// bodies are not separated by visible `break` statements, the two-way size
// choices have no visible `else`, the body of the AM_NOT_USED check and the
// bodies of the first two case groups are absent, and no return statement is
// visible. Confirm against the complete file before changing any logic.
233 bool MiniDisassembler::ProcessOperand(int flag_operand
) {
234 bool succeeded
= true;
235 if (AM_NOT_USED
== flag_operand
)
238 // Decide what to do based on the addressing mode.
239 switch (flag_operand
& AM_MASK
) {
240 // No ModR/M byte indicated by these addressing modes, and no
241 // additional (e.g. immediate) parameters.
242 case AM_A
: // Direct address
243 case AM_F
: // EFLAGS register
244 case AM_X
: // Memory addressed by the DS:SI register pair
245 case AM_Y
: // Memory addressed by the ES:DI register pair
246 case AM_IMPLICIT
: // Parameter is implicit, occupies no space in
250 // There is a ModR/M byte but it does not necessarily need
252 case AM_C
: // reg field of ModR/M selects a control register
253 case AM_D
: // reg field of ModR/M selects a debug register
254 case AM_G
: // reg field of ModR/M selects a general register
255 case AM_P
: // reg field of ModR/M selects an MMX register
256 case AM_R
: // mod field of ModR/M may refer only to a general register
257 case AM_S
: // reg field of ModR/M selects a segment register
258 case AM_T
: // reg field of ModR/M selects a test register
259 case AM_V
: // reg field of ModR/M selects a 128-bit XMM register
263 // In these addressing modes, there is a ModR/M byte and it needs to be
264 // decoded. No other (e.g. immediate) params than indicated in ModR/M.
265 case AM_E
: // Operand is either a general-purpose register or memory,
266 // specified by ModR/M byte
267 case AM_M
: // ModR/M byte will refer only to memory
268 case AM_Q
: // Operand is either an MMX register or memory (complex
269 // evaluation), specified by ModR/M byte
270 case AM_W
: // Operand is either a 128-bit XMM register or memory (complex
271 // eval), specified by ModR/M byte
// ProcessModrm() checks this flag to decide whether to decode the ModR/M
// byte.
273 should_decode_modrm_
= true;
276 // These addressing modes specify an immediate or an offset value
277 // directly, so we need to look at the operand type to see how many
279 case AM_I
: // Immediate data.
280 case AM_J
: // Jump to offset.
281 case AM_O
: // Operand is at offset.
// Inner dispatch on the operand type; each case adds the matching OS_*
// size constant to operand_bytes_.
282 switch (flag_operand
& OT_MASK
) {
283 case OT_B
: // Byte regardless of operand-size attribute.
284 operand_bytes_
+= OS_BYTE
;
286 case OT_C
: // Byte or word, depending on operand-size attribute.
287 if (operand_is_32_bits_
)
288 operand_bytes_
+= OS_WORD
;
290 operand_bytes_
+= OS_BYTE
;
292 case OT_D
: // Doubleword, regardless of operand-size attribute.
293 operand_bytes_
+= OS_DOUBLE_WORD
;
295 case OT_DQ
: // Double-quadword, regardless of operand-size attribute.
296 operand_bytes_
+= OS_DOUBLE_QUAD_WORD
;
298 case OT_P
: // 32-bit or 48-bit pointer, depending on operand-size
300 if (operand_is_32_bits_
)
301 operand_bytes_
+= OS_48_BIT_POINTER
;
303 operand_bytes_
+= OS_32_BIT_POINTER
;
305 case OT_PS
: // 128-bit packed single-precision floating-point data.
306 operand_bytes_
+= OS_128_BIT_PACKED_SINGLE_PRECISION_FLOATING
;
308 case OT_Q
: // Quadword, regardless of operand-size attribute.
309 operand_bytes_
+= OS_QUAD_WORD
;
311 case OT_S
: // 6-byte pseudo-descriptor.
312 operand_bytes_
+= OS_PSEUDO_DESCRIPTOR
;
314 case OT_SD
: // Scalar Double-Precision Floating-Point Value
315 case OT_PD
: // Unaligned packed double-precision floating point value
316 operand_bytes_
+= OS_DOUBLE_PRECISION_FLOATING
;
319 // Scalar element of a 128-bit packed single-precision
321 // We simply return enItUnknown since we don't have to support
325 case OT_V
: // Word, doubleword or quadword, depending on operand-size
// Quadword only when 64-bit operand size is active and both bitwise tests
// below are non-zero; otherwise doubleword or word by operand_is_32_bits_.
// NOTE(review): `flag_operand & AM_I` is a bitwise test on the raw flags,
// not an equality test on (flag_operand & AM_MASK) -- confirm this is the
// intended way to detect an immediate operand.
327 if (operand_is_64_bits_
&& flag_operand
& AM_I
&&
328 flag_operand
& IOS_64
)
329 operand_bytes_
+= OS_QUAD_WORD
;
330 else if (operand_is_32_bits_
)
331 operand_bytes_
+= OS_DOUBLE_WORD
;
333 operand_bytes_
+= OS_WORD
;
335 case OT_W
: // Word, regardless of operand-size attribute.
336 operand_bytes_
+= OS_WORD
;
339 // Can safely ignore these.
340 case OT_A
: // Two one-word operands in memory or two double-word
341 // operands in memory
342 case OT_PI
: // Quadword MMX technology register (e.g. mm0)
343 case OT_SI
: // Doubleword integer register (e.g., eax)
358 bool MiniDisassembler::ProcessModrm(unsigned char* start_byte
,
359 unsigned int& size
) {
360 // If we don't need to decode, we just return the size of the ModR/M
361 // byte (there is never a SIB byte in this case).
362 if (!should_decode_modrm_
) {
367 // We never care about the reg field, only the combination of the mod
368 // and r/m fields, so let's start by packing those fields together into
370 unsigned char modrm
= (*start_byte
);
371 unsigned char mod
= modrm
& 0xC0; // mask out top two bits to get mod field
372 modrm
= modrm
& 0x07; // mask out bottom 3 bits to get r/m field
373 mod
= mod
>> 3; // shift the mod field to the right place
374 modrm
= mod
| modrm
; // combine the r/m and mod fields as discussed
375 mod
= mod
>> 3; // shift the mod field to bits 2..0
377 // Invariant: modrm contains the mod field in bits 4..3 and the r/m field
378 // in bits 2..0, and mod contains the mod field in bits 2..0
380 const ModrmEntry
* modrm_entry
= 0;
381 if (address_is_32_bits_
)
382 modrm_entry
= &s_ia32_modrm_map_
[modrm
];
384 modrm_entry
= &s_ia16_modrm_map_
[modrm
];
386 // Invariant: modrm_entry points to information that we need to decode
389 // Add to the count of operand bytes, if the ModR/M byte indicates
390 // that some operands are encoded in the instruction.
391 if (modrm_entry
->is_encoded_in_instruction_
)
392 operand_bytes_
+= modrm_entry
->operand_size_
;
394 // Process the SIB byte if necessary, and return the count
395 // of ModR/M and SIB bytes.
396 if (modrm_entry
->use_sib_byte_
) {
398 return ProcessSib(start_byte
+ 1, mod
, size
);
405 bool MiniDisassembler::ProcessSib(unsigned char* start_byte
,
407 unsigned int& size
) {
408 // get the mod field from the 2..0 bits of the SIB byte
409 unsigned char sib_base
= (*start_byte
) & 0x07;
410 if (0x05 == sib_base
) {
412 case 0x00: // mod == 00
413 case 0x02: // mod == 10
414 operand_bytes_
+= OS_DOUBLE_WORD
;
416 case 0x01: // mod == 01
417 operand_bytes_
+= OS_BYTE
;
419 case 0x03: // mod == 11
420 // According to the IA-32 docs, there does not seem to be a disp
421 // value for this value of mod
431 }; // namespace sidestep