sandbox/win/src/sidestep/mini_disassembler.cpp

   1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 // Implementation of MiniDisassembler.
   6
   7 #ifdef _WIN64
   8 #error The code in this file should not be used on 64-bit Windows.
   9 #endif
  10
  11 #include "sandbox/win/src/sidestep/mini_disassembler.h"
  12
  13 namespace sidestep {
  14
  15 MiniDisassembler::MiniDisassembler(bool operand_default_is_32_bits,
  16                                    bool address_default_is_32_bits)
  17     : operand_default_is_32_bits_(operand_default_is_32_bits),
  18       address_default_is_32_bits_(address_default_is_32_bits) {
  19   Initialize();
  20 }
  21
  22 MiniDisassembler::MiniDisassembler()
  23     : operand_default_is_32_bits_(true),
  24       address_default_is_32_bits_(true) {
  25   Initialize();
  26 }
  27
  28 InstructionType MiniDisassembler::Disassemble(
  29     unsigned char* start_byte,
  30     unsigned int* instruction_bytes) {
  31   // Clean up any state from previous invocations.
  32   Initialize();
  33
  34   // Start by processing any prefixes.
  35   unsigned char* current_byte = start_byte;
  36   unsigned int size = 0;
  37   InstructionType instruction_type = ProcessPrefixes(current_byte, &size);
  38
  39   if (IT_UNKNOWN == instruction_type)
  40     return instruction_type;
  41
  42   current_byte += size;
  43   size = 0;
  44
  45   // Invariant: We have stripped all prefixes, and the operand_is_32_bits_
  46   // and address_is_32_bits_ flags are correctly set.
  47
  48   instruction_type = ProcessOpcode(current_byte, 0, &size);
  49
  50   // Check for error processing instruction
  51   if ((IT_UNKNOWN == instruction_type_) || (IT_UNUSED == instruction_type_)) {
  52     return IT_UNKNOWN;
  53   }
  54
  55   current_byte += size;
  56
  57   // Invariant: operand_bytes_ indicates the total size of operands
  58   // specified by the opcode and/or ModR/M byte and/or SIB byte.
  59   // pCurrentByte points to the first byte after the ModR/M byte, or after
  60   // the SIB byte if it is present (i.e. the first byte of any operands
  61   // encoded in the instruction).
  62
  63   // We get the total length of any prefixes, the opcode, and the ModR/M and
  64   // SIB bytes if present, by taking the difference of the original starting
  65   // address and the current byte (which points to the first byte of the
  66   // operands if present, or to the first byte of the next instruction if
  67   // they are not).  Adding the count of bytes in the operands encoded in
  68   // the instruction gives us the full length of the instruction in bytes.
  69   *instruction_bytes += operand_bytes_ + (current_byte - start_byte);
  70
  71   // Return the instruction type, which was set by ProcessOpcode().
  72   return instruction_type_;
  73 }
  74
  75 void MiniDisassembler::Initialize() {
  76   operand_is_32_bits_ = operand_default_is_32_bits_;
  77   address_is_32_bits_ = address_default_is_32_bits_;
  78   operand_bytes_ = 0;
  79   have_modrm_ = false;
  80   should_decode_modrm_ = false;
  81   instruction_type_ = IT_UNKNOWN;
  82   got_f2_prefix_ = false;
  83   got_f3_prefix_ = false;
  84   got_66_prefix_ = false;
  85 }
  86
  87 InstructionType MiniDisassembler::ProcessPrefixes(unsigned char* start_byte,
  88                                                   unsigned int* size) {
  89   InstructionType instruction_type = IT_GENERIC;
  90   const Opcode& opcode = s_ia32_opcode_map_[0].table_[*start_byte];
  91
  92   switch (opcode.type_) {
  93     case IT_PREFIX_ADDRESS:
  94       address_is_32_bits_ = !address_default_is_32_bits_;
  95       goto nochangeoperand;
  96     case IT_PREFIX_OPERAND:
  97       operand_is_32_bits_ = !operand_default_is_32_bits_;
  98       nochangeoperand:
  99     case IT_PREFIX:
 100
 101       if (0xF2 == (*start_byte))
 102         got_f2_prefix_ = true;
 103       else if (0xF3 == (*start_byte))
 104         got_f3_prefix_ = true;
 105       else if (0x66 == (*start_byte))
 106         got_66_prefix_ = true;
 107
 108       instruction_type = opcode.type_;
 109       (*size)++;
 110       // we got a prefix, so add one and check next byte
 111       ProcessPrefixes(start_byte + 1, size);
 112     default:
 113       break;   // not a prefix byte
 114   }
 115
 116   return instruction_type;
 117 }
 118
 119 InstructionType MiniDisassembler::ProcessOpcode(unsigned char* start_byte,
 120                                                 unsigned int table_index,
 121                                                 unsigned int* size) {
 122   const OpcodeTable& table = s_ia32_opcode_map_[table_index];   // Get our table
 123   unsigned char current_byte = (*start_byte) >> table.shift_;
 124   current_byte = current_byte & table.mask_;  // Mask out the bits we will use
 125
 126   // Check whether the byte we have is inside the table we have.
 127   if (current_byte < table.min_lim_ || current_byte > table.max_lim_) {
 128     instruction_type_ = IT_UNKNOWN;
 129     return instruction_type_;
 130   }
 131
 132   const Opcode& opcode = table.table_[current_byte];
 133   if (IT_UNUSED == opcode.type_) {
 134     // This instruction is not used by the IA-32 ISA, so we indicate
 135     // this to the user.  Probably means that we were pointed to
 136     // a byte in memory that was not the start of an instruction.
 137     instruction_type_ = IT_UNUSED;
 138     return instruction_type_;
 139   } else if (IT_REFERENCE == opcode.type_) {
 140     // We are looking at an opcode that has more bytes (or is continued
 141     // in the ModR/M byte).  Recursively find the opcode definition in
 142     // the table for the opcode's next byte.
 143     (*size)++;
 144     ProcessOpcode(start_byte + 1, opcode.table_index_, size);
 145     return instruction_type_;
 146   }
 147
 148   const SpecificOpcode* specific_opcode = reinterpret_cast<
 149                                               const SpecificOpcode*>(&opcode);
 150   if (opcode.is_prefix_dependent_) {
 151     if (got_f2_prefix_ && opcode.opcode_if_f2_prefix_.mnemonic_ != 0) {
 152       specific_opcode = &opcode.opcode_if_f2_prefix_;
 153     } else if (got_f3_prefix_ && opcode.opcode_if_f3_prefix_.mnemonic_ != 0) {
 154       specific_opcode = &opcode.opcode_if_f3_prefix_;
 155     } else if (got_66_prefix_ && opcode.opcode_if_66_prefix_.mnemonic_ != 0) {
 156       specific_opcode = &opcode.opcode_if_66_prefix_;
 157     }
 158   }
 159
 160   // Inv: The opcode type is known.
 161   instruction_type_ = specific_opcode->type_;
 162
 163   // Let's process the operand types to see if we have any immediate
 164   // operands, and/or a ModR/M byte.
 165
 166   ProcessOperand(specific_opcode->flag_dest_);
 167   ProcessOperand(specific_opcode->flag_source_);
 168   ProcessOperand(specific_opcode->flag_aux_);
 169
 170   // Inv: We have processed the opcode and incremented operand_bytes_
 171   // by the number of bytes of any operands specified by the opcode
 172   // that are stored in the instruction (not registers etc.).  Now
 173   // we need to return the total number of bytes for the opcode and
 174   // for the ModR/M or SIB bytes if they are present.
 175
 176   if (table.mask_ != 0xff) {
 177     if (have_modrm_) {
 178       // we're looking at a ModR/M byte so we're not going to
 179       // count that into the opcode size
 180       ProcessModrm(start_byte, size);
 181       return IT_GENERIC;
 182     } else {
 183       // need to count the ModR/M byte even if it's just being
 184       // used for opcode extension
 185       (*size)++;
 186       return IT_GENERIC;
 187     }
 188   } else {
 189     if (have_modrm_) {
 190       // The ModR/M byte is the next byte.
 191       (*size)++;
 192       ProcessModrm(start_byte + 1, size);
 193       return IT_GENERIC;
 194     } else {
 195       (*size)++;
 196       return IT_GENERIC;
 197     }
 198   }
 199 }
 200
 201 bool MiniDisassembler::ProcessOperand(int flag_operand) {
 202   bool succeeded = true;
 203   if (AM_NOT_USED == flag_operand)
 204     return succeeded;
 205
 206   // Decide what to do based on the addressing mode.
 207   switch (flag_operand & AM_MASK) {
 208     // No ModR/M byte indicated by these addressing modes, and no
 209     // additional (e.g. immediate) parameters.
 210     case AM_A:  // Direct address
 211     case AM_F:  // EFLAGS register
 212     case AM_X:  // Memory addressed by the DS:SI register pair
 213     case AM_Y:  // Memory addressed by the ES:DI register pair
 214     case AM_IMPLICIT:  // Parameter is implicit, occupies no space in
 215                        // instruction
 216       break;
 217
 218     // There is a ModR/M byte but it does not necessarily need
 219     // to be decoded.
 220     case AM_C:  // reg field of ModR/M selects a control register
 221     case AM_D:  // reg field of ModR/M selects a debug register
 222     case AM_G:  // reg field of ModR/M selects a general register
 223     case AM_P:  // reg field of ModR/M selects an MMX register
 224     case AM_R:  // mod field of ModR/M may refer only to a general register
 225     case AM_S:  // reg field of ModR/M selects a segment register
 226     case AM_T:  // reg field of ModR/M selects a test register
 227     case AM_V:  // reg field of ModR/M selects a 128-bit XMM register
 228       have_modrm_ = true;
 229       break;
 230
 231     // In these addressing modes, there is a ModR/M byte and it needs to be
 232     // decoded. No other (e.g. immediate) params than indicated in ModR/M.
 233     case AM_E:  // Operand is either a general-purpose register or memory,
 234                 // specified by ModR/M byte
 235     case AM_M:  // ModR/M byte will refer only to memory
 236     case AM_Q:  // Operand is either an MMX register or memory (complex
 237                 // evaluation), specified by ModR/M byte
 238     case AM_W:  // Operand is either a 128-bit XMM register or memory (complex
 239                 // eval), specified by ModR/M byte
 240       have_modrm_ = true;
 241       should_decode_modrm_ = true;
 242       break;
 243
 244     // These addressing modes specify an immediate or an offset value
 245     // directly, so we need to look at the operand type to see how many
 246     // bytes.
 247     case AM_I:  // Immediate data.
 248     case AM_J:  // Jump to offset.
 249     case AM_O:  // Operand is at offset.
 250       switch (flag_operand & OT_MASK) {
 251         case OT_B:  // Byte regardless of operand-size attribute.
 252           operand_bytes_ += OS_BYTE;
 253           break;
 254         case OT_C:  // Byte or word, depending on operand-size attribute.
 255           if (operand_is_32_bits_)
 256             operand_bytes_ += OS_WORD;
 257           else
 258             operand_bytes_ += OS_BYTE;
 259           break;
 260         case OT_D:  // Doubleword, regardless of operand-size attribute.
 261           operand_bytes_ += OS_DOUBLE_WORD;
 262           break;
 263         case OT_DQ:  // Double-quadword, regardless of operand-size attribute.
 264           operand_bytes_ += OS_DOUBLE_QUAD_WORD;
 265           break;
 266         case OT_P:  // 32-bit or 48-bit pointer, depending on operand-size
 267                     // attribute.
 268           if (operand_is_32_bits_)
 269             operand_bytes_ += OS_48_BIT_POINTER;
 270           else
 271             operand_bytes_ += OS_32_BIT_POINTER;
 272           break;
 273         case OT_PS:  // 128-bit packed single-precision floating-point data.
 274           operand_bytes_ += OS_128_BIT_PACKED_SINGLE_PRECISION_FLOATING;
 275           break;
 276         case OT_Q:  // Quadword, regardless of operand-size attribute.
 277           operand_bytes_ += OS_QUAD_WORD;
 278           break;
 279         case OT_S:  // 6-byte pseudo-descriptor.
 280           operand_bytes_ += OS_PSEUDO_DESCRIPTOR;
 281           break;
 282         case OT_SD:  // Scalar Double-Precision Floating-Point Value
 283         case OT_PD:  // Unaligned packed double-precision floating point value
 284           operand_bytes_ += OS_DOUBLE_PRECISION_FLOATING;
 285           break;
 286         case OT_SS:
 287           // Scalar element of a 128-bit packed single-precision
 288           // floating data.
 289           // We simply return enItUnknown since we don't have to support
 290           // floating point
 291           succeeded = false;
 292           break;
 293         case OT_V:  // Word or doubleword, depending on operand-size attribute.
 294           if (operand_is_32_bits_)
 295             operand_bytes_ += OS_DOUBLE_WORD;
 296           else
 297             operand_bytes_ += OS_WORD;
 298           break;
 299         case OT_W:  // Word, regardless of operand-size attribute.
 300           operand_bytes_ += OS_WORD;
 301           break;
 302
 303         // Can safely ignore these.
 304         case OT_A:  // Two one-word operands in memory or two double-word
 305                     // operands in memory
 306         case OT_PI:  // Quadword MMX technology register (e.g. mm0)
 307         case OT_SI:  // Doubleword integer register (e.g., eax)
 308           break;
 309
 310         default:
 311           break;
 312       }
 313       break;
 314
 315     default:
 316       break;
 317   }
 318
 319   return succeeded;
 320 }
 321
 322 bool MiniDisassembler::ProcessModrm(unsigned char* start_byte,
 323                                     unsigned int* size) {
 324   // If we don't need to decode, we just return the size of the ModR/M
 325   // byte (there is never a SIB byte in this case).
 326   if (!should_decode_modrm_) {
 327     (*size)++;
 328     return true;
 329   }
 330
 331   // We never care about the reg field, only the combination of the mod
 332   // and r/m fields, so let's start by packing those fields together into
 333   // 5 bits.
 334   unsigned char modrm = (*start_byte);
 335   unsigned char mod = modrm & 0xC0;  // mask out top two bits to get mod field
 336   modrm = modrm & 0x07;  // mask out bottom 3 bits to get r/m field
 337   mod = mod >> 3;  // shift the mod field to the right place
 338   modrm = mod | modrm;  // combine the r/m and mod fields as discussed
 339   mod = mod >> 3;  // shift the mod field to bits 2..0
 340
 341   // Invariant: modrm contains the mod field in bits 4..3 and the r/m field
 342   // in bits 2..0, and mod contains the mod field in bits 2..0
 343
 344   const ModrmEntry* modrm_entry = 0;
 345   if (address_is_32_bits_)
 346     modrm_entry = &s_ia32_modrm_map_[modrm];
 347   else
 348     modrm_entry = &s_ia16_modrm_map_[modrm];
 349
 350   // Invariant: modrm_entry points to information that we need to decode
 351   // the ModR/M byte.
 352
 353   // Add to the count of operand bytes, if the ModR/M byte indicates
 354   // that some operands are encoded in the instruction.
 355   if (modrm_entry->is_encoded_in_instruction_)
 356     operand_bytes_ += modrm_entry->operand_size_;
 357
 358   // Process the SIB byte if necessary, and return the count
 359   // of ModR/M and SIB bytes.
 360   if (modrm_entry->use_sib_byte_) {
 361     (*size)++;
 362     return ProcessSib(start_byte + 1, mod, size);
 363   } else {
 364     (*size)++;
 365     return true;
 366   }
 367 }
 368
 369 bool MiniDisassembler::ProcessSib(unsigned char* start_byte,
 370                                   unsigned char mod,
 371                                   unsigned int* size) {
 372   // get the mod field from the 2..0 bits of the SIB byte
 373   unsigned char sib_base = (*start_byte) & 0x07;
 374   if (0x05 == sib_base) {
 375     switch (mod) {
 376       case 0x00:  // mod == 00
 377       case 0x02:  // mod == 10
 378         operand_bytes_ += OS_DOUBLE_WORD;
 379         break;
 380       case 0x01:  // mod == 01
 381         operand_bytes_ += OS_BYTE;
 382         break;
 383       case 0x03:  // mod == 11
 384         // According to the IA-32 docs, there does not seem to be a disp
 385         // value for this value of mod
 386       default:
 387         break;
 388     }
 389   }
 390
 391   (*size)++;
 392   return true;
 393 }
 394
 395 };  // namespace sidestep