tools/memory_watcher/mini_disassembler.cc

   1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 /*
   6  * Implementation of MiniDisassembler.
   7  */
   8
   9 #include "mini_disassembler.h"
  10
  11 namespace sidestep {
  12
  13 MiniDisassembler::MiniDisassembler(bool operand_default_is_32_bits,
  14                                    bool address_default_is_32_bits)
  15     : operand_default_is_32_bits_(operand_default_is_32_bits),
  16       address_default_is_32_bits_(address_default_is_32_bits) {
  17   Initialize();
  18 }
  19
  20 MiniDisassembler::MiniDisassembler()
  21     : operand_default_is_32_bits_(true),
  22       address_default_is_32_bits_(true) {
  23   Initialize();
  24 }
  25
  26 InstructionType MiniDisassembler::Disassemble(
  27     unsigned char* start_byte,
  28     unsigned int& instruction_bytes) {
  29   // Clean up any state from previous invocations.
  30   Initialize();
  31
  32   // Start by processing any prefixes.
  33   unsigned char* current_byte = start_byte;
  34   unsigned int size = 0;
  35   InstructionType instruction_type = ProcessPrefixes(current_byte, size);
  36
  37   if (IT_UNKNOWN == instruction_type)
  38     return instruction_type;
  39
  40   current_byte += size;
  41   size = 0;
  42
  43   // Invariant: We have stripped all prefixes, and the operand_is_32_bits_
  44   // and address_is_32_bits_ flags are correctly set.
  45
  46   instruction_type = ProcessOpcode(current_byte, 0, size);
  47
  48   // Check for error processing instruction
  49   if ((IT_UNKNOWN == instruction_type_) || (IT_UNUSED == instruction_type_)) {
  50     return IT_UNKNOWN;
  51   }
  52
  53   current_byte += size;
  54
  55   // Invariant: operand_bytes_ indicates the total size of operands
  56   // specified by the opcode and/or ModR/M byte and/or SIB byte.
  57   // pCurrentByte points to the first byte after the ModR/M byte, or after
  58   // the SIB byte if it is present (i.e. the first byte of any operands
  59   // encoded in the instruction).
  60
  61   // We get the total length of any prefixes, the opcode, and the ModR/M and
  62   // SIB bytes if present, by taking the difference of the original starting
  63   // address and the current byte (which points to the first byte of the
  64   // operands if present, or to the first byte of the next instruction if
  65   // they are not).  Adding the count of bytes in the operands encoded in
  66   // the instruction gives us the full length of the instruction in bytes.
  67   instruction_bytes += operand_bytes_ + (current_byte - start_byte);
  68
  69   // Return the instruction type, which was set by ProcessOpcode().
  70   return instruction_type_;
  71 }
  72
  73 void MiniDisassembler::Initialize() {
  74   operand_is_32_bits_ = operand_default_is_32_bits_;
  75   address_is_32_bits_ = address_default_is_32_bits_;
  76   operand_bytes_ = 0;
  77   have_modrm_ = false;
  78   should_decode_modrm_ = false;
  79   instruction_type_ = IT_UNKNOWN;
  80   got_f2_prefix_ = false;
  81   got_f3_prefix_ = false;
  82   got_66_prefix_ = false;
  83 }
  84
  85 InstructionType MiniDisassembler::ProcessPrefixes(unsigned char* start_byte,
  86                                                   unsigned int& size) {
  87   InstructionType instruction_type = IT_GENERIC;
  88   const Opcode& opcode = s_ia32_opcode_map_[0].table_[*start_byte];
  89
  90   switch (opcode.type_) {
  91     case IT_PREFIX_ADDRESS:
  92       address_is_32_bits_ = !address_default_is_32_bits_;
  93       goto nochangeoperand;
  94     case IT_PREFIX_OPERAND:
  95       operand_is_32_bits_ = !operand_default_is_32_bits_;
  96       nochangeoperand:
  97     case IT_PREFIX:
  98
  99       if (0xF2 == (*start_byte))
 100         got_f2_prefix_ = true;
 101       else if (0xF3 == (*start_byte))
 102         got_f3_prefix_ = true;
 103       else if (0x66 == (*start_byte))
 104         got_66_prefix_ = true;
 105
 106       instruction_type = opcode.type_;
 107       size ++;
 108       // we got a prefix, so add one and check next byte
 109       ProcessPrefixes(start_byte + 1, size);
 110     default:
 111       break;   // not a prefix byte
 112   }
 113
 114   return instruction_type;
 115 }
 116
 117 InstructionType MiniDisassembler::ProcessOpcode(unsigned char* start_byte,
 118                                                 unsigned int table_index,
 119                                                 unsigned int& size) {
 120   const OpcodeTable& table = s_ia32_opcode_map_[table_index];   // Get our table
 121   unsigned char current_byte = (*start_byte) >> table.shift_;
 122   current_byte = current_byte & table.mask_;  // Mask out the bits we will use
 123
 124   // Check whether the byte we have is inside the table we have.
 125   if (current_byte < table.min_lim_ || current_byte > table.max_lim_) {
 126     instruction_type_ = IT_UNKNOWN;
 127     return instruction_type_;
 128   }
 129
 130   const Opcode& opcode = table.table_[current_byte];
 131   if (IT_UNUSED == opcode.type_) {
 132     // This instruction is not used by the IA-32 ISA, so we indicate
 133     // this to the user.  Probably means that we were pointed to
 134     // a byte in memory that was not the start of an instruction.
 135     instruction_type_ = IT_UNUSED;
 136     return instruction_type_;
 137   } else if (IT_REFERENCE == opcode.type_) {
 138     // We are looking at an opcode that has more bytes (or is continued
 139     // in the ModR/M byte).  Recursively find the opcode definition in
 140     // the table for the opcode's next byte.
 141     size++;
 142     ProcessOpcode(start_byte + 1, opcode.table_index_, size);
 143     return instruction_type_;
 144   }
 145
 146   const SpecificOpcode* specific_opcode = (SpecificOpcode*)&opcode;
 147   if (opcode.is_prefix_dependent_) {
 148     if (got_f2_prefix_ && opcode.opcode_if_f2_prefix_.mnemonic_ != 0) {
 149       specific_opcode = &opcode.opcode_if_f2_prefix_;
 150     } else if (got_f3_prefix_ && opcode.opcode_if_f3_prefix_.mnemonic_ != 0) {
 151       specific_opcode = &opcode.opcode_if_f3_prefix_;
 152     } else if (got_66_prefix_ && opcode.opcode_if_66_prefix_.mnemonic_ != 0) {
 153       specific_opcode = &opcode.opcode_if_66_prefix_;
 154     }
 155   }
 156
 157   // Inv: The opcode type is known.
 158   instruction_type_ = specific_opcode->type_;
 159
 160   // Let's process the operand types to see if we have any immediate
 161   // operands, and/or a ModR/M byte.
 162
 163   ProcessOperand(specific_opcode->flag_dest_);
 164   ProcessOperand(specific_opcode->flag_source_);
 165   ProcessOperand(specific_opcode->flag_aux_);
 166
 167   // Inv: We have processed the opcode and incremented operand_bytes_
 168   // by the number of bytes of any operands specified by the opcode
 169   // that are stored in the instruction (not registers etc.).  Now
 170   // we need to return the total number of bytes for the opcode and
 171   // for the ModR/M or SIB bytes if they are present.
 172
 173   if (table.mask_ != 0xff) {
 174     if (have_modrm_) {
 175       // we're looking at a ModR/M byte so we're not going to
 176       // count that into the opcode size
 177       ProcessModrm(start_byte, size);
 178       return IT_GENERIC;
 179     } else {
 180       // need to count the ModR/M byte even if it's just being
 181       // used for opcode extension
 182       size++;
 183       return IT_GENERIC;
 184     }
 185   } else {
 186     if (have_modrm_) {
 187       // The ModR/M byte is the next byte.
 188       size++;
 189       ProcessModrm(start_byte + 1, size);
 190       return IT_GENERIC;
 191     } else {
 192       size++;
 193       return IT_GENERIC;
 194     }
 195   }
 196 }
 197
 198 bool MiniDisassembler::ProcessOperand(int flag_operand) {
 199   bool succeeded = true;
 200   if (AM_NOT_USED == flag_operand)
 201     return succeeded;
 202
 203   // Decide what to do based on the addressing mode.
 204   switch (flag_operand & AM_MASK) {
 205     // No ModR/M byte indicated by these addressing modes, and no
 206     // additional (e.g. immediate) parameters.
 207     case AM_A: // Direct address
 208     case AM_F: // EFLAGS register
 209     case AM_X: // Memory addressed by the DS:SI register pair
 210     case AM_Y: // Memory addressed by the ES:DI register pair
 211     case AM_IMPLICIT: // Parameter is implicit, occupies no space in
 212                        // instruction
 213       break;
 214
 215     // There is a ModR/M byte but it does not necessarily need
 216     // to be decoded.
 217     case AM_C: // reg field of ModR/M selects a control register
 218     case AM_D: // reg field of ModR/M selects a debug register
 219     case AM_G: // reg field of ModR/M selects a general register
 220     case AM_P: // reg field of ModR/M selects an MMX register
 221     case AM_R: // mod field of ModR/M may refer only to a general register
 222     case AM_S: // reg field of ModR/M selects a segment register
 223     case AM_T: // reg field of ModR/M selects a test register
 224     case AM_V: // reg field of ModR/M selects a 128-bit XMM register
 225       have_modrm_ = true;
 226       break;
 227
 228     // In these addressing modes, there is a ModR/M byte and it needs to be
 229     // decoded. No other (e.g. immediate) params than indicated in ModR/M.
 230     case AM_E: // Operand is either a general-purpose register or memory,
 231                  // specified by ModR/M byte
 232     case AM_M: // ModR/M byte will refer only to memory
 233     case AM_Q: // Operand is either an MMX register or memory (complex
 234                  // evaluation), specified by ModR/M byte
 235     case AM_W: // Operand is either a 128-bit XMM register or memory (complex
 236                  // eval), specified by ModR/M byte
 237       have_modrm_ = true;
 238       should_decode_modrm_ = true;
 239       break;
 240
 241     // These addressing modes specify an immediate or an offset value
 242     // directly, so we need to look at the operand type to see how many
 243     // bytes.
 244     case AM_I: // Immediate data.
 245     case AM_J: // Jump to offset.
 246     case AM_O: // Operand is at offset.
 247       switch (flag_operand & OT_MASK) {
 248         case OT_B: // Byte regardless of operand-size attribute.
 249           operand_bytes_ += OS_BYTE;
 250           break;
 251         case OT_C: // Byte or word, depending on operand-size attribute.
 252           if (operand_is_32_bits_)
 253             operand_bytes_ += OS_WORD;
 254           else
 255             operand_bytes_ += OS_BYTE;
 256           break;
 257         case OT_D: // Doubleword, regardless of operand-size attribute.
 258           operand_bytes_ += OS_DOUBLE_WORD;
 259           break;
 260         case OT_DQ: // Double-quadword, regardless of operand-size attribute.
 261           operand_bytes_ += OS_DOUBLE_QUAD_WORD;
 262           break;
 263         case OT_P: // 32-bit or 48-bit pointer, depending on operand-size
 264                      // attribute.
 265           if (operand_is_32_bits_)
 266             operand_bytes_ += OS_48_BIT_POINTER;
 267           else
 268             operand_bytes_ += OS_32_BIT_POINTER;
 269           break;
 270         case OT_PS: // 128-bit packed single-precision floating-point data.
 271           operand_bytes_ += OS_128_BIT_PACKED_SINGLE_PRECISION_FLOATING;
 272           break;
 273         case OT_Q: // Quadword, regardless of operand-size attribute.
 274           operand_bytes_ += OS_QUAD_WORD;
 275           break;
 276         case OT_S: // 6-byte pseudo-descriptor.
 277           operand_bytes_ += OS_PSEUDO_DESCRIPTOR;
 278           break;
 279         case OT_SD: // Scalar Double-Precision Floating-Point Value
 280         case OT_PD: // Unaligned packed double-precision floating point value
 281           operand_bytes_ += OS_DOUBLE_PRECISION_FLOATING;
 282           break;
 283         case OT_SS:
 284           // Scalar element of a 128-bit packed single-precision
 285           // floating data.
 286           // We simply return enItUnknown since we don't have to support
 287           // floating point
 288           succeeded = false;
 289           break;
 290         case OT_V: // Word or doubleword, depending on operand-size attribute.
 291           if (operand_is_32_bits_)
 292             operand_bytes_ += OS_DOUBLE_WORD;
 293           else
 294             operand_bytes_ += OS_WORD;
 295           break;
 296         case OT_W: // Word, regardless of operand-size attribute.
 297           operand_bytes_ += OS_WORD;
 298           break;
 299
 300         // Can safely ignore these.
 301         case OT_A: // Two one-word operands in memory or two double-word
 302                      // operands in memory
 303         case OT_PI: // Quadword MMX technology register (e.g. mm0)
 304         case OT_SI: // Doubleword integer register (e.g., eax)
 305           break;
 306
 307         default:
 308           break;
 309       }
 310       break;
 311
 312     default:
 313       break;
 314   }
 315
 316   return succeeded;
 317 }
 318
 319 bool MiniDisassembler::ProcessModrm(unsigned char* start_byte,
 320                                     unsigned int& size) {
 321   // If we don't need to decode, we just return the size of the ModR/M
 322   // byte (there is never a SIB byte in this case).
 323   if (!should_decode_modrm_) {
 324     size++;
 325     return true;
 326   }
 327
 328   // We never care about the reg field, only the combination of the mod
 329   // and r/m fields, so let's start by packing those fields together into
 330   // 5 bits.
 331   unsigned char modrm = (*start_byte);
 332   unsigned char mod = modrm & 0xC0; // mask out top two bits to get mod field
 333   modrm = modrm & 0x07; // mask out bottom 3 bits to get r/m field
 334   mod = mod >> 3; // shift the mod field to the right place
 335   modrm = mod | modrm; // combine the r/m and mod fields as discussed
 336   mod = mod >> 3; // shift the mod field to bits 2..0
 337
 338   // Invariant: modrm contains the mod field in bits 4..3 and the r/m field
 339   // in bits 2..0, and mod contains the mod field in bits 2..0
 340
 341   const ModrmEntry* modrm_entry = 0;
 342   if (address_is_32_bits_)
 343     modrm_entry = &s_ia32_modrm_map_[modrm];
 344   else
 345     modrm_entry = &s_ia16_modrm_map_[modrm];
 346
 347   // Invariant: modrm_entry points to information that we need to decode
 348   // the ModR/M byte.
 349
 350   // Add to the count of operand bytes, if the ModR/M byte indicates
 351   // that some operands are encoded in the instruction.
 352   if (modrm_entry->is_encoded_in_instruction_)
 353     operand_bytes_ += modrm_entry->operand_size_;
 354
 355   // Process the SIB byte if necessary, and return the count
 356   // of ModR/M and SIB bytes.
 357   if (modrm_entry->use_sib_byte_) {
 358     size++;
 359     return ProcessSib(start_byte + 1, mod, size);
 360   } else {
 361     size++;
 362     return true;
 363   }
 364 }
 365
 366 bool MiniDisassembler::ProcessSib(unsigned char* start_byte,
 367                                   unsigned char mod,
 368                                   unsigned int& size) {
 369   // get the mod field from the 2..0 bits of the SIB byte
 370   unsigned char sib_base = (*start_byte) & 0x07;
 371   if (0x05 == sib_base) {
 372     switch (mod) {
 373     case 0x00: // mod == 00
 374     case 0x02: // mod == 10
 375       operand_bytes_ += OS_DOUBLE_WORD;
 376       break;
 377     case 0x01: // mod == 01
 378       operand_bytes_ += OS_BYTE;
 379       break;
 380     case 0x03: // mod == 11
 381       // According to the IA-32 docs, there does not seem to be a disp
 382       // value for this value of mod
 383     default:
 384       break;
 385     }
 386   }
 387
 388   size++;
 389   return true;
 390 }
 391
 392 };  // namespace sidestep