lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h

   1 /*===- X86DisassemblerDecoderCommon.h - Disassembler decoder -------*- C -*-==*
   2  *
   3  *                     The LLVM Compiler Infrastructure
   4  *
   5  * This file is distributed under the University of Illinois Open Source
   6  * License. See LICENSE.TXT for details.
   7  *
   8  *===----------------------------------------------------------------------===*
   9  *
  10  * This file is part of the X86 Disassembler.
  11  * It contains common definitions used by both the disassembler and the table
  12  *  generator.
  13  * Documentation for the disassembler can be found in X86Disassembler.h.
  14  *
  15  *===----------------------------------------------------------------------===*/
  16
  17 /*
  18  * This header file provides those definitions that need to be shared between
  19  * the decoder and the table generator in a C-friendly manner.
  20  */
  21
  22 #ifndef X86DISASSEMBLERDECODERCOMMON_H
  23 #define X86DISASSEMBLERDECODERCOMMON_H
  24
  25 #include "llvm/System/DataTypes.h"
  26
  27 #define INSTRUCTIONS_SYM  x86DisassemblerInstrSpecifiers
  28 #define CONTEXTS_SYM      x86DisassemblerContexts
  29 #define ONEBYTE_SYM       x86DisassemblerOneByteOpcodes
  30 #define TWOBYTE_SYM       x86DisassemblerTwoByteOpcodes
  31 #define THREEBYTE38_SYM   x86DisassemblerThreeByte38Opcodes
  32 #define THREEBYTE3A_SYM   x86DisassemblerThreeByte3AOpcodes
  33
  34 #define INSTRUCTIONS_STR  "x86DisassemblerInstrSpecifiers"
  35 #define CONTEXTS_STR      "x86DisassemblerContexts"
  36 #define ONEBYTE_STR       "x86DisassemblerOneByteOpcodes"
  37 #define TWOBYTE_STR       "x86DisassemblerTwoByteOpcodes"
  38 #define THREEBYTE38_STR   "x86DisassemblerThreeByte38Opcodes"
  39 #define THREEBYTE3A_STR   "x86DisassemblerThreeByte3AOpcodes"
  40
  41 /*
  42  * Attributes of an instruction that must be known before the opcode can be
  43  * processed correctly.  Most of these indicate the presence of particular
  44  * prefixes, but ATTR_64BIT is simply an attribute of the decoding context.
  45  */
  46 #define ATTRIBUTE_BITS          \
  47   ENUM_ENTRY(ATTR_NONE,   0x00) \
  48   ENUM_ENTRY(ATTR_64BIT,  0x01) \
  49   ENUM_ENTRY(ATTR_XS,     0x02) \
  50   ENUM_ENTRY(ATTR_XD,     0x04) \
  51   ENUM_ENTRY(ATTR_REXW,   0x08) \
  52   ENUM_ENTRY(ATTR_OPSIZE, 0x10)
  53
  54 #define ENUM_ENTRY(n, v) n = v,
  55 enum attributeBits {
  56   ATTRIBUTE_BITS
  57   ATTR_max
  58 };
  59 #undef ENUM_ENTRY
  60
  61 /*
  62  * Combinations of the above attributes that are relevant to instruction
  63  * decode.  Although other combinations are possible, they can be reduced to
  64  * these without affecting the ultimately decoded instruction.
  65  */
  66
  67 /*           Class name           Rank  Rationale for rank assignment         */
  68 #define INSTRUCTION_CONTEXTS                                                   \
  69   ENUM_ENTRY(IC,                    0,  "says nothing about the instruction")  \
  70   ENUM_ENTRY(IC_64BIT,              1,  "says the instruction applies in "     \
  71                                         "64-bit mode but no more")             \
  72   ENUM_ENTRY(IC_OPSIZE,             3,  "requires an OPSIZE prefix, so "       \
  73                                         "operands change width")               \
  74   ENUM_ENTRY(IC_XD,                 2,  "may say something about the opcode "  \
  75                                         "but not the operands")                \
  76   ENUM_ENTRY(IC_XS,                 2,  "may say something about the opcode "  \
  77                                         "but not the operands")                \
  78   ENUM_ENTRY(IC_64BIT_REXW,         4,  "requires a REX.W prefix, so operands "\
  79                                         "change width; overrides IC_OPSIZE")   \
  80   ENUM_ENTRY(IC_64BIT_OPSIZE,       3,  "Just as meaningful as IC_OPSIZE")     \
  81   ENUM_ENTRY(IC_64BIT_XD,           5,  "XD instructions are SSE; REX.W is "   \
  82                                         "secondary")                           \
  83   ENUM_ENTRY(IC_64BIT_XS,           5,  "Just as meaningful as IC_64BIT_XD")   \
  84   ENUM_ENTRY(IC_64BIT_REXW_XS,      6,  "OPSIZE could mean a different "       \
  85                                         "opcode")                              \
  86   ENUM_ENTRY(IC_64BIT_REXW_XD,      6,  "Just as meaningful as "               \
  87                                         "IC_64BIT_REXW_XS")                    \
  88   ENUM_ENTRY(IC_64BIT_REXW_OPSIZE,  7,  "The Dynamic Duo!  Prefer over all "   \
  89                                         "else because this changes most "      \
  90                                         "operands' meaning")
  91
  92 #define ENUM_ENTRY(n, r, d) n,
  93 typedef enum {
  94   INSTRUCTION_CONTEXTS
  95   IC_max
  96 } InstructionContext;
  97 #undef ENUM_ENTRY
  98
  99 /*
 100  * Opcode types, which determine which decode table to use, both in the Intel
 101  * manual and also for the decoder.
 102  */
 103 typedef enum {
 104   ONEBYTE       = 0,
 105   TWOBYTE       = 1,
 106   THREEBYTE_38  = 2,
 107   THREEBYTE_3A  = 3
 108 } OpcodeType;
 109
 110 /*
 111  * The following structs are used for the hierarchical decode table.  After
 112  * determining the instruction's class (i.e., which IC_* constant applies to
 113  * it), the decoder reads the opcode.  Some instructions require specific
 114  * values of the ModR/M byte, so the ModR/M byte indexes into the final table.
 115  *
 116  * If a ModR/M byte is not required, "required" is left unset, and the values
 117  * for each instructionID are identical.
 118  */
 119
 120 typedef uint16_t InstrUID;
 121
 122 /*
 123  * ModRMDecisionType - describes the type of ModR/M decision, allowing the
 124  * consumer to determine the number of entries in it.
 125  *
 126  * MODRM_ONEENTRY - No matter what the value of the ModR/M byte is, the decoded
 127  *                  instruction is the same.
 128  * MODRM_SPLITRM  - If the ModR/M byte is between 0x00 and 0xbf, the opcode
 129  *                  corresponds to one instruction; otherwise, it corresponds to
 130  *                  a different instruction.
 131  * MODRM_FULL     - Potentially, each value of the ModR/M byte could correspond
 132  *                  to a different instruction.
 133  */
 134
 135 #define MODRMTYPES            \
 136   ENUM_ENTRY(MODRM_ONEENTRY)  \
 137   ENUM_ENTRY(MODRM_SPLITRM)   \
 138   ENUM_ENTRY(MODRM_FULL)
 139
 140 #define ENUM_ENTRY(n) n,
 141 typedef enum {
 142   MODRMTYPES
 143   MODRM_max
 144 } ModRMDecisionType;
 145 #undef ENUM_ENTRY
 146
 147 /*
 148  * ModRMDecision - Specifies whether a ModR/M byte is needed and (if so) which
 149  *  instruction each possible value of the ModR/M byte corresponds to.  Once
 150  *  this information is known, we have narrowed down to a single instruction.
 151  */
 152 struct ModRMDecision {
 153   uint8_t     modrm_type;
 154
 155   /* The macro below must be defined wherever this file is included. */
 156   INSTRUCTION_IDS
 157 };
 158
 159 /*
 160  * OpcodeDecision - Specifies which set of ModR/M->instruction tables to look at
 161  *   given a particular opcode.
 162  */
 163 struct OpcodeDecision {
 164   struct ModRMDecision modRMDecisions[256];
 165 };
 166
 167 /*
 168  * ContextDecision - Specifies which opcode->instruction tables to look at given
 169  *   a particular context (set of attributes).  Since there are many possible
 170  *   contexts, the decoder first uses CONTEXTS_SYM to determine which context
 171  *   applies given a specific set of attributes.  Hence there are only IC_max
 172  *   entries in this table, rather than 2^(ATTR_max).
 173  */
 174 struct ContextDecision {
 175   struct OpcodeDecision opcodeDecisions[IC_max];
 176 };
 177
 178 /*
 179  * Physical encodings of instruction operands.
 180  */
 181
 182 #define ENCODINGS                                                              \
 183   ENUM_ENTRY(ENCODING_NONE,   "")                                              \
 184   ENUM_ENTRY(ENCODING_REG,    "Register operand in ModR/M byte.")              \
 185   ENUM_ENTRY(ENCODING_RM,     "R/M operand in ModR/M byte.")                   \
 186   ENUM_ENTRY(ENCODING_CB,     "1-byte code offset (possible new CS value)")    \
 187   ENUM_ENTRY(ENCODING_CW,     "2-byte")                                        \
 188   ENUM_ENTRY(ENCODING_CD,     "4-byte")                                        \
 189   ENUM_ENTRY(ENCODING_CP,     "6-byte")                                        \
 190   ENUM_ENTRY(ENCODING_CO,     "8-byte")                                        \
 191   ENUM_ENTRY(ENCODING_CT,     "10-byte")                                       \
 192   ENUM_ENTRY(ENCODING_IB,     "1-byte immediate")                              \
 193   ENUM_ENTRY(ENCODING_IW,     "2-byte")                                        \
 194   ENUM_ENTRY(ENCODING_ID,     "4-byte")                                        \
 195   ENUM_ENTRY(ENCODING_IO,     "8-byte")                                        \
 196   ENUM_ENTRY(ENCODING_RB,     "(AL..DIL, R8L..R15L) Register code added to "   \
 197                               "the opcode byte")                               \
 198   ENUM_ENTRY(ENCODING_RW,     "(AX..DI, R8W..R15W)")                           \
 199   ENUM_ENTRY(ENCODING_RD,     "(EAX..EDI, R8D..R15D)")                         \
 200   ENUM_ENTRY(ENCODING_RO,     "(RAX..RDI, R8..R15)")                           \
 201   ENUM_ENTRY(ENCODING_I,      "Position on floating-point stack added to the " \
 202                               "opcode byte")                                   \
 203                                                                                \
 204   ENUM_ENTRY(ENCODING_Iv,     "Immediate of operand size")                     \
 205   ENUM_ENTRY(ENCODING_Ia,     "Immediate of address size")                     \
 206   ENUM_ENTRY(ENCODING_Rv,     "Register code of operand size added to the "    \
 207                               "opcode byte")                                   \
 208   ENUM_ENTRY(ENCODING_DUP,    "Duplicate of another operand; ID is encoded "   \
 209                               "in type")
 210
 211 #define ENUM_ENTRY(n, d) n,
 212   typedef enum {
 213     ENCODINGS
 214     ENCODING_max
 215   } OperandEncoding;
 216 #undef ENUM_ENTRY
 217
 218 /*
 219  * Semantic interpretations of instruction operands.
 220  */
 221
 222 #define TYPES                                                                  \
 223   ENUM_ENTRY(TYPE_NONE,       "")                                              \
 224   ENUM_ENTRY(TYPE_REL8,       "1-byte immediate address")                      \
 225   ENUM_ENTRY(TYPE_REL16,      "2-byte")                                        \
 226   ENUM_ENTRY(TYPE_REL32,      "4-byte")                                        \
 227   ENUM_ENTRY(TYPE_REL64,      "8-byte")                                        \
 228   ENUM_ENTRY(TYPE_PTR1616,    "2+2-byte segment+offset address")               \
 229   ENUM_ENTRY(TYPE_PTR1632,    "2+4-byte")                                      \
 230   ENUM_ENTRY(TYPE_PTR1664,    "2+8-byte")                                      \
 231   ENUM_ENTRY(TYPE_R8,         "1-byte register operand")                       \
 232   ENUM_ENTRY(TYPE_R16,        "2-byte")                                        \
 233   ENUM_ENTRY(TYPE_R32,        "4-byte")                                        \
 234   ENUM_ENTRY(TYPE_R64,        "8-byte")                                        \
 235   ENUM_ENTRY(TYPE_IMM8,       "1-byte immediate operand")                      \
 236   ENUM_ENTRY(TYPE_IMM16,      "2-byte")                                        \
 237   ENUM_ENTRY(TYPE_IMM32,      "4-byte")                                        \
 238   ENUM_ENTRY(TYPE_IMM64,      "8-byte")                                        \
 239   ENUM_ENTRY(TYPE_IMM3,       "1-byte immediate operand between 0 and 7")      \
 240   ENUM_ENTRY(TYPE_RM8,        "1-byte register or memory operand")             \
 241   ENUM_ENTRY(TYPE_RM16,       "2-byte")                                        \
 242   ENUM_ENTRY(TYPE_RM32,       "4-byte")                                        \
 243   ENUM_ENTRY(TYPE_RM64,       "8-byte")                                        \
 244   ENUM_ENTRY(TYPE_M,          "Memory operand")                                \
 245   ENUM_ENTRY(TYPE_M8,         "1-byte")                                        \
 246   ENUM_ENTRY(TYPE_M16,        "2-byte")                                        \
 247   ENUM_ENTRY(TYPE_M32,        "4-byte")                                        \
 248   ENUM_ENTRY(TYPE_M64,        "8-byte")                                        \
 249   ENUM_ENTRY(TYPE_LEA,        "Effective address")                             \
 250   ENUM_ENTRY(TYPE_M128,       "16-byte (SSE/SSE2)")                            \
 251   ENUM_ENTRY(TYPE_M256,       "256-byte (AVX)")                                \
 252   ENUM_ENTRY(TYPE_M1616,      "2+2-byte segment+offset address")               \
 253   ENUM_ENTRY(TYPE_M1632,      "2+4-byte")                                      \
 254   ENUM_ENTRY(TYPE_M1664,      "2+8-byte")                                      \
 255   ENUM_ENTRY(TYPE_M16_32,     "2+4-byte two-part memory operand (LIDT, LGDT)") \
 256   ENUM_ENTRY(TYPE_M16_16,     "2+2-byte (BOUND)")                              \
 257   ENUM_ENTRY(TYPE_M32_32,     "4+4-byte (BOUND)")                              \
 258   ENUM_ENTRY(TYPE_M16_64,     "2+8-byte (LIDT, LGDT)")                         \
 259   ENUM_ENTRY(TYPE_MOFFS8,     "1-byte memory offset (relative to segment "     \
 260                               "base)")                                         \
 261   ENUM_ENTRY(TYPE_MOFFS16,    "2-byte")                                        \
 262   ENUM_ENTRY(TYPE_MOFFS32,    "4-byte")                                        \
 263   ENUM_ENTRY(TYPE_MOFFS64,    "8-byte")                                        \
 264   ENUM_ENTRY(TYPE_SREG,       "Byte with single bit set: 0 = ES, 1 = CS, "     \
 265                               "2 = SS, 3 = DS, 4 = FS, 5 = GS")                \
 266   ENUM_ENTRY(TYPE_M32FP,      "32-bit IEE754 memory floating-point operand")   \
 267   ENUM_ENTRY(TYPE_M64FP,      "64-bit")                                        \
 268   ENUM_ENTRY(TYPE_M80FP,      "80-bit extended")                               \
 269   ENUM_ENTRY(TYPE_M16INT,     "2-byte memory integer operand for use in "      \
 270                               "floating-point instructions")                   \
 271   ENUM_ENTRY(TYPE_M32INT,     "4-byte")                                        \
 272   ENUM_ENTRY(TYPE_M64INT,     "8-byte")                                        \
 273   ENUM_ENTRY(TYPE_ST,         "Position on the floating-point stack")          \
 274   ENUM_ENTRY(TYPE_MM,         "MMX register operand")                          \
 275   ENUM_ENTRY(TYPE_MM32,       "4-byte MMX register or memory operand")         \
 276   ENUM_ENTRY(TYPE_MM64,       "8-byte")                                        \
 277   ENUM_ENTRY(TYPE_XMM,        "XMM register operand")                          \
 278   ENUM_ENTRY(TYPE_XMM32,      "4-byte XMM register or memory operand")         \
 279   ENUM_ENTRY(TYPE_XMM64,      "8-byte")                                        \
 280   ENUM_ENTRY(TYPE_XMM128,     "16-byte")                                       \
 281   ENUM_ENTRY(TYPE_XMM0,       "Implicit use of XMM0")                          \
 282   ENUM_ENTRY(TYPE_SEGMENTREG, "Segment register operand")                      \
 283   ENUM_ENTRY(TYPE_DEBUGREG,   "Debug register operand")                        \
 284   ENUM_ENTRY(TYPE_CONTROLREG, "Control register operand")                      \
 285                                                                                \
 286   ENUM_ENTRY(TYPE_Mv,         "Memory operand of operand size")                \
 287   ENUM_ENTRY(TYPE_Rv,         "Register operand of operand size")              \
 288   ENUM_ENTRY(TYPE_IMMv,       "Immediate operand of operand size")             \
 289   ENUM_ENTRY(TYPE_RELv,       "Immediate address of operand size")             \
 290   ENUM_ENTRY(TYPE_DUP0,       "Duplicate of operand 0")                        \
 291   ENUM_ENTRY(TYPE_DUP1,       "operand 1")                                     \
 292   ENUM_ENTRY(TYPE_DUP2,       "operand 2")                                     \
 293   ENUM_ENTRY(TYPE_DUP3,       "operand 3")                                     \
 294   ENUM_ENTRY(TYPE_DUP4,       "operand 4")                                     \
 295   ENUM_ENTRY(TYPE_M512,       "512-bit FPU/MMX/XMM/MXCSR state")
 296
 297 #define ENUM_ENTRY(n, d) n,
 298 typedef enum {
 299   TYPES
 300   TYPE_max
 301 } OperandType;
 302 #undef ENUM_ENTRY
 303
 304 /*
 305  * OperandSpecifier - The specification for how to extract and interpret one
 306  *   operand.
 307  */
 308 struct OperandSpecifier {
 309   OperandEncoding  encoding;
 310   OperandType      type;
 311 };
 312
 313 /*
 314  * Indicates where the opcode modifier (if any) is to be found.  Extended
 315  * opcodes with AddRegFrm have the opcode modifier in the ModR/M byte.
 316  */
 317
 318 #define MODIFIER_TYPES        \
 319   ENUM_ENTRY(MODIFIER_NONE)   \
 320   ENUM_ENTRY(MODIFIER_OPCODE) \
 321   ENUM_ENTRY(MODIFIER_MODRM)
 322
 323 #define ENUM_ENTRY(n) n,
 324 typedef enum {
 325   MODIFIER_TYPES
 326   MODIFIER_max
 327 } ModifierType;
 328 #undef ENUM_ENTRY
 329
 330 #define X86_MAX_OPERANDS 5
 331
 332 /*
 333  * The specification for how to extract and interpret a full instruction and
 334  * its operands.
 335  */
 336 struct InstructionSpecifier {
 337   ModifierType modifierType;
 338   uint8_t modifierBase;
 339   struct OperandSpecifier operands[X86_MAX_OPERANDS];
 340
 341   /* The macro below must be defined wherever this file is included. */
 342   INSTRUCTION_SPECIFIER_FIELDS
 343 };
 344
 345 /*
 346  * Decoding mode for the Intel disassembler.  16-bit, 32-bit, and 64-bit mode
 347  * are supported, and represent real mode, IA-32e, and IA-32e in 64-bit mode,
 348  * respectively.
 349  */
 350 typedef enum {
 351   MODE_16BIT,
 352   MODE_32BIT,
 353   MODE_64BIT
 354 } DisassemblerMode;
 355
 356 #endif