1 /*===- X86DisassemblerDecoder.c - Disassembler decoder -------------*- C -*-==*
3 * The LLVM Compiler Infrastructure
5 * This file is distributed under the University of Illinois Open Source
6 * License. See LICENSE.TXT for details.
8 *===----------------------------------------------------------------------===*
10 * This file is part of the X86 Disassembler.
11 * It contains the implementation of the instruction decoder.
12 * Documentation for the disassembler can be found in X86Disassembler.h.
14 *===----------------------------------------------------------------------===*/
16 #include <stdarg.h> /* for va_*() */
17 #include <stdio.h> /* for vsnprintf() */
18 #include <stdlib.h> /* for exit() */
19 #include <string.h> /* for memset() */
21 #include "X86DisassemblerDecoder.h"
23 #include "X86GenDisassemblerTables.inc"
/*
 * debug(s) - The first definition forwards the message s, together with
 * __FILE__ and __LINE__, to x86DisassemblerDebug(); the second expands to an
 * empty statement.
 * NOTE(review): the two definitions appear back to back in this listing; the
 * preprocessor conditional that presumably selects one of them is not
 * visible here - confirm against the upstream file.
 */
31 #define debug(s) do { x86DisassemblerDebug(__FILE__, __LINE__, s); } while (0)
33 #define debug(s) do { } while (0)
38 * contextForAttrs - Client for the instruction context table. Takes a set of
39 * attributes and returns the appropriate decode context.
41 * @param attrMask - Attributes, from the enumeration attributeBits.
42 * @return - The InstructionContext to use when looking up an
43 * an instruction with these attributes.
45 static InstructionContext
contextForAttrs(uint8_t attrMask
) {
46 return CONTEXTS_SYM
[attrMask
];
/*
 * modRMRequired - Reads the appropriate instruction table to determine whether
 *   the ModR/M byte is required to decode a particular instruction.
 *
 * @param type        - The opcode type (i.e., how many bytes it has).
 * @param insnContext - The context for the instruction, as returned by
 *                      contextForAttrs().
 * @param opcode      - The last byte of the instruction's opcode, not counting
 *                      ModR/M extensions and escapes.
 * @return            - TRUE if the ModR/M byte is required, FALSE otherwise.
 */
/*
 * modRMRequired: points `decision` at one of the generated decision tables
 * (ONEBYTE_SYM, TWOBYTE_SYM, THREEBYTE38_SYM, THREEBYTE3A_SYM,
 * THREEBYTEA6_SYM, THREEBYTEA7_SYM) and reports whether the entry for
 * [insnContext][opcode] has a modrm_type other than MODRM_ONEENTRY.
 * NOTE(review): the statement that chooses between the six assignments and
 * the third parameter are not visible in this listing; presumably the choice
 * is keyed on `type` - confirm against the upstream file.
 */
60 static int modRMRequired(OpcodeType type
,
61 InstructionContext insnContext
,
63 const struct ContextDecision
* decision
= 0;
67 decision
= &ONEBYTE_SYM
;
70 decision
= &TWOBYTE_SYM
;
73 decision
= &THREEBYTE38_SYM
;
76 decision
= &THREEBYTE3A_SYM
;
79 decision
= &THREEBYTEA6_SYM
;
82 decision
= &THREEBYTEA7_SYM
;
/* Any modrm_type other than MODRM_ONEENTRY yields a nonzero result. */
86 return decision
->opcodeDecisions
[insnContext
].modRMDecisions
[opcode
].
87 modrm_type
!= MODRM_ONEENTRY
;
/*
 * decode - Reads the appropriate instruction table to obtain the unique ID of
 *   an instruction.
 *
 * @param type        - See modRMRequired().
 * @param insnContext - See modRMRequired().
 * @param opcode      - See modRMRequired().
 * @param modRM       - The ModR/M byte if required, or any value if not.
 * @return            - The UID of the instruction, or 0 on failure.
 */
/*
 * decode: locates the ModRMDecision for [insnContext][opcode] in one of the
 * six generated tables, then picks an entry of dec->instructionIDs according
 * to dec->modrm_type: index 0, index 1 when the mod field of modRM is 0x3
 * (index 0 otherwise), or the entry indexed by the whole modRM byte.
 * NOTE(review): the selector over `type`, the case labels of the modrm_type
 * switch and part of the parameter list are missing from this listing, so
 * which modrm_type value maps to which return is not visible - confirm.
 */
102 static InstrUID
decode(OpcodeType type
,
103 InstructionContext insnContext
,
106 const struct ModRMDecision
* dec
;
110 debug("Unknown opcode type");
113 dec
= &ONEBYTE_SYM
.opcodeDecisions
[insnContext
].modRMDecisions
[opcode
];
116 dec
= &TWOBYTE_SYM
.opcodeDecisions
[insnContext
].modRMDecisions
[opcode
];
119 dec
= &THREEBYTE38_SYM
.opcodeDecisions
[insnContext
].modRMDecisions
[opcode
];
122 dec
= &THREEBYTE3A_SYM
.opcodeDecisions
[insnContext
].modRMDecisions
[opcode
];
125 dec
= &THREEBYTEA6_SYM
.opcodeDecisions
[insnContext
].modRMDecisions
[opcode
];
128 dec
= &THREEBYTEA7_SYM
.opcodeDecisions
[insnContext
].modRMDecisions
[opcode
];
/* Choose the instruction ID from the decision entry found above. */
132 switch (dec
->modrm_type
) {
134 debug("Corrupt table! Unknown modrm_type");
137 return dec
->instructionIDs
[0];
139 if (modFromModRM(modRM
) == 0x3)
140 return dec
->instructionIDs
[1];
142 return dec
->instructionIDs
[0];
144 return dec
->instructionIDs
[modRM
];
149 * specifierForUID - Given a UID, returns the name and operand specification for
152 * @param uid - The unique ID for the instruction. This should be returned by
153 * decode(); specifierForUID will not check bounds.
154 * @return - A pointer to the specification for that instruction.
156 static const struct InstructionSpecifier
*specifierForUID(InstrUID uid
) {
157 return &INSTRUCTIONS_SYM
[uid
];
161 * consumeByte - Uses the reader function provided by the user to consume one
162 * byte from the instruction's memory and advance the cursor.
164 * @param insn - The instruction with the reader function to use. The cursor
165 * for this instruction is advanced.
166 * @param byte - A pointer to a pre-allocated memory buffer to be populated
167 * with the data read.
168 * @return - 0 if the read was successful; nonzero otherwise.
/*
 * consumeByte: calls the user-supplied reader with insn->readerArg, the
 * destination pointer and the current insn->readerCursor, keeps the result
 * in `ret`, and increments insn->readerCursor.
 * NOTE(review): lines are missing from this listing between the read and the
 * increment and after it; the increment is presumably conditional on a
 * successful read and `ret` is presumably the return value - confirm.
 */
170 static int consumeByte(struct InternalInstruction
* insn
, uint8_t* byte
) {
171 int ret
= insn
->reader(insn
->readerArg
, byte
, insn
->readerCursor
);
174 ++(insn
->readerCursor
);
180 * lookAtByte - Like consumeByte, but does not advance the cursor.
182 * @param insn - See consumeByte().
183 * @param byte - See consumeByte().
184 * @return - See consumeByte().
186 static int lookAtByte(struct InternalInstruction
* insn
, uint8_t* byte
) {
187 return insn
->reader(insn
->readerArg
, byte
, insn
->readerCursor
);
190 static void unconsumeByte(struct InternalInstruction
* insn
) {
191 insn
->readerCursor
--;
/*
 * CONSUME_FUNC(name, type): generates `static int name(insn, ptr)`.  The
 * visible body loops offset over 0..sizeof(type)-1, reads the byte at
 * insn->readerCursor + offset through insn->reader, ORs it into `combined`
 * shifted left by offset * 8 bits (lowest address ends up in the lowest
 * bits), and finally advances insn->readerCursor by sizeof(type).
 * NOTE(review): several macro lines (declarations, the error check on `ret`,
 * the store through ptr, the return) are missing from this listing.
 * NOTE(review): `(type)byte << ...` shifts in a signed type when `type` is
 * int32_t; a byte >= 0x80 shifted by 24 would overflow a signed int -
 * confirm whether an unsigned intermediate is wanted.
 */
194 #define CONSUME_FUNC(name, type) \
195 static int name(struct InternalInstruction* insn, type* ptr) { \
198 for (offset = 0; offset < sizeof(type); ++offset) { \
200 int ret = insn->reader(insn->readerArg, \
202 insn->readerCursor + offset); \
205 combined = combined | ((type)byte << ((type)offset * 8)); \
208 insn->readerCursor += sizeof(type); \
213 * consume* - Use the reader function provided by the user to consume data
214 * values of various sizes from the instruction's memory and advance the
215 * cursor appropriately. These readers perform endian conversion.
217 * @param insn - See consumeByte().
218 * @param ptr - A pointer to a pre-allocated memory of appropriate size to
219 * be populated with the data read.
220 * @return - See consumeByte().
222 CONSUME_FUNC(consumeInt8
, int8_t)
223 CONSUME_FUNC(consumeInt16
, int16_t)
224 CONSUME_FUNC(consumeInt32
, int32_t)
225 CONSUME_FUNC(consumeUInt16
, uint16_t)
226 CONSUME_FUNC(consumeUInt32
, uint32_t)
227 CONSUME_FUNC(consumeUInt64
, uint64_t)
230 * dbgprintf - Uses the logging function provided by the user to log a single
231 * message, typically without a carriage-return.
233 * @param insn - The instruction containing the logging function.
234 * @param format - See printf().
235 * @param ... - See printf().
/*
 * dbgprintf: formats the variadic arguments with vsnprintf() into `buffer`
 * (bounded by sizeof(buffer)) and hands the result to the user's logger,
 * insn->dlog, together with insn->dlogArg.
 * NOTE(review): the remaining parameters, the declarations of `ap` and
 * `buffer`, any check that insn->dlog is set, and the matching va_end() are
 * not visible in this listing - confirm against the upstream file.
 */
237 static void dbgprintf(struct InternalInstruction
* insn
,
246 va_start(ap
, format
);
/* The vsnprintf() result is deliberately discarded; output may be truncated. */
247 (void)vsnprintf(buffer
, sizeof(buffer
), format
, ap
);
250 insn
->dlog(insn
->dlogArg
, buffer
);
256 * setPrefixPresent - Marks that a particular prefix is present at a particular
259 * @param insn - The instruction to be marked as having the prefix.
260 * @param prefix - The prefix that is present.
261 * @param location - The location where the prefix is located (in the address
262 * space of the instruction's reader).
/*
 * setPrefixPresent: records that `prefix` was seen by setting
 * insn->prefixPresent[prefix] to 1 and storing `location` in
 * insn->prefixLocations[prefix].  The prefix value itself is the array index.
 * NOTE(review): the declarations of `prefix` and `location` are missing from
 * this listing; readPrefixes() passes a uint8_t and a uint64_t.
 */
264 static void setPrefixPresent(struct InternalInstruction
* insn
,
268 insn
->prefixPresent
[prefix
] = 1;
269 insn
->prefixLocations
[prefix
] = location
;
273 * isPrefixAtLocation - Queries an instruction to determine whether a prefix is
274 * present at a given location.
276 * @param insn - The instruction to be queried.
277 * @param prefix - The prefix.
278 * @param location - The location to query.
279 * @return - Whether the prefix is at that location.
/*
 * isPrefixAtLocation: tests whether insn->prefixPresent[prefix] is 1 and
 * insn->prefixLocations[prefix] equals `location`.
 * NOTE(review): the parameter declarations and both return statements are
 * missing from this listing; presumably TRUE is returned when the condition
 * holds and FALSE otherwise - confirm.
 */
281 static BOOL
isPrefixAtLocation(struct InternalInstruction
* insn
,
285 if (insn
->prefixPresent
[prefix
] == 1 &&
286 insn
->prefixLocations
[prefix
] == location
)
293 * readPrefixes - Consumes all of an instruction's prefix bytes, and marks the
294 * instruction as having them. Also sets the instruction's default operand,
295 * address, and other relevant data sizes to report operands correctly.
297 * @param insn - The instruction whose prefixes are to be read.
298 * @return - 0 if the instruction could be read until the end of the prefix
299 * bytes, and no prefixes conflicted; nonzero otherwise.
/*
 * readPrefixes: consumes prefix bytes and derives the default data sizes.
 *
 * Visible behaviour:
 *  - Each byte is read with consumeByte(); the cursor value before the read
 *    is remembered in prefixLocation.
 *  - 0xf0/0xf2/0xf3 mark prefix group 1; the six segment-override bytes mark
 *    group 2 and set insn->segmentOverride; 0x66 marks group 3; 0x67 marks
 *    group 4.  Each is recorded with setPrefixPresent().
 *  - A VEX prefix is recognised after peeking at the next byte; its bytes are
 *    stored in insn->vexPrefix and an equivalent REX byte is synthesised in
 *    insn->rexPrefix.
 *  - In 64-bit mode a byte whose high nibble is 0x4 is treated as REX.
 *  - registerSize, addressSize, displacementSize and immediateSize are set
 *    from insn->mode, hasOpSize, hasAdSize and REX.W.
 *
 * NOTE(review): many lines (the loop header, switch headers, break/return
 * statements, the assignments to hasOpSize/hasAdSize/vexSize) are missing
 * from this listing; control flow between the visible statements cannot be
 * confirmed from here.
 * NOTE(review): the consumeByte() calls that fetch the VEX payload bytes
 * ignore their return value - confirm whether a failed read should abort.
 * NOTE(review): outside 64-bit mode VEX is accepted when `byte1 & 0x8`; the
 * architecture manual distinguishes VEX from LES/LDS by the top two bits of
 * that byte - verify this test.
 */
301 static int readPrefixes(struct InternalInstruction
* insn
) {
302 BOOL isPrefix
= TRUE
;
303 BOOL prefixGroups
[4] = { FALSE
};
304 uint64_t prefixLocation
;
307 BOOL hasAdSize
= FALSE
;
308 BOOL hasOpSize
= FALSE
;
310 dbgprintf(insn
, "readPrefixes()");
/* Remember where this byte sits before consuming it. */
313 prefixLocation
= insn
->readerCursor
;
315 if (consumeByte(insn
, &byte
))
319 case 0xf0: /* LOCK */
320 case 0xf2: /* REPNE/REPNZ */
321 case 0xf3: /* REP or REPE/REPZ */
323 dbgprintf(insn
, "Redundant Group 1 prefix");
324 prefixGroups
[0] = TRUE
;
325 setPrefixPresent(insn
, byte
, prefixLocation
);
327 case 0x2e: /* CS segment override -OR- Branch not taken */
328 case 0x36: /* SS segment override -OR- Branch taken */
329 case 0x3e: /* DS segment override */
330 case 0x26: /* ES segment override */
331 case 0x64: /* FS segment override */
332 case 0x65: /* GS segment override */
335 insn
->segmentOverride
= SEG_OVERRIDE_CS
;
338 insn
->segmentOverride
= SEG_OVERRIDE_SS
;
341 insn
->segmentOverride
= SEG_OVERRIDE_DS
;
344 insn
->segmentOverride
= SEG_OVERRIDE_ES
;
347 insn
->segmentOverride
= SEG_OVERRIDE_FS
;
350 insn
->segmentOverride
= SEG_OVERRIDE_GS
;
353 debug("Unhandled override");
357 dbgprintf(insn
, "Redundant Group 2 prefix");
358 prefixGroups
[1] = TRUE
;
359 setPrefixPresent(insn
, byte
, prefixLocation
);
361 case 0x66: /* Operand-size override */
363 dbgprintf(insn
, "Redundant Group 3 prefix");
364 prefixGroups
[2] = TRUE
;
366 setPrefixPresent(insn
, byte
, prefixLocation
);
368 case 0x67: /* Address-size override */
370 dbgprintf(insn
, "Redundant Group 4 prefix");
371 prefixGroups
[3] = TRUE
;
373 setPrefixPresent(insn
, byte
, prefixLocation
);
375 default: /* Not a prefix byte */
381 dbgprintf(insn
, "Found prefix 0x%hhx", byte
);
/* Candidate VEX prefix: peek at the following byte without consuming it. */
389 if (lookAtByte(insn
, &byte1
)) {
390 dbgprintf(insn
, "Couldn't read second byte of VEX");
394 if (insn
->mode
== MODE_64BIT
|| byte1
& 0x8) {
396 insn
->necessaryPrefixLocation
= insn
->readerCursor
- 1;
400 insn
->necessaryPrefixLocation
= insn
->readerCursor
- 1;
403 if (insn
->vexSize
== 3) {
404 insn
->vexPrefix
[0] = byte
;
405 consumeByte(insn
, &insn
->vexPrefix
[1]);
406 consumeByte(insn
, &insn
->vexPrefix
[2]);
408 /* We simulate the REX prefix for simplicity's sake */
410 insn
->rexPrefix
= 0x40
411 | (wFromVEX3of3(insn
->vexPrefix
[2]) << 3)
412 | (rFromVEX2of3(insn
->vexPrefix
[1]) << 2)
413 | (xFromVEX2of3(insn
->vexPrefix
[1]) << 1)
414 | (bFromVEX2of3(insn
->vexPrefix
[1]) << 0);
416 switch (ppFromVEX3of3(insn
->vexPrefix
[2]))
425 dbgprintf(insn
, "Found VEX prefix 0x%hhx 0x%hhx 0x%hhx", insn
->vexPrefix
[0], insn
->vexPrefix
[1], insn
->vexPrefix
[2]);
428 else if (byte
== 0xc5) {
431 if (lookAtByte(insn
, &byte1
)) {
432 dbgprintf(insn
, "Couldn't read second byte of VEX");
436 if (insn
->mode
== MODE_64BIT
|| byte1
& 0x8) {
443 if (insn
->vexSize
== 2) {
444 insn
->vexPrefix
[0] = byte
;
445 consumeByte(insn
, &insn
->vexPrefix
[1]);
/* The two-byte form only carries R, so only that bit is mirrored into REX. */
447 insn
->rexPrefix
= 0x40
448 | (rFromVEX2of2(insn
->vexPrefix
[1]) << 2);
450 switch (ppFromVEX2of2(insn
->vexPrefix
[1]))
459 dbgprintf(insn
, "Found VEX prefix 0x%hhx 0x%hhx", insn
->vexPrefix
[0], insn
->vexPrefix
[1]);
463 if (insn
->mode
== MODE_64BIT
) {
464 if ((byte
& 0xf0) == 0x40) {
467 if (lookAtByte(insn
, &opcodeByte
) || ((opcodeByte
& 0xf0) == 0x40)) {
468 dbgprintf(insn
, "Redundant REX prefix");
472 insn
->rexPrefix
= byte
;
473 insn
->necessaryPrefixLocation
= insn
->readerCursor
- 2;
475 dbgprintf(insn
, "Found REX prefix 0x%hhx", byte
);
478 insn
->necessaryPrefixLocation
= insn
->readerCursor
- 1;
482 insn
->necessaryPrefixLocation
= insn
->readerCursor
- 1;
/* Default operand/address/displacement/immediate sizes for the mode. */
486 if (insn
->mode
== MODE_16BIT
) {
487 insn
->registerSize
= (hasOpSize
? 4 : 2);
488 insn
->addressSize
= (hasAdSize
? 4 : 2);
489 insn
->displacementSize
= (hasAdSize
? 4 : 2);
490 insn
->immediateSize
= (hasOpSize
? 4 : 2);
491 } else if (insn
->mode
== MODE_32BIT
) {
492 insn
->registerSize
= (hasOpSize
? 2 : 4);
493 insn
->addressSize
= (hasAdSize
? 2 : 4);
494 insn
->displacementSize
= (hasAdSize
? 2 : 4);
495 insn
->immediateSize
= (hasOpSize
? 2 : 4);
496 } else if (insn
->mode
== MODE_64BIT
) {
497 if (insn
->rexPrefix
&& wFromREX(insn
->rexPrefix
)) {
498 insn
->registerSize
= 8;
499 insn
->addressSize
= (hasAdSize
? 4 : 8);
500 insn
->displacementSize
= 4;
501 insn
->immediateSize
= 4;
502 } else if (insn
->rexPrefix
) {
503 insn
->registerSize
= (hasOpSize
? 2 : 4);
504 insn
->addressSize
= (hasAdSize
? 4 : 8);
505 insn
->displacementSize
= (hasOpSize
? 2 : 4);
506 insn
->immediateSize
= (hasOpSize
? 2 : 4);
508 insn
->registerSize
= (hasOpSize
? 2 : 4);
509 insn
->addressSize
= (hasAdSize
? 4 : 8);
510 insn
->displacementSize
= (hasOpSize
? 2 : 4);
511 insn
->immediateSize
= (hasOpSize
? 2 : 4);
519 * readOpcode - Reads the opcode (excepting the ModR/M byte in the case of
520 * extended or escape opcodes).
522 * @param insn - The instruction whose opcode is to be read.
523 * @return - 0 if the opcode could be read successfully; nonzero otherwise.
/*
 * readOpcode: determines insn->opcodeType and reads the final opcode byte.
 *
 * Visible behaviour:
 *  - opcodeType starts as ONEBYTE.
 *  - With a three-byte VEX prefix the m-mmmm field of vexPrefix[1] selects
 *    TWOBYTE, THREEBYTE_38 or THREEBYTE_3A; the implied escape bytes are
 *    stored and the opcode is read straight into insn->opcode.
 *  - With a two-byte VEX prefix the opcode is TWOBYTE with escape 0x0f.
 *  - Otherwise bytes are consumed one at a time: 0x0f is a two-byte escape,
 *    and a following 0x38, 0x3a, 0xa6 or 0xa7 is a three-byte escape.
 *  - The last byte read is stored in insn->opcode.
 *
 * NOTE(review): case labels, return statements, braces and the declaration
 * of `current` are missing from this listing.
 * NOTE(review): the token `¤t` looks like a mis-decoded `&current` (HTML
 * entity damage in the listing) - confirm against the upstream file.
 */
525 static int readOpcode(struct InternalInstruction
* insn
) {
526 /* Determine the length of the primary opcode */
530 dbgprintf(insn
, "readOpcode()");
532 insn
->opcodeType
= ONEBYTE
;
534 if (insn
->vexSize
== 3)
536 switch (mmmmmFromVEX2of3(insn
->vexPrefix
[1]))
539 dbgprintf(insn
, "Unhandled m-mmmm field for instruction (0x%hhx)", mmmmmFromVEX2of3(insn
->vexPrefix
[1]));
544 insn
->twoByteEscape
= 0x0f;
545 insn
->opcodeType
= TWOBYTE
;
546 return consumeByte(insn
, &insn
->opcode
);
548 insn
->twoByteEscape
= 0x0f;
549 insn
->threeByteEscape
= 0x38;
550 insn
->opcodeType
= THREEBYTE_38
;
551 return consumeByte(insn
, &insn
->opcode
);
553 insn
->twoByteEscape
= 0x0f;
554 insn
->threeByteEscape
= 0x3a;
555 insn
->opcodeType
= THREEBYTE_3A
;
556 return consumeByte(insn
, &insn
->opcode
);
559 else if (insn
->vexSize
== 2)
561 insn
->twoByteEscape
= 0x0f;
562 insn
->opcodeType
= TWOBYTE
;
563 return consumeByte(insn
, &insn
->opcode
);
/* No VEX prefix: walk the escape bytes explicitly. */
566 if (consumeByte(insn
, ¤t
))
569 if (current
== 0x0f) {
570 dbgprintf(insn
, "Found a two-byte escape prefix (0x%hhx)", current
);
572 insn
->twoByteEscape
= current
;
574 if (consumeByte(insn
, ¤t
))
577 if (current
== 0x38) {
578 dbgprintf(insn
, "Found a three-byte escape prefix (0x%hhx)", current
);
580 insn
->threeByteEscape
= current
;
582 if (consumeByte(insn
, ¤t
))
585 insn
->opcodeType
= THREEBYTE_38
;
586 } else if (current
== 0x3a) {
587 dbgprintf(insn
, "Found a three-byte escape prefix (0x%hhx)", current
);
589 insn
->threeByteEscape
= current
;
591 if (consumeByte(insn
, ¤t
))
594 insn
->opcodeType
= THREEBYTE_3A
;
595 } else if (current
== 0xa6) {
596 dbgprintf(insn
, "Found a three-byte escape prefix (0x%hhx)", current
);
598 insn
->threeByteEscape
= current
;
600 if (consumeByte(insn
, ¤t
))
603 insn
->opcodeType
= THREEBYTE_A6
;
604 } else if (current
== 0xa7) {
605 dbgprintf(insn
, "Found a three-byte escape prefix (0x%hhx)", current
);
607 insn
->threeByteEscape
= current
;
609 if (consumeByte(insn
, ¤t
))
612 insn
->opcodeType
= THREEBYTE_A7
;
614 dbgprintf(insn
, "Didn't find a three-byte escape prefix");
616 insn
->opcodeType
= TWOBYTE
;
621 * At this point we have consumed the full opcode.
622 * Anything we consume from here on must be unconsumed.
625 insn
->opcode
= current
;
/*
 * Forward declaration of readModRM(), which is defined further down in this
 * file; it lets code placed before that definition call it.
 */
static int readModRM(struct InternalInstruction* insn);
633 * getIDWithAttrMask - Determines the ID of an instruction, consuming
634 * the ModR/M byte as appropriate for extended and escape opcodes,
635 * and using a supplied attribute mask.
637 * @param instructionID - A pointer whose target is filled in with the ID of the
639 * @param insn - The instruction whose ID is to be determined.
640 * @param attrMask - The attribute mask to search.
641 * @return - 0 if the ModR/M could be read when needed or was not
642 * needed; nonzero otherwise.
/*
 * getIDWithAttrMask: maps attrMask to an instruction context with
 * contextForAttrs(), asks modRMRequired() whether the opcode needs a ModR/M
 * byte, and stores the result of decode() through instructionID on both the
 * "ModR/M required" and the "not required" paths.
 * NOTE(review): the third parameter, the remaining arguments of the
 * modRMRequired()/decode() calls, the ModR/M read and the return statements
 * are missing from this listing - confirm against the upstream file.
 */
644 static int getIDWithAttrMask(uint16_t* instructionID
,
645 struct InternalInstruction
* insn
,
647 BOOL hasModRMExtension
;
649 uint8_t instructionClass
;
651 instructionClass
= contextForAttrs(attrMask
);
653 hasModRMExtension
= modRMRequired(insn
->opcodeType
,
657 if (hasModRMExtension
) {
661 *instructionID
= decode(insn
->opcodeType
,
666 *instructionID
= decode(insn
->opcodeType
,
/*
 * is16BitEquvalent - Determines whether two instruction names refer to
 *   equivalent instructions but one is 16-bit whereas the other is not.
 *   (The function name is spelled "Equvalent" in the code; the spelling is
 *   kept here so that the comment matches the identifier.)
 *
 * @param orig  - The instruction that is not 16-bit
 * @param equiv - The instruction that is 16-bit
 */
/*
 * is16BitEquvalent: walks both names character by character.  The visible
 * tests are: both strings end at the same index; exactly one string ends;
 * and, where the characters differ, whether the pair is one of the accepted
 * substitutions: 'Q' or 'L' -> 'W', '6' or '3' -> '1', '4' or '2' -> '6'
 * (i.e. a "64"/"32" suffix against "16").
 * NOTE(review): the loop header, the declaration of `i` and every
 * return/continue statement are missing from this listing, so the outcome
 * of each test cannot be confirmed from here.
 */
682 static BOOL
is16BitEquvalent(const char* orig
, const char* equiv
) {
686 if (orig
[i
] == '\0' && equiv
[i
] == '\0')
688 if (orig
[i
] == '\0' || equiv
[i
] == '\0')
690 if (orig
[i
] != equiv
[i
]) {
691 if ((orig
[i
] == 'Q' || orig
[i
] == 'L') && equiv
[i
] == 'W')
693 if ((orig
[i
] == '6' || orig
[i
] == '3') && equiv
[i
] == '1')
695 if ((orig
[i
] == '4' || orig
[i
] == '2') && equiv
[i
] == '6')
703 * is64BitEquivalent - Determines whether two instruction names refer to
704 * equivalent instructions but one is 64-bit whereas the other is not.
706 * @param orig - The instruction that is not 64-bit
707 * @param equiv - The instruction that is 64-bit
/*
 * is64BitEquivalent: walks both names character by character.  The visible
 * tests are: both strings end at the same index; exactly one string ends;
 * and, where the characters differ, whether the pair is one of the accepted
 * substitutions: 'W' or 'L' -> 'Q', '1' or '3' -> '6', '6' or '2' -> '4'
 * (i.e. a "16"/"32" suffix against "64").
 * NOTE(review): the loop header, the declaration of `i` and every
 * return/continue statement are missing from this listing, so the outcome
 * of each test cannot be confirmed from here.
 */
709 static BOOL
is64BitEquivalent(const char* orig
, const char* equiv
) {
713 if (orig
[i
] == '\0' && equiv
[i
] == '\0')
715 if (orig
[i
] == '\0' || equiv
[i
] == '\0')
717 if (orig
[i
] != equiv
[i
]) {
718 if ((orig
[i
] == 'W' || orig
[i
] == 'L') && equiv
[i
] == 'Q')
720 if ((orig
[i
] == '1' || orig
[i
] == '3') && equiv
[i
] == '6')
722 if ((orig
[i
] == '6' || orig
[i
] == '2') && equiv
[i
] == '4')
731 * getID - Determines the ID of an instruction, consuming the ModR/M byte as
732 * appropriate for extended and escape opcodes. Determines the attributes and
733 * context for the instruction before doing so.
735 * @param insn - The instruction whose ID is to be determined.
736 * @return - 0 if the ModR/M could be read when needed or was not needed;
/*
 * getID: builds an attribute mask for the instruction, looks up its ID, and
 * then applies two table-limitation workarounds.
 *
 * Visible behaviour:
 *  - attrMask starts at ATTR_NONE; ATTR_64BIT is added in 64-bit mode.
 *  - On the VEX path ATTR_VEX is added and the pp/W/L fields of the stored
 *    VEX bytes contribute ATTR_OPSIZE, ATTR_REXW and ATTR_VEXL.
 *  - Otherwise REX.W (rexPrefix & 0x08) adds ATTR_REXW, and a 0x66, 0xf3 or
 *    0xf2 prefix located at insn->necessaryPrefixLocation is examined.
 *  - getIDWithAttrMask() produces the initial instructionID.
 *  - If both ATTR_XD and ATTR_REXW are set, the lookup is retried without
 *    ATTR_XD and the retry is adopted when is64BitEquivalent() accepts the
 *    two names.
 *  - If a 0x66 prefix is present but ATTR_OPSIZE is not set, the lookup is
 *    retried with ATTR_OPSIZE and adopted when is16BitEquvalent() accepts
 *    the two names.
 *  - Finally insn->instructionID and insn->spec are stored.
 *
 * NOTE(review): the declaration of attrMask, several case labels, the
 * assignments made for 0xf3/0xf2, return statements and braces are missing
 * from this listing - confirm against the upstream file.
 */
739 static int getID(struct InternalInstruction
* insn
) {
741 uint16_t instructionID
;
743 dbgprintf(insn
, "getID()");
745 attrMask
= ATTR_NONE
;
747 if (insn
->mode
== MODE_64BIT
)
748 attrMask
|= ATTR_64BIT
;
751 attrMask
|= ATTR_VEX
;
753 if (insn
->vexSize
== 3) {
754 switch (ppFromVEX3of3(insn
->vexPrefix
[2])) {
756 attrMask
|= ATTR_OPSIZE
;
766 if (wFromVEX3of3(insn
->vexPrefix
[2]))
767 attrMask
|= ATTR_REXW
;
768 if (lFromVEX3of3(insn
->vexPrefix
[2]))
769 attrMask
|= ATTR_VEXL
;
771 else if (insn
->vexSize
== 2) {
772 switch (ppFromVEX2of2(insn
->vexPrefix
[1])) {
774 attrMask
|= ATTR_OPSIZE
;
784 if (lFromVEX2of2(insn
->vexPrefix
[1]))
785 attrMask
|= ATTR_VEXL
;
792 if (insn
->rexPrefix
& 0x08)
793 attrMask
|= ATTR_REXW
;
795 if (isPrefixAtLocation(insn
, 0x66, insn
->necessaryPrefixLocation
))
796 attrMask
|= ATTR_OPSIZE
;
797 else if (isPrefixAtLocation(insn
, 0xf3, insn
->necessaryPrefixLocation
))
799 else if (isPrefixAtLocation(insn
, 0xf2, insn
->necessaryPrefixLocation
))
804 if (getIDWithAttrMask(&instructionID
, insn
, attrMask
))
807 /* The following clauses compensate for limitations of the tables. */
809 if ((attrMask
& ATTR_XD
) && (attrMask
& ATTR_REXW
)) {
811 * Although for SSE instructions it is usually necessary to treat REX.W+F2
812 * as F2 for decode (in the absence of a 64BIT_REXW_XD category) there is
813 * an occasional instruction where F2 is incidental and REX.W is the more
814 * significant. If the decoded instruction is 32-bit and adding REX.W
815 * instead of F2 changes a 32 to a 64, we adopt the new encoding.
818 const struct InstructionSpecifier
*spec
;
819 uint16_t instructionIDWithREXw
;
820 const struct InstructionSpecifier
*specWithREXw
;
822 spec
= specifierForUID(instructionID
);
824 if (getIDWithAttrMask(&instructionIDWithREXw
,
826 attrMask
& (~ATTR_XD
))) {
828 * Decoding with REX.w would yield nothing; give up and return original
832 insn
->instructionID
= instructionID
;
837 specWithREXw
= specifierForUID(instructionIDWithREXw
);
839 if (is64BitEquivalent(spec
->name
, specWithREXw
->name
)) {
840 insn
->instructionID
= instructionIDWithREXw
;
841 insn
->spec
= specWithREXw
;
843 insn
->instructionID
= instructionID
;
849 if (insn
->prefixPresent
[0x66] && !(attrMask
& ATTR_OPSIZE
)) {
851 * The instruction tables make no distinction between instructions that
852 * allow OpSize anywhere (i.e., 16-bit operations) and that need it in a
853 * particular spot (i.e., many MMX operations). In general we're
854 * conservative, but in the specific case where OpSize is present but not
855 * in the right place we check if there's a 16-bit operation.
858 const struct InstructionSpecifier
*spec
;
859 uint16_t instructionIDWithOpsize
;
860 const struct InstructionSpecifier
*specWithOpsize
;
862 spec
= specifierForUID(instructionID
);
864 if (getIDWithAttrMask(&instructionIDWithOpsize
,
866 attrMask
| ATTR_OPSIZE
)) {
868 * ModRM required with OpSize but not present; give up and return version
872 insn
->instructionID
= instructionID
;
877 specWithOpsize
= specifierForUID(instructionIDWithOpsize
);
879 if (is16BitEquvalent(spec
->name
, specWithOpsize
->name
)) {
880 insn
->instructionID
= instructionIDWithOpsize
;
881 insn
->spec
= specWithOpsize
;
883 insn
->instructionID
= instructionID
;
889 insn
->instructionID
= instructionID
;
890 insn
->spec
= specifierForUID(insn
->instructionID
);
896 * readSIB - Consumes the SIB byte to determine addressing information for an
899 * @param insn - The instruction whose SIB byte is to be read.
900 * @return - 0 if the SIB byte was successfully read; nonzero otherwise.
/*
 * readSIB: consumes the SIB byte once (guarded by insn->consumedSIB) and
 * decodes its index, scale and base fields.
 *
 * Visible behaviour:
 *  - insn->addressSize selects the EAX- or RAX-based register numbering; a
 *    debug message is logged on the 16-bit path.
 *  - index = SIB.index | REX.X << 3; an index that lands on SIB_INDEX_sib or
 *    SIB_INDEX_sib64 is replaced by SIB_INDEX_NONE.
 *  - base = SIB.base | REX.B << 3; for one base value (its case label is not
 *    visible) the mod field of insn->modRM decides between "no base with a
 *    32-bit displacement" and "EBP/RBP with an 8- or 32-bit displacement".
 *
 * NOTE(review): case labels, the scale assignments, return statements and
 * the declarations of `index` and `base` are missing from this listing.
 */
902 static int readSIB(struct InternalInstruction
* insn
) {
903 SIBIndex sibIndexBase
= 0;
904 SIBBase sibBaseBase
= 0;
907 dbgprintf(insn
, "readSIB()");
909 if (insn
->consumedSIB
)
912 insn
->consumedSIB
= TRUE
;
914 switch (insn
->addressSize
) {
916 dbgprintf(insn
, "SIB-based addressing doesn't work in 16-bit mode");
920 sibIndexBase
= SIB_INDEX_EAX
;
921 sibBaseBase
= SIB_BASE_EAX
;
924 sibIndexBase
= SIB_INDEX_RAX
;
925 sibBaseBase
= SIB_BASE_RAX
;
929 if (consumeByte(insn
, &insn
->sib
))
/* REX.X supplies the fourth bit of the index register number. */
932 index
= indexFromSIB(insn
->sib
) | (xFromREX(insn
->rexPrefix
) << 3);
936 insn
->sibIndex
= SIB_INDEX_NONE
;
939 insn
->sibIndex
= (SIBIndex
)(sibIndexBase
+ index
);
940 if (insn
->sibIndex
== SIB_INDEX_sib
||
941 insn
->sibIndex
== SIB_INDEX_sib64
)
942 insn
->sibIndex
= SIB_INDEX_NONE
;
946 switch (scaleFromSIB(insn
->sib
)) {
/* REX.B supplies the fourth bit of the base register number. */
961 base
= baseFromSIB(insn
->sib
) | (bFromREX(insn
->rexPrefix
) << 3);
965 switch (modFromModRM(insn
->modRM
)) {
967 insn
->eaDisplacement
= EA_DISP_32
;
968 insn
->sibBase
= SIB_BASE_NONE
;
971 insn
->eaDisplacement
= EA_DISP_8
;
972 insn
->sibBase
= (insn
->addressSize
== 4 ?
973 SIB_BASE_EBP
: SIB_BASE_RBP
);
976 insn
->eaDisplacement
= EA_DISP_32
;
977 insn
->sibBase
= (insn
->addressSize
== 4 ?
978 SIB_BASE_EBP
: SIB_BASE_RBP
);
981 debug("Cannot have Mod = 0b11 and a SIB byte");
986 insn
->sibBase
= (SIBBase
)(sibBaseBase
+ base
);
994 * readDisplacement - Consumes the displacement of an instruction.
996 * @param insn - The instruction whose displacement is to be read.
997 * @return - 0 if the displacement byte was successfully read; nonzero
/*
 * readDisplacement: reads the displacement once (guarded by
 * insn->consumedDisplacement).  Depending on insn->eaDisplacement it reads
 * nothing (the flag is cleared again on that path), or a signed 8-, 16- or
 * 32-bit value via consumeInt8/16/32 and stores it in insn->displacement.
 * The flag is set to TRUE again at the end.
 * NOTE(review): case labels, the declarations of d8/d16/d32, break and
 * return statements are missing from this listing - confirm.
 */
1000 static int readDisplacement(struct InternalInstruction
* insn
) {
1005 dbgprintf(insn
, "readDisplacement()");
1007 if (insn
->consumedDisplacement
)
1010 insn
->consumedDisplacement
= TRUE
;
1012 switch (insn
->eaDisplacement
) {
1014 insn
->consumedDisplacement
= FALSE
;
1017 if (consumeInt8(insn
, &d8
))
1019 insn
->displacement
= d8
;
1022 if (consumeInt16(insn
, &d16
))
1024 insn
->displacement
= d16
;
1027 if (consumeInt32(insn
, &d32
))
1029 insn
->displacement
= d32
;
1033 insn
->consumedDisplacement
= TRUE
;
1038 * readModRM - Consumes all addressing information (ModR/M byte, SIB byte, and
1039 * displacement) for an instruction and interprets it.
1041 * @param insn - The instruction whose addressing information is to be read.
1042 * @return - 0 if the information was successfully read; nonzero otherwise.
/*
 * readModRM: consumes the ModR/M byte once (guarded by insn->consumedModRM)
 * and interprets it.
 *
 * Visible behaviour:
 *  - mod, rm and reg are extracted from insn->modRM.
 *  - insn->registerSize selects the AX/EAX/RAX register families for
 *    insn->regBase and insn->eaRegBase.
 *  - REX.R and REX.B extend reg and rm by a fourth bit; insn->reg is
 *    regBase + reg.
 *  - insn->addressSize selects 16-bit addressing (bases starting at
 *    EA_BASE_BX_SI) or 32/64-bit addressing (EA_BASE_EAX / EA_BASE_RAX).
 *    Within each, the visible assignments set eaBase and eaDisplacement and
 *    call readDisplacement(); register-direct forms use eaRegBase + rm with
 *    no displacement.
 *
 * NOTE(review): the switch over mod, most case labels, the readSIB() calls,
 * break/return statements and braces are missing from this listing.
 * NOTE(review): the first SIB case picks EA_BASE_sib or EA_BASE_sib64 from
 * addressSize while the second always uses EA_BASE_sib - confirm whether
 * the asymmetry is intended.
 */
1044 static int readModRM(struct InternalInstruction
* insn
) {
1045 uint8_t mod
, rm
, reg
;
1047 dbgprintf(insn
, "readModRM()");
1049 if (insn
->consumedModRM
)
1052 if (consumeByte(insn
, &insn
->modRM
))
1054 insn
->consumedModRM
= TRUE
;
1056 mod
= modFromModRM(insn
->modRM
);
1057 rm
= rmFromModRM(insn
->modRM
);
1058 reg
= regFromModRM(insn
->modRM
);
1061 * This goes by insn->registerSize to pick the correct register, which messes
1062 * up if we're using (say) XMM or 8-bit register operands. That gets fixed in
1065 switch (insn
->registerSize
) {
1067 insn
->regBase
= MODRM_REG_AX
;
1068 insn
->eaRegBase
= EA_REG_AX
;
1071 insn
->regBase
= MODRM_REG_EAX
;
1072 insn
->eaRegBase
= EA_REG_EAX
;
1075 insn
->regBase
= MODRM_REG_RAX
;
1076 insn
->eaRegBase
= EA_REG_RAX
;
1080 reg
|= rFromREX(insn
->rexPrefix
) << 3;
1081 rm
|= bFromREX(insn
->rexPrefix
) << 3;
1083 insn
->reg
= (Reg
)(insn
->regBase
+ reg
);
1085 switch (insn
->addressSize
) {
1087 insn
->eaBaseBase
= EA_BASE_BX_SI
;
1092 insn
->eaBase
= EA_BASE_NONE
;
1093 insn
->eaDisplacement
= EA_DISP_16
;
1094 if (readDisplacement(insn
))
1097 insn
->eaBase
= (EABase
)(insn
->eaBaseBase
+ rm
);
1098 insn
->eaDisplacement
= EA_DISP_NONE
;
1102 insn
->eaBase
= (EABase
)(insn
->eaBaseBase
+ rm
);
1103 insn
->eaDisplacement
= EA_DISP_8
;
1104 if (readDisplacement(insn
))
1108 insn
->eaBase
= (EABase
)(insn
->eaBaseBase
+ rm
);
1109 insn
->eaDisplacement
= EA_DISP_16
;
1110 if (readDisplacement(insn
))
1114 insn
->eaBase
= (EABase
)(insn
->eaRegBase
+ rm
);
1115 if (readDisplacement(insn
))
1122 insn
->eaBaseBase
= (insn
->addressSize
== 4 ? EA_BASE_EAX
: EA_BASE_RAX
);
1126 insn
->eaDisplacement
= EA_DISP_NONE
; /* readSIB may override this */
1129 case 0xc: /* in case REXW.b is set */
1130 insn
->eaBase
= (insn
->addressSize
== 4 ?
1131 EA_BASE_sib
: EA_BASE_sib64
);
1133 if (readDisplacement(insn
))
1137 insn
->eaBase
= EA_BASE_NONE
;
1138 insn
->eaDisplacement
= EA_DISP_32
;
1139 if (readDisplacement(insn
))
1143 insn
->eaBase
= (EABase
)(insn
->eaBaseBase
+ rm
);
1149 insn
->eaDisplacement
= (mod
== 0x1 ? EA_DISP_8
: EA_DISP_32
);
1152 case 0xc: /* in case REXW.b is set */
1153 insn
->eaBase
= EA_BASE_sib
;
1155 if (readDisplacement(insn
))
1159 insn
->eaBase
= (EABase
)(insn
->eaBaseBase
+ rm
);
1160 if (readDisplacement(insn
))
1166 insn
->eaDisplacement
= EA_DISP_NONE
;
1167 insn
->eaBase
= (EABase
)(insn
->eaRegBase
+ rm
);
1171 } /* switch (insn->addressSize) */
/*
 * GENERIC_FIXUP_FUNC(name, base, prefix): generates a static function `name`
 * that converts a register index into an enumerator of the `prefix` family.
 * The visible returns cover: `base + index`; the 8-bit registers (with a REX
 * prefix present, indices 4..7 map to prefix##_SPL onwards instead of
 * prefix##_AL + index); the AX, EAX, RAX, YMM0, XMM0 and MM0 families; and
 * the segment, debug and control registers (ES, DR0, CR0 families).
 * NOTE(review): the remaining parameters, most case labels and the range
 * checks between the visible lines are missing from this listing.
 */
1176 #define GENERIC_FIXUP_FUNC(name, base, prefix) \
1177 static uint8_t name(struct InternalInstruction *insn, \
1184 debug("Unhandled register type"); \
1188 return base + index; \
1190 if (insn->rexPrefix && \
1191 index >= 4 && index <= 7) { \
1192 return prefix##_SPL + (index - 4); \
1194 return prefix##_AL + index; \
1197 return prefix##_AX + index; \
1199 return prefix##_EAX + index; \
1201 return prefix##_RAX + index; \
1203 return prefix##_YMM0 + index; \
1208 return prefix##_XMM0 + index; \
1214 return prefix##_MM0 + index; \
1215 case TYPE_SEGMENTREG: \
1218 return prefix##_ES + index; \
1219 case TYPE_DEBUGREG: \
1222 return prefix##_DR0 + index; \
1223 case TYPE_CONTROLREG: \
1226 return prefix##_CR0 + index; \
1231 * fixup*Value - Consults an operand type to determine the meaning of the
1232 * reg or R/M field. If the operand is an XMM operand, for example, an
1233 * operand would be XMM0 instead of AX, which readModRM() would otherwise
1234 * misinterpret it as.
1236 * @param insn - The instruction containing the operand.
1237 * @param type - The operand type.
1238 * @param index - The existing value of the field as reported by readModRM().
1239 * @param valid - The address of a uint8_t. The target is set to 1 if the
1240 * field is valid for the register class; 0 if not.
1241 * @return - The proper value.
1243 GENERIC_FIXUP_FUNC(fixupRegValue
, insn
->regBase
, MODRM_REG
)
1244 GENERIC_FIXUP_FUNC(fixupRMValue
, insn
->eaRegBase
, EA_REG
)
/*
 * fixupReg - Consults an operand specifier to determine which of the
 *   fixup*Value functions to use in correcting readModRM()'s interpretation.
 *
 * @param insn  - See fixup*Value().
 * @param op    - The operand specifier.
 * @return      - 0 if fixup was successful; -1 if the register returned was
 *                invalid for its class.
 */
/*
 * fixupReg: dispatches on op->encoding and re-derives one register field
 * from op->type:
 *  - insn->vvvv through fixupRegValue();
 *  - insn->reg through fixupRegValue(), passing the index relative to
 *    insn->regBase;
 *  - insn->eaBase through fixupRMValue(), only when eaBase is at or beyond
 *    insn->eaRegBase, passing the index relative to insn->eaRegBase.
 * NOTE(review): the case labels, the trailing arguments of each call (the
 * `valid` out-parameter), the validity checks and the return statements are
 * missing from this listing - confirm against the upstream file.
 */
1255 static int fixupReg(struct InternalInstruction
*insn
,
1256 const struct OperandSpecifier
*op
) {
1259 dbgprintf(insn
, "fixupReg()");
1261 switch ((OperandEncoding
)op
->encoding
) {
1263 debug("Expected a REG or R/M encoding in fixupReg");
1266 insn
->vvvv
= (Reg
)fixupRegValue(insn
,
1267 (OperandType
)op
->type
,
1274 insn
->reg
= (Reg
)fixupRegValue(insn
,
1275 (OperandType
)op
->type
,
1276 insn
->reg
- insn
->regBase
,
1282 if (insn
->eaBase
>= insn
->eaRegBase
) {
1283 insn
->eaBase
= (EABase
)fixupRMValue(insn
,
1284 (OperandType
)op
->type
,
1285 insn
->eaBase
- insn
->eaRegBase
,
/*
 * readOpcodeModifier - Reads an operand from the opcode field of an
 *   instruction.  Handles AddRegFrm instructions.  The modifier is taken
 *   from the opcode byte or from the ModR/M byte (useful for escape opcodes),
 *   as selected by the instruction specifier's modifierType; the function
 *   takes no separate inModRM argument.
 *
 * @param insn    - The instruction whose opcode field is to be read.
 * @return        - 0 on success; nonzero otherwise.
 */
/*
 * readOpcodeModifier: computes insn->opcodeModifier once (guarded by
 * insn->consumedOpcodeModifier).  For MODIFIER_OPCODE it is insn->opcode
 * minus insn->spec->modifierBase; for MODIFIER_MODRM it is insn->modRM minus
 * insn->spec->modifierBase.  Two debug messages cover an unknown modifier
 * type and a specifier that has no modifier.
 * NOTE(review): the remaining case labels and all return statements are
 * missing from this listing - confirm against the upstream file.
 */
1305 static int readOpcodeModifier(struct InternalInstruction
* insn
) {
1306 dbgprintf(insn
, "readOpcodeModifier()");
1308 if (insn
->consumedOpcodeModifier
)
1311 insn
->consumedOpcodeModifier
= TRUE
;
1313 switch (insn
->spec
->modifierType
) {
1315 debug("Unknown modifier type.");
1318 debug("No modifier but an operand expects one.");
1320 case MODIFIER_OPCODE
:
1321 insn
->opcodeModifier
= insn
->opcode
- insn
->spec
->modifierBase
;
1323 case MODIFIER_MODRM
:
1324 insn
->opcodeModifier
= insn
->modRM
- insn
->spec
->modifierBase
;
1330 * readOpcodeRegister - Reads an operand from the opcode field of an
1331 * instruction and interprets it appropriately given the operand width.
1332 * Handles AddRegFrm instructions.
1334 * @param insn - See readOpcodeModifier().
1335 * @param size - The width (in bytes) of the register being specified.
1336 * 1 means AL and friends, 2 means AX, 4 means EAX, and 8 means
1338 * @return - 0 on success; nonzero otherwise.
/*
 * readOpcodeRegister: after readOpcodeModifier() succeeds, turns
 * insn->opcodeModifier into a register enumerator of the requested width.
 * The register number is (REX.B << 3) | opcodeModifier, added to
 * MODRM_REG_AL, MODRM_REG_AX, MODRM_REG_EAX or MODRM_REG_RAX.  For the 8-bit
 * family, when a REX prefix is present, numbers 4..7 are remapped onto the
 * range that starts at MODRM_REG_SPL.
 * NOTE(review): `size` is replaced by insn->registerSize on a path whose
 * condition is not visible (the callers pass 0 for "default width"); the
 * switch header, case labels and return statements are also missing from
 * this listing - confirm.
 */
1340 static int readOpcodeRegister(struct InternalInstruction
* insn
, uint8_t size
) {
1341 dbgprintf(insn
, "readOpcodeRegister()");
1343 if (readOpcodeModifier(insn
))
1347 size
= insn
->registerSize
;
1351 insn
->opcodeRegister
= (Reg
)(MODRM_REG_AL
+ ((bFromREX(insn
->rexPrefix
) << 3)
1352 | insn
->opcodeModifier
));
1353 if (insn
->rexPrefix
&&
1354 insn
->opcodeRegister
>= MODRM_REG_AL
+ 0x4 &&
1355 insn
->opcodeRegister
< MODRM_REG_AL
+ 0x8) {
1356 insn
->opcodeRegister
= (Reg
)(MODRM_REG_SPL
1357 + (insn
->opcodeRegister
- MODRM_REG_AL
- 4));
1362 insn
->opcodeRegister
= (Reg
)(MODRM_REG_AX
1363 + ((bFromREX(insn
->rexPrefix
) << 3)
1364 | insn
->opcodeModifier
));
1367 insn
->opcodeRegister
= (Reg
)(MODRM_REG_EAX
1368 + ((bFromREX(insn
->rexPrefix
) << 3)
1369 | insn
->opcodeModifier
));
1372 insn
->opcodeRegister
= (Reg
)(MODRM_REG_RAX
1373 + ((bFromREX(insn
->rexPrefix
) << 3)
1374 | insn
->opcodeModifier
));
1382 * readImmediate - Consumes an immediate operand from an instruction, given the
1383 * desired operand size.
1385 * @param insn - The instruction whose operand is to be read.
1386 * @param size - The width (in bytes) of the operand.
1387 * @return - 0 if the immediate was successfully consumed; nonzero
/*
 * readImmediate: reads one immediate of `size` bytes into
 * insn->immediates[insn->numImmediatesConsumed] and then increments that
 * counter.  A debug message is issued when two immediates have already been
 * consumed.  The value is fetched with consumeByte(), consumeUInt16(),
 * consumeUInt32() or consumeUInt64().
 * NOTE(review): `size` and insn->immediateSize are assigned to each other on
 * paths whose conditions are not visible; the switch header, case labels,
 * the declarations of imm8..imm64 and the return statements are also
 * missing from this listing - confirm against the upstream file.
 */
1390 static int readImmediate(struct InternalInstruction
* insn
, uint8_t size
) {
1396 dbgprintf(insn
, "readImmediate()");
1398 if (insn
->numImmediatesConsumed
== 2) {
1399 debug("Already consumed two immediates");
1404 size
= insn
->immediateSize
;
1406 insn
->immediateSize
= size
;
1410 if (consumeByte(insn
, &imm8
))
1412 insn
->immediates
[insn
->numImmediatesConsumed
] = imm8
;
1415 if (consumeUInt16(insn
, &imm16
))
1417 insn
->immediates
[insn
->numImmediatesConsumed
] = imm16
;
1420 if (consumeUInt32(insn
, &imm32
))
1422 insn
->immediates
[insn
->numImmediatesConsumed
] = imm32
;
1425 if (consumeUInt64(insn
, &imm64
))
1427 insn
->immediates
[insn
->numImmediatesConsumed
] = imm64
;
1431 insn
->numImmediatesConsumed
++;
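/*
 * readVVVV - Extracts the vvvv register field from the instruction's VEX
 *   prefix (the third byte of a three-byte prefix, or the second byte of a
 *   two-byte prefix) and stores it in the instruction.  No bytes are
 *   consumed from the reader.
 *
 * @param insn  - The instruction whose vvvv field is to be read.
 * @return      - 0 if the field was successfully read; nonzero otherwise.
 */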
/*
 * readVVVV: stores the vvvv field of the VEX prefix in insn->vvvv, taking it
 * from vexPrefix[2] for a three-byte prefix and from vexPrefix[1] for a
 * two-byte prefix.  Nothing is read from the byte stream here.
 * NOTE(review): the branch taken when there is no VEX prefix and the return
 * statements are missing from this listing - confirm.
 */
1444 static int readVVVV(struct InternalInstruction
* insn
) {
1445 dbgprintf(insn
, "readVVVV()");
1447 if (insn
->vexSize
== 3)
1448 insn
->vvvv
= vvvvFromVEX3of3(insn
->vexPrefix
[2]);
1449 else if (insn
->vexSize
== 2)
1450 insn
->vvvv
= vvvvFromVEX2of2(insn
->vexPrefix
[1]);
1458 * readOperands - Consults the specifier for an instruction and consumes all
1459 * operands for that instruction, interpreting them as it goes.
1461 * @param insn - The instruction whose operands are to be read and interpreted.
1462 * @return - 0 if all operands could be read; nonzero otherwise.
/*
 * readOperands: iterates over the X86_MAX_OPERANDS operand slots of
 * insn->spec and dispatches on each slot's encoding.  The visible actions
 * are: readModRM() followed by fixupReg(); readImmediate() with sizes 1, 2,
 * 4, 8, insn->immediateSize and insn->addressSize; readOpcodeRegister() with
 * sizes 1, 2, 4, 8 and 0; readOpcodeModifier(); and a further fixupReg()
 * call.  After a one-byte immediate, an operand of type TYPE_IMM3 is checked
 * against the limit 7.
 * NOTE(review): all case labels, the readVVVV() call site (if any), break
 * and return statements and the declaration of `index` are missing from
 * this listing, so the mapping from encodings to actions is not visible.
 */
1464 static int readOperands(struct InternalInstruction
* insn
) {
1467 dbgprintf(insn
, "readOperands()");
1469 for (index
= 0; index
< X86_MAX_OPERANDS
; ++index
) {
1470 switch (insn
->spec
->operands
[index
].encoding
) {
1475 if (readModRM(insn
))
1477 if (fixupReg(insn
, &insn
->spec
->operands
[index
]))
1486 dbgprintf(insn
, "We currently don't hande code-offset encodings");
1489 if (readImmediate(insn
, 1))
1491 if (insn
->spec
->operands
[index
].type
== TYPE_IMM3
&&
1492 insn
->immediates
[insn
->numImmediatesConsumed
- 1] > 7)
1496 if (readImmediate(insn
, 2))
1500 if (readImmediate(insn
, 4))
1504 if (readImmediate(insn
, 8))
1508 if (readImmediate(insn
, insn
->immediateSize
))
1512 if (readImmediate(insn
, insn
->addressSize
))
1516 if (readOpcodeRegister(insn
, 1))
1520 if (readOpcodeRegister(insn
, 2))
1524 if (readOpcodeRegister(insn
, 4))
1528 if (readOpcodeRegister(insn
, 8))
1532 if (readOpcodeRegister(insn
, 0))
1536 if (readOpcodeModifier(insn
))
1542 if (fixupReg(insn
, &insn
->spec
->operands
[index
]))
1548 dbgprintf(insn
, "Encountered an operand with an unknown encoding.");
1557 * decodeInstruction - Reads and interprets a full instruction provided by the
1560 * @param insn - A pointer to the instruction to be populated. Must be
1562 * @param reader - The function to be used to read the instruction's bytes.
1563 * @param readerArg - A generic argument to be passed to the reader to store
1564 * any internal state.
1565 * @param logger - If non-NULL, the function to be used to write log messages
1567 * @param loggerArg - A generic argument to be passed to the logger to store
1568 * any internal state.
1569 * @param startLoc - The address (in the reader's address space) of the first
1570 * byte in the instruction.
1571 * @param mode - The mode (real mode, IA-32e, or IA-32e in 64-bit mode) to
1572 * decode the instruction in.
1573 * @return - 0 if the instruction's memory could be read; nonzero if
/*
 * decodeInstruction: public entry point.  Zeroes *insn, stores the reader,
 * logger, their arguments and the start location, then runs the decoding
 * stages beginning with readPrefixes(); an instructionID of 0 is one of the
 * failure conditions in that chain.  Afterwards insn->length is the distance
 * from startLocation to the final readerCursor, and a message is logged if
 * the length exceeds 15 bytes.
 * NOTE(review): the middle parameters, the other stages of the `if` chain
 * and the return statements are missing from this listing.
 * NOTE(review): the log call prints startLoc/readerCursor with %llx and
 * length with %zu; this assumes those fields have matching types - confirm
 * against the struct declaration.
 */
1576 int decodeInstruction(struct InternalInstruction
* insn
,
1577 byteReader_t reader
,
1582 DisassemblerMode mode
) {
1583 memset(insn
, 0, sizeof(struct InternalInstruction
));
1585 insn
->reader
= reader
;
1586 insn
->readerArg
= readerArg
;
1587 insn
->dlog
= logger
;
1588 insn
->dlogArg
= loggerArg
;
1589 insn
->startLocation
= startLoc
;
1590 insn
->readerCursor
= startLoc
;
1592 insn
->numImmediatesConsumed
= 0;
1594 if (readPrefixes(insn
) ||
1597 insn
->instructionID
== 0 ||
1601 insn
->length
= insn
->readerCursor
- insn
->startLocation
;
1603 dbgprintf(insn
, "Read from 0x%llx to 0x%llx: length %zu",
1604 startLoc
, insn
->readerCursor
, insn
->length
);
1606 if (insn
->length
> 15)
1607 dbgprintf(insn
, "Instruction exceeds 15-byte limit");