1 /*===- X86DisassemblerDecoder.c - Disassembler decoder -------------*- C -*-==*
3 * The LLVM Compiler Infrastructure
5 * This file is distributed under the University of Illinois Open Source
6 * License. See LICENSE.TXT for details.
8 *===----------------------------------------------------------------------===*
10 * This file is part of the X86 Disassembler.
11 * It contains the implementation of the instruction decoder.
12 * Documentation for the disassembler can be found in X86Disassembler.h.
14 *===----------------------------------------------------------------------===*/
16 #include <stdarg.h> /* for va_*() */
17 #include <stdio.h> /* for vsnprintf() */
18 #include <stdlib.h> /* for exit() */
19 #include <string.h> /* for memset() */
21 #include "X86DisassemblerDecoder.h"
23 #include "X86GenDisassemblerTables.inc"
/*
 * debug - Diagnostic hook.  The first definition forwards the message to
 * x86DisassemblerDebug() together with the current file and line; the second
 * expands to an empty statement.
 * NOTE(review): the preprocessor conditional that selects between the two
 * definitions is not visible in this listing (presumably NDEBUG) - confirm.
 */
31 #define debug(s) do { x86DisassemblerDebug(__FILE__, __LINE__, s); } while (0)
33 #define debug(s) do { } while (0)
38 * contextForAttrs - Client for the instruction context table. Takes a set of
39 * attributes and returns the appropriate decode context.
41 * @param attrMask - Attributes, from the enumeration attributeBits.
42 * @return - The InstructionContext to use when looking up an
43 * an instruction with these attributes.
45 static InstructionContext
contextForAttrs(uint8_t attrMask
) {
46 return CONTEXTS_SYM
[attrMask
];
50 * modRMRequired - Reads the appropriate instruction table to determine whether
51 * the ModR/M byte is required to decode a particular instruction.
53 * @param type - The opcode type (i.e., how many bytes it has).
54 * @param insnContext - The context for the instruction, as returned by
56 * @param opcode - The last byte of the instruction's opcode, not counting
57 * ModR/M extensions and escapes.
58 * @return - TRUE if the ModR/M byte is required, FALSE otherwise.
60 static int modRMRequired(OpcodeType type
,
61 InstructionContext insnContext
,
63 const struct ContextDecision
* decision
= 0;
67 decision
= &ONEBYTE_SYM
;
70 decision
= &TWOBYTE_SYM
;
73 decision
= &THREEBYTE38_SYM
;
76 decision
= &THREEBYTE3A_SYM
;
80 return decision
->opcodeDecisions
[insnContext
].modRMDecisions
[opcode
].
81 modrm_type
!= MODRM_ONEENTRY
;
87 * decode - Reads the appropriate instruction table to obtain the unique ID of
90 * @param type - See modRMRequired().
91 * @param insnContext - See modRMRequired().
92 * @param opcode - See modRMRequired().
93 * @param modRM - The ModR/M byte if required, or any value if not.
94 * @return - The UID of the instruction, or 0 on failure.
96 static InstrUID
decode(OpcodeType type
,
97 InstructionContext insnContext
,
100 const struct ModRMDecision
* dec
;
104 debug("Unknown opcode type");
107 dec
= &ONEBYTE_SYM
.opcodeDecisions
[insnContext
].modRMDecisions
[opcode
];
110 dec
= &TWOBYTE_SYM
.opcodeDecisions
[insnContext
].modRMDecisions
[opcode
];
113 dec
= &THREEBYTE38_SYM
.opcodeDecisions
[insnContext
].modRMDecisions
[opcode
];
116 dec
= &THREEBYTE3A_SYM
.opcodeDecisions
[insnContext
].modRMDecisions
[opcode
];
120 switch (dec
->modrm_type
) {
122 debug("Corrupt table! Unknown modrm_type");
125 return dec
->instructionIDs
[0];
127 if (modFromModRM(modRM
) == 0x3)
128 return dec
->instructionIDs
[1];
130 return dec
->instructionIDs
[0];
132 return dec
->instructionIDs
[modRM
];
137 * specifierForUID - Given a UID, returns the name and operand specification for
140 * @param uid - The unique ID for the instruction. This should be returned by
141 * decode(); specifierForUID will not check bounds.
142 * @return - A pointer to the specification for that instruction.
144 static const struct InstructionSpecifier
*specifierForUID(InstrUID uid
) {
145 return &INSTRUCTIONS_SYM
[uid
];
149 * consumeByte - Uses the reader function provided by the user to consume one
150 * byte from the instruction's memory and advance the cursor.
152 * @param insn - The instruction with the reader function to use. The cursor
153 * for this instruction is advanced.
154 * @param byte - A pointer to a pre-allocated memory buffer to be populated
155 * with the data read.
156 * @return - 0 if the read was successful; nonzero otherwise.
158 static int consumeByte(struct InternalInstruction
* insn
, uint8_t* byte
) {
159 int ret
= insn
->reader(insn
->readerArg
, byte
, insn
->readerCursor
);
162 ++(insn
->readerCursor
);
168 * lookAtByte - Like consumeByte, but does not advance the cursor.
170 * @param insn - See consumeByte().
171 * @param byte - See consumeByte().
172 * @return - See consumeByte().
174 static int lookAtByte(struct InternalInstruction
* insn
, uint8_t* byte
) {
175 return insn
->reader(insn
->readerArg
, byte
, insn
->readerCursor
);
178 static void unconsumeByte(struct InternalInstruction
* insn
) {
179 insn
->readerCursor
--;
/*
 * CONSUME_FUNC - Defines a static function `name` that reads sizeof(type)
 *   bytes through the instruction's reader, starting at the read cursor, and
 *   assembles them least-significant byte first into *ptr.
 *
 * The value is accumulated in a uint64_t and converted to `type` once at the
 * end, so no shift is ever applied to a negative or promoted-signed operand
 * (left-shifting those is undefined behaviour for the signed instantiations).
 *
 * On a reader failure the reader's status is returned, *ptr is left untouched
 * and the cursor is not advanced; on success the cursor moves by sizeof(type)
 * and 0 is returned.
 */
#define CONSUME_FUNC(name, type)                                  \
  static int name(struct InternalInstruction* insn, type* ptr) {  \
    uint64_t combined = 0;                                        \
    unsigned offset;                                              \
    for (offset = 0; offset < sizeof(type); ++offset) {           \
      uint8_t byte;                                               \
      int ret = insn->reader(insn->readerArg,                     \
                             &byte,                               \
                             insn->readerCursor + offset);        \
      if (ret)                                                    \
        return ret;                                               \
      combined |= (uint64_t)byte << (offset * 8);                 \
    }                                                             \
    *ptr = (type)combined;                                        \
    insn->readerCursor += sizeof(type);                           \
    return 0;                                                     \
  }
201 * consume* - Use the reader function provided by the user to consume data
202 * values of various sizes from the instruction's memory and advance the
203 * cursor appropriately. These readers perform endian conversion.
205 * @param insn - See consumeByte().
206 * @param ptr - A pointer to a pre-allocated memory of appropriate size to
207 * be populated with the data read.
208 * @return - See consumeByte().
210 CONSUME_FUNC(consumeInt8
, int8_t)
211 CONSUME_FUNC(consumeInt16
, int16_t)
212 CONSUME_FUNC(consumeInt32
, int32_t)
213 CONSUME_FUNC(consumeUInt16
, uint16_t)
214 CONSUME_FUNC(consumeUInt32
, uint32_t)
215 CONSUME_FUNC(consumeUInt64
, uint64_t)
218 * dbgprintf - Uses the logging function provided by the user to log a single
219 * message, typically without a carriage-return.
221 * @param insn - The instruction containing the logging function.
222 * @param format - See printf().
223 * @param ... - See printf().
225 static void dbgprintf(struct InternalInstruction
* insn
,
234 va_start(ap
, format
);
235 (void)vsnprintf(buffer
, sizeof(buffer
), format
, ap
);
238 insn
->dlog(insn
->dlogArg
, buffer
);
244 * setPrefixPresent - Marks that a particular prefix is present at a particular
247 * @param insn - The instruction to be marked as having the prefix.
248 * @param prefix - The prefix that is present.
249 * @param location - The location where the prefix is located (in the address
250 * space of the instruction's reader).
252 static void setPrefixPresent(struct InternalInstruction
* insn
,
256 insn
->prefixPresent
[prefix
] = 1;
257 insn
->prefixLocations
[prefix
] = location
;
261 * isPrefixAtLocation - Queries an instruction to determine whether a prefix is
262 * present at a given location.
264 * @param insn - The instruction to be queried.
265 * @param prefix - The prefix.
266 * @param location - The location to query.
267 * @return - Whether the prefix is at that location.
269 static BOOL
isPrefixAtLocation(struct InternalInstruction
* insn
,
273 if (insn
->prefixPresent
[prefix
] == 1 &&
274 insn
->prefixLocations
[prefix
] == location
)
281 * readPrefixes - Consumes all of an instruction's prefix bytes, and marks the
282 * instruction as having them. Also sets the instruction's default operand,
283 * address, and other relevant data sizes to report operands correctly.
285 * @param insn - The instruction whose prefixes are to be read.
286 * @return - 0 if the instruction could be read until the end of the prefix
287 * bytes, and no prefixes conflicted; nonzero otherwise.
289 static int readPrefixes(struct InternalInstruction
* insn
) {
290 BOOL isPrefix
= TRUE
;
291 BOOL prefixGroups
[4] = { FALSE
};
292 uint64_t prefixLocation
;
295 BOOL hasAdSize
= FALSE
;
296 BOOL hasOpSize
= FALSE
;
298 dbgprintf(insn
, "readPrefixes()");
301 prefixLocation
= insn
->readerCursor
;
303 if (consumeByte(insn
, &byte
))
307 case 0xf0: /* LOCK */
308 case 0xf2: /* REPNE/REPNZ */
309 case 0xf3: /* REP or REPE/REPZ */
311 dbgprintf(insn
, "Redundant Group 1 prefix");
312 prefixGroups
[0] = TRUE
;
313 setPrefixPresent(insn
, byte
, prefixLocation
);
315 case 0x2e: /* CS segment override -OR- Branch not taken */
316 case 0x36: /* SS segment override -OR- Branch taken */
317 case 0x3e: /* DS segment override */
318 case 0x26: /* ES segment override */
319 case 0x64: /* FS segment override */
320 case 0x65: /* GS segment override */
323 insn
->segmentOverride
= SEG_OVERRIDE_CS
;
326 insn
->segmentOverride
= SEG_OVERRIDE_SS
;
329 insn
->segmentOverride
= SEG_OVERRIDE_DS
;
332 insn
->segmentOverride
= SEG_OVERRIDE_ES
;
335 insn
->segmentOverride
= SEG_OVERRIDE_FS
;
338 insn
->segmentOverride
= SEG_OVERRIDE_GS
;
341 debug("Unhandled override");
345 dbgprintf(insn
, "Redundant Group 2 prefix");
346 prefixGroups
[1] = TRUE
;
347 setPrefixPresent(insn
, byte
, prefixLocation
);
349 case 0x66: /* Operand-size override */
351 dbgprintf(insn
, "Redundant Group 3 prefix");
352 prefixGroups
[2] = TRUE
;
354 setPrefixPresent(insn
, byte
, prefixLocation
);
356 case 0x67: /* Address-size override */
358 dbgprintf(insn
, "Redundant Group 4 prefix");
359 prefixGroups
[3] = TRUE
;
361 setPrefixPresent(insn
, byte
, prefixLocation
);
363 default: /* Not a prefix byte */
369 dbgprintf(insn
, "Found prefix 0x%hhx", byte
);
372 if (insn
->mode
== MODE_64BIT
) {
373 if ((byte
& 0xf0) == 0x40) {
376 if (lookAtByte(insn
, &opcodeByte
) || ((opcodeByte
& 0xf0) == 0x40)) {
377 dbgprintf(insn
, "Redundant REX prefix");
381 insn
->rexPrefix
= byte
;
382 insn
->necessaryPrefixLocation
= insn
->readerCursor
- 2;
384 dbgprintf(insn
, "Found REX prefix 0x%hhx", byte
);
387 insn
->necessaryPrefixLocation
= insn
->readerCursor
- 1;
393 if (insn
->mode
== MODE_16BIT
) {
394 insn
->registerSize
= (hasOpSize
? 4 : 2);
395 insn
->addressSize
= (hasAdSize
? 4 : 2);
396 insn
->displacementSize
= (hasAdSize
? 4 : 2);
397 insn
->immediateSize
= (hasOpSize
? 4 : 2);
398 } else if (insn
->mode
== MODE_32BIT
) {
399 insn
->registerSize
= (hasOpSize
? 2 : 4);
400 insn
->addressSize
= (hasAdSize
? 2 : 4);
401 insn
->displacementSize
= (hasAdSize
? 2 : 4);
402 insn
->immediateSize
= (hasOpSize
? 2 : 4);
403 } else if (insn
->mode
== MODE_64BIT
) {
404 if (insn
->rexPrefix
&& wFromREX(insn
->rexPrefix
)) {
405 insn
->registerSize
= 8;
406 insn
->addressSize
= (hasAdSize
? 4 : 8);
407 insn
->displacementSize
= 4;
408 insn
->immediateSize
= 4;
409 } else if (insn
->rexPrefix
) {
410 insn
->registerSize
= (hasOpSize
? 2 : 4);
411 insn
->addressSize
= (hasAdSize
? 4 : 8);
412 insn
->displacementSize
= (hasOpSize
? 2 : 4);
413 insn
->immediateSize
= (hasOpSize
? 2 : 4);
415 insn
->registerSize
= (hasOpSize
? 2 : 4);
416 insn
->addressSize
= (hasAdSize
? 4 : 8);
417 insn
->displacementSize
= (hasOpSize
? 2 : 4);
418 insn
->immediateSize
= (hasOpSize
? 2 : 4);
426 * readOpcode - Reads the opcode (excepting the ModR/M byte in the case of
427 * extended or escape opcodes).
429 * @param insn - The instruction whose opcode is to be read.
430 * @return - 0 if the opcode could be read successfully; nonzero otherwise.
432 static int readOpcode(struct InternalInstruction
* insn
) {
433 /* Determine the length of the primary opcode */
437 dbgprintf(insn
, "readOpcode()");
439 insn
->opcodeType
= ONEBYTE
;
440 if (consumeByte(insn
, ¤t
))
443 if (current
== 0x0f) {
444 dbgprintf(insn
, "Found a two-byte escape prefix (0x%hhx)", current
);
446 insn
->twoByteEscape
= current
;
448 if (consumeByte(insn
, ¤t
))
451 if (current
== 0x38) {
452 dbgprintf(insn
, "Found a three-byte escape prefix (0x%hhx)", current
);
454 insn
->threeByteEscape
= current
;
456 if (consumeByte(insn
, ¤t
))
459 insn
->opcodeType
= THREEBYTE_38
;
460 } else if (current
== 0x3a) {
461 dbgprintf(insn
, "Found a three-byte escape prefix (0x%hhx)", current
);
463 insn
->threeByteEscape
= current
;
465 if (consumeByte(insn
, ¤t
))
468 insn
->opcodeType
= THREEBYTE_3A
;
470 dbgprintf(insn
, "Didn't find a three-byte escape prefix");
472 insn
->opcodeType
= TWOBYTE
;
477 * At this point we have consumed the full opcode.
478 * Anything we consume from here on must be unconsumed.
481 insn
->opcode
= current
;
486 static int readModRM(struct InternalInstruction
* insn
);
489 * getIDWithAttrMask - Determines the ID of an instruction, consuming
490 * the ModR/M byte as appropriate for extended and escape opcodes,
491 * and using a supplied attribute mask.
493 * @param instructionID - A pointer whose target is filled in with the ID of the
495 * @param insn - The instruction whose ID is to be determined.
496 * @param attrMask - The attribute mask to search.
497 * @return - 0 if the ModR/M could be read when needed or was not
498 * needed; nonzero otherwise.
500 static int getIDWithAttrMask(uint16_t* instructionID
,
501 struct InternalInstruction
* insn
,
503 BOOL hasModRMExtension
;
505 uint8_t instructionClass
;
507 instructionClass
= contextForAttrs(attrMask
);
509 hasModRMExtension
= modRMRequired(insn
->opcodeType
,
513 if (hasModRMExtension
) {
516 *instructionID
= decode(insn
->opcodeType
,
521 *instructionID
= decode(insn
->opcodeType
,
531 * is16BitEquivalent - Determines whether two instruction names refer to
532 * equivalent instructions but one is 16-bit whereas the other is not.
534 * @param orig - The instruction that is not 16-bit
535 * @param equiv - The instruction that is 16-bit
537 static BOOL
is16BitEquvalent(const char* orig
, const char* equiv
) {
541 if (orig
[i
] == '\0' && equiv
[i
] == '\0')
543 if (orig
[i
] == '\0' || equiv
[i
] == '\0')
545 if (orig
[i
] != equiv
[i
]) {
546 if ((orig
[i
] == 'Q' || orig
[i
] == 'L') && equiv
[i
] == 'W')
548 if ((orig
[i
] == '6' || orig
[i
] == '3') && equiv
[i
] == '1')
550 if ((orig
[i
] == '4' || orig
[i
] == '2') && equiv
[i
] == '6')
558 * is64BitEquivalent - Determines whether two instruction names refer to
559 * equivalent instructions but one is 64-bit whereas the other is not.
561 * @param orig - The instruction that is not 64-bit
562 * @param equiv - The instruction that is 64-bit
564 static BOOL
is64BitEquivalent(const char* orig
, const char* equiv
) {
568 if (orig
[i
] == '\0' && equiv
[i
] == '\0')
570 if (orig
[i
] == '\0' || equiv
[i
] == '\0')
572 if (orig
[i
] != equiv
[i
]) {
573 if ((orig
[i
] == 'W' || orig
[i
] == 'L') && equiv
[i
] == 'Q')
575 if ((orig
[i
] == '1' || orig
[i
] == '3') && equiv
[i
] == '6')
577 if ((orig
[i
] == '6' || orig
[i
] == '2') && equiv
[i
] == '4')
586 * getID - Determines the ID of an instruction, consuming the ModR/M byte as
587 * appropriate for extended and escape opcodes. Determines the attributes and
588 * context for the instruction before doing so.
590 * @param insn - The instruction whose ID is to be determined.
591 * @return - 0 if the ModR/M could be read when needed or was not needed;
594 static int getID(struct InternalInstruction
* insn
) {
596 uint16_t instructionID
;
598 dbgprintf(insn
, "getID()");
600 attrMask
= ATTR_NONE
;
602 if (insn
->mode
== MODE_64BIT
)
603 attrMask
|= ATTR_64BIT
;
605 if (insn
->rexPrefix
& 0x08)
606 attrMask
|= ATTR_REXW
;
608 if (isPrefixAtLocation(insn
, 0x66, insn
->necessaryPrefixLocation
))
609 attrMask
|= ATTR_OPSIZE
;
610 else if (isPrefixAtLocation(insn
, 0xf3, insn
->necessaryPrefixLocation
))
612 else if (isPrefixAtLocation(insn
, 0xf2, insn
->necessaryPrefixLocation
))
615 if (getIDWithAttrMask(&instructionID
, insn
, attrMask
))
618 /* The following clauses compensate for limitations of the tables. */
620 if ((attrMask
& ATTR_XD
) && (attrMask
& ATTR_REXW
)) {
622 * Although for SSE instructions it is usually necessary to treat REX.W+F2
623 * as F2 for decode (in the absence of a 64BIT_REXW_XD category) there is
624 * an occasional instruction where F2 is incidental and REX.W is the more
625 * significant. If the decoded instruction is 32-bit and adding REX.W
626 * instead of F2 changes a 32 to a 64, we adopt the new encoding.
629 const struct InstructionSpecifier
*spec
;
630 uint16_t instructionIDWithREXw
;
631 const struct InstructionSpecifier
*specWithREXw
;
633 spec
= specifierForUID(instructionID
);
635 if (getIDWithAttrMask(&instructionIDWithREXw
,
637 attrMask
& (~ATTR_XD
))) {
639 * Decoding with REX.w would yield nothing; give up and return original
643 insn
->instructionID
= instructionID
;
648 specWithREXw
= specifierForUID(instructionIDWithREXw
);
650 if (is64BitEquivalent(spec
->name
, specWithREXw
->name
)) {
651 insn
->instructionID
= instructionIDWithREXw
;
652 insn
->spec
= specWithREXw
;
654 insn
->instructionID
= instructionID
;
660 if (insn
->prefixPresent
[0x66] && !(attrMask
& ATTR_OPSIZE
)) {
662 * The instruction tables make no distinction between instructions that
663 * allow OpSize anywhere (i.e., 16-bit operations) and that need it in a
664 * particular spot (i.e., many MMX operations). In general we're
665 * conservative, but in the specific case where OpSize is present but not
666 * in the right place we check if there's a 16-bit operation.
669 const struct InstructionSpecifier
*spec
;
670 uint16_t instructionIDWithOpsize
;
671 const struct InstructionSpecifier
*specWithOpsize
;
673 spec
= specifierForUID(instructionID
);
675 if (getIDWithAttrMask(&instructionIDWithOpsize
,
677 attrMask
| ATTR_OPSIZE
)) {
679 * ModRM required with OpSize but not present; give up and return version
683 insn
->instructionID
= instructionID
;
688 specWithOpsize
= specifierForUID(instructionIDWithOpsize
);
690 if (is16BitEquvalent(spec
->name
, specWithOpsize
->name
)) {
691 insn
->instructionID
= instructionIDWithOpsize
;
692 insn
->spec
= specWithOpsize
;
694 insn
->instructionID
= instructionID
;
700 insn
->instructionID
= instructionID
;
701 insn
->spec
= specifierForUID(insn
->instructionID
);
707 * readSIB - Consumes the SIB byte to determine addressing information for an
710 * @param insn - The instruction whose SIB byte is to be read.
711 * @return - 0 if the SIB byte was successfully read; nonzero otherwise.
713 static int readSIB(struct InternalInstruction
* insn
) {
714 SIBIndex sibIndexBase
= 0;
715 SIBBase sibBaseBase
= 0;
718 dbgprintf(insn
, "readSIB()");
720 if (insn
->consumedSIB
)
723 insn
->consumedSIB
= TRUE
;
725 switch (insn
->addressSize
) {
727 dbgprintf(insn
, "SIB-based addressing doesn't work in 16-bit mode");
731 sibIndexBase
= SIB_INDEX_EAX
;
732 sibBaseBase
= SIB_BASE_EAX
;
735 sibIndexBase
= SIB_INDEX_RAX
;
736 sibBaseBase
= SIB_BASE_RAX
;
740 if (consumeByte(insn
, &insn
->sib
))
743 index
= indexFromSIB(insn
->sib
) | (xFromREX(insn
->rexPrefix
) << 3);
747 insn
->sibIndex
= SIB_INDEX_NONE
;
750 insn
->sibIndex
= (EABase
)(sibIndexBase
+ index
);
751 if (insn
->sibIndex
== SIB_INDEX_sib
||
752 insn
->sibIndex
== SIB_INDEX_sib64
)
753 insn
->sibIndex
= SIB_INDEX_NONE
;
757 switch (scaleFromSIB(insn
->sib
)) {
772 base
= baseFromSIB(insn
->sib
) | (bFromREX(insn
->rexPrefix
) << 3);
776 switch (modFromModRM(insn
->modRM
)) {
778 insn
->eaDisplacement
= EA_DISP_32
;
779 insn
->sibBase
= SIB_BASE_NONE
;
782 insn
->eaDisplacement
= EA_DISP_8
;
783 insn
->sibBase
= (insn
->addressSize
== 4 ?
784 SIB_BASE_EBP
: SIB_BASE_RBP
);
787 insn
->eaDisplacement
= EA_DISP_32
;
788 insn
->sibBase
= (insn
->addressSize
== 4 ?
789 SIB_BASE_EBP
: SIB_BASE_RBP
);
792 debug("Cannot have Mod = 0b11 and a SIB byte");
797 insn
->sibBase
= (EABase
)(sibBaseBase
+ base
);
805 * readDisplacement - Consumes the displacement of an instruction.
807 * @param insn - The instruction whose displacement is to be read.
808 * @return - 0 if the displacement byte was successfully read; nonzero
811 static int readDisplacement(struct InternalInstruction
* insn
) {
816 dbgprintf(insn
, "readDisplacement()");
818 if (insn
->consumedDisplacement
)
821 insn
->consumedDisplacement
= TRUE
;
823 switch (insn
->eaDisplacement
) {
825 insn
->consumedDisplacement
= FALSE
;
828 if (consumeInt8(insn
, &d8
))
830 insn
->displacement
= d8
;
833 if (consumeInt16(insn
, &d16
))
835 insn
->displacement
= d16
;
838 if (consumeInt32(insn
, &d32
))
840 insn
->displacement
= d32
;
844 insn
->consumedDisplacement
= TRUE
;
849 * readModRM - Consumes all addressing information (ModR/M byte, SIB byte, and
850 * displacement) for an instruction and interprets it.
852 * @param insn - The instruction whose addressing information is to be read.
853 * @return - 0 if the information was successfully read; nonzero otherwise.
855 static int readModRM(struct InternalInstruction
* insn
) {
856 uint8_t mod
, rm
, reg
;
858 dbgprintf(insn
, "readModRM()");
860 if (insn
->consumedModRM
)
863 consumeByte(insn
, &insn
->modRM
);
864 insn
->consumedModRM
= TRUE
;
866 mod
= modFromModRM(insn
->modRM
);
867 rm
= rmFromModRM(insn
->modRM
);
868 reg
= regFromModRM(insn
->modRM
);
871 * This goes by insn->registerSize to pick the correct register, which messes
872 * up if we're using (say) XMM or 8-bit register operands. That gets fixed in
875 switch (insn
->registerSize
) {
877 insn
->regBase
= MODRM_REG_AX
;
878 insn
->eaRegBase
= EA_REG_AX
;
881 insn
->regBase
= MODRM_REG_EAX
;
882 insn
->eaRegBase
= EA_REG_EAX
;
885 insn
->regBase
= MODRM_REG_RAX
;
886 insn
->eaRegBase
= EA_REG_RAX
;
890 reg
|= rFromREX(insn
->rexPrefix
) << 3;
891 rm
|= bFromREX(insn
->rexPrefix
) << 3;
893 insn
->reg
= (Reg
)(insn
->regBase
+ reg
);
895 switch (insn
->addressSize
) {
897 insn
->eaBaseBase
= EA_BASE_BX_SI
;
902 insn
->eaBase
= EA_BASE_NONE
;
903 insn
->eaDisplacement
= EA_DISP_16
;
904 if (readDisplacement(insn
))
907 insn
->eaBase
= (EABase
)(insn
->eaBaseBase
+ rm
);
908 insn
->eaDisplacement
= EA_DISP_NONE
;
912 insn
->eaBase
= (EABase
)(insn
->eaBaseBase
+ rm
);
913 insn
->eaDisplacement
= EA_DISP_8
;
914 if (readDisplacement(insn
))
918 insn
->eaBase
= (EABase
)(insn
->eaBaseBase
+ rm
);
919 insn
->eaDisplacement
= EA_DISP_16
;
920 if (readDisplacement(insn
))
924 insn
->eaBase
= (EABase
)(insn
->eaRegBase
+ rm
);
925 if (readDisplacement(insn
))
932 insn
->eaBaseBase
= (insn
->addressSize
== 4 ? EA_BASE_EAX
: EA_BASE_RAX
);
936 insn
->eaDisplacement
= EA_DISP_NONE
; /* readSIB may override this */
939 case 0xc: /* in case REXW.b is set */
940 insn
->eaBase
= (insn
->addressSize
== 4 ?
941 EA_BASE_sib
: EA_BASE_sib64
);
943 if (readDisplacement(insn
))
947 insn
->eaBase
= EA_BASE_NONE
;
948 insn
->eaDisplacement
= EA_DISP_32
;
949 if (readDisplacement(insn
))
953 insn
->eaBase
= (EABase
)(insn
->eaBaseBase
+ rm
);
959 insn
->eaDisplacement
= (mod
== 0x1 ? EA_DISP_8
: EA_DISP_32
);
962 case 0xc: /* in case REXW.b is set */
963 insn
->eaBase
= EA_BASE_sib
;
965 if (readDisplacement(insn
))
969 insn
->eaBase
= (EABase
)(insn
->eaBaseBase
+ rm
);
970 if (readDisplacement(insn
))
976 insn
->eaDisplacement
= EA_DISP_NONE
;
977 insn
->eaBase
= (EABase
)(insn
->eaRegBase
+ rm
);
981 } /* switch (insn->addressSize) */
/*
 * GENERIC_FIXUP_FUNC - Defines a static function `name`, returning uint8_t,
 * that maps a register index to a register value for a given operand type.
 * One visible arm returns `base + index`; the others return
 * prefix##_<first register of the class> + index for the AL, AX, EAX, RAX,
 * XMM0, MM0, ES, DR0 and CR0 classes.  In the arm that can return
 * prefix##_SPL, indices 4..7 are remapped to prefix##_SPL + (index - 4) when
 * the instruction has a REX prefix.  An unhandled type is reported through
 * debug().
 * NOTE(review): several lines of this macro (remaining parameters, most case
 * labels, and any range checks) are not visible in this listing - consult the
 * full source before relying on the exact type-to-class mapping.
 */
986 #define GENERIC_FIXUP_FUNC(name, base, prefix) \
987 static uint8_t name(struct InternalInstruction *insn, \
994 debug("Unhandled register type"); \
998 return base + index; \
1000 if (insn->rexPrefix && \
1001 index >= 4 && index <= 7) { \
1002 return prefix##_SPL + (index - 4); \
1004 return prefix##_AL + index; \
1007 return prefix##_AX + index; \
1009 return prefix##_EAX + index; \
1011 return prefix##_RAX + index; \
1016 return prefix##_XMM0 + index; \
1022 return prefix##_MM0 + index; \
1023 case TYPE_SEGMENTREG: \
1026 return prefix##_ES + index; \
1027 case TYPE_DEBUGREG: \
1030 return prefix##_DR0 + index; \
1031 case TYPE_CONTROLREG: \
1034 return prefix##_CR0 + index; \
1039 * fixup*Value - Consults an operand type to determine the meaning of the
1040 * reg or R/M field. If the operand is an XMM operand, for example, an
1041 * operand would be XMM0 instead of AX, which readModRM() would otherwise
1042 * misinterpret it as.
1044 * @param insn - The instruction containing the operand.
1045 * @param type - The operand type.
1046 * @param index - The existing value of the field as reported by readModRM().
1047 * @param valid - The address of a uint8_t. The target is set to 1 if the
1048 * field is valid for the register class; 0 if not.
1049 * @return - The proper value.
1051 GENERIC_FIXUP_FUNC(fixupRegValue
, insn
->regBase
, MODRM_REG
)
1052 GENERIC_FIXUP_FUNC(fixupRMValue
, insn
->eaRegBase
, EA_REG
)
1055 * fixupReg - Consults an operand specifier to determine which of the
1056 * fixup*Value functions to use in correcting readModRM()'ss interpretation.
1058 * @param insn - See fixup*Value().
1059 * @param op - The operand specifier.
1060 * @return - 0 if fixup was successful; -1 if the register returned was
1061 * invalid for its class.
1063 static int fixupReg(struct InternalInstruction
*insn
,
1064 const struct OperandSpecifier
*op
) {
1067 dbgprintf(insn
, "fixupReg()");
1069 switch ((OperandEncoding
)op
->encoding
) {
1071 debug("Expected a REG or R/M encoding in fixupReg");
1074 insn
->reg
= (Reg
)fixupRegValue(insn
,
1075 (OperandType
)op
->type
,
1076 insn
->reg
- insn
->regBase
,
1082 if (insn
->eaBase
>= insn
->eaRegBase
) {
1083 insn
->eaBase
= (EABase
)fixupRMValue(insn
,
1084 (OperandType
)op
->type
,
1085 insn
->eaBase
- insn
->eaRegBase
,
1097 * readOpcodeModifier - Reads an operand from the opcode field of an
1098 * instruction. Handles AddRegFrm instructions.
1100 * @param insn - The instruction whose opcode field is to be read.
1101 * @param inModRM - Indicates that the opcode field is to be read from the
1102 * ModR/M extension; useful for escape opcodes
1103 * @return - 0 on success; nonzero otherwise.
1105 static int readOpcodeModifier(struct InternalInstruction
* insn
) {
1106 dbgprintf(insn
, "readOpcodeModifier()");
1108 if (insn
->consumedOpcodeModifier
)
1111 insn
->consumedOpcodeModifier
= TRUE
;
1113 switch (insn
->spec
->modifierType
) {
1115 debug("Unknown modifier type.");
1118 debug("No modifier but an operand expects one.");
1120 case MODIFIER_OPCODE
:
1121 insn
->opcodeModifier
= insn
->opcode
- insn
->spec
->modifierBase
;
1123 case MODIFIER_MODRM
:
1124 insn
->opcodeModifier
= insn
->modRM
- insn
->spec
->modifierBase
;
1130 * readOpcodeRegister - Reads an operand from the opcode field of an
1131 * instruction and interprets it appropriately given the operand width.
1132 * Handles AddRegFrm instructions.
1134 * @param insn - See readOpcodeModifier().
1135 * @param size - The width (in bytes) of the register being specified.
1136 * 1 means AL and friends, 2 means AX, 4 means EAX, and 8 means
1138 * @return - 0 on success; nonzero otherwise.
1140 static int readOpcodeRegister(struct InternalInstruction
* insn
, uint8_t size
) {
1141 dbgprintf(insn
, "readOpcodeRegister()");
1143 if (readOpcodeModifier(insn
))
1147 size
= insn
->registerSize
;
1151 insn
->opcodeRegister
= (Reg
)(MODRM_REG_AL
+ ((bFromREX(insn
->rexPrefix
) << 3)
1152 | insn
->opcodeModifier
));
1153 if (insn
->rexPrefix
&&
1154 insn
->opcodeRegister
>= MODRM_REG_AL
+ 0x4 &&
1155 insn
->opcodeRegister
< MODRM_REG_AL
+ 0x8) {
1156 insn
->opcodeRegister
= (Reg
)(MODRM_REG_SPL
1157 + (insn
->opcodeRegister
- MODRM_REG_AL
- 4));
1162 insn
->opcodeRegister
= (Reg
)(MODRM_REG_AX
1163 + ((bFromREX(insn
->rexPrefix
) << 3)
1164 | insn
->opcodeModifier
));
1167 insn
->opcodeRegister
= (Reg
)(MODRM_REG_EAX
1168 + ((bFromREX(insn
->rexPrefix
) << 3)
1169 | insn
->opcodeModifier
));
1172 insn
->opcodeRegister
= (Reg
)(MODRM_REG_RAX
1173 + ((bFromREX(insn
->rexPrefix
) << 3)
1174 | insn
->opcodeModifier
));
1182 * readImmediate - Consumes an immediate operand from an instruction, given the
1183 * desired operand size.
1185 * @param insn - The instruction whose operand is to be read.
1186 * @param size - The width (in bytes) of the operand.
1187 * @return - 0 if the immediate was successfully consumed; nonzero
1190 static int readImmediate(struct InternalInstruction
* insn
, uint8_t size
) {
1196 dbgprintf(insn
, "readImmediate()");
1198 if (insn
->numImmediatesConsumed
== 2) {
1199 debug("Already consumed two immediates");
1204 size
= insn
->immediateSize
;
1206 insn
->immediateSize
= size
;
1210 if (consumeByte(insn
, &imm8
))
1212 insn
->immediates
[insn
->numImmediatesConsumed
] = imm8
;
1215 if (consumeUInt16(insn
, &imm16
))
1217 insn
->immediates
[insn
->numImmediatesConsumed
] = imm16
;
1220 if (consumeUInt32(insn
, &imm32
))
1222 insn
->immediates
[insn
->numImmediatesConsumed
] = imm32
;
1225 if (consumeUInt64(insn
, &imm64
))
1227 insn
->immediates
[insn
->numImmediatesConsumed
] = imm64
;
1231 insn
->numImmediatesConsumed
++;
1237 * readOperands - Consults the specifier for an instruction and consumes all
1238 * operands for that instruction, interpreting them as it goes.
1240 * @param insn - The instruction whose operands are to be read and interpreted.
1241 * @return - 0 if all operands could be read; nonzero otherwise.
1243 static int readOperands(struct InternalInstruction
* insn
) {
1246 dbgprintf(insn
, "readOperands()");
1248 for (index
= 0; index
< X86_MAX_OPERANDS
; ++index
) {
1249 switch (insn
->spec
->operands
[index
].encoding
) {
1254 if (readModRM(insn
))
1256 if (fixupReg(insn
, &insn
->spec
->operands
[index
]))
1265 dbgprintf(insn
, "We currently don't hande code-offset encodings");
1268 if (readImmediate(insn
, 1))
1270 if (insn
->spec
->operands
[index
].type
== TYPE_IMM3
&&
1271 insn
->immediates
[insn
->numImmediatesConsumed
- 1] > 7)
1275 if (readImmediate(insn
, 2))
1279 if (readImmediate(insn
, 4))
1283 if (readImmediate(insn
, 8))
1287 if (readImmediate(insn
, insn
->immediateSize
))
1291 if (readImmediate(insn
, insn
->addressSize
))
1295 if (readOpcodeRegister(insn
, 1))
1299 if (readOpcodeRegister(insn
, 2))
1303 if (readOpcodeRegister(insn
, 4))
1307 if (readOpcodeRegister(insn
, 8))
1311 if (readOpcodeRegister(insn
, 0))
1315 if (readOpcodeModifier(insn
))
1320 dbgprintf(insn
, "Encountered an operand with an unknown encoding.");
1329 * decodeInstruction - Reads and interprets a full instruction provided by the
1332 * @param insn - A pointer to the instruction to be populated. Must be
1334 * @param reader - The function to be used to read the instruction's bytes.
1335 * @param readerArg - A generic argument to be passed to the reader to store
1336 * any internal state.
1337 * @param logger - If non-NULL, the function to be used to write log messages
1339 * @param loggerArg - A generic argument to be passed to the logger to store
1340 * any internal state.
1341 * @param startLoc - The address (in the reader's address space) of the first
1342 * byte in the instruction.
1343 * @param mode - The mode (real mode, IA-32e, or IA-32e in 64-bit mode) to
1344 * decode the instruction in.
1345 * @return - 0 if the instruction's memory could be read; nonzero if
1348 int decodeInstruction(struct InternalInstruction
* insn
,
1349 byteReader_t reader
,
1354 DisassemblerMode mode
) {
1355 memset(insn
, 0, sizeof(struct InternalInstruction
));
1357 insn
->reader
= reader
;
1358 insn
->readerArg
= readerArg
;
1359 insn
->dlog
= logger
;
1360 insn
->dlogArg
= loggerArg
;
1361 insn
->startLocation
= startLoc
;
1362 insn
->readerCursor
= startLoc
;
1364 insn
->numImmediatesConsumed
= 0;
1366 if (readPrefixes(insn
) ||
1369 insn
->instructionID
== 0 ||
1373 insn
->length
= insn
->readerCursor
- insn
->startLocation
;
1375 dbgprintf(insn
, "Read from 0x%llx to 0x%llx: length %zu",
1376 startLoc
, insn
->readerCursor
, insn
->length
);
1378 if (insn
->length
> 15)
1379 dbgprintf(insn
, "Instruction exceeds 15-byte limit");