Add ICU message format support
[chromium-blink-merge.git] / third_party / mach_override / libudis86 / decode.c
blob2b352dc879951260c171573777aa738a8426caf9
1 /* udis86 - libudis86/decode.c
2 *
3 * Copyright (c) 2002-2009 Vivek Thampi
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without modification,
7 * are permitted provided that the following conditions are met:
8 *
9 * * Redistributions of source code must retain the above copyright notice,
10 * this list of conditions and the following disclaimer.
11 * * Redistributions in binary form must reproduce the above copyright notice,
12 * this list of conditions and the following disclaimer in the documentation
13 * and/or other materials provided with the distribution.
15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
19 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
22 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 #include "udint.h"
27 #include "types.h"
28 #include "input.h"
29 #include "decode.h"
31 #ifndef __UD_STANDALONE__
32 # include <string.h>
33 #endif /* __UD_STANDALONE__ */
/* Upper bound on the number of prefix bytes of a single instruction. */
#define MAX_PREFIXES      15

/*
 * REX prefix bits: each extractor yields 0 or 1 for the W (bit 3),
 * R (bit 2), X (bit 1) or B (bit 0) flag of the REX byte r.
 */
#define REX_W(r)          ( ( ( r ) & 0x8 ) >> 3 )
#define REX_R(r)          ( ( ( r ) & 0x4 ) >> 2 )
#define REX_X(r)          ( ( ( r ) & 0x2 ) >> 1 )
#define REX_B(r)          ( ( ( r ) & 0x1 ) >> 0 )

/* Builds a W/R/X/B bit mask out of the P_REX* flags of prefix word n. */
#define REX_PFX_MASK(n)   ( ( P_REXW(n) << 3 ) | \
                            ( P_REXR(n) << 2 ) | \
                            ( P_REXX(n) << 1 ) | \
                            ( P_REXB(n) << 0 ) )

/* scale-index-base (SIB) byte fields */
#define SIB_S(b)          ( ( b ) >> 6 )
#define SIB_I(b)          ( ( ( b ) >> 3 ) & 7 )
#define SIB_B(b)          ( ( b ) & 7 )

/* modrm byte fields; MODRM_NNN is an alias for the reg field */
#define MODRM_MOD(b)      ( ( ( b ) >> 6 ) & 3 )
#define MODRM_REG(b)      ( ( ( b ) >> 3 ) & 7 )
#define MODRM_NNN(b)      ( ( ( b ) >> 3 ) & 7 )
#define MODRM_RM(b)       ( ( b ) & 7 )
59 static int decode_ext(struct ud *u, uint16_t ptr);
/* Register classes, as consumed by decode_reg(). */
enum reg_class {
  REGCLASS_NONE = 0,  /* no register class */
  REGCLASS_GPR,       /* general purpose registers */
  REGCLASS_MMX,       /* MMX registers (mm0..) */
  REGCLASS_CR,        /* control registers (cr0..) */
  REGCLASS_DB,        /* debug registers (dr0..) */
  REGCLASS_SEG,       /* segment registers (es..) */
  REGCLASS_XMM        /* XMM registers (xmm0..) */
};
/*
 * inp_uint8
 * inp_uint16
 * inp_uint32
 * inp_uint64
 *    Load little-endian values from input
 *
 * inp_uint8 fetches exactly one byte via ud_inp_next().
 */
static uint8_t
inp_uint8(struct ud* u)
{
  const uint8_t byte = ud_inp_next(u);

  return byte;
}
/*
 * Reads two bytes from the input, least significant byte first, and
 * returns them combined into one 16-bit value.
 */
static uint16_t
inp_uint16(struct ud* u)
{
  const uint16_t lo = ud_inp_next(u);
  const uint16_t hi = ud_inp_next(u);

  return lo | (hi << 8);
}
/*
 * Reads four bytes from the input, least significant byte first, and
 * returns them combined into one 32-bit value.
 */
static uint32_t
inp_uint32(struct ud* u)
{
  uint32_t value = 0;
  unsigned int shift;

  for (shift = 0; shift < 32; shift += 8) {
    const uint32_t byte = ud_inp_next(u);
    value |= byte << shift;
  }
  return value;
}
/*
 * Reads eight bytes from the input, least significant byte first, and
 * returns them combined into one 64-bit value.
 */
static uint64_t
inp_uint64(struct ud* u)
{
  uint64_t value = 0;
  unsigned int shift;

  for (shift = 0; shift < 64; shift += 8) {
    const uint64_t byte = ud_inp_next(u);
    value |= byte << shift;
  }
  return value;
}
/*
 * Computes the effective operand size (16, 32 or 64) from the
 * disassembly mode, the REX.W bit and the operand-size prefix flag.
 * Any dis_mode other than 64 or 32 is asserted to be 16.
 */
static inline int
eff_opr_mode(int dis_mode, int rex_w, int pfx_opr)
{
  switch (dis_mode) {
  case 64:
    if (rex_w) {
      return 64;
    }
    return pfx_opr ? 16 : 32;
  case 32:
    return pfx_opr ? 16 : 32;
  default:
    UD_ASSERT(dis_mode == 16);
    return pfx_opr ? 32 : 16;
  }
}
/*
 * Computes the effective address size (16, 32 or 64) from the
 * disassembly mode and the address-size prefix flag.
 * Any dis_mode other than 64 or 32 is asserted to be 16.
 */
static inline int
eff_adr_mode(int dis_mode, int pfx_adr)
{
  switch (dis_mode) {
  case 64:
    return pfx_adr ? 32 : 64;
  case 32:
    return pfx_adr ? 16 : 32;
  default:
    UD_ASSERT(dis_mode == 16);
    return pfx_adr ? 32 : 16;
  }
}
160 /* Looks up mnemonic code in the mnemonic string table
161 * Returns NULL if the mnemonic code is invalid
163 const char*
164 ud_lookup_mnemonic(enum ud_mnemonic_code c)
166 if (c < UD_MAX_MNEMONIC_CODE) {
167 return ud_mnemonics_str[c];
168 } else {
169 return NULL;
175 * decode_prefixes
177 * Extracts instruction prefixes.
179 static int
180 decode_prefixes(struct ud *u)
182 int done = 0;
183 uint8_t curr;
184 UD_RETURN_ON_ERROR(u);
186 do {
187 ud_inp_next(u);
188 UD_RETURN_ON_ERROR(u);
189 if (inp_len(u) == MAX_INSN_LENGTH) {
190 UD_RETURN_WITH_ERROR(u, "max instruction length");
192 curr = inp_curr(u);
194 switch (curr)
196 case 0x2E :
197 u->pfx_seg = UD_R_CS;
198 break;
199 case 0x36 :
200 u->pfx_seg = UD_R_SS;
201 break;
202 case 0x3E :
203 u->pfx_seg = UD_R_DS;
204 break;
205 case 0x26 :
206 u->pfx_seg = UD_R_ES;
207 break;
208 case 0x64 :
209 u->pfx_seg = UD_R_FS;
210 break;
211 case 0x65 :
212 u->pfx_seg = UD_R_GS;
213 break;
214 case 0x67 : /* adress-size override prefix */
215 u->pfx_adr = 0x67;
216 break;
217 case 0xF0 :
218 u->pfx_lock = 0xF0;
219 break;
220 case 0x66:
221 u->pfx_opr = 0x66;
222 break;
223 case 0xF2:
224 u->pfx_str = 0xf2;
225 break;
226 case 0xF3:
227 u->pfx_str = 0xf3;
228 break;
229 default:
230 done = 1;
231 break;
233 } while (!done);
235 if (u->dis_mode == 64 && (curr & 0xF0) == 0x40) {
236 /* rex prefixes in 64bit mode, must be the last prefix
238 u->pfx_rex = curr;
239 } else {
240 /* rewind back one byte in stream, since the above loop
241 * stops with a non-prefix byte.
243 inp_back(u);
245 return 0;
249 static inline unsigned int modrm( struct ud * u )
251 if ( !u->have_modrm ) {
252 u->modrm = ud_inp_next( u );
253 u->have_modrm = 1;
255 return u->modrm;
259 static unsigned int
260 resolve_operand_size( const struct ud * u, unsigned int s )
262 switch ( s )
264 case SZ_V:
265 return ( u->opr_mode );
266 case SZ_Z:
267 return ( u->opr_mode == 16 ) ? 16 : 32;
268 case SZ_Y:
269 return ( u->opr_mode == 16 ) ? 32 : u->opr_mode;
270 case SZ_RDQ:
271 return ( u->dis_mode == 64 ) ? 64 : 32;
272 default:
273 return s;
278 static int resolve_mnemonic( struct ud* u )
280 /* resolve 3dnow weirdness. */
281 if ( u->mnemonic == UD_I3dnow ) {
282 u->mnemonic = ud_itab[ u->le->table[ inp_curr( u ) ] ].mnemonic;
284 /* SWAPGS is only valid in 64bits mode */
285 if ( u->mnemonic == UD_Iswapgs && u->dis_mode != 64 ) {
286 UDERR(u, "swapgs invalid in 64bits mode");
287 return -1;
290 if (u->mnemonic == UD_Ixchg) {
291 if ((u->operand[0].type == UD_OP_REG && u->operand[0].base == UD_R_AX &&
292 u->operand[1].type == UD_OP_REG && u->operand[1].base == UD_R_AX) ||
293 (u->operand[0].type == UD_OP_REG && u->operand[0].base == UD_R_EAX &&
294 u->operand[1].type == UD_OP_REG && u->operand[1].base == UD_R_EAX)) {
295 u->operand[0].type = UD_NONE;
296 u->operand[1].type = UD_NONE;
297 u->mnemonic = UD_Inop;
301 if (u->mnemonic == UD_Inop && u->pfx_repe) {
302 u->pfx_repe = 0;
303 u->mnemonic = UD_Ipause;
305 return 0;
309 /* -----------------------------------------------------------------------------
310 * decode_a()- Decodes operands of the type seg:offset
311 * -----------------------------------------------------------------------------
313 static void
314 decode_a(struct ud* u, struct ud_operand *op)
316 if (u->opr_mode == 16) {
317 /* seg16:off16 */
318 op->type = UD_OP_PTR;
319 op->size = 32;
320 op->lval.ptr.off = inp_uint16(u);
321 op->lval.ptr.seg = inp_uint16(u);
322 } else {
323 /* seg16:off32 */
324 op->type = UD_OP_PTR;
325 op->size = 48;
326 op->lval.ptr.off = inp_uint32(u);
327 op->lval.ptr.seg = inp_uint16(u);
331 /* -----------------------------------------------------------------------------
332 * decode_gpr() - Returns decoded General Purpose Register
333 * -----------------------------------------------------------------------------
335 static enum ud_type
336 decode_gpr(register struct ud* u, unsigned int s, unsigned char rm)
338 switch (s) {
339 case 64:
340 return UD_R_RAX + rm;
341 case 32:
342 return UD_R_EAX + rm;
343 case 16:
344 return UD_R_AX + rm;
345 case 8:
346 if (u->dis_mode == 64 && u->pfx_rex) {
347 if (rm >= 4)
348 return UD_R_SPL + (rm-4);
349 return UD_R_AL + rm;
350 } else return UD_R_AL + rm;
351 default:
352 UD_ASSERT(!"invalid operand size");
353 return 0;
357 static void
358 decode_reg(struct ud *u,
359 struct ud_operand *opr,
360 int type,
361 int num,
362 int size)
364 int reg;
365 size = resolve_operand_size(u, size);
366 switch (type) {
367 case REGCLASS_GPR : reg = decode_gpr(u, size, num); break;
368 case REGCLASS_MMX : reg = UD_R_MM0 + (num & 7); break;
369 case REGCLASS_XMM : reg = UD_R_XMM0 + num; break;
370 case REGCLASS_CR : reg = UD_R_CR0 + num; break;
371 case REGCLASS_DB : reg = UD_R_DR0 + num; break;
372 case REGCLASS_SEG : {
374 * Only 6 segment registers, anything else is an error.
376 if ((num & 7) > 5) {
377 UDERR(u, "invalid segment register value");
378 return;
379 } else {
380 reg = UD_R_ES + (num & 7);
382 break;
384 default:
385 UD_ASSERT(!"invalid register type");
386 break;
388 opr->type = UD_OP_REG;
389 opr->base = reg;
390 opr->size = size;
395 * decode_imm
397 * Decode Immediate values.
399 static void
400 decode_imm(struct ud* u, unsigned int size, struct ud_operand *op)
402 op->size = resolve_operand_size(u, size);
403 op->type = UD_OP_IMM;
405 switch (op->size) {
406 case 8: op->lval.sbyte = inp_uint8(u); break;
407 case 16: op->lval.uword = inp_uint16(u); break;
408 case 32: op->lval.udword = inp_uint32(u); break;
409 case 64: op->lval.uqword = inp_uint64(u); break;
410 default: return;
416 * decode_mem_disp
418 * Decode mem address displacement.
420 static void
421 decode_mem_disp(struct ud* u, unsigned int size, struct ud_operand *op)
423 switch (size) {
424 case 8:
425 op->offset = 8;
426 op->lval.ubyte = inp_uint8(u);
427 break;
428 case 16:
429 op->offset = 16;
430 op->lval.uword = inp_uint16(u);
431 break;
432 case 32:
433 op->offset = 32;
434 op->lval.udword = inp_uint32(u);
435 break;
436 case 64:
437 op->offset = 64;
438 op->lval.uqword = inp_uint64(u);
439 break;
440 default:
441 return;
447 * decode_modrm_reg
449 * Decodes reg field of mod/rm byte
452 static inline void
453 decode_modrm_reg(struct ud *u,
454 struct ud_operand *operand,
455 unsigned int type,
456 unsigned int size)
458 uint8_t reg = (REX_R(u->pfx_rex) << 3) | MODRM_REG(modrm(u));
459 decode_reg(u, operand, type, reg, size);
464 * decode_modrm_rm
466 * Decodes rm field of mod/rm byte
469 static void
470 decode_modrm_rm(struct ud *u,
471 struct ud_operand *op,
472 unsigned char type, /* register type */
473 unsigned int size) /* operand size */
476 size_t offset = 0;
477 unsigned char mod, rm;
479 /* get mod, r/m and reg fields */
480 mod = MODRM_MOD(modrm(u));
481 rm = (REX_B(u->pfx_rex) << 3) | MODRM_RM(modrm(u));
484 * If mod is 11b, then the modrm.rm specifies a register.
487 if (mod == 3) {
488 decode_reg(u, op, type, rm, size);
489 return;
493 * !11b => Memory Address
495 op->type = UD_OP_MEM;
496 op->size = resolve_operand_size(u, size);
498 if (u->adr_mode == 64) {
499 op->base = UD_R_RAX + rm;
500 if (mod == 1) {
501 offset = 8;
502 } else if (mod == 2) {
503 offset = 32;
504 } else if (mod == 0 && (rm & 7) == 5) {
505 op->base = UD_R_RIP;
506 offset = 32;
507 } else {
508 offset = 0;
511 * Scale-Index-Base (SIB)
513 if ((rm & 7) == 4) {
514 ud_inp_next(u);
516 op->scale = (1 << SIB_S(inp_curr(u))) & ~1;
517 op->index = UD_R_RAX + (SIB_I(inp_curr(u)) | (REX_X(u->pfx_rex) << 3));
518 op->base = UD_R_RAX + (SIB_B(inp_curr(u)) | (REX_B(u->pfx_rex) << 3));
520 /* special conditions for base reference */
521 if (op->index == UD_R_RSP) {
522 op->index = UD_NONE;
523 op->scale = UD_NONE;
526 if (op->base == UD_R_RBP || op->base == UD_R_R13) {
527 if (mod == 0) {
528 op->base = UD_NONE;
530 if (mod == 1) {
531 offset = 8;
532 } else {
533 offset = 32;
537 } else if (u->adr_mode == 32) {
538 op->base = UD_R_EAX + rm;
539 if (mod == 1) {
540 offset = 8;
541 } else if (mod == 2) {
542 offset = 32;
543 } else if (mod == 0 && rm == 5) {
544 op->base = UD_NONE;
545 offset = 32;
546 } else {
547 offset = 0;
550 /* Scale-Index-Base (SIB) */
551 if ((rm & 7) == 4) {
552 ud_inp_next(u);
554 op->scale = (1 << SIB_S(inp_curr(u))) & ~1;
555 op->index = UD_R_EAX + (SIB_I(inp_curr(u)) | (REX_X(u->pfx_rex) << 3));
556 op->base = UD_R_EAX + (SIB_B(inp_curr(u)) | (REX_B(u->pfx_rex) << 3));
558 if (op->index == UD_R_ESP) {
559 op->index = UD_NONE;
560 op->scale = UD_NONE;
563 /* special condition for base reference */
564 if (op->base == UD_R_EBP) {
565 if (mod == 0) {
566 op->base = UD_NONE;
568 if (mod == 1) {
569 offset = 8;
570 } else {
571 offset = 32;
575 } else {
576 const unsigned int bases[] = { UD_R_BX, UD_R_BX, UD_R_BP, UD_R_BP,
577 UD_R_SI, UD_R_DI, UD_R_BP, UD_R_BX };
578 const unsigned int indices[] = { UD_R_SI, UD_R_DI, UD_R_SI, UD_R_DI,
579 UD_NONE, UD_NONE, UD_NONE, UD_NONE };
580 op->base = bases[rm & 7];
581 op->index = indices[rm & 7];
582 if (mod == 0 && rm == 6) {
583 offset = 16;
584 op->base = UD_NONE;
585 } else if (mod == 1) {
586 offset = 8;
587 } else if (mod == 2) {
588 offset = 16;
592 if (offset) {
593 decode_mem_disp(u, offset, op);
599 * decode_moffset
600 * Decode offset-only memory operand
602 static void
603 decode_moffset(struct ud *u, unsigned int size, struct ud_operand *opr)
605 opr->type = UD_OP_MEM;
606 opr->size = resolve_operand_size(u, size);
607 decode_mem_disp(u, u->adr_mode, opr);
611 /* -----------------------------------------------------------------------------
612 * decode_operands() - Disassembles Operands.
613 * -----------------------------------------------------------------------------
615 static int
616 decode_operand(struct ud *u,
617 struct ud_operand *operand,
618 enum ud_operand_code type,
619 unsigned int size)
621 operand->_oprcode = type;
623 switch (type) {
624 case OP_A :
625 decode_a(u, operand);
626 break;
627 case OP_MR:
628 decode_modrm_rm(u, operand, REGCLASS_GPR,
629 MODRM_MOD(modrm(u)) == 3 ?
630 Mx_reg_size(size) : Mx_mem_size(size));
631 break;
632 case OP_F:
633 u->br_far = 1;
634 /* intended fall through */
635 case OP_M:
636 if (MODRM_MOD(modrm(u)) == 3) {
637 UDERR(u, "expected modrm.mod != 3");
639 /* intended fall through */
640 case OP_E:
641 decode_modrm_rm(u, operand, REGCLASS_GPR, size);
642 break;
643 case OP_G:
644 decode_modrm_reg(u, operand, REGCLASS_GPR, size);
645 break;
646 case OP_sI:
647 case OP_I:
648 decode_imm(u, size, operand);
649 break;
650 case OP_I1:
651 operand->type = UD_OP_CONST;
652 operand->lval.udword = 1;
653 break;
654 case OP_N:
655 if (MODRM_MOD(modrm(u)) != 3) {
656 UDERR(u, "expected modrm.mod == 3");
658 /* intended fall through */
659 case OP_Q:
660 decode_modrm_rm(u, operand, REGCLASS_MMX, size);
661 break;
662 case OP_P:
663 decode_modrm_reg(u, operand, REGCLASS_MMX, size);
664 break;
665 case OP_U:
666 if (MODRM_MOD(modrm(u)) != 3) {
667 UDERR(u, "expected modrm.mod == 3");
669 /* intended fall through */
670 case OP_W:
671 decode_modrm_rm(u, operand, REGCLASS_XMM, size);
672 break;
673 case OP_V:
674 decode_modrm_reg(u, operand, REGCLASS_XMM, size);
675 break;
676 case OP_MU:
677 decode_modrm_rm(u, operand, REGCLASS_XMM,
678 MODRM_MOD(modrm(u)) == 3 ?
679 Mx_reg_size(size) : Mx_mem_size(size));
680 break;
681 case OP_S:
682 decode_modrm_reg(u, operand, REGCLASS_SEG, size);
683 break;
684 case OP_O:
685 decode_moffset(u, size, operand);
686 break;
687 case OP_R0:
688 case OP_R1:
689 case OP_R2:
690 case OP_R3:
691 case OP_R4:
692 case OP_R5:
693 case OP_R6:
694 case OP_R7:
695 decode_reg(u, operand, REGCLASS_GPR,
696 (REX_B(u->pfx_rex) << 3) | (type - OP_R0), size);
697 break;
698 case OP_AL:
699 case OP_AX:
700 case OP_eAX:
701 case OP_rAX:
702 decode_reg(u, operand, REGCLASS_GPR, 0, size);
703 break;
704 case OP_CL:
705 case OP_CX:
706 case OP_eCX:
707 decode_reg(u, operand, REGCLASS_GPR, 1, size);
708 break;
709 case OP_DL:
710 case OP_DX:
711 case OP_eDX:
712 decode_reg(u, operand, REGCLASS_GPR, 2, size);
713 break;
714 case OP_ES:
715 case OP_CS:
716 case OP_DS:
717 case OP_SS:
718 case OP_FS:
719 case OP_GS:
720 /* in 64bits mode, only fs and gs are allowed */
721 if (u->dis_mode == 64) {
722 if (type != OP_FS && type != OP_GS) {
723 UDERR(u, "invalid segment register in 64bits");
726 operand->type = UD_OP_REG;
727 operand->base = (type - OP_ES) + UD_R_ES;
728 operand->size = 16;
729 break;
730 case OP_J :
731 decode_imm(u, size, operand);
732 operand->type = UD_OP_JIMM;
733 break ;
734 case OP_R :
735 if (MODRM_MOD(modrm(u)) != 3) {
736 UDERR(u, "expected modrm.mod == 3");
738 decode_modrm_rm(u, operand, REGCLASS_GPR, size);
739 break;
740 case OP_C:
741 decode_modrm_reg(u, operand, REGCLASS_CR, size);
742 break;
743 case OP_D:
744 decode_modrm_reg(u, operand, REGCLASS_DB, size);
745 break;
746 case OP_I3 :
747 operand->type = UD_OP_CONST;
748 operand->lval.sbyte = 3;
749 break;
750 case OP_ST0:
751 case OP_ST1:
752 case OP_ST2:
753 case OP_ST3:
754 case OP_ST4:
755 case OP_ST5:
756 case OP_ST6:
757 case OP_ST7:
758 operand->type = UD_OP_REG;
759 operand->base = (type - OP_ST0) + UD_R_ST0;
760 operand->size = 80;
761 break;
762 default :
763 break;
765 return 0;
770 * decode_operands
772 * Disassemble upto 3 operands of the current instruction being
773 * disassembled. By the end of the function, the operand fields
774 * of the ud structure will have been filled.
776 static int
777 decode_operands(struct ud* u)
779 decode_operand(u, &u->operand[0],
780 u->itab_entry->operand1.type,
781 u->itab_entry->operand1.size);
782 decode_operand(u, &u->operand[1],
783 u->itab_entry->operand2.type,
784 u->itab_entry->operand2.size);
785 decode_operand(u, &u->operand[2],
786 u->itab_entry->operand3.type,
787 u->itab_entry->operand3.size);
788 return 0;
791 /* -----------------------------------------------------------------------------
792 * clear_insn() - clear instruction structure
793 * -----------------------------------------------------------------------------
795 static void
796 clear_insn(register struct ud* u)
798 u->error = 0;
799 u->pfx_seg = 0;
800 u->pfx_opr = 0;
801 u->pfx_adr = 0;
802 u->pfx_lock = 0;
803 u->pfx_repne = 0;
804 u->pfx_rep = 0;
805 u->pfx_repe = 0;
806 u->pfx_rex = 0;
807 u->pfx_str = 0;
808 u->mnemonic = UD_Inone;
809 u->itab_entry = NULL;
810 u->have_modrm = 0;
811 u->br_far = 0;
813 memset( &u->operand[ 0 ], 0, sizeof( struct ud_operand ) );
814 memset( &u->operand[ 1 ], 0, sizeof( struct ud_operand ) );
815 memset( &u->operand[ 2 ], 0, sizeof( struct ud_operand ) );
819 static inline int
820 resolve_pfx_str(struct ud* u)
822 if (u->pfx_str == 0xf3) {
823 if (P_STR(u->itab_entry->prefix)) {
824 u->pfx_rep = 0xf3;
825 } else {
826 u->pfx_repe = 0xf3;
828 } else if (u->pfx_str == 0xf2) {
829 u->pfx_repne = 0xf3;
831 return 0;
835 static int
836 resolve_mode( struct ud* u )
838 /* if in error state, bail out */
839 if ( u->error ) return -1;
841 /* propagate prefix effects */
842 if ( u->dis_mode == 64 ) { /* set 64bit-mode flags */
844 /* Check validity of instruction m64 */
845 if ( P_INV64( u->itab_entry->prefix ) ) {
846 UDERR(u, "instruction invalid in 64bits");
847 return -1;
850 /* effective rex prefix is the effective mask for the
851 * instruction hard-coded in the opcode map.
853 u->pfx_rex = ( u->pfx_rex & 0x40 ) |
854 ( u->pfx_rex & REX_PFX_MASK( u->itab_entry->prefix ) );
856 /* whether this instruction has a default operand size of
857 * 64bit, also hardcoded into the opcode map.
859 u->default64 = P_DEF64( u->itab_entry->prefix );
860 /* calculate effective operand size */
861 if ( REX_W( u->pfx_rex ) ) {
862 u->opr_mode = 64;
863 } else if ( u->pfx_opr ) {
864 u->opr_mode = 16;
865 } else {
866 /* unless the default opr size of instruction is 64,
867 * the effective operand size in the absence of rex.w
868 * prefix is 32.
870 u->opr_mode = ( u->default64 ) ? 64 : 32;
873 /* calculate effective address size */
874 u->adr_mode = (u->pfx_adr) ? 32 : 64;
875 } else if ( u->dis_mode == 32 ) { /* set 32bit-mode flags */
876 u->opr_mode = ( u->pfx_opr ) ? 16 : 32;
877 u->adr_mode = ( u->pfx_adr ) ? 16 : 32;
878 } else if ( u->dis_mode == 16 ) { /* set 16bit-mode flags */
879 u->opr_mode = ( u->pfx_opr ) ? 32 : 16;
880 u->adr_mode = ( u->pfx_adr ) ? 32 : 16;
883 /* set flags for implicit addressing */
884 u->implicit_addr = P_IMPADDR( u->itab_entry->prefix );
886 return 0;
890 static inline int
891 decode_insn(struct ud *u, uint16_t ptr)
893 UD_ASSERT((ptr & 0x8000) == 0);
894 u->itab_entry = &ud_itab[ ptr ];
895 u->mnemonic = u->itab_entry->mnemonic;
896 return (resolve_pfx_str(u) == 0 &&
897 resolve_mode(u) == 0 &&
898 decode_operands(u) == 0 &&
899 resolve_mnemonic(u) == 0) ? 0 : -1;
904 * decode_3dnow()
906 * Decoding 3dnow is a little tricky because of its strange opcode
907 * structure. The final opcode disambiguation depends on the last
908 * byte that comes after the operands have been decoded. Fortunately,
909 * all 3dnow instructions have the same set of operand types. So we
910 * go ahead and decode the instruction by picking an arbitrarily chosen
911 * valid entry in the table, decode the operands, and read the final
912 * byte to resolve the menmonic.
914 static inline int
915 decode_3dnow(struct ud* u)
917 uint16_t ptr;
918 UD_ASSERT(u->le->type == UD_TAB__OPC_3DNOW);
919 UD_ASSERT(u->le->table[0xc] != 0);
920 decode_insn(u, u->le->table[0xc]);
921 ud_inp_next(u);
922 if (u->error) {
923 return -1;
925 ptr = u->le->table[inp_curr(u)];
926 UD_ASSERT((ptr & 0x8000) == 0);
927 u->mnemonic = ud_itab[ptr].mnemonic;
928 return 0;
932 static int
933 decode_ssepfx(struct ud *u)
935 uint8_t idx;
936 uint8_t pfx;
939 * String prefixes (f2, f3) take precedence over operand
940 * size prefix (66).
942 pfx = u->pfx_str;
943 if (pfx == 0) {
944 pfx = u->pfx_opr;
946 idx = ((pfx & 0xf) + 1) / 2;
947 if (u->le->table[idx] == 0) {
948 idx = 0;
950 if (idx && u->le->table[idx] != 0) {
952 * "Consume" the prefix as a part of the opcode, so it is no
953 * longer exported as an instruction prefix.
955 u->pfx_str = 0;
956 if (pfx == 0x66) {
958 * consume "66" only if it was used for decoding, leaving
959 * it to be used as an operands size override for some
960 * simd instructions.
962 u->pfx_opr = 0;
965 return decode_ext(u, u->le->table[idx]);
970 * decode_ext()
972 * Decode opcode extensions (if any)
974 static int
975 decode_ext(struct ud *u, uint16_t ptr)
977 uint8_t idx = 0;
978 if ((ptr & 0x8000) == 0) {
979 return decode_insn(u, ptr);
981 u->le = &ud_lookup_table_list[(~0x8000 & ptr)];
982 if (u->le->type == UD_TAB__OPC_3DNOW) {
983 return decode_3dnow(u);
986 switch (u->le->type) {
987 case UD_TAB__OPC_MOD:
988 /* !11 = 0, 11 = 1 */
989 idx = (MODRM_MOD(modrm(u)) + 1) / 4;
990 break;
991 /* disassembly mode/operand size/address size based tables.
992 * 16 = 0,, 32 = 1, 64 = 2
994 case UD_TAB__OPC_MODE:
995 idx = u->dis_mode != 64 ? 0 : 1;
996 break;
997 case UD_TAB__OPC_OSIZE:
998 idx = eff_opr_mode(u->dis_mode, REX_W(u->pfx_rex), u->pfx_opr) / 32;
999 break;
1000 case UD_TAB__OPC_ASIZE:
1001 idx = eff_adr_mode(u->dis_mode, u->pfx_adr) / 32;
1002 break;
1003 case UD_TAB__OPC_X87:
1004 idx = modrm(u) - 0xC0;
1005 break;
1006 case UD_TAB__OPC_VENDOR:
1007 if (u->vendor == UD_VENDOR_ANY) {
1008 /* choose a valid entry */
1009 idx = (u->le->table[idx] != 0) ? 0 : 1;
1010 } else if (u->vendor == UD_VENDOR_AMD) {
1011 idx = 0;
1012 } else {
1013 idx = 1;
1015 break;
1016 case UD_TAB__OPC_RM:
1017 idx = MODRM_RM(modrm(u));
1018 break;
1019 case UD_TAB__OPC_REG:
1020 idx = MODRM_REG(modrm(u));
1021 break;
1022 case UD_TAB__OPC_SSE:
1023 return decode_ssepfx(u);
1024 default:
1025 UD_ASSERT(!"not reached");
1026 break;
1029 return decode_ext(u, u->le->table[idx]);
1033 static int
1034 decode_opcode(struct ud *u)
1036 uint16_t ptr;
1037 UD_ASSERT(u->le->type == UD_TAB__OPC_TABLE);
1038 ud_inp_next(u);
1039 if (u->error) {
1040 return -1;
1042 u->primary_opcode = inp_curr(u);
1043 ptr = u->le->table[inp_curr(u)];
1044 if (ptr & 0x8000) {
1045 u->le = &ud_lookup_table_list[ptr & ~0x8000];
1046 if (u->le->type == UD_TAB__OPC_TABLE) {
1047 return decode_opcode(u);
1050 return decode_ext(u, ptr);
1054 /* =============================================================================
1055 * ud_decode() - Instruction decoder. Returns the number of bytes decoded.
1056 * =============================================================================
1058 unsigned int
1059 ud_decode(struct ud *u)
1061 inp_start(u);
1062 clear_insn(u);
1063 u->le = &ud_lookup_table_list[0];
1064 u->error = decode_prefixes(u) == -1 ||
1065 decode_opcode(u) == -1 ||
1066 u->error;
1067 /* Handle decode error. */
1068 if (u->error) {
1069 /* clear out the decode data. */
1070 clear_insn(u);
1071 /* mark the sequence of bytes as invalid. */
1072 u->itab_entry = &ud_itab[0]; /* entry 0 is invalid */
1073 u->mnemonic = u->itab_entry->mnemonic;
1076 /* maybe this stray segment override byte
1077 * should be spewed out?
1079 if ( !P_SEG( u->itab_entry->prefix ) &&
1080 u->operand[0].type != UD_OP_MEM &&
1081 u->operand[1].type != UD_OP_MEM )
1082 u->pfx_seg = 0;
1084 u->insn_offset = u->pc; /* set offset of instruction */
1085 u->asm_buf_fill = 0; /* set translation buffer index to 0 */
1086 u->pc += u->inp_ctr; /* move program counter by bytes decoded */
1088 /* return number of bytes disassembled. */
1089 return u->inp_ctr;
1093 vim: set ts=2 sw=2 expandtab