1 /* disasm.c where all the _work_ gets done in the Netwide Disassembler
3 * The Netwide Assembler is copyright (C) 1996 Simon Tatham and
4 * Julian Hall. All rights reserved. The software is
5 * redistributable under the licence given in the file "Licence"
6 * distributed in the NASM archive.
8 * initial version 27/iii/95 by Simon Tatham
21 extern struct itemplate
**itable
[];
24 * Flags that go into the `segment' field of `insn' structures
27 #define SEG_RELATIVE 1
34 #define SEG_SIGNED 128
36 static int whichreg(long regflags
, int regval
)
40 if (!(REG_AL
& ~regflags
))
42 if (!(REG_AX
& ~regflags
))
44 if (!(REG_EAX
& ~regflags
))
46 if (!(REG_DL
& ~regflags
))
48 if (!(REG_DX
& ~regflags
))
50 if (!(REG_EDX
& ~regflags
))
52 if (!(REG_CL
& ~regflags
))
54 if (!(REG_CX
& ~regflags
))
56 if (!(REG_ECX
& ~regflags
))
58 if (!(FPU0
& ~regflags
))
60 if (!(REG_CS
& ~regflags
))
61 return (regval
== 1) ? R_CS
: 0;
62 if (!(REG_DESS
& ~regflags
))
63 return (regval
== 0 || regval
== 2
64 || regval
== 3 ? sreg
[regval
] : 0);
65 if (!(REG_FSGS
& ~regflags
))
66 return (regval
== 4 || regval
== 5 ? sreg
[regval
] : 0);
67 if (!(REG_SEG67
& ~regflags
))
68 return (regval
== 6 || regval
== 7 ? sreg
[regval
] : 0);
70 /* All the entries below look up regval in an 8-entry array */
71 if (regval
< 0 || regval
> 7)
74 if (!((REGMEM
| BITS8
) & ~regflags
))
76 if (!((REGMEM
| BITS16
) & ~regflags
))
78 if (!((REGMEM
| BITS32
) & ~regflags
))
80 if (!(REG_SREG
& ~regflags
))
82 if (!(REG_CREG
& ~regflags
))
84 if (!(REG_DREG
& ~regflags
))
86 if (!(REG_TREG
& ~regflags
))
88 if (!(FPUREG
& ~regflags
))
89 return fpureg
[regval
];
90 if (!(MMXREG
& ~regflags
))
91 return mmxreg
[regval
];
92 if (!(XMMREG
& ~regflags
))
93 return xmmreg
[regval
];
98 static const char *whichcond(int condval
)
100 static int conds
[] = {
101 C_O
, C_NO
, C_C
, C_NC
, C_Z
, C_NZ
, C_NA
, C_A
,
102 C_S
, C_NS
, C_PE
, C_PO
, C_L
, C_NL
, C_NG
, C_G
104 return conditions
[conds
[condval
]];
108 * Process an effective address (ModRM) specification.
110 static unsigned char *do_ea(unsigned char *data
, int modrm
, int asize
,
111 int segsize
, operand
* op
)
113 int mod
, rm
, scale
, index
, base
;
115 mod
= (modrm
>> 6) & 03;
118 if (mod
== 3) { /* pure register version */
120 op
->segment
|= SEG_RMREG
;
128 * <mod> specifies the displacement size (none, byte or
129 * word), and <rm> specifies the register combination.
130 * Exception: mod=0,rm=6 does not specify [BP] as one might
131 * expect, but instead specifies [disp16].
133 op
->indexreg
= op
->basereg
= -1;
134 op
->scale
= 1; /* always, in 16 bits */
165 if (rm
== 6 && mod
== 0) { /* special case */
169 mod
= 2; /* fake disp16 */
173 op
->segment
|= SEG_NODISP
;
176 op
->segment
|= SEG_DISP8
;
177 op
->offset
= (signed char)*data
++;
180 op
->segment
|= SEG_DISP16
;
181 op
->offset
= *data
++;
182 op
->offset
|= ((unsigned)*data
++) << 8;
188 * Once again, <mod> specifies displacement size (this time
189 * none, byte or *dword*), while <rm> specifies the base
190 * register. Again, [EBP] is missing, replaced by a pure
191 * disp32 (this time that's mod=0,rm=*5*). However, rm=4
192 * indicates not a single base register, but instead the
193 * presence of a SIB byte...
219 if (rm
== 5 && mod
== 0) {
223 mod
= 2; /* fake disp32 */
225 if (rm
== 4) { /* process SIB */
226 scale
= (*data
>> 6) & 03;
227 index
= (*data
>> 3) & 07;
231 op
->scale
= 1 << scale
;
234 op
->indexreg
= R_EAX
;
237 op
->indexreg
= R_ECX
;
240 op
->indexreg
= R_EDX
;
243 op
->indexreg
= R_EBX
;
249 op
->indexreg
= R_EBP
;
252 op
->indexreg
= R_ESI
;
255 op
->indexreg
= R_EDI
;
292 op
->segment
|= SEG_NODISP
;
295 op
->segment
|= SEG_DISP8
;
296 op
->offset
= (signed char)*data
++;
299 op
->segment
|= SEG_DISP32
;
300 op
->offset
= *data
++;
301 op
->offset
|= ((unsigned)*data
++) << 8;
302 op
->offset
|= ((long)*data
++) << 16;
303 op
->offset
|= ((long)*data
++) << 24;
311 * Determine whether the instruction template in t corresponds to the data
312 * stream in data. Return the number of bytes matched if so.
314 static int matches(struct itemplate
*t
, unsigned char *data
, int asize
,
315 int osize
, int segsize
, int rep
, insn
* ins
)
317 unsigned char *r
= (unsigned char *)(t
->code
);
318 unsigned char *origdata
= data
;
319 int a_used
= FALSE
, o_used
= FALSE
;
324 else if (rep
== 0xF3)
329 if (c
>= 01 && c
<= 03) {
337 ins
->oprs
[0].basereg
= 0;
340 ins
->oprs
[0].basereg
= 2;
343 ins
->oprs
[0].basereg
= 3;
352 ins
->oprs
[0].basereg
= 4;
355 ins
->oprs
[0].basereg
= 5;
364 ins
->oprs
[0].basereg
= 0;
367 ins
->oprs
[0].basereg
= 1;
370 ins
->oprs
[0].basereg
= 2;
373 ins
->oprs
[0].basereg
= 3;
382 ins
->oprs
[0].basereg
= 4;
385 ins
->oprs
[0].basereg
= 5;
391 if (c
>= 010 && c
<= 012) {
392 int t
= *r
++, d
= *data
++;
393 if (d
< t
|| d
> t
+ 7)
396 ins
->oprs
[c
- 010].basereg
= d
- t
;
397 ins
->oprs
[c
- 010].segment
|= SEG_RMREG
;
403 if (c
>= 014 && c
<= 016) {
404 ins
->oprs
[c
- 014].offset
= (signed char)*data
++;
405 ins
->oprs
[c
- 014].segment
|= SEG_SIGNED
;
407 if (c
>= 020 && c
<= 022)
408 ins
->oprs
[c
- 020].offset
= *data
++;
409 if (c
>= 024 && c
<= 026)
410 ins
->oprs
[c
- 024].offset
= *data
++;
411 if (c
>= 030 && c
<= 032) {
412 ins
->oprs
[c
- 030].offset
= *data
++;
413 ins
->oprs
[c
- 030].offset
|= (((unsigned)*data
++) << 8);
415 if (c
>= 034 && c
<= 036) {
416 ins
->oprs
[c
- 034].offset
= *data
++;
417 ins
->oprs
[c
- 034].offset
|= (((unsigned)*data
++) << 8);
419 ins
->oprs
[c
- 034].offset
|= (((long)*data
++) << 16);
420 ins
->oprs
[c
- 034].offset
|= (((long)*data
++) << 24);
422 if (segsize
!= asize
)
423 ins
->oprs
[c
- 034].addr_size
= asize
;
425 if (c
>= 040 && c
<= 042) {
426 ins
->oprs
[c
- 040].offset
= *data
++;
427 ins
->oprs
[c
- 040].offset
|= (((unsigned)*data
++) << 8);
428 ins
->oprs
[c
- 040].offset
|= (((long)*data
++) << 16);
429 ins
->oprs
[c
- 040].offset
|= (((long)*data
++) << 24);
431 if (c
>= 044 && c
<= 046) {
432 ins
->oprs
[c
- 044].offset
= *data
++;
433 ins
->oprs
[c
- 044].offset
|= (((unsigned)*data
++) << 8);
435 ins
->oprs
[c
- 044].offset
|= (((long)*data
++) << 16);
436 ins
->oprs
[c
- 044].offset
|= (((long)*data
++) << 24);
438 if (segsize
!= asize
)
439 ins
->oprs
[c
- 044].addr_size
= asize
;
441 if (c
>= 050 && c
<= 052) {
442 ins
->oprs
[c
- 050].offset
= (signed char)*data
++;
443 ins
->oprs
[c
- 050].segment
|= SEG_RELATIVE
;
445 if (c
>= 060 && c
<= 062) {
446 ins
->oprs
[c
- 060].offset
= *data
++;
447 ins
->oprs
[c
- 060].offset
|= (((unsigned)*data
++) << 8);
448 ins
->oprs
[c
- 060].segment
|= SEG_RELATIVE
;
449 ins
->oprs
[c
- 060].segment
&= ~SEG_32BIT
;
451 if (c
>= 064 && c
<= 066) {
452 ins
->oprs
[c
- 064].offset
= *data
++;
453 ins
->oprs
[c
- 064].offset
|= (((unsigned)*data
++) << 8);
455 ins
->oprs
[c
- 064].offset
|= (((long)*data
++) << 16);
456 ins
->oprs
[c
- 064].offset
|= (((long)*data
++) << 24);
457 ins
->oprs
[c
- 064].segment
|= SEG_32BIT
;
459 ins
->oprs
[c
- 064].segment
&= ~SEG_32BIT
;
460 ins
->oprs
[c
- 064].segment
|= SEG_RELATIVE
;
461 if (segsize
!= osize
) {
462 ins
->oprs
[c
- 064].type
=
463 (ins
->oprs
[c
- 064].type
& NON_SIZE
)
464 | ((osize
== 16) ? BITS16
: BITS32
);
467 if (c
>= 070 && c
<= 072) {
468 ins
->oprs
[c
- 070].offset
= *data
++;
469 ins
->oprs
[c
- 070].offset
|= (((unsigned)*data
++) << 8);
470 ins
->oprs
[c
- 070].offset
|= (((long)*data
++) << 16);
471 ins
->oprs
[c
- 070].offset
|= (((long)*data
++) << 24);
472 ins
->oprs
[c
- 070].segment
|= SEG_32BIT
| SEG_RELATIVE
;
474 if (c
>= 0100 && c
< 0130) {
476 ins
->oprs
[c
& 07].basereg
= (modrm
>> 3) & 07;
477 ins
->oprs
[c
& 07].segment
|= SEG_RMREG
;
478 data
= do_ea(data
, modrm
, asize
, segsize
,
479 &ins
->oprs
[(c
>> 3) & 07]);
481 if (c
>= 0130 && c
<= 0132) {
482 ins
->oprs
[c
- 0130].offset
= *data
++;
483 ins
->oprs
[c
- 0130].offset
|= (((unsigned)*data
++) << 8);
485 if (c
>= 0140 && c
<= 0142) {
486 ins
->oprs
[c
- 0140].offset
= *data
++;
487 ins
->oprs
[c
- 0140].offset
|= (((unsigned)*data
++) << 8);
488 ins
->oprs
[c
- 0140].offset
|= (((long)*data
++) << 16);
489 ins
->oprs
[c
- 0140].offset
|= (((long)*data
++) << 24);
491 if (c
>= 0200 && c
<= 0277) {
493 if (((modrm
>> 3) & 07) != (c
& 07))
494 return FALSE
; /* spare field doesn't match up */
495 data
= do_ea(data
, modrm
, asize
, segsize
,
496 &ins
->oprs
[(c
>> 3) & 07]);
498 if (c
>= 0300 && c
<= 0302) {
500 ins
->oprs
[c
- 0300].segment
|= SEG_32BIT
;
502 ins
->oprs
[c
- 0300].segment
&= ~SEG_32BIT
;
518 if (asize
!= segsize
)
536 if (osize
!= segsize
)
542 int t
= *r
++, d
= *data
++;
543 if (d
< t
|| d
> t
+ 15)
546 ins
->condition
= d
- t
;
564 * Check for unused rep or a/o prefixes.
568 ins
->prefixes
[ins
->nprefix
++] = drep
;
569 if (!a_used
&& asize
!= segsize
)
570 ins
->prefixes
[ins
->nprefix
++] = (asize
== 16 ? P_A16
: P_A32
);
571 if (!o_used
&& osize
!= segsize
)
572 ins
->prefixes
[ins
->nprefix
++] = (osize
== 16 ? P_O16
: P_O32
);
574 return data
- origdata
;
577 long disasm(unsigned char *data
, char *output
, int outbufsize
, int segsize
,
578 long offset
, int autosync
, unsigned long prefer
)
580 struct itemplate
**p
, **best_p
;
581 int length
, best_length
= 0;
583 int rep
, lock
, asize
, osize
, i
, slen
, colon
;
584 unsigned char *origdata
;
587 unsigned long goodness
, best
;
592 asize
= osize
= segsize
;
597 if (*data
== 0xF3 || *data
== 0xF2)
599 else if (*data
== 0xF0)
601 else if (*data
== 0x2E || *data
== 0x36 || *data
== 0x3E ||
602 *data
== 0x26 || *data
== 0x64 || *data
== 0x65) {
623 } else if (*data
== 0x66)
624 osize
= 48 - segsize
, data
++;
625 else if (*data
== 0x67)
626 asize
= 48 - segsize
, data
++;
631 tmp_ins
.oprs
[0].segment
= tmp_ins
.oprs
[1].segment
=
632 tmp_ins
.oprs
[2].segment
=
633 tmp_ins
.oprs
[0].addr_size
= tmp_ins
.oprs
[1].addr_size
=
634 tmp_ins
.oprs
[2].addr_size
= (segsize
== 16 ? 0 : SEG_32BIT
);
635 tmp_ins
.condition
= -1;
636 best
= ~0UL; /* Worst possible */
638 for (p
= itable
[*data
]; *p
; p
++) {
639 if ((length
= matches(*p
, data
, asize
, osize
,
640 segsize
, rep
, &tmp_ins
))) {
643 * Final check to make sure the types of r/m match up.
645 for (i
= 0; i
< (*p
)->operands
; i
++) {
647 /* If it's a mem-only EA but we have a register, die. */
648 ((tmp_ins
.oprs
[i
].segment
& SEG_RMREG
) &&
649 !(MEMORY
& ~(*p
)->opd
[i
])) ||
650 /* If it's a reg-only EA but we have a memory ref, die. */
651 (!(tmp_ins
.oprs
[i
].segment
& SEG_RMREG
) &&
652 !(REGNORM
& ~(*p
)->opd
[i
]) &&
653 !((*p
)->opd
[i
] & REG_SMASK
)) ||
654 /* Register type mismatch (eg FS vs REG_DESS): die. */
655 ((((*p
)->opd
[i
] & (REGISTER
| FPUREG
)) ||
656 (tmp_ins
.oprs
[i
].segment
& SEG_RMREG
)) &&
657 !whichreg((*p
)->opd
[i
],
658 tmp_ins
.oprs
[i
].basereg
))) {
665 goodness
= ((*p
)->flags
& IF_PFMASK
) ^ prefer
;
666 if (goodness
< best
) {
667 /* This is the best one found so far */
670 best_length
= length
;
678 return 0; /* no instruction was matched */
680 /* Pick the best match */
682 length
= best_length
;
686 /* TODO: snprintf returns the value that the string would have if
687 * the buffer were long enough, and not the actual length of
688 * the returned string, so each instance of using the return
689 * value of snprintf should actually be checked to assure that
690 * the return value is "sane." Maybe a macro wrapper could
691 * be used for that purpose.
694 slen
+= snprintf(output
+ slen
, outbufsize
- slen
, "lock ");
695 for (i
= 0; i
< ins
.nprefix
; i
++)
696 switch (ins
.prefixes
[i
]) {
698 slen
+= snprintf(output
+ slen
, outbufsize
- slen
, "rep ");
701 slen
+= snprintf(output
+ slen
, outbufsize
- slen
, "repe ");
704 slen
+= snprintf(output
+ slen
, outbufsize
- slen
, "repne ");
707 slen
+= snprintf(output
+ slen
, outbufsize
- slen
, "a16 ");
710 slen
+= snprintf(output
+ slen
, outbufsize
- slen
, "a32 ");
713 slen
+= snprintf(output
+ slen
, outbufsize
- slen
, "o16 ");
716 slen
+= snprintf(output
+ slen
, outbufsize
- slen
, "o32 ");
720 for (i
= 0; i
< elements(ico
); i
++)
721 if ((*p
)->opcode
== ico
[i
]) {
723 snprintf(output
+ slen
, outbufsize
- slen
, "%s%s", icn
[i
],
724 whichcond(ins
.condition
));
727 if (i
>= elements(ico
))
729 snprintf(output
+ slen
, outbufsize
- slen
, "%s",
730 insn_names
[(*p
)->opcode
]);
732 length
+= data
- origdata
; /* fix up for prefixes */
733 for (i
= 0; i
< (*p
)->operands
; i
++) {
734 output
[slen
++] = (colon
? ':' : i
== 0 ? ' ' : ',');
736 if (ins
.oprs
[i
].segment
& SEG_RELATIVE
) {
737 ins
.oprs
[i
].offset
+= offset
+ length
;
739 * sort out wraparound
741 if (!(ins
.oprs
[i
].segment
& SEG_32BIT
))
742 ins
.oprs
[i
].offset
&= 0xFFFF;
744 * add sync marker, if autosync is on
747 add_sync(ins
.oprs
[i
].offset
, 0L);
750 if ((*p
)->opd
[i
] & COLON
)
755 if (((*p
)->opd
[i
] & (REGISTER
| FPUREG
)) ||
756 (ins
.oprs
[i
].segment
& SEG_RMREG
)) {
757 ins
.oprs
[i
].basereg
= whichreg((*p
)->opd
[i
],
758 ins
.oprs
[i
].basereg
);
759 if ((*p
)->opd
[i
] & TO
)
760 slen
+= snprintf(output
+ slen
, outbufsize
- slen
, "to ");
761 slen
+= snprintf(output
+ slen
, outbufsize
- slen
, "%s",
762 reg_names
[ins
.oprs
[i
].basereg
-
764 } else if (!(UNITY
& ~(*p
)->opd
[i
])) {
765 output
[slen
++] = '1';
766 } else if ((*p
)->opd
[i
] & IMMEDIATE
) {
767 if ((*p
)->opd
[i
] & BITS8
) {
769 snprintf(output
+ slen
, outbufsize
- slen
, "byte ");
770 if (ins
.oprs
[i
].segment
& SEG_SIGNED
) {
771 if (ins
.oprs
[i
].offset
< 0) {
772 ins
.oprs
[i
].offset
*= -1;
773 output
[slen
++] = '-';
775 output
[slen
++] = '+';
777 } else if ((*p
)->opd
[i
] & BITS16
) {
779 snprintf(output
+ slen
, outbufsize
- slen
, "word ");
780 } else if ((*p
)->opd
[i
] & BITS32
) {
782 snprintf(output
+ slen
, outbufsize
- slen
, "dword ");
783 } else if ((*p
)->opd
[i
] & NEAR
) {
785 snprintf(output
+ slen
, outbufsize
- slen
, "near ");
786 } else if ((*p
)->opd
[i
] & SHORT
) {
788 snprintf(output
+ slen
, outbufsize
- slen
, "short ");
791 snprintf(output
+ slen
, outbufsize
- slen
, "0x%lx",
793 } else if (!(MEM_OFFS
& ~(*p
)->opd
[i
])) {
795 snprintf(output
+ slen
, outbufsize
- slen
, "[%s%s%s0x%lx]",
796 (segover
? segover
: ""), (segover
? ":" : ""),
797 (ins
.oprs
[i
].addr_size
==
798 32 ? "dword " : ins
.oprs
[i
].addr_size
==
799 16 ? "word " : ""), ins
.oprs
[i
].offset
);
801 } else if (!(REGMEM
& ~(*p
)->opd
[i
])) {
803 if ((*p
)->opd
[i
] & BITS8
)
805 snprintf(output
+ slen
, outbufsize
- slen
, "byte ");
806 if ((*p
)->opd
[i
] & BITS16
)
808 snprintf(output
+ slen
, outbufsize
- slen
, "word ");
809 if ((*p
)->opd
[i
] & BITS32
)
811 snprintf(output
+ slen
, outbufsize
- slen
, "dword ");
812 if ((*p
)->opd
[i
] & BITS64
)
814 snprintf(output
+ slen
, outbufsize
- slen
, "qword ");
815 if ((*p
)->opd
[i
] & BITS80
)
817 snprintf(output
+ slen
, outbufsize
- slen
, "tword ");
818 if ((*p
)->opd
[i
] & FAR
)
819 slen
+= snprintf(output
+ slen
, outbufsize
- slen
, "far ");
820 if ((*p
)->opd
[i
] & NEAR
)
822 snprintf(output
+ slen
, outbufsize
- slen
, "near ");
823 output
[slen
++] = '[';
824 if (ins
.oprs
[i
].addr_size
)
825 slen
+= snprintf(output
+ slen
, outbufsize
- slen
, "%s",
826 (ins
.oprs
[i
].addr_size
== 32 ? "dword " :
827 ins
.oprs
[i
].addr_size
==
831 snprintf(output
+ slen
, outbufsize
- slen
, "%s:",
835 if (ins
.oprs
[i
].basereg
!= -1) {
836 slen
+= snprintf(output
+ slen
, outbufsize
- slen
, "%s",
837 reg_names
[(ins
.oprs
[i
].basereg
-
841 if (ins
.oprs
[i
].indexreg
!= -1) {
843 output
[slen
++] = '+';
844 slen
+= snprintf(output
+ slen
, outbufsize
- slen
, "%s",
845 reg_names
[(ins
.oprs
[i
].indexreg
-
847 if (ins
.oprs
[i
].scale
> 1)
849 snprintf(output
+ slen
, outbufsize
- slen
, "*%d",
853 if (ins
.oprs
[i
].segment
& SEG_DISP8
) {
855 if (ins
.oprs
[i
].offset
& 0x80) {
856 ins
.oprs
[i
].offset
= -(signed char)ins
.oprs
[i
].offset
;
860 snprintf(output
+ slen
, outbufsize
- slen
, "%c0x%lx",
861 sign
, ins
.oprs
[i
].offset
);
862 } else if (ins
.oprs
[i
].segment
& SEG_DISP16
) {
864 output
[slen
++] = '+';
866 snprintf(output
+ slen
, outbufsize
- slen
, "0x%lx",
868 } else if (ins
.oprs
[i
].segment
& SEG_DISP32
) {
870 output
[slen
++] = '+';
872 snprintf(output
+ slen
, outbufsize
- slen
, "0x%lx",
875 output
[slen
++] = ']';
878 snprintf(output
+ slen
, outbufsize
- slen
, "<operand%d>",
883 if (segover
) { /* unused segment override */
885 int count
= slen
+ 1;
887 p
[count
+ 3] = p
[count
];
888 strncpy(output
, segover
, 2);
894 long eatbyte(unsigned char *data
, char *output
, int outbufsize
)
896 snprintf(output
, outbufsize
, "db 0x%02X", *data
);