1 /* assemble.c code generation for the Netwide Assembler
3 * The Netwide Assembler is copyright (C) 1996 Simon Tatham and
4 * Julian Hall. All rights reserved. The software is
5 * redistributable under the licence given in the file "Licence"
6 * distributed in the NASM archive.
8 * the actual codes (C syntax, i.e. octal):
9 * \0 - terminates the code. (Unless it's a literal of course.)
10 * \1, \2, \3 - that many literal bytes follow in the code stream
11 * \4, \6 - the POP/PUSH (respectively) codes for CS, DS, ES, SS
12 * (POP is never used for CS) depending on operand 0
13 * \5, \7 - the second byte of POP/PUSH codes for FS, GS, depending on operand 0
15 * \10, \11, \12 - a literal byte follows in the code stream, to be added
16 * to the register value of operand 0, 1 or 2
17 * \17 - encodes the literal byte 0. (Some compilers don't take
18 * kindly to a zero byte in the _middle_ of a compile time
19 * string constant, so I had to put this hack in.)
20 * \14, \15, \16 - a signed byte immediate operand, from operand 0, 1 or 2
21 * \20, \21, \22 - a byte immediate operand, from operand 0, 1 or 2
22 * \24, \25, \26 - an unsigned byte immediate operand, from operand 0, 1 or 2
23 * \30, \31, \32 - a word immediate operand, from operand 0, 1 or 2
24 * \34, \35, \36 - select between \3[012] and \4[012] depending on 16/32 bit
25 * assembly mode or the address-size override on the operand
26 * \37 - a word constant, from the _segment_ part of operand 0
27 * \40, \41, \42 - a long immediate operand, from operand 0, 1 or 2
28 * \50, \51, \52 - a byte relative operand, from operand 0, 1 or 2
29 * \60, \61, \62 - a word relative operand, from operand 0, 1 or 2
30 * \64, \65, \66 - select between \6[012] and \7[012] depending on 16/32 bit
31 * assembly mode or the address-size override on the operand
32 * \70, \71, \72 - a long relative operand, from operand 0, 1 or 2
33 * \1ab - a ModRM, calculated on EA in operand a, with the spare
34 * field the register value of operand b.
35 * \2ab - a ModRM, calculated on EA in operand a, with the spare
36 * field equal to digit b.
37 * \30x - might be an 0x67 byte, depending on the address size of
38 * the memory reference in operand x.
39 * \310 - indicates fixed 16-bit address size, i.e. optional 0x67.
40 * \311 - indicates fixed 32-bit address size, i.e. optional 0x67.
41 * \320 - indicates fixed 16-bit operand size, i.e. optional 0x66.
42 * \321 - indicates fixed 32-bit operand size, i.e. optional 0x66.
43 * \322 - indicates that this instruction is only valid when the
44 * operand size is the default (instruction to disassembler,
45 * generates no code in the assembler)
46 * \330 - a literal byte follows in the code stream, to be added
47 * to the condition code value of the instruction.
48 * \331 - instruction not valid with REP prefix. Hint for
49 * disassembler only; for SSE instructions.
50 * \332 - disassemble a rep (0xF3 byte) prefix as repe not rep.
51 * \333 - REP prefix (0xF3 byte); for SSE instructions. Not encoded
52 * as a literal byte in order to aid the disassembler.
53 * \340 - reserve <operand 0> bytes of uninitialised storage.
54 * Operand 0 had better be a segmentless constant.
65 extern struct itemplate
*nasm_instructions
[];
68 int sib_present
; /* is a SIB byte necessary? */
69 int bytes
; /* # of bytes of offset needed */
70 int size
; /* lazy - this is sib+bytes+1 */
71 unsigned char modrm
, sib
; /* the bytes themselves */
/* Output-format driver currently in use; assemble() copies its `output` argument here. */
75 static struct ofmt
*outfmt
;
/* Prototypes for the file-local helpers that are defined further down in this file. */
78 static long calcsize (long, long, int, insn
*, char *);
79 static void gencode (long, long, int, insn
*, char *, long);
80 static int regval (operand
*o
);
81 static int matches (struct itemplate
*, insn
*);
82 static ea
* process_ea (operand
*, ea
*, int, int, int);
83 static int chsize (operand
*, int);
86 * This routine wraps the real output format's output routine,
87 * in order to pass a copy of the data off to the listing file
88 * generator at the same time.
/*
 * out: hand one chunk of output to both the listing generator
 * (list->output) and the object-format driver (outfmt->output).
 *
 *   offset       - passed to list->output as the listing offset
 *   segto        - passed to outfmt->output and to the linenum hook
 *   data, type   - payload pointer and OUT_* type word; the kind is
 *                  selected with OUT_TYPMASK, the size with OUT_SIZMASK
 *   segment, wrt - relocation information; both NO_SEG means the value
 *                  is not relocated
 *
 * For an OUT_ADDRESS that is not relocated, the listing receives the
 * value re-encoded as OUT_RAWDATA (4 bytes when the size field is 4,
 * otherwise 2) using WRITELONG/WRITESHORT.  The object-format driver
 * always receives the original data and type unchanged.
 * When src_get() returns nonzero the current debug format linenum hook
 * is called before the data is written.
 */
90 static void out (long offset
, long segto
, void *data
, unsigned long type
,
91 long segment
, long wrt
)
96 if ((type
& OUT_TYPMASK
) == OUT_ADDRESS
) {
97 if (segment
!= NO_SEG
|| wrt
!= NO_SEG
) {
99 * This address is relocated. We must write it as
100 * OUT_ADDRESS, so there's no work to be done here.
102 list
->output (offset
, data
, type
);
105 unsigned char p
[4], *q
= p
;
107 * This is a non-relocated address, and we're going to
108 * convert it into RAWDATA format.
110 if ((type
& OUT_SIZMASK
) == 4) {
111 WRITELONG (q
, * (long *) data
);
112 list
->output (offset
, p
, OUT_RAWDATA
+4);
115 WRITESHORT (q
, * (long *) data
);
116 list
->output (offset
, p
, OUT_RAWDATA
+2);
120 else if ((type
& OUT_TYPMASK
) == OUT_RAWDATA
) {
121 list
->output (offset
, data
, type
);
123 else if ((type
& OUT_TYPMASK
) == OUT_RESERVE
) {
124 list
->output (offset
, NULL
, type
);
126 else if ((type
& OUT_TYPMASK
) == OUT_REL2ADR
||
127 (type
& OUT_TYPMASK
) == OUT_REL4ADR
) {
128 list
->output (offset
, data
, type
);
131 if (src_get(&lineno
,&lnfname
))
132 outfmt
->current_dfmt
->linenum(lnfname
,lineno
,segto
);
134 outfmt
->output (segto
, data
, type
, segment
, wrt
);
/*
 * assemble: emit the output for one instruction through out() and
 * return the number of bytes generated.
 *
 * The error, output and listgen values are copied into errfunc, outfmt
 * and list so that the helper routines can reach them.
 *
 * Three cases are visible in this block:
 *   - I_DB / I_DW / I_DD / I_DQ / I_DT: every entry of the eops list is
 *     written, repeated instruction->times times.  Numbers go out as one
 *     raw byte (DB) or as an OUT_ADDRESS of wsize bytes; an integer given
 *     to DQ/DT is reported as an error.  Strings are followed by zero
 *     bytes up to a multiple of wsize.  Returns offset - start.
 *   - I_INCBIN: the file named by the first operand is opened and copied
 *     out through a 2048-byte buffer; the optional second and third
 *     operands supply a start position (base) and an upper bound on len.
 *     Returns instruction->times * len, or 0 on the error path.
 *   - anything else: templates starting at
 *     nasm_instructions[instruction->opcode] are tried with matches();
 *     when it returns 100 the prefix bytes and the gencode() output are
 *     emitted once per TIMES repetition.  Returns offset - start.
 *     If no template matched, an error chosen from size_prob is reported.
 *
 * NOTE(review): several statements of this function are missing from
 * this extract, so this description covers only the lines shown.
 */
137 long assemble (long segment
, long offset
, int bits
,
138 insn
*instruction
, struct ofmt
*output
, efunc error
,
141 struct itemplate
*temp
;
147 long wsize
= 0; /* size for DB etc. */
149 errfunc
= error
; /* to pass to other functions */
150 outfmt
= output
; /* likewise */
151 list
= listgen
; /* and again */
153 switch (instruction
->opcode
)
156 case I_DB
: wsize
= 1; break;
157 case I_DW
: wsize
= 2; break;
158 case I_DD
: wsize
= 4; break;
159 case I_DQ
: wsize
= 8; break;
160 case I_DT
: wsize
= 10; break;
165 long t
= instruction
->times
;
167 errfunc(ERR_PANIC
, "instruction->times < 0 (%ld) in assemble()",t
);
169 while (t
--) /* repeat TIMES times */
171 for (e
= instruction
->eops
; e
; e
= e
->next
)
173 if (e
->type
== EOT_DB_NUMBER
)
176 if (e
->segment
!= NO_SEG
)
177 errfunc (ERR_NONFATAL
,
178 "one-byte relocation attempted");
180 out (offset
, segment
, &e
->offset
, OUT_RAWDATA
+1,
184 else if (wsize
> 5) {
185 errfunc (ERR_NONFATAL
, "integer supplied to a D%c"
186 " instruction", wsize
==8 ? 'Q' : 'T');
189 out (offset
, segment
, &e
->offset
,
190 OUT_ADDRESS
+wsize
, e
->segment
,
194 else if (e
->type
== EOT_DB_STRING
)
198 out (offset
, segment
, e
->stringval
,
199 OUT_RAWDATA
+e
->stringlen
, NO_SEG
, NO_SEG
);
200 align
= e
->stringlen
% wsize
;
203 align
= wsize
- align
;
204 out (offset
, segment
, "\0\0\0\0\0\0\0\0",
205 OUT_RAWDATA
+align
, NO_SEG
, NO_SEG
);
207 offset
+= e
->stringlen
+ align
;
210 if (t
> 0 && t
== instruction
->times
-1)
213 * Dummy call to list->output to give the offset to the
216 list
->output (offset
, NULL
, OUT_RAWDATA
);
217 list
->uplevel (LIST_TIMES
);
220 if (instruction
->times
> 1)
221 list
->downlevel (LIST_TIMES
);
222 return offset
- start
;
225 if (instruction
->opcode
== I_INCBIN
)
227 static char fname
[FILENAME_MAX
];
231 len
= FILENAME_MAX
-1;
232 if (len
> instruction
->eops
->stringlen
)
233 len
= instruction
->eops
->stringlen
;
/* NOTE(review): strncpy copies at most len bytes and adds no terminator when the source is at least that long; the terminating store is not visible in this extract, and fname is a static buffer reused across calls -- confirm it is NUL-terminated before fopen. */
234 strncpy (fname
, instruction
->eops
->stringval
, len
);
237 if ( (fp
= fopen(fname
, "rb")) == NULL
)
238 error (ERR_NONFATAL
, "`incbin': unable to open file `%s'", fname
);
239 else if (fseek(fp
, 0L, SEEK_END
) < 0)
240 error (ERR_NONFATAL
, "`incbin': unable to seek on file `%s'",
244 static char buf
[2048];
245 long t
= instruction
->times
;
249 if (instruction
->eops
->next
) {
250 base
= instruction
->eops
->next
->offset
;
252 if (instruction
->eops
->next
->next
&&
253 len
> instruction
->eops
->next
->next
->offset
)
254 len
= instruction
->eops
->next
->next
->offset
;
257 * Dummy call to list->output to give the offset to the
260 list
->output (offset
, NULL
, OUT_RAWDATA
);
261 list
->uplevel(LIST_INCBIN
);
266 fseek (fp
, base
, SEEK_SET
);
269 long m
= fread (buf
, 1, (l
>sizeof(buf
)?sizeof(buf
):l
),
273 * This shouldn't happen unless the file
274 * actually changes while we are reading
277 error (ERR_NONFATAL
, "`incbin': unexpected EOF while"
278 " reading file `%s'", fname
);
279 t
=0; /* Try to exit cleanly */
282 out (offset
, segment
, buf
, OUT_RAWDATA
+m
,
287 list
->downlevel(LIST_INCBIN
);
288 if (instruction
->times
> 1) {
290 * Dummy call to list->output to give the offset to the
293 list
->output (offset
, NULL
, OUT_RAWDATA
);
294 list
->uplevel(LIST_TIMES
);
295 list
->downlevel(LIST_TIMES
);
298 return instruction
->times
* len
;
300 return 0; /* if we're here, there's an error */
304 temp
= nasm_instructions
[instruction
->opcode
];
305 while (temp
->opcode
!= -1) {
306 int m
= matches (temp
, instruction
);
308 if (m
== 100) /* matches! */
310 char *codes
= temp
->code
;
311 long insn_size
= calcsize(segment
, offset
, bits
,
313 itimes
= instruction
->times
;
314 if (insn_size
< 0) /* shouldn't be, on pass two */
315 error (ERR_PANIC
, "errors made it through from pass one");
316 else while (itimes
--) {
317 insn_end
= offset
+ insn_size
;
318 for (j
=0; j
<instruction
->nprefix
; j
++) {
320 switch (instruction
->prefixes
[j
]) {
323 case P_REPNE
: case P_REPNZ
:
325 case P_REPE
: case P_REPZ
: case P_REP
:
327 case R_CS
: c
= 0x2E; break;
328 case R_DS
: c
= 0x3E; break;
329 case R_ES
: c
= 0x26; break;
330 case R_FS
: c
= 0x64; break;
331 case R_GS
: c
= 0x65; break;
332 case R_SS
: c
= 0x36; break;
351 "invalid instruction prefix");
354 out (offset
, segment
, &c
, OUT_RAWDATA
+1,
359 gencode (segment
, offset
, bits
, instruction
, codes
, insn_end
);
361 if (itimes
> 0 && itimes
== instruction
->times
-1) {
363 * Dummy call to list->output to give the offset to the
366 list
->output (offset
, NULL
, OUT_RAWDATA
);
367 list
->uplevel (LIST_TIMES
);
370 if (instruction
->times
> 1)
371 list
->downlevel (LIST_TIMES
);
372 return offset
- start
;
379 if (temp
->opcode
== -1) { /* didn't match any instruction */
380 if (size_prob
== 1) /* would have matched, but for size */
381 error (ERR_NONFATAL
, "operation size not specified");
382 else if (size_prob
== 2)
383 error (ERR_NONFATAL
, "mismatch in operand sizes");
386 "invalid combination of opcode and operands");
/*
 * insn_size: compute how many bytes an instruction will occupy without
 * emitting anything.  The error argument is copied into errfunc.
 *
 * Cases visible in this block:
 *   - I_DB / I_DW / I_DD / I_DQ / I_DT: sums, over the eops list, the
 *     size of each item plus alignment padding derived from
 *     (-osize) % wsize, and returns isize * instruction->times.
 *   - I_INCBIN: opens the named file, takes its length, subtracts the
 *     optional start offset (second operand) and caps it at the optional
 *     third operand.  Returns instruction->times * len, or 0 on the
 *     error path.
 *   - anything else: the first template for which matches() returns 100
 *     is sized with calcsize(); the prefixes are then examined and
 *     isize * instruction->times is returned.
 * Returns -1 when no template matched.
 *
 * NOTE(review): several statements (including what happens when
 * opcode == -1 and the statement controlled by the prefix test) are
 * missing from this extract and are therefore not described.
 */
391 long insn_size (long segment
, long offset
, int bits
,
392 insn
*instruction
, efunc error
)
394 struct itemplate
*temp
;
396 errfunc
= error
; /* to pass to other functions */
398 if (instruction
->opcode
== -1)
401 if (instruction
->opcode
== I_DB
||
402 instruction
->opcode
== I_DW
||
403 instruction
->opcode
== I_DD
||
404 instruction
->opcode
== I_DQ
||
405 instruction
->opcode
== I_DT
)
408 long isize
, osize
, wsize
= 0; /* placate gcc */
411 switch (instruction
->opcode
)
413 case I_DB
: wsize
= 1; break;
414 case I_DW
: wsize
= 2; break;
415 case I_DD
: wsize
= 4; break;
416 case I_DQ
: wsize
= 8; break;
417 case I_DT
: wsize
= 10; break;
420 for (e
= instruction
->eops
; e
; e
= e
->next
)
425 if (e
->type
== EOT_DB_NUMBER
)
427 else if (e
->type
== EOT_DB_STRING
)
428 osize
= e
->stringlen
;
430 align
= (-osize
) % wsize
;
433 isize
+= osize
+ align
;
435 return isize
* instruction
->times
;
438 if (instruction
->opcode
== I_INCBIN
)
440 char fname
[FILENAME_MAX
];
444 len
= FILENAME_MAX
-1;
445 if (len
> instruction
->eops
->stringlen
)
446 len
= instruction
->eops
->stringlen
;
/* NOTE(review): fname is an uninitialised local and strncpy adds no terminator when the source is at least len bytes long; the terminating store is not visible in this extract -- confirm fname is NUL-terminated before fopen. */
447 strncpy (fname
, instruction
->eops
->stringval
, len
);
449 if ( (fp
= fopen(fname
, "rb")) == NULL
)
450 error (ERR_NONFATAL
, "`incbin': unable to open file `%s'", fname
);
451 else if (fseek(fp
, 0L, SEEK_END
) < 0)
452 error (ERR_NONFATAL
, "`incbin': unable to seek on file `%s'",
458 if (instruction
->eops
->next
)
460 len
-= instruction
->eops
->next
->offset
;
461 if (instruction
->eops
->next
->next
&&
462 len
> instruction
->eops
->next
->next
->offset
)
464 len
= instruction
->eops
->next
->next
->offset
;
467 return instruction
->times
* len
;
469 return 0; /* if we're here, there's an error */
472 temp
= nasm_instructions
[instruction
->opcode
];
473 while (temp
->opcode
!= -1) {
474 if (matches(temp
, instruction
) == 100) {
475 /* we've matched an instruction. */
477 char * codes
= temp
->code
;
480 isize
= calcsize(segment
, offset
, bits
, instruction
, codes
);
483 for (j
= 0; j
< instruction
->nprefix
; j
++)
485 if ((instruction
->prefixes
[j
] != P_A16
&&
486 instruction
->prefixes
[j
] != P_O16
&& bits
==16) ||
487 (instruction
->prefixes
[j
] != P_A32
&&
488 instruction
->prefixes
[j
] != P_O32
&& bits
==32))
493 return isize
* instruction
->times
;
497 return -1; /* didn't match any instruction */
/*
 * calcsize: walk the template code string and add up the number of
 * bytes the instruction will occupy.
 *
 *   segment, offset - unused (explicitly cast to void)
 *   bits            - current mode, compared against 16 and 32
 *   ins             - instruction whose operands are consulted
 *   codes           - template code string, terminated by a zero byte
 *
 * Visible handling: codes 01-03 skip and count that many literal bytes;
 * 010-012 skip and count one byte; 034-036 and 064-066 count 2 or 4
 * depending on the operand addr_size override (or bits when it is 0);
 * 0300-0302 add the result of chsize(); the cases using (bits==32) and
 * (bits==16) add one byte in that mode only; 0340-0342 add
 * oprs[0].offset shifted left by (c - 0340) after rejecting a
 * segment-relative size; codes 0100-0277 add the size computed by
 * process_ea().  Any other code raises ERR_PANIC.
 *
 * NOTE(review): the statements for several case labels and the return
 * statements are missing from this extract.
 */
500 static long calcsize (long segment
, long offset
, int bits
,
501 insn
*ins
, char *codes
)
506 (void) segment
; /* Don't warn that this parameter is unused */
507 (void) offset
; /* Don't warn that this parameter is unused */
509 while (*codes
) switch (c
= *codes
++) {
510 case 01: case 02: case 03:
511 codes
+= c
, length
+= c
; break;
512 case 04: case 05: case 06: case 07:
514 case 010: case 011: case 012:
515 codes
++, length
++; break;
518 case 014: case 015: case 016:
520 case 020: case 021: case 022:
522 case 024: case 025: case 026:
524 case 030: case 031: case 032:
526 case 034: case 035: case 036:
527 length
+= ((ins
->oprs
[c
-034].addr_size
?
528 ins
->oprs
[c
-034].addr_size
: bits
) == 16 ? 2 : 4); break;
531 case 040: case 041: case 042:
533 case 050: case 051: case 052:
535 case 060: case 061: case 062:
537 case 064: case 065: case 066:
538 length
+= ((ins
->oprs
[c
-064].addr_size
?
539 ins
->oprs
[c
-064].addr_size
: bits
) == 16 ? 2 : 4); break;
540 case 070: case 071: case 072:
542 case 0300: case 0301: case 0302:
543 length
+= chsize (&ins
->oprs
[c
-0300], bits
);
546 length
+= (bits
==32);
549 length
+= (bits
==16);
554 length
+= (bits
==32);
557 length
+= (bits
==16);
562 codes
++, length
++; break;
568 case 0340: case 0341: case 0342:
569 if (ins
->oprs
[0].segment
!= NO_SEG
)
570 errfunc (ERR_NONFATAL
, "attempt to reserve non-constant"
571 " quantity of BSS space");
573 length
+= ins
->oprs
[0].offset
<< (c
-0340);
575 default: /* can't do it by 'case' statements */
576 if (c
>=0100 && c
<=0277) { /* it's an EA */
578 if (!process_ea (&ins
->oprs
[(c
>>3)&7], &ea_data
, bits
, 0,
580 errfunc (ERR_NONFATAL
, "invalid effective address");
583 length
+= ea_data
.size
;
585 errfunc (ERR_PANIC
, "internal instruction table corrupt"
586 ": instruction code 0x%02X given", c
);
/*
 * gencode: emit the bytes described by a template code string, one code
 * at a time, through out().
 *
 *   segment, offset - current output position, passed to out()
 *   bits            - current mode (16 or 32)
 *   ins             - instruction whose operands are encoded
 *   codes           - template code string
 *   insn_end        - offset of the end of the instruction; relative
 *                     operands are computed as target offset - insn_end
 *
 * Visible handling: literal bytes (01-03); segment-register PUSH/POP
 * opcodes chosen from oprs[0].basereg; register-plus-opcode bytes
 * (010-012, via regval); byte, word and long immediates with range
 * warnings; relative operands, emitted as OUT_REL2ADR/OUT_REL4ADR when
 * the target is in a different segment and as a plain difference
 * otherwise; an optional byte when chsize() is nonzero (0300-0302);
 * condition-code opcodes via the condval table; OUT_RESERVE for
 * 0340-0342; and ModRM/SIB plus displacement for codes 0100-0277 via
 * process_ea().  Any other code raises ERR_PANIC.
 *
 * NOTE(review): many statements of this function are missing from this
 * extract, so only the lines shown are described.
 */
591 static void gencode (long segment
, long offset
, int bits
,
592 insn
*ins
, char *codes
, long insn_end
)
594 static char condval
[] = { /* conditional opcodes */
595 0x7, 0x3, 0x2, 0x6, 0x2, 0x4, 0xF, 0xD, 0xC, 0xE, 0x6, 0x2,
596 0x3, 0x7, 0x3, 0x5, 0xE, 0xC, 0xD, 0xF, 0x1, 0xB, 0x9, 0x5,
597 0x0, 0xA, 0xA, 0xB, 0x8, 0x4
600 unsigned char bytes
[4];
604 switch (c
= *codes
++)
606 case 01: case 02: case 03:
607 out (offset
, segment
, codes
, OUT_RAWDATA
+c
, NO_SEG
, NO_SEG
);
613 switch (ins
->oprs
[0].basereg
)
616 bytes
[0] = 0x0E + (c
== 0x04 ? 1 : 0); break;
618 bytes
[0] = 0x1E + (c
== 0x04 ? 1 : 0); break;
620 bytes
[0] = 0x06 + (c
== 0x04 ? 1 : 0); break;
622 bytes
[0] = 0x16 + (c
== 0x04 ? 1 : 0); break;
624 errfunc (ERR_PANIC
, "bizarre 8086 segment register received");
626 out (offset
, segment
, bytes
, OUT_RAWDATA
+1, NO_SEG
, NO_SEG
);
631 switch (ins
->oprs
[0].basereg
) {
632 case R_FS
: bytes
[0] = 0xA0 + (c
== 0x05 ? 1 : 0); break;
633 case R_GS
: bytes
[0] = 0xA8 + (c
== 0x05 ? 1 : 0); break;
635 errfunc (ERR_PANIC
, "bizarre 386 segment register received");
637 out (offset
, segment
, bytes
, OUT_RAWDATA
+1, NO_SEG
, NO_SEG
);
641 case 010: case 011: case 012:
642 bytes
[0] = *codes
++ + regval(&ins
->oprs
[c
-010]);
643 out (offset
, segment
, bytes
, OUT_RAWDATA
+1, NO_SEG
, NO_SEG
);
649 out (offset
, segment
, bytes
, OUT_RAWDATA
+1, NO_SEG
, NO_SEG
);
653 case 014: case 015: case 016:
654 if (ins
->oprs
[c
-014].offset
< -128
655 || ins
->oprs
[c
-014].offset
> 127)
657 errfunc (ERR_WARNING
, "signed byte value exceeds bounds");
660 if (ins
->oprs
[c
-014].segment
!= NO_SEG
)
662 data
= ins
->oprs
[c
-014].offset
;
663 out (offset
, segment
, &data
, OUT_ADDRESS
+1,
664 ins
->oprs
[c
-014].segment
, ins
->oprs
[c
-014].wrt
);
667 bytes
[0] = ins
->oprs
[c
-014].offset
;
668 out (offset
, segment
, bytes
, OUT_RAWDATA
+1, NO_SEG
, NO_SEG
);
673 case 020: case 021: case 022:
674 if (ins
->oprs
[c
-020].offset
< -256
675 || ins
->oprs
[c
-020].offset
> 255)
677 errfunc (ERR_WARNING
, "byte value exceeds bounds");
679 if (ins
->oprs
[c
-020].segment
!= NO_SEG
) {
680 data
= ins
->oprs
[c
-020].offset
;
681 out (offset
, segment
, &data
, OUT_ADDRESS
+1,
682 ins
->oprs
[c
-020].segment
, ins
->oprs
[c
-020].wrt
);
685 bytes
[0] = ins
->oprs
[c
-020].offset
;
686 out (offset
, segment
, bytes
, OUT_RAWDATA
+1, NO_SEG
, NO_SEG
);
691 case 024: case 025: case 026:
692 if (ins
->oprs
[c
-024].offset
< 0 || ins
->oprs
[c
-024].offset
> 255)
693 errfunc (ERR_WARNING
, "unsigned byte value exceeds bounds");
694 if (ins
->oprs
[c
-024].segment
!= NO_SEG
) {
695 data
= ins
->oprs
[c
-024].offset
;
696 out (offset
, segment
, &data
, OUT_ADDRESS
+1,
697 ins
->oprs
[c
-024].segment
, ins
->oprs
[c
-024].wrt
);
700 bytes
[0] = ins
->oprs
[c
-024].offset
;
701 out (offset
, segment
, bytes
, OUT_RAWDATA
+1, NO_SEG
, NO_SEG
);
706 case 030: case 031: case 032:
707 if (ins
->oprs
[c
-030].segment
== NO_SEG
&&
708 ins
->oprs
[c
-030].wrt
== NO_SEG
&&
709 (ins
->oprs
[c
-030].offset
< -65536L ||
710 ins
->oprs
[c
-030].offset
> 65535L))
712 errfunc (ERR_WARNING
, "word value exceeds bounds");
714 data
= ins
->oprs
[c
-030].offset
;
715 out (offset
, segment
, &data
, OUT_ADDRESS
+2,
716 ins
->oprs
[c
-030].segment
, ins
->oprs
[c
-030].wrt
);
720 case 034: case 035: case 036:
721 data
= ins
->oprs
[c
-034].offset
;
722 size
= ((ins
->oprs
[c
-034].addr_size
?
723 ins
->oprs
[c
-034].addr_size
: bits
) == 16 ? 2 : 4);
/* NOTE(review): size was just set to 2 or 4, so the size==16 test below can never be true and the bounds warning is unreachable; the intended test looks like size==2 -- confirm. */
724 if (size
==16 && (data
< -65536L || data
> 65535L))
725 errfunc (ERR_WARNING
, "word value exceeds bounds");
726 out (offset
, segment
, &data
, OUT_ADDRESS
+size
,
727 ins
->oprs
[c
-034].segment
, ins
->oprs
[c
-034].wrt
);
732 if (ins
->oprs
[0].segment
== NO_SEG
)
733 errfunc (ERR_NONFATAL
, "value referenced by FAR is not"
736 out (offset
, segment
, &data
, OUT_ADDRESS
+2,
737 outfmt
->segbase(1+ins
->oprs
[0].segment
),
742 case 040: case 041: case 042:
743 data
= ins
->oprs
[c
-040].offset
;
744 out (offset
, segment
, &data
, OUT_ADDRESS
+4,
745 ins
->oprs
[c
-040].segment
, ins
->oprs
[c
-040].wrt
);
749 case 050: case 051: case 052:
750 if (ins
->oprs
[c
-050].segment
!= segment
)
751 errfunc (ERR_NONFATAL
, "short relative jump outside segment");
752 data
= ins
->oprs
[c
-050].offset
- insn_end
;
753 if (data
> 127 || data
< -128)
754 errfunc (ERR_NONFATAL
, "short jump is out of range");
756 out (offset
, segment
, bytes
, OUT_RAWDATA
+1, NO_SEG
, NO_SEG
);
760 case 060: case 061: case 062:
761 if (ins
->oprs
[c
-060].segment
!= segment
) {
762 data
= ins
->oprs
[c
-060].offset
;
763 out (offset
, segment
, &data
, OUT_REL2ADR
+insn_end
-offset
,
764 ins
->oprs
[c
-060].segment
, ins
->oprs
[c
-060].wrt
);
766 data
= ins
->oprs
[c
-060].offset
- insn_end
;
767 out (offset
, segment
, &data
,
768 OUT_ADDRESS
+2, NO_SEG
, NO_SEG
);
773 case 064: case 065: case 066:
774 size
= ((ins
->oprs
[c
-064].addr_size
?
775 ins
->oprs
[c
-064].addr_size
: bits
) == 16 ? 2 : 4);
776 if (ins
->oprs
[c
-064].segment
!= segment
) {
777 data
= ins
->oprs
[c
-064].offset
;
/* NOTE(review): this branch picks REL2ADR/REL4ADR (and the size afterwards) from bits alone, discarding the addr_size override used to compute size above and used by calcsize for the same codes -- confirm this is intended. */
778 size
= (bits
== 16 ? OUT_REL2ADR
: OUT_REL4ADR
);
779 out (offset
, segment
, &data
, size
+insn_end
-offset
,
780 ins
->oprs
[c
-064].segment
, ins
->oprs
[c
-064].wrt
);
781 size
= (bits
== 16 ? 2 : 4);
783 data
= ins
->oprs
[c
-064].offset
- insn_end
;
784 out (offset
, segment
, &data
,
785 OUT_ADDRESS
+size
, NO_SEG
, NO_SEG
);
790 case 070: case 071: case 072:
791 if (ins
->oprs
[c
-070].segment
!= segment
) {
792 data
= ins
->oprs
[c
-070].offset
;
793 out (offset
, segment
, &data
, OUT_REL4ADR
+insn_end
-offset
,
794 ins
->oprs
[c
-070].segment
, ins
->oprs
[c
-070].wrt
);
796 data
= ins
->oprs
[c
-070].offset
- insn_end
;
797 out (offset
, segment
, &data
,
798 OUT_ADDRESS
+4, NO_SEG
, NO_SEG
);
803 case 0300: case 0301: case 0302:
804 if (chsize (&ins
->oprs
[c
-0300], bits
)) {
806 out (offset
, segment
, bytes
,
807 OUT_RAWDATA
+1, NO_SEG
, NO_SEG
);
816 out (offset
, segment
, bytes
,
817 OUT_RAWDATA
+1, NO_SEG
, NO_SEG
);
826 out (offset
, segment
, bytes
,
827 OUT_RAWDATA
+1, NO_SEG
, NO_SEG
);
839 out (offset
, segment
, bytes
,
840 OUT_RAWDATA
+1, NO_SEG
, NO_SEG
);
849 out (offset
, segment
, bytes
,
850 OUT_RAWDATA
+1, NO_SEG
, NO_SEG
);
860 *bytes
= *codes
++ + condval
[ins
->condition
];
861 out (offset
, segment
, bytes
,
862 OUT_RAWDATA
+1, NO_SEG
, NO_SEG
);
872 out (offset
, segment
, bytes
,
873 OUT_RAWDATA
+1, NO_SEG
, NO_SEG
);
877 case 0340: case 0341: case 0342:
878 if (ins
->oprs
[0].segment
!= NO_SEG
)
879 errfunc (ERR_PANIC
, "non-constant BSS size in pass two");
881 long size
= ins
->oprs
[0].offset
<< (c
-0340);
883 out (offset
, segment
, NULL
,
884 OUT_RESERVE
+size
, NO_SEG
, NO_SEG
);
889 default: /* can't do it by 'case' statements */
890 if (c
>=0100 && c
<=0277) { /* it's an EA */
896 if (c
<=0177) /* pick rfield from operand b */
897 rfield
= regval (&ins
->oprs
[c
&7]);
898 else /* rfield is constant */
901 if (!process_ea (&ins
->oprs
[(c
>>3)&7], &ea_data
, bits
, rfield
,
904 errfunc (ERR_NONFATAL
, "invalid effective address");
908 *p
++ = ea_data
.modrm
;
909 if (ea_data
.sib_present
)
913 out (offset
, segment
, bytes
, OUT_RAWDATA
+ s
,
916 switch (ea_data
.bytes
) {
920 if (ins
->oprs
[(c
>>3)&7].segment
!= NO_SEG
) {
921 data
= ins
->oprs
[(c
>>3)&7].offset
;
922 out (offset
, segment
, &data
, OUT_ADDRESS
+1,
923 ins
->oprs
[(c
>>3)&7].segment
,
924 ins
->oprs
[(c
>>3)&7].wrt
);
926 *bytes
= ins
->oprs
[(c
>>3)&7].offset
;
927 out (offset
, segment
, bytes
, OUT_RAWDATA
+1,
934 data
= ins
->oprs
[(c
>>3)&7].offset
;
935 out (offset
, segment
, &data
,
936 OUT_ADDRESS
+ea_data
.bytes
,
937 ins
->oprs
[(c
>>3)&7].segment
, ins
->oprs
[(c
>>3)&7].wrt
);
943 errfunc (ERR_PANIC
, "internal instruction table corrupt"
944 ": instruction code 0x%02X given", c
);
/*
 * regval: map the register held in o->basereg to the small integer used
 * when building opcode and ModRM bytes (gencode adds the result to an
 * opcode byte and also uses it as the ModRM spare field).
 *
 * Each group of case labels collects registers of different classes
 * that share one encoding, for example EAX, AX, AL, ES, CR0, DR0, ST0,
 * MM0 and XMM0.  An unrecognised register raises ERR_PANIC via errfunc.
 *
 * NOTE(review): the return statement for each group is missing from
 * this extract, so the concrete values are not documented here.
 */
948 static int regval (operand
*o
)
950 switch (o
->basereg
) {
951 case R_EAX
: case R_AX
: case R_AL
: case R_ES
: case R_CR0
: case R_DR0
:
952 case R_ST0
: case R_MM0
: case R_XMM0
:
954 case R_ECX
: case R_CX
: case R_CL
: case R_CS
: case R_DR1
: case R_ST1
:
955 case R_MM1
: case R_XMM1
:
957 case R_EDX
: case R_DX
: case R_DL
: case R_SS
: case R_CR2
: case R_DR2
:
958 case R_ST2
: case R_MM2
: case R_XMM2
:
960 case R_EBX
: case R_BX
: case R_BL
: case R_DS
: case R_CR3
: case R_DR3
:
961 case R_TR3
: case R_ST3
: case R_MM3
: case R_XMM3
:
963 case R_ESP
: case R_SP
: case R_AH
: case R_FS
: case R_CR4
: case R_TR4
:
964 case R_ST4
: case R_MM4
: case R_XMM4
:
966 case R_EBP
: case R_BP
: case R_CH
: case R_GS
: case R_TR5
: case R_ST5
:
967 case R_MM5
: case R_XMM5
:
969 case R_ESI
: case R_SI
: case R_DH
: case R_DR6
: case R_TR6
: case R_ST6
:
970 case R_MM6
: case R_XMM6
:
972 case R_EDI
: case R_DI
: case R_BH
: case R_DR7
: case R_TR7
: case R_ST7
:
973 case R_MM7
: case R_XMM7
:
976 errfunc (ERR_PANIC
, "invalid register operand given to regval()");
/*
 * matches: compare one instruction template against a parsed
 * instruction.
 *
 * Returns 0 as soon as the opcode or the operand count differs.  The
 * remaining checks, in order, are: stray COLON/TO flags on the operands,
 * operand type flags (including the SIZE_MASK bits), and operand sizes
 * derived from the template flags (IF_ARMASK with IF_AR0/1/2, IF_SB,
 * IF_SW, IF_SD, IF_SM and IF_SM2).
 * assemble() and insn_size() treat a result of 100 as a full match.
 *
 * NOTE(review): the non-zero return statements are missing from this
 * extract, so the other result codes are not documented here.
 */
981 static int matches (struct itemplate
*itemp
, insn
*instruction
)
983 int i
, size
[3], asize
, oprs
, ret
;
990 if (itemp
->opcode
!= instruction
->opcode
) return 0;
995 if (itemp
->operands
!= instruction
->operands
) return 0;
998 * Check that no spurious colons or TOs are present
1000 for (i
=0; i
<itemp
->operands
; i
++)
1001 if (instruction
->oprs
[i
].type
& ~itemp
->opd
[i
] & (COLON
|TO
))
1005 * Check that the operand flags all match up
1007 for (i
=0; i
<itemp
->operands
; i
++)
1008 if (itemp
->opd
[i
] & ~instruction
->oprs
[i
].type
||
1009 ((itemp
->opd
[i
] & SIZE_MASK
) &&
1010 ((itemp
->opd
[i
] ^ instruction
->oprs
[i
].type
) & SIZE_MASK
)))
1012 if ((itemp
->opd
[i
] & ~instruction
->oprs
[i
].type
& NON_SIZE
) ||
1013 (instruction
->oprs
[i
].type
& SIZE_MASK
))
1020 * Check operand sizes
1022 if (itemp
->flags
& IF_ARMASK
) {
1023 size
[0] = size
[1] = size
[2] = 0;
1025 switch (itemp
->flags
& IF_ARMASK
) {
1026 case IF_AR0
: i
= 0; break;
1027 case IF_AR1
: i
= 1; break;
1028 case IF_AR2
: i
= 2; break;
1029 default: break; /* Shouldn't happen */
1031 if (itemp
->flags
& IF_SB
) {
1033 } else if (itemp
->flags
& IF_SW
) {
1035 } else if (itemp
->flags
& IF_SD
) {
1040 if (itemp
->flags
& IF_SB
) {
1042 oprs
= itemp
->operands
;
1043 } else if (itemp
->flags
& IF_SW
) {
1045 oprs
= itemp
->operands
;
1046 } else if (itemp
->flags
& IF_SD
) {
1048 oprs
= itemp
->operands
;
1050 size
[0] = size
[1] = size
[2] = asize
;
1053 if (itemp
->flags
& (IF_SM
| IF_SM2
)) {
1054 oprs
= (itemp
->flags
& IF_SM2
? 2 : itemp
->operands
);
1056 for (i
=0; i
<oprs
; i
++) {
1057 if ( (asize
= itemp
->opd
[i
] & SIZE_MASK
) != 0) {
1059 for (j
=0; j
<oprs
; j
++)
1065 oprs
= itemp
->operands
;
1068 for (i
=0; i
<itemp
->operands
; i
++)
1069 if (!(itemp
->opd
[i
] & SIZE_MASK
) &&
1070 (instruction
->oprs
[i
].type
& SIZE_MASK
& ~size
[i
]))
/*
 * process_ea: work out the ModRM byte (and SIB byte, when one is
 * needed) for an operand and store the result in *output.
 *
 *   input    - operand to encode: a single register or a memory reference
 *   output   - receives sib_present, bytes, modrm, sib and size
 *   addrbits - current address size; for a pure offset it is replaced
 *              by input->addr_size when that is nonzero
 *   rfield   - value placed in bits 3-5 of the ModRM byte
 *   (the last parameter is cut off in this extract; the body reads a
 *    value named forw_ref when choosing the displacement size)
 *
 * Register operands produce modrm 0xC0 | rfield<<3 | (index in regs & 7)
 * with no SIB and no displacement.  Pure offsets produce a 4-byte or
 * 2-byte displacement.  Indirections are split into a 32-bit form, used
 * when any Exx register appears and which may need a SIB byte, and a
 * 16-bit form restricted to BX, BP, SI and DI.
 * Returns NULL for register combinations, scales or size overrides that
 * cannot be encoded.  size is set to 1 + sib_present + bytes.
 *
 * NOTE(review): many statements, including the successful return, are
 * missing from this extract.
 */
1076 static ea
*process_ea (operand
*input
, ea
*output
, int addrbits
, int rfield
,
1079 if (!(REGISTER
& ~input
->type
)) { /* it's a single register */
1080 static int regs
[] = {
1081 R_AL
, R_CL
, R_DL
, R_BL
, R_AH
, R_CH
, R_DH
, R_BH
,
1082 R_AX
, R_CX
, R_DX
, R_BX
, R_SP
, R_BP
, R_SI
, R_DI
,
1083 R_EAX
, R_ECX
, R_EDX
, R_EBX
, R_ESP
, R_EBP
, R_ESI
, R_EDI
,
1084 R_MM0
, R_MM1
, R_MM2
, R_MM3
, R_MM4
, R_MM5
, R_MM6
, R_MM7
,
1085 R_XMM0
, R_XMM1
, R_XMM2
, R_XMM3
, R_XMM4
, R_XMM5
, R_XMM6
, R_XMM7
1089 for (i
=0; i
<elements(regs
); i
++)
1090 if (input
->basereg
== regs
[i
]) break;
1091 if (i
<elements(regs
)) {
1092 output
->sib_present
= FALSE
;/* no SIB necessary */
1093 output
->bytes
= 0; /* no offset necessary either */
1094 output
->modrm
= 0xC0 | (rfield
<< 3) | (i
& 7);
1098 } else { /* it's a memory reference */
1099 if (input
->basereg
==-1 && (input
->indexreg
==-1 || input
->scale
==0)) {
1100 /* it's a pure offset */
1101 if (input
->addr_size
)
1102 addrbits
= input
->addr_size
;
1103 output
->sib_present
= FALSE
;
1104 output
->bytes
= (addrbits
==32 ? 4 : 2);
1105 output
->modrm
= (addrbits
==32 ? 5 : 6) | (rfield
<< 3);
1107 else { /* it's an indirection */
1108 int i
=input
->indexreg
, b
=input
->basereg
, s
=input
->scale
;
1109 long o
=input
->offset
, seg
=input
->segment
;
1110 int hb
=input
->hintbase
, ht
=input
->hinttype
;
1113 if (s
==0) i
= -1; /* make this easy, at least */
1115 if (i
==R_EAX
|| i
==R_EBX
|| i
==R_ECX
|| i
==R_EDX
1116 || i
==R_EBP
|| i
==R_ESP
|| i
==R_ESI
|| i
==R_EDI
1117 || b
==R_EAX
|| b
==R_EBX
|| b
==R_ECX
|| b
==R_EDX
1118 || b
==R_EBP
|| b
==R_ESP
|| b
==R_ESI
|| b
==R_EDI
) {
1119 /* it must be a 32-bit memory reference. Firstly we have
1120 * to check that all registers involved are type Exx. */
1121 if (i
!=-1 && i
!=R_EAX
&& i
!=R_EBX
&& i
!=R_ECX
&& i
!=R_EDX
1122 && i
!=R_EBP
&& i
!=R_ESP
&& i
!=R_ESI
&& i
!=R_EDI
)
1124 if (b
!=-1 && b
!=R_EAX
&& b
!=R_EBX
&& b
!=R_ECX
&& b
!=R_EDX
1125 && b
!=R_EBP
&& b
!=R_ESP
&& b
!=R_ESI
&& b
!=R_EDI
)
1128 /* While we're here, ensure the user didn't specify WORD. */
1129 if (input
->addr_size
== 16)
1132 /* now reorganise base/index */
1133 if (s
== 1 && b
!= i
&& b
!= -1 && i
!= -1 &&
1134 ((hb
==b
&&ht
==EAH_NOTBASE
) || (hb
==i
&&ht
==EAH_MAKEBASE
)))
1135 t
= b
, b
= i
, i
= t
; /* swap if hints say so */
1136 if (b
==i
) /* convert EAX+2*EAX to 3*EAX */
1138 if (b
==-1 && s
==1 && !(hb
== i
&& ht
== EAH_NOTBASE
))
1139 b
= i
, i
= -1; /* make single reg base, unless hint */
1140 if (((s
==2 && i
!=R_ESP
&& !(input
->eaflags
& EAF_TIMESTWO
)) ||
1141 s
==3 || s
==5 || s
==9) && b
==-1)
1142 b
= i
, s
--; /* convert 3*EAX to EAX+2*EAX */
1143 if (s
==1 && i
==R_ESP
) /* swap ESP into base if scale is 1 */
1145 if (i
==R_ESP
|| (s
!=1 && s
!=2 && s
!=4 && s
!=8 && i
!=-1))
1146 return NULL
; /* wrong, for various reasons */
1148 if (i
==-1 && b
!=R_ESP
) {/* no SIB needed */
1151 case R_EAX
: rm
= 0; break;
1152 case R_ECX
: rm
= 1; break;
1153 case R_EDX
: rm
= 2; break;
1154 case R_EBX
: rm
= 3; break;
1155 case R_EBP
: rm
= 5; break;
1156 case R_ESI
: rm
= 6; break;
1157 case R_EDI
: rm
= 7; break;
1158 case -1: rm
= 5; break;
1159 default: /* should never happen */
1162 if (b
==-1 || (b
!=R_EBP
&& o
==0 &&
1163 seg
==NO_SEG
&& !forw_ref
&&
1165 (EAF_BYTEOFFS
|EAF_WORDOFFS
))))
1167 else if (input
->eaflags
& EAF_BYTEOFFS
||
1168 (o
>=-128 && o
<=127 && seg
==NO_SEG
&& !forw_ref
&&
1169 !(input
->eaflags
& EAF_WORDOFFS
))) {
1175 output
->sib_present
= FALSE
;
1176 output
->bytes
= (b
==-1 || mod
==2 ? 4 : mod
);
1177 output
->modrm
= (mod
<<6) | (rfield
<<3) | rm
;
1179 else { /* we need a SIB */
1180 int mod
, scale
, index
, base
;
1183 case R_EAX
: base
= 0; break;
1184 case R_ECX
: base
= 1; break;
1185 case R_EDX
: base
= 2; break;
1186 case R_EBX
: base
= 3; break;
1187 case R_ESP
: base
= 4; break;
1188 case R_EBP
: case -1: base
= 5; break;
1189 case R_ESI
: base
= 6; break;
1190 case R_EDI
: base
= 7; break;
1191 default: /* then what the smeg is it? */
1192 return NULL
; /* panic */
1196 case R_EAX
: index
= 0; break;
1197 case R_ECX
: index
= 1; break;
1198 case R_EDX
: index
= 2; break;
1199 case R_EBX
: index
= 3; break;
1200 case -1: index
= 4; break;
1201 case R_EBP
: index
= 5; break;
1202 case R_ESI
: index
= 6; break;
1203 case R_EDI
: index
= 7; break;
1204 default: /* then what the smeg is it? */
1205 return NULL
; /* panic */
1210 case 1: scale
= 0; break;
1211 case 2: scale
= 1; break;
1212 case 4: scale
= 2; break;
1213 case 8: scale
= 3; break;
1214 default: /* then what the smeg is it? */
1215 return NULL
; /* panic */
1218 if (b
==-1 || (b
!=R_EBP
&& o
==0 &&
1219 seg
==NO_SEG
&& !forw_ref
&&
1221 (EAF_BYTEOFFS
|EAF_WORDOFFS
))))
1223 else if (input
->eaflags
& EAF_BYTEOFFS
||
1224 (o
>=-128 && o
<=127 && seg
==NO_SEG
&& !forw_ref
&&
1225 !(input
->eaflags
& EAF_WORDOFFS
)))
1230 output
->sib_present
= TRUE
;
1231 output
->bytes
= (b
==-1 || mod
==2 ? 4 : mod
);
1232 output
->modrm
= (mod
<<6) | (rfield
<<3) | 4;
1233 output
->sib
= (scale
<<6) | (index
<<3) | base
;
1236 else { /* it's 16-bit */
1239 /* check all registers are BX, BP, SI or DI */
1240 if ((b
!=-1 && b
!=R_BP
&& b
!=R_BX
&& b
!=R_SI
&& b
!=R_DI
) ||
1241 (i
!=-1 && i
!=R_BP
&& i
!=R_BX
&& i
!=R_SI
&& i
!=R_DI
))
1244 /* ensure the user didn't specify DWORD */
1245 if (input
->addr_size
== 32)
1248 if (s
!=1 && i
!=-1) return NULL
;/* no can do, in 16-bit EA */
/* NOTE(review): the two chained XOR swaps below assign b twice within one expression with no intervening sequence point, which is undefined behaviour in C; a swap through a temporary would be safe. */
1249 if (b
==-1 && i
!=-1) b
^= i
^= b
^= i
; /* swap them round */
1250 if ((b
==R_SI
|| b
==R_DI
) && i
!=-1)
1251 b
^= i
^= b
^= i
; /* have BX/BP as base, SI/DI index */
1252 if (b
==i
) return NULL
;/* shouldn't ever happen, in theory */
1253 if (i
!=-1 && b
!=-1 &&
1254 (i
==R_BP
|| i
==R_BX
|| b
==R_SI
|| b
==R_DI
))
1255 return NULL
; /* invalid combinations */
1256 if (b
==-1) /* pure offset: handled above */
1257 return NULL
; /* so if it gets to here, panic! */
1261 switch (i
*256 + b
) {
1262 case R_SI
*256+R_BX
: rm
=0; break;
1263 case R_DI
*256+R_BX
: rm
=1; break;
1264 case R_SI
*256+R_BP
: rm
=2; break;
1265 case R_DI
*256+R_BP
: rm
=3; break;
1269 case R_SI
: rm
=4; break;
1270 case R_DI
: rm
=5; break;
1271 case R_BP
: rm
=6; break;
1272 case R_BX
: rm
=7; break;
1274 if (rm
==-1) /* can't happen, in theory */
1275 return NULL
; /* so panic if it does */
1277 if (o
==0 && seg
==NO_SEG
&& !forw_ref
&& rm
!=6 &&
1278 !(input
->eaflags
& (EAF_BYTEOFFS
|EAF_WORDOFFS
)))
1280 else if (input
->eaflags
& EAF_BYTEOFFS
||
1281 (o
>=-128 && o
<=127 && seg
==NO_SEG
&& !forw_ref
&&
1282 !(input
->eaflags
& EAF_WORDOFFS
)))
1287 output
->sib_present
= FALSE
; /* no SIB - it's 16-bit */
1288 output
->bytes
= mod
; /* bytes of offset needed */
1289 output
->modrm
= (mod
<<6) | (rfield
<<3) | rm
;
1293 output
->size
= 1 + output
->sib_present
+ output
->bytes
;
/*
 * chsize: report whether a memory operand uses an address size other
 * than the current one (addrbits).
 *
 * For an operand whose type has every MEMORY bit set:
 *   - pure offset (no base and no index; an index with scale 0 is
 *     ignored): nonzero when addr_size is set and differs from addrbits;
 *   - any Exx register used as base or index: nonzero when addrbits==16;
 *   - otherwise: nonzero when addrbits==32.
 * calcsize() adds the result to the instruction length, and gencode()
 * emits one extra byte when it is nonzero (template codes 0300-0302,
 * described in the file header as the optional 0x67 byte).
 *
 * NOTE(review): the return for non-memory operands is missing from this
 * extract.
 */
1297 static int chsize (operand
*input
, int addrbits
)
1299 if (!(MEMORY
& ~input
->type
)) {
1300 int i
=input
->indexreg
, b
=input
->basereg
;
1302 if (input
->scale
==0) i
= -1;
1304 if (i
== -1 && b
== -1) /* pure offset */
1305 return (input
->addr_size
!= 0 && input
->addr_size
!= addrbits
);
1307 if (i
==R_EAX
|| i
==R_EBX
|| i
==R_ECX
|| i
==R_EDX
1308 || i
==R_EBP
|| i
==R_ESP
|| i
==R_ESI
|| i
==R_EDI
1309 || b
==R_EAX
|| b
==R_EBX
|| b
==R_ECX
|| b
==R_EDX
1310 || b
==R_EBP
|| b
==R_ESP
|| b
==R_ESI
|| b
==R_EDI
)
1311 return (addrbits
==16);
1313 return (addrbits
==32);