1 /* assemble.c code generation for the Netwide Assembler
3 * The Netwide Assembler is copyright (C) 1996 Simon Tatham and
4 * Julian Hall. All rights reserved. The software is
5 * redistributable under the licence given in the file "Licence"
6 * distributed in the NASM archive.
8 * the actual codes (C syntax, i.e. octal):
9 * \0 - terminates the code. (Unless it's a literal of course.)
10 * \1, \2, \3 - that many literal bytes follow in the code stream
11 * \4, \6 - the POP/PUSH (respectively) codes for CS, DS, ES, SS
12 * (POP is never used for CS) depending on operand 0
13 * \5, \7 - the second byte of POP/PUSH codes for FS, GS, depending
15 * \10, \11, \12 - a literal byte follows in the code stream, to be added
16 * to the register value of operand 0, 1 or 2
17 * \17 - encodes the literal byte 0. (Some compilers don't take
18 * kindly to a zero byte in the _middle_ of a compile time
19 * string constant, so I had to put this hack in.)
20 * \14, \15, \16 - a signed byte immediate operand, from operand 0, 1 or 2
21 * \20, \21, \22 - a byte immediate operand, from operand 0, 1 or 2
22 * \24, \25, \26 - an unsigned byte immediate operand, from operand 0, 1 or 2
23 * \30, \31, \32 - a word immediate operand, from operand 0, 1 or 2
24 * \34, \35, \36 - select between \3[012] and \4[012] depending on 16/32 bit
25 * assembly mode or the operand-size override on the operand
26 * \37 - a word constant, from the _segment_ part of operand 0
27 * \40, \41, \42 - a long immediate operand, from operand 0, 1 or 2
28 * \44, \45, \46 - select between \3[012] and \4[012] depending on 16/32 bit
29 * assembly mode or the address-size override on the operand
30 * \50, \51, \52 - a byte relative operand, from operand 0, 1 or 2
31 * \60, \61, \62 - a word relative operand, from operand 0, 1 or 2
32 * \64, \65, \66 - select between \6[012] and \7[012] depending on 16/32 bit
33 * assembly mode or the operand-size override on the operand
34 * \70, \71, \72 - a long relative operand, from operand 0, 1 or 2
35 * \1ab - a ModRM, calculated on EA in operand a, with the spare
36 * field the register value of operand b.
37 * \130,\131,\132 - an immediate word or signed byte for operand 0, 1, or 2
38 * \133,\134,\135 - or 2 (s-field) into next opcode byte if operand 0, 1, or 2
39 * is a signed byte rather than a word.
40 * \140,\141,\142 - an immediate dword or signed byte for operand 0, 1, or 2
41 * \143,\144,\145 - or 2 (s-field) into next opcode byte if operand 0, 1, or 2
42 * is a signed byte rather than a dword.
43 * \2ab - a ModRM, calculated on EA in operand a, with the spare
44 * field equal to digit b.
45 * \30x - might be an 0x67 byte, depending on the address size of
46 * the memory reference in operand x.
47 * \310 - indicates fixed 16-bit address size, i.e. optional 0x67.
48 * \311 - indicates fixed 32-bit address size, i.e. optional 0x67.
49 * \312 - (disassembler only) marker on LOOP, LOOPxx instructions.
50 * \320 - indicates fixed 16-bit operand size, i.e. optional 0x66.
51 * \321 - indicates fixed 32-bit operand size, i.e. optional 0x66.
52 * \322 - indicates that this instruction is only valid when the
53 * operand size is the default (instruction to disassembler,
54 * generates no code in the assembler)
55 * \330 - a literal byte follows in the code stream, to be added
56 * to the condition code value of the instruction.
57 * \331 - instruction not valid with REP prefix. Hint for
58 * disassembler only; for SSE instructions.
59 * \332 - disassemble a rep (0xF3 byte) prefix as repe not rep.
60 * \333 - REP prefix (0xF3 byte); for SSE instructions. Not encoded
61 * as a literal byte in order to aid the disassembler.
62 * \340 - reserve <operand 0> bytes of uninitialised storage.
63 * Operand 0 had better be a segmentless constant.
64 * \370,\371,\372 - match only if operand 0 meets byte jump criteria.
65 * 370 is used for Jcc, 371 is used for JMP.
66 * \373 - assemble 0x03 if bits==16, 0x05 if bits==32;
67 * used for conditional jump over longer jump
79 extern struct itemplate
*nasm_instructions
[];
82 int sib_present
; /* is a SIB byte necessary? */
83 int bytes
; /* # of bytes of offset needed */
84 int size
; /* lazy - this is sib+bytes+1 */
85 unsigned char modrm
, sib
; /* the bytes themselves */
88 static unsigned long cpu
; /* cpu level received from nasm.c */
90 static struct ofmt
*outfmt
;
93 static long calcsize(long, long, int, insn
*, const char *);
94 static void gencode(long, long, int, insn
*, const char *, long);
95 static int regval(operand
* o
);
96 static int matches(struct itemplate
*, insn
*);
97 static ea
*process_ea(operand
*, ea
*, int, int, int);
98 static int chsize(operand
*, int);
101 * This routine wraps the real output format's output routine,
102 * in order to pass a copy of the data off to the listing file
103 * generator at the same time.
105 static void out(long offset
, long segto
, const void *data
,
106 unsigned long type
, long segment
, long wrt
)
108 static long lineno
= 0; /* static!!! */
109 static char *lnfname
= NULL
;
111 if ((type
& OUT_TYPMASK
) == OUT_ADDRESS
) {
112 if (segment
!= NO_SEG
|| wrt
!= NO_SEG
) {
114 * This address is relocated. We must write it as
115 * OUT_ADDRESS, so there's no work to be done here.
117 list
->output(offset
, data
, type
);
119 unsigned char p
[4], *q
= p
;
121 * This is a non-relocated address, and we're going to
122 * convert it into RAWDATA format.
124 if ((type
& OUT_SIZMASK
) == 4) {
125 WRITELONG(q
, *(long *)data
);
126 list
->output(offset
, p
, OUT_RAWDATA
+ 4);
128 WRITESHORT(q
, *(long *)data
);
129 list
->output(offset
, p
, OUT_RAWDATA
+ 2);
132 } else if ((type
& OUT_TYPMASK
) == OUT_RAWDATA
) {
133 list
->output(offset
, data
, type
);
134 } else if ((type
& OUT_TYPMASK
) == OUT_RESERVE
) {
135 list
->output(offset
, NULL
, type
);
136 } else if ((type
& OUT_TYPMASK
) == OUT_REL2ADR
||
137 (type
& OUT_TYPMASK
) == OUT_REL4ADR
) {
138 list
->output(offset
, data
, type
);
142 * this call to src_get determines when we call the
143 * debug-format-specific "linenum" function
144 * it updates lineno and lnfname to the current values
145 * returning 0 if "same as last time", -2 if lnfname
146 * changed, and the amount by which lineno changed,
147 * if it did. thus, these variables must be static
150 if (src_get(&lineno
, &lnfname
)) {
151 outfmt
->current_dfmt
->linenum(lnfname
, lineno
, segto
);
154 outfmt
->output(segto
, data
, type
, segment
, wrt
);
157 static int jmp_match(long segment
, long offset
, int bits
,
158 insn
* ins
, const char *code
)
161 unsigned char c
= code
[0];
163 if (c
!= 0370 && c
!= 0371)
165 if (ins
->oprs
[0].opflags
& OPFLAG_FORWARD
) {
166 if ((optimizing
< 0 || (ins
->oprs
[0].type
& STRICT
))
170 return (pass0
== 0); /* match a forward reference */
172 isize
= calcsize(segment
, offset
, bits
, ins
, code
);
173 if (ins
->oprs
[0].segment
!= segment
)
175 isize
= ins
->oprs
[0].offset
- offset
- isize
; /* isize is now the delta */
176 if (isize
>= -128L && isize
<= 127L)
177 return 1; /* it is byte size */
182 long assemble(long segment
, long offset
, int bits
, unsigned long cp
,
183 insn
* instruction
, struct ofmt
*output
, efunc error
,
186 struct itemplate
*temp
;
192 long wsize
= 0; /* size for DB etc. */
194 errfunc
= error
; /* to pass to other functions */
196 outfmt
= output
; /* likewise */
197 list
= listgen
; /* and again */
199 switch (instruction
->opcode
) {
221 long t
= instruction
->times
;
224 "instruction->times < 0 (%ld) in assemble()", t
);
226 while (t
--) { /* repeat TIMES times */
227 for (e
= instruction
->eops
; e
; e
= e
->next
) {
228 if (e
->type
== EOT_DB_NUMBER
) {
230 if (e
->segment
!= NO_SEG
)
231 errfunc(ERR_NONFATAL
,
232 "one-byte relocation attempted");
234 unsigned char out_byte
= e
->offset
;
235 out(offset
, segment
, &out_byte
,
236 OUT_RAWDATA
+ 1, NO_SEG
, NO_SEG
);
238 } else if (wsize
> 5) {
239 errfunc(ERR_NONFATAL
, "integer supplied to a D%c"
240 " instruction", wsize
== 8 ? 'Q' : 'T');
242 out(offset
, segment
, &e
->offset
,
243 OUT_ADDRESS
+ wsize
, e
->segment
, e
->wrt
);
245 } else if (e
->type
== EOT_DB_STRING
) {
248 out(offset
, segment
, e
->stringval
,
249 OUT_RAWDATA
+ e
->stringlen
, NO_SEG
, NO_SEG
);
250 align
= e
->stringlen
% wsize
;
253 align
= wsize
- align
;
254 out(offset
, segment
, "\0\0\0\0\0\0\0\0",
255 OUT_RAWDATA
+ align
, NO_SEG
, NO_SEG
);
257 offset
+= e
->stringlen
+ align
;
260 if (t
> 0 && t
== instruction
->times
- 1) {
262 * Dummy call to list->output to give the offset to the
265 list
->output(offset
, NULL
, OUT_RAWDATA
);
266 list
->uplevel(LIST_TIMES
);
269 if (instruction
->times
> 1)
270 list
->downlevel(LIST_TIMES
);
271 return offset
- start
;
274 if (instruction
->opcode
== I_INCBIN
) {
275 static char fname
[FILENAME_MAX
];
278 char *prefix
= "", *combine
;
279 char **pPrevPath
= NULL
;
281 len
= FILENAME_MAX
- 1;
282 if (len
> instruction
->eops
->stringlen
)
283 len
= instruction
->eops
->stringlen
;
284 strncpy(fname
, instruction
->eops
->stringval
, len
);
287 while (1) { /* added by alexfru: 'incbin' uses include paths */
288 combine
= nasm_malloc(strlen(prefix
) + len
+ 1);
289 strcpy(combine
, prefix
);
290 strcat(combine
, fname
);
292 if ((fp
= fopen(combine
, "rb")) != NULL
) {
298 pPrevPath
= pp_get_include_path_ptr(pPrevPath
);
299 if (pPrevPath
== NULL
)
305 error(ERR_NONFATAL
, "`incbin': unable to open file `%s'",
307 else if (fseek(fp
, 0L, SEEK_END
) < 0)
308 error(ERR_NONFATAL
, "`incbin': unable to seek on file `%s'",
311 static char buf
[2048];
312 long t
= instruction
->times
;
316 if (instruction
->eops
->next
) {
317 base
= instruction
->eops
->next
->offset
;
319 if (instruction
->eops
->next
->next
&&
320 len
> instruction
->eops
->next
->next
->offset
)
321 len
= instruction
->eops
->next
->next
->offset
;
324 * Dummy call to list->output to give the offset to the
327 list
->output(offset
, NULL
, OUT_RAWDATA
);
328 list
->uplevel(LIST_INCBIN
);
332 fseek(fp
, base
, SEEK_SET
);
336 fread(buf
, 1, (l
> sizeof(buf
) ? sizeof(buf
) : l
),
340 * This shouldn't happen unless the file
341 * actually changes while we are reading
345 "`incbin': unexpected EOF while"
346 " reading file `%s'", fname
);
347 t
= 0; /* Try to exit cleanly */
350 out(offset
, segment
, buf
, OUT_RAWDATA
+ m
,
355 list
->downlevel(LIST_INCBIN
);
356 if (instruction
->times
> 1) {
358 * Dummy call to list->output to give the offset to the
361 list
->output(offset
, NULL
, OUT_RAWDATA
);
362 list
->uplevel(LIST_TIMES
);
363 list
->downlevel(LIST_TIMES
);
366 return instruction
->times
* len
;
368 return 0; /* if we're here, there's an error */
372 temp
= nasm_instructions
[instruction
->opcode
];
373 while (temp
->opcode
!= -1) {
374 int m
= matches(temp
, instruction
);
376 m
+= jmp_match(segment
, offset
, bits
, instruction
, temp
->code
);
378 if (m
== 100) { /* matches! */
379 const char *codes
= temp
->code
;
380 long insn_size
= calcsize(segment
, offset
, bits
,
382 itimes
= instruction
->times
;
383 if (insn_size
< 0) /* shouldn't be, on pass two */
384 error(ERR_PANIC
, "errors made it through from pass one");
387 for (j
= 0; j
< instruction
->nprefix
; j
++) {
389 switch (instruction
->prefixes
[j
]) {
423 "segr6 and segr7 cannot be used as prefixes");
442 error(ERR_PANIC
, "invalid instruction prefix");
445 out(offset
, segment
, &c
, OUT_RAWDATA
+ 1,
450 insn_end
= offset
+ insn_size
;
451 gencode(segment
, offset
, bits
, instruction
, codes
,
454 if (itimes
> 0 && itimes
== instruction
->times
- 1) {
456 * Dummy call to list->output to give the offset to the
459 list
->output(offset
, NULL
, OUT_RAWDATA
);
460 list
->uplevel(LIST_TIMES
);
463 if (instruction
->times
> 1)
464 list
->downlevel(LIST_TIMES
);
465 return offset
- start
;
466 } else if (m
> 0 && m
> size_prob
) {
472 if (temp
->opcode
== -1) { /* didn't match any instruction */
473 if (size_prob
== 1) /* would have matched, but for size */
474 error(ERR_NONFATAL
, "operation size not specified");
475 else if (size_prob
== 2)
476 error(ERR_NONFATAL
, "mismatch in operand sizes");
477 else if (size_prob
== 3)
478 error(ERR_NONFATAL
, "no instruction for this cpu level");
481 "invalid combination of opcode and operands");
486 long insn_size(long segment
, long offset
, int bits
, unsigned long cp
,
487 insn
* instruction
, efunc error
)
489 struct itemplate
*temp
;
491 errfunc
= error
; /* to pass to other functions */
494 if (instruction
->opcode
== -1)
497 if (instruction
->opcode
== I_DB
||
498 instruction
->opcode
== I_DW
||
499 instruction
->opcode
== I_DD
||
500 instruction
->opcode
== I_DQ
|| instruction
->opcode
== I_DT
) {
502 long isize
, osize
, wsize
= 0; /* placate gcc */
505 switch (instruction
->opcode
) {
523 for (e
= instruction
->eops
; e
; e
= e
->next
) {
527 if (e
->type
== EOT_DB_NUMBER
)
529 else if (e
->type
== EOT_DB_STRING
)
530 osize
= e
->stringlen
;
532 align
= (-osize
) % wsize
;
535 isize
+= osize
+ align
;
537 return isize
* instruction
->times
;
540 if (instruction
->opcode
== I_INCBIN
) {
541 char fname
[FILENAME_MAX
];
544 char *prefix
= "", *combine
;
545 char **pPrevPath
= NULL
;
547 len
= FILENAME_MAX
- 1;
548 if (len
> instruction
->eops
->stringlen
)
549 len
= instruction
->eops
->stringlen
;
550 strncpy(fname
, instruction
->eops
->stringval
, len
);
553 while (1) { /* added by alexfru: 'incbin' uses include paths */
554 combine
= nasm_malloc(strlen(prefix
) + len
+ 1);
555 strcpy(combine
, prefix
);
556 strcat(combine
, fname
);
558 if ((fp
= fopen(combine
, "rb")) != NULL
) {
564 pPrevPath
= pp_get_include_path_ptr(pPrevPath
);
565 if (pPrevPath
== NULL
)
571 error(ERR_NONFATAL
, "`incbin': unable to open file `%s'",
573 else if (fseek(fp
, 0L, SEEK_END
) < 0)
574 error(ERR_NONFATAL
, "`incbin': unable to seek on file `%s'",
579 if (instruction
->eops
->next
) {
580 len
-= instruction
->eops
->next
->offset
;
581 if (instruction
->eops
->next
->next
&&
582 len
> instruction
->eops
->next
->next
->offset
) {
583 len
= instruction
->eops
->next
->next
->offset
;
586 return instruction
->times
* len
;
588 return 0; /* if we're here, there's an error */
591 temp
= nasm_instructions
[instruction
->opcode
];
592 while (temp
->opcode
!= -1) {
593 int m
= matches(temp
, instruction
);
595 m
+= jmp_match(segment
, offset
, bits
, instruction
, temp
->code
);
598 /* we've matched an instruction. */
600 const char *codes
= temp
->code
;
603 isize
= calcsize(segment
, offset
, bits
, instruction
, codes
);
606 for (j
= 0; j
< instruction
->nprefix
; j
++) {
607 if ((instruction
->prefixes
[j
] != P_A16
&&
608 instruction
->prefixes
[j
] != P_O16
&& bits
== 16) ||
609 (instruction
->prefixes
[j
] != P_A32
&&
610 instruction
->prefixes
[j
] != P_O32
&& bits
== 32)) {
614 return isize
* instruction
->times
;
618 return -1; /* didn't match any instruction */
621 /* check whether operand oprs[op], taken as a 16- or 32-bit value, can be
622 encoded as a signed byte; returns nonzero if so, zero otherwise */
623 static int is_sbyte(insn
* ins
, int op
, int size
)
628 ret
= !(ins
->forw_ref
&& ins
->oprs
[op
].opflags
) && /* dead in the water on forward reference or External */
630 !(ins
->oprs
[op
].type
& STRICT
) &&
631 ins
->oprs
[op
].wrt
== NO_SEG
&& ins
->oprs
[op
].segment
== NO_SEG
;
633 v
= ins
->oprs
[op
].offset
;
635 v
= (signed short)v
; /* sign extend if 16 bits */
637 return ret
&& v
>= -128L && v
<= 127L;
640 static long calcsize(long segment
, long offset
, int bits
,
641 insn
* ins
, const char *codes
)
646 (void)segment
; /* Don't warn that this parameter is unused */
647 (void)offset
; /* Don't warn that this parameter is unused */
650 switch (c
= *codes
++) {
654 codes
+= c
, length
+= c
;
693 if (ins
->oprs
[c
- 034].type
& (BITS16
| BITS32
))
694 length
+= (ins
->oprs
[c
- 034].type
& BITS16
) ? 2 : 4;
696 length
+= (bits
== 16) ? 2 : 4;
709 length
+= ((ins
->oprs
[c
- 044].addr_size
?
710 ins
->oprs
[c
- 044].addr_size
: bits
) ==
726 if (ins
->oprs
[c
- 064].type
& (BITS16
| BITS32
))
727 length
+= (ins
->oprs
[c
- 064].type
& BITS16
) ? 2 : 4;
729 length
+= (bits
== 16) ? 2 : 4;
739 length
+= is_sbyte(ins
, c
- 0130, 16) ? 1 : 2;
750 length
+= is_sbyte(ins
, c
- 0140, 32) ? 1 : 4;
761 length
+= chsize(&ins
->oprs
[c
- 0300], bits
);
764 length
+= (bits
== 32);
767 length
+= (bits
== 16);
772 length
+= (bits
== 32);
775 length
+= (bits
== 16);
791 if (ins
->oprs
[0].segment
!= NO_SEG
)
792 errfunc(ERR_NONFATAL
, "attempt to reserve non-constant"
793 " quantity of BSS space");
795 length
+= ins
->oprs
[0].offset
<< (c
- 0340);
804 default: /* can't do it by 'case' statements */
805 if (c
>= 0100 && c
<= 0277) { /* it's an EA */
808 (&ins
->oprs
[(c
>> 3) & 7], &ea_data
, bits
, 0,
810 errfunc(ERR_NONFATAL
, "invalid effective address");
813 length
+= ea_data
.size
;
815 errfunc(ERR_PANIC
, "internal instruction table corrupt"
816 ": instruction code 0x%02X given", c
);
821 static void gencode(long segment
, long offset
, int bits
,
822 insn
* ins
, const char *codes
, long insn_end
)
824 static char condval
[] = { /* conditional opcodes */
825 0x7, 0x3, 0x2, 0x6, 0x2, 0x4, 0xF, 0xD, 0xC, 0xE, 0x6, 0x2,
826 0x3, 0x7, 0x3, 0x5, 0xE, 0xC, 0xD, 0xF, 0x1, 0xB, 0x9, 0x5,
827 0x0, 0xA, 0xA, 0xB, 0x8, 0x4
830 unsigned char bytes
[4];
834 switch (c
= *codes
++) {
838 out(offset
, segment
, codes
, OUT_RAWDATA
+ c
, NO_SEG
, NO_SEG
);
845 switch (ins
->oprs
[0].basereg
) {
847 bytes
[0] = 0x0E + (c
== 0x04 ? 1 : 0);
850 bytes
[0] = 0x1E + (c
== 0x04 ? 1 : 0);
853 bytes
[0] = 0x06 + (c
== 0x04 ? 1 : 0);
856 bytes
[0] = 0x16 + (c
== 0x04 ? 1 : 0);
860 "bizarre 8086 segment register received");
862 out(offset
, segment
, bytes
, OUT_RAWDATA
+ 1, NO_SEG
, NO_SEG
);
868 switch (ins
->oprs
[0].basereg
) {
870 bytes
[0] = 0xA0 + (c
== 0x05 ? 1 : 0);
873 bytes
[0] = 0xA8 + (c
== 0x05 ? 1 : 0);
877 "bizarre 386 segment register received");
879 out(offset
, segment
, bytes
, OUT_RAWDATA
+ 1, NO_SEG
, NO_SEG
);
886 bytes
[0] = *codes
++ + regval(&ins
->oprs
[c
- 010]);
887 out(offset
, segment
, bytes
, OUT_RAWDATA
+ 1, NO_SEG
, NO_SEG
);
893 out(offset
, segment
, bytes
, OUT_RAWDATA
+ 1, NO_SEG
, NO_SEG
);
900 if (ins
->oprs
[c
- 014].offset
< -128
901 || ins
->oprs
[c
- 014].offset
> 127) {
902 errfunc(ERR_WARNING
, "signed byte value exceeds bounds");
905 if (ins
->oprs
[c
- 014].segment
!= NO_SEG
) {
906 data
= ins
->oprs
[c
- 014].offset
;
907 out(offset
, segment
, &data
, OUT_ADDRESS
+ 1,
908 ins
->oprs
[c
- 014].segment
, ins
->oprs
[c
- 014].wrt
);
910 bytes
[0] = ins
->oprs
[c
- 014].offset
;
911 out(offset
, segment
, bytes
, OUT_RAWDATA
+ 1, NO_SEG
,
920 if (ins
->oprs
[c
- 020].offset
< -256
921 || ins
->oprs
[c
- 020].offset
> 255) {
922 errfunc(ERR_WARNING
, "byte value exceeds bounds");
924 if (ins
->oprs
[c
- 020].segment
!= NO_SEG
) {
925 data
= ins
->oprs
[c
- 020].offset
;
926 out(offset
, segment
, &data
, OUT_ADDRESS
+ 1,
927 ins
->oprs
[c
- 020].segment
, ins
->oprs
[c
- 020].wrt
);
929 bytes
[0] = ins
->oprs
[c
- 020].offset
;
930 out(offset
, segment
, bytes
, OUT_RAWDATA
+ 1, NO_SEG
,
939 if (ins
->oprs
[c
- 024].offset
< 0
940 || ins
->oprs
[c
- 024].offset
> 255)
941 errfunc(ERR_WARNING
, "unsigned byte value exceeds bounds");
942 if (ins
->oprs
[c
- 024].segment
!= NO_SEG
) {
943 data
= ins
->oprs
[c
- 024].offset
;
944 out(offset
, segment
, &data
, OUT_ADDRESS
+ 1,
945 ins
->oprs
[c
- 024].segment
, ins
->oprs
[c
- 024].wrt
);
947 bytes
[0] = ins
->oprs
[c
- 024].offset
;
948 out(offset
, segment
, bytes
, OUT_RAWDATA
+ 1, NO_SEG
,
957 if (ins
->oprs
[c
- 030].segment
== NO_SEG
&&
958 ins
->oprs
[c
- 030].wrt
== NO_SEG
&&
959 (ins
->oprs
[c
- 030].offset
< -65536L ||
960 ins
->oprs
[c
- 030].offset
> 65535L)) {
961 errfunc(ERR_WARNING
, "word value exceeds bounds");
963 data
= ins
->oprs
[c
- 030].offset
;
964 out(offset
, segment
, &data
, OUT_ADDRESS
+ 2,
965 ins
->oprs
[c
- 030].segment
, ins
->oprs
[c
- 030].wrt
);
972 if (ins
->oprs
[c
- 034].type
& (BITS16
| BITS32
))
973 size
= (ins
->oprs
[c
- 034].type
& BITS16
) ? 2 : 4;
975 size
= (bits
== 16) ? 2 : 4;
976 data
= ins
->oprs
[c
- 034].offset
;
977 if (size
== 2 && (data
< -65536L || data
> 65535L))
978 errfunc(ERR_WARNING
, "word value exceeds bounds");
979 out(offset
, segment
, &data
, OUT_ADDRESS
+ size
,
980 ins
->oprs
[c
- 034].segment
, ins
->oprs
[c
- 034].wrt
);
985 if (ins
->oprs
[0].segment
== NO_SEG
)
986 errfunc(ERR_NONFATAL
, "value referenced by FAR is not"
989 out(offset
, segment
, &data
, OUT_ADDRESS
+ 2,
990 outfmt
->segbase(1 + ins
->oprs
[0].segment
),
998 data
= ins
->oprs
[c
- 040].offset
;
999 out(offset
, segment
, &data
, OUT_ADDRESS
+ 4,
1000 ins
->oprs
[c
- 040].segment
, ins
->oprs
[c
- 040].wrt
);
1007 data
= ins
->oprs
[c
- 044].offset
;
1008 size
= ((ins
->oprs
[c
- 044].addr_size
?
1009 ins
->oprs
[c
- 044].addr_size
: bits
) == 16 ? 2 : 4);
1010 if (size
== 2 && (data
< -65536L || data
> 65535L))
1011 errfunc(ERR_WARNING
, "word value exceeds bounds");
1012 out(offset
, segment
, &data
, OUT_ADDRESS
+ size
,
1013 ins
->oprs
[c
- 044].segment
, ins
->oprs
[c
- 044].wrt
);
1020 if (ins
->oprs
[c
- 050].segment
!= segment
)
1021 errfunc(ERR_NONFATAL
,
1022 "short relative jump outside segment");
1023 data
= ins
->oprs
[c
- 050].offset
- insn_end
;
1024 if (data
> 127 || data
< -128)
1025 errfunc(ERR_NONFATAL
, "short jump is out of range");
1027 out(offset
, segment
, bytes
, OUT_RAWDATA
+ 1, NO_SEG
, NO_SEG
);
1034 if (ins
->oprs
[c
- 060].segment
!= segment
) {
1035 data
= ins
->oprs
[c
- 060].offset
;
1036 out(offset
, segment
, &data
,
1037 OUT_REL2ADR
+ insn_end
- offset
,
1038 ins
->oprs
[c
- 060].segment
, ins
->oprs
[c
- 060].wrt
);
1040 data
= ins
->oprs
[c
- 060].offset
- insn_end
;
1041 out(offset
, segment
, &data
,
1042 OUT_ADDRESS
+ 2, NO_SEG
, NO_SEG
);
1050 if (ins
->oprs
[c
- 064].type
& (BITS16
| BITS32
))
1051 size
= (ins
->oprs
[c
- 064].type
& BITS16
) ? 2 : 4;
1053 size
= (bits
== 16) ? 2 : 4;
1054 if (ins
->oprs
[c
- 064].segment
!= segment
) {
1055 long reltype
= (size
== 2 ? OUT_REL2ADR
: OUT_REL4ADR
);
1056 data
= ins
->oprs
[c
- 064].offset
;
1057 out(offset
, segment
, &data
, reltype
+ insn_end
- offset
,
1058 ins
->oprs
[c
- 064].segment
, ins
->oprs
[c
- 064].wrt
);
1060 data
= ins
->oprs
[c
- 064].offset
- insn_end
;
1061 out(offset
, segment
, &data
,
1062 OUT_ADDRESS
+ size
, NO_SEG
, NO_SEG
);
1070 if (ins
->oprs
[c
- 070].segment
!= segment
) {
1071 data
= ins
->oprs
[c
- 070].offset
;
1072 out(offset
, segment
, &data
,
1073 OUT_REL4ADR
+ insn_end
- offset
,
1074 ins
->oprs
[c
- 070].segment
, ins
->oprs
[c
- 070].wrt
);
1076 data
= ins
->oprs
[c
- 070].offset
- insn_end
;
1077 out(offset
, segment
, &data
,
1078 OUT_ADDRESS
+ 4, NO_SEG
, NO_SEG
);
1086 data
= ins
->oprs
[c
- 0130].offset
;
1087 if (is_sbyte(ins
, c
- 0130, 16)) {
1089 out(offset
, segment
, bytes
, OUT_RAWDATA
+ 1, NO_SEG
,
1093 if (ins
->oprs
[c
- 0130].segment
== NO_SEG
&&
1094 ins
->oprs
[c
- 0130].wrt
== NO_SEG
&&
1095 (data
< -65536L || data
> 65535L)) {
1096 errfunc(ERR_WARNING
, "word value exceeds bounds");
1098 out(offset
, segment
, &data
, OUT_ADDRESS
+ 2,
1099 ins
->oprs
[c
- 0130].segment
, ins
->oprs
[c
- 0130].wrt
);
1108 bytes
[0] = *codes
++;
1109 if (is_sbyte(ins
, c
- 0133, 16))
1110 bytes
[0] |= 2; /* s-bit */
1111 out(offset
, segment
, bytes
, OUT_RAWDATA
+ 1, NO_SEG
, NO_SEG
);
1118 data
= ins
->oprs
[c
- 0140].offset
;
1119 if (is_sbyte(ins
, c
- 0140, 32)) {
1121 out(offset
, segment
, bytes
, OUT_RAWDATA
+ 1, NO_SEG
,
1125 out(offset
, segment
, &data
, OUT_ADDRESS
+ 4,
1126 ins
->oprs
[c
- 0140].segment
, ins
->oprs
[c
- 0140].wrt
);
1135 bytes
[0] = *codes
++;
1136 if (is_sbyte(ins
, c
- 0143, 32))
1137 bytes
[0] |= 2; /* s-bit */
1138 out(offset
, segment
, bytes
, OUT_RAWDATA
+ 1, NO_SEG
, NO_SEG
);
1145 if (chsize(&ins
->oprs
[c
- 0300], bits
)) {
1147 out(offset
, segment
, bytes
,
1148 OUT_RAWDATA
+ 1, NO_SEG
, NO_SEG
);
1157 out(offset
, segment
, bytes
,
1158 OUT_RAWDATA
+ 1, NO_SEG
, NO_SEG
);
1167 out(offset
, segment
, bytes
,
1168 OUT_RAWDATA
+ 1, NO_SEG
, NO_SEG
);
1180 out(offset
, segment
, bytes
,
1181 OUT_RAWDATA
+ 1, NO_SEG
, NO_SEG
);
1190 out(offset
, segment
, bytes
,
1191 OUT_RAWDATA
+ 1, NO_SEG
, NO_SEG
);
1201 *bytes
= *codes
++ ^ condval
[ins
->condition
];
1202 out(offset
, segment
, bytes
, OUT_RAWDATA
+ 1, NO_SEG
, NO_SEG
);
1212 out(offset
, segment
, bytes
, OUT_RAWDATA
+ 1, NO_SEG
, NO_SEG
);
1219 if (ins
->oprs
[0].segment
!= NO_SEG
)
1220 errfunc(ERR_PANIC
, "non-constant BSS size in pass two");
1222 long size
= ins
->oprs
[0].offset
<< (c
- 0340);
1224 out(offset
, segment
, NULL
,
1225 OUT_RESERVE
+ size
, NO_SEG
, NO_SEG
);
1236 *bytes
= bits
== 16 ? 3 : 5;
1237 out(offset
, segment
, bytes
, OUT_RAWDATA
+ 1, NO_SEG
, NO_SEG
);
1241 default: /* can't do it by 'case' statements */
1242 if (c
>= 0100 && c
<= 0277) { /* it's an EA */
1248 if (c
<= 0177) /* pick rfield from operand b */
1249 rfield
= regval(&ins
->oprs
[c
& 7]);
1250 else /* rfield is constant */
1254 (&ins
->oprs
[(c
>> 3) & 7], &ea_data
, bits
, rfield
,
1256 errfunc(ERR_NONFATAL
, "invalid effective address");
1260 *p
++ = ea_data
.modrm
;
1261 if (ea_data
.sib_present
)
1265 out(offset
, segment
, bytes
, OUT_RAWDATA
+ s
,
1268 switch (ea_data
.bytes
) {
1272 if (ins
->oprs
[(c
>> 3) & 7].segment
!= NO_SEG
) {
1273 data
= ins
->oprs
[(c
>> 3) & 7].offset
;
1274 out(offset
, segment
, &data
, OUT_ADDRESS
+ 1,
1275 ins
->oprs
[(c
>> 3) & 7].segment
,
1276 ins
->oprs
[(c
>> 3) & 7].wrt
);
1278 *bytes
= ins
->oprs
[(c
>> 3) & 7].offset
;
1279 out(offset
, segment
, bytes
, OUT_RAWDATA
+ 1,
1286 data
= ins
->oprs
[(c
>> 3) & 7].offset
;
1287 out(offset
, segment
, &data
,
1288 OUT_ADDRESS
+ ea_data
.bytes
,
1289 ins
->oprs
[(c
>> 3) & 7].segment
,
1290 ins
->oprs
[(c
>> 3) & 7].wrt
);
1296 errfunc(ERR_PANIC
, "internal instruction table corrupt"
1297 ": instruction code 0x%02X given", c
);
1301 #include "regvals.c"
1303 static int regval(operand
* o
)
1305 if (o
->basereg
< EXPR_REG_START
|| o
->basereg
>= REG_ENUM_LIMIT
) {
1306 errfunc(ERR_PANIC
, "invalid operand passed to regval()");
1308 return regvals
[o
->basereg
];
1311 static int matches(struct itemplate
*itemp
, insn
* instruction
)
1313 int i
, size
[3], asize
, oprs
, ret
;
1320 if (itemp
->opcode
!= instruction
->opcode
)
1324 * Count the operands
1326 if (itemp
->operands
!= instruction
->operands
)
1330 * Check that no spurious colons or TOs are present
1332 for (i
= 0; i
< itemp
->operands
; i
++)
1333 if (instruction
->oprs
[i
].type
& ~itemp
->opd
[i
] & (COLON
| TO
))
1337 * Check that the operand flags all match up
1339 for (i
= 0; i
< itemp
->operands
; i
++)
1340 if (itemp
->opd
[i
] & ~instruction
->oprs
[i
].type
||
1341 ((itemp
->opd
[i
] & SIZE_MASK
) &&
1342 ((itemp
->opd
[i
] ^ instruction
->oprs
[i
].type
) & SIZE_MASK
))) {
1343 if ((itemp
->opd
[i
] & ~instruction
->oprs
[i
].type
& NON_SIZE
) ||
1344 (instruction
->oprs
[i
].type
& SIZE_MASK
))
1352 * Check operand sizes
1354 if (itemp
->flags
& IF_ARMASK
) {
1355 size
[0] = size
[1] = size
[2] = 0;
1357 switch (itemp
->flags
& IF_ARMASK
) {
1368 break; /* Shouldn't happen */
1370 if (itemp
->flags
& IF_SB
) {
1372 } else if (itemp
->flags
& IF_SW
) {
1374 } else if (itemp
->flags
& IF_SD
) {
1379 if (itemp
->flags
& IF_SB
) {
1381 oprs
= itemp
->operands
;
1382 } else if (itemp
->flags
& IF_SW
) {
1384 oprs
= itemp
->operands
;
1385 } else if (itemp
->flags
& IF_SD
) {
1387 oprs
= itemp
->operands
;
1389 size
[0] = size
[1] = size
[2] = asize
;
1392 if (itemp
->flags
& (IF_SM
| IF_SM2
)) {
1393 oprs
= (itemp
->flags
& IF_SM2
? 2 : itemp
->operands
);
1395 for (i
= 0; i
< oprs
; i
++) {
1396 if ((asize
= itemp
->opd
[i
] & SIZE_MASK
) != 0) {
1398 for (j
= 0; j
< oprs
; j
++)
1404 oprs
= itemp
->operands
;
1407 for (i
= 0; i
< itemp
->operands
; i
++)
1408 if (!(itemp
->opd
[i
] & SIZE_MASK
) &&
1409 (instruction
->oprs
[i
].type
& SIZE_MASK
& ~size
[i
]))
1414 * Check template is okay at the set cpu level
1416 if ((itemp
->flags
& IF_PLEVEL
) > cpu
)
1420 * Check if special handling needed for Jumps
1422 if ((unsigned char)(itemp
->code
[0]) >= 0370)
1428 static ea
*process_ea(operand
* input
, ea
* output
, int addrbits
,
1429 int rfield
, int forw_ref
)
1431 if (!(REGISTER
& ~input
->type
)) { /* it's a single register */
1432 static int regs
[] = {
1433 R_AL
, R_CL
, R_DL
, R_BL
, R_AH
, R_CH
, R_DH
, R_BH
,
1434 R_AX
, R_CX
, R_DX
, R_BX
, R_SP
, R_BP
, R_SI
, R_DI
,
1435 R_EAX
, R_ECX
, R_EDX
, R_EBX
, R_ESP
, R_EBP
, R_ESI
, R_EDI
,
1436 R_MM0
, R_MM1
, R_MM2
, R_MM3
, R_MM4
, R_MM5
, R_MM6
, R_MM7
,
1437 R_XMM0
, R_XMM1
, R_XMM2
, R_XMM3
, R_XMM4
, R_XMM5
, R_XMM6
, R_XMM7
1441 for (i
= 0; i
< elements(regs
); i
++)
1442 if (input
->basereg
== regs
[i
])
1444 if (i
< elements(regs
)) {
1445 output
->sib_present
= FALSE
; /* no SIB necessary */
1446 output
->bytes
= 0; /* no offset necessary either */
1447 output
->modrm
= 0xC0 | (rfield
<< 3) | (i
& 7);
1450 } else { /* it's a memory reference */
1451 if (input
->basereg
== -1
1452 && (input
->indexreg
== -1 || input
->scale
== 0)) {
1453 /* it's a pure offset */
1454 if (input
->addr_size
)
1455 addrbits
= input
->addr_size
;
1456 output
->sib_present
= FALSE
;
1457 output
->bytes
= (addrbits
== 32 ? 4 : 2);
1458 output
->modrm
= (addrbits
== 32 ? 5 : 6) | (rfield
<< 3);
1459 } else { /* it's an indirection */
1460 int i
= input
->indexreg
, b
= input
->basereg
, s
= input
->scale
;
1461 long o
= input
->offset
, seg
= input
->segment
;
1462 int hb
= input
->hintbase
, ht
= input
->hinttype
;
1466 i
= -1; /* make this easy, at least */
1468 if (i
== R_EAX
|| i
== R_EBX
|| i
== R_ECX
|| i
== R_EDX
1469 || i
== R_EBP
|| i
== R_ESP
|| i
== R_ESI
|| i
== R_EDI
1470 || b
== R_EAX
|| b
== R_EBX
|| b
== R_ECX
|| b
== R_EDX
1471 || b
== R_EBP
|| b
== R_ESP
|| b
== R_ESI
|| b
== R_EDI
) {
1472 /* it must be a 32-bit memory reference. Firstly we have
1473 * to check that all registers involved are type Exx. */
1474 if (i
!= -1 && i
!= R_EAX
&& i
!= R_EBX
&& i
!= R_ECX
1475 && i
!= R_EDX
&& i
!= R_EBP
&& i
!= R_ESP
&& i
!= R_ESI
1478 if (b
!= -1 && b
!= R_EAX
&& b
!= R_EBX
&& b
!= R_ECX
1479 && b
!= R_EDX
&& b
!= R_EBP
&& b
!= R_ESP
&& b
!= R_ESI
1483 /* While we're here, ensure the user didn't specify WORD. */
1484 if (input
->addr_size
== 16)
1487 /* now reorganise base/index */
1488 if (s
== 1 && b
!= i
&& b
!= -1 && i
!= -1 &&
1489 ((hb
== b
&& ht
== EAH_NOTBASE
)
1490 || (hb
== i
&& ht
== EAH_MAKEBASE
)))
1491 t
= b
, b
= i
, i
= t
; /* swap if hints say so */
1492 if (b
== i
) /* convert EAX+2*EAX to 3*EAX */
1494 if (b
== -1 && s
== 1 && !(hb
== i
&& ht
== EAH_NOTBASE
))
1495 b
= i
, i
= -1; /* make single reg base, unless hint */
1496 if (((s
== 2 && i
!= R_ESP
1497 && !(input
->eaflags
& EAF_TIMESTWO
)) || s
== 3
1498 || s
== 5 || s
== 9) && b
== -1)
1499 b
= i
, s
--; /* convert 3*EAX to EAX+2*EAX */
1500 if (i
== -1 && b
!= R_ESP
1501 && (input
->eaflags
& EAF_TIMESTWO
))
1502 i
= b
, b
= -1, s
= 1;
1503 /* convert [NOSPLIT EAX] to sib format with 0x0 displacement */
1504 if (s
== 1 && i
== R_ESP
) /* swap ESP into base if scale is 1 */
1507 || (s
!= 1 && s
!= 2 && s
!= 4 && s
!= 8 && i
!= -1))
1508 return NULL
; /* wrong, for various reasons */
1510 if (i
== -1 && b
!= R_ESP
) { /* no SIB needed */
1537 default: /* should never happen */
1540 if (b
== -1 || (b
!= R_EBP
&& o
== 0 &&
1541 seg
== NO_SEG
&& !forw_ref
&&
1543 (EAF_BYTEOFFS
| EAF_WORDOFFS
))))
1545 else if (input
->eaflags
& EAF_BYTEOFFS
||
1546 (o
>= -128 && o
<= 127 && seg
== NO_SEG
1548 && !(input
->eaflags
& EAF_WORDOFFS
))) {
1553 output
->sib_present
= FALSE
;
1554 output
->bytes
= (b
== -1 || mod
== 2 ? 4 : mod
);
1555 output
->modrm
= (mod
<< 6) | (rfield
<< 3) | rm
;
1556 } else { /* we need a SIB */
1557 int mod
, scale
, index
, base
;
1585 default: /* then what the smeg is it? */
1586 return NULL
; /* panic */
1614 default: /* then what the smeg is it? */
1615 return NULL
; /* panic */
1633 default: /* then what the smeg is it? */
1634 return NULL
; /* panic */
1637 if (b
== -1 || (b
!= R_EBP
&& o
== 0 &&
1638 seg
== NO_SEG
&& !forw_ref
&&
1640 (EAF_BYTEOFFS
| EAF_WORDOFFS
))))
1642 else if (input
->eaflags
& EAF_BYTEOFFS
||
1643 (o
>= -128 && o
<= 127 && seg
== NO_SEG
1645 && !(input
->eaflags
& EAF_WORDOFFS
)))
1650 output
->sib_present
= TRUE
;
1651 output
->bytes
= (b
== -1 || mod
== 2 ? 4 : mod
);
1652 output
->modrm
= (mod
<< 6) | (rfield
<< 3) | 4;
1653 output
->sib
= (scale
<< 6) | (index
<< 3) | base
;
1655 } else { /* it's 16-bit */
1658 /* check all registers are BX, BP, SI or DI */
1659 if ((b
!= -1 && b
!= R_BP
&& b
!= R_BX
&& b
!= R_SI
1660 && b
!= R_DI
) || (i
!= -1 && i
!= R_BP
&& i
!= R_BX
1661 && i
!= R_SI
&& i
!= R_DI
))
1664 /* ensure the user didn't specify DWORD */
1665 if (input
->addr_size
== 32)
1668 if (s
!= 1 && i
!= -1)
1669 return NULL
; /* no can do, in 16-bit EA */
1670 if (b
== -1 && i
!= -1) {
1675 if ((b
== R_SI
|| b
== R_DI
) && i
!= -1) {
1680 /* have BX/BP as base, SI/DI index */
1682 return NULL
; /* shouldn't ever happen, in theory */
1683 if (i
!= -1 && b
!= -1 &&
1684 (i
== R_BP
|| i
== R_BX
|| b
== R_SI
|| b
== R_DI
))
1685 return NULL
; /* invalid combinations */
1686 if (b
== -1) /* pure offset: handled above */
1687 return NULL
; /* so if it gets to here, panic! */
1691 switch (i
* 256 + b
) {
1692 case R_SI
* 256 + R_BX
:
1695 case R_DI
* 256 + R_BX
:
1698 case R_SI
* 256 + R_BP
:
1701 case R_DI
* 256 + R_BP
:
1719 if (rm
== -1) /* can't happen, in theory */
1720 return NULL
; /* so panic if it does */
1722 if (o
== 0 && seg
== NO_SEG
&& !forw_ref
&& rm
!= 6 &&
1723 !(input
->eaflags
& (EAF_BYTEOFFS
| EAF_WORDOFFS
)))
1725 else if (input
->eaflags
& EAF_BYTEOFFS
||
1726 (o
>= -128 && o
<= 127 && seg
== NO_SEG
1728 && !(input
->eaflags
& EAF_WORDOFFS
)))
1733 output
->sib_present
= FALSE
; /* no SIB - it's 16-bit */
1734 output
->bytes
= mod
; /* bytes of offset needed */
1735 output
->modrm
= (mod
<< 6) | (rfield
<< 3) | rm
;
1739 output
->size
= 1 + output
->sib_present
+ output
->bytes
;
1743 static int chsize(operand
* input
, int addrbits
)
1745 if (!(MEMORY
& ~input
->type
)) {
1746 int i
= input
->indexreg
, b
= input
->basereg
;
1748 if (input
->scale
== 0)
1751 if (i
== -1 && b
== -1) /* pure offset */
1752 return (input
->addr_size
!= 0 && input
->addr_size
!= addrbits
);
1754 if (i
== R_EAX
|| i
== R_EBX
|| i
== R_ECX
|| i
== R_EDX
1755 || i
== R_EBP
|| i
== R_ESP
|| i
== R_ESI
|| i
== R_EDI
1756 || b
== R_EAX
|| b
== R_EBX
|| b
== R_ECX
|| b
== R_EDX
1757 || b
== R_EBP
|| b
== R_ESP
|| b
== R_ESI
|| b
== R_EDI
)
1758 return (addrbits
== 16);
1760 return (addrbits
== 32);