NASM 0.98p7
[nasm/avx512.git] / assemble.c
blob426bae23731b9acbf30e52e7ca9cd1d6bae27289
1 /* assemble.c code generation for the Netwide Assembler
3 * The Netwide Assembler is copyright (C) 1996 Simon Tatham and
4 * Julian Hall. All rights reserved. The software is
5 * redistributable under the licence given in the file "Licence"
6 * distributed in the NASM archive.
8 * the actual codes (C syntax, i.e. octal):
9 * \0 - terminates the code. (Unless it's a literal of course.)
10 * \1, \2, \3 - that many literal bytes follow in the code stream
11 * \4, \6 - the POP/PUSH (respectively) codes for CS, DS, ES, SS
12 * (POP is never used for CS) depending on operand 0
13 * \5, \7 - the second byte of POP/PUSH codes for FS, GS, depending
14 * on operand 0
15 * \10, \11, \12 - a literal byte follows in the code stream, to be added
16 * to the register value of operand 0, 1 or 2
17 * \17 - encodes the literal byte 0. (Some compilers don't take
18 * kindly to a zero byte in the _middle_ of a compile time
19 * string constant, so I had to put this hack in.)
20 * \14, \15, \16 - a signed byte immediate operand, from operand 0, 1 or 2
21 * \20, \21, \22 - a byte immediate operand, from operand 0, 1 or 2
22 * \24, \25, \26 - an unsigned byte immediate operand, from operand 0, 1 or 2
23 * \30, \31, \32 - a word immediate operand, from operand 0, 1 or 2
24 * \34, \35, \36 - select between \3[012] and \4[012] depending on 16/32 bit
25 * assembly mode or the address-size override on the operand
26 * \37 - a word constant, from the _segment_ part of operand 0
27 * \40, \41, \42 - a long immediate operand, from operand 0, 1 or 2
28 * \50, \51, \52 - a byte relative operand, from operand 0, 1 or 2
29 * \60, \61, \62 - a word relative operand, from operand 0, 1 or 2
30 * \64, \65, \66 - select between \6[012] and \7[012] depending on 16/32 bit
31 * assembly mode or the address-size override on the operand
32 * \70, \71, \72 - a long relative operand, from operand 0, 1 or 2
33 * \1ab - a ModRM, calculated on EA in operand a, with the spare
34 * field the register value of operand b.
35 * \2ab - a ModRM, calculated on EA in operand a, with the spare
36 * field equal to digit b.
37 * \30x - might be an 0x67 byte, depending on the address size of
38 * the memory reference in operand x.
39 * \310 - indicates fixed 16-bit address size, i.e. optional 0x67.
40 * \311 - indicates fixed 32-bit address size, i.e. optional 0x67.
41 * \320 - indicates fixed 16-bit operand size, i.e. optional 0x66.
42 * \321 - indicates fixed 32-bit operand size, i.e. optional 0x66.
43 * \322 - indicates that this instruction is only valid when the
44 * operand size is the default (instruction to disassembler,
45 * generates no code in the assembler)
46 * \330 - a literal byte follows in the code stream, to be added
47 * to the condition code value of the instruction.
48 * \331 - instruction not valid with REP prefix. Hint for
49 * disassembler only; for SSE instructions.
50 * \332 - disassemble a rep (0xF3 byte) prefix as repe not rep.
51 * \333 - REP prefix (0xF3 byte); for SSE instructions. Not encoded
52 * as a literal byte in order to aid the disassembler.
53 * \340 - reserve <operand 0> bytes of uninitialised storage.
54 * Operand 0 had better be a segmentless constant.
57 #include <stdio.h>
58 #include <string.h>
60 #include "nasm.h"
61 #include "nasmlib.h"
62 #include "assemble.h"
63 #include "insns.h"
extern struct itemplate *nasm_instructions[];

/*
 * Encoded form of an effective address, as filled in by process_ea().
 */
typedef struct {
    int sib_present;            /* non-zero if a SIB byte is required */
    int bytes;                  /* number of offset bytes needed */
    int size;                   /* convenience total: sib + bytes + 1 */
    unsigned char modrm;        /* the ModRM byte itself */
    unsigned char sib;          /* the SIB byte itself */
} ea;
74 static efunc errfunc;
75 static struct ofmt *outfmt;
76 static ListGen *list;
78 static long calcsize (long, long, int, insn *, char *);
79 static void gencode (long, long, int, insn *, char *, long);
80 static int regval (operand *o);
81 static int matches (struct itemplate *, insn *);
82 static ea * process_ea (operand *, ea *, int, int, int);
83 static int chsize (operand *, int);
86 * This routine wrappers the real output format's output routine,
87 * in order to pass a copy of the data off to the listing file
88 * generator at the same time.
90 static void out (long offset, long segto, void *data, unsigned long type,
91 long segment, long wrt)
93 static long lineno;
94 static char *lnfname;
96 if ((type & OUT_TYPMASK) == OUT_ADDRESS) {
97 if (segment != NO_SEG || wrt != NO_SEG) {
99 * This address is relocated. We must write it as
100 * OUT_ADDRESS, so there's no work to be done here.
102 list->output (offset, data, type);
104 else {
105 unsigned char p[4], *q = p;
107 * This is a non-relocated address, and we're going to
108 * convert it into RAWDATA format.
110 if ((type & OUT_SIZMASK) == 4) {
111 WRITELONG (q, * (long *) data);
112 list->output (offset, p, OUT_RAWDATA+4);
114 else {
115 WRITESHORT (q, * (long *) data);
116 list->output (offset, p, OUT_RAWDATA+2);
120 else if ((type & OUT_TYPMASK) == OUT_RAWDATA) {
121 list->output (offset, data, type);
123 else if ((type & OUT_TYPMASK) == OUT_RESERVE) {
124 list->output (offset, NULL, type);
126 else if ((type & OUT_TYPMASK) == OUT_REL2ADR ||
127 (type & OUT_TYPMASK) == OUT_REL4ADR) {
128 list->output (offset, data, type);
131 if (src_get(&lineno,&lnfname))
132 outfmt->current_dfmt->linenum(lnfname,lineno,segto);
134 outfmt->output (segto, data, type, segment, wrt);
137 long assemble (long segment, long offset, int bits,
138 insn *instruction, struct ofmt *output, efunc error,
139 ListGen *listgen)
141 struct itemplate *temp;
142 int j;
143 int size_prob;
144 long insn_end;
145 long itimes;
146 long start = offset;
147 long wsize = 0; /* size for DB etc. */
149 errfunc = error; /* to pass to other functions */
150 outfmt = output; /* likewise */
151 list = listgen; /* and again */
153 switch (instruction->opcode)
155 case -1: return 0;
156 case I_DB: wsize = 1; break;
157 case I_DW: wsize = 2; break;
158 case I_DD: wsize = 4; break;
159 case I_DQ: wsize = 8; break;
160 case I_DT: wsize = 10; break;
163 if (wsize) {
164 extop * e;
165 long t = instruction->times;
166 if (t < 0)
167 errfunc(ERR_PANIC, "instruction->times < 0 (%ld) in assemble()",t);
169 while (t--) /* repeat TIMES times */
171 for (e = instruction->eops; e; e = e->next)
173 if (e->type == EOT_DB_NUMBER)
175 if (wsize == 1) {
176 if (e->segment != NO_SEG)
177 errfunc (ERR_NONFATAL,
178 "one-byte relocation attempted");
179 else {
180 out (offset, segment, &e->offset, OUT_RAWDATA+1,
181 NO_SEG, NO_SEG);
184 else if (wsize > 5) {
185 errfunc (ERR_NONFATAL, "integer supplied to a D%c"
186 " instruction", wsize==8 ? 'Q' : 'T');
188 else
189 out (offset, segment, &e->offset,
190 OUT_ADDRESS+wsize, e->segment,
191 e->wrt);
192 offset += wsize;
194 else if (e->type == EOT_DB_STRING)
196 int align;
198 out (offset, segment, e->stringval,
199 OUT_RAWDATA+e->stringlen, NO_SEG, NO_SEG);
200 align = e->stringlen % wsize;
202 if (align) {
203 align = wsize - align;
204 out (offset, segment, "\0\0\0\0\0\0\0\0",
205 OUT_RAWDATA+align, NO_SEG, NO_SEG);
207 offset += e->stringlen + align;
210 if (t > 0 && t == instruction->times-1)
213 * Dummy call to list->output to give the offset to the
214 * listing module.
216 list->output (offset, NULL, OUT_RAWDATA);
217 list->uplevel (LIST_TIMES);
220 if (instruction->times > 1)
221 list->downlevel (LIST_TIMES);
222 return offset - start;
225 if (instruction->opcode == I_INCBIN)
227 static char fname[FILENAME_MAX];
228 FILE * fp;
229 long len;
231 len = FILENAME_MAX-1;
232 if (len > instruction->eops->stringlen)
233 len = instruction->eops->stringlen;
234 strncpy (fname, instruction->eops->stringval, len);
235 fname[len] = '\0';
237 if ( (fp = fopen(fname, "rb")) == NULL)
238 error (ERR_NONFATAL, "`incbin': unable to open file `%s'", fname);
239 else if (fseek(fp, 0L, SEEK_END) < 0)
240 error (ERR_NONFATAL, "`incbin': unable to seek on file `%s'",
241 fname);
242 else
244 static char buf[2048];
245 long t = instruction->times;
246 long base = 0;
248 len = ftell (fp);
249 if (instruction->eops->next) {
250 base = instruction->eops->next->offset;
251 len -= base;
252 if (instruction->eops->next->next &&
253 len > instruction->eops->next->next->offset)
254 len = instruction->eops->next->next->offset;
257 * Dummy call to list->output to give the offset to the
258 * listing module.
260 list->output (offset, NULL, OUT_RAWDATA);
261 list->uplevel(LIST_INCBIN);
262 while (t--)
264 long l;
266 fseek (fp, base, SEEK_SET);
267 l = len;
268 while (l > 0) {
269 long m = fread (buf, 1, (l>sizeof(buf)?sizeof(buf):l),
270 fp);
271 if (!m) {
273 * This shouldn't happen unless the file
274 * actually changes while we are reading
275 * it.
277 error (ERR_NONFATAL, "`incbin': unexpected EOF while"
278 " reading file `%s'", fname);
279 t=0; /* Try to exit cleanly */
280 break;
282 out (offset, segment, buf, OUT_RAWDATA+m,
283 NO_SEG, NO_SEG);
284 l -= m;
287 list->downlevel(LIST_INCBIN);
288 if (instruction->times > 1) {
290 * Dummy call to list->output to give the offset to the
291 * listing module.
293 list->output (offset, NULL, OUT_RAWDATA);
294 list->uplevel(LIST_TIMES);
295 list->downlevel(LIST_TIMES);
297 fclose (fp);
298 return instruction->times * len;
300 return 0; /* if we're here, there's an error */
303 size_prob = FALSE;
304 temp = nasm_instructions[instruction->opcode];
305 while (temp->opcode != -1) {
306 int m = matches (temp, instruction);
308 if (m == 100) /* matches! */
310 char *codes = temp->code;
311 long insn_size = calcsize(segment, offset, bits,
312 instruction, codes);
313 itimes = instruction->times;
314 if (insn_size < 0) /* shouldn't be, on pass two */
315 error (ERR_PANIC, "errors made it through from pass one");
316 else while (itimes--) {
317 insn_end = offset + insn_size;
318 for (j=0; j<instruction->nprefix; j++) {
319 unsigned char c=0;
320 switch (instruction->prefixes[j]) {
321 case P_LOCK:
322 c = 0xF0; break;
323 case P_REPNE: case P_REPNZ:
324 c = 0xF2; break;
325 case P_REPE: case P_REPZ: case P_REP:
326 c = 0xF3; break;
327 case R_CS: c = 0x2E; break;
328 case R_DS: c = 0x3E; break;
329 case R_ES: c = 0x26; break;
330 case R_FS: c = 0x64; break;
331 case R_GS: c = 0x65; break;
332 case R_SS: c = 0x36; break;
333 case P_A16:
334 if (bits != 16)
335 c = 0x67;
336 break;
337 case P_A32:
338 if (bits != 32)
339 c = 0x67;
340 break;
341 case P_O16:
342 if (bits != 16)
343 c = 0x66;
344 break;
345 case P_O32:
346 if (bits != 32)
347 c = 0x66;
348 break;
349 default:
350 error (ERR_PANIC,
351 "invalid instruction prefix");
353 if (c != 0) {
354 out (offset, segment, &c, OUT_RAWDATA+1,
355 NO_SEG, NO_SEG);
356 offset++;
359 gencode (segment, offset, bits, instruction, codes, insn_end);
360 offset += insn_size;
361 if (itimes > 0 && itimes == instruction->times-1) {
363 * Dummy call to list->output to give the offset to the
364 * listing module.
366 list->output (offset, NULL, OUT_RAWDATA);
367 list->uplevel (LIST_TIMES);
370 if (instruction->times > 1)
371 list->downlevel (LIST_TIMES);
372 return offset - start;
373 } else if (m > 0) {
374 size_prob = m;
376 temp++;
379 if (temp->opcode == -1) { /* didn't match any instruction */
380 if (size_prob == 1) /* would have matched, but for size */
381 error (ERR_NONFATAL, "operation size not specified");
382 else if (size_prob == 2)
383 error (ERR_NONFATAL, "mismatch in operand sizes");
384 else
385 error (ERR_NONFATAL,
386 "invalid combination of opcode and operands");
388 return 0;
391 long insn_size (long segment, long offset, int bits,
392 insn *instruction, efunc error)
394 struct itemplate *temp;
396 errfunc = error; /* to pass to other functions */
398 if (instruction->opcode == -1)
399 return 0;
401 if (instruction->opcode == I_DB ||
402 instruction->opcode == I_DW ||
403 instruction->opcode == I_DD ||
404 instruction->opcode == I_DQ ||
405 instruction->opcode == I_DT)
407 extop *e;
408 long isize, osize, wsize = 0; /* placate gcc */
410 isize = 0;
411 switch (instruction->opcode)
413 case I_DB: wsize = 1; break;
414 case I_DW: wsize = 2; break;
415 case I_DD: wsize = 4; break;
416 case I_DQ: wsize = 8; break;
417 case I_DT: wsize = 10; break;
420 for (e = instruction->eops; e; e = e->next)
422 long align;
424 osize = 0;
425 if (e->type == EOT_DB_NUMBER)
426 osize = 1;
427 else if (e->type == EOT_DB_STRING)
428 osize = e->stringlen;
430 align = (-osize) % wsize;
431 if (align < 0)
432 align += wsize;
433 isize += osize + align;
435 return isize * instruction->times;
438 if (instruction->opcode == I_INCBIN)
440 char fname[FILENAME_MAX];
441 FILE * fp;
442 long len;
444 len = FILENAME_MAX-1;
445 if (len > instruction->eops->stringlen)
446 len = instruction->eops->stringlen;
447 strncpy (fname, instruction->eops->stringval, len);
448 fname[len] = '\0';
449 if ( (fp = fopen(fname, "rb")) == NULL )
450 error (ERR_NONFATAL, "`incbin': unable to open file `%s'", fname);
451 else if (fseek(fp, 0L, SEEK_END) < 0)
452 error (ERR_NONFATAL, "`incbin': unable to seek on file `%s'",
453 fname);
454 else
456 len = ftell (fp);
457 fclose (fp);
458 if (instruction->eops->next)
460 len -= instruction->eops->next->offset;
461 if (instruction->eops->next->next &&
462 len > instruction->eops->next->next->offset)
464 len = instruction->eops->next->next->offset;
467 return instruction->times * len;
469 return 0; /* if we're here, there's an error */
472 temp = nasm_instructions[instruction->opcode];
473 while (temp->opcode != -1) {
474 if (matches(temp, instruction) == 100) {
475 /* we've matched an instruction. */
476 long isize;
477 char * codes = temp->code;
478 int j;
480 isize = calcsize(segment, offset, bits, instruction, codes);
481 if (isize < 0)
482 return -1;
483 for (j = 0; j < instruction->nprefix; j++)
485 if ((instruction->prefixes[j] != P_A16 &&
486 instruction->prefixes[j] != P_O16 && bits==16) ||
487 (instruction->prefixes[j] != P_A32 &&
488 instruction->prefixes[j] != P_O32 && bits==32))
490 isize++;
493 return isize * instruction->times;
495 temp++;
497 return -1; /* didn't match any instruction */
500 static long calcsize (long segment, long offset, int bits,
501 insn *ins, char *codes)
503 long length = 0;
504 unsigned char c;
506 (void) segment; /* Don't warn that this parameter is unused */
507 (void) offset; /* Don't warn that this parameter is unused */
509 while (*codes) switch (c = *codes++) {
510 case 01: case 02: case 03:
511 codes += c, length += c; break;
512 case 04: case 05: case 06: case 07:
513 length++; break;
514 case 010: case 011: case 012:
515 codes++, length++; break;
516 case 017:
517 length++; break;
518 case 014: case 015: case 016:
519 length++; break;
520 case 020: case 021: case 022:
521 length++; break;
522 case 024: case 025: case 026:
523 length++; break;
524 case 030: case 031: case 032:
525 length += 2; break;
526 case 034: case 035: case 036:
527 length += ((ins->oprs[c-034].addr_size ?
528 ins->oprs[c-034].addr_size : bits) == 16 ? 2 : 4); break;
529 case 037:
530 length += 2; break;
531 case 040: case 041: case 042:
532 length += 4; break;
533 case 050: case 051: case 052:
534 length++; break;
535 case 060: case 061: case 062:
536 length += 2; break;
537 case 064: case 065: case 066:
538 length += ((ins->oprs[c-064].addr_size ?
539 ins->oprs[c-064].addr_size : bits) == 16 ? 2 : 4); break;
540 case 070: case 071: case 072:
541 length += 4; break;
542 case 0300: case 0301: case 0302:
543 length += chsize (&ins->oprs[c-0300], bits);
544 break;
545 case 0310:
546 length += (bits==32);
547 break;
548 case 0311:
549 length += (bits==16);
550 break;
551 case 0312:
552 break;
553 case 0320:
554 length += (bits==32);
555 break;
556 case 0321:
557 length += (bits==16);
558 break;
559 case 0322:
560 break;
561 case 0330:
562 codes++, length++; break;
563 case 0331:
564 case 0332:
565 break;
566 case 0333:
567 length++; break;
568 case 0340: case 0341: case 0342:
569 if (ins->oprs[0].segment != NO_SEG)
570 errfunc (ERR_NONFATAL, "attempt to reserve non-constant"
571 " quantity of BSS space");
572 else
573 length += ins->oprs[0].offset << (c-0340);
574 break;
575 default: /* can't do it by 'case' statements */
576 if (c>=0100 && c<=0277) { /* it's an EA */
577 ea ea_data;
578 if (!process_ea (&ins->oprs[(c>>3)&7], &ea_data, bits, 0,
579 ins->forw_ref)) {
580 errfunc (ERR_NONFATAL, "invalid effective address");
581 return -1;
582 } else
583 length += ea_data.size;
584 } else
585 errfunc (ERR_PANIC, "internal instruction table corrupt"
586 ": instruction code 0x%02X given", c);
588 return length;
591 static void gencode (long segment, long offset, int bits,
592 insn *ins, char *codes, long insn_end)
594 static char condval[] = { /* conditional opcodes */
595 0x7, 0x3, 0x2, 0x6, 0x2, 0x4, 0xF, 0xD, 0xC, 0xE, 0x6, 0x2,
596 0x3, 0x7, 0x3, 0x5, 0xE, 0xC, 0xD, 0xF, 0x1, 0xB, 0x9, 0x5,
597 0x0, 0xA, 0xA, 0xB, 0x8, 0x4
599 unsigned char c;
600 unsigned char bytes[4];
601 long data, size;
603 while (*codes)
604 switch (c = *codes++)
606 case 01: case 02: case 03:
607 out (offset, segment, codes, OUT_RAWDATA+c, NO_SEG, NO_SEG);
608 codes += c;
609 offset += c;
610 break;
612 case 04: case 06:
613 switch (ins->oprs[0].basereg)
615 case R_CS:
616 bytes[0] = 0x0E + (c == 0x04 ? 1 : 0); break;
617 case R_DS:
618 bytes[0] = 0x1E + (c == 0x04 ? 1 : 0); break;
619 case R_ES:
620 bytes[0] = 0x06 + (c == 0x04 ? 1 : 0); break;
621 case R_SS:
622 bytes[0] = 0x16 + (c == 0x04 ? 1 : 0); break;
623 default:
624 errfunc (ERR_PANIC, "bizarre 8086 segment register received");
626 out (offset, segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG);
627 offset++;
628 break;
630 case 05: case 07:
631 switch (ins->oprs[0].basereg) {
632 case R_FS: bytes[0] = 0xA0 + (c == 0x05 ? 1 : 0); break;
633 case R_GS: bytes[0] = 0xA8 + (c == 0x05 ? 1 : 0); break;
634 default:
635 errfunc (ERR_PANIC, "bizarre 386 segment register received");
637 out (offset, segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG);
638 offset++;
639 break;
641 case 010: case 011: case 012:
642 bytes[0] = *codes++ + regval(&ins->oprs[c-010]);
643 out (offset, segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG);
644 offset += 1;
645 break;
647 case 017:
648 bytes[0] = 0;
649 out (offset, segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG);
650 offset += 1;
651 break;
653 case 014: case 015: case 016:
654 if (ins->oprs[c-014].offset < -128
655 || ins->oprs[c-014].offset > 127)
657 errfunc (ERR_WARNING, "signed byte value exceeds bounds");
660 if (ins->oprs[c-014].segment != NO_SEG)
662 data = ins->oprs[c-014].offset;
663 out (offset, segment, &data, OUT_ADDRESS+1,
664 ins->oprs[c-014].segment, ins->oprs[c-014].wrt);
666 else {
667 bytes[0] = ins->oprs[c-014].offset;
668 out (offset, segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG);
670 offset += 1;
671 break;
673 case 020: case 021: case 022:
674 if (ins->oprs[c-020].offset < -256
675 || ins->oprs[c-020].offset > 255)
677 errfunc (ERR_WARNING, "byte value exceeds bounds");
679 if (ins->oprs[c-020].segment != NO_SEG) {
680 data = ins->oprs[c-020].offset;
681 out (offset, segment, &data, OUT_ADDRESS+1,
682 ins->oprs[c-020].segment, ins->oprs[c-020].wrt);
684 else {
685 bytes[0] = ins->oprs[c-020].offset;
686 out (offset, segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG);
688 offset += 1;
689 break;
691 case 024: case 025: case 026:
692 if (ins->oprs[c-024].offset < 0 || ins->oprs[c-024].offset > 255)
693 errfunc (ERR_WARNING, "unsigned byte value exceeds bounds");
694 if (ins->oprs[c-024].segment != NO_SEG) {
695 data = ins->oprs[c-024].offset;
696 out (offset, segment, &data, OUT_ADDRESS+1,
697 ins->oprs[c-024].segment, ins->oprs[c-024].wrt);
699 else {
700 bytes[0] = ins->oprs[c-024].offset;
701 out (offset, segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG);
703 offset += 1;
704 break;
706 case 030: case 031: case 032:
707 if (ins->oprs[c-030].segment == NO_SEG &&
708 ins->oprs[c-030].wrt == NO_SEG &&
709 (ins->oprs[c-030].offset < -65536L ||
710 ins->oprs[c-030].offset > 65535L))
712 errfunc (ERR_WARNING, "word value exceeds bounds");
714 data = ins->oprs[c-030].offset;
715 out (offset, segment, &data, OUT_ADDRESS+2,
716 ins->oprs[c-030].segment, ins->oprs[c-030].wrt);
717 offset += 2;
718 break;
720 case 034: case 035: case 036:
721 data = ins->oprs[c-034].offset;
722 size = ((ins->oprs[c-034].addr_size ?
723 ins->oprs[c-034].addr_size : bits) == 16 ? 2 : 4);
724 if (size==16 && (data < -65536L || data > 65535L))
725 errfunc (ERR_WARNING, "word value exceeds bounds");
726 out (offset, segment, &data, OUT_ADDRESS+size,
727 ins->oprs[c-034].segment, ins->oprs[c-034].wrt);
728 offset += size;
729 break;
731 case 037:
732 if (ins->oprs[0].segment == NO_SEG)
733 errfunc (ERR_NONFATAL, "value referenced by FAR is not"
734 " relocatable");
735 data = 0L;
736 out (offset, segment, &data, OUT_ADDRESS+2,
737 outfmt->segbase(1+ins->oprs[0].segment),
738 ins->oprs[0].wrt);
739 offset += 2;
740 break;
742 case 040: case 041: case 042:
743 data = ins->oprs[c-040].offset;
744 out (offset, segment, &data, OUT_ADDRESS+4,
745 ins->oprs[c-040].segment, ins->oprs[c-040].wrt);
746 offset += 4;
747 break;
749 case 050: case 051: case 052:
750 if (ins->oprs[c-050].segment != segment)
751 errfunc (ERR_NONFATAL, "short relative jump outside segment");
752 data = ins->oprs[c-050].offset - insn_end;
753 if (data > 127 || data < -128)
754 errfunc (ERR_NONFATAL, "short jump is out of range");
755 bytes[0] = data;
756 out (offset, segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG);
757 offset += 1;
758 break;
760 case 060: case 061: case 062:
761 if (ins->oprs[c-060].segment != segment) {
762 data = ins->oprs[c-060].offset;
763 out (offset, segment, &data, OUT_REL2ADR+insn_end-offset,
764 ins->oprs[c-060].segment, ins->oprs[c-060].wrt);
765 } else {
766 data = ins->oprs[c-060].offset - insn_end;
767 out (offset, segment, &data,
768 OUT_ADDRESS+2, NO_SEG, NO_SEG);
770 offset += 2;
771 break;
773 case 064: case 065: case 066:
774 size = ((ins->oprs[c-064].addr_size ?
775 ins->oprs[c-064].addr_size : bits) == 16 ? 2 : 4);
776 if (ins->oprs[c-064].segment != segment) {
777 data = ins->oprs[c-064].offset;
778 size = (bits == 16 ? OUT_REL2ADR : OUT_REL4ADR);
779 out (offset, segment, &data, size+insn_end-offset,
780 ins->oprs[c-064].segment, ins->oprs[c-064].wrt);
781 size = (bits == 16 ? 2 : 4);
782 } else {
783 data = ins->oprs[c-064].offset - insn_end;
784 out (offset, segment, &data,
785 OUT_ADDRESS+size, NO_SEG, NO_SEG);
787 offset += size;
788 break;
790 case 070: case 071: case 072:
791 if (ins->oprs[c-070].segment != segment) {
792 data = ins->oprs[c-070].offset;
793 out (offset, segment, &data, OUT_REL4ADR+insn_end-offset,
794 ins->oprs[c-070].segment, ins->oprs[c-070].wrt);
795 } else {
796 data = ins->oprs[c-070].offset - insn_end;
797 out (offset, segment, &data,
798 OUT_ADDRESS+4, NO_SEG, NO_SEG);
800 offset += 4;
801 break;
803 case 0300: case 0301: case 0302:
804 if (chsize (&ins->oprs[c-0300], bits)) {
805 *bytes = 0x67;
806 out (offset, segment, bytes,
807 OUT_RAWDATA+1, NO_SEG, NO_SEG);
808 offset += 1;
809 } else
810 offset += 0;
811 break;
813 case 0310:
814 if (bits==32) {
815 *bytes = 0x67;
816 out (offset, segment, bytes,
817 OUT_RAWDATA+1, NO_SEG, NO_SEG);
818 offset += 1;
819 } else
820 offset += 0;
821 break;
823 case 0311:
824 if (bits==16) {
825 *bytes = 0x67;
826 out (offset, segment, bytes,
827 OUT_RAWDATA+1, NO_SEG, NO_SEG);
828 offset += 1;
829 } else
830 offset += 0;
831 break;
833 case 0312:
834 break;
836 case 0320:
837 if (bits==32) {
838 *bytes = 0x66;
839 out (offset, segment, bytes,
840 OUT_RAWDATA+1, NO_SEG, NO_SEG);
841 offset += 1;
842 } else
843 offset += 0;
844 break;
846 case 0321:
847 if (bits==16) {
848 *bytes = 0x66;
849 out (offset, segment, bytes,
850 OUT_RAWDATA+1, NO_SEG, NO_SEG);
851 offset += 1;
852 } else
853 offset += 0;
854 break;
856 case 0322:
857 break;
859 case 0330:
860 *bytes = *codes++ + condval[ins->condition];
861 out (offset, segment, bytes,
862 OUT_RAWDATA+1, NO_SEG, NO_SEG);
863 offset += 1;
864 break;
866 case 0331:
867 case 0332:
868 break;
870 case 0333:
871 *bytes = 0xF3;
872 out (offset, segment, bytes,
873 OUT_RAWDATA+1, NO_SEG, NO_SEG);
874 offset += 1;
875 break;
877 case 0340: case 0341: case 0342:
878 if (ins->oprs[0].segment != NO_SEG)
879 errfunc (ERR_PANIC, "non-constant BSS size in pass two");
880 else {
881 long size = ins->oprs[0].offset << (c-0340);
882 if (size > 0)
883 out (offset, segment, NULL,
884 OUT_RESERVE+size, NO_SEG, NO_SEG);
885 offset += size;
887 break;
889 default: /* can't do it by 'case' statements */
890 if (c>=0100 && c<=0277) { /* it's an EA */
891 ea ea_data;
892 int rfield;
893 unsigned char *p;
894 long s;
896 if (c<=0177) /* pick rfield from operand b */
897 rfield = regval (&ins->oprs[c&7]);
898 else /* rfield is constant */
899 rfield = c & 7;
901 if (!process_ea (&ins->oprs[(c>>3)&7], &ea_data, bits, rfield,
902 ins->forw_ref))
904 errfunc (ERR_NONFATAL, "invalid effective address");
907 p = bytes;
908 *p++ = ea_data.modrm;
909 if (ea_data.sib_present)
910 *p++ = ea_data.sib;
912 s = p-bytes;
913 out (offset, segment, bytes, OUT_RAWDATA + s,
914 NO_SEG, NO_SEG);
916 switch (ea_data.bytes) {
917 case 0:
918 break;
919 case 1:
920 if (ins->oprs[(c>>3)&7].segment != NO_SEG) {
921 data = ins->oprs[(c>>3)&7].offset;
922 out (offset, segment, &data, OUT_ADDRESS+1,
923 ins->oprs[(c>>3)&7].segment,
924 ins->oprs[(c>>3)&7].wrt);
925 } else {
926 *bytes = ins->oprs[(c>>3)&7].offset;
927 out (offset, segment, bytes, OUT_RAWDATA+1,
928 NO_SEG, NO_SEG);
930 s++;
931 break;
932 case 2:
933 case 4:
934 data = ins->oprs[(c>>3)&7].offset;
935 out (offset, segment, &data,
936 OUT_ADDRESS+ea_data.bytes,
937 ins->oprs[(c>>3)&7].segment, ins->oprs[(c>>3)&7].wrt);
938 s += ea_data.bytes;
939 break;
941 offset += s;
942 } else
943 errfunc (ERR_PANIC, "internal instruction table corrupt"
944 ": instruction code 0x%02X given", c);
948 static int regval (operand *o)
950 switch (o->basereg) {
951 case R_EAX: case R_AX: case R_AL: case R_ES: case R_CR0: case R_DR0:
952 case R_ST0: case R_MM0: case R_XMM0:
953 return 0;
954 case R_ECX: case R_CX: case R_CL: case R_CS: case R_DR1: case R_ST1:
955 case R_MM1: case R_XMM1:
956 return 1;
957 case R_EDX: case R_DX: case R_DL: case R_SS: case R_CR2: case R_DR2:
958 case R_ST2: case R_MM2: case R_XMM2:
959 return 2;
960 case R_EBX: case R_BX: case R_BL: case R_DS: case R_CR3: case R_DR3:
961 case R_TR3: case R_ST3: case R_MM3: case R_XMM3:
962 return 3;
963 case R_ESP: case R_SP: case R_AH: case R_FS: case R_CR4: case R_TR4:
964 case R_ST4: case R_MM4: case R_XMM4:
965 return 4;
966 case R_EBP: case R_BP: case R_CH: case R_GS: case R_TR5: case R_ST5:
967 case R_MM5: case R_XMM5:
968 return 5;
969 case R_ESI: case R_SI: case R_DH: case R_DR6: case R_TR6: case R_ST6:
970 case R_MM6: case R_XMM6:
971 return 6;
972 case R_EDI: case R_DI: case R_BH: case R_DR7: case R_TR7: case R_ST7:
973 case R_MM7: case R_XMM7:
974 return 7;
975 default: /* panic */
976 errfunc (ERR_PANIC, "invalid register operand given to regval()");
977 return 0;
981 static int matches (struct itemplate *itemp, insn *instruction)
983 int i, size[3], asize, oprs, ret;
985 ret = 100;
988 * Check the opcode
990 if (itemp->opcode != instruction->opcode) return 0;
993 * Count the operands
995 if (itemp->operands != instruction->operands) return 0;
998 * Check that no spurious colons or TOs are present
1000 for (i=0; i<itemp->operands; i++)
1001 if (instruction->oprs[i].type & ~itemp->opd[i] & (COLON|TO))
1002 return 0;
1005 * Check that the operand flags all match up
1007 for (i=0; i<itemp->operands; i++)
1008 if (itemp->opd[i] & ~instruction->oprs[i].type ||
1009 ((itemp->opd[i] & SIZE_MASK) &&
1010 ((itemp->opd[i] ^ instruction->oprs[i].type) & SIZE_MASK)))
1012 if ((itemp->opd[i] & ~instruction->oprs[i].type & NON_SIZE) ||
1013 (instruction->oprs[i].type & SIZE_MASK))
1014 return 0;
1015 else
1016 ret = 1;
1020 * Check operand sizes
1022 if (itemp->flags & IF_ARMASK) {
1023 size[0] = size[1] = size[2] = 0;
1025 switch (itemp->flags & IF_ARMASK) {
1026 case IF_AR0: i = 0; break;
1027 case IF_AR1: i = 1; break;
1028 case IF_AR2: i = 2; break;
1029 default: break; /* Shouldn't happen */
1031 if (itemp->flags & IF_SB) {
1032 size[i] = BITS8;
1033 } else if (itemp->flags & IF_SW) {
1034 size[i] = BITS16;
1035 } else if (itemp->flags & IF_SD) {
1036 size[i] = BITS32;
1038 } else {
1039 asize = 0;
1040 if (itemp->flags & IF_SB) {
1041 asize = BITS8;
1042 oprs = itemp->operands;
1043 } else if (itemp->flags & IF_SW) {
1044 asize = BITS16;
1045 oprs = itemp->operands;
1046 } else if (itemp->flags & IF_SD) {
1047 asize = BITS32;
1048 oprs = itemp->operands;
1050 size[0] = size[1] = size[2] = asize;
1053 if (itemp->flags & (IF_SM | IF_SM2)) {
1054 oprs = (itemp->flags & IF_SM2 ? 2 : itemp->operands);
1055 asize = 0;
1056 for (i=0; i<oprs; i++) {
1057 if ( (asize = itemp->opd[i] & SIZE_MASK) != 0) {
1058 int j;
1059 for (j=0; j<oprs; j++)
1060 size[j] = asize;
1061 break;
1064 } else {
1065 oprs = itemp->operands;
1068 for (i=0; i<itemp->operands; i++)
1069 if (!(itemp->opd[i] & SIZE_MASK) &&
1070 (instruction->oprs[i].type & SIZE_MASK & ~size[i]))
1071 ret = 2;
1073 return ret;
1076 static ea *process_ea (operand *input, ea *output, int addrbits, int rfield,
1077 int forw_ref)
1079 if (!(REGISTER & ~input->type)) { /* it's a single register */
1080 static int regs[] = {
1081 R_AL, R_CL, R_DL, R_BL, R_AH, R_CH, R_DH, R_BH,
1082 R_AX, R_CX, R_DX, R_BX, R_SP, R_BP, R_SI, R_DI,
1083 R_EAX, R_ECX, R_EDX, R_EBX, R_ESP, R_EBP, R_ESI, R_EDI,
1084 R_MM0, R_MM1, R_MM2, R_MM3, R_MM4, R_MM5, R_MM6, R_MM7,
1085 R_XMM0, R_XMM1, R_XMM2, R_XMM3, R_XMM4, R_XMM5, R_XMM6, R_XMM7
1087 int i;
1089 for (i=0; i<elements(regs); i++)
1090 if (input->basereg == regs[i]) break;
1091 if (i<elements(regs)) {
1092 output->sib_present = FALSE;/* no SIB necessary */
1093 output->bytes = 0; /* no offset necessary either */
1094 output->modrm = 0xC0 | (rfield << 3) | (i & 7);
1096 else
1097 return NULL;
1098 } else { /* it's a memory reference */
1099 if (input->basereg==-1 && (input->indexreg==-1 || input->scale==0)) {
1100 /* it's a pure offset */
1101 if (input->addr_size)
1102 addrbits = input->addr_size;
1103 output->sib_present = FALSE;
1104 output->bytes = (addrbits==32 ? 4 : 2);
1105 output->modrm = (addrbits==32 ? 5 : 6) | (rfield << 3);
1107 else { /* it's an indirection */
1108 int i=input->indexreg, b=input->basereg, s=input->scale;
1109 long o=input->offset, seg=input->segment;
1110 int hb=input->hintbase, ht=input->hinttype;
1111 int t;
1113 if (s==0) i = -1; /* make this easy, at least */
1115 if (i==R_EAX || i==R_EBX || i==R_ECX || i==R_EDX
1116 || i==R_EBP || i==R_ESP || i==R_ESI || i==R_EDI
1117 || b==R_EAX || b==R_EBX || b==R_ECX || b==R_EDX
1118 || b==R_EBP || b==R_ESP || b==R_ESI || b==R_EDI) {
1119 /* it must be a 32-bit memory reference. Firstly we have
1120 * to check that all registers involved are type Exx. */
1121 if (i!=-1 && i!=R_EAX && i!=R_EBX && i!=R_ECX && i!=R_EDX
1122 && i!=R_EBP && i!=R_ESP && i!=R_ESI && i!=R_EDI)
1123 return NULL;
1124 if (b!=-1 && b!=R_EAX && b!=R_EBX && b!=R_ECX && b!=R_EDX
1125 && b!=R_EBP && b!=R_ESP && b!=R_ESI && b!=R_EDI)
1126 return NULL;
1128 /* While we're here, ensure the user didn't specify WORD. */
1129 if (input->addr_size == 16)
1130 return NULL;
1132 /* now reorganise base/index */
1133 if (s == 1 && b != i && b != -1 && i != -1 &&
1134 ((hb==b&&ht==EAH_NOTBASE) || (hb==i&&ht==EAH_MAKEBASE)))
1135 t = b, b = i, i = t; /* swap if hints say so */
1136 if (b==i) /* convert EAX+2*EAX to 3*EAX */
1137 b = -1, s++;
1138 if (b==-1 && s==1 && !(hb == i && ht == EAH_NOTBASE))
1139 b = i, i = -1; /* make single reg base, unless hint */
1140 if (((s==2 && i!=R_ESP && !(input->eaflags & EAF_TIMESTWO)) ||
1141 s==3 || s==5 || s==9) && b==-1)
1142 b = i, s--; /* convert 3*EAX to EAX+2*EAX */
1143 if (s==1 && i==R_ESP) /* swap ESP into base if scale is 1 */
1144 i = b, b = R_ESP;
1145 if (i==R_ESP || (s!=1 && s!=2 && s!=4 && s!=8 && i!=-1))
1146 return NULL; /* wrong, for various reasons */
1148 if (i==-1 && b!=R_ESP) {/* no SIB needed */
1149 int mod, rm;
1150 switch(b) {
1151 case R_EAX: rm = 0; break;
1152 case R_ECX: rm = 1; break;
1153 case R_EDX: rm = 2; break;
1154 case R_EBX: rm = 3; break;
1155 case R_EBP: rm = 5; break;
1156 case R_ESI: rm = 6; break;
1157 case R_EDI: rm = 7; break;
1158 case -1: rm = 5; break;
1159 default: /* should never happen */
1160 return NULL;
1162 if (b==-1 || (b!=R_EBP && o==0 &&
1163 seg==NO_SEG && !forw_ref &&
1164 !(input->eaflags &
1165 (EAF_BYTEOFFS|EAF_WORDOFFS))))
1166 mod = 0;
1167 else if (input->eaflags & EAF_BYTEOFFS ||
1168 (o>=-128 && o<=127 && seg==NO_SEG && !forw_ref &&
1169 !(input->eaflags & EAF_WORDOFFS))) {
1170 mod = 1;
1172 else
1173 mod = 2;
1175 output->sib_present = FALSE;
1176 output->bytes = (b==-1 || mod==2 ? 4 : mod);
1177 output->modrm = (mod<<6) | (rfield<<3) | rm;
1179 else { /* we need a SIB */
1180 int mod, scale, index, base;
1182 switch (b) {
1183 case R_EAX: base = 0; break;
1184 case R_ECX: base = 1; break;
1185 case R_EDX: base = 2; break;
1186 case R_EBX: base = 3; break;
1187 case R_ESP: base = 4; break;
1188 case R_EBP: case -1: base = 5; break;
1189 case R_ESI: base = 6; break;
1190 case R_EDI: base = 7; break;
1191 default: /* then what the smeg is it? */
1192 return NULL; /* panic */
1195 switch (i) {
1196 case R_EAX: index = 0; break;
1197 case R_ECX: index = 1; break;
1198 case R_EDX: index = 2; break;
1199 case R_EBX: index = 3; break;
1200 case -1: index = 4; break;
1201 case R_EBP: index = 5; break;
1202 case R_ESI: index = 6; break;
1203 case R_EDI: index = 7; break;
1204 default: /* then what the smeg is it? */
1205 return NULL; /* panic */
1208 if (i==-1) s = 1;
1209 switch (s) {
1210 case 1: scale = 0; break;
1211 case 2: scale = 1; break;
1212 case 4: scale = 2; break;
1213 case 8: scale = 3; break;
1214 default: /* then what the smeg is it? */
1215 return NULL; /* panic */
1218 if (b==-1 || (b!=R_EBP && o==0 &&
1219 seg==NO_SEG && !forw_ref &&
1220 !(input->eaflags &
1221 (EAF_BYTEOFFS|EAF_WORDOFFS))))
1222 mod = 0;
1223 else if (input->eaflags & EAF_BYTEOFFS ||
1224 (o>=-128 && o<=127 && seg==NO_SEG && !forw_ref &&
1225 !(input->eaflags & EAF_WORDOFFS)))
1226 mod = 1;
1227 else
1228 mod = 2;
1230 output->sib_present = TRUE;
1231 output->bytes = (b==-1 || mod==2 ? 4 : mod);
1232 output->modrm = (mod<<6) | (rfield<<3) | 4;
1233 output->sib = (scale<<6) | (index<<3) | base;
1236 else { /* it's 16-bit */
1237 int mod, rm;
1239 /* check all registers are BX, BP, SI or DI */
1240 if ((b!=-1 && b!=R_BP && b!=R_BX && b!=R_SI && b!=R_DI) ||
1241 (i!=-1 && i!=R_BP && i!=R_BX && i!=R_SI && i!=R_DI))
1242 return NULL;
1244 /* ensure the user didn't specify DWORD */
1245 if (input->addr_size == 32)
1246 return NULL;
1248 if (s!=1 && i!=-1) return NULL;/* no can do, in 16-bit EA */
1249 if (b==-1 && i!=-1) b ^= i ^= b ^= i; /* swap them round */
1250 if ((b==R_SI || b==R_DI) && i!=-1)
1251 b ^= i ^= b ^= i; /* have BX/BP as base, SI/DI index */
1252 if (b==i) return NULL;/* shouldn't ever happen, in theory */
1253 if (i!=-1 && b!=-1 &&
1254 (i==R_BP || i==R_BX || b==R_SI || b==R_DI))
1255 return NULL; /* invalid combinations */
1256 if (b==-1) /* pure offset: handled above */
1257 return NULL; /* so if it gets to here, panic! */
1259 rm = -1;
1260 if (i!=-1)
1261 switch (i*256 + b) {
1262 case R_SI*256+R_BX: rm=0; break;
1263 case R_DI*256+R_BX: rm=1; break;
1264 case R_SI*256+R_BP: rm=2; break;
1265 case R_DI*256+R_BP: rm=3; break;
1267 else
1268 switch (b) {
1269 case R_SI: rm=4; break;
1270 case R_DI: rm=5; break;
1271 case R_BP: rm=6; break;
1272 case R_BX: rm=7; break;
1274 if (rm==-1) /* can't happen, in theory */
1275 return NULL; /* so panic if it does */
1277 if (o==0 && seg==NO_SEG && !forw_ref && rm!=6 &&
1278 !(input->eaflags & (EAF_BYTEOFFS|EAF_WORDOFFS)))
1279 mod = 0;
1280 else if (input->eaflags & EAF_BYTEOFFS ||
1281 (o>=-128 && o<=127 && seg==NO_SEG && !forw_ref &&
1282 !(input->eaflags & EAF_WORDOFFS)))
1283 mod = 1;
1284 else
1285 mod = 2;
1287 output->sib_present = FALSE; /* no SIB - it's 16-bit */
1288 output->bytes = mod; /* bytes of offset needed */
1289 output->modrm = (mod<<6) | (rfield<<3) | rm;
1293 output->size = 1 + output->sib_present + output->bytes;
1294 return output;
1297 static int chsize (operand *input, int addrbits)
1299 if (!(MEMORY & ~input->type)) {
1300 int i=input->indexreg, b=input->basereg;
1302 if (input->scale==0) i = -1;
1304 if (i == -1 && b == -1) /* pure offset */
1305 return (input->addr_size != 0 && input->addr_size != addrbits);
1307 if (i==R_EAX || i==R_EBX || i==R_ECX || i==R_EDX
1308 || i==R_EBP || i==R_ESP || i==R_ESI || i==R_EDI
1309 || b==R_EAX || b==R_EBX || b==R_ECX || b==R_EDX
1310 || b==R_EBP || b==R_ESP || b==R_ESI || b==R_EDI)
1311 return (addrbits==16);
1312 else
1313 return (addrbits==32);
1315 else
1316 return 0;