NASM 0.98bf
[nasm/avx512.git] / assemble.c
blob9d4d037bae37cbb9fb2ee0aa2b500d73fd6e9d9b
/* assemble.c   code generation for the Netwide Assembler
 *
 * The Netwide Assembler is copyright (C) 1996 Simon Tatham and
 * Julian Hall. All rights reserved. The software is
 * redistributable under the licence given in the file "Licence"
 * distributed in the NASM archive.
 *
 * The actual codes (C syntax, i.e. octal):
 * \0            - terminates the code. (Unless it's a literal of course.)
 * \1, \2, \3    - that many literal bytes follow in the code stream
 * \4, \6        - the POP/PUSH (respectively) codes for CS, DS, ES, SS
 *                 (POP is never used for CS) depending on operand 0
 * \5, \7        - the second byte of POP/PUSH codes for FS, GS, depending
 *                 on operand 0
 * \10, \11, \12 - a literal byte follows in the code stream, to be added
 *                 to the register value of operand 0, 1 or 2
 * \17           - encodes the literal byte 0. (Some compilers don't take
 *                 kindly to a zero byte in the _middle_ of a compile time
 *                 string constant, so I had to put this hack in.)
 * \14, \15, \16 - a signed byte immediate operand, from operand 0, 1 or 2
 * \20, \21, \22 - a byte immediate operand, from operand 0, 1 or 2
 * \24, \25, \26 - an unsigned byte immediate operand, from operand 0, 1 or 2
 * \30, \31, \32 - a word immediate operand, from operand 0, 1 or 2
 * \34, \35, \36 - select between \3[012] and \4[012] depending on 16/32 bit
 *                 assembly mode or the address-size override on the operand
 * \37           - a word constant, from the _segment_ part of operand 0
 * \40, \41, \42 - a long immediate operand, from operand 0, 1 or 2
 * \50, \51, \52 - a byte relative operand, from operand 0, 1 or 2
 * \60, \61, \62 - a word relative operand, from operand 0, 1 or 2
 * \64, \65, \66 - select between \6[012] and \7[012] depending on 16/32 bit
 *                 assembly mode or the address-size override on the operand
 * \70, \71, \72 - a long relative operand, from operand 0, 1 or 2
 * \1ab          - a ModRM, calculated on EA in operand a, with the spare
 *                 field the register value of operand b.
 * \2ab          - a ModRM, calculated on EA in operand a, with the spare
 *                 field equal to digit b.
 * \30x          - might be an 0x67 byte, depending on the address size of
 *                 the memory reference in operand x.
 * \310          - indicates fixed 16-bit address size, i.e. optional 0x67.
 * \311          - indicates fixed 32-bit address size, i.e. optional 0x67.
 * \312          - generates no code in the assembler.
 * \320          - indicates fixed 16-bit operand size, i.e. optional 0x66.
 * \321          - indicates fixed 32-bit operand size, i.e. optional 0x66.
 * \322          - indicates that this instruction is only valid when the
 *                 operand size is the default (instruction to disassembler,
 *                 generates no code in the assembler)
 * \330          - a literal byte follows in the code stream, to be added
 *                 to the condition code value of the instruction.
 * \331          - instruction not valid with REP prefix. Hint for
 *                 disassembler only; for SSE instructions.
 * \332          - disassemble a rep (0xF3 byte) prefix as repe not rep.
 * \333          - REP prefix (0xF3 byte); for SSE instructions. Not encoded
 *                 as a literal byte in order to aid the disassembler.
 * \340          - reserve <operand 0> bytes of uninitialised storage.
 *                 Operand 0 had better be a segmentless constant.
 * \341, \342    - as \340, but the byte count is <operand 0> shifted left
 *                 by 1 or 2 respectively.
 */
57 #include <stdio.h>
58 #include <string.h>
60 #include "nasm.h"
61 #include "nasmlib.h"
62 #include "assemble.h"
63 #include "insns.h"
65 extern struct itemplate *nasm_instructions[];
/*
 * Encoded form of one effective address, as filled in by process_ea().
 */
typedef struct {
    int sib_present;            /* nonzero when a SIB byte is necessary */
    int bytes;                  /* number of offset bytes needed */
    int size;                   /* lazy - this is sib+bytes+1 */
    unsigned char modrm;        /* the ModRM byte itself */
    unsigned char sib;          /* the SIB byte itself */
} ea;
74 static efunc errfunc;
75 static struct ofmt *outfmt;
76 static ListGen *list;
78 static long calcsize (long, long, int, insn *, char *);
79 static void gencode (long, long, int, insn *, char *, long);
80 static int regval (operand *o);
81 static int matches (struct itemplate *, insn *);
82 static ea * process_ea (operand *, ea *, int, int, int);
83 static int chsize (operand *, int);
86 * This routine wrappers the real output format's output routine,
87 * in order to pass a copy of the data off to the listing file
88 * generator at the same time.
90 static void out (long offset, long segto, void *data, unsigned long type,
91 long segment, long wrt)
93 static long lineno;
94 static char *lnfname;
96 if ((type & OUT_TYPMASK) == OUT_ADDRESS) {
97 if (segment != NO_SEG || wrt != NO_SEG) {
99 * This address is relocated. We must write it as
100 * OUT_ADDRESS, so there's no work to be done here.
102 list->output (offset, data, type);
104 else {
105 unsigned char p[4], *q = p;
107 * This is a non-relocated address, and we're going to
108 * convert it into RAWDATA format.
110 if ((type & OUT_SIZMASK) == 4) {
111 WRITELONG (q, * (long *) data);
112 list->output (offset, p, OUT_RAWDATA+4);
114 else {
115 WRITESHORT (q, * (long *) data);
116 list->output (offset, p, OUT_RAWDATA+2);
120 else if ((type & OUT_TYPMASK) == OUT_RAWDATA) {
121 list->output (offset, data, type);
123 else if ((type & OUT_TYPMASK) == OUT_RESERVE) {
124 list->output (offset, NULL, type);
126 else if ((type & OUT_TYPMASK) == OUT_REL2ADR ||
127 (type & OUT_TYPMASK) == OUT_REL4ADR) {
128 list->output (offset, data, type);
131 if (src_get(&lineno,&lnfname))
132 outfmt->current_dfmt->linenum(lnfname,lineno,segto);
134 outfmt->output (segto, data, type, segment, wrt);
137 long assemble (long segment, long offset, int bits,
138 insn *instruction, struct ofmt *output, efunc error,
139 ListGen *listgen)
141 struct itemplate *temp;
142 int j;
143 int size_prob;
144 long insn_end;
145 long itimes;
146 long start = offset;
147 long wsize = 0; /* size for DB etc. */
149 errfunc = error; /* to pass to other functions */
150 outfmt = output; /* likewise */
151 list = listgen; /* and again */
153 switch (instruction->opcode)
155 case -1: return 0;
156 case I_DB: wsize = 1; break;
157 case I_DW: wsize = 2; break;
158 case I_DD: wsize = 4; break;
159 case I_DQ: wsize = 8; break;
160 case I_DT: wsize = 10; break;
163 if (wsize) {
164 extop * e;
165 long t = instruction->times;
166 if (t < 0)
167 errfunc(ERR_PANIC, "instruction->times < 0 (%ld) in assemble()",t);
169 while (t--) /* repeat TIMES times */
171 for (e = instruction->eops; e; e = e->next)
173 if (e->type == EOT_DB_NUMBER)
175 if (wsize == 1) {
176 if (e->segment != NO_SEG)
177 errfunc (ERR_NONFATAL,
178 "one-byte relocation attempted");
179 else {
180 unsigned char out_byte = e->offset;
181 out (offset, segment, &out_byte, OUT_RAWDATA+1,
182 NO_SEG, NO_SEG);
185 else if (wsize > 5) {
186 errfunc (ERR_NONFATAL, "integer supplied to a D%c"
187 " instruction", wsize==8 ? 'Q' : 'T');
189 else
190 out (offset, segment, &e->offset,
191 OUT_ADDRESS+wsize, e->segment,
192 e->wrt);
193 offset += wsize;
195 else if (e->type == EOT_DB_STRING)
197 int align;
199 out (offset, segment, e->stringval,
200 OUT_RAWDATA+e->stringlen, NO_SEG, NO_SEG);
201 align = e->stringlen % wsize;
203 if (align) {
204 align = wsize - align;
205 out (offset, segment, "\0\0\0\0\0\0\0\0",
206 OUT_RAWDATA+align, NO_SEG, NO_SEG);
208 offset += e->stringlen + align;
211 if (t > 0 && t == instruction->times-1)
214 * Dummy call to list->output to give the offset to the
215 * listing module.
217 list->output (offset, NULL, OUT_RAWDATA);
218 list->uplevel (LIST_TIMES);
221 if (instruction->times > 1)
222 list->downlevel (LIST_TIMES);
223 return offset - start;
226 if (instruction->opcode == I_INCBIN)
228 static char fname[FILENAME_MAX];
229 FILE * fp;
230 long len;
232 len = FILENAME_MAX-1;
233 if (len > instruction->eops->stringlen)
234 len = instruction->eops->stringlen;
235 strncpy (fname, instruction->eops->stringval, len);
236 fname[len] = '\0';
238 if ( (fp = fopen(fname, "rb")) == NULL)
239 error (ERR_NONFATAL, "`incbin': unable to open file `%s'", fname);
240 else if (fseek(fp, 0L, SEEK_END) < 0)
241 error (ERR_NONFATAL, "`incbin': unable to seek on file `%s'",
242 fname);
243 else
245 static char buf[2048];
246 long t = instruction->times;
247 long base = 0;
249 len = ftell (fp);
250 if (instruction->eops->next) {
251 base = instruction->eops->next->offset;
252 len -= base;
253 if (instruction->eops->next->next &&
254 len > instruction->eops->next->next->offset)
255 len = instruction->eops->next->next->offset;
258 * Dummy call to list->output to give the offset to the
259 * listing module.
261 list->output (offset, NULL, OUT_RAWDATA);
262 list->uplevel(LIST_INCBIN);
263 while (t--)
265 long l;
267 fseek (fp, base, SEEK_SET);
268 l = len;
269 while (l > 0) {
270 long m = fread (buf, 1, (l>(int)sizeof(buf)?sizeof(buf):l),
271 fp);
272 if (!m) {
274 * This shouldn't happen unless the file
275 * actually changes while we are reading
276 * it.
278 error (ERR_NONFATAL, "`incbin': unexpected EOF while"
279 " reading file `%s'", fname);
280 t=0; /* Try to exit cleanly */
281 break;
283 out (offset, segment, buf, OUT_RAWDATA+m,
284 NO_SEG, NO_SEG);
285 l -= m;
288 list->downlevel(LIST_INCBIN);
289 if (instruction->times > 1) {
291 * Dummy call to list->output to give the offset to the
292 * listing module.
294 list->output (offset, NULL, OUT_RAWDATA);
295 list->uplevel(LIST_TIMES);
296 list->downlevel(LIST_TIMES);
298 fclose (fp);
299 return instruction->times * len;
301 return 0; /* if we're here, there's an error */
304 size_prob = FALSE;
305 temp = nasm_instructions[instruction->opcode];
306 while (temp->opcode != -1) {
307 int m = matches (temp, instruction);
309 if (m == 100) /* matches! */
311 char *codes = temp->code;
312 long insn_size = calcsize(segment, offset, bits,
313 instruction, codes);
314 itimes = instruction->times;
315 if (insn_size < 0) /* shouldn't be, on pass two */
316 error (ERR_PANIC, "errors made it through from pass one");
317 else while (itimes--) {
318 insn_end = offset + insn_size;
319 for (j=0; j<instruction->nprefix; j++) {
320 unsigned char c=0;
321 switch (instruction->prefixes[j]) {
322 case P_LOCK:
323 c = 0xF0; break;
324 case P_REPNE: case P_REPNZ:
325 c = 0xF2; break;
326 case P_REPE: case P_REPZ: case P_REP:
327 c = 0xF3; break;
328 case R_CS: c = 0x2E; break;
329 case R_DS: c = 0x3E; break;
330 case R_ES: c = 0x26; break;
331 case R_FS: c = 0x64; break;
332 case R_GS: c = 0x65; break;
333 case R_SS: c = 0x36; break;
334 case P_A16:
335 if (bits != 16)
336 c = 0x67;
337 break;
338 case P_A32:
339 if (bits != 32)
340 c = 0x67;
341 break;
342 case P_O16:
343 if (bits != 16)
344 c = 0x66;
345 break;
346 case P_O32:
347 if (bits != 32)
348 c = 0x66;
349 break;
350 default:
351 error (ERR_PANIC,
352 "invalid instruction prefix");
354 if (c != 0) {
355 out (offset, segment, &c, OUT_RAWDATA+1,
356 NO_SEG, NO_SEG);
357 offset++;
360 gencode (segment, offset, bits, instruction, codes, insn_end);
361 offset += insn_size;
362 if (itimes > 0 && itimes == instruction->times-1) {
364 * Dummy call to list->output to give the offset to the
365 * listing module.
367 list->output (offset, NULL, OUT_RAWDATA);
368 list->uplevel (LIST_TIMES);
371 if (instruction->times > 1)
372 list->downlevel (LIST_TIMES);
373 return offset - start;
374 } else if (m > 0) {
375 size_prob = m;
377 temp++;
380 if (temp->opcode == -1) { /* didn't match any instruction */
381 if (size_prob == 1) /* would have matched, but for size */
382 error (ERR_NONFATAL, "operation size not specified");
383 else if (size_prob == 2)
384 error (ERR_NONFATAL, "mismatch in operand sizes");
385 else
386 error (ERR_NONFATAL,
387 "invalid combination of opcode and operands");
389 return 0;
392 long insn_size (long segment, long offset, int bits,
393 insn *instruction, efunc error)
395 struct itemplate *temp;
397 errfunc = error; /* to pass to other functions */
399 if (instruction->opcode == -1)
400 return 0;
402 if (instruction->opcode == I_DB ||
403 instruction->opcode == I_DW ||
404 instruction->opcode == I_DD ||
405 instruction->opcode == I_DQ ||
406 instruction->opcode == I_DT)
408 extop *e;
409 long isize, osize, wsize = 0; /* placate gcc */
411 isize = 0;
412 switch (instruction->opcode)
414 case I_DB: wsize = 1; break;
415 case I_DW: wsize = 2; break;
416 case I_DD: wsize = 4; break;
417 case I_DQ: wsize = 8; break;
418 case I_DT: wsize = 10; break;
421 for (e = instruction->eops; e; e = e->next)
423 long align;
425 osize = 0;
426 if (e->type == EOT_DB_NUMBER)
427 osize = 1;
428 else if (e->type == EOT_DB_STRING)
429 osize = e->stringlen;
431 align = (-osize) % wsize;
432 if (align < 0)
433 align += wsize;
434 isize += osize + align;
436 return isize * instruction->times;
439 if (instruction->opcode == I_INCBIN)
441 char fname[FILENAME_MAX];
442 FILE * fp;
443 long len;
445 len = FILENAME_MAX-1;
446 if (len > instruction->eops->stringlen)
447 len = instruction->eops->stringlen;
448 strncpy (fname, instruction->eops->stringval, len);
449 fname[len] = '\0';
450 if ( (fp = fopen(fname, "rb")) == NULL )
451 error (ERR_NONFATAL, "`incbin': unable to open file `%s'", fname);
452 else if (fseek(fp, 0L, SEEK_END) < 0)
453 error (ERR_NONFATAL, "`incbin': unable to seek on file `%s'",
454 fname);
455 else
457 len = ftell (fp);
458 fclose (fp);
459 if (instruction->eops->next)
461 len -= instruction->eops->next->offset;
462 if (instruction->eops->next->next &&
463 len > instruction->eops->next->next->offset)
465 len = instruction->eops->next->next->offset;
468 return instruction->times * len;
470 return 0; /* if we're here, there's an error */
473 temp = nasm_instructions[instruction->opcode];
474 while (temp->opcode != -1) {
475 if (matches(temp, instruction) == 100) {
476 /* we've matched an instruction. */
477 long isize;
478 char * codes = temp->code;
479 int j;
481 isize = calcsize(segment, offset, bits, instruction, codes);
482 if (isize < 0)
483 return -1;
484 for (j = 0; j < instruction->nprefix; j++)
486 if ((instruction->prefixes[j] != P_A16 &&
487 instruction->prefixes[j] != P_O16 && bits==16) ||
488 (instruction->prefixes[j] != P_A32 &&
489 instruction->prefixes[j] != P_O32 && bits==32))
491 isize++;
494 return isize * instruction->times;
496 temp++;
498 return -1; /* didn't match any instruction */
501 static long calcsize (long segment, long offset, int bits,
502 insn *ins, char *codes)
504 long length = 0;
505 unsigned char c;
507 (void) segment; /* Don't warn that this parameter is unused */
508 (void) offset; /* Don't warn that this parameter is unused */
510 while (*codes) switch (c = *codes++) {
511 case 01: case 02: case 03:
512 codes += c, length += c; break;
513 case 04: case 05: case 06: case 07:
514 length++; break;
515 case 010: case 011: case 012:
516 codes++, length++; break;
517 case 017:
518 length++; break;
519 case 014: case 015: case 016:
520 length++; break;
521 case 020: case 021: case 022:
522 length++; break;
523 case 024: case 025: case 026:
524 length++; break;
525 case 030: case 031: case 032:
526 length += 2; break;
527 case 034: case 035: case 036:
528 length += ((ins->oprs[c-034].addr_size ?
529 ins->oprs[c-034].addr_size : bits) == 16 ? 2 : 4); break;
530 case 037:
531 length += 2; break;
532 case 040: case 041: case 042:
533 length += 4; break;
534 case 050: case 051: case 052:
535 length++; break;
536 case 060: case 061: case 062:
537 length += 2; break;
538 case 064: case 065: case 066:
539 length += ((ins->oprs[c-064].addr_size ?
540 ins->oprs[c-064].addr_size : bits) == 16 ? 2 : 4); break;
541 case 070: case 071: case 072:
542 length += 4; break;
543 case 0300: case 0301: case 0302:
544 length += chsize (&ins->oprs[c-0300], bits);
545 break;
546 case 0310:
547 length += (bits==32);
548 break;
549 case 0311:
550 length += (bits==16);
551 break;
552 case 0312:
553 break;
554 case 0320:
555 length += (bits==32);
556 break;
557 case 0321:
558 length += (bits==16);
559 break;
560 case 0322:
561 break;
562 case 0330:
563 codes++, length++; break;
564 case 0331:
565 case 0332:
566 break;
567 case 0333:
568 length++; break;
569 case 0340: case 0341: case 0342:
570 if (ins->oprs[0].segment != NO_SEG)
571 errfunc (ERR_NONFATAL, "attempt to reserve non-constant"
572 " quantity of BSS space");
573 else
574 length += ins->oprs[0].offset << (c-0340);
575 break;
576 default: /* can't do it by 'case' statements */
577 if (c>=0100 && c<=0277) { /* it's an EA */
578 ea ea_data;
579 if (!process_ea (&ins->oprs[(c>>3)&7], &ea_data, bits, 0,
580 ins->forw_ref)) {
581 errfunc (ERR_NONFATAL, "invalid effective address");
582 return -1;
583 } else
584 length += ea_data.size;
585 } else
586 errfunc (ERR_PANIC, "internal instruction table corrupt"
587 ": instruction code 0x%02X given", c);
589 return length;
592 static void gencode (long segment, long offset, int bits,
593 insn *ins, char *codes, long insn_end)
595 static char condval[] = { /* conditional opcodes */
596 0x7, 0x3, 0x2, 0x6, 0x2, 0x4, 0xF, 0xD, 0xC, 0xE, 0x6, 0x2,
597 0x3, 0x7, 0x3, 0x5, 0xE, 0xC, 0xD, 0xF, 0x1, 0xB, 0x9, 0x5,
598 0x0, 0xA, 0xA, 0xB, 0x8, 0x4
600 unsigned char c;
601 unsigned char bytes[4];
602 long data, size;
604 while (*codes)
605 switch (c = *codes++)
607 case 01: case 02: case 03:
608 out (offset, segment, codes, OUT_RAWDATA+c, NO_SEG, NO_SEG);
609 codes += c;
610 offset += c;
611 break;
613 case 04: case 06:
614 switch (ins->oprs[0].basereg)
616 case R_CS:
617 bytes[0] = 0x0E + (c == 0x04 ? 1 : 0); break;
618 case R_DS:
619 bytes[0] = 0x1E + (c == 0x04 ? 1 : 0); break;
620 case R_ES:
621 bytes[0] = 0x06 + (c == 0x04 ? 1 : 0); break;
622 case R_SS:
623 bytes[0] = 0x16 + (c == 0x04 ? 1 : 0); break;
624 default:
625 errfunc (ERR_PANIC, "bizarre 8086 segment register received");
627 out (offset, segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG);
628 offset++;
629 break;
631 case 05: case 07:
632 switch (ins->oprs[0].basereg) {
633 case R_FS: bytes[0] = 0xA0 + (c == 0x05 ? 1 : 0); break;
634 case R_GS: bytes[0] = 0xA8 + (c == 0x05 ? 1 : 0); break;
635 default:
636 errfunc (ERR_PANIC, "bizarre 386 segment register received");
638 out (offset, segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG);
639 offset++;
640 break;
642 case 010: case 011: case 012:
643 bytes[0] = *codes++ + regval(&ins->oprs[c-010]);
644 out (offset, segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG);
645 offset += 1;
646 break;
648 case 017:
649 bytes[0] = 0;
650 out (offset, segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG);
651 offset += 1;
652 break;
654 case 014: case 015: case 016:
655 if (ins->oprs[c-014].offset < -128
656 || ins->oprs[c-014].offset > 127)
658 errfunc (ERR_WARNING, "signed byte value exceeds bounds");
661 if (ins->oprs[c-014].segment != NO_SEG)
663 data = ins->oprs[c-014].offset;
664 out (offset, segment, &data, OUT_ADDRESS+1,
665 ins->oprs[c-014].segment, ins->oprs[c-014].wrt);
667 else {
668 bytes[0] = ins->oprs[c-014].offset;
669 out (offset, segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG);
671 offset += 1;
672 break;
674 case 020: case 021: case 022:
675 if (ins->oprs[c-020].offset < -256
676 || ins->oprs[c-020].offset > 255)
678 errfunc (ERR_WARNING, "byte value exceeds bounds");
680 if (ins->oprs[c-020].segment != NO_SEG) {
681 data = ins->oprs[c-020].offset;
682 out (offset, segment, &data, OUT_ADDRESS+1,
683 ins->oprs[c-020].segment, ins->oprs[c-020].wrt);
685 else {
686 bytes[0] = ins->oprs[c-020].offset;
687 out (offset, segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG);
689 offset += 1;
690 break;
692 case 024: case 025: case 026:
693 if (ins->oprs[c-024].offset < 0 || ins->oprs[c-024].offset > 255)
694 errfunc (ERR_WARNING, "unsigned byte value exceeds bounds");
695 if (ins->oprs[c-024].segment != NO_SEG) {
696 data = ins->oprs[c-024].offset;
697 out (offset, segment, &data, OUT_ADDRESS+1,
698 ins->oprs[c-024].segment, ins->oprs[c-024].wrt);
700 else {
701 bytes[0] = ins->oprs[c-024].offset;
702 out (offset, segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG);
704 offset += 1;
705 break;
707 case 030: case 031: case 032:
708 if (ins->oprs[c-030].segment == NO_SEG &&
709 ins->oprs[c-030].wrt == NO_SEG &&
710 (ins->oprs[c-030].offset < -65536L ||
711 ins->oprs[c-030].offset > 65535L))
713 errfunc (ERR_WARNING, "word value exceeds bounds");
715 data = ins->oprs[c-030].offset;
716 out (offset, segment, &data, OUT_ADDRESS+2,
717 ins->oprs[c-030].segment, ins->oprs[c-030].wrt);
718 offset += 2;
719 break;
721 case 034: case 035: case 036:
722 data = ins->oprs[c-034].offset;
723 size = ((ins->oprs[c-034].addr_size ?
724 ins->oprs[c-034].addr_size : bits) == 16 ? 2 : 4);
725 if (size==16 && (data < -65536L || data > 65535L))
726 errfunc (ERR_WARNING, "word value exceeds bounds");
727 out (offset, segment, &data, OUT_ADDRESS+size,
728 ins->oprs[c-034].segment, ins->oprs[c-034].wrt);
729 offset += size;
730 break;
732 case 037:
733 if (ins->oprs[0].segment == NO_SEG)
734 errfunc (ERR_NONFATAL, "value referenced by FAR is not"
735 " relocatable");
736 data = 0L;
737 out (offset, segment, &data, OUT_ADDRESS+2,
738 outfmt->segbase(1+ins->oprs[0].segment),
739 ins->oprs[0].wrt);
740 offset += 2;
741 break;
743 case 040: case 041: case 042:
744 data = ins->oprs[c-040].offset;
745 out (offset, segment, &data, OUT_ADDRESS+4,
746 ins->oprs[c-040].segment, ins->oprs[c-040].wrt);
747 offset += 4;
748 break;
750 case 050: case 051: case 052:
751 if (ins->oprs[c-050].segment != segment)
752 errfunc (ERR_NONFATAL, "short relative jump outside segment");
753 data = ins->oprs[c-050].offset - insn_end;
754 if (data > 127 || data < -128)
755 errfunc (ERR_NONFATAL, "short jump is out of range");
756 bytes[0] = data;
757 out (offset, segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG);
758 offset += 1;
759 break;
761 case 060: case 061: case 062:
762 if (ins->oprs[c-060].segment != segment) {
763 data = ins->oprs[c-060].offset;
764 out (offset, segment, &data, OUT_REL2ADR+insn_end-offset,
765 ins->oprs[c-060].segment, ins->oprs[c-060].wrt);
766 } else {
767 data = ins->oprs[c-060].offset - insn_end;
768 out (offset, segment, &data,
769 OUT_ADDRESS+2, NO_SEG, NO_SEG);
771 offset += 2;
772 break;
774 case 064: case 065: case 066:
775 size = ((ins->oprs[c-064].addr_size ?
776 ins->oprs[c-064].addr_size : bits) == 16 ? 2 : 4);
777 if (ins->oprs[c-064].segment != segment) {
778 data = ins->oprs[c-064].offset;
779 size = (bits == 16 ? OUT_REL2ADR : OUT_REL4ADR);
780 out (offset, segment, &data, size+insn_end-offset,
781 ins->oprs[c-064].segment, ins->oprs[c-064].wrt);
782 size = (bits == 16 ? 2 : 4);
783 } else {
784 data = ins->oprs[c-064].offset - insn_end;
785 out (offset, segment, &data,
786 OUT_ADDRESS+size, NO_SEG, NO_SEG);
788 offset += size;
789 break;
791 case 070: case 071: case 072:
792 if (ins->oprs[c-070].segment != segment) {
793 data = ins->oprs[c-070].offset;
794 out (offset, segment, &data, OUT_REL4ADR+insn_end-offset,
795 ins->oprs[c-070].segment, ins->oprs[c-070].wrt);
796 } else {
797 data = ins->oprs[c-070].offset - insn_end;
798 out (offset, segment, &data,
799 OUT_ADDRESS+4, NO_SEG, NO_SEG);
801 offset += 4;
802 break;
804 case 0300: case 0301: case 0302:
805 if (chsize (&ins->oprs[c-0300], bits)) {
806 *bytes = 0x67;
807 out (offset, segment, bytes,
808 OUT_RAWDATA+1, NO_SEG, NO_SEG);
809 offset += 1;
810 } else
811 offset += 0;
812 break;
814 case 0310:
815 if (bits==32) {
816 *bytes = 0x67;
817 out (offset, segment, bytes,
818 OUT_RAWDATA+1, NO_SEG, NO_SEG);
819 offset += 1;
820 } else
821 offset += 0;
822 break;
824 case 0311:
825 if (bits==16) {
826 *bytes = 0x67;
827 out (offset, segment, bytes,
828 OUT_RAWDATA+1, NO_SEG, NO_SEG);
829 offset += 1;
830 } else
831 offset += 0;
832 break;
834 case 0312:
835 break;
837 case 0320:
838 if (bits==32) {
839 *bytes = 0x66;
840 out (offset, segment, bytes,
841 OUT_RAWDATA+1, NO_SEG, NO_SEG);
842 offset += 1;
843 } else
844 offset += 0;
845 break;
847 case 0321:
848 if (bits==16) {
849 *bytes = 0x66;
850 out (offset, segment, bytes,
851 OUT_RAWDATA+1, NO_SEG, NO_SEG);
852 offset += 1;
853 } else
854 offset += 0;
855 break;
857 case 0322:
858 break;
860 case 0330:
861 *bytes = *codes++ + condval[ins->condition];
862 out (offset, segment, bytes,
863 OUT_RAWDATA+1, NO_SEG, NO_SEG);
864 offset += 1;
865 break;
867 case 0331:
868 case 0332:
869 break;
871 case 0333:
872 *bytes = 0xF3;
873 out (offset, segment, bytes,
874 OUT_RAWDATA+1, NO_SEG, NO_SEG);
875 offset += 1;
876 break;
878 case 0340: case 0341: case 0342:
879 if (ins->oprs[0].segment != NO_SEG)
880 errfunc (ERR_PANIC, "non-constant BSS size in pass two");
881 else {
882 long size = ins->oprs[0].offset << (c-0340);
883 if (size > 0)
884 out (offset, segment, NULL,
885 OUT_RESERVE+size, NO_SEG, NO_SEG);
886 offset += size;
888 break;
890 default: /* can't do it by 'case' statements */
891 if (c>=0100 && c<=0277) { /* it's an EA */
892 ea ea_data;
893 int rfield;
894 unsigned char *p;
895 long s;
897 if (c<=0177) /* pick rfield from operand b */
898 rfield = regval (&ins->oprs[c&7]);
899 else /* rfield is constant */
900 rfield = c & 7;
902 if (!process_ea (&ins->oprs[(c>>3)&7], &ea_data, bits, rfield,
903 ins->forw_ref))
905 errfunc (ERR_NONFATAL, "invalid effective address");
908 p = bytes;
909 *p++ = ea_data.modrm;
910 if (ea_data.sib_present)
911 *p++ = ea_data.sib;
913 s = p-bytes;
914 out (offset, segment, bytes, OUT_RAWDATA + s,
915 NO_SEG, NO_SEG);
917 switch (ea_data.bytes) {
918 case 0:
919 break;
920 case 1:
921 if (ins->oprs[(c>>3)&7].segment != NO_SEG) {
922 data = ins->oprs[(c>>3)&7].offset;
923 out (offset, segment, &data, OUT_ADDRESS+1,
924 ins->oprs[(c>>3)&7].segment,
925 ins->oprs[(c>>3)&7].wrt);
926 } else {
927 *bytes = ins->oprs[(c>>3)&7].offset;
928 out (offset, segment, bytes, OUT_RAWDATA+1,
929 NO_SEG, NO_SEG);
931 s++;
932 break;
933 case 2:
934 case 4:
935 data = ins->oprs[(c>>3)&7].offset;
936 out (offset, segment, &data,
937 OUT_ADDRESS+ea_data.bytes,
938 ins->oprs[(c>>3)&7].segment, ins->oprs[(c>>3)&7].wrt);
939 s += ea_data.bytes;
940 break;
942 offset += s;
943 } else
944 errfunc (ERR_PANIC, "internal instruction table corrupt"
945 ": instruction code 0x%02X given", c);
949 static int regval (operand *o)
951 switch (o->basereg) {
952 case R_EAX: case R_AX: case R_AL: case R_ES: case R_CR0: case R_DR0:
953 case R_ST0: case R_MM0: case R_XMM0:
954 return 0;
955 case R_ECX: case R_CX: case R_CL: case R_CS: case R_DR1: case R_ST1:
956 case R_MM1: case R_XMM1:
957 return 1;
958 case R_EDX: case R_DX: case R_DL: case R_SS: case R_CR2: case R_DR2:
959 case R_ST2: case R_MM2: case R_XMM2:
960 return 2;
961 case R_EBX: case R_BX: case R_BL: case R_DS: case R_CR3: case R_DR3:
962 case R_TR3: case R_ST3: case R_MM3: case R_XMM3:
963 return 3;
964 case R_ESP: case R_SP: case R_AH: case R_FS: case R_CR4: case R_TR4:
965 case R_ST4: case R_MM4: case R_XMM4:
966 return 4;
967 case R_EBP: case R_BP: case R_CH: case R_GS: case R_TR5: case R_ST5:
968 case R_MM5: case R_XMM5:
969 return 5;
970 case R_ESI: case R_SI: case R_DH: case R_DR6: case R_TR6: case R_ST6:
971 case R_MM6: case R_XMM6:
972 return 6;
973 case R_EDI: case R_DI: case R_BH: case R_DR7: case R_TR7: case R_ST7:
974 case R_MM7: case R_XMM7:
975 return 7;
976 default: /* panic */
977 errfunc (ERR_PANIC, "invalid register operand given to regval()");
978 return 0;
982 static int matches (struct itemplate *itemp, insn *instruction)
984 int i, size[3], asize, oprs, ret;
986 ret = 100;
989 * Check the opcode
991 if (itemp->opcode != instruction->opcode) return 0;
994 * Count the operands
996 if (itemp->operands != instruction->operands) return 0;
999 * Check that no spurious colons or TOs are present
1001 for (i=0; i<itemp->operands; i++)
1002 if (instruction->oprs[i].type & ~itemp->opd[i] & (COLON|TO))
1003 return 0;
1006 * Check that the operand flags all match up
1008 for (i=0; i<itemp->operands; i++)
1009 if (itemp->opd[i] & ~instruction->oprs[i].type ||
1010 ((itemp->opd[i] & SIZE_MASK) &&
1011 ((itemp->opd[i] ^ instruction->oprs[i].type) & SIZE_MASK)))
1013 if ((itemp->opd[i] & ~instruction->oprs[i].type & NON_SIZE) ||
1014 (instruction->oprs[i].type & SIZE_MASK))
1015 return 0;
1016 else
1017 ret = 1;
1021 * Check operand sizes
1023 if (itemp->flags & IF_ARMASK) {
1024 size[0] = size[1] = size[2] = 0;
1026 switch (itemp->flags & IF_ARMASK) {
1027 case IF_AR0: i = 0; break;
1028 case IF_AR1: i = 1; break;
1029 case IF_AR2: i = 2; break;
1030 default: break; /* Shouldn't happen */
1032 if (itemp->flags & IF_SB) {
1033 size[i] = BITS8;
1034 } else if (itemp->flags & IF_SW) {
1035 size[i] = BITS16;
1036 } else if (itemp->flags & IF_SD) {
1037 size[i] = BITS32;
1039 } else {
1040 asize = 0;
1041 if (itemp->flags & IF_SB) {
1042 asize = BITS8;
1043 oprs = itemp->operands;
1044 } else if (itemp->flags & IF_SW) {
1045 asize = BITS16;
1046 oprs = itemp->operands;
1047 } else if (itemp->flags & IF_SD) {
1048 asize = BITS32;
1049 oprs = itemp->operands;
1051 size[0] = size[1] = size[2] = asize;
1054 if (itemp->flags & (IF_SM | IF_SM2)) {
1055 oprs = (itemp->flags & IF_SM2 ? 2 : itemp->operands);
1056 asize = 0;
1057 for (i=0; i<oprs; i++) {
1058 if ( (asize = itemp->opd[i] & SIZE_MASK) != 0) {
1059 int j;
1060 for (j=0; j<oprs; j++)
1061 size[j] = asize;
1062 break;
1065 } else {
1066 oprs = itemp->operands;
1069 for (i=0; i<itemp->operands; i++)
1070 if (!(itemp->opd[i] & SIZE_MASK) &&
1071 (instruction->oprs[i].type & SIZE_MASK & ~size[i]))
1072 ret = 2;
1074 return ret;
1077 static ea *process_ea (operand *input, ea *output, int addrbits, int rfield,
1078 int forw_ref)
1080 if (!(REGISTER & ~input->type)) { /* it's a single register */
1081 static int regs[] = {
1082 R_AL, R_CL, R_DL, R_BL, R_AH, R_CH, R_DH, R_BH,
1083 R_AX, R_CX, R_DX, R_BX, R_SP, R_BP, R_SI, R_DI,
1084 R_EAX, R_ECX, R_EDX, R_EBX, R_ESP, R_EBP, R_ESI, R_EDI,
1085 R_MM0, R_MM1, R_MM2, R_MM3, R_MM4, R_MM5, R_MM6, R_MM7,
1086 R_XMM0, R_XMM1, R_XMM2, R_XMM3, R_XMM4, R_XMM5, R_XMM6, R_XMM7
1088 int i;
1090 for (i=0; i<elements(regs); i++)
1091 if (input->basereg == regs[i]) break;
1092 if (i<elements(regs)) {
1093 output->sib_present = FALSE;/* no SIB necessary */
1094 output->bytes = 0; /* no offset necessary either */
1095 output->modrm = 0xC0 | (rfield << 3) | (i & 7);
1097 else
1098 return NULL;
1099 } else { /* it's a memory reference */
1100 if (input->basereg==-1 && (input->indexreg==-1 || input->scale==0)) {
1101 /* it's a pure offset */
1102 if (input->addr_size)
1103 addrbits = input->addr_size;
1104 output->sib_present = FALSE;
1105 output->bytes = (addrbits==32 ? 4 : 2);
1106 output->modrm = (addrbits==32 ? 5 : 6) | (rfield << 3);
1108 else { /* it's an indirection */
1109 int i=input->indexreg, b=input->basereg, s=input->scale;
1110 long o=input->offset, seg=input->segment;
1111 int hb=input->hintbase, ht=input->hinttype;
1112 int t;
1114 if (s==0) i = -1; /* make this easy, at least */
1116 if (i==R_EAX || i==R_EBX || i==R_ECX || i==R_EDX
1117 || i==R_EBP || i==R_ESP || i==R_ESI || i==R_EDI
1118 || b==R_EAX || b==R_EBX || b==R_ECX || b==R_EDX
1119 || b==R_EBP || b==R_ESP || b==R_ESI || b==R_EDI) {
1120 /* it must be a 32-bit memory reference. Firstly we have
1121 * to check that all registers involved are type Exx. */
1122 if (i!=-1 && i!=R_EAX && i!=R_EBX && i!=R_ECX && i!=R_EDX
1123 && i!=R_EBP && i!=R_ESP && i!=R_ESI && i!=R_EDI)
1124 return NULL;
1125 if (b!=-1 && b!=R_EAX && b!=R_EBX && b!=R_ECX && b!=R_EDX
1126 && b!=R_EBP && b!=R_ESP && b!=R_ESI && b!=R_EDI)
1127 return NULL;
1129 /* While we're here, ensure the user didn't specify WORD. */
1130 if (input->addr_size == 16)
1131 return NULL;
1133 /* now reorganise base/index */
1134 if (s == 1 && b != i && b != -1 && i != -1 &&
1135 ((hb==b&&ht==EAH_NOTBASE) || (hb==i&&ht==EAH_MAKEBASE)))
1136 t = b, b = i, i = t; /* swap if hints say so */
1137 if (b==i) /* convert EAX+2*EAX to 3*EAX */
1138 b = -1, s++;
1139 if (b==-1 && s==1 && !(hb == i && ht == EAH_NOTBASE))
1140 b = i, i = -1; /* make single reg base, unless hint */
1141 if (((s==2 && i!=R_ESP && !(input->eaflags & EAF_TIMESTWO)) ||
1142 s==3 || s==5 || s==9) && b==-1)
1143 b = i, s--; /* convert 3*EAX to EAX+2*EAX */
1144 if (s==1 && i==R_ESP) /* swap ESP into base if scale is 1 */
1145 i = b, b = R_ESP;
1146 if (i==R_ESP || (s!=1 && s!=2 && s!=4 && s!=8 && i!=-1))
1147 return NULL; /* wrong, for various reasons */
1149 if (i==-1 && b!=R_ESP) {/* no SIB needed */
1150 int mod, rm;
1151 switch(b) {
1152 case R_EAX: rm = 0; break;
1153 case R_ECX: rm = 1; break;
1154 case R_EDX: rm = 2; break;
1155 case R_EBX: rm = 3; break;
1156 case R_EBP: rm = 5; break;
1157 case R_ESI: rm = 6; break;
1158 case R_EDI: rm = 7; break;
1159 case -1: rm = 5; break;
1160 default: /* should never happen */
1161 return NULL;
1163 if (b==-1 || (b!=R_EBP && o==0 &&
1164 seg==NO_SEG && !forw_ref &&
1165 !(input->eaflags &
1166 (EAF_BYTEOFFS|EAF_WORDOFFS))))
1167 mod = 0;
1168 else if (input->eaflags & EAF_BYTEOFFS ||
1169 (o>=-128 && o<=127 && seg==NO_SEG && !forw_ref &&
1170 !(input->eaflags & EAF_WORDOFFS))) {
1171 mod = 1;
1173 else
1174 mod = 2;
1176 output->sib_present = FALSE;
1177 output->bytes = (b==-1 || mod==2 ? 4 : mod);
1178 output->modrm = (mod<<6) | (rfield<<3) | rm;
1180 else { /* we need a SIB */
1181 int mod, scale, index, base;
1183 switch (b) {
1184 case R_EAX: base = 0; break;
1185 case R_ECX: base = 1; break;
1186 case R_EDX: base = 2; break;
1187 case R_EBX: base = 3; break;
1188 case R_ESP: base = 4; break;
1189 case R_EBP: case -1: base = 5; break;
1190 case R_ESI: base = 6; break;
1191 case R_EDI: base = 7; break;
1192 default: /* then what the smeg is it? */
1193 return NULL; /* panic */
1196 switch (i) {
1197 case R_EAX: index = 0; break;
1198 case R_ECX: index = 1; break;
1199 case R_EDX: index = 2; break;
1200 case R_EBX: index = 3; break;
1201 case -1: index = 4; break;
1202 case R_EBP: index = 5; break;
1203 case R_ESI: index = 6; break;
1204 case R_EDI: index = 7; break;
1205 default: /* then what the smeg is it? */
1206 return NULL; /* panic */
1209 if (i==-1) s = 1;
1210 switch (s) {
1211 case 1: scale = 0; break;
1212 case 2: scale = 1; break;
1213 case 4: scale = 2; break;
1214 case 8: scale = 3; break;
1215 default: /* then what the smeg is it? */
1216 return NULL; /* panic */
1219 if (b==-1 || (b!=R_EBP && o==0 &&
1220 seg==NO_SEG && !forw_ref &&
1221 !(input->eaflags &
1222 (EAF_BYTEOFFS|EAF_WORDOFFS))))
1223 mod = 0;
1224 else if (input->eaflags & EAF_BYTEOFFS ||
1225 (o>=-128 && o<=127 && seg==NO_SEG && !forw_ref &&
1226 !(input->eaflags & EAF_WORDOFFS)))
1227 mod = 1;
1228 else
1229 mod = 2;
1231 output->sib_present = TRUE;
1232 output->bytes = (b==-1 || mod==2 ? 4 : mod);
1233 output->modrm = (mod<<6) | (rfield<<3) | 4;
1234 output->sib = (scale<<6) | (index<<3) | base;
1237 else { /* it's 16-bit */
1238 int mod, rm;
1240 /* check all registers are BX, BP, SI or DI */
1241 if ((b!=-1 && b!=R_BP && b!=R_BX && b!=R_SI && b!=R_DI) ||
1242 (i!=-1 && i!=R_BP && i!=R_BX && i!=R_SI && i!=R_DI))
1243 return NULL;
1245 /* ensure the user didn't specify DWORD */
1246 if (input->addr_size == 32)
1247 return NULL;
1249 if (s!=1 && i!=-1) return NULL;/* no can do, in 16-bit EA */
1250 if (b==-1 && i!=-1) b ^= i ^= b ^= i; /* swap them round */
1251 if ((b==R_SI || b==R_DI) && i!=-1)
1252 b ^= i ^= b ^= i; /* have BX/BP as base, SI/DI index */
1253 if (b==i) return NULL;/* shouldn't ever happen, in theory */
1254 if (i!=-1 && b!=-1 &&
1255 (i==R_BP || i==R_BX || b==R_SI || b==R_DI))
1256 return NULL; /* invalid combinations */
1257 if (b==-1) /* pure offset: handled above */
1258 return NULL; /* so if it gets to here, panic! */
1260 rm = -1;
1261 if (i!=-1)
1262 switch (i*256 + b) {
1263 case R_SI*256+R_BX: rm=0; break;
1264 case R_DI*256+R_BX: rm=1; break;
1265 case R_SI*256+R_BP: rm=2; break;
1266 case R_DI*256+R_BP: rm=3; break;
1268 else
1269 switch (b) {
1270 case R_SI: rm=4; break;
1271 case R_DI: rm=5; break;
1272 case R_BP: rm=6; break;
1273 case R_BX: rm=7; break;
1275 if (rm==-1) /* can't happen, in theory */
1276 return NULL; /* so panic if it does */
1278 if (o==0 && seg==NO_SEG && !forw_ref && rm!=6 &&
1279 !(input->eaflags & (EAF_BYTEOFFS|EAF_WORDOFFS)))
1280 mod = 0;
1281 else if (input->eaflags & EAF_BYTEOFFS ||
1282 (o>=-128 && o<=127 && seg==NO_SEG && !forw_ref &&
1283 !(input->eaflags & EAF_WORDOFFS)))
1284 mod = 1;
1285 else
1286 mod = 2;
1288 output->sib_present = FALSE; /* no SIB - it's 16-bit */
1289 output->bytes = mod; /* bytes of offset needed */
1290 output->modrm = (mod<<6) | (rfield<<3) | rm;
1294 output->size = 1 + output->sib_present + output->bytes;
1295 return output;
1298 static int chsize (operand *input, int addrbits)
1300 if (!(MEMORY & ~input->type)) {
1301 int i=input->indexreg, b=input->basereg;
1303 if (input->scale==0) i = -1;
1305 if (i == -1 && b == -1) /* pure offset */
1306 return (input->addr_size != 0 && input->addr_size != addrbits);
1308 if (i==R_EAX || i==R_EBX || i==R_ECX || i==R_EDX
1309 || i==R_EBP || i==R_ESP || i==R_ESI || i==R_EDI
1310 || b==R_EAX || b==R_EBX || b==R_ECX || b==R_EDX
1311 || b==R_EBP || b==R_ESP || b==R_ESI || b==R_EDI)
1312 return (addrbits==16);
1313 else
1314 return (addrbits==32);
1316 else
1317 return 0;