NASM 0.95
[nasm/avx512.git] / assemble.c
blobc6cc00a3fd70b3c67711762d5339168d22675718
/* assemble.c   code generation for the Netwide Assembler
 *
 * The Netwide Assembler is copyright (C) 1996 Simon Tatham and
 * Julian Hall. All rights reserved. The software is
 * redistributable under the licence given in the file "Licence"
 * distributed in the NASM archive.
 *
 * the actual codes (C syntax, i.e. octal):
 * \0            - terminates the code. (Unless it's a literal of course.)
 * \1, \2, \3    - that many literal bytes follow in the code stream
 * \4, \6        - the POP/PUSH (respectively) codes for CS, DS, ES, SS
 *                 (POP is never used for CS) depending on operand 0
 * \5, \7        - the second byte of POP/PUSH codes for FS, GS, depending
 *                 on operand 0
 * \10, \11, \12 - a literal byte follows in the code stream, to be added
 *                 to the register value of operand 0, 1 or 2
 * \17           - encodes the literal byte 0. (Some compilers don't take
 *                 kindly to a zero byte in the _middle_ of a compile time
 *                 string constant, so I had to put this hack in.)
 * \14, \15, \16 - a signed byte immediate operand, from operand 0, 1 or 2
 * \20, \21, \22 - a byte immediate operand, from operand 0, 1 or 2
 * \24, \25, \26 - an unsigned byte immediate operand, from operand 0, 1 or 2
 * \30, \31, \32 - a word immediate operand, from operand 0, 1 or 2
 * \34, \35, \36 - select between \3[012] and \4[012] depending on 16/32 bit
 *                 assembly mode or the address-size override on the operand
 * \37           - a word constant, from the _segment_ part of operand 0
 * \40, \41, \42 - a long immediate operand, from operand 0, 1 or 2
 * \50, \51, \52 - a byte relative operand, from operand 0, 1 or 2
 * \60, \61, \62 - a word relative operand, from operand 0, 1 or 2
 * \64, \65, \66 - select between \6[012] and \7[012] depending on 16/32 bit
 *                 assembly mode or the address-size override on the operand
 * \70, \71, \72 - a long relative operand, from operand 0, 1 or 2
 * \1ab          - a ModRM, calculated on EA in operand a, with the spare
 *                 field the register value of operand b.
 * \2ab          - a ModRM, calculated on EA in operand a, with the spare
 *                 field equal to digit b.
 * \30x          - might be an 0x67 byte, depending on the address size of
 *                 the memory reference in operand x.
 * \310          - indicates fixed 16-bit address size, i.e. optional 0x67.
 * \311          - indicates fixed 32-bit address size, i.e. optional 0x67.
 * \312          - accepted by the assembler, but generates no code.
 * \320          - indicates fixed 16-bit operand size, i.e. optional 0x66.
 * \321          - indicates fixed 32-bit operand size, i.e. optional 0x66.
 * \322          - indicates that this instruction is only valid when the
 *                 operand size is the default (instruction to disassembler,
 *                 generates no code in the assembler)
 * \330          - a literal byte follows in the code stream, to be added
 *                 to the condition code value of the instruction.
 * \340, \341, \342 - reserve <operand 0> bytes, words or doublewords
 *                 (respectively) of uninitialised storage.
 *                 Operand 0 had better be a segmentless constant.
 */
52 #include <stdio.h>
53 #include <string.h>
55 #include "nasm.h"
56 #include "nasmlib.h"
57 #include "assemble.h"
58 #include "insns.h"
60 extern struct itemplate *nasm_instructions[];
62 typedef struct {
63 int sib_present; /* is a SIB byte necessary? */
64 int bytes; /* # of bytes of offset needed */
65 int size; /* lazy - this is sib+bytes+1 */
66 unsigned char modrm, sib; /* the bytes themselves */
67 } ea;
69 static efunc errfunc;
70 static struct ofmt *outfmt;
71 static ListGen *list;
73 static long calcsize (long, long, int, insn *, char *);
74 static void gencode (long, long, int, insn *, char *, long);
75 static int regval (operand *o);
76 static int matches (struct itemplate *, insn *);
77 static ea *process_ea (operand *, ea *, int, int, int);
78 static int chsize (operand *, int);
81 * This routine wrappers the real output format's output routine,
82 * in order to pass a copy of the data off to the listing file
83 * generator at the same time.
85 static void out (long offset, long segto, void *data, unsigned long type,
86 long segment, long wrt) {
87 if ((type & OUT_TYPMASK) == OUT_ADDRESS) {
88 if (segment != NO_SEG || wrt != NO_SEG) {
90 * This address is relocated. We must write it as
91 * OUT_ADDRESS, so there's no work to be done here.
93 list->output (offset, data, type);
94 } else {
95 unsigned char p[4], *q = p;
97 * This is a non-relocated address, and we're going to
98 * convert it into RAWDATA format.
100 if ((type & OUT_SIZMASK) == 4) {
101 WRITELONG (q, * (long *) data);
102 list->output (offset, p, OUT_RAWDATA+4);
103 } else {
104 WRITESHORT (q, * (long *) data);
105 list->output (offset, p, OUT_RAWDATA+2);
108 } else if ((type & OUT_TYPMASK) == OUT_RAWDATA) {
109 list->output (offset, data, type);
110 } else if ((type & OUT_TYPMASK) == OUT_RESERVE) {
111 list->output (offset, NULL, type);
112 } else if ((type & OUT_TYPMASK) == OUT_REL2ADR ||
113 (type & OUT_TYPMASK) == OUT_REL4ADR) {
114 list->output (offset, data, type);
117 outfmt->output (segto, data, type, segment, wrt);
120 long assemble (long segment, long offset, int bits,
121 insn *instruction, struct ofmt *output, efunc error,
122 ListGen *listgen) {
123 int j, size_prob;
124 long insn_end, itimes;
125 long start = offset;
126 struct itemplate *temp;
128 errfunc = error; /* to pass to other functions */
129 outfmt = output; /* likewise */
130 list = listgen; /* and again */
132 if (instruction->opcode == -1)
133 return 0;
135 if (instruction->opcode == I_DB ||
136 instruction->opcode == I_DW ||
137 instruction->opcode == I_DD ||
138 instruction->opcode == I_DQ ||
139 instruction->opcode == I_DT) {
140 extop *e;
141 long wsize = 0; /* placate gcc */
142 long t = instruction->times;
144 switch (instruction->opcode) {
145 case I_DB: wsize = 1; break;
146 case I_DW: wsize = 2; break;
147 case I_DD: wsize = 4; break;
148 case I_DQ: wsize = 8; break;
149 case I_DT: wsize = 10; break;
152 while (t--) {
153 for (e = instruction->eops; e; e = e->next) {
154 if (e->type == EOT_DB_NUMBER) {
155 if (wsize == 1) {
156 if (e->segment != NO_SEG)
157 errfunc (ERR_NONFATAL,
158 "one-byte relocation attempted");
159 else {
160 unsigned char c = e->offset;
161 out (offset, segment, &c, OUT_RAWDATA+1,
162 NO_SEG, NO_SEG);
164 } else if (wsize > 5) {
165 errfunc (ERR_NONFATAL, "integer supplied to a D%c"
166 " instruction", wsize==8 ? 'Q' : 'T');
167 } else
168 out (offset, segment, &e->offset,
169 OUT_ADDRESS+wsize, e->segment,
170 e->wrt);
171 offset += wsize;
172 } else if (e->type == EOT_DB_STRING) {
173 int align;
175 align = (-e->stringlen) % wsize;
176 if (align < 0)
177 align += wsize;
178 out (offset, segment, e->stringval,
179 OUT_RAWDATA+e->stringlen, NO_SEG, NO_SEG);
180 if (align)
181 out (offset, segment, "\0\0\0\0",
182 OUT_RAWDATA+align, NO_SEG, NO_SEG);
183 offset += e->stringlen + align;
186 if (t > 0 && t == instruction->times-1) {
188 * Dummy call to list->output to give the offset to the
189 * listing module.
191 list->output (offset, NULL, OUT_RAWDATA);
192 list->uplevel (LIST_TIMES);
195 if (instruction->times > 1)
196 list->downlevel (LIST_TIMES);
197 return offset - start;
200 if (instruction->opcode == I_INCBIN) {
201 static char fname[FILENAME_MAX];
202 FILE *fp;
203 long len;
205 len = FILENAME_MAX-1;
206 if (len > instruction->eops->stringlen)
207 len = instruction->eops->stringlen;
208 strncpy (fname, instruction->eops->stringval, len);
209 fname[len] = '\0';
210 if (!(fp = fopen(fname, "rb")))
211 error (ERR_NONFATAL, "`incbin': unable to open file `%s'", fname);
212 else if (fseek(fp, 0L, SEEK_END) < 0)
213 error (ERR_NONFATAL, "`incbin': unable to seek on file `%s'",
214 fname);
215 else {
216 static char buf[2048];
217 long t = instruction->times;
218 long l;
220 len = ftell (fp);
221 if (instruction->eops->next) {
222 len -= instruction->eops->next->offset;
223 if (instruction->eops->next->next &&
224 len > instruction->eops->next->next->offset)
225 len = instruction->eops->next->next->offset;
228 * Dummy call to list->output to give the offset to the
229 * listing module.
231 list->output (offset, NULL, OUT_RAWDATA);
232 list->uplevel(LIST_INCBIN);
233 while (t--) {
234 fseek (fp,
235 (instruction->eops->next ?
236 instruction->eops->next->offset : 0),
237 SEEK_SET);
238 l = len;
239 while (l > 0) {
240 long m = fread (buf, 1, (l>sizeof(buf)?sizeof(buf):l),
241 fp);
242 if (!m) {
244 * This shouldn't happen unless the file
245 * actually changes while we are reading
246 * it.
248 error (ERR_NONFATAL, "`incbin': unexpected EOF while"
249 " reading file `%s'", fname);
250 return 0; /* it doesn't much matter... */
252 out (offset, segment, buf, OUT_RAWDATA+m,
253 NO_SEG, NO_SEG);
254 l -= m;
257 list->downlevel(LIST_INCBIN);
258 if (instruction->times > 1) {
260 * Dummy call to list->output to give the offset to the
261 * listing module.
263 list->output (offset, NULL, OUT_RAWDATA);
264 list->uplevel(LIST_TIMES);
265 list->downlevel(LIST_TIMES);
267 fclose (fp);
268 return instruction->times * len;
270 return 0; /* if we're here, there's an error */
273 size_prob = FALSE;
274 temp = nasm_instructions[instruction->opcode];
275 while (temp->opcode != -1) {
276 int m = matches (temp, instruction);
277 if (m == 100) { /* matches! */
278 char *codes = temp->code;
279 long insn_size = calcsize(segment, offset, bits,
280 instruction, codes);
281 itimes = instruction->times;
282 if (insn_size < 0) /* shouldn't be, on pass two */
283 error (ERR_PANIC, "errors made it through from pass one");
284 else while (itimes--) {
285 insn_end = offset + insn_size;
286 for (j=0; j<instruction->nprefix; j++) {
287 unsigned char c;
288 switch (instruction->prefixes[j]) {
289 case P_LOCK:
290 c = 0xF0; break;
291 case P_REPNE: case P_REPNZ:
292 c = 0xF2; break;
293 case P_REPE: case P_REPZ: case P_REP:
294 c = 0xF3; break;
295 case R_CS: c = 0x2E; break;
296 case R_DS: c = 0x3E; break;
297 case R_ES: c = 0x26; break;
298 case R_FS: c = 0x64; break;
299 case R_GS: c = 0x65; break;
300 case R_SS: c = 0x36; break;
301 case P_A16:
302 if (bits == 16)
303 c = 0; /* no prefix */
304 else
305 c = 0x67;
306 break;
307 case P_A32:
308 if (bits == 32)
309 c = 0; /* no prefix */
310 else
311 c = 0x67;
312 break;
313 case P_O16:
314 if (bits == 16)
315 c = 0; /* no prefix */
316 else
317 c = 0x66;
318 break;
319 case P_O32:
320 if (bits == 32)
321 c = 0; /* no prefix */
322 else
323 c = 0x66;
324 break;
325 default:
326 error (ERR_PANIC,
327 "invalid instruction prefix");
329 if (c != 0)
330 out (offset, segment, &c, OUT_RAWDATA+1,
331 NO_SEG, NO_SEG);
332 offset++;
334 gencode (segment, offset, bits, instruction, codes, insn_end);
335 offset += insn_size;
336 if (itimes > 0 && itimes == instruction->times-1) {
338 * Dummy call to list->output to give the offset to the
339 * listing module.
341 list->output (offset, NULL, OUT_RAWDATA);
342 list->uplevel (LIST_TIMES);
345 if (instruction->times > 1)
346 list->downlevel (LIST_TIMES);
347 return offset - start;
348 } else if (m > 0) {
349 size_prob = m;
351 temp++;
353 if (temp->opcode == -1) { /* didn't match any instruction */
354 if (size_prob == 1) /* would have matched, but for size */
355 error (ERR_NONFATAL, "operation size not specified");
356 else if (size_prob == 2)
357 error (ERR_NONFATAL, "mismatch in operand sizes");
358 else
359 error (ERR_NONFATAL,
360 "invalid combination of opcode and operands");
362 return 0;
365 long insn_size (long segment, long offset, int bits,
366 insn *instruction, efunc error) {
367 struct itemplate *temp;
369 errfunc = error; /* to pass to other functions */
371 if (instruction->opcode == -1)
372 return 0;
374 if (instruction->opcode == I_DB ||
375 instruction->opcode == I_DW ||
376 instruction->opcode == I_DD ||
377 instruction->opcode == I_DQ ||
378 instruction->opcode == I_DT) {
379 extop *e;
380 long isize, osize, wsize = 0; /* placate gcc */
382 isize = 0;
383 switch (instruction->opcode) {
384 case I_DB: wsize = 1; break;
385 case I_DW: wsize = 2; break;
386 case I_DD: wsize = 4; break;
387 case I_DQ: wsize = 8; break;
388 case I_DT: wsize = 10; break;
391 for (e = instruction->eops; e; e = e->next) {
392 long align;
394 osize = 0;
395 if (e->type == EOT_DB_NUMBER)
396 osize = 1;
397 else if (e->type == EOT_DB_STRING)
398 osize = e->stringlen;
400 align = (-osize) % wsize;
401 if (align < 0)
402 align += wsize;
403 isize += osize + align;
405 return isize * instruction->times;
408 if (instruction->opcode == I_INCBIN) {
409 char fname[FILENAME_MAX];
410 FILE *fp;
411 long len;
413 len = FILENAME_MAX-1;
414 if (len > instruction->eops->stringlen)
415 len = instruction->eops->stringlen;
416 strncpy (fname, instruction->eops->stringval, len);
417 fname[len] = '\0';
418 if (!(fp = fopen(fname, "rb")))
419 error (ERR_NONFATAL, "`incbin': unable to open file `%s'", fname);
420 else if (fseek(fp, 0L, SEEK_END) < 0)
421 error (ERR_NONFATAL, "`incbin': unable to seek on file `%s'",
422 fname);
423 else {
424 len = ftell (fp);
425 fclose (fp);
426 if (instruction->eops->next) {
427 len -= instruction->eops->next->offset;
428 if (instruction->eops->next->next &&
429 len > instruction->eops->next->next->offset)
430 len = instruction->eops->next->next->offset;
432 return instruction->times * len;
434 return 0; /* if we're here, there's an error */
437 temp = nasm_instructions[instruction->opcode];
438 while (temp->opcode != -1) {
439 if (matches(temp, instruction) == 100) {
440 /* we've matched an instruction. */
441 long isize;
442 char *codes = temp->code;
443 int j;
445 isize = calcsize(segment, offset, bits, instruction, codes);
446 if (isize < 0)
447 return -1;
448 for (j = 0; j < instruction->nprefix; j++) {
449 if ((instruction->prefixes[j] != P_A16 &&
450 instruction->prefixes[j] != P_O16 && bits==16) ||
451 (instruction->prefixes[j] != P_A32 &&
452 instruction->prefixes[j] != P_O32 && bits==32))
453 isize++;
455 return isize * instruction->times;
457 temp++;
459 return -1; /* didn't match any instruction */
462 static long calcsize (long segment, long offset, int bits,
463 insn *ins, char *codes) {
464 long length = 0;
465 unsigned char c;
467 while (*codes) switch (c = *codes++) {
468 case 01: case 02: case 03:
469 codes += c, length += c; break;
470 case 04: case 05: case 06: case 07:
471 length++; break;
472 case 010: case 011: case 012:
473 codes++, length++; break;
474 case 017:
475 length++; break;
476 case 014: case 015: case 016:
477 length++; break;
478 case 020: case 021: case 022:
479 length++; break;
480 case 024: case 025: case 026:
481 length++; break;
482 case 030: case 031: case 032:
483 length += 2; break;
484 case 034: case 035: case 036:
485 length += ((ins->oprs[c-034].addr_size ?
486 ins->oprs[c-034].addr_size : bits) == 16 ? 2 : 4); break;
487 case 037:
488 length += 2; break;
489 case 040: case 041: case 042:
490 length += 4; break;
491 case 050: case 051: case 052:
492 length++; break;
493 case 060: case 061: case 062:
494 length += 2; break;
495 case 064: case 065: case 066:
496 length += ((ins->oprs[c-064].addr_size ?
497 ins->oprs[c-064].addr_size : bits) == 16 ? 2 : 4); break;
498 case 070: case 071: case 072:
499 length += 4; break;
500 case 0300: case 0301: case 0302:
501 length += chsize (&ins->oprs[c-0300], bits);
502 break;
503 case 0310:
504 length += (bits==32);
505 break;
506 case 0311:
507 length += (bits==16);
508 break;
509 case 0312:
510 break;
511 case 0320:
512 length += (bits==32);
513 break;
514 case 0321:
515 length += (bits==16);
516 break;
517 case 0322:
518 break;
519 case 0330:
520 codes++, length++; break;
521 case 0340: case 0341: case 0342:
522 if (ins->oprs[0].segment != NO_SEG)
523 errfunc (ERR_NONFATAL, "attempt to reserve non-constant"
524 " quantity of BSS space");
525 else
526 length += ins->oprs[0].offset << (c-0340);
527 break;
528 default: /* can't do it by 'case' statements */
529 if (c>=0100 && c<=0277) { /* it's an EA */
530 ea ea_data;
531 if (!process_ea (&ins->oprs[(c>>3)&7], &ea_data, bits, 0,
532 ins->forw_ref)) {
533 errfunc (ERR_NONFATAL, "invalid effective address");
534 return -1;
535 } else
536 length += ea_data.size;
537 } else
538 errfunc (ERR_PANIC, "internal instruction table corrupt"
539 ": instruction code 0x%02X given", c);
541 return length;
544 static void gencode (long segment, long offset, int bits,
545 insn *ins, char *codes, long insn_end) {
546 static char condval[] = { /* conditional opcodes */
547 0x7, 0x3, 0x2, 0x6, 0x2, 0x4, 0xF, 0xD, 0xC, 0xE, 0x6, 0x2,
548 0x3, 0x7, 0x3, 0x5, 0xE, 0xC, 0xD, 0xF, 0x1, 0xB, 0x9, 0x5,
549 0x0, 0xA, 0xA, 0xB, 0x8, 0x4
551 unsigned char c, bytes[4];
552 long data, size;
554 while (*codes) switch (c = *codes++) {
555 case 01: case 02: case 03:
556 out (offset, segment, codes, OUT_RAWDATA+c, NO_SEG, NO_SEG);
557 codes += c;
558 offset += c;
559 break;
560 case 04: case 06:
561 switch (ins->oprs[0].basereg) {
562 case R_CS: bytes[0] = 0x0E + (c == 0x04 ? 1 : 0); break;
563 case R_DS: bytes[0] = 0x1E + (c == 0x04 ? 1 : 0); break;
564 case R_ES: bytes[0] = 0x06 + (c == 0x04 ? 1 : 0); break;
565 case R_SS: bytes[0] = 0x16 + (c == 0x04 ? 1 : 0); break;
566 default:
567 errfunc (ERR_PANIC, "bizarre 8086 segment register received");
569 out (offset, segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG);
570 offset++;
571 break;
572 case 05: case 07:
573 switch (ins->oprs[0].basereg) {
574 case R_FS: bytes[0] = 0xA0 + (c == 0x05 ? 1 : 0); break;
575 case R_GS: bytes[0] = 0xA8 + (c == 0x05 ? 1 : 0); break;
576 default:
577 errfunc (ERR_PANIC, "bizarre 386 segment register received");
579 out (offset, segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG);
580 offset++;
581 break;
582 case 010: case 011: case 012:
583 bytes[0] = *codes++ + regval(&ins->oprs[c-010]);
584 out (offset, segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG);
585 offset += 1;
586 break;
587 case 017:
588 bytes[0] = 0;
589 out (offset, segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG);
590 offset += 1;
591 break;
592 case 014: case 015: case 016:
593 if (ins->oprs[c-014].offset < -128 || ins->oprs[c-014].offset > 127)
594 errfunc (ERR_WARNING, "signed byte value exceeds bounds");
595 bytes[0] = ins->oprs[c-014].offset;
596 out (offset, segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG);
597 offset += 1;
598 break;
599 case 020: case 021: case 022:
600 if (ins->oprs[c-020].offset < -256 || ins->oprs[c-020].offset > 255)
601 errfunc (ERR_WARNING, "byte value exceeds bounds");
602 bytes[0] = ins->oprs[c-020].offset;
603 out (offset, segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG);
604 offset += 1;
605 break;
606 case 024: case 025: case 026:
607 if (ins->oprs[c-024].offset < 0 || ins->oprs[c-024].offset > 255)
608 errfunc (ERR_WARNING, "unsigned byte value exceeds bounds");
609 bytes[0] = ins->oprs[c-024].offset;
610 out (offset, segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG);
611 offset += 1;
612 break;
613 case 030: case 031: case 032:
614 if (ins->oprs[c-030].segment == NO_SEG &&
615 ins->oprs[c-030].wrt == NO_SEG &&
616 (ins->oprs[c-030].offset < -65536L ||
617 ins->oprs[c-030].offset > 65535L))
618 errfunc (ERR_WARNING, "word value exceeds bounds");
619 data = ins->oprs[c-030].offset;
620 out (offset, segment, &data, OUT_ADDRESS+2,
621 ins->oprs[c-030].segment, ins->oprs[c-030].wrt);
622 offset += 2;
623 break;
624 case 034: case 035: case 036:
625 data = ins->oprs[c-034].offset;
626 size = ((ins->oprs[c-034].addr_size ?
627 ins->oprs[c-034].addr_size : bits) == 16 ? 2 : 4);
628 if (size==16 && (data < -65536L || data > 65535L))
629 errfunc (ERR_WARNING, "word value exceeds bounds");
630 out (offset, segment, &data, OUT_ADDRESS+size,
631 ins->oprs[c-034].segment, ins->oprs[c-034].wrt);
632 offset += size;
633 break;
634 case 037:
635 if (ins->oprs[0].segment == NO_SEG)
636 errfunc (ERR_NONFATAL, "value referenced by FAR is not"
637 " relocatable");
638 data = 0L;
639 out (offset, segment, &data, OUT_ADDRESS+2,
640 outfmt->segbase(1+ins->oprs[0].segment),
641 ins->oprs[0].wrt);
642 offset += 2;
643 break;
644 case 040: case 041: case 042:
645 data = ins->oprs[c-040].offset;
646 out (offset, segment, &data, OUT_ADDRESS+4,
647 ins->oprs[c-040].segment, ins->oprs[c-040].wrt);
648 offset += 4;
649 break;
650 case 050: case 051: case 052:
651 if (ins->oprs[c-050].segment != segment)
652 errfunc (ERR_NONFATAL, "short relative jump outside segment");
653 data = ins->oprs[c-050].offset - insn_end;
654 if (data > 127 || data < -128)
655 errfunc (ERR_NONFATAL, "short jump is out of range");
656 bytes[0] = data;
657 out (offset, segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG);
658 offset += 1;
659 break;
660 case 060: case 061: case 062:
661 if (ins->oprs[c-060].segment != segment) {
662 data = ins->oprs[c-060].offset;
663 out (offset, segment, &data, OUT_REL2ADR+insn_end-offset,
664 ins->oprs[c-060].segment, ins->oprs[c-060].wrt);
665 } else {
666 data = ins->oprs[c-060].offset - insn_end;
667 out (offset, segment, &data,
668 OUT_ADDRESS+2, NO_SEG, NO_SEG);
670 offset += 2;
671 break;
672 case 064: case 065: case 066:
673 size = ((ins->oprs[c-064].addr_size ?
674 ins->oprs[c-064].addr_size : bits) == 16 ? 2 : 4);
675 if (ins->oprs[c-064].segment != segment) {
676 data = ins->oprs[c-064].offset;
677 size = (bits == 16 ? OUT_REL2ADR : OUT_REL4ADR);
678 out (offset, segment, &data, size+insn_end-offset,
679 ins->oprs[c-064].segment, ins->oprs[c-064].wrt);
680 size = (bits == 16 ? 2 : 4);
681 } else {
682 data = ins->oprs[c-064].offset - insn_end;
683 out (offset, segment, &data,
684 OUT_ADDRESS+size, NO_SEG, NO_SEG);
686 offset += size;
687 break;
688 case 070: case 071: case 072:
689 if (ins->oprs[c-070].segment != segment) {
690 data = ins->oprs[c-070].offset;
691 out (offset, segment, &data, OUT_REL4ADR+insn_end-offset,
692 ins->oprs[c-070].segment, ins->oprs[c-070].wrt);
693 } else {
694 data = ins->oprs[c-070].offset - insn_end;
695 out (offset, segment, &data,
696 OUT_ADDRESS+4, NO_SEG, NO_SEG);
698 offset += 4;
699 break;
700 case 0300: case 0301: case 0302:
701 if (chsize (&ins->oprs[c-0300], bits)) {
702 *bytes = 0x67;
703 out (offset, segment, bytes,
704 OUT_RAWDATA+1, NO_SEG, NO_SEG);
705 offset += 1;
706 } else
707 offset += 0;
708 break;
709 case 0310:
710 if (bits==32) {
711 *bytes = 0x67;
712 out (offset, segment, bytes,
713 OUT_RAWDATA+1, NO_SEG, NO_SEG);
714 offset += 1;
715 } else
716 offset += 0;
717 break;
718 case 0311:
719 if (bits==16) {
720 *bytes = 0x67;
721 out (offset, segment, bytes,
722 OUT_RAWDATA+1, NO_SEG, NO_SEG);
723 offset += 1;
724 } else
725 offset += 0;
726 break;
727 case 0312:
728 break;
729 case 0320:
730 if (bits==32) {
731 *bytes = 0x66;
732 out (offset, segment, bytes,
733 OUT_RAWDATA+1, NO_SEG, NO_SEG);
734 offset += 1;
735 } else
736 offset += 0;
737 break;
738 case 0321:
739 if (bits==16) {
740 *bytes = 0x66;
741 out (offset, segment, bytes,
742 OUT_RAWDATA+1, NO_SEG, NO_SEG);
743 offset += 1;
744 } else
745 offset += 0;
746 break;
747 case 0322:
748 break;
749 case 0330:
750 *bytes = *codes++ + condval[ins->condition];
751 out (offset, segment, bytes,
752 OUT_RAWDATA+1, NO_SEG, NO_SEG);
753 offset += 1;
754 break;
755 case 0340: case 0341: case 0342:
756 if (ins->oprs[0].segment != NO_SEG)
757 errfunc (ERR_PANIC, "non-constant BSS size in pass two");
758 else {
759 long size = ins->oprs[0].offset << (c-0340);
760 out (offset, segment, NULL,
761 OUT_RESERVE+size, NO_SEG, NO_SEG);
762 offset += size;
764 break;
765 default: /* can't do it by 'case' statements */
766 if (c>=0100 && c<=0277) { /* it's an EA */
767 ea ea_data;
768 int rfield;
769 unsigned char *p;
770 long s;
772 if (c<=0177) /* pick rfield from operand b */
773 rfield = regval (&ins->oprs[c&7]);
774 else /* rfield is constant */
775 rfield = c & 7;
776 if (!process_ea (&ins->oprs[(c>>3)&7], &ea_data, bits, rfield,
777 ins->forw_ref))
778 errfunc (ERR_NONFATAL, "invalid effective address");
780 p = bytes;
781 *p++ = ea_data.modrm;
782 if (ea_data.sib_present)
783 *p++ = ea_data.sib;
785 * the cast in the next line is to placate MS C...
787 out (offset, segment, bytes, OUT_RAWDATA+(long)(p-bytes),
788 NO_SEG, NO_SEG);
789 s = p-bytes;
791 switch (ea_data.bytes) {
792 case 0:
793 break;
794 case 1:
795 *bytes = ins->oprs[(c>>3)&7].offset;
796 out (offset, segment, bytes, OUT_RAWDATA+1,
797 NO_SEG, NO_SEG);
798 s++;
799 break;
800 case 2:
801 case 4:
802 data = ins->oprs[(c>>3)&7].offset;
803 out (offset, segment, &data,
804 OUT_ADDRESS+ea_data.bytes,
805 ins->oprs[(c>>3)&7].segment, ins->oprs[(c>>3)&7].wrt);
806 s += ea_data.bytes;
807 break;
809 offset += s;
810 } else
811 errfunc (ERR_PANIC, "internal instruction table corrupt"
812 ": instruction code 0x%02X given", c);
816 static int regval (operand *o) {
817 switch (o->basereg) {
818 case R_EAX: case R_AX: case R_AL: case R_ES: case R_CR0: case R_DR0:
819 case R_ST0: case R_MM0:
820 return 0;
821 case R_ECX: case R_CX: case R_CL: case R_CS: case R_DR1: case R_ST1:
822 case R_MM1:
823 return 1;
824 case R_EDX: case R_DX: case R_DL: case R_SS: case R_CR2: case R_DR2:
825 case R_ST2: case R_MM2:
826 return 2;
827 case R_EBX: case R_BX: case R_BL: case R_DS: case R_CR3: case R_DR3:
828 case R_TR3: case R_ST3: case R_MM3:
829 return 3;
830 case R_ESP: case R_SP: case R_AH: case R_FS: case R_CR4: case R_TR4:
831 case R_ST4: case R_MM4:
832 return 4;
833 case R_EBP: case R_BP: case R_CH: case R_GS: case R_TR5: case R_ST5:
834 case R_MM5:
835 return 5;
836 case R_ESI: case R_SI: case R_DH: case R_DR6: case R_TR6: case R_ST6:
837 case R_MM6:
838 return 6;
839 case R_EDI: case R_DI: case R_BH: case R_DR7: case R_TR7: case R_ST7:
840 case R_MM7:
841 return 7;
842 default: /* panic */
843 errfunc (ERR_PANIC, "invalid register operand given to regval()");
844 return 0;
848 static int matches (struct itemplate *itemp, insn *instruction) {
849 int i, size, oprs, ret;
851 ret = 100;
854 * Check the opcode
856 if (itemp->opcode != instruction->opcode) return 0;
859 * Count the operands
861 if (itemp->operands != instruction->operands) return 0;
864 * Check that no spurious colons or TOs are present
866 for (i=0; i<itemp->operands; i++)
867 if (instruction->oprs[i].type & ~itemp->opd[i] & (COLON|TO))
868 return 0;
871 * Check that the operand flags all match up
873 for (i=0; i<itemp->operands; i++)
874 if (itemp->opd[i] & ~instruction->oprs[i].type ||
875 ((itemp->opd[i] & SIZE_MASK) &&
876 ((itemp->opd[i] ^ instruction->oprs[i].type) & SIZE_MASK))) {
877 if ((itemp->opd[i] & ~instruction->oprs[i].type & NON_SIZE) ||
878 (instruction->oprs[i].type & SIZE_MASK))
879 return 0;
880 else
881 ret = 1;
885 * Check operand sizes
887 if (itemp->flags & IF_SB) {
888 size = BITS8;
889 oprs = itemp->operands;
890 } else if (itemp->flags & IF_SD) {
891 size = BITS32;
892 oprs = itemp->operands;
893 } else if (itemp->flags & (IF_SM | IF_SM2)) {
894 oprs = (itemp->flags & IF_SM2 ? 2 : itemp->operands);
895 size = 0; /* placate gcc */
896 for (i=0; i<oprs; i++)
897 if ( (size = itemp->opd[i] & SIZE_MASK) != 0)
898 break;
899 } else {
900 size = 0;
901 oprs = itemp->operands;
904 for (i=0; i<itemp->operands; i++)
905 if (!(itemp->opd[i] & SIZE_MASK) &&
906 (instruction->oprs[i].type & SIZE_MASK & ~size))
907 ret = 2;
909 return ret;
912 static ea *process_ea (operand *input, ea *output, int addrbits, int rfield,
913 int forw_ref) {
914 if (!(REGISTER & ~input->type)) { /* it's a single register */
915 static int regs[] = {
916 R_MM0, R_EAX, R_AX, R_AL, R_MM1, R_ECX, R_CX, R_CL,
917 R_MM2, R_EDX, R_DX, R_DL, R_MM3, R_EBX, R_BX, R_BL,
918 R_MM4, R_ESP, R_SP, R_AH, R_MM5, R_EBP, R_BP, R_CH,
919 R_MM6, R_ESI, R_SI, R_DH, R_MM7, R_EDI, R_DI, R_BH
921 int i;
923 for (i=0; i<elements(regs); i++)
924 if (input->basereg == regs[i]) break;
925 if (i<elements(regs)) {
926 output->sib_present = FALSE;/* no SIB necessary */
927 output->bytes = 0; /* no offset necessary either */
928 output->modrm = 0xC0 | (rfield << 3) | (i/4);
929 } else
930 return NULL;
931 } else { /* it's a memory reference */
932 if (input->basereg==-1 && (input->indexreg==-1 || input->scale==0)) {
933 /* it's a pure offset */
934 if (input->addr_size)
935 addrbits = input->addr_size;
936 output->sib_present = FALSE;
937 output->bytes = (addrbits==32 ? 4 : 2);
938 output->modrm = (addrbits==32 ? 5 : 6) | (rfield << 3);
939 } else { /* it's an indirection */
940 int i=input->indexreg, b=input->basereg, s=input->scale;
941 long o=input->offset, seg=input->segment;
943 if (s==0) i = -1; /* make this easy, at least */
945 if (i==R_EAX || i==R_EBX || i==R_ECX || i==R_EDX
946 || i==R_EBP || i==R_ESP || i==R_ESI || i==R_EDI
947 || b==R_EAX || b==R_EBX || b==R_ECX || b==R_EDX
948 || b==R_EBP || b==R_ESP || b==R_ESI || b==R_EDI) {
949 /* it must be a 32-bit memory reference. Firstly we have
950 * to check that all registers involved are type Exx. */
951 if (i!=-1 && i!=R_EAX && i!=R_EBX && i!=R_ECX && i!=R_EDX
952 && i!=R_EBP && i!=R_ESP && i!=R_ESI && i!=R_EDI)
953 return NULL;
954 if (b!=-1 && b!=R_EAX && b!=R_EBX && b!=R_ECX && b!=R_EDX
955 && b!=R_EBP && b!=R_ESP && b!=R_ESI && b!=R_EDI)
956 return NULL;
958 /* While we're here, ensure the user didn't specify WORD. */
959 if (input->addr_size == 16)
960 return NULL;
962 /* now reorganise base/index */
963 if (b==i) /* convert EAX+2*EAX to 3*EAX */
964 b = -1, s++;
965 if (b==-1 && s==1) /* single register should be base */
966 b = i, i = -1;
967 if (((s==2 && i!=R_ESP) || s==3 || s==5 || s==9) && b==-1)
968 b = i, s--; /* convert 3*EAX to EAX+2*EAX */
969 if (s==1 && i==R_ESP) /* swap ESP into base if scale is 1 */
970 i = b, b = R_ESP;
971 if (i==R_ESP || (s!=1 && s!=2 && s!=4 && s!=8 && i!=-1))
972 return NULL; /* wrong, for various reasons */
974 if (i==-1 && b!=R_ESP) {/* no SIB needed */
975 int mod, rm;
976 switch(b) {
977 case R_EAX: rm = 0; break;
978 case R_ECX: rm = 1; break;
979 case R_EDX: rm = 2; break;
980 case R_EBX: rm = 3; break;
981 case R_EBP: rm = 5; break;
982 case R_ESI: rm = 6; break;
983 case R_EDI: rm = 7; break;
984 case -1: rm = 5; break;
985 default: /* should never happen */
986 return NULL;
988 if (b==-1 || (b!=R_EBP && o==0 &&
989 seg==NO_SEG && !forw_ref))
990 mod = 0;
991 else if (o>=-128 && o<=127 && seg==NO_SEG && !forw_ref)
992 mod = 1;
993 else
994 mod = 2;
996 output->sib_present = FALSE;
997 output->bytes = (b==-1 || mod==2 ? 4 : mod);
998 output->modrm = (mod<<6) | (rfield<<3) | rm;
999 } else { /* we need a SIB */
1000 int mod, scale, index, base;
1002 switch (b) {
1003 case R_EAX: base = 0; break;
1004 case R_ECX: base = 1; break;
1005 case R_EDX: base = 2; break;
1006 case R_EBX: base = 3; break;
1007 case R_ESP: base = 4; break;
1008 case R_EBP: case -1: base = 5; break;
1009 case R_ESI: base = 6; break;
1010 case R_EDI: base = 7; break;
1011 default: /* then what the smeg is it? */
1012 return NULL; /* panic */
1015 switch (i) {
1016 case R_EAX: index = 0; break;
1017 case R_ECX: index = 1; break;
1018 case R_EDX: index = 2; break;
1019 case R_EBX: index = 3; break;
1020 case -1: index = 4; break;
1021 case R_EBP: index = 5; break;
1022 case R_ESI: index = 6; break;
1023 case R_EDI: index = 7; break;
1024 default: /* then what the smeg is it? */
1025 return NULL; /* panic */
1028 if (i==-1) s = 1;
1029 switch (s) {
1030 case 1: scale = 0; break;
1031 case 2: scale = 1; break;
1032 case 4: scale = 2; break;
1033 case 8: scale = 3; break;
1034 default: /* then what the smeg is it? */
1035 return NULL; /* panic */
1038 if (b==-1 || (b!=R_EBP && o==0 &&
1039 seg==NO_SEG && !forw_ref))
1040 mod = 0;
1041 else if (o>=-128 && o<=127 && seg==NO_SEG && !forw_ref)
1042 mod = 1;
1043 else
1044 mod = 2;
1046 output->sib_present = TRUE;
1047 output->bytes = (b==-1 || mod==2 ? 4 : mod);
1048 output->modrm = (mod<<6) | (rfield<<3) | 4;
1049 output->sib = (scale<<6) | (index<<3) | base;
1051 } else { /* it's 16-bit */
1052 int mod, rm;
1054 /* check all registers are BX, BP, SI or DI */
1055 if ((b!=-1 && b!=R_BP && b!=R_BX && b!=R_SI && b!=R_DI) ||
1056 (i!=-1 && i!=R_BP && i!=R_BX && i!=R_SI && i!=R_DI))
1057 return NULL;
1059 /* ensure the user didn't specify DWORD */
1060 if (input->addr_size == 32)
1061 return NULL;
1063 if (s!=1 && i!=-1) return NULL;/* no can do, in 16-bit EA */
1064 if (b==-1 && i!=-1) b ^= i ^= b ^= i; /* swap them round */
1065 if ((b==R_SI || b==R_DI) && i!=-1)
1066 b ^= i ^= b ^= i; /* have BX/BP as base, SI/DI index */
1067 if (b==i) return NULL;/* shouldn't ever happen, in theory */
1068 if (i!=-1 && b!=-1 &&
1069 (i==R_BP || i==R_BX || b==R_SI || b==R_DI))
1070 return NULL; /* invalid combinations */
1071 if (b==-1) /* pure offset: handled above */
1072 return NULL; /* so if it gets to here, panic! */
1074 rm = -1;
1075 if (i!=-1)
1076 switch (i*256 + b) {
1077 case R_SI*256+R_BX: rm=0; break;
1078 case R_DI*256+R_BX: rm=1; break;
1079 case R_SI*256+R_BP: rm=2; break;
1080 case R_DI*256+R_BP: rm=3; break;
1082 else
1083 switch (b) {
1084 case R_SI: rm=4; break;
1085 case R_DI: rm=5; break;
1086 case R_BP: rm=6; break;
1087 case R_BX: rm=7; break;
1089 if (rm==-1) /* can't happen, in theory */
1090 return NULL; /* so panic if it does */
1092 if (o==0 && seg==NO_SEG && !forw_ref && rm!=6)
1093 mod = 0;
1094 else if (o>=-128 && o<=127 && seg==NO_SEG && !forw_ref)
1095 mod = 1;
1096 else
1097 mod = 2;
1099 output->sib_present = FALSE; /* no SIB - it's 16-bit */
1100 output->bytes = mod; /* bytes of offset needed */
1101 output->modrm = (mod<<6) | (rfield<<3) | rm;
1105 output->size = 1 + output->sib_present + output->bytes;
1106 return output;
1109 static int chsize (operand *input, int addrbits) {
1110 if (!(MEMORY & ~input->type)) {
1111 int i=input->indexreg, b=input->basereg;
1113 if (input->scale==0) i = -1;
1115 if (i == -1 && b == -1) /* pure offset */
1116 return (input->addr_size != 0 && input->addr_size != addrbits);
1118 if (i==R_EAX || i==R_EBX || i==R_ECX || i==R_EDX
1119 || i==R_EBP || i==R_ESP || i==R_ESI || i==R_EDI
1120 || b==R_EAX || b==R_EBX || b==R_ECX || b==R_EDX
1121 || b==R_EBP || b==R_ESP || b==R_ESI || b==R_EDI)
1122 return (addrbits==16);
1123 else
1124 return (addrbits==32);
1125 } else
1126 return 0;