NASM 0.91
[nasm/avx512.git] / assemble.c
blobbab6f292136f9e343ba1dd9295852ff4488e6272
/* assemble.c   code generation for the Netwide Assembler
 *
 * The Netwide Assembler is copyright (C) 1996 Simon Tatham and
 * Julian Hall. All rights reserved. The software is
 * redistributable under the licence given in the file "Licence"
 * distributed in the NASM archive.
 *
 * the actual codes (C syntax, i.e. octal):
 * \0            - terminates the code. (Unless it's a literal of course.)
 * \1, \2, \3    - that many literal bytes follow in the code stream
 * \4, \6        - the POP/PUSH (respectively) codes for CS, DS, ES, SS
 *                 (POP is never used for CS) depending on operand 0
 * \5, \7        - the second byte of POP/PUSH codes for FS, GS, depending
 *                 on operand 0
 * \10, \11, \12 - a literal byte follows in the code stream, to be added
 *                 to the register value of operand 0, 1 or 2
 * \17           - encodes the literal byte 0. (Some compilers don't take
 *                 kindly to a zero byte in the _middle_ of a compile time
 *                 string constant, so I had to put this hack in.)
 * \14, \15, \16 - a signed byte immediate operand, from operand 0, 1 or 2
 * \20, \21, \22 - a byte immediate operand, from operand 0, 1 or 2
 * \24, \25, \26 - an unsigned byte immediate operand, from operand 0, 1 or 2
 * \30, \31, \32 - a word immediate operand, from operand 0, 1 or 2
 * \34, \35, \36 - select between \3[012] and \4[012] depending on 16/32 bit
 *                 assembly mode or the address-size override on the operand
 * \37           - a word constant, from the _segment_ part of operand 0
 * \40, \41, \42 - a long immediate operand, from operand 0, 1 or 2
 * \50, \51, \52 - a byte relative operand, from operand 0, 1 or 2
 * \60, \61, \62 - a word relative operand, from operand 0, 1 or 2
 * \64, \65, \66 - select between \6[012] and \7[012] depending on 16/32 bit
 *                 assembly mode or the address-size override on the operand
 * \70, \71, \72 - a long relative operand, from operand 0, 1 or 2
 * \1ab          - a ModRM, calculated on EA in operand a, with the spare
 *                 field the register value of operand b.
 * \2ab          - a ModRM, calculated on EA in operand a, with the spare
 *                 field equal to digit b.
 * \30x          - might be an 0x67 byte, depending on the address size of
 *                 the memory reference in operand x.
 * \310          - indicates fixed 16-bit address size, i.e. optional 0x67.
 * \311          - indicates fixed 32-bit address size, i.e. optional 0x67.
 * \320          - indicates fixed 16-bit operand size, i.e. optional 0x66.
 * \321          - indicates fixed 32-bit operand size, i.e. optional 0x66.
 * \322          - indicates that this instruction is only valid when the
 *                 operand size is the default (instruction to disassembler,
 *                 generates no code in the assembler)
 * \330          - a literal byte follows in the code stream, to be added
 *                 to the condition code value of the instruction.
 * \340          - reserve <operand 0> bytes of uninitialised storage.
 *                 Operand 0 had better be a segmentless constant.
 */

52 #include <stdio.h>
53 #include <string.h>
55 #include "nasm.h"
56 #include "assemble.h"
57 #include "insns.h"
59 extern struct itemplate *nasm_instructions[];
61 typedef struct {
62 int sib_present; /* is a SIB byte necessary? */
63 int bytes; /* # of bytes of offset needed */
64 int size; /* lazy - this is sib+bytes+1 */
65 unsigned char modrm, sib; /* the bytes themselves */
66 } ea;
68 static efunc errfunc;
69 static struct ofmt *outfmt;
71 static long calcsize (long, long, int, insn *, char *);
72 static void gencode (long, long, int, insn *, char *, long);
73 static int regval (operand *o);
74 static int matches (struct itemplate *, insn *);
75 static ea *process_ea (operand *, ea *, int, int);
76 static int chsize (operand *, int);
78 long assemble (long segment, long offset, int bits,
79 insn *instruction, struct ofmt *output, efunc error) {
80 int j, itimes, size_prob;
81 long insn_end;
82 long start = offset;
83 struct itemplate *temp;
85 errfunc = error; /* to pass to other functions */
86 outfmt = output; /* likewise */
88 if (instruction->opcode == -1)
89 return 0;
91 if (instruction->opcode == I_DB ||
92 instruction->opcode == I_DW ||
93 instruction->opcode == I_DD ||
94 instruction->opcode == I_DQ ||
95 instruction->opcode == I_DT) {
96 extop *e;
97 long osize, wsize = 0; /* placate gcc */
98 int t = instruction->times;
100 switch (instruction->opcode) {
101 case I_DB: wsize = 1; break;
102 case I_DW: wsize = 2; break;
103 case I_DD: wsize = 4; break;
104 case I_DQ: wsize = 8; break;
105 case I_DT: wsize = 10; break;
108 while (t--) {
109 for (e = instruction->eops; e; e = e->next) {
110 osize = 0;
111 if (e->type == EOT_DB_NUMBER) {
112 if (wsize == 1) {
113 if (e->segment != NO_SEG)
114 errfunc (ERR_NONFATAL,
115 "one-byte relocation attempted");
116 else {
117 unsigned char c = e->offset;
118 outfmt->output (segment, &c, OUT_RAWDATA+1,
119 NO_SEG, NO_SEG);
121 } else if (wsize > 5) {
122 errfunc (ERR_NONFATAL, "integer supplied to a D%c"
123 " instruction", wsize==8 ? 'Q' : 'T');
124 } else
125 outfmt->output (segment, &e->offset,
126 OUT_ADDRESS+wsize, e->segment,
127 e->wrt);
128 offset += wsize;
129 } else if (e->type == EOT_DB_STRING) {
130 int align;
132 align = (-e->stringlen) % wsize;
133 if (align < 0)
134 align += wsize;
135 outfmt->output (segment, e->stringval,
136 OUT_RAWDATA+e->stringlen, NO_SEG, NO_SEG);
137 if (align)
138 outfmt->output (segment, "\0\0\0\0",
139 OUT_RAWDATA+align, NO_SEG, NO_SEG);
140 offset += e->stringlen + align;
144 return offset - start;
147 size_prob = FALSE;
148 temp = nasm_instructions[instruction->opcode];
149 while (temp->opcode != -1) {
150 int m = matches (temp, instruction);
151 if (m == 100) { /* matches! */
152 char *codes = temp->code;
153 long insn_size = calcsize(segment, offset, bits,
154 instruction, codes);
155 itimes = instruction->times;
156 if (insn_size < 0) /* shouldn't be, on pass two */
157 error (ERR_PANIC, "errors made it through from pass one");
158 else while (itimes--) {
159 insn_end = offset + insn_size;
160 for (j=0; j<instruction->nprefix; j++) {
161 unsigned char c;
162 switch (instruction->prefixes[j]) {
163 case P_LOCK:
164 c = 0xF0; break;
165 case P_REPNE: case P_REPNZ:
166 c = 0xF2; break;
167 case P_REPE: case P_REPZ: case P_REP:
168 c = 0xF3; break;
169 case R_CS: c = 0x2E; break;
170 case R_DS: c = 0x3E; break;
171 case R_ES: c = 0x26; break;
172 case R_FS: c = 0x64; break;
173 case R_GS: c = 0x65; break;
174 case R_SS: c = 0x36; break;
175 case P_A16:
176 if (bits == 16)
177 c = 0; /* no prefix */
178 else
179 c = 0x67;
180 break;
181 case P_A32:
182 if (bits == 32)
183 c = 0; /* no prefix */
184 else
185 c = 0x67;
186 break;
187 case P_O16:
188 if (bits == 16)
189 c = 0; /* no prefix */
190 else
191 c = 0x66;
192 break;
193 case P_O32:
194 if (bits == 32)
195 c = 0; /* no prefix */
196 else
197 c = 0x66;
198 break;
199 default:
200 error (ERR_PANIC,
201 "invalid instruction prefix");
203 if (c != 0)
204 outfmt->output (segment, &c, OUT_RAWDATA+1,
205 NO_SEG, NO_SEG);
206 offset++;
208 gencode (segment, offset, bits, instruction, codes, insn_end);
209 offset += insn_size;
211 return offset - start;
212 } else if (m > 0) {
213 size_prob = m;
215 temp++;
217 if (temp->opcode == -1) { /* didn't match any instruction */
218 if (size_prob == 1) /* would have matched, but for size */
219 error (ERR_NONFATAL, "operation size not specified");
220 else if (size_prob == 2)
221 error (ERR_NONFATAL, "mismatch in operand sizes");
222 else
223 error (ERR_NONFATAL,
224 "invalid combination of opcode and operands");
226 return 0;
229 long insn_size (long segment, long offset, int bits,
230 insn *instruction, efunc error) {
231 struct itemplate *temp;
233 errfunc = error; /* to pass to other functions */
235 if (instruction->opcode == -1)
236 return 0;
238 if (instruction->opcode == I_DB ||
239 instruction->opcode == I_DW ||
240 instruction->opcode == I_DD ||
241 instruction->opcode == I_DQ ||
242 instruction->opcode == I_DT) {
243 extop *e;
244 long isize, osize, wsize = 0; /* placate gcc */
246 isize = 0;
247 switch (instruction->opcode) {
248 case I_DB: wsize = 1; break;
249 case I_DW: wsize = 2; break;
250 case I_DD: wsize = 4; break;
251 case I_DQ: wsize = 8; break;
252 case I_DT: wsize = 10; break;
255 for (e = instruction->eops; e; e = e->next) {
256 long align;
258 osize = 0;
259 if (e->type == EOT_DB_NUMBER)
260 osize = 1;
261 else if (e->type == EOT_DB_STRING)
262 osize = e->stringlen;
264 align = (-osize) % wsize;
265 if (align < 0)
266 align += wsize;
267 isize += osize + align;
269 return isize * instruction->times;
272 temp = nasm_instructions[instruction->opcode];
273 while (temp->opcode != -1) {
274 if (matches(temp, instruction) == 100) {
275 /* we've matched an instruction. */
276 long isize;
277 char *codes = temp->code;
278 int j;
280 isize = calcsize(segment, offset, bits, instruction, codes);
281 if (isize < 0)
282 return -1;
283 for (j = 0; j < instruction->nprefix; j++) {
284 if ((instruction->prefixes[j] != P_A16 &&
285 instruction->prefixes[j] != P_O16 && bits==16) ||
286 (instruction->prefixes[j] != P_A32 &&
287 instruction->prefixes[j] != P_O32 && bits==32))
288 isize++;
290 return isize * instruction->times;
292 temp++;
294 return -1; /* didn't match any instruction */
297 static long calcsize (long segment, long offset, int bits,
298 insn *ins, char *codes) {
299 long length = 0;
300 unsigned char c;
302 while (*codes) switch (c = *codes++) {
303 case 01: case 02: case 03:
304 codes += c, length += c; break;
305 case 04: case 05: case 06: case 07:
306 length++; break;
307 case 010: case 011: case 012:
308 codes++, length++; break;
309 case 017:
310 length++; break;
311 case 014: case 015: case 016:
312 length++; break;
313 case 020: case 021: case 022:
314 length++; break;
315 case 024: case 025: case 026:
316 length++; break;
317 case 030: case 031: case 032:
318 length += 2; break;
319 case 034: case 035: case 036:
320 length += ((ins->oprs[c-034].addr_size ?
321 ins->oprs[c-034].addr_size : bits) == 16 ? 2 : 4); break;
322 case 037:
323 length += 2; break;
324 case 040: case 041: case 042:
325 length += 4; break;
326 case 050: case 051: case 052:
327 length++; break;
328 case 060: case 061: case 062:
329 length += 2; break;
330 case 064: case 065: case 066:
331 length += ((ins->oprs[c-064].addr_size ?
332 ins->oprs[c-064].addr_size : bits) == 16 ? 2 : 4); break;
333 case 070: case 071: case 072:
334 length += 4; break;
335 case 0300: case 0301: case 0302:
336 length += chsize (&ins->oprs[c-0300], bits);
337 break;
338 case 0310:
339 length += (bits==32);
340 break;
341 case 0311:
342 length += (bits==16);
343 break;
344 case 0312:
345 break;
346 case 0320:
347 length += (bits==32);
348 break;
349 case 0321:
350 length += (bits==16);
351 break;
352 case 0322:
353 break;
354 case 0330:
355 codes++, length++; break;
356 case 0340: case 0341: case 0342:
357 if (ins->oprs[0].segment != NO_SEG)
358 errfunc (ERR_NONFATAL, "attempt to reserve non-constant"
359 " quantity of BSS space");
360 else
361 length += ins->oprs[0].offset << (c-0340);
362 break;
363 default: /* can't do it by 'case' statements */
364 if (c>=0100 && c<=0277) { /* it's an EA */
365 ea ea_data;
367 if (!process_ea (&ins->oprs[(c>>3)&7], &ea_data, bits, 0)) {
368 errfunc (ERR_NONFATAL, "invalid effective address");
369 return -1;
370 } else
371 length += ea_data.size;
372 } else
373 errfunc (ERR_PANIC, "internal instruction table corrupt"
374 ": instruction code 0x%02X given", c);
376 return length;
379 static void gencode (long segment, long offset, int bits,
380 insn *ins, char *codes, long insn_end) {
381 static char condval[] = { /* conditional opcodes */
382 0x7, 0x3, 0x2, 0x6, 0x2, 0x4, 0xF, 0xD, 0xC, 0xE, 0x6, 0x2,
383 0x3, 0x7, 0x3, 0x5, 0xE, 0xC, 0xD, 0xF, 0x1, 0xB, 0x9, 0x5,
384 0x0, 0xA, 0xA, 0xB, 0x8, 0x4
386 unsigned char c, bytes[4];
387 long data, size;
389 while (*codes) switch (c = *codes++) {
390 case 01: case 02: case 03:
391 outfmt->output (segment, codes, OUT_RAWDATA+c, NO_SEG, NO_SEG);
392 codes += c;
393 offset += c;
394 break;
395 case 04: case 06:
396 switch (ins->oprs[0].basereg) {
397 case R_CS: bytes[0] = 0x0E + (c == 0x04 ? 1 : 0); break;
398 case R_DS: bytes[0] = 0x1E + (c == 0x04 ? 1 : 0); break;
399 case R_ES: bytes[0] = 0x06 + (c == 0x04 ? 1 : 0); break;
400 case R_SS: bytes[0] = 0x16 + (c == 0x04 ? 1 : 0); break;
401 default:
402 errfunc (ERR_PANIC, "bizarre 8086 segment register received");
404 outfmt->output (segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG);
405 offset++;
406 break;
407 case 05: case 07:
408 switch (ins->oprs[0].basereg) {
409 case R_FS: bytes[0] = 0xA0 + (c == 0x05 ? 1 : 0); break;
410 case R_GS: bytes[0] = 0xA8 + (c == 0x05 ? 1 : 0); break;
411 default:
412 errfunc (ERR_PANIC, "bizarre 386 segment register received");
414 outfmt->output (segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG);
415 offset++;
416 break;
417 case 010: case 011: case 012:
418 bytes[0] = *codes++ + regval(&ins->oprs[c-010]);
419 outfmt->output (segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG);
420 offset += 1;
421 break;
422 case 017:
423 bytes[0] = 0;
424 outfmt->output (segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG);
425 offset += 1;
426 break;
427 case 014: case 015: case 016:
428 if (ins->oprs[c-014].offset < -128 || ins->oprs[c-014].offset > 127)
429 errfunc (ERR_WARNING, "signed byte value exceeds bounds");
430 bytes[0] = ins->oprs[c-014].offset;
431 outfmt->output (segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG);
432 offset += 1;
433 break;
434 case 020: case 021: case 022:
435 if (ins->oprs[c-020].offset < -128 || ins->oprs[c-020].offset > 255)
436 errfunc (ERR_WARNING, "byte value exceeds bounds");
437 bytes[0] = ins->oprs[c-020].offset;
438 outfmt->output (segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG);
439 offset += 1;
440 break;
441 case 024: case 025: case 026:
442 if (ins->oprs[c-024].offset < 0 || ins->oprs[c-024].offset > 255)
443 errfunc (ERR_WARNING, "unsigned byte value exceeds bounds");
444 bytes[0] = ins->oprs[c-024].offset;
445 outfmt->output (segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG);
446 offset += 1;
447 break;
448 case 030: case 031: case 032:
449 if (ins->oprs[c-030].segment == NO_SEG &&
450 ins->oprs[c-030].wrt == NO_SEG &&
451 (ins->oprs[c-030].offset < -32768 ||
452 ins->oprs[c-030].offset > 65535))
453 errfunc (ERR_WARNING, "word value exceeds bounds");
454 data = ins->oprs[c-030].offset;
455 outfmt->output (segment, &data, OUT_ADDRESS+2,
456 ins->oprs[c-030].segment, ins->oprs[c-030].wrt);
457 offset += 2;
458 break;
459 case 034: case 035: case 036:
460 data = ins->oprs[c-034].offset;
461 size = ((ins->oprs[c-034].addr_size ?
462 ins->oprs[c-034].addr_size : bits) == 16 ? 2 : 4);
463 if (size==16 && (data < -32768 || data > 65535))
464 errfunc (ERR_WARNING, "word value exceeds bounds");
465 outfmt->output (segment, &data, OUT_ADDRESS+size,
466 ins->oprs[c-034].segment, ins->oprs[c-034].wrt);
467 offset += size;
468 break;
469 case 037:
470 if (ins->oprs[0].segment == NO_SEG)
471 errfunc (ERR_NONFATAL, "value referenced by FAR is not"
472 " relocatable");
473 data = 0L;
474 outfmt->output (segment, &data, OUT_ADDRESS+2,
475 outfmt->segbase(1+ins->oprs[0].segment),
476 ins->oprs[0].wrt);
477 offset += 2;
478 break;
479 case 040: case 041: case 042:
480 data = ins->oprs[c-040].offset;
481 outfmt->output (segment, &data, OUT_ADDRESS+4,
482 ins->oprs[c-040].segment, ins->oprs[c-040].wrt);
483 offset += 4;
484 break;
485 case 050: case 051: case 052:
486 if (ins->oprs[c-050].segment != segment)
487 errfunc (ERR_NONFATAL, "short relative jump outside segment");
488 data = ins->oprs[c-050].offset - insn_end;
489 if (data > 127 || data < -128)
490 errfunc (ERR_NONFATAL, "short jump is out of range");
491 bytes[0] = data;
492 outfmt->output (segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG);
493 offset += 1;
494 break;
495 case 060: case 061: case 062:
496 if (ins->oprs[c-060].segment != segment) {
497 data = ins->oprs[c-060].offset;
498 outfmt->output (segment, &data, OUT_REL2ADR+insn_end-offset,
499 ins->oprs[c-060].segment, ins->oprs[c-060].wrt);
500 } else {
501 data = ins->oprs[c-060].offset - insn_end;
502 outfmt->output (segment, &data, OUT_ADDRESS+2, NO_SEG, NO_SEG);
504 offset += 2;
505 break;
506 case 064: case 065: case 066:
507 size = ((ins->oprs[c-064].addr_size ?
508 ins->oprs[c-064].addr_size : bits) == 16 ? 2 : 4);
509 if (ins->oprs[c-064].segment != segment) {
510 data = ins->oprs[c-064].offset;
511 size = (bits == 16 ? OUT_REL2ADR : OUT_REL4ADR);
512 outfmt->output (segment, &data, size+insn_end-offset,
513 ins->oprs[c-064].segment, ins->oprs[c-064].wrt);
514 size = (bits == 16 ? 2 : 4);
515 } else {
516 data = ins->oprs[c-064].offset - insn_end;
517 outfmt->output (segment, &data, OUT_ADDRESS+size, NO_SEG, NO_SEG);
519 offset += size;
520 break;
521 case 070: case 071: case 072:
522 if (ins->oprs[c-070].segment != segment) {
523 data = ins->oprs[c-070].offset;
524 outfmt->output (segment, &data, OUT_REL4ADR+insn_end-offset,
525 ins->oprs[c-070].segment, ins->oprs[c-070].wrt);
526 } else {
527 data = ins->oprs[c-070].offset - insn_end;
528 outfmt->output (segment, &data, OUT_ADDRESS+4, NO_SEG, NO_SEG);
530 offset += 4;
531 break;
532 case 0300: case 0301: case 0302:
533 if (chsize (&ins->oprs[c-0300], bits)) {
534 *bytes = 0x67;
535 outfmt->output (segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG);
536 offset += 1;
537 } else
538 offset += 0;
539 break;
540 case 0310:
541 if (bits==32) {
542 *bytes = 0x67;
543 outfmt->output (segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG);
544 offset += 1;
545 } else
546 offset += 0;
547 break;
548 case 0311:
549 if (bits==16) {
550 *bytes = 0x67;
551 outfmt->output (segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG);
552 offset += 1;
553 } else
554 offset += 0;
555 break;
556 case 0312:
557 break;
558 case 0320:
559 if (bits==32) {
560 *bytes = 0x66;
561 outfmt->output (segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG);
562 offset += 1;
563 } else
564 offset += 0;
565 break;
566 case 0321:
567 if (bits==16) {
568 *bytes = 0x66;
569 outfmt->output (segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG);
570 offset += 1;
571 } else
572 offset += 0;
573 break;
574 case 0322:
575 break;
576 case 0330:
577 *bytes = *codes++ + condval[ins->condition];
578 outfmt->output (segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG);
579 offset += 1;
580 break;
581 case 0340: case 0341: case 0342:
582 if (ins->oprs[0].segment != NO_SEG)
583 errfunc (ERR_PANIC, "non-constant BSS size in pass two");
584 else {
585 long size = ins->oprs[0].offset << (c-0340);
586 outfmt->output (segment, NULL, OUT_RESERVE+size, NO_SEG, NO_SEG);
587 offset += size;
589 break;
590 default: /* can't do it by 'case' statements */
591 if (c>=0100 && c<=0277) { /* it's an EA */
592 ea ea_data;
593 int rfield;
594 unsigned char *p;
595 long s;
597 if (c<=0177) /* pick rfield from operand b */
598 rfield = regval (&ins->oprs[c&7]);
599 else /* rfield is constant */
600 rfield = c & 7;
601 if (!process_ea (&ins->oprs[(c>>3)&7], &ea_data, bits, rfield))
602 errfunc (ERR_NONFATAL, "invalid effective address");
604 p = bytes;
605 *p++ = ea_data.modrm;
606 if (ea_data.sib_present)
607 *p++ = ea_data.sib;
609 * the cast in the next line is to placate MS C...
611 outfmt->output (segment, bytes, OUT_RAWDATA+(long)(p-bytes),
612 NO_SEG, NO_SEG);
613 s = p-bytes;
615 switch (ea_data.bytes) {
616 case 0:
617 break;
618 case 1:
619 *bytes = ins->oprs[(c>>3)&7].offset;
620 outfmt->output (segment, bytes, OUT_RAWDATA+1,
621 NO_SEG, NO_SEG);
622 s++;
623 break;
624 case 2:
625 case 4:
626 data = ins->oprs[(c>>3)&7].offset;
627 outfmt->output (segment, &data, OUT_ADDRESS+ea_data.bytes,
628 ins->oprs[(c>>3)&7].segment,
629 ins->oprs[(c>>3)&7].wrt);
630 s += ea_data.bytes;
631 break;
633 offset += s;
634 } else
635 errfunc (ERR_PANIC, "internal instruction table corrupt"
636 ": instruction code 0x%02X given", c);
640 static int regval (operand *o) {
641 switch (o->basereg) {
642 case R_EAX: case R_AX: case R_AL: case R_ES: case R_CR0: case R_DR0:
643 case R_ST0: case R_MM0:
644 return 0;
645 case R_ECX: case R_CX: case R_CL: case R_CS: case R_DR1: case R_ST1:
646 case R_MM1:
647 return 1;
648 case R_EDX: case R_DX: case R_DL: case R_SS: case R_CR2: case R_DR2:
649 case R_ST2: case R_MM2:
650 return 2;
651 case R_EBX: case R_BX: case R_BL: case R_DS: case R_CR3: case R_DR3:
652 case R_TR3: case R_ST3: case R_MM3:
653 return 3;
654 case R_ESP: case R_SP: case R_AH: case R_FS: case R_CR4: case R_TR4:
655 case R_ST4: case R_MM4:
656 return 4;
657 case R_EBP: case R_BP: case R_CH: case R_GS: case R_TR5: case R_ST5:
658 case R_MM5:
659 return 5;
660 case R_ESI: case R_SI: case R_DH: case R_DR6: case R_TR6: case R_ST6:
661 case R_MM6:
662 return 6;
663 case R_EDI: case R_DI: case R_BH: case R_DR7: case R_TR7: case R_ST7:
664 case R_MM7:
665 return 7;
666 default: /* panic */
667 errfunc (ERR_PANIC, "invalid register operand given to regval()");
668 return 0;
672 static int matches (struct itemplate *itemp, insn *instruction) {
673 int i, size, oprs, ret;
675 ret = 100;
678 * Check the opcode
680 if (itemp->opcode != instruction->opcode) return 0;
683 * Count the operands
685 if (itemp->operands != instruction->operands) return 0;
688 * Check that no spurious colons or TOs are present
690 for (i=0; i<itemp->operands; i++)
691 if (instruction->oprs[i].type & ~itemp->opd[i] & (COLON|TO))
692 return 0;
695 * Check that the operand flags all match up
697 for (i=0; i<itemp->operands; i++)
698 if (itemp->opd[i] & ~instruction->oprs[i].type ||
699 ((itemp->opd[i] & SIZE_MASK) &&
700 ((itemp->opd[i] ^ instruction->oprs[i].type) & SIZE_MASK))) {
701 if ((itemp->opd[i] & ~instruction->oprs[i].type & NON_SIZE) ||
702 (instruction->oprs[i].type & SIZE_MASK))
703 return 0;
704 else
705 ret = 1;
709 * Check operand sizes
711 if (itemp->flags & IF_SB) {
712 size = BITS8;
713 oprs = itemp->operands;
714 } else if (itemp->flags & IF_SD) {
715 size = BITS32;
716 oprs = itemp->operands;
717 } else if (itemp->flags & (IF_SM | IF_SM2)) {
718 oprs = (itemp->flags & IF_SM2 ? 2 : itemp->operands);
719 size = 0; /* placate gcc */
720 for (i=0; i<oprs; i++)
721 if ( (size = itemp->opd[i] & SIZE_MASK) != 0)
722 break;
723 } else {
724 size = 0;
725 oprs = itemp->operands;
728 for (i=0; i<itemp->operands; i++)
729 if (!(itemp->opd[i] & SIZE_MASK) &&
730 (instruction->oprs[i].type & SIZE_MASK & ~size))
731 ret = 2;
733 return ret;
736 static ea *process_ea (operand *input, ea *output, int addrbits, int rfield) {
737 if (!(REGISTER & ~input->type)) { /* it's a single register */
738 static int regs[] = {
739 R_MM0, R_EAX, R_AX, R_AL, R_MM1, R_ECX, R_CX, R_CL,
740 R_MM2, R_EDX, R_DX, R_DL, R_MM3, R_EBX, R_BX, R_BL,
741 R_MM4, R_ESP, R_SP, R_AH, R_MM5, R_EBP, R_BP, R_CH,
742 R_MM6, R_ESI, R_SI, R_DH, R_MM7, R_EDI, R_DI, R_BH
744 int i;
746 for (i=0; i<elements(regs); i++)
747 if (input->basereg == regs[i]) break;
748 if (i<elements(regs)) {
749 output->sib_present = FALSE;/* no SIB necessary */
750 output->bytes = 0; /* no offset necessary either */
751 output->modrm = 0xC0 | (rfield << 3) | (i/4);
752 } else
753 return NULL;
754 } else { /* it's a memory reference */
755 if (input->basereg==-1 && (input->indexreg==-1 || input->scale==0)) {
756 /* it's a pure offset */
757 if (input->addr_size)
758 addrbits = input->addr_size;
759 output->sib_present = FALSE;
760 output->bytes = (addrbits==32 ? 4 : 2);
761 output->modrm = (addrbits==32 ? 5 : 6) | (rfield << 3);
762 } else { /* it's an indirection */
763 int i=input->indexreg, b=input->basereg, s=input->scale;
764 long o=input->offset, seg=input->segment;
766 if (s==0) i = -1; /* make this easy, at least */
768 if (i==R_EAX || i==R_EBX || i==R_ECX || i==R_EDX
769 || i==R_EBP || i==R_ESP || i==R_ESI || i==R_EDI
770 || b==R_EAX || b==R_EBX || b==R_ECX || b==R_EDX
771 || b==R_EBP || b==R_ESP || b==R_ESI || b==R_EDI) {
772 /* it must be a 32-bit memory reference. Firstly we have
773 * to check that all registers involved are type Exx. */
774 if (i!=-1 && i!=R_EAX && i!=R_EBX && i!=R_ECX && i!=R_EDX
775 && i!=R_EBP && i!=R_ESP && i!=R_ESI && i!=R_EDI)
776 return NULL;
777 if (b!=-1 && b!=R_EAX && b!=R_EBX && b!=R_ECX && b!=R_EDX
778 && b!=R_EBP && b!=R_ESP && b!=R_ESI && b!=R_EDI)
779 return NULL;
781 /* While we're here, ensure the user didn't specify WORD. */
782 if (input->addr_size == 16)
783 return NULL;
785 /* now reorganise base/index */
786 if (b==i) /* convert EAX+2*EAX to 3*EAX */
787 b = -1, s++;
788 if (b==-1 && s==1) /* single register should be base */
789 b = i, i = -1;
790 if (((s==2 && i!=R_ESP) || s==3 || s==5 || s==9) && b==-1)
791 b = i, s--; /* convert 3*EAX to EAX+2*EAX */
792 if (i==R_ESP || (s!=1 && s!=2 && s!=4 && s!=8 && i!=-1))
793 return NULL; /* wrong, for various reasons */
795 if (i==-1 && b!=R_ESP) {/* no SIB needed */
796 int mod, rm;
797 switch(b) {
798 case R_EAX: rm = 0; break;
799 case R_ECX: rm = 1; break;
800 case R_EDX: rm = 2; break;
801 case R_EBX: rm = 3; break;
802 case R_EBP: rm = 5; break;
803 case R_ESI: rm = 6; break;
804 case R_EDI: rm = 7; break;
805 case -1: rm = 5; break;
806 default: /* should never happen */
807 return NULL;
809 if (b==-1 || (b!=R_EBP && o==0 && seg==NO_SEG))
810 mod = 0;
811 else if (o>=-128 && o<=127 && seg==NO_SEG)
812 mod = 1;
813 else
814 mod = 2;
815 output->sib_present = FALSE;
816 output->bytes = (b==-1 || mod==2 ? 4 : mod);
817 output->modrm = (mod<<6) | (rfield<<3) | rm;
818 } else { /* we need a SIB */
819 int mod, scale, index, base;
821 switch (b) {
822 case R_EAX: base = 0; break;
823 case R_ECX: base = 1; break;
824 case R_EDX: base = 2; break;
825 case R_EBX: base = 3; break;
826 case R_ESP: base = 4; break;
827 case R_EBP: case -1: base = 5; break;
828 case R_ESI: base = 6; break;
829 case R_EDI: base = 7; break;
830 default: /* then what the smeg is it? */
831 return NULL; /* panic */
834 switch (i) {
835 case R_EAX: index = 0; break;
836 case R_ECX: index = 1; break;
837 case R_EDX: index = 2; break;
838 case R_EBX: index = 3; break;
839 case -1: index = 4; break;
840 case R_EBP: index = 5; break;
841 case R_ESI: index = 6; break;
842 case R_EDI: index = 7; break;
843 default: /* then what the smeg is it? */
844 return NULL; /* panic */
847 if (i==-1) s = 1;
848 switch (s) {
849 case 1: scale = 0; break;
850 case 2: scale = 1; break;
851 case 4: scale = 2; break;
852 case 8: scale = 3; break;
853 default: /* then what the smeg is it? */
854 return NULL; /* panic */
857 if (b==-1 || (b!=R_EBP && o==0 && seg==NO_SEG))
858 mod = 0;
859 else if (o>=-128 && o<=127 && seg==NO_SEG)
860 mod = 1;
861 else
862 mod = 2;
864 output->sib_present = TRUE;
865 output->bytes = (b==-1 || mod==2 ? 4 : mod);
866 output->modrm = (mod<<6) | (rfield<<3) | 4;
867 output->sib = (scale<<6) | (index<<3) | base;
869 } else { /* it's 16-bit */
870 int mod, rm;
872 /* check all registers are BX, BP, SI or DI */
873 if ((b!=-1 && b!=R_BP && b!=R_BX && b!=R_SI && b!=R_DI) ||
874 (i!=-1 && i!=R_BP && i!=R_BX && i!=R_SI && i!=R_DI))
875 return NULL;
877 /* ensure the user didn't specify DWORD */
878 if (input->addr_size == 32)
879 return NULL;
881 if (s!=1 && i!=-1) return NULL;/* no can do, in 16-bit EA */
882 if (b==-1 && i!=-1) b ^= i ^= b ^= i; /* swap them round */
883 if ((b==R_SI || b==R_DI) && i!=-1)
884 b ^= i ^= b ^= i; /* have BX/BP as base, SI/DI index */
885 if (b==i) return NULL;/* shouldn't ever happen, in theory */
886 if (i!=-1 && b!=-1 &&
887 (i==R_BP || i==R_BX || b==R_SI || b==R_DI))
888 return NULL; /* invalid combinations */
889 if (b==-1) /* pure offset: handled above */
890 return NULL; /* so if it gets to here, panic! */
892 rm = -1;
893 if (i!=-1)
894 switch (i*256 + b) {
895 case R_SI*256+R_BX: rm=0; break;
896 case R_DI*256+R_BX: rm=1; break;
897 case R_SI*256+R_BP: rm=2; break;
898 case R_DI*256+R_BP: rm=3; break;
900 else
901 switch (b) {
902 case R_SI: rm=4; break;
903 case R_DI: rm=5; break;
904 case R_BP: rm=6; break;
905 case R_BX: rm=7; break;
907 if (rm==-1) /* can't happen, in theory */
908 return NULL; /* so panic if it does */
910 if (o==0 && seg==NO_SEG && rm!=6)
911 mod = 0;
912 else if (o>=-128 && o<=127 && seg==NO_SEG)
913 mod = 1;
914 else
915 mod = 2;
917 output->sib_present = FALSE; /* no SIB - it's 16-bit */
918 output->bytes = mod; /* bytes of offset needed */
919 output->modrm = (mod<<6) | (rfield<<3) | rm;
923 output->size = 1 + output->sib_present + output->bytes;
924 return output;
927 static int chsize (operand *input, int addrbits) {
928 if (!(MEMORY & ~input->type)) {
929 int i=input->indexreg, b=input->basereg;
931 if (input->scale==0) i = -1;
933 if (i == -1 && b == -1) /* pure offset */
934 return (input->addr_size != 0 && input->addr_size != addrbits);
936 if (i==R_EAX || i==R_EBX || i==R_ECX || i==R_EDX
937 || i==R_EBP || i==R_ESP || i==R_ESI || i==R_EDI
938 || b==R_EAX || b==R_EBX || b==R_ECX || b==R_EDX
939 || b==R_EBP || b==R_ESP || b==R_ESI || b==R_EDI)
940 return (addrbits==16);
941 else
942 return (addrbits==32);
943 } else
944 return 0;