assemble.c

   1 /* assemble.c   code generation for the Netwide Assembler
   2  *
   3  * The Netwide Assembler is copyright (C) 1996 Simon Tatham and
   4  * Julian Hall. All rights reserved. The software is
   5  * redistributable under the license given in the file "LICENSE"
   6  * distributed in the NASM archive.
   7  *
   8  * the actual codes (C syntax, i.e. octal):
   9  * \0            - terminates the code. (Unless it's a literal of course.)
  10  * \1..\4        - that many literal bytes follow in the code stream
  11  * \5            - add 4 to the primary operand number (b, low octdigit)
  12  * \6            - add 4 to the secondary operand number (a, middle octdigit)
  13  * \7            - add 4 to both the primary and the secondary operand number
  14  * \10..\13      - a literal byte follows in the code stream, to be added
  15  *                 to the register value of operand 0..3
  16  * \14..\17      - a signed byte immediate operand, from operand 0..3
  17  * \20..\23      - a byte immediate operand, from operand 0..3
  18  * \24..\27      - an unsigned byte immediate operand, from operand 0..3
  19  * \30..\33      - a word immediate operand, from operand 0..3
  20  * \34..\37      - select between \3[0-3] and \4[0-3] depending on 16/32 bit
  21  *                 assembly mode or the operand-size override on the operand
  22  * \40..\43      - a long immediate operand, from operand 0..3
  23  * \44..\47      - select between \3[0-3], \4[0-3] and \5[4-7]
  24  *                 depending on the address size of the instruction.
  25  * \50..\53      - a byte relative operand, from operand 0..3
  26  * \54..\57      - a qword immediate operand, from operand 0..3
  27  * \60..\63      - a word relative operand, from operand 0..3
  28  * \64..\67      - select between \6[0-3] and \7[0-3] depending on 16/32 bit
  29  *                 assembly mode or the operand-size override on the operand
  30  * \70..\73      - a long relative operand, from operand 0..3
  31  * \74..\77      - a word constant, from the _segment_ part of operand 0..3
  32  * \1ab          - a ModRM, calculated on EA in operand a, with the spare
  33  *                 field the register value of operand b.
  34  * \140..\143    - an immediate word or signed byte for operand 0..3
  35  * \144..\147    - or 2 (s-field) into opcode byte if operand 0..3
  36  *                  is a signed byte rather than a word.  Opcode byte follows.
  37  * \150..\153    - an immediate dword or signed byte for operand 0..3
  38  * \154..\157    - or 2 (s-field) into opcode byte if operand 0..3
  39  *                  is a signed byte rather than a dword.  Opcode byte follows.
  40  * \160..\163    - this instruction uses DREX rather than REX, with the
  41  *                 OC0 field set to 0, and the dest field taken from
  42  *                 operand 0..3.
  43  * \164..\167    - this instruction uses DREX rather than REX, with the
  44  *                 OC0 field set to 1, and the dest field taken from
  45  *                 operand 0..3.
  46  * \171          - placement of DREX suffix in the absence of an EA
  47  * \172\ab       - the register number from operand a in bits 7..4, with
  48  *                 the 4-bit immediate from operand b in bits 3..0.
  49  * \173\xab      - the register number from operand a in bits 7..4, with
  50  *                 the value b in bits 3..0.
  51  * \174\a        - the register number from operand a in bits 7..4, and
  52  *                 an arbitrary value in bits 3..0 (assembled as zero.)
  53  * \2ab          - a ModRM, calculated on EA in operand a, with the spare
  54  *                 field equal to digit b.
  55  * \250..\253    - same as \150..\153, except warn if the 64-bit operand
  56  *                 is not equal to the truncated and sign-extended 32-bit
  57  *                 operand; used for 32-bit immediates in 64-bit mode.
  58  * \254..\257    - a signed 32-bit operand to be extended to 64 bits.
  59  * \260..\263    - this instruction uses VEX rather than REX, with the
  60  *                 V field taken from operand 0..3.
  61  * \270          - this instruction uses VEX rather than REX, with the
  62  *                 V field set to 1111b.
  63  *
  64  * VEX prefixes are followed by the sequence:
  65  * \mm\wlp         where mm is the M field; and wlp is:
  66  *                 00 0ww lpp
  67  *                 [w0] ww = 0 for W = 0
  68  *                 [w1] ww = 1 for W = 1
  69  *                 [wx] ww = 2 for W don't care (always assembled as 0)
  70  *                 [ww] ww = 3 for W used as REX.W
  71  *
  72  *
  73  * \274..\277    - a signed byte immediate operand, from operand 0..3,
  74  *                 which is to be extended to the operand size.
  75  * \310          - indicates fixed 16-bit address size, i.e. optional 0x67.
  76  * \311          - indicates fixed 32-bit address size, i.e. optional 0x67.
  77  * \312          - (disassembler only) marker on LOOP, LOOPxx instructions.
  78  * \313          - indicates fixed 64-bit address size, 0x67 invalid.
  79  * \314          - (disassembler only) invalid with REX.B
  80  * \315          - (disassembler only) invalid with REX.X
  81  * \316          - (disassembler only) invalid with REX.R
  82  * \317          - (disassembler only) invalid with REX.W
  83  * \320          - indicates fixed 16-bit operand size, i.e. optional 0x66.
  84  * \321          - indicates fixed 32-bit operand size, i.e. optional 0x66.
  85  * \322          - indicates that this instruction is only valid when the
  86  *                 operand size is the default (instruction to disassembler,
  87  *                 generates no code in the assembler)
  88  * \323          - indicates fixed 64-bit operand size, REX on extensions only.
  89  * \324          - indicates 64-bit operand size requiring REX prefix.
  90  * \330          - a literal byte follows in the code stream, to be added
  91  *                 to the condition code value of the instruction.
  92  * \331          - instruction not valid with REP prefix.  Hint for
  93  *                 disassembler only; for SSE instructions.
  94  * \332          - REP prefix (0xF2 byte) used as opcode extension.
  95  * \333          - REP prefix (0xF3 byte) used as opcode extension.
  96  * \334          - LOCK prefix used instead of REX.R
  97  * \335          - disassemble a rep (0xF3 byte) prefix as repe not rep.
  98  * \336          - force a REP(E) prefix (0xF3) even if not specified.
  99  * \337          - force a REPNE prefix (0xF2) even if not specified.
 100  *                 \336-\337 are still listed as prefixes in the disassembler.
 101  * \340          - reserve <operand 0> bytes of uninitialized storage.
 102  *                 Operand 0 had better be a segmentless constant.
 103  * \341          - this instruction needs a WAIT "prefix"
 104  * \344,\345     - the PUSH/POP (respectively) codes for CS, DS, ES, SS
 105  *                 (POP is never used for CS) depending on operand 0
 106  * \346,\347     - the second byte of PUSH/POP codes for FS, GS, depending
 107  *                 on operand 0
 108  * \360          - no SSE prefix (== \364\331)
 109  * \361          - 66 SSE prefix (== \366\331)
 110  * \362          - F2 SSE prefix (== \364\332)
 111  * \363          - F3 SSE prefix (== \364\333)
 112  * \364          - operand-size prefix (0x66) not permitted
 113  * \365          - address-size prefix (0x67) not permitted
 114  * \366          - operand-size prefix (0x66) used as opcode extension
 115  * \367          - address-size prefix (0x67) used as opcode extension
 116  * \370,\371,\372 - match only if operand 0 meets byte jump criteria.
 117  *                 370 is used for Jcc, 371 is used for JMP.
 118  * \373          - assemble 0x03 if bits==16, 0x05 if bits==32;
 119  *                 used for conditional jump over longer jump
 120  */
 121
 122 #include "compiler.h"
 123
 124 #include <stdio.h>
 125 #include <string.h>
 126 #include <inttypes.h>
 127
 128 #include "nasm.h"
 129 #include "nasmlib.h"
 130 #include "assemble.h"
 131 #include "insns.h"
 132 #include "tables.h"
 133
 134 typedef struct {
 135     int sib_present;                 /* is a SIB byte necessary? */
 136     int bytes;                       /* # of bytes of offset needed */
 137     int size;                        /* lazy - this is sib+bytes+1 */
 138     uint8_t modrm, sib, rex, rip;    /* the bytes themselves */
 139 } ea;
 140
     /*
      * File-scope state shared by the routines below.  It is refreshed
      * from the caller's arguments on entry to assemble(); insn_size()
      * refreshes only errfunc and cpu.
      */
 141 static uint32_t cpu;            /* cpu level received from nasm.c */
 142 static efunc errfunc;           /* error callback (assemble(), insn_size()) */
 143 static struct ofmt *outfmt;     /* output format driver (assemble() only) */
 144 static ListGen *list;           /* listing generator (assemble() only) */
 145
     /* Forward declarations of file-local (static) helpers */
 146 static int64_t calcsize(int32_t, int64_t, int, insn *, const uint8_t *);
 147 static void gencode(int32_t segment, int64_t offset, int bits,
 148                     insn * ins, const struct itemplate *temp,
 149                     int64_t insn_end);
 150 static int matches(const struct itemplate *, insn *, int bits);
 151 static int32_t regflag(const operand *);
 152 static int32_t regval(const operand *);
 153 static int rexflags(int, int32_t, int);
 154 static int op_rexflags(const operand *, int);
 155 static ea *process_ea(operand *, ea *, int, int, int, int32_t);
 156 static void add_asp(insn *, int);
 157
 158 static int has_prefix(insn * ins, enum prefix_pos pos, enum prefixes prefix)
 159 {
 160     return ins->prefixes[pos] == prefix;
 161 }
 162
 163 static void assert_no_prefix(insn * ins, enum prefix_pos pos)
 164 {
 165     if (ins->prefixes[pos])
 166         errfunc(ERR_NONFATAL, "invalid %s prefix",
 167                 prefix_name(ins->prefixes[pos]));
 168 }
 169
 170 static const char *size_name(int size)
 171 {
 172     switch (size) {
 173     case 1:
 174         return "byte";
 175     case 2:
 176         return "word";
 177     case 4:
 178         return "dword";
 179     case 8:
 180         return "qword";
 181     case 10:
 182         return "tword";
 183     case 16:
 184         return "oword";
 185     case 32:
 186         return "yword";
 187     default:
 188         return "???";
 189     }
 190 }
 191
 192 static void warn_overflow(int size, const struct operand *o)
 193 {
 194     if (size < 8 && o->wrt == NO_SEG && o->segment == NO_SEG) {
 195         int64_t lim = ((int64_t)1 << (size*8))-1;
 196         int64_t data = o->offset;
 197
 198         if (data < ~lim || data > lim)
 199             errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
 200                     "%s data exceeds bounds", size_name(size));
 201     }
 202 }
 203 /*
 204  * This routine wrappers the real output format's output routine,
 205  * in order to pass a copy of the data off to the listing file
 206  * generator at the same time.
 207  */
 208 static void out(int64_t offset, int32_t segto, const void *data,
 209                 enum out_type type, uint64_t size,
 210                 int32_t segment, int32_t wrt)
 211 {
 212     static int32_t lineno = 0;     /* static!!! */
 213     static char *lnfname = NULL;
 214     uint8_t p[8];
 215
 216     if (type == OUT_ADDRESS && segment == NO_SEG && wrt == NO_SEG) {
 217         /*
 218          * This is a non-relocated address, and we're going to
 219          * convert it into RAWDATA format.
 220          */
 221         uint8_t *q = p;
 222
 223         if (size > 8) {
 224             errfunc(ERR_PANIC, "OUT_ADDRESS with size > 8");
 225             return;
 226         }
 227
 228         WRITEADDR(q, *(int64_t *)data, size);
 229         data = p;
 230         type = OUT_RAWDATA;
 231     }
 232
 233     list->output(offset, data, type, size);
 234
 235     /*
 236      * this call to src_get determines when we call the
 237      * debug-format-specific "linenum" function
 238      * it updates lineno and lnfname to the current values
 239      * returning 0 if "same as last time", -2 if lnfname
 240      * changed, and the amount by which lineno changed,
 241      * if it did. thus, these variables must be static
 242      */
 243
 244     if (src_get(&lineno, &lnfname)) {
 245         outfmt->current_dfmt->linenum(lnfname, lineno, segto);
 246     }
 247
 248     outfmt->output(segto, data, type, size, segment, wrt);
 249 }
 250
 251 static bool jmp_match(int32_t segment, int64_t offset, int bits,
 252                      insn * ins, const uint8_t *code)
 253 {
 254     int64_t isize;
 255     uint8_t c = code[0];
 256
 257     if ((c != 0370 && c != 0371) || (ins->oprs[0].type & STRICT))
 258         return false;
 259     if (!optimizing)
 260         return false;
 261     if (optimizing < 0 && c == 0371)
 262         return false;
 263
 264     isize = calcsize(segment, offset, bits, ins, code);
 265
 266     if (ins->oprs[0].opflags & OPFLAG_UNKNOWN)
 267         /* Be optimistic in pass 1 */
 268         return true;
 269
 270     if (ins->oprs[0].segment != segment)
 271         return false;
 272
 273     isize = ins->oprs[0].offset - offset - isize; /* isize is delta */
 274     return (isize >= -128 && isize <= 127); /* is it byte size? */
 275 }
 276
 277 int64_t assemble(int32_t segment, int64_t offset, int bits, uint32_t cp,
 278               insn * instruction, struct ofmt *output, efunc error,
 279               ListGen * listgen)
 280 {
 281     const struct itemplate *temp;
 282     int j;
 283     int size_prob;
 284     int64_t insn_end;
 285     int32_t itimes;
 286     int64_t start = offset;
 287     int64_t wsize = 0;             /* size for DB etc. */
 288
 289     errfunc = error;            /* to pass to other functions */
 290     cpu = cp;
 291     outfmt = output;            /* likewise */
 292     list = listgen;             /* and again */
 293
 294     switch (instruction->opcode) {
 295     case -1:
 296         return 0;
 297     case I_DB:
 298         wsize = 1;
 299         break;
 300     case I_DW:
 301         wsize = 2;
 302         break;
 303     case I_DD:
 304         wsize = 4;
 305         break;
 306     case I_DQ:
 307         wsize = 8;
 308         break;
 309     case I_DT:
 310         wsize = 10;
 311         break;
 312     case I_DO:
 313         wsize = 16;
 314         break;
 315     case I_DY:
 316         wsize = 32;
 317         break;
 318     default:
 319         break;
 320     }
 321
 322     if (wsize) {
 323         extop *e;
 324         int32_t t = instruction->times;
 325         if (t < 0)
 326             errfunc(ERR_PANIC,
 327                     "instruction->times < 0 (%ld) in assemble()", t);
 328
 329         while (t--) {           /* repeat TIMES times */
 330             for (e = instruction->eops; e; e = e->next) {
 331                 if (e->type == EOT_DB_NUMBER) {
 332                     if (wsize == 1) {
 333                         if (e->segment != NO_SEG)
 334                             errfunc(ERR_NONFATAL,
 335                                     "one-byte relocation attempted");
 336                         else {
 337                             uint8_t out_byte = e->offset;
 338                             out(offset, segment, &out_byte,
 339                                 OUT_RAWDATA, 1, NO_SEG, NO_SEG);
 340                         }
 341                     } else if (wsize > 8) {
 342                         errfunc(ERR_NONFATAL,
 343                                 "integer supplied to a DT, DO or DY"
 344                                 " instruction");
 345                     } else
 346                         out(offset, segment, &e->offset,
 347                             OUT_ADDRESS, wsize, e->segment, e->wrt);
 348                     offset += wsize;
 349                 } else if (e->type == EOT_DB_STRING ||
 350                            e->type == EOT_DB_STRING_FREE) {
 351                     int align;
 352
 353                     out(offset, segment, e->stringval,
 354                         OUT_RAWDATA, e->stringlen, NO_SEG, NO_SEG);
 355                     align = e->stringlen % wsize;
 356
 357                     if (align) {
 358                         align = wsize - align;
 359                         out(offset, segment, zero_buffer,
 360                             OUT_RAWDATA, align, NO_SEG, NO_SEG);
 361                     }
 362                     offset += e->stringlen + align;
 363                 }
 364             }
 365             if (t > 0 && t == instruction->times - 1) {
 366                 /*
 367                  * Dummy call to list->output to give the offset to the
 368                  * listing module.
 369                  */
 370                 list->output(offset, NULL, OUT_RAWDATA, 0);
 371                 list->uplevel(LIST_TIMES);
 372             }
 373         }
 374         if (instruction->times > 1)
 375             list->downlevel(LIST_TIMES);
 376         return offset - start;
 377     }
 378
 379     if (instruction->opcode == I_INCBIN) {
 380         const char *fname = instruction->eops->stringval;
 381         FILE *fp;
 382
 383         fp = fopen(fname, "rb");
 384         if (!fp) {
 385             error(ERR_NONFATAL, "`incbin': unable to open file `%s'",
 386                   fname);
 387         } else if (fseek(fp, 0L, SEEK_END) < 0) {
 388             error(ERR_NONFATAL, "`incbin': unable to seek on file `%s'",
 389                   fname);
 390         } else {
 391             static char buf[4096];
 392             size_t t = instruction->times;
 393             size_t base = 0;
 394             size_t len;
 395
 396             len = ftell(fp);
 397             if (instruction->eops->next) {
 398                 base = instruction->eops->next->offset;
 399                 len -= base;
 400                 if (instruction->eops->next->next &&
 401                     len > (size_t)instruction->eops->next->next->offset)
 402                     len = (size_t)instruction->eops->next->next->offset;
 403             }
 404             /*
 405              * Dummy call to list->output to give the offset to the
 406              * listing module.
 407              */
 408             list->output(offset, NULL, OUT_RAWDATA, 0);
 409             list->uplevel(LIST_INCBIN);
 410             while (t--) {
 411                 size_t l;
 412
 413                 fseek(fp, base, SEEK_SET);
 414                 l = len;
 415                 while (l > 0) {
 416                     int32_t m =
 417                         fread(buf, 1, (l > (int32_t) sizeof(buf) ? (int32_t) sizeof(buf) : l),
 418                               fp);
 419                     if (!m) {
 420                         /*
 421                          * This shouldn't happen unless the file
 422                          * actually changes while we are reading
 423                          * it.
 424                          */
 425                         error(ERR_NONFATAL,
 426                               "`incbin': unexpected EOF while"
 427                               " reading file `%s'", fname);
 428                         t = 0;  /* Try to exit cleanly */
 429                         break;
 430                     }
 431                     out(offset, segment, buf, OUT_RAWDATA, m,
 432                         NO_SEG, NO_SEG);
 433                     l -= m;
 434                 }
 435             }
 436             list->downlevel(LIST_INCBIN);
 437             if (instruction->times > 1) {
 438                 /*
 439                  * Dummy call to list->output to give the offset to the
 440                  * listing module.
 441                  */
 442                 list->output(offset, NULL, OUT_RAWDATA, 0);
 443                 list->uplevel(LIST_TIMES);
 444                 list->downlevel(LIST_TIMES);
 445             }
 446             fclose(fp);
 447             return instruction->times * len;
 448         }
 449         return 0;               /* if we're here, there's an error */
 450     }
 451
 452     /* Check to see if we need an address-size prefix */
 453     add_asp(instruction, bits);
 454
 455     size_prob = 0;
 456
 457     for (temp = nasm_instructions[instruction->opcode]; temp->opcode != -1; temp++){
 458         int m = matches(temp, instruction, bits);
 459         if (m == 100 ||
 460             (m == 99 && jmp_match(segment, offset, bits,
 461                                   instruction, temp->code))) {
 462             /* Matches! */
 463             int64_t insn_size = calcsize(segment, offset, bits,
 464                                       instruction, temp->code);
 465             itimes = instruction->times;
 466             if (insn_size < 0)  /* shouldn't be, on pass two */
 467                 error(ERR_PANIC, "errors made it through from pass one");
 468             else
 469                 while (itimes--) {
 470                     for (j = 0; j < MAXPREFIX; j++) {
 471                         uint8_t c = 0;
 472                         switch (instruction->prefixes[j]) {
 473                         case P_WAIT:
 474                             c = 0x9B;
 475                             break;
 476                         case P_LOCK:
 477                             c = 0xF0;
 478                             break;
 479                         case P_REPNE:
 480                         case P_REPNZ:
 481                             c = 0xF2;
 482                             break;
 483                         case P_REPE:
 484                         case P_REPZ:
 485                         case P_REP:
 486                             c = 0xF3;
 487                             break;
 488                         case R_CS:
 489                             if (bits == 64) {
 490                                 error(ERR_WARNING | ERR_PASS2,
 491                                       "cs segment base generated, but will be ignored in 64-bit mode");
 492                             }
 493                             c = 0x2E;
 494                             break;
 495                         case R_DS:
 496                             if (bits == 64) {
 497                                 error(ERR_WARNING | ERR_PASS2,
 498                                       "ds segment base generated, but will be ignored in 64-bit mode");
 499                             }
 500                             c = 0x3E;
 501                             break;
 502                         case R_ES:
 503                            if (bits == 64) {
 504                                 error(ERR_WARNING | ERR_PASS2,
 505                                       "es segment base generated, but will be ignored in 64-bit mode");
 506                            }
 507                             c = 0x26;
 508                             break;
 509                         case R_FS:
 510                             c = 0x64;
 511                             break;
 512                         case R_GS:
 513                             c = 0x65;
 514                             break;
 515                         case R_SS:
 516                             if (bits == 64) {
 517                                 error(ERR_WARNING | ERR_PASS2,
 518                                       "ss segment base generated, but will be ignored in 64-bit mode");
 519                             }
 520                             c = 0x36;
 521                             break;
 522                         case R_SEGR6:
 523                         case R_SEGR7:
 524                             error(ERR_NONFATAL,
 525                                   "segr6 and segr7 cannot be used as prefixes");
 526                             break;
 527                         case P_A16:
 528                             if (bits == 64) {
 529                                 error(ERR_NONFATAL,
 530                                       "16-bit addressing is not supported "
 531                                       "in 64-bit mode");
 532                             } else if (bits != 16)
 533                                 c = 0x67;
 534                             break;
 535                         case P_A32:
 536                             if (bits != 32)
 537                                 c = 0x67;
 538                             break;
 539                         case P_A64:
 540                             if (bits != 64) {
 541                                 error(ERR_NONFATAL,
 542                                       "64-bit addressing is only supported "
 543                                       "in 64-bit mode");
 544                             }
 545                             break;
 546                         case P_ASP:
 547                             c = 0x67;
 548                             break;
 549                         case P_O16:
 550                             if (bits != 16)
 551                                 c = 0x66;
 552                             break;
 553                         case P_O32:
 554                             if (bits == 16)
 555                                 c = 0x66;
 556                             break;
 557                         case P_O64:
 558                             /* REX.W */
 559                             break;
 560                         case P_OSP:
 561                             c = 0x66;
 562                             break;
 563                         case P_none:
 564                             break;
 565                         default:
 566                             error(ERR_PANIC, "invalid instruction prefix");
 567                         }
 568                         if (c != 0) {
 569                             out(offset, segment, &c, OUT_RAWDATA, 1,
 570                                 NO_SEG, NO_SEG);
 571                             offset++;
 572                         }
 573                     }
 574                     insn_end = offset + insn_size;
 575                     gencode(segment, offset, bits, instruction,
 576                             temp, insn_end);
 577                     offset += insn_size;
 578                     if (itimes > 0 && itimes == instruction->times - 1) {
 579                         /*
 580                          * Dummy call to list->output to give the offset to the
 581                          * listing module.
 582                          */
 583                         list->output(offset, NULL, OUT_RAWDATA, 0);
 584                         list->uplevel(LIST_TIMES);
 585                     }
 586                 }
 587             if (instruction->times > 1)
 588                 list->downlevel(LIST_TIMES);
 589             return offset - start;
 590         } else if (m > 0 && m > size_prob) {
 591             size_prob = m;
 592         }
 593     }
 594
 595     if (temp->opcode == -1) {   /* didn't match any instruction */
 596         switch (size_prob) {
 597         case 1:
 598             error(ERR_NONFATAL, "operation size not specified");
 599             break;
 600         case 2:
 601             error(ERR_NONFATAL, "mismatch in operand sizes");
 602             break;
 603         case 3:
 604             error(ERR_NONFATAL, "no instruction for this cpu level");
 605             break;
 606         case 4:
 607             error(ERR_NONFATAL, "instruction not supported in %d-bit mode",
 608                   bits);
 609             break;
 610         default:
 611             error(ERR_NONFATAL,
 612                   "invalid combination of opcode and operands");
 613             break;
 614         }
 615     }
 616     return 0;
 617 }
 618
 619 int64_t insn_size(int32_t segment, int64_t offset, int bits, uint32_t cp,
 620                insn * instruction, efunc error)
 621 {
 622     const struct itemplate *temp;
 623
 624     errfunc = error;            /* to pass to other functions */
 625     cpu = cp;
 626
 627     if (instruction->opcode == -1)
 628         return 0;
 629
 630     if (instruction->opcode == I_DB || instruction->opcode == I_DW ||
 631         instruction->opcode == I_DD || instruction->opcode == I_DQ ||
 632         instruction->opcode == I_DT || instruction->opcode == I_DO ||
 633         instruction->opcode == I_DY) {
 634         extop *e;
 635         int32_t isize, osize, wsize = 0;   /* placate gcc */
 636
 637         isize = 0;
 638         switch (instruction->opcode) {
 639         case I_DB:
 640             wsize = 1;
 641             break;
 642         case I_DW:
 643             wsize = 2;
 644             break;
 645         case I_DD:
 646             wsize = 4;
 647             break;
 648         case I_DQ:
 649             wsize = 8;
 650             break;
 651         case I_DT:
 652             wsize = 10;
 653             break;
 654         case I_DO:
 655             wsize = 16;
 656             break;
 657         case I_DY:
 658             wsize = 32;
 659             break;
 660         default:
 661             break;
 662         }
 663
 664         for (e = instruction->eops; e; e = e->next) {
 665             int32_t align;
 666
 667             osize = 0;
 668             if (e->type == EOT_DB_NUMBER)
 669                 osize = 1;
 670             else if (e->type == EOT_DB_STRING ||
 671                      e->type == EOT_DB_STRING_FREE)
 672                 osize = e->stringlen;
 673
 674             align = (-osize) % wsize;
 675             if (align < 0)
 676                 align += wsize;
 677             isize += osize + align;
 678         }
 679         return isize * instruction->times;
 680     }
 681
 682     if (instruction->opcode == I_INCBIN) {
 683         const char *fname = instruction->eops->stringval;
 684         FILE *fp;
 685         size_t len;
 686
 687         fp = fopen(fname, "rb");
 688         if (!fp)
 689             error(ERR_NONFATAL, "`incbin': unable to open file `%s'",
 690                   fname);
 691         else if (fseek(fp, 0L, SEEK_END) < 0)
 692             error(ERR_NONFATAL, "`incbin': unable to seek on file `%s'",
 693                   fname);
 694         else {
 695             len = ftell(fp);
 696             fclose(fp);
 697             if (instruction->eops->next) {
 698                 len -= instruction->eops->next->offset;
 699                 if (instruction->eops->next->next &&
 700                     len > (size_t)instruction->eops->next->next->offset) {
 701                     len = (size_t)instruction->eops->next->next->offset;
 702                 }
 703             }
 704             return instruction->times * len;
 705         }
 706         return 0;               /* if we're here, there's an error */
 707     }
 708
 709     /* Check to see if we need an address-size prefix */
 710     add_asp(instruction, bits);
 711
 712     for (temp = nasm_instructions[instruction->opcode]; temp->opcode != -1; temp++) {
 713         int m = matches(temp, instruction, bits);
 714         if (m == 100 ||
 715             (m == 99 && jmp_match(segment, offset, bits,
 716                                   instruction, temp->code))) {
 717             /* we've matched an instruction. */
 718             int64_t isize;
 719             const uint8_t *codes = temp->code;
 720             int j;
 721
 722             isize = calcsize(segment, offset, bits, instruction, codes);
 723             if (isize < 0)
 724                 return -1;
 725             for (j = 0; j < MAXPREFIX; j++) {
 726                 switch (instruction->prefixes[j]) {
 727                 case P_A16:
 728                     if (bits != 16)
 729                         isize++;
 730                     break;
 731                 case P_A32:
 732                     if (bits != 32)
 733                         isize++;
 734                     break;
 735                 case P_O16:
 736                     if (bits != 16)
 737                         isize++;
 738                     break;
 739                 case P_O32:
 740                     if (bits == 16)
 741                         isize++;
 742                     break;
 743                 case P_A64:
 744                 case P_O64:
 745                 case P_none:
 746                     break;
 747                 default:
 748                     isize++;
 749                     break;
 750                 }
 751             }
 752             return isize * instruction->times;
 753         }
 754     }
 755     return -1;                  /* didn't match any instruction */
 756 }
 757
 758 static bool possible_sbyte(operand *o)
 759 {
 760     return o->wrt == NO_SEG && o->segment == NO_SEG &&
 761         !(o->opflags & OPFLAG_UNKNOWN) &&
 762         optimizing >= 0 && !(o->type & STRICT);
 763 }
 764
 765 /* check that opn[op]  is a signed byte of size 16 or 32 */
 766 static bool is_sbyte16(operand *o)
 767 {
 768     int16_t v;
 769
 770     if (!possible_sbyte(o))
 771         return false;
 772
 773     v = o->offset;
 774     return v >= -128 && v <= 127;
 775 }
 776
 777 static bool is_sbyte32(operand *o)
 778 {
 779     int32_t v;
 780
 781     if (!possible_sbyte(o))
 782         return false;
 783
 784     v = o->offset;
 785     return v >= -128 && v <= 127;
 786 }
 787
 788 /* Common construct */
 789 #define case4(x) case (x): case (x)+1: case (x)+2: case (x)+3
 790
 791 static int64_t calcsize(int32_t segment, int64_t offset, int bits,
 792                         insn * ins, const uint8_t *codes)
 793 {
 794     int64_t length = 0;
 795     uint8_t c;
 796     int rex_mask = ~0;
 797     int op1, op2;
 798     struct operand *opx;
 799     uint8_t opex = 0;
 800
 801     ins->rex = 0;               /* Ensure REX is reset */
 802
 803     if (ins->prefixes[PPS_OSIZE] == P_O64)
 804         ins->rex |= REX_W;
 805
 806     (void)segment;              /* Don't warn that this parameter is unused */
 807     (void)offset;               /* Don't warn that this parameter is unused */
 808
 809     while (*codes) {
 810         c = *codes++;
 811         op1 = (c & 3) + ((opex & 1) << 2);
 812         op2 = ((c >> 3) & 3) + ((opex & 2) << 1);
 813         opx = &ins->oprs[op1];
 814         opex = 0;               /* For the next iteration */
 815
 816         switch (c) {
 817         case 01:
 818         case 02:
 819         case 03:
 820         case 04:
 821             codes += c, length += c;
 822             break;
 823
 824         case 05:
 825         case 06:
 826         case 07:
 827             opex = c;
 828             break;
 829
 830         case4(010):
 831             ins->rex |=
 832                 op_rexflags(opx, REX_B|REX_H|REX_P|REX_W);
 833             codes++, length++;
 834             break;
 835
 836         case4(014):
 837         case4(020):
 838         case4(024):
 839             length++;
 840             break;
 841
 842         case4(030):
 843             length += 2;
 844             break;
 845
 846         case4(034):
 847             if (opx->type & (BITS16 | BITS32 | BITS64))
 848                 length += (opx->type & BITS16) ? 2 : 4;
 849             else
 850                 length += (bits == 16) ? 2 : 4;
 851             break;
 852
 853         case4(040):
 854             length += 4;
 855             break;
 856
 857         case4(044):
 858             length += ins->addr_size >> 3;
 859             break;
 860
 861         case4(050):
 862             length++;
 863             break;
 864
 865         case4(054):
 866             length += 8; /* MOV reg64/imm */
 867             break;
 868
 869         case4(060):
 870             length += 2;
 871             break;
 872
 873         case4(064):
 874             if (opx->type & (BITS16 | BITS32 | BITS64))
 875                 length += (opx->type & BITS16) ? 2 : 4;
 876             else
 877                 length += (bits == 16) ? 2 : 4;
 878             break;
 879
 880         case4(070):
 881             length += 4;
 882             break;
 883
 884         case4(074):
 885             length += 2;
 886             break;
 887
 888         case4(0140):
 889             length += is_sbyte16(opx) ? 1 : 2;
 890             break;
 891
 892         case4(0144):
 893             codes++;
 894             length++;
 895             break;
 896
 897         case4(0150):
 898             length += is_sbyte32(opx) ? 1 : 4;
 899             break;
 900
 901         case4(0154):
 902             codes++;
 903             length++;
 904             break;
 905
 906         case4(0160):
 907             length++;
 908             ins->rex |= REX_D;
 909             ins->drexdst = regval(opx);
 910             break;
 911
 912         case4(0164):
 913             length++;
 914             ins->rex |= REX_D|REX_OC;
 915             ins->drexdst = regval(opx);
 916             break;
 917
 918         case 0171:
 919             break;
 920
 921         case 0172:
 922         case 0173:
 923         case 0174:
 924             codes++;
 925             length++;
 926             break;
 927
 928         case4(0250):
 929             length += is_sbyte32(opx) ? 1 : 4;
 930             break;
 931
 932         case4(0254):
 933             length += 4;
 934             break;
 935
 936         case4(0260):
 937             ins->rex |= REX_V;
 938             ins->drexdst = regval(opx);
 939             ins->vex_m = *codes++;
 940             ins->vex_wlp = *codes++;
 941             break;
 942
 943         case 0270:
 944             ins->rex |= REX_V;
 945             ins->drexdst = 0;
 946             ins->vex_m = *codes++;
 947             ins->vex_wlp = *codes++;
 948             break;
 949
 950         case4(0274):
 951             length++;
 952             break;
 953
 954         case4(0300):
 955             break;
 956
 957         case 0310:
 958             if (bits == 64)
 959                 return -1;
 960             length += (bits != 16) && !has_prefix(ins, PPS_ASIZE, P_A16);
 961             break;
 962
 963         case 0311:
 964             length += (bits != 32) && !has_prefix(ins, PPS_ASIZE, P_A32);
 965             break;
 966
 967         case 0312:
 968             break;
 969
 970         case 0313:
 971             if (bits != 64 || has_prefix(ins, PPS_ASIZE, P_A16) ||
 972                 has_prefix(ins, PPS_ASIZE, P_A32))
 973                 return -1;
 974             break;
 975
 976         case4(0314):
 977             break;
 978
 979         case 0320:
 980             length += (bits != 16);
 981             break;
 982
 983         case 0321:
 984             length += (bits == 16);
 985             break;
 986
 987         case 0322:
 988             break;
 989
 990         case 0323:
 991             rex_mask &= ~REX_W;
 992             break;
 993
 994         case 0324:
 995             ins->rex |= REX_W;
 996             break;
 997
 998         case 0330:
 999             codes++, length++;
1000             break;
1001
1002         case 0331:
1003             break;
1004
1005         case 0332:
1006         case 0333:
1007             length++;
1008             break;
1009
1010         case 0334:
1011             ins->rex |= REX_L;
1012             break;
1013
1014         case 0335:
1015             break;
1016
1017         case 0336:
1018             if (!ins->prefixes[PPS_LREP])
1019                 ins->prefixes[PPS_LREP] = P_REP;
1020             break;
1021
1022         case 0337:
1023             if (!ins->prefixes[PPS_LREP])
1024                 ins->prefixes[PPS_LREP] = P_REPNE;
1025             break;
1026
1027         case 0340:
1028             if (ins->oprs[0].segment != NO_SEG)
1029                 errfunc(ERR_NONFATAL, "attempt to reserve non-constant"
1030                         " quantity of BSS space");
1031             else
1032                 length += ins->oprs[0].offset;
1033             break;
1034
1035         case 0341:
1036             if (!ins->prefixes[PPS_WAIT])
1037                 ins->prefixes[PPS_WAIT] = P_WAIT;
1038             break;
1039
1040         case4(0344):
1041             length++;
1042             break;
1043
1044         case 0360:
1045             break;
1046
1047         case 0361:
1048         case 0362:
1049         case 0363:
1050             length++;
1051             break;
1052
1053         case 0364:
1054         case 0365:
1055             break;
1056
1057         case 0366:
1058         case 0367:
1059             length++;
1060             break;
1061
1062         case 0370:
1063         case 0371:
1064         case 0372:
1065             break;
1066
1067         case 0373:
1068             length++;
1069             break;
1070
1071         case4(0100):
1072         case4(0110):
1073         case4(0120):
1074         case4(0130):
1075         case4(0200):
1076         case4(0204):
1077         case4(0210):
1078         case4(0214):
1079         case4(0220):
1080         case4(0224):
1081         case4(0230):
1082         case4(0234):
1083             {
1084                 ea ea_data;
1085                 int rfield;
1086                 int32_t rflags;
1087                 struct operand *opy = &ins->oprs[op2];
1088
1089                 ea_data.rex = 0;           /* Ensure ea.REX is initially 0 */
1090
1091                 if (c <= 0177) {
1092                     /* pick rfield from operand b (opx) */
1093                     rflags = regflag(opx);
1094                     rfield = nasm_regvals[opx->basereg];
1095                 } else {
1096                     rflags = 0;
1097                     rfield = c & 7;
1098                 }
1099                 if (!process_ea(opy, &ea_data, bits,
1100                                 ins->addr_size, rfield, rflags)) {
1101                     errfunc(ERR_NONFATAL, "invalid effective address");
1102                     return -1;
1103                 } else {
1104                     ins->rex |= ea_data.rex;
1105                     length += ea_data.size;
1106                 }
1107             }
1108             break;
1109
1110         default:
1111             errfunc(ERR_PANIC, "internal instruction table corrupt"
1112                     ": instruction code \\%o (0x%02X) given", c, c);
1113             break;
1114         }
1115     }
1116
1117     ins->rex &= rex_mask;
1118
1119     if (ins->rex & REX_V) {
1120         int bad32 = REX_R|REX_W|REX_X|REX_B;
1121
1122         if (ins->rex & REX_H) {
1123             errfunc(ERR_NONFATAL, "cannot use high register in vex instruction");
1124             return -1;
1125         }
1126         switch (ins->vex_wlp & 030) {
1127         case 000:
1128         case 020:
1129             ins->rex &= ~REX_W;
1130             break;
1131         case 010:
1132             ins->rex |= REX_W;
1133             bad32 &= ~REX_W;
1134             break;
1135         case 030:
1136             /* Follow REX_W */
1137             break;
1138         }
1139
1140         if (bits != 64 && ((ins->rex & bad32) || ins->drexdst > 7)) {
1141             errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
1142             return -1;
1143         }
1144         if (ins->vex_m != 1 || (ins->rex & (REX_W|REX_R|REX_B)))
1145             length += 3;
1146         else
1147             length += 2;
1148     } else if (ins->rex & REX_D) {
1149         if (ins->rex & REX_H) {
1150             errfunc(ERR_NONFATAL, "cannot use high register in drex instruction");
1151             return -1;
1152         }
1153         if (bits != 64 && ((ins->rex & (REX_R|REX_W|REX_X|REX_B)) ||
1154                            ins->drexdst > 7)) {
1155             errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
1156             return -1;
1157         }
1158         length++;
1159     } else if (ins->rex & REX_REAL) {
1160         if (ins->rex & REX_H) {
1161             errfunc(ERR_NONFATAL, "cannot use high register in rex instruction");
1162             return -1;
1163         } else if (bits == 64) {
1164             length++;
1165         } else if ((ins->rex & REX_L) &&
1166                    !(ins->rex & (REX_P|REX_W|REX_X|REX_B)) &&
1167                    cpu >= IF_X86_64) {
1168             /* LOCK-as-REX.R */
1169             assert_no_prefix(ins, PPS_LREP);
1170             length++;
1171         } else {
1172             errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
1173             return -1;
1174         }
1175     }
1176
1177     return length;
1178 }
1179
/*
 * Emit the pending REX prefix byte, if there is one.
 *
 * Relies on `ins`, `bits`, `offset` and `segment` being in scope at the
 * point of use.  A byte is written only when the instruction is neither
 * DREX nor VEX (REX_D/REX_V clear), at least one REX_REAL bit is set and
 * bits == 64.  ins->rex is then cleared, so a later use of the macro for
 * the same instruction emits nothing, and offset is advanced by one.
 *
 * Wrapped in do { } while (0) so that `EMIT_REX();` is exactly one
 * statement and is safe inside an unbraced if/else.
 */
#define EMIT_REX()                                                      \
    do {                                                                \
        if (!(ins->rex & (REX_D|REX_V)) &&                              \
            (ins->rex & REX_REAL) && (bits == 64)) {                    \
            ins->rex = (ins->rex & REX_REAL)|REX_P;                     \
            out(offset, segment, &ins->rex, OUT_RAWDATA, 1, NO_SEG, NO_SEG); \
            ins->rex = 0;                                               \
            offset += 1;                                                \
        }                                                               \
    } while (0)
1187
1188 static void gencode(int32_t segment, int64_t offset, int bits,
1189                     insn * ins, const struct itemplate *temp,
1190                     int64_t insn_end)
1191 {
1192     static char condval[] = {   /* conditional opcodes */
1193         0x7, 0x3, 0x2, 0x6, 0x2, 0x4, 0xF, 0xD, 0xC, 0xE, 0x6, 0x2,
1194         0x3, 0x7, 0x3, 0x5, 0xE, 0xC, 0xD, 0xF, 0x1, 0xB, 0x9, 0x5,
1195         0x0, 0xA, 0xA, 0xB, 0x8, 0x4
1196     };
1197     uint8_t c;
1198     uint8_t bytes[4];
1199     int64_t size;
1200     int64_t data;
1201     int op1, op2;
1202     struct operand *opx;
1203     const uint8_t *codes = temp->code;
1204     uint8_t opex = 0;
1205
1206     while (*codes) {
1207         c = *codes++;
1208         op1 = (c & 3) + ((opex & 1) << 2);
1209         op2 = ((c >> 3) & 3) + ((opex & 2) << 1);
1210         opx = &ins->oprs[op1];
1211         opex = 0;               /* For the next iteration */
1212
1213         switch (c) {
1214         case 01:
1215         case 02:
1216         case 03:
1217         case 04:
1218             EMIT_REX();
1219             out(offset, segment, codes, OUT_RAWDATA, c, NO_SEG, NO_SEG);
1220             codes += c;
1221             offset += c;
1222             break;
1223
1224         case 05:
1225         case 06:
1226         case 07:
1227             opex = c;
1228             break;
1229
1230         case4(010):
1231             EMIT_REX();
1232             bytes[0] = *codes++ + (regval(opx) & 7);
1233             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1234             offset += 1;
1235             break;
1236
1237         case4(014):
1238             /* The test for BITS8 and SBYTE here is intended to avoid
1239                warning on optimizer actions due to SBYTE, while still
1240                warn on explicit BYTE directives.  Also warn, obviously,
1241                if the optimizer isn't enabled. */
1242             if (((opx->type & BITS8) ||
1243                  !(opx->type & temp->opd[op1] & BYTENESS)) &&
1244                 (opx->offset < -128 || opx->offset > 127)) {
1245                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1246                         "signed byte value exceeds bounds");
1247             }
1248             if (opx->segment != NO_SEG) {
1249                 data = opx->offset;
1250                 out(offset, segment, &data, OUT_ADDRESS, 1,
1251                     opx->segment, opx->wrt);
1252             } else {
1253                 bytes[0] = opx->offset;
1254                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1255                     NO_SEG);
1256             }
1257             offset += 1;
1258             break;
1259
1260         case4(020):
1261             if (opx->offset < -256 || opx->offset > 255) {
1262                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1263                         "byte value exceeds bounds");
1264             }
1265             if (opx->segment != NO_SEG) {
1266                 data = opx->offset;
1267                 out(offset, segment, &data, OUT_ADDRESS, 1,
1268                     opx->segment, opx->wrt);
1269             } else {
1270                 bytes[0] = opx->offset;
1271                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1272                     NO_SEG);
1273             }
1274             offset += 1;
1275             break;
1276
1277         case4(024):
1278             if (opx->offset < 0 || opx->offset > 255)
1279                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1280                         "unsigned byte value exceeds bounds");
1281             if (opx->segment != NO_SEG) {
1282                 data = opx->offset;
1283                 out(offset, segment, &data, OUT_ADDRESS, 1,
1284                     opx->segment, opx->wrt);
1285             } else {
1286                 bytes[0] = opx->offset;
1287                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1288                     NO_SEG);
1289             }
1290             offset += 1;
1291             break;
1292
1293         case4(030):
1294             warn_overflow(2, opx);
1295             data = opx->offset;
1296             out(offset, segment, &data, OUT_ADDRESS, 2,
1297                 opx->segment, opx->wrt);
1298             offset += 2;
1299             break;
1300
1301         case4(034):
1302             if (opx->type & (BITS16 | BITS32))
1303                 size = (opx->type & BITS16) ? 2 : 4;
1304             else
1305                 size = (bits == 16) ? 2 : 4;
1306             warn_overflow(size, opx);
1307             data = opx->offset;
1308             out(offset, segment, &data, OUT_ADDRESS, size,
1309                 opx->segment, opx->wrt);
1310             offset += size;
1311             break;
1312
1313         case4(040):
1314             warn_overflow(4, opx);
1315             data = opx->offset;
1316             out(offset, segment, &data, OUT_ADDRESS, 4,
1317                 opx->segment, opx->wrt);
1318             offset += 4;
1319             break;
1320
1321         case4(044):
1322             data = opx->offset;
1323             size = ins->addr_size >> 3;
1324             warn_overflow(size, opx);
1325             out(offset, segment, &data, OUT_ADDRESS, size,
1326                 opx->segment, opx->wrt);
1327             offset += size;
1328             break;
1329
1330         case4(050):
1331             if (opx->segment != segment)
1332                 errfunc(ERR_NONFATAL,
1333                         "short relative jump outside segment");
1334             data = opx->offset - insn_end;
1335             if (data > 127 || data < -128)
1336                 errfunc(ERR_NONFATAL, "short jump is out of range");
1337             bytes[0] = data;
1338             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1339             offset += 1;
1340             break;
1341
1342         case4(054):
1343             data = (int64_t)opx->offset;
1344             out(offset, segment, &data, OUT_ADDRESS, 8,
1345                 opx->segment, opx->wrt);
1346             offset += 8;
1347             break;
1348
1349         case4(060):
1350             if (opx->segment != segment) {
1351                 data = opx->offset;
1352                 out(offset, segment, &data,
1353                     OUT_REL2ADR, insn_end - offset,
1354                     opx->segment, opx->wrt);
1355             } else {
1356                 data = opx->offset - insn_end;
1357                 out(offset, segment, &data,
1358                     OUT_ADDRESS, 2, NO_SEG, NO_SEG);
1359             }
1360             offset += 2;
1361             break;
1362
1363         case4(064):
1364             if (opx->type & (BITS16 | BITS32 | BITS64))
1365                 size = (opx->type & BITS16) ? 2 : 4;
1366             else
1367                 size = (bits == 16) ? 2 : 4;
1368             if (opx->segment != segment) {
1369                 data = opx->offset;
1370                 out(offset, segment, &data,
1371                     size == 2 ? OUT_REL2ADR : OUT_REL4ADR,
1372                     insn_end - offset, opx->segment, opx->wrt);
1373             } else {
1374                 data = opx->offset - insn_end;
1375                 out(offset, segment, &data,
1376                     OUT_ADDRESS, size, NO_SEG, NO_SEG);
1377             }
1378             offset += size;
1379             break;
1380
1381         case4(070):
1382             if (opx->segment != segment) {
1383                 data = opx->offset;
1384                 out(offset, segment, &data,
1385                     OUT_REL4ADR, insn_end - offset,
1386                     opx->segment, opx->wrt);
1387             } else {
1388                 data = opx->offset - insn_end;
1389                 out(offset, segment, &data,
1390                     OUT_ADDRESS, 4, NO_SEG, NO_SEG);
1391             }
1392             offset += 4;
1393             break;
1394
1395         case4(074):
1396             if (opx->segment == NO_SEG)
1397                 errfunc(ERR_NONFATAL, "value referenced by FAR is not"
1398                         " relocatable");
1399             data = 0;
1400             out(offset, segment, &data, OUT_ADDRESS, 2,
1401                 outfmt->segbase(1 + opx->segment),
1402                 opx->wrt);
1403             offset += 2;
1404             break;
1405
1406         case4(0140):
1407             data = opx->offset;
1408             warn_overflow(2, opx);
1409             if (is_sbyte16(opx)) {
1410                 bytes[0] = data;
1411                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1412                     NO_SEG);
1413                 offset++;
1414             } else {
1415                 out(offset, segment, &data, OUT_ADDRESS, 2,
1416                     opx->segment, opx->wrt);
1417                 offset += 2;
1418             }
1419             break;
1420
1421         case4(0144):
1422             EMIT_REX();
1423             bytes[0] = *codes++;
1424             if (is_sbyte16(opx))
1425                 bytes[0] |= 2;  /* s-bit */
1426             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1427             offset++;
1428             break;
1429
1430         case4(0150):
1431             data = opx->offset;
1432             warn_overflow(4, opx);
1433             if (is_sbyte32(opx)) {
1434                 bytes[0] = data;
1435                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1436                     NO_SEG);
1437                 offset++;
1438             } else {
1439                 out(offset, segment, &data, OUT_ADDRESS, 4,
1440                     opx->segment, opx->wrt);
1441                 offset += 4;
1442             }
1443             break;
1444
1445         case4(0154):
1446             EMIT_REX();
1447             bytes[0] = *codes++;
1448             if (is_sbyte32(opx))
1449                 bytes[0] |= 2;  /* s-bit */
1450             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1451             offset++;
1452             break;
1453
1454         case4(0160):
1455         case4(0164):
1456             break;
1457
1458         case 0171:
1459             bytes[0] =
1460                 (ins->drexdst << 4) |
1461                 (ins->rex & REX_OC ? 0x08 : 0) |
1462                 (ins->rex & (REX_R|REX_X|REX_B));
1463             ins->rex = 0;
1464             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1465             offset++;
1466             break;
1467
1468         case 0172:
1469             c = *codes++;
1470             opx = &ins->oprs[c >> 3];
1471             bytes[0] = nasm_regvals[opx->basereg] << 4;
1472             opx = &ins->oprs[c & 7];
1473             if (opx->segment != NO_SEG || opx->wrt != NO_SEG) {
1474                 errfunc(ERR_NONFATAL,
1475                         "non-absolute expression not permitted as argument %d",
1476                         c & 7);
1477             } else {
1478                 if (opx->offset & ~15) {
1479                     errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1480                             "four-bit argument exceeds bounds");
1481                 }
1482                 bytes[0] |= opx->offset & 15;
1483             }
1484             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1485             offset++;
1486             break;
1487
1488         case 0173:
1489             c = *codes++;
1490             opx = &ins->oprs[c >> 4];
1491             bytes[0] = nasm_regvals[opx->basereg] << 4;
1492             bytes[0] |= c & 15;
1493             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1494             offset++;
1495             break;
1496
1497         case 0174:
1498             c = *codes++;
1499             opx = &ins->oprs[c];
1500             bytes[0] = nasm_regvals[opx->basereg] << 4;
1501             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1502             offset++;
1503             break;
1504
1505         case4(0250):
1506             data = opx->offset;
1507             if (opx->wrt == NO_SEG && opx->segment == NO_SEG &&
1508                 (int32_t)data != (int64_t)data) {
1509                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1510                         "signed dword immediate exceeds bounds");
1511             }
1512             if (is_sbyte32(opx)) {
1513                 bytes[0] = data;
1514                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1515                     NO_SEG);
1516                 offset++;
1517             } else {
1518                 out(offset, segment, &data, OUT_ADDRESS, 4,
1519                     opx->segment, opx->wrt);
1520                 offset += 4;
1521             }
1522             break;
1523
1524         case4(0254):
1525             data = opx->offset;
1526             if (opx->wrt == NO_SEG && opx->segment == NO_SEG &&
1527                 (int32_t)data != (int64_t)data) {
1528                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1529                         "signed dword immediate exceeds bounds");
1530             }
1531             out(offset, segment, &data, OUT_ADDRESS, 4,
1532                 opx->segment, opx->wrt);
1533             offset += 4;
1534             break;
1535
1536         case4(0260):
1537         case 0270:
1538             codes += 2;
1539             if (ins->vex_m != 1 || (ins->rex & (REX_W|REX_X|REX_B))) {
1540                 bytes[0] = 0xc4;
1541                 bytes[1] = ins->vex_m | ((~ins->rex & 7) << 5);
1542                 bytes[2] = ((ins->rex & REX_W) << (7-3)) |
1543                     ((~ins->drexdst & 15)<< 3) | (ins->vex_wlp & 07);
1544                 out(offset, segment, &bytes, OUT_RAWDATA, 3, NO_SEG, NO_SEG);
1545                 offset += 3;
1546             } else {
1547                 bytes[0] = 0xc5;
1548                 bytes[1] = ((~ins->rex & REX_R) << (7-2)) |
1549                     ((~ins->drexdst & 15) << 3) | (ins->vex_wlp & 07);
1550                 out(offset, segment, &bytes, OUT_RAWDATA, 2, NO_SEG, NO_SEG);
1551                 offset += 2;
1552             }
1553             break;
1554
1555         case4(0274):
1556         {
1557             uint64_t uv, um;
1558             int s;
1559
1560             if (ins->rex & REX_W)
1561                 s = 64;
1562             else if (ins->prefixes[PPS_OSIZE] == P_O16)
1563                 s = 16;
1564             else if (ins->prefixes[PPS_OSIZE] == P_O32)
1565                 s = 32;
1566             else
1567                 s = bits;
1568
1569             um = (uint64_t)2 << (s-1);
1570             uv = opx->offset;
1571
1572             if (uv > 127 && uv < (uint64_t)-128 &&
1573                 (uv < um-128 || uv > um-1)) {
1574                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1575                         "signed byte value exceeds bounds");
1576             }
1577             if (opx->segment != NO_SEG) {
1578                 data = uv;
1579                 out(offset, segment, &data, OUT_ADDRESS, 1,
1580                     opx->segment, opx->wrt);
1581             } else {
1582                 bytes[0] = uv;
1583                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1584                     NO_SEG);
1585             }
1586             offset += 1;
1587             break;
1588         }
1589
1590         case4(0300):
1591             break;
1592
1593         case 0310:
1594             if (bits == 32 && !has_prefix(ins, PPS_ASIZE, P_A16)) {
1595                 *bytes = 0x67;
1596                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1597                 offset += 1;
1598             } else
1599                 offset += 0;
1600             break;
1601
1602         case 0311:
1603             if (bits != 32 && !has_prefix(ins, PPS_ASIZE, P_A32)) {
1604                 *bytes = 0x67;
1605                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1606                 offset += 1;
1607             } else
1608                 offset += 0;
1609             break;
1610
1611         case 0312:
1612             break;
1613
1614         case 0313:
1615             ins->rex = 0;
1616             break;
1617
1618         case4(0314):
1619             break;
1620
1621         case 0320:
1622             if (bits != 16) {
1623                 *bytes = 0x66;
1624                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1625                 offset += 1;
1626             } else
1627                 offset += 0;
1628             break;
1629
1630         case 0321:
1631             if (bits == 16) {
1632                 *bytes = 0x66;
1633                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1634                 offset += 1;
1635             } else
1636                 offset += 0;
1637             break;
1638
1639         case 0322:
1640         case 0323:
1641             break;
1642
1643         case 0324:
1644             ins->rex |= REX_W;
1645             break;
1646
1647         case 0330:
1648             *bytes = *codes++ ^ condval[ins->condition];
1649             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1650             offset += 1;
1651             break;
1652
1653         case 0331:
1654             break;
1655
1656         case 0332:
1657         case 0333:
1658             *bytes = c - 0332 + 0xF2;
1659             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1660             offset += 1;
1661             break;
1662
1663         case 0334:
1664             if (ins->rex & REX_R) {
1665                 *bytes = 0xF0;
1666                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1667                 offset += 1;
1668             }
1669             ins->rex &= ~(REX_L|REX_R);
1670             break;
1671
1672         case 0335:
1673             break;
1674
1675         case 0336:
1676         case 0337:
1677             break;
1678
1679         case 0340:
1680             if (ins->oprs[0].segment != NO_SEG)
1681                 errfunc(ERR_PANIC, "non-constant BSS size in pass two");
1682             else {
1683                 int64_t size = ins->oprs[0].offset;
1684                 if (size > 0)
1685                     out(offset, segment, NULL,
1686                         OUT_RESERVE, size, NO_SEG, NO_SEG);
1687                 offset += size;
1688             }
1689             break;
1690
1691         case 0341:
1692             break;
1693
1694         case 0344:
1695         case 0345:
1696             bytes[0] = c & 1;
1697             switch (ins->oprs[0].basereg) {
1698             case R_CS:
1699                 bytes[0] += 0x0E;
1700                 break;
1701             case R_DS:
1702                 bytes[0] += 0x1E;
1703                 break;
1704             case R_ES:
1705                 bytes[0] += 0x06;
1706                 break;
1707             case R_SS:
1708                 bytes[0] += 0x16;
1709                 break;
1710             default:
1711                 errfunc(ERR_PANIC,
1712                         "bizarre 8086 segment register received");
1713             }
1714             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1715             offset++;
1716             break;
1717
1718         case 0346:
1719         case 0347:
1720             bytes[0] = c & 1;
1721             switch (ins->oprs[0].basereg) {
1722             case R_FS:
1723                 bytes[0] += 0xA0;
1724                 break;
1725             case R_GS:
1726                 bytes[0] += 0xA8;
1727                 break;
1728             default:
1729                 errfunc(ERR_PANIC,
1730                         "bizarre 386 segment register received");
1731             }
1732             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1733             offset++;
1734             break;
1735
1736         case 0360:
1737             break;
1738
1739         case 0361:
1740             bytes[0] = 0x66;
1741             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1742             offset += 1;
1743             break;
1744
1745         case 0362:
1746         case 0363:
1747             bytes[0] = c - 0362 + 0xf2;
1748             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1749             offset += 1;
1750             break;
1751
1752         case 0364:
1753         case 0365:
1754             break;
1755
1756         case 0366:
1757         case 0367:
1758             *bytes = c - 0366 + 0x66;
1759             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1760             offset += 1;
1761             break;
1762
1763         case 0370:
1764         case 0371:
1765         case 0372:
1766             break;
1767
1768         case 0373:
1769             *bytes = bits == 16 ? 3 : 5;
1770             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1771             offset += 1;
1772             break;
1773
1774         case4(0100):
1775         case4(0110):
1776         case4(0120):
1777         case4(0130):
1778         case4(0200):
1779         case4(0204):
1780         case4(0210):
1781         case4(0214):
1782         case4(0220):
1783         case4(0224):
1784         case4(0230):
1785         case4(0234):
1786             {
1787                 ea ea_data;
1788                 int rfield;
1789                 int32_t rflags;
1790                 uint8_t *p;
1791                 int32_t s;
1792                 enum out_type type;
1793                 struct operand *opy = &ins->oprs[op2];
1794
1795                 if (c <= 0177) {
1796                     /* pick rfield from operand b (opx) */
1797                     rflags = regflag(opx);
1798                     rfield = nasm_regvals[opx->basereg];
1799                 } else {
1800                     /* rfield is constant */
1801                     rflags = 0;
1802                     rfield = c & 7;
1803                 }
1804
1805                 if (!process_ea(opy, &ea_data, bits, ins->addr_size,
1806                                 rfield, rflags)) {
1807                     errfunc(ERR_NONFATAL, "invalid effective address");
1808                 }
1809
1810
1811                 p = bytes;
1812                 *p++ = ea_data.modrm;
1813                 if (ea_data.sib_present)
1814                     *p++ = ea_data.sib;
1815
1816                 /* DREX suffixes come between the SIB and the displacement */
1817                 if (ins->rex & REX_D) {
1818                     *p++ = (ins->drexdst << 4) |
1819                            (ins->rex & REX_OC ? 0x08 : 0) |
1820                            (ins->rex & (REX_R|REX_X|REX_B));
1821                     ins->rex = 0;
1822                 }
1823
1824                 s = p - bytes;
1825                 out(offset, segment, bytes, OUT_RAWDATA, s, NO_SEG, NO_SEG);
1826
1827                 /*
1828                  * Make sure the address gets the right offset in case
1829                  * the line breaks in the .lst file (BR 1197827)
1830                  */
1831                 offset += s;
1832                 s = 0;
1833
1834                 switch (ea_data.bytes) {
1835                 case 0:
1836                     break;
1837                 case 1:
1838                 case 2:
1839                 case 4:
1840                 case 8:
1841                     data = opy->offset;
1842                     warn_overflow(ea_data.bytes, opy);
1843                     s += ea_data.bytes;
1844                     if (ea_data.rip) {
1845                         if (opy->segment == segment) {
1846                             data -= insn_end;
1847                             out(offset, segment, &data, OUT_ADDRESS,
1848                                 ea_data.bytes, NO_SEG, NO_SEG);
1849                         } else {
1850                             out(offset, segment, &data, OUT_REL4ADR,
1851                                 insn_end - offset, opy->segment, opy->wrt);
1852                         }
1853                     } else {
1854                         type = OUT_ADDRESS;
1855                         out(offset, segment, &data, OUT_ADDRESS,
1856                             ea_data.bytes, opy->segment, opy->wrt);
1857                     }
1858                     break;
1859                 default:
1860                     /* Impossible! */
1861                     errfunc(ERR_PANIC,
1862                             "Invalid amount of bytes (%d) for offset?!",
1863                             ea_data.bytes);
1864                     break;
1865                 }
1866                 offset += s;
1867             }
1868             break;
1869
1870         default:
1871             errfunc(ERR_PANIC, "internal instruction table corrupt"
1872                     ": instruction code \\%o (0x%02X) given", c, c);
1873             break;
1874         }
1875     }
1876 }
1877
1878 static int32_t regflag(const operand * o)
1879 {
1880     if (o->basereg < EXPR_REG_START || o->basereg >= REG_ENUM_LIMIT) {
1881         errfunc(ERR_PANIC, "invalid operand passed to regflag()");
1882     }
1883     return nasm_reg_flags[o->basereg];
1884 }
1885
1886 static int32_t regval(const operand * o)
1887 {
1888     if (o->basereg < EXPR_REG_START || o->basereg >= REG_ENUM_LIMIT) {
1889         errfunc(ERR_PANIC, "invalid operand passed to regval()");
1890     }
1891     return nasm_regvals[o->basereg];
1892 }
1893
1894 static int op_rexflags(const operand * o, int mask)
1895 {
1896     int32_t flags;
1897     int val;
1898
1899     if (o->basereg < EXPR_REG_START || o->basereg >= REG_ENUM_LIMIT) {
1900         errfunc(ERR_PANIC, "invalid operand passed to op_rexflags()");
1901     }
1902
1903     flags = nasm_reg_flags[o->basereg];
1904     val = nasm_regvals[o->basereg];
1905
1906     return rexflags(val, flags, mask);
1907 }
1908
1909 static int rexflags(int val, int32_t flags, int mask)
1910 {
1911     int rex = 0;
1912
1913     if (val >= 8)
1914         rex |= REX_B|REX_X|REX_R;
1915     if (flags & BITS64)
1916         rex |= REX_W;
1917     if (!(REG_HIGH & ~flags))   /* AH, CH, DH, BH */
1918         rex |= REX_H;
1919     else if (!(REG8 & ~flags) && val >= 4) /* SPL, BPL, SIL, DIL */
1920         rex |= REX_P;
1921
1922     return rex & mask;
1923 }
1924
1925 static int matches(const struct itemplate *itemp, insn * instruction, int bits)
1926 {
1927     int i, size[MAX_OPERANDS], asize, oprs, ret;
1928
1929     ret = 100;
1930
1931     /*
1932      * Check the opcode
1933      */
1934     if (itemp->opcode != instruction->opcode)
1935         return 0;
1936
1937     /*
1938      * Count the operands
1939      */
1940     if (itemp->operands != instruction->operands)
1941         return 0;
1942
1943     /*
1944      * Check that no spurious colons or TOs are present
1945      */
1946     for (i = 0; i < itemp->operands; i++)
1947         if (instruction->oprs[i].type & ~itemp->opd[i] & (COLON | TO))
1948             return 0;
1949
1950     /*
1951      * Process size flags
1952      */
1953     if (itemp->flags & IF_ARMASK) {
1954         memset(size, 0, sizeof size);
1955
1956         i = ((itemp->flags & IF_ARMASK) >> IF_ARSHFT) - 1;
1957
1958         switch (itemp->flags & IF_SMASK) {
1959         case IF_SB:
1960             size[i] = BITS8;
1961             break;
1962         case IF_SW:
1963             size[i] = BITS16;
1964             break;
1965         case IF_SD:
1966             size[i] = BITS32;
1967             break;
1968         case IF_SQ:
1969             size[i] = BITS64;
1970             break;
1971         case IF_SO:
1972             size[i] = BITS128;
1973             break;
1974         case IF_SY:
1975             size[i] = BITS256;
1976             break;
1977         case IF_SZ:
1978             switch (bits) {
1979             case 16:
1980                 size[i] = BITS16;
1981                 break;
1982             case 32:
1983                 size[i] = BITS32;
1984                 break;
1985             case 64:
1986                 size[i] = BITS64;
1987                 break;
1988             }
1989             break;
1990         default:
1991             break;
1992         }
1993     } else {
1994         asize = 0;
1995         switch (itemp->flags & IF_SMASK) {
1996         case IF_SB:
1997             asize = BITS8;
1998             break;
1999         case IF_SW:
2000             asize = BITS16;
2001             break;
2002         case IF_SD:
2003             asize = BITS32;
2004             break;
2005         case IF_SQ:
2006             asize = BITS64;
2007             break;
2008         case IF_SO:
2009             asize = BITS128;
2010             break;
2011         case IF_SY:
2012             asize = BITS256;
2013             break;
2014         case IF_SZ:
2015             switch (bits) {
2016             case 16:
2017                 asize = BITS16;
2018                 break;
2019             case 32:
2020                 asize = BITS32;
2021                 break;
2022             case 64:
2023                 asize = BITS64;
2024                 break;
2025             }
2026             break;
2027         default:
2028             break;
2029         }
2030         for (i = 0; i < MAX_OPERANDS; i++)
2031             size[i] = asize;
2032     }
2033
2034     /*
2035      * Check that the operand flags all match up
2036      */
2037     for (i = 0; i < itemp->operands; i++) {
2038         int32_t type = instruction->oprs[i].type;
2039         if (!(type & SIZE_MASK))
2040             type |= size[i];
2041
2042         if (itemp->opd[i] & SAME_AS) {
2043             int j = itemp->opd[i] & ~SAME_AS;
2044             if (type != instruction->oprs[j].type ||
2045                 instruction->oprs[i].basereg != instruction->oprs[j].basereg)
2046                 return 0;
2047         } else if (itemp->opd[i] & ~type ||
2048             ((itemp->opd[i] & SIZE_MASK) &&
2049              ((itemp->opd[i] ^ type) & SIZE_MASK))) {
2050             if ((itemp->opd[i] & ~type & ~SIZE_MASK) ||
2051                 (type & SIZE_MASK))
2052                 return 0;
2053             else
2054                 return 1;
2055         }
2056     }
2057
2058     /*
2059      * Check operand sizes
2060      */
2061     if (itemp->flags & (IF_SM | IF_SM2)) {
2062         oprs = (itemp->flags & IF_SM2 ? 2 : itemp->operands);
2063         asize = 0;
2064         for (i = 0; i < oprs; i++) {
2065             if ((asize = itemp->opd[i] & SIZE_MASK) != 0) {
2066                 int j;
2067                 for (j = 0; j < oprs; j++)
2068                     size[j] = asize;
2069                 break;
2070             }
2071         }
2072     } else {
2073         oprs = itemp->operands;
2074     }
2075
2076     for (i = 0; i < itemp->operands; i++) {
2077         if (!(itemp->opd[i] & SIZE_MASK) &&
2078             (instruction->oprs[i].type & SIZE_MASK & ~size[i]))
2079             return 2;
2080     }
2081
2082     /*
2083      * Check template is okay at the set cpu level
2084      */
2085     if (((itemp->flags & IF_PLEVEL) > cpu))
2086         return 3;
2087
2088     /*
2089      * Verify the appropriate long mode flag.
2090      */
2091     if ((itemp->flags & (bits == 64 ? IF_NOLONG : IF_LONG)))
2092         return 4;
2093
2094     /*
2095      * Check if special handling needed for Jumps
2096      */
2097     if ((uint8_t)(itemp->code[0]) >= 0370)
2098         return 99;
2099
2100     return ret;
2101 }
2102
2103 static ea *process_ea(operand * input, ea * output, int bits,
2104                       int addrbits, int rfield, int32_t rflags)
2105 {
2106     bool forw_ref = !!(input->opflags & OPFLAG_UNKNOWN);
2107
2108     output->rip = false;
2109
2110     /* REX flags for the rfield operand */
2111     output->rex |= rexflags(rfield, rflags, REX_R|REX_P|REX_W|REX_H);
2112
2113     if (!(REGISTER & ~input->type)) {   /* register direct */
2114         int i;
2115         int32_t f;
2116
2117         if (input->basereg < EXPR_REG_START /* Verify as Register */
2118             || input->basereg >= REG_ENUM_LIMIT)
2119             return NULL;
2120         f = regflag(input);
2121         i = nasm_regvals[input->basereg];
2122
2123         if (REG_EA & ~f)
2124             return NULL;        /* Invalid EA register */
2125
2126         output->rex |= op_rexflags(input, REX_B|REX_P|REX_W|REX_H);
2127
2128         output->sib_present = false;             /* no SIB necessary */
2129         output->bytes = 0;  /* no offset necessary either */
2130         output->modrm = 0xC0 | ((rfield & 7) << 3) | (i & 7);
2131     } else {                    /* it's a memory reference */
2132         if (input->basereg == -1
2133             && (input->indexreg == -1 || input->scale == 0)) {
2134             /* it's a pure offset */
2135             if (bits == 64 && (~input->type & IP_REL)) {
2136               int scale, index, base;
2137               output->sib_present = true;
2138               scale = 0;
2139               index = 4;
2140               base = 5;
2141               output->sib = (scale << 6) | (index << 3) | base;
2142               output->bytes = 4;
2143               output->modrm = 4 | ((rfield & 7) << 3);
2144               output->rip = false;
2145             } else {
2146               output->sib_present = false;
2147               output->bytes = (addrbits != 16 ? 4 : 2);
2148               output->modrm = (addrbits != 16 ? 5 : 6) | ((rfield & 7) << 3);
2149               output->rip = bits == 64;
2150             }
2151         } else {                /* it's an indirection */
2152             int i = input->indexreg, b = input->basereg, s = input->scale;
2153             int32_t o = input->offset, seg = input->segment;
2154             int hb = input->hintbase, ht = input->hinttype;
2155             int t;
2156             int it, bt;
2157             int32_t ix, bx;     /* register flags */
2158
2159             if (s == 0)
2160                 i = -1;         /* make this easy, at least */
2161
2162             if (i >= EXPR_REG_START && i < REG_ENUM_LIMIT) {
2163                 it = nasm_regvals[i];
2164                 ix = nasm_reg_flags[i];
2165             } else {
2166                 it = -1;
2167                 ix = 0;
2168             }
2169
2170             if (b >= EXPR_REG_START && b < REG_ENUM_LIMIT) {
2171                 bt = nasm_regvals[b];
2172                 bx = nasm_reg_flags[b];
2173             } else {
2174                 bt = -1;
2175                 bx = 0;
2176             }
2177
2178             /* check for a 32/64-bit memory reference... */
2179             if ((ix|bx) & (BITS32|BITS64)) {
2180                 /* it must be a 32/64-bit memory reference. Firstly we have
2181                  * to check that all registers involved are type E/Rxx. */
2182                 int32_t sok = BITS32|BITS64;
2183
2184                 if (it != -1) {
2185                     if (!(REG64 & ~ix) || !(REG32 & ~ix))
2186                         sok &= ix;
2187                     else
2188                         return NULL;
2189                 }
2190
2191                 if (bt != -1) {
2192                     if (REG_GPR & ~bx)
2193                         return NULL; /* Invalid register */
2194                     if (~sok & bx & SIZE_MASK)
2195                         return NULL; /* Invalid size */
2196                     sok &= bx;
2197                 }
2198
2199                 /* While we're here, ensure the user didn't specify
2200                    WORD or QWORD. */
2201                 if (input->disp_size == 16 || input->disp_size == 64)
2202                     return NULL;
2203
2204                 if (addrbits == 16 ||
2205                     (addrbits == 32 && !(sok & BITS32)) ||
2206                     (addrbits == 64 && !(sok & BITS64)))
2207                     return NULL;
2208
2209                 /* now reorganize base/index */
2210                 if (s == 1 && bt != it && bt != -1 && it != -1 &&
2211                     ((hb == b && ht == EAH_NOTBASE)
2212                      || (hb == i && ht == EAH_MAKEBASE))) {
2213                     /* swap if hints say so */
2214                     t = bt, bt = it, it = t;
2215                     t = bx, bx = ix, ix = t;
2216                 }
2217                 if (bt == it)     /* convert EAX+2*EAX to 3*EAX */
2218                     bt = -1, bx = 0, s++;
2219                 if (bt == -1 && s == 1 && !(hb == it && ht == EAH_NOTBASE)) {
2220                     /* make single reg base, unless hint */
2221                     bt = it, bx = ix, it = -1, ix = 0;
2222                 }
2223                 if (((s == 2 && it != REG_NUM_ESP
2224                       && !(input->eaflags & EAF_TIMESTWO)) || s == 3
2225                      || s == 5 || s == 9) && bt == -1)
2226                     bt = it, bx = ix, s--; /* convert 3*EAX to EAX+2*EAX */
2227                 if (it == -1 && (bt & 7) != REG_NUM_ESP
2228                     && (input->eaflags & EAF_TIMESTWO))
2229                     it = bt, ix = bx, bt = -1, bx = 0, s = 1;
2230                 /* convert [NOSPLIT EAX] to sib format with 0x0 displacement */
2231                 if (s == 1 && it == REG_NUM_ESP) {
2232                     /* swap ESP into base if scale is 1 */
2233                     t = it, it = bt, bt = t;
2234                     t = ix, ix = bx, bx = t;
2235                 }
2236                 if (it == REG_NUM_ESP
2237                     || (s != 1 && s != 2 && s != 4 && s != 8 && it != -1))
2238                     return NULL;        /* wrong, for various reasons */
2239
2240                 output->rex |= rexflags(it, ix, REX_X);
2241                 output->rex |= rexflags(bt, bx, REX_B);
2242
2243                 if (it == -1 && (bt & 7) != REG_NUM_ESP) {
2244                     /* no SIB needed */
2245                     int mod, rm;
2246
2247                     if (bt == -1) {
2248                         rm = 5;
2249                         mod = 0;
2250                     } else {
2251                         rm = (bt & 7);
2252                         if (rm != REG_NUM_EBP && o == 0 &&
2253                                 seg == NO_SEG && !forw_ref &&
2254                                 !(input->eaflags &
2255                                   (EAF_BYTEOFFS | EAF_WORDOFFS)))
2256                             mod = 0;
2257                         else if (input->eaflags & EAF_BYTEOFFS ||
2258                                  (o >= -128 && o <= 127 && seg == NO_SEG
2259                                   && !forw_ref
2260                                   && !(input->eaflags & EAF_WORDOFFS)))
2261                             mod = 1;
2262                         else
2263                             mod = 2;
2264                     }
2265
2266                     output->sib_present = false;
2267                     output->bytes = (bt == -1 || mod == 2 ? 4 : mod);
2268                     output->modrm = (mod << 6) | ((rfield & 7) << 3) | rm;
2269                 } else {
2270                     /* we need a SIB */
2271                     int mod, scale, index, base;
2272
2273                     if (it == -1)
2274                         index = 4, s = 1;
2275                     else
2276                         index = (it & 7);
2277
2278                     switch (s) {
2279                     case 1:
2280                         scale = 0;
2281                         break;
2282                     case 2:
2283                         scale = 1;
2284                         break;
2285                     case 4:
2286                         scale = 2;
2287                         break;
2288                     case 8:
2289                         scale = 3;
2290                         break;
2291                     default:   /* then what the smeg is it? */
2292                         return NULL;    /* panic */
2293                     }
2294
2295                     if (bt == -1) {
2296                         base = 5;
2297                         mod = 0;
2298                     } else {
2299                         base = (bt & 7);
2300                         if (base != REG_NUM_EBP && o == 0 &&
2301                                     seg == NO_SEG && !forw_ref &&
2302                                     !(input->eaflags &
2303                                       (EAF_BYTEOFFS | EAF_WORDOFFS)))
2304                             mod = 0;
2305                         else if (input->eaflags & EAF_BYTEOFFS ||
2306                                  (o >= -128 && o <= 127 && seg == NO_SEG
2307                                   && !forw_ref
2308                                   && !(input->eaflags & EAF_WORDOFFS)))
2309                             mod = 1;
2310                         else
2311                             mod = 2;
2312                     }
2313
2314                     output->sib_present = true;
2315                     output->bytes =  (bt == -1 || mod == 2 ? 4 : mod);
2316                     output->modrm = (mod << 6) | ((rfield & 7) << 3) | 4;
2317                     output->sib = (scale << 6) | (index << 3) | base;
2318                 }
2319             } else {            /* it's 16-bit */
2320                 int mod, rm;
2321
2322                 /* check for 64-bit long mode */
2323                 if (addrbits == 64)
2324                     return NULL;
2325
2326                 /* check all registers are BX, BP, SI or DI */
2327                 if ((b != -1 && b != R_BP && b != R_BX && b != R_SI
2328                      && b != R_DI) || (i != -1 && i != R_BP && i != R_BX
2329                                        && i != R_SI && i != R_DI))
2330                     return NULL;
2331
2332                 /* ensure the user didn't specify DWORD/QWORD */
2333                 if (input->disp_size == 32 || input->disp_size == 64)
2334                     return NULL;
2335
2336                 if (s != 1 && i != -1)
2337                     return NULL;        /* no can do, in 16-bit EA */
2338                 if (b == -1 && i != -1) {
2339                     int tmp = b;
2340                     b = i;
2341                     i = tmp;
2342                 }               /* swap */
2343                 if ((b == R_SI || b == R_DI) && i != -1) {
2344                     int tmp = b;
2345                     b = i;
2346                     i = tmp;
2347                 }
2348                 /* have BX/BP as base, SI/DI index */
2349                 if (b == i)
2350                     return NULL;        /* shouldn't ever happen, in theory */
2351                 if (i != -1 && b != -1 &&
2352                     (i == R_BP || i == R_BX || b == R_SI || b == R_DI))
2353                     return NULL;        /* invalid combinations */
2354                 if (b == -1)    /* pure offset: handled above */
2355                     return NULL;        /* so if it gets to here, panic! */
2356
2357                 rm = -1;
2358                 if (i != -1)
2359                     switch (i * 256 + b) {
2360                     case R_SI * 256 + R_BX:
2361                         rm = 0;
2362                         break;
2363                     case R_DI * 256 + R_BX:
2364                         rm = 1;
2365                         break;
2366                     case R_SI * 256 + R_BP:
2367                         rm = 2;
2368                         break;
2369                     case R_DI * 256 + R_BP:
2370                         rm = 3;
2371                         break;
2372                 } else
2373                     switch (b) {
2374                     case R_SI:
2375                         rm = 4;
2376                         break;
2377                     case R_DI:
2378                         rm = 5;
2379                         break;
2380                     case R_BP:
2381                         rm = 6;
2382                         break;
2383                     case R_BX:
2384                         rm = 7;
2385                         break;
2386                     }
2387                 if (rm == -1)   /* can't happen, in theory */
2388                     return NULL;        /* so panic if it does */
2389
2390                 if (o == 0 && seg == NO_SEG && !forw_ref && rm != 6 &&
2391                     !(input->eaflags & (EAF_BYTEOFFS | EAF_WORDOFFS)))
2392                     mod = 0;
2393                 else if (input->eaflags & EAF_BYTEOFFS ||
2394                          (o >= -128 && o <= 127 && seg == NO_SEG
2395                           && !forw_ref
2396                           && !(input->eaflags & EAF_WORDOFFS)))
2397                     mod = 1;
2398                 else
2399                     mod = 2;
2400
2401                 output->sib_present = false;    /* no SIB - it's 16-bit */
2402                 output->bytes = mod;    /* bytes of offset needed */
2403                 output->modrm = (mod << 6) | ((rfield & 7) << 3) | rm;
2404             }
2405         }
2406     }
2407
2408     output->size = 1 + output->sib_present + output->bytes;
2409     return output;
2410 }
2411
2412 static void add_asp(insn *ins, int addrbits)
2413 {
2414     int j, valid;
2415     int defdisp;
2416
2417     valid = (addrbits == 64) ? 64|32 : 32|16;
2418
2419     switch (ins->prefixes[PPS_ASIZE]) {
2420     case P_A16:
2421         valid &= 16;
2422         break;
2423     case P_A32:
2424         valid &= 32;
2425         break;
2426     case P_A64:
2427         valid &= 64;
2428         break;
2429     case P_ASP:
2430         valid &= (addrbits == 32) ? 16 : 32;
2431         break;
2432     default:
2433         break;
2434     }
2435
2436     for (j = 0; j < ins->operands; j++) {
2437         if (!(MEMORY & ~ins->oprs[j].type)) {
2438             int32_t i, b;
2439
2440             /* Verify as Register */
2441             if (ins->oprs[j].indexreg < EXPR_REG_START
2442                 || ins->oprs[j].indexreg >= REG_ENUM_LIMIT)
2443                 i = 0;
2444             else
2445                 i = nasm_reg_flags[ins->oprs[j].indexreg];
2446
2447             /* Verify as Register */
2448             if (ins->oprs[j].basereg < EXPR_REG_START
2449                 || ins->oprs[j].basereg >= REG_ENUM_LIMIT)
2450                 b = 0;
2451             else
2452                 b = nasm_reg_flags[ins->oprs[j].basereg];
2453
2454             if (ins->oprs[j].scale == 0)
2455                 i = 0;
2456
2457             if (!i && !b) {
2458                 int ds = ins->oprs[j].disp_size;
2459                 if ((addrbits != 64 && ds > 8) ||
2460                     (addrbits == 64 && ds == 16))
2461                     valid &= ds;
2462             } else {
2463                 if (!(REG16 & ~b))
2464                     valid &= 16;
2465                 if (!(REG32 & ~b))
2466                     valid &= 32;
2467                 if (!(REG64 & ~b))
2468                     valid &= 64;
2469
2470                 if (!(REG16 & ~i))
2471                     valid &= 16;
2472                 if (!(REG32 & ~i))
2473                     valid &= 32;
2474                 if (!(REG64 & ~i))
2475                     valid &= 64;
2476             }
2477         }
2478     }
2479
2480     if (valid & addrbits) {
2481         ins->addr_size = addrbits;
2482     } else if (valid & ((addrbits == 32) ? 16 : 32)) {
2483         /* Add an address size prefix */
2484         enum prefixes pref = (addrbits == 32) ? P_A16 : P_A32;
2485         ins->prefixes[PPS_ASIZE] = pref;
2486         ins->addr_size = (addrbits == 32) ? 16 : 32;
2487     } else {
2488         /* Impossible... */
2489         errfunc(ERR_NONFATAL, "impossible combination of address sizes");
2490         ins->addr_size = addrbits; /* Error recovery */
2491     }
2492
2493     defdisp = ins->addr_size == 16 ? 16 : 32;
2494
2495     for (j = 0; j < ins->operands; j++) {
2496         if (!(MEM_OFFS & ~ins->oprs[j].type) &&
2497             (ins->oprs[j].disp_size ? ins->oprs[j].disp_size : defdisp)
2498             != ins->addr_size) {
2499             /* mem_offs sizes must match the address size; if not,
2500                strip the MEM_OFFS bit and match only EA instructions */
2501             ins->oprs[j].type &= ~(MEM_OFFS & ~MEMORY);
2502         }
2503     }
2504 }