assemble.c

   1 /* assemble.c   code generation for the Netwide Assembler
   2  *
   3  * The Netwide Assembler is copyright (C) 1996 Simon Tatham and
   4  * Julian Hall. All rights reserved. The software is
   5  * redistributable under the license given in the file "LICENSE"
   6  * distributed in the NASM archive.
   7  *
   8  * the actual codes (C syntax, i.e. octal):
   9  * \0            - terminates the code. (Unless it's a literal of course.)
  10  * \1..\4        - that many literal bytes follow in the code stream
  11  * \5            - add 4 to the primary operand number (b, low octdigit)
  12  * \6            - add 4 to the secondary operand number (a, middle octdigit)
  13  * \7            - add 4 to both the primary and the secondary operand number
  14  * \10..\13      - a literal byte follows in the code stream, to be added
  15  *                 to the register value of operand 0..3
  16  * \14..\17      - a signed byte immediate operand, from operand 0..3
  17  * \20..\23      - a byte immediate operand, from operand 0..3
  18  * \24..\27      - an unsigned byte immediate operand, from operand 0..3
  19  * \30..\33      - a word immediate operand, from operand 0..3
  20  * \34..\37      - select between \3[0-3] and \4[0-3] depending on 16/32 bit
  21  *                 assembly mode or the operand-size override on the operand
  22  * \40..\43      - a long immediate operand, from operand 0..3
  23  * \44..\47      - select between \3[0-3], \4[0-3] and \5[4-7]
  24  *                 depending on the address size of the instruction.
  25  * \50..\53      - a byte relative operand, from operand 0..3
  26  * \54..\57      - a qword immediate operand, from operand 0..3
  27  * \60..\63      - a word relative operand, from operand 0..3
  28  * \64..\67      - select between \6[0-3] and \7[0-3] depending on 16/32 bit
  29  *                 assembly mode or the operand-size override on the operand
  30  * \70..\73      - a long relative operand, from operand 0..3
  31  * \74..\77      - a word constant, from the _segment_ part of operand 0..3
  32  * \1ab          - a ModRM, calculated on EA in operand a, with the spare
  33  *                 field the register value of operand b.
  34  * \140..\143    - an immediate word or signed byte for operand 0..3
  35  * \144..\147    - or 2 (s-field) into opcode byte if operand 0..3
  36  *                  is a signed byte rather than a word.  Opcode byte follows.
  37  * \150..\153    - an immediate dword or signed byte for operand 0..3
  38  * \154..\157    - or 2 (s-field) into opcode byte if operand 0..3
  39  *                  is a signed byte rather than a dword.  Opcode byte follows.
  40  * \160..\163    - this instruction uses DREX rather than REX, with the
  41  *                 OC0 field set to 0, and the dest field taken from
  42  *                 operand 0..3.
  43  * \164..\167    - this instruction uses DREX rather than REX, with the
  44  *                 OC0 field set to 1, and the dest field taken from
  45  *                 operand 0..3.
  46  * \171          - placement of DREX suffix in the absence of an EA
  47  * \172\ab       - the register number from operand a in bits 7..4, with
  48  *                 the 4-bit immediate from operand b in bits 3..0.
  49  * \173\xab      - the register number from operand a in bits 7..4, with
  50  *                 the value b in bits 3..0.
  51  * \174\a        - the register number from operand a in bits 7..4, and
  52  *                 an arbitrary value in bits 3..0 (assembled as zero.)
  53  * \2ab          - a ModRM, calculated on EA in operand a, with the spare
  54  *                 field equal to digit b.
  55  * \250..\253    - same as \150..\153, except warn if the 64-bit operand
  56  *                 is not equal to the truncated and sign-extended 32-bit
  57  *                 operand; used for 32-bit immediates in 64-bit mode.
  58  * \254..\257    - a signed 32-bit operand to be extended to 64 bits.
  59  * \260..\263    - this instruction uses VEX rather than REX, with the
  60  *                 V field taken from operand 0..3.
  61  * \270          - this instruction uses VEX rather than REX, with the
  62  *                 V field set to 1111b.
  63  *
  64  * VEX prefixes are followed by the sequence:
  65  * \mm\wlp         where mm is the M field; and wlp is:
  66  *                 00 0ww lpp
  67  *                 [w0] ww = 0 for W = 0
  68  *                 [w1] ww = 1 for W = 1
  69  *                 [wx] ww = 2 for W don't care (always assembled as 0)
  70  *                 [ww] ww = 3 for W used as REX.W
  71  *
  72  *
  73  * \274..\277    - a signed byte immediate operand, from operand 0..3,
  74  *                 which is to be extended to the operand size.
  75  * \310          - indicates fixed 16-bit address size, i.e. optional 0x67.
  76  * \311          - indicates fixed 32-bit address size, i.e. optional 0x67.
  77  * \312          - (disassembler only) marker on LOOP, LOOPxx instructions.
  78  * \313          - indicates fixed 64-bit address size, 0x67 invalid.
  79  * \314          - (disassembler only) invalid with REX.B
  80  * \315          - (disassembler only) invalid with REX.X
  81  * \316          - (disassembler only) invalid with REX.R
  82  * \317          - (disassembler only) invalid with REX.W
  83  * \320          - indicates fixed 16-bit operand size, i.e. optional 0x66.
  84  * \321          - indicates fixed 32-bit operand size, i.e. optional 0x66.
  85  * \322          - indicates that this instruction is only valid when the
  86  *                 operand size is the default (instruction to disassembler,
  87  *                 generates no code in the assembler)
  88  * \323          - indicates fixed 64-bit operand size, REX on extensions only.
  89  * \324          - indicates 64-bit operand size requiring REX prefix.
  90  * \330          - a literal byte follows in the code stream, to be added
  91  *                 to the condition code value of the instruction.
  92  * \331          - instruction not valid with REP prefix.  Hint for
  93  *                 disassembler only; for SSE instructions.
  94  * \332          - REP prefix (0xF2 byte) used as opcode extension.
  95  * \333          - REP prefix (0xF3 byte) used as opcode extension.
  96  * \334          - LOCK prefix used instead of REX.R
  97  * \335          - disassemble a rep (0xF3 byte) prefix as repe not rep.
  98  * \336          - force a REP(E) prefix (0xF3) even if not specified.
  99  * \337          - force a REPNE prefix (0xF2) even if not specified.
 100  *                 \336-\337 are still listed as prefixes in the disassembler.
 101  * \340          - reserve <operand 0> bytes of uninitialized storage.
 102  *                 Operand 0 had better be a segmentless constant.
 103  * \341          - this instruction needs a WAIT "prefix"
 104  * \344,\345     - the PUSH/POP (respectively) codes for CS, DS, ES, SS
 105  *                 (POP is never used for CS) depending on operand 0
 106  * \346,\347     - the second byte of PUSH/POP codes for FS, GS, depending
 107  *                 on operand 0
 108  * \360          - no SSE prefix (== \364\331)
 109  * \361          - 66 SSE prefix (== \366\331)
 110  * \362          - F2 SSE prefix (== \364\332)
 111  * \363          - F3 SSE prefix (== \364\333)
 112  * \364          - operand-size prefix (0x66) not permitted
 113  * \365          - address-size prefix (0x67) not permitted
 114  * \366          - operand-size prefix (0x66) used as opcode extension
 115  * \367          - address-size prefix (0x67) used as opcode extension
 116  * \370,\371,\372 - match only if operand 0 meets byte jump criteria.
 117  *                 370 is used for Jcc, 371 is used for JMP.
 118  * \373          - assemble 0x03 if bits==16, 0x05 if bits==32;
 119  *                 used for conditional jump over longer jump
 120  */
 121
 122 #include "compiler.h"
 123
 124 #include <stdio.h>
 125 #include <string.h>
 126 #include <inttypes.h>
 127
 128 #include "nasm.h"
 129 #include "nasmlib.h"
 130 #include "assemble.h"
 131 #include "insns.h"
 132 #include "tables.h"
 133
 134 typedef struct {
 135     int sib_present;                 /* is a SIB byte necessary? */
 136     int bytes;                       /* # of bytes of offset needed */
 137     int size;                        /* lazy - this is sib+bytes+1 */
 138     uint8_t modrm, sib, rex, rip;    /* the bytes themselves */
 139 } ea;
 140
 141 static uint32_t cpu;            /* cpu level received from nasm.c */
 142 static efunc errfunc;
 143 static struct ofmt *outfmt;
 144 static ListGen *list;
 145
 146 static int64_t calcsize(int32_t, int64_t, int, insn *, const uint8_t *);
 147 static void gencode(int32_t segment, int64_t offset, int bits,
 148                     insn * ins, const struct itemplate *temp,
 149                     int64_t insn_end);
 150 static int matches(const struct itemplate *, insn *, int bits);
 151 static int32_t regflag(const operand *);
 152 static int32_t regval(const operand *);
 153 static int rexflags(int, int32_t, int);
 154 static int op_rexflags(const operand *, int);
 155 static ea *process_ea(operand *, ea *, int, int, int, int32_t);
 156 static void add_asp(insn *, int);
 157
 158 static int has_prefix(insn * ins, enum prefix_pos pos, enum prefixes prefix)
 159 {
 160     return ins->prefixes[pos] == prefix;
 161 }
 162
 163 static void assert_no_prefix(insn * ins, enum prefix_pos pos)
 164 {
 165     if (ins->prefixes[pos])
 166         errfunc(ERR_NONFATAL, "invalid %s prefix",
 167                 prefix_name(ins->prefixes[pos]));
 168 }
 169
 170 static const char *size_name(int size)
 171 {
 172     switch (size) {
 173     case 1:
 174         return "byte";
 175     case 2:
 176         return "word";
 177     case 4:
 178         return "dword";
 179     case 8:
 180         return "qword";
 181     case 10:
 182         return "tword";
 183     case 16:
 184         return "oword";
 185     case 32:
 186         return "yword";
 187     default:
 188         return "???";
 189     }
 190 }
 191
 192 static void warn_overflow(int size, const struct operand *o)
 193 {
 194     if (size < 8 && o->wrt == NO_SEG && o->segment == NO_SEG) {
 195         int64_t lim = ((int64_t)1 << (size*8))-1;
 196         int64_t data = o->offset;
 197
 198         if (data < ~lim || data > lim)
 199             errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
 200                     "%s data exceeds bounds", size_name(size));
 201     }
 202 }
 203 /*
 204  * This routine wrappers the real output format's output routine,
 205  * in order to pass a copy of the data off to the listing file
 206  * generator at the same time.
 207  */
 208 static void out(int64_t offset, int32_t segto, const void *data,
 209                 enum out_type type, uint64_t size,
 210                 int32_t segment, int32_t wrt)
 211 {
 212     static int32_t lineno = 0;     /* static!!! */
 213     static char *lnfname = NULL;
 214     uint8_t p[8];
 215
 216     if (type == OUT_ADDRESS && segment == NO_SEG && wrt == NO_SEG) {
 217         /*
 218          * This is a non-relocated address, and we're going to
 219          * convert it into RAWDATA format.
 220          */
 221         uint8_t *q = p;
 222
 223         if (size > 8) {
 224             errfunc(ERR_PANIC, "OUT_ADDRESS with size > 8");
 225             return;
 226         }
 227
 228         WRITEADDR(q, *(int64_t *)data, size);
 229         data = p;
 230         type = OUT_RAWDATA;
 231     }
 232
 233     list->output(offset, data, type, size);
 234
 235     /*
 236      * this call to src_get determines when we call the
 237      * debug-format-specific "linenum" function
 238      * it updates lineno and lnfname to the current values
 239      * returning 0 if "same as last time", -2 if lnfname
 240      * changed, and the amount by which lineno changed,
 241      * if it did. thus, these variables must be static
 242      */
 243
 244     if (src_get(&lineno, &lnfname)) {
 245         outfmt->current_dfmt->linenum(lnfname, lineno, segto);
 246     }
 247
 248     outfmt->output(segto, data, type, size, segment, wrt);
 249 }
 250
 251 static bool jmp_match(int32_t segment, int64_t offset, int bits,
 252                      insn * ins, const uint8_t *code)
 253 {
 254     int64_t isize;
 255     uint8_t c = code[0];
 256
 257     if ((c != 0370 && c != 0371) || (ins->oprs[0].type & STRICT))
 258         return false;
 259     if (!optimizing)
 260         return false;
 261     if (optimizing < 0 && c == 0371)
 262         return false;
 263
 264     isize = calcsize(segment, offset, bits, ins, code);
 265     if (ins->oprs[0].segment != segment)
 266         return false;
 267
 268     isize = ins->oprs[0].offset - offset - isize; /* isize is delta */
 269     return (isize >= -128 && isize <= 127); /* is it byte size? */
 270 }
 271
 272 int64_t assemble(int32_t segment, int64_t offset, int bits, uint32_t cp,
 273               insn * instruction, struct ofmt *output, efunc error,
 274               ListGen * listgen)
 275 {
 276     const struct itemplate *temp;
 277     int j;
 278     int size_prob;
 279     int64_t insn_end;
 280     int32_t itimes;
 281     int64_t start = offset;
 282     int64_t wsize = 0;             /* size for DB etc. */
 283
 284     errfunc = error;            /* to pass to other functions */
 285     cpu = cp;
 286     outfmt = output;            /* likewise */
 287     list = listgen;             /* and again */
 288
 289     switch (instruction->opcode) {
 290     case -1:
 291         return 0;
 292     case I_DB:
 293         wsize = 1;
 294         break;
 295     case I_DW:
 296         wsize = 2;
 297         break;
 298     case I_DD:
 299         wsize = 4;
 300         break;
 301     case I_DQ:
 302         wsize = 8;
 303         break;
 304     case I_DT:
 305         wsize = 10;
 306         break;
 307     case I_DO:
 308         wsize = 16;
 309         break;
 310     case I_DY:
 311         wsize = 32;
 312         break;
 313     default:
 314         break;
 315     }
 316
 317     if (wsize) {
 318         extop *e;
 319         int32_t t = instruction->times;
 320         if (t < 0)
 321             errfunc(ERR_PANIC,
 322                     "instruction->times < 0 (%ld) in assemble()", t);
 323
 324         while (t--) {           /* repeat TIMES times */
 325             for (e = instruction->eops; e; e = e->next) {
 326                 if (e->type == EOT_DB_NUMBER) {
 327                     if (wsize == 1) {
 328                         if (e->segment != NO_SEG)
 329                             errfunc(ERR_NONFATAL,
 330                                     "one-byte relocation attempted");
 331                         else {
 332                             uint8_t out_byte = e->offset;
 333                             out(offset, segment, &out_byte,
 334                                 OUT_RAWDATA, 1, NO_SEG, NO_SEG);
 335                         }
 336                     } else if (wsize > 8) {
 337                         errfunc(ERR_NONFATAL,
 338                                 "integer supplied to a DT, DO or DY"
 339                                 " instruction");
 340                     } else
 341                         out(offset, segment, &e->offset,
 342                             OUT_ADDRESS, wsize, e->segment, e->wrt);
 343                     offset += wsize;
 344                 } else if (e->type == EOT_DB_STRING ||
 345                            e->type == EOT_DB_STRING_FREE) {
 346                     int align;
 347
 348                     out(offset, segment, e->stringval,
 349                         OUT_RAWDATA, e->stringlen, NO_SEG, NO_SEG);
 350                     align = e->stringlen % wsize;
 351
 352                     if (align) {
 353                         align = wsize - align;
 354                         out(offset, segment, zero_buffer,
 355                             OUT_RAWDATA, align, NO_SEG, NO_SEG);
 356                     }
 357                     offset += e->stringlen + align;
 358                 }
 359             }
 360             if (t > 0 && t == instruction->times - 1) {
 361                 /*
 362                  * Dummy call to list->output to give the offset to the
 363                  * listing module.
 364                  */
 365                 list->output(offset, NULL, OUT_RAWDATA, 0);
 366                 list->uplevel(LIST_TIMES);
 367             }
 368         }
 369         if (instruction->times > 1)
 370             list->downlevel(LIST_TIMES);
 371         return offset - start;
 372     }
 373
 374     if (instruction->opcode == I_INCBIN) {
 375         const char *fname = instruction->eops->stringval;
 376         FILE *fp;
 377
 378         fp = fopen(fname, "rb");
 379         if (!fp) {
 380             error(ERR_NONFATAL, "`incbin': unable to open file `%s'",
 381                   fname);
 382         } else if (fseek(fp, 0L, SEEK_END) < 0) {
 383             error(ERR_NONFATAL, "`incbin': unable to seek on file `%s'",
 384                   fname);
 385         } else {
 386             static char buf[4096];
 387             size_t t = instruction->times;
 388             size_t base = 0;
 389             size_t len;
 390
 391             len = ftell(fp);
 392             if (instruction->eops->next) {
 393                 base = instruction->eops->next->offset;
 394                 len -= base;
 395                 if (instruction->eops->next->next &&
 396                     len > (size_t)instruction->eops->next->next->offset)
 397                     len = (size_t)instruction->eops->next->next->offset;
 398             }
 399             /*
 400              * Dummy call to list->output to give the offset to the
 401              * listing module.
 402              */
 403             list->output(offset, NULL, OUT_RAWDATA, 0);
 404             list->uplevel(LIST_INCBIN);
 405             while (t--) {
 406                 size_t l;
 407
 408                 fseek(fp, base, SEEK_SET);
 409                 l = len;
 410                 while (l > 0) {
 411                     int32_t m =
 412                         fread(buf, 1, (l > (int32_t) sizeof(buf) ? (int32_t) sizeof(buf) : l),
 413                               fp);
 414                     if (!m) {
 415                         /*
 416                          * This shouldn't happen unless the file
 417                          * actually changes while we are reading
 418                          * it.
 419                          */
 420                         error(ERR_NONFATAL,
 421                               "`incbin': unexpected EOF while"
 422                               " reading file `%s'", fname);
 423                         t = 0;  /* Try to exit cleanly */
 424                         break;
 425                     }
 426                     out(offset, segment, buf, OUT_RAWDATA, m,
 427                         NO_SEG, NO_SEG);
 428                     l -= m;
 429                 }
 430             }
 431             list->downlevel(LIST_INCBIN);
 432             if (instruction->times > 1) {
 433                 /*
 434                  * Dummy call to list->output to give the offset to the
 435                  * listing module.
 436                  */
 437                 list->output(offset, NULL, OUT_RAWDATA, 0);
 438                 list->uplevel(LIST_TIMES);
 439                 list->downlevel(LIST_TIMES);
 440             }
 441             fclose(fp);
 442             return instruction->times * len;
 443         }
 444         return 0;               /* if we're here, there's an error */
 445     }
 446
 447     /* Check to see if we need an address-size prefix */
 448     add_asp(instruction, bits);
 449
 450     size_prob = 0;
 451
 452     for (temp = nasm_instructions[instruction->opcode]; temp->opcode != -1; temp++){
 453         int m = matches(temp, instruction, bits);
 454         if (m == 100 ||
 455             (m == 99 && jmp_match(segment, offset, bits,
 456                                   instruction, temp->code))) {
 457             /* Matches! */
 458             int64_t insn_size = calcsize(segment, offset, bits,
 459                                       instruction, temp->code);
 460             itimes = instruction->times;
 461             if (insn_size < 0)  /* shouldn't be, on pass two */
 462                 error(ERR_PANIC, "errors made it through from pass one");
 463             else
 464                 while (itimes--) {
 465                     for (j = 0; j < MAXPREFIX; j++) {
 466                         uint8_t c = 0;
 467                         switch (instruction->prefixes[j]) {
 468                         case P_WAIT:
 469                             c = 0x9B;
 470                             break;
 471                         case P_LOCK:
 472                             c = 0xF0;
 473                             break;
 474                         case P_REPNE:
 475                         case P_REPNZ:
 476                             c = 0xF2;
 477                             break;
 478                         case P_REPE:
 479                         case P_REPZ:
 480                         case P_REP:
 481                             c = 0xF3;
 482                             break;
 483                         case R_CS:
 484                             if (bits == 64) {
 485                                 error(ERR_WARNING | ERR_PASS2,
 486                                       "cs segment base generated, but will be ignored in 64-bit mode");
 487                             }
 488                             c = 0x2E;
 489                             break;
 490                         case R_DS:
 491                             if (bits == 64) {
 492                                 error(ERR_WARNING | ERR_PASS2,
 493                                       "ds segment base generated, but will be ignored in 64-bit mode");
 494                             }
 495                             c = 0x3E;
 496                             break;
 497                         case R_ES:
 498                            if (bits == 64) {
 499                                 error(ERR_WARNING | ERR_PASS2,
 500                                       "es segment base generated, but will be ignored in 64-bit mode");
 501                            }
 502                             c = 0x26;
 503                             break;
 504                         case R_FS:
 505                             c = 0x64;
 506                             break;
 507                         case R_GS:
 508                             c = 0x65;
 509                             break;
 510                         case R_SS:
 511                             if (bits == 64) {
 512                                 error(ERR_WARNING | ERR_PASS2,
 513                                       "ss segment base generated, but will be ignored in 64-bit mode");
 514                             }
 515                             c = 0x36;
 516                             break;
 517                         case R_SEGR6:
 518                         case R_SEGR7:
 519                             error(ERR_NONFATAL,
 520                                   "segr6 and segr7 cannot be used as prefixes");
 521                             break;
 522                         case P_A16:
 523                             if (bits == 64) {
 524                                 error(ERR_NONFATAL,
 525                                       "16-bit addressing is not supported "
 526                                       "in 64-bit mode");
 527                             } else if (bits != 16)
 528                                 c = 0x67;
 529                             break;
 530                         case P_A32:
 531                             if (bits != 32)
 532                                 c = 0x67;
 533                             break;
 534                         case P_A64:
 535                             if (bits != 64) {
 536                                 error(ERR_NONFATAL,
 537                                       "64-bit addressing is only supported "
 538                                       "in 64-bit mode");
 539                             }
 540                             break;
 541                         case P_ASP:
 542                             c = 0x67;
 543                             break;
 544                         case P_O16:
 545                             if (bits != 16)
 546                                 c = 0x66;
 547                             break;
 548                         case P_O32:
 549                             if (bits == 16)
 550                                 c = 0x66;
 551                             break;
 552                         case P_O64:
 553                             /* REX.W */
 554                             break;
 555                         case P_OSP:
 556                             c = 0x66;
 557                             break;
 558                         case P_none:
 559                             break;
 560                         default:
 561                             error(ERR_PANIC, "invalid instruction prefix");
 562                         }
 563                         if (c != 0) {
 564                             out(offset, segment, &c, OUT_RAWDATA, 1,
 565                                 NO_SEG, NO_SEG);
 566                             offset++;
 567                         }
 568                     }
 569                     insn_end = offset + insn_size;
 570                     gencode(segment, offset, bits, instruction,
 571                             temp, insn_end);
 572                     offset += insn_size;
 573                     if (itimes > 0 && itimes == instruction->times - 1) {
 574                         /*
 575                          * Dummy call to list->output to give the offset to the
 576                          * listing module.
 577                          */
 578                         list->output(offset, NULL, OUT_RAWDATA, 0);
 579                         list->uplevel(LIST_TIMES);
 580                     }
 581                 }
 582             if (instruction->times > 1)
 583                 list->downlevel(LIST_TIMES);
 584             return offset - start;
 585         } else if (m > 0 && m > size_prob) {
 586             size_prob = m;
 587         }
 588     }
 589
 590     if (temp->opcode == -1) {   /* didn't match any instruction */
 591         switch (size_prob) {
 592         case 1:
 593             error(ERR_NONFATAL, "operation size not specified");
 594             break;
 595         case 2:
 596             error(ERR_NONFATAL, "mismatch in operand sizes");
 597             break;
 598         case 3:
 599             error(ERR_NONFATAL, "no instruction for this cpu level");
 600             break;
 601         case 4:
 602             error(ERR_NONFATAL, "instruction not supported in %d-bit mode",
 603                   bits);
 604             break;
 605         default:
 606             error(ERR_NONFATAL,
 607                   "invalid combination of opcode and operands");
 608             break;
 609         }
 610     }
 611     return 0;
 612 }
 613
 614 int64_t insn_size(int32_t segment, int64_t offset, int bits, uint32_t cp,
 615                insn * instruction, efunc error)
 616 {
 617     const struct itemplate *temp;
 618
 619     errfunc = error;            /* to pass to other functions */
 620     cpu = cp;
 621
 622     if (instruction->opcode == -1)
 623         return 0;
 624
 625     if (instruction->opcode == I_DB || instruction->opcode == I_DW ||
 626         instruction->opcode == I_DD || instruction->opcode == I_DQ ||
 627         instruction->opcode == I_DT || instruction->opcode == I_DO ||
 628         instruction->opcode == I_DY) {
 629         extop *e;
 630         int32_t isize, osize, wsize = 0;   /* placate gcc */
 631
 632         isize = 0;
 633         switch (instruction->opcode) {
 634         case I_DB:
 635             wsize = 1;
 636             break;
 637         case I_DW:
 638             wsize = 2;
 639             break;
 640         case I_DD:
 641             wsize = 4;
 642             break;
 643         case I_DQ:
 644             wsize = 8;
 645             break;
 646         case I_DT:
 647             wsize = 10;
 648             break;
 649         case I_DO:
 650             wsize = 16;
 651             break;
 652         case I_DY:
 653             wsize = 32;
 654             break;
 655         default:
 656             break;
 657         }
 658
 659         for (e = instruction->eops; e; e = e->next) {
 660             int32_t align;
 661
 662             osize = 0;
 663             if (e->type == EOT_DB_NUMBER)
 664                 osize = 1;
 665             else if (e->type == EOT_DB_STRING ||
 666                      e->type == EOT_DB_STRING_FREE)
 667                 osize = e->stringlen;
 668
 669             align = (-osize) % wsize;
 670             if (align < 0)
 671                 align += wsize;
 672             isize += osize + align;
 673         }
 674         return isize * instruction->times;
 675     }
 676
 677     if (instruction->opcode == I_INCBIN) {
 678         const char *fname = instruction->eops->stringval;
 679         FILE *fp;
 680         size_t len;
 681
 682         fp = fopen(fname, "rb");
 683         if (!fp)
 684             error(ERR_NONFATAL, "`incbin': unable to open file `%s'",
 685                   fname);
 686         else if (fseek(fp, 0L, SEEK_END) < 0)
 687             error(ERR_NONFATAL, "`incbin': unable to seek on file `%s'",
 688                   fname);
 689         else {
 690             len = ftell(fp);
 691             fclose(fp);
 692             if (instruction->eops->next) {
 693                 len -= instruction->eops->next->offset;
 694                 if (instruction->eops->next->next &&
 695                     len > (size_t)instruction->eops->next->next->offset) {
 696                     len = (size_t)instruction->eops->next->next->offset;
 697                 }
 698             }
 699             return instruction->times * len;
 700         }
 701         return 0;               /* if we're here, there's an error */
 702     }
 703
 704     /* Check to see if we need an address-size prefix */
 705     add_asp(instruction, bits);
 706
 707     for (temp = nasm_instructions[instruction->opcode]; temp->opcode != -1; temp++) {
 708         int m = matches(temp, instruction, bits);
 709         if (m == 100 ||
 710             (m == 99 && jmp_match(segment, offset, bits,
 711                                   instruction, temp->code))) {
 712             /* we've matched an instruction. */
 713             int64_t isize;
 714             const uint8_t *codes = temp->code;
 715             int j;
 716
 717             isize = calcsize(segment, offset, bits, instruction, codes);
 718             if (isize < 0)
 719                 return -1;
 720             for (j = 0; j < MAXPREFIX; j++) {
 721                 switch (instruction->prefixes[j]) {
 722                 case P_A16:
 723                     if (bits != 16)
 724                         isize++;
 725                     break;
 726                 case P_A32:
 727                     if (bits != 32)
 728                         isize++;
 729                     break;
 730                 case P_O16:
 731                     if (bits != 16)
 732                         isize++;
 733                     break;
 734                 case P_O32:
 735                     if (bits == 16)
 736                         isize++;
 737                     break;
 738                 case P_A64:
 739                 case P_O64:
 740                 case P_none:
 741                     break;
 742                 default:
 743                     isize++;
 744                     break;
 745                 }
 746             }
 747             return isize * instruction->times;
 748         }
 749     }
 750     return -1;                  /* didn't match any instruction */
 751 }
 752
 753 static bool possible_sbyte(operand *o)
 754 {
 755     return o->wrt == NO_SEG && o->segment == NO_SEG &&
 756         !(o->opflags & OPFLAG_FORWARD) &&
 757         optimizing >= 0 && !(o->type & STRICT);
 758 }
 759
 760 /* check that opn[op]  is a signed byte of size 16 or 32 */
 761 static bool is_sbyte16(operand *o)
 762 {
 763     int16_t v;
 764
 765     if (!possible_sbyte(o))
 766         return false;
 767
 768     v = o->offset;
 769     return v >= -128 && v <= 127;
 770 }
 771
 772 static bool is_sbyte32(operand *o)
 773 {
 774     int32_t v;
 775
 776     if (!possible_sbyte(o))
 777         return false;
 778
 779     v = o->offset;
 780     return v >= -128 && v <= 127;
 781 }
 782
 783 /* Common construct */
 784 #define case4(x) case (x): case (x)+1: case (x)+2: case (x)+3
 785
 786 static int64_t calcsize(int32_t segment, int64_t offset, int bits,
 787                         insn * ins, const uint8_t *codes)
 788 {
 789     int64_t length = 0;
 790     uint8_t c;
 791     int rex_mask = ~0;
 792     int op1, op2;
 793     struct operand *opx;
 794     uint8_t opex = 0;
 795
 796     ins->rex = 0;               /* Ensure REX is reset */
 797
 798     if (ins->prefixes[PPS_OSIZE] == P_O64)
 799         ins->rex |= REX_W;
 800
 801     (void)segment;              /* Don't warn that this parameter is unused */
 802     (void)offset;               /* Don't warn that this parameter is unused */
 803
 804     while (*codes) {
 805         c = *codes++;
 806         op1 = (c & 3) + ((opex & 1) << 2);
 807         op2 = ((c >> 3) & 3) + ((opex & 2) << 1);
 808         opx = &ins->oprs[op1];
 809         opex = 0;               /* For the next iteration */
 810
 811         switch (c) {
 812         case 01:
 813         case 02:
 814         case 03:
 815         case 04:
 816             codes += c, length += c;
 817             break;
 818
 819         case 05:
 820         case 06:
 821         case 07:
 822             opex = c;
 823             break;
 824
 825         case4(010):
 826             ins->rex |=
 827                 op_rexflags(opx, REX_B|REX_H|REX_P|REX_W);
 828             codes++, length++;
 829             break;
 830
 831         case4(014):
 832         case4(020):
 833         case4(024):
 834             length++;
 835             break;
 836
 837         case4(030):
 838             length += 2;
 839             break;
 840
 841         case4(034):
 842             if (opx->type & (BITS16 | BITS32 | BITS64))
 843                 length += (opx->type & BITS16) ? 2 : 4;
 844             else
 845                 length += (bits == 16) ? 2 : 4;
 846             break;
 847
 848         case4(040):
 849             length += 4;
 850             break;
 851
 852         case4(044):
 853             length += ins->addr_size >> 3;
 854             break;
 855
 856         case4(050):
 857             length++;
 858             break;
 859
 860         case4(054):
 861             length += 8; /* MOV reg64/imm */
 862             break;
 863
 864         case4(060):
 865             length += 2;
 866             break;
 867
 868         case4(064):
 869             if (opx->type & (BITS16 | BITS32 | BITS64))
 870                 length += (opx->type & BITS16) ? 2 : 4;
 871             else
 872                 length += (bits == 16) ? 2 : 4;
 873             break;
 874
 875         case4(070):
 876             length += 4;
 877             break;
 878
 879         case4(074):
 880             length += 2;
 881             break;
 882
 883         case4(0140):
 884             length += is_sbyte16(opx) ? 1 : 2;
 885             break;
 886
 887         case4(0144):
 888             codes++;
 889             length++;
 890             break;
 891
 892         case4(0150):
 893             length += is_sbyte32(opx) ? 1 : 4;
 894             break;
 895
 896         case4(0154):
 897             codes++;
 898             length++;
 899             break;
 900
 901         case4(0160):
 902             length++;
 903             ins->rex |= REX_D;
 904             ins->drexdst = regval(opx);
 905             break;
 906
 907         case4(0164):
 908             length++;
 909             ins->rex |= REX_D|REX_OC;
 910             ins->drexdst = regval(opx);
 911             break;
 912
 913         case 0171:
 914             break;
 915
 916         case 0172:
 917         case 0173:
 918         case 0174:
 919             codes++;
 920             length++;
 921             break;
 922
 923         case4(0250):
 924             length += is_sbyte32(opx) ? 1 : 4;
 925             break;
 926
 927         case4(0254):
 928             length += 4;
 929             break;
 930
 931         case4(0260):
 932             ins->rex |= REX_V;
 933             ins->drexdst = regval(opx);
 934             ins->vex_m = *codes++;
 935             ins->vex_wlp = *codes++;
 936             break;
 937
 938         case 0270:
 939             ins->rex |= REX_V;
 940             ins->drexdst = 0;
 941             ins->vex_m = *codes++;
 942             ins->vex_wlp = *codes++;
 943             break;
 944
 945         case4(0274):
 946             length++;
 947             break;
 948
 949         case4(0300):
 950             break;
 951
 952         case 0310:
 953             if (bits == 64)
 954                 return -1;
 955             length += (bits != 16) && !has_prefix(ins, PPS_ASIZE, P_A16);
 956             break;
 957
 958         case 0311:
 959             length += (bits != 32) && !has_prefix(ins, PPS_ASIZE, P_A32);
 960             break;
 961
 962         case 0312:
 963             break;
 964
 965         case 0313:
 966             if (bits != 64 || has_prefix(ins, PPS_ASIZE, P_A16) ||
 967                 has_prefix(ins, PPS_ASIZE, P_A32))
 968                 return -1;
 969             break;
 970
 971         case4(0314):
 972             break;
 973
 974         case 0320:
 975             length += (bits != 16);
 976             break;
 977
 978         case 0321:
 979             length += (bits == 16);
 980             break;
 981
 982         case 0322:
 983             break;
 984
 985         case 0323:
 986             rex_mask &= ~REX_W;
 987             break;
 988
 989         case 0324:
 990             ins->rex |= REX_W;
 991             break;
 992
 993         case 0330:
 994             codes++, length++;
 995             break;
 996
 997         case 0331:
 998             break;
 999
1000         case 0332:
1001         case 0333:
1002             length++;
1003             break;
1004
1005         case 0334:
1006             ins->rex |= REX_L;
1007             break;
1008
1009         case 0335:
1010             break;
1011
1012         case 0336:
1013             if (!ins->prefixes[PPS_LREP])
1014                 ins->prefixes[PPS_LREP] = P_REP;
1015             break;
1016
1017         case 0337:
1018             if (!ins->prefixes[PPS_LREP])
1019                 ins->prefixes[PPS_LREP] = P_REPNE;
1020             break;
1021
1022         case 0340:
1023             if (ins->oprs[0].segment != NO_SEG)
1024                 errfunc(ERR_NONFATAL, "attempt to reserve non-constant"
1025                         " quantity of BSS space");
1026             else
1027                 length += ins->oprs[0].offset;
1028             break;
1029
1030         case 0341:
1031             if (!ins->prefixes[PPS_WAIT])
1032                 ins->prefixes[PPS_WAIT] = P_WAIT;
1033             break;
1034
1035         case4(0344):
1036             length++;
1037             break;
1038
1039         case 0360:
1040             break;
1041
1042         case 0361:
1043         case 0362:
1044         case 0363:
1045             length++;
1046             break;
1047
1048         case 0364:
1049         case 0365:
1050             break;
1051
1052         case 0366:
1053         case 0367:
1054             length++;
1055             break;
1056
1057         case 0370:
1058         case 0371:
1059         case 0372:
1060             break;
1061
1062         case 0373:
1063             length++;
1064             break;
1065
1066         case4(0100):
1067         case4(0110):
1068         case4(0120):
1069         case4(0130):
1070         case4(0200):
1071         case4(0204):
1072         case4(0210):
1073         case4(0214):
1074         case4(0220):
1075         case4(0224):
1076         case4(0230):
1077         case4(0234):
1078             {
1079                 ea ea_data;
1080                 int rfield;
1081                 int32_t rflags;
1082                 struct operand *opy = &ins->oprs[op2];
1083
1084                 ea_data.rex = 0;           /* Ensure ea.REX is initially 0 */
1085
1086                 if (c <= 0177) {
1087                     /* pick rfield from operand b (opx) */
1088                     rflags = regflag(opx);
1089                     rfield = nasm_regvals[opx->basereg];
1090                 } else {
1091                     rflags = 0;
1092                     rfield = c & 7;
1093                 }
1094                 if (!process_ea(opy, &ea_data, bits,
1095                                 ins->addr_size, rfield, rflags)) {
1096                     errfunc(ERR_NONFATAL, "invalid effective address");
1097                     return -1;
1098                 } else {
1099                     ins->rex |= ea_data.rex;
1100                     length += ea_data.size;
1101                 }
1102             }
1103             break;
1104
1105         default:
1106             errfunc(ERR_PANIC, "internal instruction table corrupt"
1107                     ": instruction code 0x%02X given", c);
1108             break;
1109         }
1110     }
1111
1112     ins->rex &= rex_mask;
1113
1114     if (ins->rex & REX_V) {
1115         int bad32 = REX_R|REX_W|REX_X|REX_B;
1116
1117         if (ins->rex & REX_H) {
1118             errfunc(ERR_NONFATAL, "cannot use high register in vex instruction");
1119             return -1;
1120         }
1121         switch (ins->vex_wlp & 030) {
1122         case 000:
1123         case 020:
1124             ins->rex &= ~REX_W;
1125             break;
1126         case 010:
1127             ins->rex |= REX_W;
1128             bad32 &= ~REX_W;
1129             break;
1130         case 030:
1131             /* Follow REX_W */
1132             break;
1133         }
1134
1135         if (bits != 64 && ((ins->rex & bad32) || ins->drexdst > 7)) {
1136             errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
1137             return -1;
1138         }
1139         if (ins->vex_m != 1 || (ins->rex & (REX_W|REX_R|REX_B)))
1140             length += 3;
1141         else
1142             length += 2;
1143     } else if (ins->rex & REX_D) {
1144         if (ins->rex & REX_H) {
1145             errfunc(ERR_NONFATAL, "cannot use high register in drex instruction");
1146             return -1;
1147         }
1148         if (bits != 64 && ((ins->rex & (REX_R|REX_W|REX_X|REX_B)) ||
1149                            ins->drexdst > 7)) {
1150             errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
1151             return -1;
1152         }
1153         length++;
1154     } else if (ins->rex & REX_REAL) {
1155         if (ins->rex & REX_H) {
1156             errfunc(ERR_NONFATAL, "cannot use high register in rex instruction");
1157             return -1;
1158         } else if (bits == 64) {
1159             length++;
1160         } else if ((ins->rex & REX_L) &&
1161                    !(ins->rex & (REX_P|REX_W|REX_X|REX_B)) &&
1162                    cpu >= IF_X86_64) {
1163             /* LOCK-as-REX.R */
1164             assert_no_prefix(ins, PPS_LREP);
1165             length++;
1166         } else {
1167             errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
1168             return -1;
1169         }
1170     }
1171
1172     return length;
1173 }
1174
/*
 * Emit the pending REX prefix byte, if one is due.
 *
 * A byte is written only in 64-bit mode, when some REX_REAL bit is set
 * in ins->rex and the instruction is not using a DREX or VEX prefix
 * (REX_D / REX_V clear).  After emission ins->rex is cleared, so later
 * invocations within the same instruction do nothing, and `offset` is
 * advanced by one.
 *
 * Relies on `ins`, `bits`, `offset` and `segment` being in scope at the
 * point of use.  Wrapped in do { } while (0) so that `EMIT_REX();` is a
 * single statement and is safe in an unbraced if/else.
 */
#define EMIT_REX()                                                      \
    do {                                                                \
        if (!(ins->rex & (REX_D|REX_V)) &&                              \
            (ins->rex & REX_REAL) && (bits == 64)) {                    \
            ins->rex = (ins->rex & REX_REAL)|REX_P;                     \
            out(offset, segment, &ins->rex, OUT_RAWDATA, 1,             \
                NO_SEG, NO_SEG);                                        \
            ins->rex = 0;                                               \
            offset += 1;                                                \
        }                                                               \
    } while (0)
1182
1183 static void gencode(int32_t segment, int64_t offset, int bits,
1184                     insn * ins, const struct itemplate *temp,
1185                     int64_t insn_end)
1186 {
1187     static char condval[] = {   /* conditional opcodes */
1188         0x7, 0x3, 0x2, 0x6, 0x2, 0x4, 0xF, 0xD, 0xC, 0xE, 0x6, 0x2,
1189         0x3, 0x7, 0x3, 0x5, 0xE, 0xC, 0xD, 0xF, 0x1, 0xB, 0x9, 0x5,
1190         0x0, 0xA, 0xA, 0xB, 0x8, 0x4
1191     };
1192     uint8_t c;
1193     uint8_t bytes[4];
1194     int64_t size;
1195     int64_t data;
1196     int op1, op2;
1197     struct operand *opx;
1198     const uint8_t *codes = temp->code;
1199     uint8_t opex = 0;
1200
1201     while (*codes) {
1202         c = *codes++;
1203         op1 = (c & 3) + ((opex & 1) << 2);
1204         op2 = ((c >> 3) & 3) + ((opex & 2) << 1);
1205         opx = &ins->oprs[op1];
1206         opex = 0;               /* For the next iteration */
1207
1208         switch (c) {
1209         case 01:
1210         case 02:
1211         case 03:
1212         case 04:
1213             EMIT_REX();
1214             out(offset, segment, codes, OUT_RAWDATA, c, NO_SEG, NO_SEG);
1215             codes += c;
1216             offset += c;
1217             break;
1218
1219         case 05:
1220         case 06:
1221         case 07:
1222             opex = c;
1223             break;
1224
1225         case4(010):
1226             EMIT_REX();
1227             bytes[0] = *codes++ + (regval(opx) & 7);
1228             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1229             offset += 1;
1230             break;
1231
1232         case4(014):
1233             /* The test for BITS8 and SBYTE here is intended to avoid
1234                warning on optimizer actions due to SBYTE, while still
1235                warn on explicit BYTE directives.  Also warn, obviously,
1236                if the optimizer isn't enabled. */
1237             if (((opx->type & BITS8) ||
1238                  !(opx->type & temp->opd[op1] & BYTENESS)) &&
1239                 (opx->offset < -128 || opx->offset > 127)) {
1240                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1241                         "signed byte value exceeds bounds");
1242             }
1243             if (opx->segment != NO_SEG) {
1244                 data = opx->offset;
1245                 out(offset, segment, &data, OUT_ADDRESS, 1,
1246                     opx->segment, opx->wrt);
1247             } else {
1248                 bytes[0] = opx->offset;
1249                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1250                     NO_SEG);
1251             }
1252             offset += 1;
1253             break;
1254
1255         case4(020):
1256             if (opx->offset < -256 || opx->offset > 255) {
1257                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1258                         "byte value exceeds bounds");
1259             }
1260             if (opx->segment != NO_SEG) {
1261                 data = opx->offset;
1262                 out(offset, segment, &data, OUT_ADDRESS, 1,
1263                     opx->segment, opx->wrt);
1264             } else {
1265                 bytes[0] = opx->offset;
1266                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1267                     NO_SEG);
1268             }
1269             offset += 1;
1270             break;
1271
1272         case4(024):
1273             if (opx->offset < 0 || opx->offset > 255)
1274                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1275                         "unsigned byte value exceeds bounds");
1276             if (opx->segment != NO_SEG) {
1277                 data = opx->offset;
1278                 out(offset, segment, &data, OUT_ADDRESS, 1,
1279                     opx->segment, opx->wrt);
1280             } else {
1281                 bytes[0] = opx->offset;
1282                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1283                     NO_SEG);
1284             }
1285             offset += 1;
1286             break;
1287
1288         case4(030):
1289             warn_overflow(2, opx);
1290             data = opx->offset;
1291             out(offset, segment, &data, OUT_ADDRESS, 2,
1292                 opx->segment, opx->wrt);
1293             offset += 2;
1294             break;
1295
1296         case4(034):
1297             if (opx->type & (BITS16 | BITS32))
1298                 size = (opx->type & BITS16) ? 2 : 4;
1299             else
1300                 size = (bits == 16) ? 2 : 4;
1301             warn_overflow(size, opx);
1302             data = opx->offset;
1303             out(offset, segment, &data, OUT_ADDRESS, size,
1304                 opx->segment, opx->wrt);
1305             offset += size;
1306             break;
1307
1308         case4(040):
1309             warn_overflow(4, opx);
1310             data = opx->offset;
1311             out(offset, segment, &data, OUT_ADDRESS, 4,
1312                 opx->segment, opx->wrt);
1313             offset += 4;
1314             break;
1315
1316         case4(044):
1317             data = opx->offset;
1318             size = ins->addr_size >> 3;
1319             warn_overflow(size, opx);
1320             out(offset, segment, &data, OUT_ADDRESS, size,
1321                 opx->segment, opx->wrt);
1322             offset += size;
1323             break;
1324
1325         case4(050):
1326             if (opx->segment != segment)
1327                 errfunc(ERR_NONFATAL,
1328                         "short relative jump outside segment");
1329             data = opx->offset - insn_end;
1330             if (data > 127 || data < -128)
1331                 errfunc(ERR_NONFATAL, "short jump is out of range");
1332             bytes[0] = data;
1333             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1334             offset += 1;
1335             break;
1336
1337         case4(054):
1338             data = (int64_t)opx->offset;
1339             out(offset, segment, &data, OUT_ADDRESS, 8,
1340                 opx->segment, opx->wrt);
1341             offset += 8;
1342             break;
1343
1344         case4(060):
1345             if (opx->segment != segment) {
1346                 data = opx->offset;
1347                 out(offset, segment, &data,
1348                     OUT_REL2ADR, insn_end - offset,
1349                     opx->segment, opx->wrt);
1350             } else {
1351                 data = opx->offset - insn_end;
1352                 out(offset, segment, &data,
1353                     OUT_ADDRESS, 2, NO_SEG, NO_SEG);
1354             }
1355             offset += 2;
1356             break;
1357
1358         case4(064):
1359             if (opx->type & (BITS16 | BITS32 | BITS64))
1360                 size = (opx->type & BITS16) ? 2 : 4;
1361             else
1362                 size = (bits == 16) ? 2 : 4;
1363             if (opx->segment != segment) {
1364                 data = opx->offset;
1365                 out(offset, segment, &data,
1366                     size == 2 ? OUT_REL2ADR : OUT_REL4ADR,
1367                     insn_end - offset, opx->segment, opx->wrt);
1368             } else {
1369                 data = opx->offset - insn_end;
1370                 out(offset, segment, &data,
1371                     OUT_ADDRESS, size, NO_SEG, NO_SEG);
1372             }
1373             offset += size;
1374             break;
1375
1376         case4(070):
1377             if (opx->segment != segment) {
1378                 data = opx->offset;
1379                 out(offset, segment, &data,
1380                     OUT_REL4ADR, insn_end - offset,
1381                     opx->segment, opx->wrt);
1382             } else {
1383                 data = opx->offset - insn_end;
1384                 out(offset, segment, &data,
1385                     OUT_ADDRESS, 4, NO_SEG, NO_SEG);
1386             }
1387             offset += 4;
1388             break;
1389
1390         case4(074):
1391             if (opx->segment == NO_SEG)
1392                 errfunc(ERR_NONFATAL, "value referenced by FAR is not"
1393                         " relocatable");
1394             data = 0;
1395             out(offset, segment, &data, OUT_ADDRESS, 2,
1396                 outfmt->segbase(1 + opx->segment),
1397                 opx->wrt);
1398             offset += 2;
1399             break;
1400
1401         case4(0140):
1402             data = opx->offset;
1403             warn_overflow(2, opx);
1404             if (is_sbyte16(opx)) {
1405                 bytes[0] = data;
1406                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1407                     NO_SEG);
1408                 offset++;
1409             } else {
1410                 out(offset, segment, &data, OUT_ADDRESS, 2,
1411                     opx->segment, opx->wrt);
1412                 offset += 2;
1413             }
1414             break;
1415
1416         case4(0144):
1417             EMIT_REX();
1418             bytes[0] = *codes++;
1419             if (is_sbyte16(opx))
1420                 bytes[0] |= 2;  /* s-bit */
1421             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1422             offset++;
1423             break;
1424
1425         case4(0150):
1426             data = opx->offset;
1427             warn_overflow(4, opx);
1428             if (is_sbyte32(opx)) {
1429                 bytes[0] = data;
1430                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1431                     NO_SEG);
1432                 offset++;
1433             } else {
1434                 out(offset, segment, &data, OUT_ADDRESS, 4,
1435                     opx->segment, opx->wrt);
1436                 offset += 4;
1437             }
1438             break;
1439
1440         case4(0154):
1441             EMIT_REX();
1442             bytes[0] = *codes++;
1443             if (is_sbyte32(opx))
1444                 bytes[0] |= 2;  /* s-bit */
1445             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1446             offset++;
1447             break;
1448
1449         case4(0160):
1450         case4(0164):
1451             break;
1452
1453         case 0171:
1454             bytes[0] =
1455                 (ins->drexdst << 4) |
1456                 (ins->rex & REX_OC ? 0x08 : 0) |
1457                 (ins->rex & (REX_R|REX_X|REX_B));
1458             ins->rex = 0;
1459             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1460             offset++;
1461             break;
1462
1463         case 0172:
1464             c = *codes++;
1465             opx = &ins->oprs[c >> 3];
1466             bytes[0] = nasm_regvals[opx->basereg] << 4;
1467             opx = &ins->oprs[c & 7];
1468             if (opx->segment != NO_SEG || opx->wrt != NO_SEG) {
1469                 errfunc(ERR_NONFATAL,
1470                         "non-absolute expression not permitted as argument %d",
1471                         c & 7);
1472             } else {
1473                 if (opx->offset & ~15) {
1474                     errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1475                             "four-bit argument exceeds bounds");
1476                 }
1477                 bytes[0] |= opx->offset & 15;
1478             }
1479             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1480             offset++;
1481             break;
1482
1483         case 0173:
1484             c = *codes++;
1485             opx = &ins->oprs[c >> 4];
1486             bytes[0] = nasm_regvals[opx->basereg] << 4;
1487             bytes[0] |= c & 15;
1488             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1489             offset++;
1490             break;
1491
1492         case 0174:
1493             c = *codes++;
1494             opx = &ins->oprs[c];
1495             bytes[0] = nasm_regvals[opx->basereg] << 4;
1496             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1497             offset++;
1498             break;
1499
1500         case4(0250):
1501             data = opx->offset;
1502             if (opx->wrt == NO_SEG && opx->segment == NO_SEG &&
1503                 (int32_t)data != (int64_t)data) {
1504                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1505                         "signed dword immediate exceeds bounds");
1506             }
1507             if (is_sbyte32(opx)) {
1508                 bytes[0] = data;
1509                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1510                     NO_SEG);
1511                 offset++;
1512             } else {
1513                 out(offset, segment, &data, OUT_ADDRESS, 4,
1514                     opx->segment, opx->wrt);
1515                 offset += 4;
1516             }
1517             break;
1518
1519         case4(0254):
1520             data = opx->offset;
1521             if (opx->wrt == NO_SEG && opx->segment == NO_SEG &&
1522                 (int32_t)data != (int64_t)data) {
1523                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1524                         "signed dword immediate exceeds bounds");
1525             }
1526             out(offset, segment, &data, OUT_ADDRESS, 4,
1527                 opx->segment, opx->wrt);
1528             offset += 4;
1529             break;
1530
1531         case4(0260):
1532         case 0270:
1533             codes += 2;
1534             if (ins->vex_m != 1 || (ins->rex & (REX_W|REX_X|REX_B))) {
1535                 bytes[0] = 0xc4;
1536                 bytes[1] = ins->vex_m | ((~ins->rex & 7) << 5);
1537                 bytes[2] = ((ins->rex & REX_W) << (7-3)) |
1538                     ((~ins->drexdst & 15)<< 3) | (ins->vex_wlp & 07);
1539                 out(offset, segment, &bytes, OUT_RAWDATA, 3, NO_SEG, NO_SEG);
1540                 offset += 3;
1541             } else {
1542                 bytes[0] = 0xc5;
1543                 bytes[1] = ((~ins->rex & REX_R) << (7-2)) |
1544                     ((~ins->drexdst & 15) << 3) | (ins->vex_wlp & 07);
1545                 out(offset, segment, &bytes, OUT_RAWDATA, 2, NO_SEG, NO_SEG);
1546                 offset += 2;
1547             }
1548             break;
1549
1550         case4(0274):
1551         {
1552             uint64_t uv, um;
1553             int s;
1554
1555             if (ins->rex & REX_W)
1556                 s = 64;
1557             else if (ins->prefixes[PPS_OSIZE] == P_O16)
1558                 s = 16;
1559             else if (ins->prefixes[PPS_OSIZE] == P_O32)
1560                 s = 32;
1561             else
1562                 s = bits;
1563
1564             um = (uint64_t)2 << (s-1);
1565             uv = opx->offset;
1566
1567             if (uv > 127 && uv < (uint64_t)-128 &&
1568                 (uv < um-128 || uv > um-1)) {
1569                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1570                         "signed byte value exceeds bounds");
1571             }
1572             if (opx->segment != NO_SEG) {
1573                 data = uv;
1574                 out(offset, segment, &data, OUT_ADDRESS, 1,
1575                     opx->segment, opx->wrt);
1576             } else {
1577                 bytes[0] = uv;
1578                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1579                     NO_SEG);
1580             }
1581             offset += 1;
1582             break;
1583         }
1584
1585         case4(0300):
1586             break;
1587
1588         case 0310:
1589             if (bits == 32 && !has_prefix(ins, PPS_ASIZE, P_A16)) {
1590                 *bytes = 0x67;
1591                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1592                 offset += 1;
1593             } else
1594                 offset += 0;
1595             break;
1596
1597         case 0311:
1598             if (bits != 32 && !has_prefix(ins, PPS_ASIZE, P_A32)) {
1599                 *bytes = 0x67;
1600                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1601                 offset += 1;
1602             } else
1603                 offset += 0;
1604             break;
1605
1606         case 0312:
1607             break;
1608
1609         case 0313:
1610             ins->rex = 0;
1611             break;
1612
1613         case4(0314):
1614             break;
1615
1616         case 0320:
1617             if (bits != 16) {
1618                 *bytes = 0x66;
1619                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1620                 offset += 1;
1621             } else
1622                 offset += 0;
1623             break;
1624
1625         case 0321:
1626             if (bits == 16) {
1627                 *bytes = 0x66;
1628                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1629                 offset += 1;
1630             } else
1631                 offset += 0;
1632             break;
1633
1634         case 0322:
1635         case 0323:
1636             break;
1637
1638         case 0324:
1639             ins->rex |= REX_W;
1640             break;
1641
1642         case 0330:
1643             *bytes = *codes++ ^ condval[ins->condition];
1644             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1645             offset += 1;
1646             break;
1647
1648         case 0331:
1649             break;
1650
1651         case 0332:
1652         case 0333:
1653             *bytes = c - 0332 + 0xF2;
1654             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1655             offset += 1;
1656             break;
1657
1658         case 0334:
1659             if (ins->rex & REX_R) {
1660                 *bytes = 0xF0;
1661                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1662                 offset += 1;
1663             }
1664             ins->rex &= ~(REX_L|REX_R);
1665             break;
1666
1667         case 0335:
1668             break;
1669
1670         case 0336:
1671         case 0337:
1672             break;
1673
1674         case 0340:
1675             if (ins->oprs[0].segment != NO_SEG)
1676                 errfunc(ERR_PANIC, "non-constant BSS size in pass two");
1677             else {
1678                 int64_t size = ins->oprs[0].offset;
1679                 if (size > 0)
1680                     out(offset, segment, NULL,
1681                         OUT_RESERVE, size, NO_SEG, NO_SEG);
1682                 offset += size;
1683             }
1684             break;
1685
1686         case 0341:
1687             break;
1688
1689         case 0344:
1690         case 0345:
1691             bytes[0] = c & 1;
1692             switch (ins->oprs[0].basereg) {
1693             case R_CS:
1694                 bytes[0] += 0x0E;
1695                 break;
1696             case R_DS:
1697                 bytes[0] += 0x1E;
1698                 break;
1699             case R_ES:
1700                 bytes[0] += 0x06;
1701                 break;
1702             case R_SS:
1703                 bytes[0] += 0x16;
1704                 break;
1705             default:
1706                 errfunc(ERR_PANIC,
1707                         "bizarre 8086 segment register received");
1708             }
1709             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1710             offset++;
1711             break;
1712
1713         case 0346:
1714         case 0347:
1715             bytes[0] = c & 1;
1716             switch (ins->oprs[0].basereg) {
1717             case R_FS:
1718                 bytes[0] += 0xA0;
1719                 break;
1720             case R_GS:
1721                 bytes[0] += 0xA8;
1722                 break;
1723             default:
1724                 errfunc(ERR_PANIC,
1725                         "bizarre 386 segment register received");
1726             }
1727             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1728             offset++;
1729             break;
1730
1731         case 0360:
1732             break;
1733
1734         case 0361:
1735             bytes[0] = 0x66;
1736             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1737             offset += 1;
1738             break;
1739
1740         case 0362:
1741         case 0363:
1742             bytes[0] = c - 0362 + 0xf2;
1743             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1744             offset += 1;
1745             break;
1746
1747         case 0364:
1748         case 0365:
1749             break;
1750
1751         case 0366:
1752         case 0367:
1753             *bytes = c - 0366 + 0x66;
1754             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1755             offset += 1;
1756             break;
1757
1758         case 0370:
1759         case 0371:
1760         case 0372:
1761             break;
1762
1763         case 0373:
1764             *bytes = bits == 16 ? 3 : 5;
1765             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1766             offset += 1;
1767             break;
1768
1769         case4(0100):
1770         case4(0110):
1771         case4(0120):
1772         case4(0130):
1773         case4(0200):
1774         case4(0204):
1775         case4(0210):
1776         case4(0214):
1777         case4(0220):
1778         case4(0224):
1779         case4(0230):
1780         case4(0234):
1781             {
1782                 ea ea_data;
1783                 int rfield;
1784                 int32_t rflags;
1785                 uint8_t *p;
1786                 int32_t s;
1787                 enum out_type type;
1788                 struct operand *opy = &ins->oprs[op2];
1789
1790                 if (c <= 0177) {
1791                     /* pick rfield from operand b (opx) */
1792                     rflags = regflag(opx);
1793                     rfield = nasm_regvals[opx->basereg];
1794                 } else {
1795                     /* rfield is constant */
1796                     rflags = 0;
1797                     rfield = c & 7;
1798                 }
1799
1800                 if (!process_ea(opy, &ea_data, bits, ins->addr_size,
1801                                 rfield, rflags)) {
1802                     errfunc(ERR_NONFATAL, "invalid effective address");
1803                 }
1804
1805
1806                 p = bytes;
1807                 *p++ = ea_data.modrm;
1808                 if (ea_data.sib_present)
1809                     *p++ = ea_data.sib;
1810
1811                 /* DREX suffixes come between the SIB and the displacement */
1812                 if (ins->rex & REX_D) {
1813                     *p++ = (ins->drexdst << 4) |
1814                            (ins->rex & REX_OC ? 0x08 : 0) |
1815                            (ins->rex & (REX_R|REX_X|REX_B));
1816                     ins->rex = 0;
1817                 }
1818
1819                 s = p - bytes;
1820                 out(offset, segment, bytes, OUT_RAWDATA, s, NO_SEG, NO_SEG);
1821
1822                 /*
1823                  * Make sure the address gets the right offset in case
1824                  * the line breaks in the .lst file (BR 1197827)
1825                  */
1826                 offset += s;
1827                 s = 0;
1828
1829                 switch (ea_data.bytes) {
1830                 case 0:
1831                     break;
1832                 case 1:
1833                 case 2:
1834                 case 4:
1835                 case 8:
1836                     data = opy->offset;
1837                     warn_overflow(ea_data.bytes, opy);
1838                     s += ea_data.bytes;
1839                     if (ea_data.rip) {
1840                         if (opy->segment == segment) {
1841                             data -= insn_end;
1842                             out(offset, segment, &data, OUT_ADDRESS,
1843                                 ea_data.bytes, NO_SEG, NO_SEG);
1844                         } else {
1845                             out(offset, segment, &data, OUT_REL4ADR,
1846                                 insn_end - offset, opy->segment, opy->wrt);
1847                         }
1848                     } else {
1849                         type = OUT_ADDRESS;
1850                         out(offset, segment, &data, OUT_ADDRESS,
1851                             ea_data.bytes, opy->segment, opy->wrt);
1852                     }
1853                     break;
1854                 default:
1855                     /* Impossible! */
1856                     errfunc(ERR_PANIC,
1857                             "Invalid amount of bytes (%d) for offset?!",
1858                             ea_data.bytes);
1859                     break;
1860                 }
1861                 offset += s;
1862             }
1863             break;
1864
1865         default:
1866             errfunc(ERR_PANIC, "internal instruction table corrupt"
1867                     ": instruction code 0x%02X given", c);
1868             break;
1869         }
1870     }
1871 }
1872
1873 static int32_t regflag(const operand * o)
1874 {
1875     if (o->basereg < EXPR_REG_START || o->basereg >= REG_ENUM_LIMIT) {
1876         errfunc(ERR_PANIC, "invalid operand passed to regflag()");
1877     }
1878     return nasm_reg_flags[o->basereg];
1879 }
1880
1881 static int32_t regval(const operand * o)
1882 {
1883     if (o->basereg < EXPR_REG_START || o->basereg >= REG_ENUM_LIMIT) {
1884         errfunc(ERR_PANIC, "invalid operand passed to regval()");
1885     }
1886     return nasm_regvals[o->basereg];
1887 }
1888
1889 static int op_rexflags(const operand * o, int mask)
1890 {
1891     int32_t flags;
1892     int val;
1893
1894     if (o->basereg < EXPR_REG_START || o->basereg >= REG_ENUM_LIMIT) {
1895         errfunc(ERR_PANIC, "invalid operand passed to op_rexflags()");
1896     }
1897
1898     flags = nasm_reg_flags[o->basereg];
1899     val = nasm_regvals[o->basereg];
1900
1901     return rexflags(val, flags, mask);
1902 }
1903
1904 static int rexflags(int val, int32_t flags, int mask)
1905 {
1906     int rex = 0;
1907
1908     if (val >= 8)
1909         rex |= REX_B|REX_X|REX_R;
1910     if (flags & BITS64)
1911         rex |= REX_W;
1912     if (!(REG_HIGH & ~flags))   /* AH, CH, DH, BH */
1913         rex |= REX_H;
1914     else if (!(REG8 & ~flags) && val >= 4) /* SPL, BPL, SIL, DIL */
1915         rex |= REX_P;
1916
1917     return rex & mask;
1918 }
1919
1920 static int matches(const struct itemplate *itemp, insn * instruction, int bits)
1921 {
1922     int i, size[MAX_OPERANDS], asize, oprs, ret;
1923
1924     ret = 100;
1925
1926     /*
1927      * Check the opcode
1928      */
1929     if (itemp->opcode != instruction->opcode)
1930         return 0;
1931
1932     /*
1933      * Count the operands
1934      */
1935     if (itemp->operands != instruction->operands)
1936         return 0;
1937
1938     /*
1939      * Check that no spurious colons or TOs are present
1940      */
1941     for (i = 0; i < itemp->operands; i++)
1942         if (instruction->oprs[i].type & ~itemp->opd[i] & (COLON | TO))
1943             return 0;
1944
1945     /*
1946      * Process size flags
1947      */
1948     if (itemp->flags & IF_ARMASK) {
1949         memset(size, 0, sizeof size);
1950
1951         i = ((itemp->flags & IF_ARMASK) >> IF_ARSHFT) - 1;
1952
1953         switch (itemp->flags & IF_SMASK) {
1954         case IF_SB:
1955             size[i] = BITS8;
1956             break;
1957         case IF_SW:
1958             size[i] = BITS16;
1959             break;
1960         case IF_SD:
1961             size[i] = BITS32;
1962             break;
1963         case IF_SQ:
1964             size[i] = BITS64;
1965             break;
1966         case IF_SO:
1967             size[i] = BITS128;
1968             break;
1969         case IF_SY:
1970             size[i] = BITS256;
1971             break;
1972         case IF_SZ:
1973             switch (bits) {
1974             case 16:
1975                 size[i] = BITS16;
1976                 break;
1977             case 32:
1978                 size[i] = BITS32;
1979                 break;
1980             case 64:
1981                 size[i] = BITS64;
1982                 break;
1983             }
1984             break;
1985         default:
1986             break;
1987         }
1988     } else {
1989         asize = 0;
1990         switch (itemp->flags & IF_SMASK) {
1991         case IF_SB:
1992             asize = BITS8;
1993             break;
1994         case IF_SW:
1995             asize = BITS16;
1996             break;
1997         case IF_SD:
1998             asize = BITS32;
1999             break;
2000         case IF_SQ:
2001             asize = BITS64;
2002             break;
2003         case IF_SO:
2004             asize = BITS128;
2005             break;
2006         case IF_SY:
2007             asize = BITS256;
2008             break;
2009         case IF_SZ:
2010             switch (bits) {
2011             case 16:
2012                 asize = BITS16;
2013                 break;
2014             case 32:
2015                 asize = BITS32;
2016                 break;
2017             case 64:
2018                 asize = BITS64;
2019                 break;
2020             }
2021             break;
2022         default:
2023             break;
2024         }
2025         for (i = 0; i < MAX_OPERANDS; i++)
2026             size[i] = asize;
2027     }
2028
2029     /*
2030      * Check that the operand flags all match up
2031      */
2032     for (i = 0; i < itemp->operands; i++) {
2033         int32_t type = instruction->oprs[i].type;
2034         if (!(type & SIZE_MASK))
2035             type |= size[i];
2036
2037         if (itemp->opd[i] & SAME_AS) {
2038             int j = itemp->opd[i] & ~SAME_AS;
2039             if (type != instruction->oprs[j].type ||
2040                 instruction->oprs[i].basereg != instruction->oprs[j].basereg)
2041                 return 0;
2042         } else if (itemp->opd[i] & ~type ||
2043             ((itemp->opd[i] & SIZE_MASK) &&
2044              ((itemp->opd[i] ^ type) & SIZE_MASK))) {
2045             if ((itemp->opd[i] & ~type & ~SIZE_MASK) ||
2046                 (type & SIZE_MASK))
2047                 return 0;
2048             else
2049                 return 1;
2050         }
2051     }
2052
2053     /*
2054      * Check operand sizes
2055      */
2056     if (itemp->flags & (IF_SM | IF_SM2)) {
2057         oprs = (itemp->flags & IF_SM2 ? 2 : itemp->operands);
2058         asize = 0;
2059         for (i = 0; i < oprs; i++) {
2060             if ((asize = itemp->opd[i] & SIZE_MASK) != 0) {
2061                 int j;
2062                 for (j = 0; j < oprs; j++)
2063                     size[j] = asize;
2064                 break;
2065             }
2066         }
2067     } else {
2068         oprs = itemp->operands;
2069     }
2070
2071     for (i = 0; i < itemp->operands; i++) {
2072         if (!(itemp->opd[i] & SIZE_MASK) &&
2073             (instruction->oprs[i].type & SIZE_MASK & ~size[i]))
2074             return 2;
2075     }
2076
2077     /*
2078      * Check template is okay at the set cpu level
2079      */
2080     if (((itemp->flags & IF_PLEVEL) > cpu))
2081         return 3;
2082
2083     /*
2084      * Verify the appropriate long mode flag.
2085      */
2086     if ((itemp->flags & (bits == 64 ? IF_NOLONG : IF_LONG)))
2087         return 4;
2088
2089     /*
2090      * Check if special handling needed for Jumps
2091      */
2092     if ((uint8_t)(itemp->code[0]) >= 0370)
2093         return 99;
2094
2095     return ret;
2096 }
2097
/*
 * process_ea: work out the ModRM byte, the optional SIB byte and the
 * number of displacement bytes needed to encode operand `input`.
 *
 *   input    - the operand: a register (register-direct form) or a
 *              memory reference (pure offset or base/index indirection)
 *   output   - filled in on success: modrm, sib_present, sib (only when
 *              sib_present), bytes (displacement byte count), rip and
 *              size (1 + sib_present + bytes).  output->rex is only
 *              ever ORed into here, never cleared.
 *              NOTE(review): this presumably relies on the caller
 *              having initialised output->rex - confirm at call sites.
 *   bits     - assembly mode; only compared against 64 in this function
 *   addrbits - address size of the instruction (16, 32 or 64)
 *   rfield   - value for the reg field (bits 5..3) of ModRM; only its
 *              low three bits go into the byte, the full value is
 *              passed to rexflags()
 *   rflags   - register flags belonging to rfield, passed to rexflags()
 *
 * Returns `output` on success, or NULL when the operand cannot be
 * encoded.  On a NULL return output->size is not set and output->rex
 * may already have had bits ORed into it.
 */
2098 static ea *process_ea(operand * input, ea * output, int bits,
2099                       int addrbits, int rfield, int32_t rflags)
2100 {
         /* forward references never get the mod=0 / mod=1 short forms below */
2101     bool forw_ref = !!(input->opflags & OPFLAG_FORWARD);
2102
2103     output->rip = false;
2104
2105     /* REX flags for the rfield operand */
2106     output->rex |= rexflags(rfield, rflags, REX_R|REX_P|REX_W|REX_H);
2107
2108     if (!(REGISTER & ~input->type)) {   /* register direct */
2109         int i;
2110         int32_t f;
2111
2112         if (input->basereg < EXPR_REG_START /* Verify as Register */
2113             || input->basereg >= REG_ENUM_LIMIT)
2114             return NULL;
2115         f = regflag(input);
2116         i = nasm_regvals[input->basereg];
2117
2118         if (REG_EA & ~f)
2119             return NULL;        /* Invalid EA register */
2120
2121         output->rex |= op_rexflags(input, REX_B|REX_P|REX_W|REX_H);
2122
2123         output->sib_present = false;             /* no SIB necessary */
2124         output->bytes = 0;  /* no offset necessary either */
             /* mod = 3 (0xC0), reg = rfield, r/m = low 3 bits of the register */
2125         output->modrm = 0xC0 | ((rfield & 7) << 3) | (i & 7);
2126     } else {                    /* it's a memory reference */
2127         if (input->basereg == -1
2128             && (input->indexreg == -1 || input->scale == 0)) {
2129             /* it's a pure offset */
2130             if (bits == 64 && (~input->type & IP_REL)) {
                   /*
                    * 64-bit mode and the operand is not IP_REL: emit
                    * mod=0, r/m=4 with a SIB of scale=0, index=4, base=5
                    * and a 4-byte displacement; rip stays false.
                    */
2131               int scale, index, base;
2132               output->sib_present = true;
2133               scale = 0;
2134               index = 4;
2135               base = 5;
2136               output->sib = (scale << 6) | (index << 3) | base;
2137               output->bytes = 4;
2138               output->modrm = 4 | ((rfield & 7) << 3);
2139               output->rip = false;
2140             } else {
                   /*
                    * mod=0 with r/m=6 and a 2-byte offset for 16-bit
                    * addressing, otherwise r/m=5 and a 4-byte offset.
                    * rip is set whenever assembling in 64-bit mode.
                    */
2141               output->sib_present = false;
2142               output->bytes = (addrbits != 16 ? 4 : 2);
2143               output->modrm = (addrbits != 16 ? 5 : 6) | ((rfield & 7) << 3);
2144               output->rip = bits == 64;
2145             }
2146         } else {                /* it's an indirection */
                 /*
                  * i/b are the index/base register enum values and s the
                  * scale; it/bt hold the corresponding nasm_regvals[]
                  * numbers (-1 when absent) and ix/bx the
                  * nasm_reg_flags[] words (0 when absent).
                  *
                  * NOTE(review): `o` is an int32_t copy of input->offset;
                  * if operand offsets can be wider than 32 bits they are
                  * truncated before the o == 0 and -128..127 tests
                  * below - confirm this is intended.
                  */
2147             int i = input->indexreg, b = input->basereg, s = input->scale;
2148             int32_t o = input->offset, seg = input->segment;
2149             int hb = input->hintbase, ht = input->hinttype;
2150             int t;
2151             int it, bt;
2152             int32_t ix, bx;     /* register flags */
2153
2154             if (s == 0)
2155                 i = -1;         /* make this easy, at least */
2156
2157             if (i >= EXPR_REG_START && i < REG_ENUM_LIMIT) {
2158                 it = nasm_regvals[i];
2159                 ix = nasm_reg_flags[i];
2160             } else {
2161                 it = -1;
2162                 ix = 0;
2163             }
2164
2165             if (b >= EXPR_REG_START && b < REG_ENUM_LIMIT) {
2166                 bt = nasm_regvals[b];
2167                 bx = nasm_reg_flags[b];
2168             } else {
2169                 bt = -1;
2170                 bx = 0;
2171             }
2172
2173             /* check for a 32/64-bit memory reference... */
2174             if ((ix|bx) & (BITS32|BITS64)) {
2175                 /* it must be a 32/64-bit memory reference. Firstly we have
2176                  * to check that all registers involved are type E/Rxx. */
                     /* sok: the address sizes still acceptable to every register seen */
2177                 int32_t sok = BITS32|BITS64;
2178
2179                 if (it != -1) {
2180                     if (!(REG64 & ~ix) || !(REG32 & ~ix))
2181                         sok &= ix;
2182                     else
2183                         return NULL;
2184                 }
2185
2186                 if (bt != -1) {
2187                     if (REG_GPR & ~bx)
2188                         return NULL; /* Invalid register */
2189                     if (~sok & bx & SIZE_MASK)
2190                         return NULL; /* Invalid size */
2191                     sok &= bx;
2192                 }
2193
2194                 /* While we're here, ensure the user didn't specify
2195                    WORD or QWORD. */
2196                 if (input->disp_size == 16 || input->disp_size == 64)
2197                     return NULL;
2198
                     /* the register sizes must be usable with the instruction's address size */
2199                 if (addrbits == 16 ||
2200                     (addrbits == 32 && !(sok & BITS32)) ||
2201                     (addrbits == 64 && !(sok & BITS64)))
2202                     return NULL;
2203
2204                 /* now reorganize base/index */
2205                 if (s == 1 && bt != it && bt != -1 && it != -1 &&
2206                     ((hb == b && ht == EAH_NOTBASE)
2207                      || (hb == i && ht == EAH_MAKEBASE))) {
2208                     /* swap if hints say so */
2209                     t = bt, bt = it, it = t;
2210                     t = bx, bx = ix, ix = t;
2211                 }
2212                 if (bt == it)     /* convert EAX+2*EAX to 3*EAX */
2213                     bt = -1, bx = 0, s++;
2214                 if (bt == -1 && s == 1 && !(hb == it && ht == EAH_NOTBASE)) {
2215                     /* make single reg base, unless hint */
2216                     bt = it, bx = ix, it = -1, ix = 0;
2217                 }
2218                 if (((s == 2 && it != REG_NUM_ESP
2219                       && !(input->eaflags & EAF_TIMESTWO)) || s == 3
2220                      || s == 5 || s == 9) && bt == -1)
2221                     bt = it, bx = ix, s--; /* convert 3*EAX to EAX+2*EAX */
2222                 if (it == -1 && (bt & 7) != REG_NUM_ESP
2223                     && (input->eaflags & EAF_TIMESTWO))
2224                     it = bt, ix = bx, bt = -1, bx = 0, s = 1;
2225                 /* the statement above converts [NOSPLIT EAX] to sib format
                        with 0x0 displacement */
2226                 if (s == 1 && it == REG_NUM_ESP) {
2227                     /* swap ESP into base if scale is 1 */
2228                     t = it, it = bt, bt = t;
2229                     t = ix, ix = bx, bx = t;
2230                 }
2231                 if (it == REG_NUM_ESP
2232                     || (s != 1 && s != 2 && s != 4 && s != 8 && it != -1))
2233                     return NULL;        /* wrong, for various reasons */
2234
2235                 output->rex |= rexflags(it, ix, REX_X);
2236                 output->rex |= rexflags(bt, bx, REX_B);
2237
2238                 if (it == -1 && (bt & 7) != REG_NUM_ESP) {
2239                     /* no SIB needed */
2240                     int mod, rm;
2241
2242                     if (bt == -1) {
2243                         rm = 5;
2244                         mod = 0;
2245                     } else {
2246                         rm = (bt & 7);
                             /*
                              * mod 0: no displacement - only for a zero,
                              *        segment-less, non-forward offset with
                              *        no size override, and never when
                              *        rm == REG_NUM_EBP
                              * mod 1: byte displacement (forced by
                              *        EAF_BYTEOFFS, or offset in -128..127)
                              * mod 2: full-size displacement
                              */
2247                         if (rm != REG_NUM_EBP && o == 0 &&
2248                                 seg == NO_SEG && !forw_ref &&
2249                                 !(input->eaflags &
2250                                   (EAF_BYTEOFFS | EAF_WORDOFFS)))
2251                             mod = 0;
2252                         else if (input->eaflags & EAF_BYTEOFFS ||
2253                                  (o >= -128 && o <= 127 && seg == NO_SEG
2254                                   && !forw_ref
2255                                   && !(input->eaflags & EAF_WORDOFFS)))
2256                             mod = 1;
2257                         else
2258                             mod = 2;
2259                     }
2260
2261                     output->sib_present = false;
                         /* 4 bytes when there is no base or mod is 2; otherwise mod (0 or 1) bytes */
2262                     output->bytes = (bt == -1 || mod == 2 ? 4 : mod);
2263                     output->modrm = (mod << 6) | ((rfield & 7) << 3) | rm;
2264                 } else {
2265                     /* we need a SIB */
2266                     int mod, scale, index, base;
2267
                         /* index field 4 is used when there is no index register */
2268                     if (it == -1)
2269                         index = 4, s = 1;
2270                     else
2271                         index = (it & 7);
2272
                         /* scale field is log2 of the scale factor */
2273                     switch (s) {
2274                     case 1:
2275                         scale = 0;
2276                         break;
2277                     case 2:
2278                         scale = 1;
2279                         break;
2280                     case 4:
2281                         scale = 2;
2282                         break;
2283                     case 8:
2284                         scale = 3;
2285                         break;
2286                     default:   /* then what the smeg is it? */
2287                         return NULL;    /* panic */
2288                     }
2289
2290                     if (bt == -1) {
2291                         base = 5;
2292                         mod = 0;
2293                     } else {
2294                         base = (bt & 7);
                             /* same mod 0 / 1 / 2 selection as in the no-SIB case */
2295                         if (base != REG_NUM_EBP && o == 0 &&
2296                                     seg == NO_SEG && !forw_ref &&
2297                                     !(input->eaflags &
2298                                       (EAF_BYTEOFFS | EAF_WORDOFFS)))
2299                             mod = 0;
2300                         else if (input->eaflags & EAF_BYTEOFFS ||
2301                                  (o >= -128 && o <= 127 && seg == NO_SEG
2302                                   && !forw_ref
2303                                   && !(input->eaflags & EAF_WORDOFFS)))
2304                             mod = 1;
2305                         else
2306                             mod = 2;
2307                     }
2308
2309                     output->sib_present = true;
2310                     output->bytes =  (bt == -1 || mod == 2 ? 4 : mod);
                         /* r/m = 4 in ModRM announces the SIB byte */
2311                     output->modrm = (mod << 6) | ((rfield & 7) << 3) | 4;
2312                     output->sib = (scale << 6) | (index << 3) | base;
2313                 }
2314             } else {            /* it's 16-bit */
2315                 int mod, rm;
2316
2317                 /* check for 64-bit long mode */
2318                 if (addrbits == 64)
2319                     return NULL;
2320
2321                 /* check all registers are BX, BP, SI or DI */
2322                 if ((b != -1 && b != R_BP && b != R_BX && b != R_SI
2323                      && b != R_DI) || (i != -1 && i != R_BP && i != R_BX
2324                                        && i != R_SI && i != R_DI))
2325                     return NULL;
2326
2327                 /* ensure the user didn't specify DWORD/QWORD */
2328                 if (input->disp_size == 32 || input->disp_size == 64)
2329                     return NULL;
2330
2331                 if (s != 1 && i != -1)
2332                     return NULL;        /* no can do, in 16-bit EA */
2333                 if (b == -1 && i != -1) {
2334                     int tmp = b;
2335                     b = i;
2336                     i = tmp;
2337                 }               /* swap */
2338                 if ((b == R_SI || b == R_DI) && i != -1) {
2339                     int tmp = b;
2340                     b = i;
2341                     i = tmp;
2342                 }
2343                 /* have BX/BP as base, SI/DI index */
2344                 if (b == i)
2345                     return NULL;        /* shouldn't ever happen, in theory */
2346                 if (i != -1 && b != -1 &&
2347                     (i == R_BP || i == R_BX || b == R_SI || b == R_DI))
2348                     return NULL;        /* invalid combinations */
2349                 if (b == -1)    /* pure offset: handled above */
2350                     return NULL;        /* so if it gets to here, panic! */
2351
                     /*
                      * Pick the r/m value: 0..3 for the base+index pairs,
                      * 4..7 for a single register.  Note that the `} else`
                      * below closes the first switch, which is the whole
                      * body of `if (i != -1)`.
                      */
2352                 rm = -1;
2353                 if (i != -1)
2354                     switch (i * 256 + b) {
2355                     case R_SI * 256 + R_BX:
2356                         rm = 0;
2357                         break;
2358                     case R_DI * 256 + R_BX:
2359                         rm = 1;
2360                         break;
2361                     case R_SI * 256 + R_BP:
2362                         rm = 2;
2363                         break;
2364                     case R_DI * 256 + R_BP:
2365                         rm = 3;
2366                         break;
2367                 } else
2368                     switch (b) {
2369                     case R_SI:
2370                         rm = 4;
2371                         break;
2372                     case R_DI:
2373                         rm = 5;
2374                         break;
2375                     case R_BP:
2376                         rm = 6;
2377                         break;
2378                     case R_BX:
2379                         rm = 7;
2380                         break;
2381                     }
2382                 if (rm == -1)   /* can't happen, in theory */
2383                     return NULL;        /* so panic if it does */
2384
                     /* mod 0 is never chosen for rm == 6 (BP alone) */
2385                 if (o == 0 && seg == NO_SEG && !forw_ref && rm != 6 &&
2386                     !(input->eaflags & (EAF_BYTEOFFS | EAF_WORDOFFS)))
2387                     mod = 0;
2388                 else if (input->eaflags & EAF_BYTEOFFS ||
2389                          (o >= -128 && o <= 127 && seg == NO_SEG
2390                           && !forw_ref
2391                           && !(input->eaflags & EAF_WORDOFFS)))
2392                     mod = 1;
2393                 else
2394                     mod = 2;
2395
2396                 output->sib_present = false;    /* no SIB - it's 16-bit */
2397                 output->bytes = mod;    /* bytes of offset needed */
2398                 output->modrm = (mod << 6) | ((rfield & 7) << 3) | rm;
2399             }
2400         }
2401     }
2402
         /* total EA size: the ModRM byte, the SIB byte if any, the displacement */
2403     output->size = 1 + output->sib_present + output->bytes;
2404     return output;
2405 }
2406
2407 static void add_asp(insn *ins, int addrbits)
2408 {
2409     int j, valid;
2410     int defdisp;
2411
2412     valid = (addrbits == 64) ? 64|32 : 32|16;
2413
2414     switch (ins->prefixes[PPS_ASIZE]) {
2415     case P_A16:
2416         valid &= 16;
2417         break;
2418     case P_A32:
2419         valid &= 32;
2420         break;
2421     case P_A64:
2422         valid &= 64;
2423         break;
2424     case P_ASP:
2425         valid &= (addrbits == 32) ? 16 : 32;
2426         break;
2427     default:
2428         break;
2429     }
2430
2431     for (j = 0; j < ins->operands; j++) {
2432         if (!(MEMORY & ~ins->oprs[j].type)) {
2433             int32_t i, b;
2434
2435             /* Verify as Register */
2436             if (ins->oprs[j].indexreg < EXPR_REG_START
2437                 || ins->oprs[j].indexreg >= REG_ENUM_LIMIT)
2438                 i = 0;
2439             else
2440                 i = nasm_reg_flags[ins->oprs[j].indexreg];
2441
2442             /* Verify as Register */
2443             if (ins->oprs[j].basereg < EXPR_REG_START
2444                 || ins->oprs[j].basereg >= REG_ENUM_LIMIT)
2445                 b = 0;
2446             else
2447                 b = nasm_reg_flags[ins->oprs[j].basereg];
2448
2449             if (ins->oprs[j].scale == 0)
2450                 i = 0;
2451
2452             if (!i && !b) {
2453                 int ds = ins->oprs[j].disp_size;
2454                 if ((addrbits != 64 && ds > 8) ||
2455                     (addrbits == 64 && ds == 16))
2456                     valid &= ds;
2457             } else {
2458                 if (!(REG16 & ~b))
2459                     valid &= 16;
2460                 if (!(REG32 & ~b))
2461                     valid &= 32;
2462                 if (!(REG64 & ~b))
2463                     valid &= 64;
2464
2465                 if (!(REG16 & ~i))
2466                     valid &= 16;
2467                 if (!(REG32 & ~i))
2468                     valid &= 32;
2469                 if (!(REG64 & ~i))
2470                     valid &= 64;
2471             }
2472         }
2473     }
2474
2475     if (valid & addrbits) {
2476         ins->addr_size = addrbits;
2477     } else if (valid & ((addrbits == 32) ? 16 : 32)) {
2478         /* Add an address size prefix */
2479         enum prefixes pref = (addrbits == 32) ? P_A16 : P_A32;
2480         ins->prefixes[PPS_ASIZE] = pref;
2481         ins->addr_size = (addrbits == 32) ? 16 : 32;
2482     } else {
2483         /* Impossible... */
2484         errfunc(ERR_NONFATAL, "impossible combination of address sizes");
2485         ins->addr_size = addrbits; /* Error recovery */
2486     }
2487
2488     defdisp = ins->addr_size == 16 ? 16 : 32;
2489
2490     for (j = 0; j < ins->operands; j++) {
2491         if (!(MEM_OFFS & ~ins->oprs[j].type) &&
2492             (ins->oprs[j].disp_size ? ins->oprs[j].disp_size : defdisp)
2493             != ins->addr_size) {
2494             /* mem_offs sizes must match the address size; if not,
2495                strip the MEM_OFFS bit and match only EA instructions */
2496             ins->oprs[j].type &= ~(MEM_OFFS & ~MEMORY);
2497         }
2498     }
2499 }