assemble.c

   1 /* assemble.c   code generation for the Netwide Assembler
   2  *
   3  * The Netwide Assembler is copyright (C) 1996 Simon Tatham and
   4  * Julian Hall. All rights reserved. The software is
   5  * redistributable under the license given in the file "LICENSE"
   6  * distributed in the NASM archive.
   7  *
   8  * the actual codes (C syntax, i.e. octal):
   9  * \0            - terminates the code. (Unless it's a literal of course.)
  10  * \1, \2, \3    - that many literal bytes follow in the code stream
  11  * \10..\13      - a literal byte follows in the code stream, to be added
  12  *                 to the register value of operand 0..3
  13  * \14..\17      - a signed byte immediate operand, from operand 0..3
  14  * \20..\23      - a byte immediate operand, from operand 0..3
  15  * \24..\27      - an unsigned byte immediate operand, from operand 0..3
  16  * \30..\33      - a word immediate operand, from operand 0..3
  17  * \34..\37      - select between \3[0-3] and \4[0-3] depending on 16/32 bit
  18  *                 assembly mode or the operand-size override on the operand
  19  * \40..\43      - a long immediate operand, from operand 0..3
  20  * \44..\47      - select between \3[0-3], \4[0-3] and \5[4-7]
  21  *                 depending on the address size of the instruction.
  22  * \50..\53      - a byte relative operand, from operand 0..3
  23  * \54..\57      - a qword immediate operand, from operand 0..3
  24  * \60..\63      - a word relative operand, from operand 0..3
  25  * \64..\67      - select between \6[0-3] and \7[0-3] depending on 16/32 bit
  26  *                 assembly mode or the operand-size override on the operand
  27  * \70..\73      - a long relative operand, from operand 0..3
  28  * \74..\77      - a word constant, from the _segment_ part of operand 0..3
  29  * \1ab          - a ModRM, calculated on EA in operand a, with the spare
  30  *                 field the register value of operand b.
  31  * \140..\143    - an immediate word or signed byte for operand 0..3
  32  * \144..\147    - or 2 (s-field) into opcode byte if operand 0..3
  33  *                  is a signed byte rather than a word.  Opcode byte follows.
  34  * \150..\153    - an immediate dword or signed byte for operand 0..3
  35  * \154..\157    - or 2 (s-field) into opcode byte if operand 0..3
  36  *                  is a signed byte rather than a dword.  Opcode byte follows.
  37  * \160..\163    - this instruction uses DREX rather than REX, with the
  38  *                 OC0 field set to 0, and the dest field taken from
  39  *                 operand 0..3.
  40  * \164..\167    - this instruction uses DREX rather than REX, with the
  41  *                 OC0 field set to 1, and the dest field taken from
  42  *                 operand 0..3.
  43  * \171          - placement of DREX suffix in the absence of an EA
  44  * \172\ab       - the register number from operand a in bits 7..4, with
  45  *                 the 4-bit immediate from operand b in bits 3..0.
  46  * \173\xab      - the register number from operand a in bits 7..4, with
  47  *                 the value b in bits 3..0.
  48  * \174\a        - the register number from operand a in bits 7..4, and
  49  *                 an arbitrary value in bits 3..0 (assembled as zero.)
  50  * \2ab          - a ModRM, calculated on EA in operand a, with the spare
  51  *                 field equal to digit b.
  52  * \250..\253    - same as \150..\153, except warn if the 64-bit operand
  53  *                 is not equal to the truncated and sign-extended 32-bit
  54  *                 operand; used for 32-bit immediates in 64-bit mode.
  55  * \254..\257    - a signed 32-bit operand to be extended to 64 bits.
  56  * \260..\263    - this instruction uses VEX rather than REX, with the
  57  *                 V field taken from operand 0..3.
  58  * \270          - this instruction uses VEX rather than REX, with the
  59  *                 V field set to 1111b.
  60  *
  61  * VEX prefixes are followed by the sequence:
  62  * \mm\wlp         where mm is the M field; and wlp is:
  63  *                 00 0ww lpp
  64  *                 [w0] ww = 0 for W = 0
  65  *                 [w1] ww = 1 for W = 1
  66  *                 [wx] ww = 2 for W don't care (always assembled as 0)
  67  *                 [ww] ww = 3 for W used as REX.W
  68  *
  69  *
  70  * \274..\277    - a signed byte immediate operand, from operand 0..3,
  71  *                 which is to be extended to the operand size.
  72  * \310          - indicates fixed 16-bit address size, i.e. optional 0x67.
  73  * \311          - indicates fixed 32-bit address size, i.e. optional 0x67.
  74  * \312          - (disassembler only) marker on LOOP, LOOPxx instructions.
  75  * \313          - indicates fixed 64-bit address size, 0x67 invalid.
  76  * \314          - (disassembler only) invalid with REX.B
  77  * \315          - (disassembler only) invalid with REX.X
  78  * \316          - (disassembler only) invalid with REX.R
  79  * \317          - (disassembler only) invalid with REX.W
  80  * \320          - indicates fixed 16-bit operand size, i.e. optional 0x66.
  81  * \321          - indicates fixed 32-bit operand size, i.e. optional 0x66.
  82  * \322          - indicates that this instruction is only valid when the
  83  *                 operand size is the default (instruction to disassembler,
  84  *                 generates no code in the assembler)
  85  * \323          - indicates fixed 64-bit operand size, REX on extensions only.
  86  * \324          - indicates 64-bit operand size requiring REX prefix.
  87  * \330          - a literal byte follows in the code stream, to be added
  88  *                 to the condition code value of the instruction.
  89  * \331          - instruction not valid with REP prefix.  Hint for
  90  *                 disassembler only; for SSE instructions.
  91  * \332          - REP prefix (0xF2 byte) used as opcode extension.
  92  * \333          - REP prefix (0xF3 byte) used as opcode extension.
  93  * \334          - LOCK prefix used instead of REX.R
  94  * \335          - disassemble a rep (0xF3 byte) prefix as repe not rep.
   95  * \336          - force a REP(E) prefix (0xF3) even if not specified.
   96  * \337          - force a REPNE prefix (0xF2) even if not specified.
  97  *                 \336-\337 are still listed as prefixes in the disassembler.
  98  * \340          - reserve <operand 0> bytes of uninitialized storage.
  99  *                 Operand 0 had better be a segmentless constant.
 100  * \344,\345     - the PUSH/POP (respectively) codes for CS, DS, ES, SS
 101  *                 (POP is never used for CS) depending on operand 0
 102  * \346,\347     - the second byte of PUSH/POP codes for FS, GS, depending
 103  *                 on operand 0
 104  * \360          - no SSE prefix (== \364\331)
 105  * \361          - 66 SSE prefix (== \366\331)
 106  * \362          - F2 SSE prefix (== \364\332)
 107  * \363          - F3 SSE prefix (== \364\333)
 108  * \364          - operand-size prefix (0x66) not permitted
 109  * \365          - address-size prefix (0x67) not permitted
 110  * \366          - operand-size prefix (0x66) used as opcode extension
 111  * \367          - address-size prefix (0x67) used as opcode extension
 112  * \370,\371,\372 - match only if operand 0 meets byte jump criteria.
 113  *                 370 is used for Jcc, 371 is used for JMP.
 114  * \373          - assemble 0x03 if bits==16, 0x05 if bits==32;
 115  *                 used for conditional jump over longer jump
 116  */
 117
 118 #include "compiler.h"
 119
 120 #include <stdio.h>
 121 #include <string.h>
 122 #include <inttypes.h>
 123
 124 #include "nasm.h"
 125 #include "nasmlib.h"
 126 #include "assemble.h"
 127 #include "insns.h"
 128 #include "tables.h"
 129
 130 /* Initialized to zero by the C standard */
 131 static const uint8_t const_zero_buf[256];
 132
 133 typedef struct {
 134     int sib_present;                 /* is a SIB byte necessary? */
 135     int bytes;                       /* # of bytes of offset needed */
 136     int size;                        /* lazy - this is sib+bytes+1 */
 137     uint8_t modrm, sib, rex, rip;    /* the bytes themselves */
 138 } ea;
 139
 140 static uint32_t cpu;            /* cpu level received from nasm.c */
 141 static efunc errfunc;
 142 static struct ofmt *outfmt;
 143 static ListGen *list;
 144
 145 static int64_t calcsize(int32_t, int64_t, int, insn *, const uint8_t *);
 146 static void gencode(int32_t segment, int64_t offset, int bits,
 147                     insn * ins, const struct itemplate *temp,
 148                     int64_t insn_end);
 149 static int matches(const struct itemplate *, insn *, int bits);
 150 static int32_t regflag(const operand *);
 151 static int32_t regval(const operand *);
 152 static int rexflags(int, int32_t, int);
 153 static int op_rexflags(const operand *, int);
 154 static ea *process_ea(operand *, ea *, int, int, int, int32_t);
 155 static void add_asp(insn *, int);
 156
 157 static int has_prefix(insn * ins, enum prefix_pos pos, enum prefixes prefix)
 158 {
 159     return ins->prefixes[pos] == prefix;
 160 }
 161
 162 static void assert_no_prefix(insn * ins, enum prefix_pos pos)
 163 {
 164     if (ins->prefixes[pos])
 165         errfunc(ERR_NONFATAL, "invalid %s prefix",
 166                 prefix_name(ins->prefixes[pos]));
 167 }
 168
 169 static const char *size_name(int size)
 170 {
 171     switch (size) {
 172     case 1:
 173         return "byte";
 174     case 2:
 175         return "word";
 176     case 4:
 177         return "dword";
 178     case 8:
 179         return "qword";
 180     case 10:
 181         return "tword";
 182     case 16:
 183         return "oword";
 184     case 32:
 185         return "yword";
 186     default:
 187         return "???";
 188     }
 189 }
 190
 191 static void warn_overflow(int size, const struct operand *o)
 192 {
 193     if (size < 8 && o->wrt == NO_SEG && o->segment == NO_SEG) {
 194         int64_t lim = ((int64_t)1 << (size*8))-1;
 195         int64_t data = o->offset;
 196
 197         if (data < ~lim || data > lim)
 198             errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
 199                     "%s data exceeds bounds", size_name(size));
 200     }
 201 }
 202 /*
 203  * This routine wrappers the real output format's output routine,
 204  * in order to pass a copy of the data off to the listing file
 205  * generator at the same time.
 206  */
 207 static void out(int64_t offset, int32_t segto, const void *data,
 208                 enum out_type type, uint64_t size,
 209                 int32_t segment, int32_t wrt)
 210 {
 211     static int32_t lineno = 0;     /* static!!! */
 212     static char *lnfname = NULL;
 213     uint8_t p[8];
 214
 215     if (type == OUT_ADDRESS && segment == NO_SEG && wrt == NO_SEG) {
 216         /*
 217          * This is a non-relocated address, and we're going to
 218          * convert it into RAWDATA format.
 219          */
 220         uint8_t *q = p;
 221
 222         if (size > 8) {
 223             errfunc(ERR_PANIC, "OUT_ADDRESS with size > 8");
 224             return;
 225         }
 226
 227         WRITEADDR(q, *(int64_t *)data, size);
 228         data = p;
 229         type = OUT_RAWDATA;
 230     }
 231
 232     list->output(offset, data, type, size);
 233
 234     /*
 235      * this call to src_get determines when we call the
 236      * debug-format-specific "linenum" function
 237      * it updates lineno and lnfname to the current values
 238      * returning 0 if "same as last time", -2 if lnfname
 239      * changed, and the amount by which lineno changed,
 240      * if it did. thus, these variables must be static
 241      */
 242
 243     if (src_get(&lineno, &lnfname)) {
 244         outfmt->current_dfmt->linenum(lnfname, lineno, segto);
 245     }
 246
 247     outfmt->output(segto, data, type, size, segment, wrt);
 248 }
 249
 250 static bool jmp_match(int32_t segment, int64_t offset, int bits,
 251                      insn * ins, const uint8_t *code)
 252 {
 253     int64_t isize;
 254     uint8_t c = code[0];
 255
 256     if ((c != 0370 && c != 0371) || (ins->oprs[0].type & STRICT))
 257         return false;
 258     if (!optimizing)
 259         return false;
 260     if (optimizing < 0 && c == 0371)
 261         return false;
 262
 263     isize = calcsize(segment, offset, bits, ins, code);
 264     if (ins->oprs[0].segment != segment)
 265         return false;
 266
 267     isize = ins->oprs[0].offset - offset - isize; /* isize is delta */
 268     return (isize >= -128 && isize <= 127); /* is it byte size? */
 269 }
 270
 271 int64_t assemble(int32_t segment, int64_t offset, int bits, uint32_t cp,
 272               insn * instruction, struct ofmt *output, efunc error,
 273               ListGen * listgen)
 274 {
 275     const struct itemplate *temp;
 276     int j;
 277     int size_prob;
 278     int64_t insn_end;
 279     int32_t itimes;
 280     int64_t start = offset;
 281     int64_t wsize = 0;             /* size for DB etc. */
 282
 283     errfunc = error;            /* to pass to other functions */
 284     cpu = cp;
 285     outfmt = output;            /* likewise */
 286     list = listgen;             /* and again */
 287
 288     switch (instruction->opcode) {
 289     case -1:
 290         return 0;
 291     case I_DB:
 292         wsize = 1;
 293         break;
 294     case I_DW:
 295         wsize = 2;
 296         break;
 297     case I_DD:
 298         wsize = 4;
 299         break;
 300     case I_DQ:
 301         wsize = 8;
 302         break;
 303     case I_DT:
 304         wsize = 10;
 305         break;
 306     case I_DO:
 307         wsize = 16;
 308         break;
 309     case I_DY:
 310         wsize = 32;
 311         break;
 312     default:
 313         break;
 314     }
 315
 316     if (wsize) {
 317         extop *e;
 318         int32_t t = instruction->times;
 319         if (t < 0)
 320             errfunc(ERR_PANIC,
 321                     "instruction->times < 0 (%ld) in assemble()", t);
 322
 323         while (t--) {           /* repeat TIMES times */
 324             for (e = instruction->eops; e; e = e->next) {
 325                 if (e->type == EOT_DB_NUMBER) {
 326                     if (wsize == 1) {
 327                         if (e->segment != NO_SEG)
 328                             errfunc(ERR_NONFATAL,
 329                                     "one-byte relocation attempted");
 330                         else {
 331                             uint8_t out_byte = e->offset;
 332                             out(offset, segment, &out_byte,
 333                                 OUT_RAWDATA, 1, NO_SEG, NO_SEG);
 334                         }
 335                     } else if (wsize > 8) {
 336                         errfunc(ERR_NONFATAL,
 337                                 "integer supplied to a DT, DO or DY"
 338                                 " instruction");
 339                     } else
 340                         out(offset, segment, &e->offset,
 341                             OUT_ADDRESS, wsize, e->segment, e->wrt);
 342                     offset += wsize;
 343                 } else if (e->type == EOT_DB_STRING ||
 344                            e->type == EOT_DB_STRING_FREE) {
 345                     int align;
 346
 347                     out(offset, segment, e->stringval,
 348                         OUT_RAWDATA, e->stringlen, NO_SEG, NO_SEG);
 349                     align = e->stringlen % wsize;
 350
 351                     if (align) {
 352                         align = wsize - align;
 353                         out(offset, segment, const_zero_buf,
 354                             OUT_RAWDATA, align, NO_SEG, NO_SEG);
 355                     }
 356                     offset += e->stringlen + align;
 357                 }
 358             }
 359             if (t > 0 && t == instruction->times - 1) {
 360                 /*
 361                  * Dummy call to list->output to give the offset to the
 362                  * listing module.
 363                  */
 364                 list->output(offset, NULL, OUT_RAWDATA, 0);
 365                 list->uplevel(LIST_TIMES);
 366             }
 367         }
 368         if (instruction->times > 1)
 369             list->downlevel(LIST_TIMES);
 370         return offset - start;
 371     }
 372
 373     if (instruction->opcode == I_INCBIN) {
 374         const char *fname = instruction->eops->stringval;
 375         FILE *fp;
 376
 377         fp = fopen(fname, "rb");
 378         if (!fp) {
 379             error(ERR_NONFATAL, "`incbin': unable to open file `%s'",
 380                   fname);
 381         } else if (fseek(fp, 0L, SEEK_END) < 0) {
 382             error(ERR_NONFATAL, "`incbin': unable to seek on file `%s'",
 383                   fname);
 384         } else {
 385             static char buf[4096];
 386             size_t t = instruction->times;
 387             size_t base = 0;
 388             size_t len;
 389
 390             len = ftell(fp);
 391             if (instruction->eops->next) {
 392                 base = instruction->eops->next->offset;
 393                 len -= base;
 394                 if (instruction->eops->next->next &&
 395                     len > (size_t)instruction->eops->next->next->offset)
 396                     len = (size_t)instruction->eops->next->next->offset;
 397             }
 398             /*
 399              * Dummy call to list->output to give the offset to the
 400              * listing module.
 401              */
 402             list->output(offset, NULL, OUT_RAWDATA, 0);
 403             list->uplevel(LIST_INCBIN);
 404             while (t--) {
 405                 size_t l;
 406
 407                 fseek(fp, base, SEEK_SET);
 408                 l = len;
 409                 while (l > 0) {
 410                     int32_t m =
 411                         fread(buf, 1, (l > (int32_t) sizeof(buf) ? (int32_t) sizeof(buf) : l),
 412                               fp);
 413                     if (!m) {
 414                         /*
 415                          * This shouldn't happen unless the file
 416                          * actually changes while we are reading
 417                          * it.
 418                          */
 419                         error(ERR_NONFATAL,
 420                               "`incbin': unexpected EOF while"
 421                               " reading file `%s'", fname);
 422                         t = 0;  /* Try to exit cleanly */
 423                         break;
 424                     }
 425                     out(offset, segment, buf, OUT_RAWDATA, m,
 426                         NO_SEG, NO_SEG);
 427                     l -= m;
 428                 }
 429             }
 430             list->downlevel(LIST_INCBIN);
 431             if (instruction->times > 1) {
 432                 /*
 433                  * Dummy call to list->output to give the offset to the
 434                  * listing module.
 435                  */
 436                 list->output(offset, NULL, OUT_RAWDATA, 0);
 437                 list->uplevel(LIST_TIMES);
 438                 list->downlevel(LIST_TIMES);
 439             }
 440             fclose(fp);
 441             return instruction->times * len;
 442         }
 443         return 0;               /* if we're here, there's an error */
 444     }
 445
 446     /* Check to see if we need an address-size prefix */
 447     add_asp(instruction, bits);
 448
 449     size_prob = false;
 450
 451     for (temp = nasm_instructions[instruction->opcode]; temp->opcode != -1; temp++){
 452         int m = matches(temp, instruction, bits);
 453         if (m == 100 ||
 454             (m == 99 && jmp_match(segment, offset, bits,
 455                                   instruction, temp->code))) {
 456             /* Matches! */
 457             int64_t insn_size = calcsize(segment, offset, bits,
 458                                       instruction, temp->code);
 459             itimes = instruction->times;
 460             if (insn_size < 0)  /* shouldn't be, on pass two */
 461                 error(ERR_PANIC, "errors made it through from pass one");
 462             else
 463                 while (itimes--) {
 464                     for (j = 0; j < MAXPREFIX; j++) {
 465                         uint8_t c = 0;
 466                         switch (instruction->prefixes[j]) {
 467                         case P_LOCK:
 468                             c = 0xF0;
 469                             break;
 470                         case P_REPNE:
 471                         case P_REPNZ:
 472                             c = 0xF2;
 473                             break;
 474                         case P_REPE:
 475                         case P_REPZ:
 476                         case P_REP:
 477                             c = 0xF3;
 478                             break;
 479                         case R_CS:
 480                             if (bits == 64) {
 481                                 error(ERR_WARNING | ERR_PASS2,
 482                                       "cs segment base generated, but will be ignored in 64-bit mode");
 483                             }
 484                             c = 0x2E;
 485                             break;
 486                         case R_DS:
 487                             if (bits == 64) {
 488                                 error(ERR_WARNING | ERR_PASS2,
 489                                       "ds segment base generated, but will be ignored in 64-bit mode");
 490                             }
 491                             c = 0x3E;
 492                             break;
 493                         case R_ES:
 494                            if (bits == 64) {
 495                                 error(ERR_WARNING | ERR_PASS2,
 496                                       "es segment base generated, but will be ignored in 64-bit mode");
 497                            }
 498                             c = 0x26;
 499                             break;
 500                         case R_FS:
 501                             c = 0x64;
 502                             break;
 503                         case R_GS:
 504                             c = 0x65;
 505                             break;
 506                         case R_SS:
 507                             if (bits == 64) {
 508                                 error(ERR_WARNING | ERR_PASS2,
 509                                       "ss segment base generated, but will be ignored in 64-bit mode");
 510                             }
 511                             c = 0x36;
 512                             break;
 513                         case R_SEGR6:
 514                         case R_SEGR7:
 515                             error(ERR_NONFATAL,
 516                                   "segr6 and segr7 cannot be used as prefixes");
 517                             break;
 518                         case P_A16:
 519                             if (bits == 64) {
 520                                 error(ERR_NONFATAL,
 521                                       "16-bit addressing is not supported "
 522                                       "in 64-bit mode");
 523                             } else if (bits != 16)
 524                                 c = 0x67;
 525                             break;
 526                         case P_A32:
 527                             if (bits != 32)
 528                                 c = 0x67;
 529                             break;
 530                         case P_A64:
 531                             if (bits != 64) {
 532                                 error(ERR_NONFATAL,
 533                                       "64-bit addressing is only supported "
 534                                       "in 64-bit mode");
 535                             }
 536                             break;
 537                         case P_ASP:
 538                             c = 0x67;
 539                             break;
 540                         case P_O16:
 541                             if (bits != 16)
 542                                 c = 0x66;
 543                             break;
 544                         case P_O32:
 545                             if (bits == 16)
 546                                 c = 0x66;
 547                             break;
 548                         case P_O64:
 549                             /* REX.W */
 550                             break;
 551                         case P_OSP:
 552                             c = 0x66;
 553                             break;
 554                         case P_none:
 555                             break;
 556                         default:
 557                             error(ERR_PANIC, "invalid instruction prefix");
 558                         }
 559                         if (c != 0) {
 560                             out(offset, segment, &c, OUT_RAWDATA, 1,
 561                                 NO_SEG, NO_SEG);
 562                             offset++;
 563                         }
 564                     }
 565                     insn_end = offset + insn_size;
 566                     gencode(segment, offset, bits, instruction,
 567                             temp, insn_end);
 568                     offset += insn_size;
 569                     if (itimes > 0 && itimes == instruction->times - 1) {
 570                         /*
 571                          * Dummy call to list->output to give the offset to the
 572                          * listing module.
 573                          */
 574                         list->output(offset, NULL, OUT_RAWDATA, 0);
 575                         list->uplevel(LIST_TIMES);
 576                     }
 577                 }
 578             if (instruction->times > 1)
 579                 list->downlevel(LIST_TIMES);
 580             return offset - start;
 581         } else if (m > 0 && m > size_prob) {
 582             size_prob = m;
 583         }
 584     }
 585
 586     if (temp->opcode == -1) {   /* didn't match any instruction */
 587         switch (size_prob) {
 588         case 1:
 589             error(ERR_NONFATAL, "operation size not specified");
 590             break;
 591         case 2:
 592             error(ERR_NONFATAL, "mismatch in operand sizes");
 593             break;
 594         case 3:
 595             error(ERR_NONFATAL, "no instruction for this cpu level");
 596             break;
 597         case 4:
 598             error(ERR_NONFATAL, "instruction not supported in 64-bit mode");
 599             break;
 600         default:
 601             error(ERR_NONFATAL,
 602                   "invalid combination of opcode and operands");
 603             break;
 604         }
 605     }
 606     return 0;
 607 }
 608
 609 int64_t insn_size(int32_t segment, int64_t offset, int bits, uint32_t cp,
 610                insn * instruction, efunc error)
 611 {
 612     const struct itemplate *temp;
 613
 614     errfunc = error;            /* to pass to other functions */
 615     cpu = cp;
 616
 617     if (instruction->opcode == -1)
 618         return 0;
 619
 620     if (instruction->opcode == I_DB || instruction->opcode == I_DW ||
 621         instruction->opcode == I_DD || instruction->opcode == I_DQ ||
 622         instruction->opcode == I_DT || instruction->opcode == I_DO ||
 623         instruction->opcode == I_DY) {
 624         extop *e;
 625         int32_t isize, osize, wsize = 0;   /* placate gcc */
 626
 627         isize = 0;
 628         switch (instruction->opcode) {
 629         case I_DB:
 630             wsize = 1;
 631             break;
 632         case I_DW:
 633             wsize = 2;
 634             break;
 635         case I_DD:
 636             wsize = 4;
 637             break;
 638         case I_DQ:
 639             wsize = 8;
 640             break;
 641         case I_DT:
 642             wsize = 10;
 643             break;
 644         case I_DO:
 645             wsize = 16;
 646             break;
 647         case I_DY:
 648             wsize = 32;
 649             break;
 650         default:
 651             break;
 652         }
 653
 654         for (e = instruction->eops; e; e = e->next) {
 655             int32_t align;
 656
 657             osize = 0;
 658             if (e->type == EOT_DB_NUMBER)
 659                 osize = 1;
 660             else if (e->type == EOT_DB_STRING ||
 661                      e->type == EOT_DB_STRING_FREE)
 662                 osize = e->stringlen;
 663
 664             align = (-osize) % wsize;
 665             if (align < 0)
 666                 align += wsize;
 667             isize += osize + align;
 668         }
 669         return isize * instruction->times;
 670     }
 671
 672     if (instruction->opcode == I_INCBIN) {
 673         const char *fname = instruction->eops->stringval;
 674         FILE *fp;
 675         size_t len;
 676
 677         fp = fopen(fname, "rb");
 678         if (!fp)
 679             error(ERR_NONFATAL, "`incbin': unable to open file `%s'",
 680                   fname);
 681         else if (fseek(fp, 0L, SEEK_END) < 0)
 682             error(ERR_NONFATAL, "`incbin': unable to seek on file `%s'",
 683                   fname);
 684         else {
 685             len = ftell(fp);
 686             fclose(fp);
 687             if (instruction->eops->next) {
 688                 len -= instruction->eops->next->offset;
 689                 if (instruction->eops->next->next &&
 690                     len > (size_t)instruction->eops->next->next->offset) {
 691                     len = (size_t)instruction->eops->next->next->offset;
 692                 }
 693             }
 694             return instruction->times * len;
 695         }
 696         return 0;               /* if we're here, there's an error */
 697     }
 698
 699     /* Check to see if we need an address-size prefix */
 700     add_asp(instruction, bits);
 701
 702     for (temp = nasm_instructions[instruction->opcode]; temp->opcode != -1; temp++) {
 703         int m = matches(temp, instruction, bits);
 704         if (m == 100 ||
 705             (m == 99 && jmp_match(segment, offset, bits,
 706                                   instruction, temp->code))) {
 707             /* we've matched an instruction. */
 708             int64_t isize;
 709             const uint8_t *codes = temp->code;
 710             int j;
 711
 712             isize = calcsize(segment, offset, bits, instruction, codes);
 713             if (isize < 0)
 714                 return -1;
 715             for (j = 0; j < MAXPREFIX; j++) {
 716                 switch (instruction->prefixes[j]) {
 717                 case P_A16:
 718                     if (bits != 16)
 719                         isize++;
 720                     break;
 721                 case P_A32:
 722                     if (bits != 32)
 723                         isize++;
 724                     break;
 725                 case P_O16:
 726                     if (bits != 16)
 727                         isize++;
 728                     break;
 729                 case P_O32:
 730                     if (bits == 16)
 731                         isize++;
 732                     break;
 733                 case P_A64:
 734                 case P_O64:
 735                 case P_none:
 736                     break;
 737                 default:
 738                     isize++;
 739                     break;
 740                 }
 741             }
 742             return isize * instruction->times;
 743         }
 744     }
 745     return -1;                  /* didn't match any instruction */
 746 }
 747
 748 static bool possible_sbyte(operand *o)
 749 {
 750     return o->wrt == NO_SEG && o->segment == NO_SEG &&
 751         !(o->opflags & OPFLAG_FORWARD) &&
 752         optimizing >= 0 && !(o->type & STRICT);
 753 }
 754
 755 /* check that opn[op]  is a signed byte of size 16 or 32 */
 756 static bool is_sbyte16(operand *o)
 757 {
 758     int16_t v;
 759
 760     if (!possible_sbyte(o))
 761         return false;
 762
 763     v = o->offset;
 764     return v >= -128 && v <= 127;
 765 }
 766
 767 static bool is_sbyte32(operand *o)
 768 {
 769     int32_t v;
 770
 771     if (!possible_sbyte(o))
 772         return false;
 773
 774     v = o->offset;
 775     return v >= -128 && v <= 127;
 776 }
 777
 778 /* Common construct */
 779 #define case4(x) case (x): case (x)+1: case (x)+2: case (x)+3
 780
 781 static int64_t calcsize(int32_t segment, int64_t offset, int bits,
 782                         insn * ins, const uint8_t *codes)
 783 {
 784     int64_t length = 0;
 785     uint8_t c;
 786     int rex_mask = ~0;
 787     struct operand *opx;
 788
 789     ins->rex = 0;               /* Ensure REX is reset */
 790
 791     if (ins->prefixes[PPS_OSIZE] == P_O64)
 792         ins->rex |= REX_W;
 793
 794     (void)segment;              /* Don't warn that this parameter is unused */
 795     (void)offset;               /* Don't warn that this parameter is unused */
 796
 797     while (*codes) {
 798         c = *codes++;
 799         opx = &ins->oprs[c & 3];
 800         switch (c) {
 801         case 01:
 802         case 02:
 803         case 03:
 804             codes += c, length += c;
 805             break;
 806
 807         case4(010):
 808             ins->rex |=
 809                 op_rexflags(opx, REX_B|REX_H|REX_P|REX_W);
 810             codes++, length++;
 811             break;
 812
 813         case4(014):
 814         case4(020):
 815         case4(024):
 816             length++;
 817             break;
 818
 819         case4(030):
 820             length += 2;
 821             break;
 822
 823         case4(034):
 824             if (opx->type & (BITS16 | BITS32 | BITS64))
 825                 length += (opx->type & BITS16) ? 2 : 4;
 826             else
 827                 length += (bits == 16) ? 2 : 4;
 828             break;
 829
 830         case4(040):
 831             length += 4;
 832             break;
 833
 834         case4(044):
 835             length += ins->addr_size >> 3;
 836             break;
 837
 838         case4(050):
 839             length++;
 840             break;
 841
 842         case4(054):
 843             length += 8; /* MOV reg64/imm */
 844             break;
 845
 846         case4(060):
 847             length += 2;
 848             break;
 849
 850         case4(064):
 851             if (opx->type & (BITS16 | BITS32 | BITS64))
 852                 length += (opx->type & BITS16) ? 2 : 4;
 853             else
 854                 length += (bits == 16) ? 2 : 4;
 855             break;
 856
 857         case4(070):
 858             length += 4;
 859             break;
 860
 861         case4(074):
 862             length += 2;
 863             break;
 864
 865         case4(0140):
 866             length += is_sbyte16(opx) ? 1 : 2;
 867             break;
 868
 869         case4(0144):
 870             codes++;
 871             length++;
 872             break;
 873
 874         case4(0150):
 875             length += is_sbyte32(opx) ? 1 : 4;
 876             break;
 877
 878         case4(0154):
 879             codes++;
 880             length++;
 881             break;
 882
 883         case4(0160):
 884             length++;
 885             ins->rex |= REX_D;
 886             ins->drexdst = regval(opx);
 887             break;
 888
 889         case4(0164):
 890             length++;
 891             ins->rex |= REX_D|REX_OC;
 892             ins->drexdst = regval(opx);
 893             break;
 894
 895         case 0171:
 896             break;
 897
 898         case 0172:
 899         case 0173:
 900         case 0174:
 901             codes++;
 902             length++;
 903             break;
 904
 905         case4(0250):
 906             length += is_sbyte32(opx) ? 1 : 4;
 907             break;
 908
 909         case4(0254):
 910             length += 4;
 911             break;
 912
 913         case4(0260):
 914             ins->rex |= REX_V;
 915             ins->drexdst = regval(opx);
 916             ins->vex_m = *codes++;
 917             ins->vex_wlp = *codes++;
 918             break;
 919
 920         case 0270:
 921             ins->rex |= REX_V;
 922             ins->drexdst = 0;
 923             ins->vex_m = *codes++;
 924             ins->vex_wlp = *codes++;
 925             break;
 926
 927         case4(0274):
 928             length++;
 929             break;
 930
 931         case4(0300):
 932             break;
 933
 934         case 0310:
 935             if (bits == 64)
 936                 return -1;
 937             length += (bits != 16) && !has_prefix(ins, PPS_ASIZE, P_A16);
 938             break;
 939
 940         case 0311:
 941             length += (bits != 32) && !has_prefix(ins, PPS_ASIZE, P_A32);
 942             break;
 943
 944         case 0312:
 945             break;
 946
 947         case 0313:
 948             if (bits != 64 || has_prefix(ins, PPS_ASIZE, P_A16) ||
 949                 has_prefix(ins, PPS_ASIZE, P_A32))
 950                 return -1;
 951             break;
 952
 953         case4(0314):
 954             break;
 955
 956         case 0320:
 957             length += (bits != 16);
 958             break;
 959
 960         case 0321:
 961             length += (bits == 16);
 962             break;
 963
 964         case 0322:
 965             break;
 966
 967         case 0323:
 968             rex_mask &= ~REX_W;
 969             break;
 970
 971         case 0324:
 972             ins->rex |= REX_W;
 973             break;
 974
 975         case 0330:
 976             codes++, length++;
 977             break;
 978
 979         case 0331:
 980             break;
 981
 982         case 0332:
 983         case 0333:
 984             length++;
 985             break;
 986
 987         case 0334:
 988             ins->rex |= REX_L;
 989             break;
 990
 991         case 0335:
 992             break;
 993
 994         case 0336:
 995             if (!ins->prefixes[PPS_LREP])
 996                 ins->prefixes[PPS_LREP] = P_REP;
 997             break;
 998
 999         case 0337:
1000             if (!ins->prefixes[PPS_LREP])
1001                 ins->prefixes[PPS_LREP] = P_REPNE;
1002             break;
1003
1004         case 0340:
1005             if (ins->oprs[0].segment != NO_SEG)
1006                 errfunc(ERR_NONFATAL, "attempt to reserve non-constant"
1007                         " quantity of BSS space");
1008             else
1009                 length += ins->oprs[0].offset;
1010             break;
1011
1012         case4(0344):
1013             length++;
1014             break;
1015
1016         case 0360:
1017             break;
1018
1019         case 0361:
1020         case 0362:
1021         case 0363:
1022             length++;
1023             break;
1024
1025         case 0364:
1026         case 0365:
1027             break;
1028
1029         case 0366:
1030         case 0367:
1031             length++;
1032             break;
1033
1034         case 0370:
1035         case 0371:
1036         case 0372:
1037             break;
1038
1039         case 0373:
1040             length++;
1041             break;
1042
1043         case4(0100):
1044         case4(0110):
1045         case4(0120):
1046         case4(0130):
1047         case4(0200):
1048         case4(0204):
1049         case4(0210):
1050         case4(0214):
1051         case4(0220):
1052         case4(0224):
1053         case4(0230):
1054         case4(0234):
1055             {
1056                 ea ea_data;
1057                 int rfield;
1058                 int32_t rflags;
1059                 ea_data.rex = 0;           /* Ensure ea.REX is initially 0 */
1060
1061                 if (c <= 0177) {
1062                     /* pick rfield from operand b */
1063                     rflags = regflag(&ins->oprs[c & 7]);
1064                     rfield = nasm_regvals[ins->oprs[c & 7].basereg];
1065                 } else {
1066                     rflags = 0;
1067                     rfield = c & 7;
1068                 }
1069
1070                 if (!process_ea
1071                     (&ins->oprs[(c >> 3) & 7], &ea_data, bits,
1072                      ins->addr_size, rfield, rflags)) {
1073                     errfunc(ERR_NONFATAL, "invalid effective address");
1074                     return -1;
1075                 } else {
1076                     ins->rex |= ea_data.rex;
1077                     length += ea_data.size;
1078                 }
1079             }
1080             break;
1081
1082         default:
1083             errfunc(ERR_PANIC, "internal instruction table corrupt"
1084                     ": instruction code 0x%02X given", c);
1085             break;
1086         }
1087     }
1088
1089     ins->rex &= rex_mask;
1090
1091     if (ins->rex & REX_V) {
1092         int bad32 = REX_R|REX_W|REX_X|REX_B;
1093
1094         if (ins->rex & REX_H) {
1095             errfunc(ERR_NONFATAL, "cannot use high register in vex instruction");
1096             return -1;
1097         }
1098         switch (ins->vex_wlp & 030) {
1099         case 000:
1100         case 020:
1101             ins->rex &= ~REX_W;
1102             break;
1103         case 010:
1104             ins->rex |= REX_W;
1105             bad32 &= ~REX_W;
1106             break;
1107         case 030:
1108             /* Follow REX_W */
1109             break;
1110         }
1111
1112         if (bits != 64 && ((ins->rex & bad32) || ins->drexdst > 7)) {
1113             errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
1114             return -1;
1115         }
1116         if (ins->vex_m != 1 || (ins->rex & (REX_W|REX_R|REX_B)))
1117             length += 3;
1118         else
1119             length += 2;
1120     } else if (ins->rex & REX_D) {
1121         if (ins->rex & REX_H) {
1122             errfunc(ERR_NONFATAL, "cannot use high register in drex instruction");
1123             return -1;
1124         }
1125         if (bits != 64 && ((ins->rex & (REX_R|REX_W|REX_X|REX_B)) ||
1126                            ins->drexdst > 7)) {
1127             errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
1128             return -1;
1129         }
1130         length++;
1131     } else if (ins->rex & REX_REAL) {
1132         if (ins->rex & REX_H) {
1133             errfunc(ERR_NONFATAL, "cannot use high register in rex instruction");
1134             return -1;
1135         } else if (bits == 64) {
1136             length++;
1137         } else if ((ins->rex & REX_L) &&
1138                    !(ins->rex & (REX_P|REX_W|REX_X|REX_B)) &&
1139                    cpu >= IF_X86_64) {
1140             /* LOCK-as-REX.R */
1141             assert_no_prefix(ins, PPS_LREP);
1142             length++;
1143         } else {
1144             errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
1145             return -1;
1146         }
1147     }
1148
1149     return length;
1150 }
1151
/*
 * EMIT_REX: output a pending REX prefix byte, if any.
 *
 * Relies on the caller having `ins', `bits', `offset' and `segment' in
 * scope.  A byte is written only when neither REX_D nor REX_V is set in
 * ins->rex, at least one REX_REAL bit is set, and bits is 64.  In that
 * case ins->rex is reduced to its REX_REAL bits plus REX_P, one byte is
 * passed to out(), ins->rex is cleared so the prefix is not emitted
 * again, and offset is advanced by one.
 *
 * Wrapped in do { } while (0) so that "EMIT_REX();" behaves as a single
 * statement, including inside an unbraced if/else.
 */
#define EMIT_REX()                                                      \
    do {                                                                \
        if (!(ins->rex & (REX_D|REX_V)) &&                              \
            (ins->rex & REX_REAL) && (bits == 64)) {                    \
            ins->rex = (ins->rex & REX_REAL)|REX_P;                     \
            out(offset, segment, &ins->rex, OUT_RAWDATA, 1, NO_SEG, NO_SEG); \
            ins->rex = 0;                                               \
            offset += 1;                                                \
        }                                                               \
    } while (0)
1159
1160 static void gencode(int32_t segment, int64_t offset, int bits,
1161                     insn * ins, const struct itemplate *temp,
1162                     int64_t insn_end)
1163 {
1164     static char condval[] = {   /* conditional opcodes */
1165         0x7, 0x3, 0x2, 0x6, 0x2, 0x4, 0xF, 0xD, 0xC, 0xE, 0x6, 0x2,
1166         0x3, 0x7, 0x3, 0x5, 0xE, 0xC, 0xD, 0xF, 0x1, 0xB, 0x9, 0x5,
1167         0x0, 0xA, 0xA, 0xB, 0x8, 0x4
1168     };
1169     uint8_t c;
1170     uint8_t bytes[4];
1171     int64_t size;
1172     int64_t data;
1173     struct operand *opx;
1174     const uint8_t *codes = temp->code;
1175
1176     while (*codes) {
1177         c = *codes++;
1178         opx = &ins->oprs[c & 3];
1179         switch (c) {
1180         case 01:
1181         case 02:
1182         case 03:
1183             EMIT_REX();
1184             out(offset, segment, codes, OUT_RAWDATA, c, NO_SEG, NO_SEG);
1185             codes += c;
1186             offset += c;
1187             break;
1188
1189         case4(010):
1190             EMIT_REX();
1191             bytes[0] = *codes++ + ((regval(opx)) & 7);
1192             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1193             offset += 1;
1194             break;
1195
1196         case4(014):
1197             /* The test for BITS8 and SBYTE here is intended to avoid
1198                warning on optimizer actions due to SBYTE, while still
1199                warn on explicit BYTE directives.  Also warn, obviously,
1200                if the optimizer isn't enabled. */
1201             if (((opx->type & BITS8) ||
1202                  !(opx->type & temp->opd[c & 3] & BYTENESS)) &&
1203                 (opx->offset < -128 || opx->offset > 127)) {
1204                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1205                         "signed byte value exceeds bounds");
1206             }
1207             if (opx->segment != NO_SEG) {
1208                 data = opx->offset;
1209                 out(offset, segment, &data, OUT_ADDRESS, 1,
1210                     opx->segment, opx->wrt);
1211             } else {
1212                 bytes[0] = opx->offset;
1213                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1214                     NO_SEG);
1215             }
1216             offset += 1;
1217             break;
1218
1219         case4(020):
1220             if (opx->offset < -256 || opx->offset > 255) {
1221                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1222                         "byte value exceeds bounds");
1223             }
1224             if (opx->segment != NO_SEG) {
1225                 data = opx->offset;
1226                 out(offset, segment, &data, OUT_ADDRESS, 1,
1227                     opx->segment, opx->wrt);
1228             } else {
1229                 bytes[0] = opx->offset;
1230                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1231                     NO_SEG);
1232             }
1233             offset += 1;
1234             break;
1235
1236         case4(024):
1237             if (opx->offset < 0 || opx->offset > 255)
1238                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1239                         "unsigned byte value exceeds bounds");
1240             if (opx->segment != NO_SEG) {
1241                 data = opx->offset;
1242                 out(offset, segment, &data, OUT_ADDRESS, 1,
1243                     opx->segment, opx->wrt);
1244             } else {
1245                 bytes[0] = opx->offset;
1246                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1247                     NO_SEG);
1248             }
1249             offset += 1;
1250             break;
1251
1252         case4(030):
1253             warn_overflow(2, opx);
1254             data = opx->offset;
1255             out(offset, segment, &data, OUT_ADDRESS, 2,
1256                 opx->segment, opx->wrt);
1257             offset += 2;
1258             break;
1259
1260         case4(034):
1261             if (opx->type & (BITS16 | BITS32))
1262                 size = (opx->type & BITS16) ? 2 : 4;
1263             else
1264                 size = (bits == 16) ? 2 : 4;
1265             warn_overflow(size, opx);
1266             data = opx->offset;
1267             out(offset, segment, &data, OUT_ADDRESS, size,
1268                 opx->segment, opx->wrt);
1269             offset += size;
1270             break;
1271
1272         case4(040):
1273             warn_overflow(4, opx);
1274             data = opx->offset;
1275             out(offset, segment, &data, OUT_ADDRESS, 4,
1276                 opx->segment, opx->wrt);
1277             offset += 4;
1278             break;
1279
1280         case4(044):
1281             data = opx->offset;
1282             size = ins->addr_size >> 3;
1283             warn_overflow(size, opx);
1284             out(offset, segment, &data, OUT_ADDRESS, size,
1285                 opx->segment, opx->wrt);
1286             offset += size;
1287             break;
1288
1289         case4(050):
1290             if (opx->segment != segment)
1291                 errfunc(ERR_NONFATAL,
1292                         "short relative jump outside segment");
1293             data = opx->offset - insn_end;
1294             if (data > 127 || data < -128)
1295                 errfunc(ERR_NONFATAL, "short jump is out of range");
1296             bytes[0] = data;
1297             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1298             offset += 1;
1299             break;
1300
1301         case4(054):
1302             data = (int64_t)opx->offset;
1303             out(offset, segment, &data, OUT_ADDRESS, 8,
1304                 opx->segment, opx->wrt);
1305             offset += 8;
1306             break;
1307
1308         case4(060):
1309             if (opx->segment != segment) {
1310                 data = opx->offset;
1311                 out(offset, segment, &data,
1312                     OUT_REL2ADR, insn_end - offset,
1313                     opx->segment, opx->wrt);
1314             } else {
1315                 data = opx->offset - insn_end;
1316                 out(offset, segment, &data,
1317                     OUT_ADDRESS, 2, NO_SEG, NO_SEG);
1318             }
1319             offset += 2;
1320             break;
1321
1322         case4(064):
1323             if (opx->type & (BITS16 | BITS32 | BITS64))
1324                 size = (opx->type & BITS16) ? 2 : 4;
1325             else
1326                 size = (bits == 16) ? 2 : 4;
1327             if (opx->segment != segment) {
1328                 data = opx->offset;
1329                 out(offset, segment, &data,
1330                     size == 2 ? OUT_REL2ADR : OUT_REL4ADR,
1331                     insn_end - offset, opx->segment, opx->wrt);
1332             } else {
1333                 data = opx->offset - insn_end;
1334                 out(offset, segment, &data,
1335                     OUT_ADDRESS, size, NO_SEG, NO_SEG);
1336             }
1337             offset += size;
1338             break;
1339
1340         case4(070):
1341             if (opx->segment != segment) {
1342                 data = opx->offset;
1343                 out(offset, segment, &data,
1344                     OUT_REL4ADR, insn_end - offset,
1345                     opx->segment, opx->wrt);
1346             } else {
1347                 data = opx->offset - insn_end;
1348                 out(offset, segment, &data,
1349                     OUT_ADDRESS, 4, NO_SEG, NO_SEG);
1350             }
1351             offset += 4;
1352             break;
1353
1354         case4(074):
1355             if (opx->segment == NO_SEG)
1356                 errfunc(ERR_NONFATAL, "value referenced by FAR is not"
1357                         " relocatable");
1358             data = 0;
1359             out(offset, segment, &data, OUT_ADDRESS, 2,
1360                 outfmt->segbase(1 + opx->segment),
1361                 opx->wrt);
1362             offset += 2;
1363             break;
1364
1365         case4(0140):
1366             data = opx->offset;
1367             warn_overflow(2, opx);
1368             if (is_sbyte16(opx)) {
1369                 bytes[0] = data;
1370                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1371                     NO_SEG);
1372                 offset++;
1373             } else {
1374                 out(offset, segment, &data, OUT_ADDRESS, 2,
1375                     opx->segment, opx->wrt);
1376                 offset += 2;
1377             }
1378             break;
1379
1380         case4(0144):
1381             EMIT_REX();
1382             bytes[0] = *codes++;
1383             if (is_sbyte16(opx))
1384                 bytes[0] |= 2;  /* s-bit */
1385             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1386             offset++;
1387             break;
1388
1389         case4(0150):
1390             data = opx->offset;
1391             warn_overflow(4, opx);
1392             if (is_sbyte32(opx)) {
1393                 bytes[0] = data;
1394                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1395                     NO_SEG);
1396                 offset++;
1397             } else {
1398                 out(offset, segment, &data, OUT_ADDRESS, 4,
1399                     opx->segment, opx->wrt);
1400                 offset += 4;
1401             }
1402             break;
1403
1404         case4(0154):
1405             EMIT_REX();
1406             bytes[0] = *codes++;
1407             if (is_sbyte32(opx))
1408                 bytes[0] |= 2;  /* s-bit */
1409             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1410             offset++;
1411             break;
1412
1413         case4(0160):
1414         case4(0164):
1415             break;
1416
1417         case 0171:
1418             bytes[0] =
1419                 (ins->drexdst << 4) |
1420                 (ins->rex & REX_OC ? 0x08 : 0) |
1421                 (ins->rex & (REX_R|REX_X|REX_B));
1422             ins->rex = 0;
1423             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1424             offset++;
1425             break;
1426
1427         case 0172:
1428             c = *codes++;
1429             opx = &ins->oprs[c >> 3];
1430             bytes[0] = nasm_regvals[opx->basereg] << 4;
1431             opx = &ins->oprs[c & 7];
1432             if (opx->segment != NO_SEG || opx->wrt != NO_SEG) {
1433                 errfunc(ERR_NONFATAL,
1434                         "non-absolute expression not permitted as argument %d",
1435                         c & 7);
1436             } else {
1437                 if (opx->offset & ~15) {
1438                     errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1439                             "four-bit argument exceeds bounds");
1440                 }
1441                 bytes[0] |= opx->offset & 15;
1442             }
1443             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1444             offset++;
1445             break;
1446
1447         case 0173:
1448             c = *codes++;
1449             opx = &ins->oprs[c >> 4];
1450             bytes[0] = nasm_regvals[opx->basereg] << 4;
1451             bytes[0] |= c & 15;
1452             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1453             offset++;
1454             break;
1455
1456         case 0174:
1457             c = *codes++;
1458             opx = &ins->oprs[c];
1459             bytes[0] = nasm_regvals[opx->basereg] << 4;
1460             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1461             offset++;
1462             break;
1463
1464         case4(0250):
1465             data = opx->offset;
1466             if (opx->wrt == NO_SEG && opx->segment == NO_SEG &&
1467                 (int32_t)data != (int64_t)data) {
1468                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1469                         "signed dword immediate exceeds bounds");
1470             }
1471             if (is_sbyte32(opx)) {
1472                 bytes[0] = data;
1473                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1474                     NO_SEG);
1475                 offset++;
1476             } else {
1477                 out(offset, segment, &data, OUT_ADDRESS, 4,
1478                     opx->segment, opx->wrt);
1479                 offset += 4;
1480             }
1481             break;
1482
1483         case4(0254):
1484             data = opx->offset;
1485             if (opx->wrt == NO_SEG && opx->segment == NO_SEG &&
1486                 (int32_t)data != (int64_t)data) {
1487                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1488                         "signed dword immediate exceeds bounds");
1489             }
1490             out(offset, segment, &data, OUT_ADDRESS, 4,
1491                 opx->segment, opx->wrt);
1492             offset += 4;
1493             break;
1494
1495         case4(0260):
1496         case 0270:
1497             codes += 2;
1498             if (ins->vex_m != 1 || (ins->rex & (REX_W|REX_X|REX_B))) {
1499                 bytes[0] = 0xc4;
1500                 bytes[1] = ins->vex_m | ((~ins->rex & 7) << 5);
1501                 bytes[2] = ((ins->rex & REX_W) << (7-3)) |
1502                     ((~ins->drexdst & 15)<< 3) | (ins->vex_wlp & 07);
1503                 out(offset, segment, &bytes, OUT_RAWDATA, 3, NO_SEG, NO_SEG);
1504                 offset += 3;
1505             } else {
1506                 bytes[0] = 0xc5;
1507                 bytes[1] = ((~ins->rex & REX_R) << (7-2)) |
1508                     ((~ins->drexdst & 15) << 3) | (ins->vex_wlp & 07);
1509                 out(offset, segment, &bytes, OUT_RAWDATA, 2, NO_SEG, NO_SEG);
1510                 offset += 2;
1511             }
1512             break;
1513
1514         case4(0274):
1515         {
1516             uint64_t uv, um;
1517             int s;
1518
1519             if (ins->rex & REX_W)
1520                 s = 64;
1521             else if (ins->prefixes[PPS_OSIZE] == P_O16)
1522                 s = 16;
1523             else if (ins->prefixes[PPS_OSIZE] == P_O32)
1524                 s = 32;
1525             else
1526                 s = bits;
1527
1528             um = (uint64_t)2 << (s-1);
1529             uv = opx->offset;
1530
1531             if (uv > 127 && uv < (uint64_t)-128 &&
1532                 (uv < um-128 || uv > um-1)) {
1533                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1534                         "signed byte value exceeds bounds");
1535             }
1536             if (opx->segment != NO_SEG) {
1537                 data = uv;
1538                 out(offset, segment, &data, OUT_ADDRESS, 1,
1539                     opx->segment, opx->wrt);
1540             } else {
1541                 bytes[0] = uv;
1542                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1543                     NO_SEG);
1544             }
1545             offset += 1;
1546             break;
1547         }
1548
1549         case4(0300):
1550             break;
1551
1552         case 0310:
1553             if (bits == 32 && !has_prefix(ins, PPS_ASIZE, P_A16)) {
1554                 *bytes = 0x67;
1555                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1556                 offset += 1;
1557             } else
1558                 offset += 0;
1559             break;
1560
1561         case 0311:
1562             if (bits != 32 && !has_prefix(ins, PPS_ASIZE, P_A32)) {
1563                 *bytes = 0x67;
1564                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1565                 offset += 1;
1566             } else
1567                 offset += 0;
1568             break;
1569
1570         case 0312:
1571             break;
1572
1573         case 0313:
1574             ins->rex = 0;
1575             break;
1576
1577         case4(0314):
1578             break;
1579
1580         case 0320:
1581             if (bits != 16) {
1582                 *bytes = 0x66;
1583                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1584                 offset += 1;
1585             } else
1586                 offset += 0;
1587             break;
1588
1589         case 0321:
1590             if (bits == 16) {
1591                 *bytes = 0x66;
1592                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1593                 offset += 1;
1594             } else
1595                 offset += 0;
1596             break;
1597
1598         case 0322:
1599         case 0323:
1600             break;
1601
1602         case 0324:
1603             ins->rex |= REX_W;
1604             break;
1605
1606         case 0330:
1607             *bytes = *codes++ ^ condval[ins->condition];
1608             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1609             offset += 1;
1610             break;
1611
1612         case 0331:
1613             break;
1614
1615         case 0332:
1616         case 0333:
1617             *bytes = c - 0332 + 0xF2;
1618             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1619             offset += 1;
1620             break;
1621
1622         case 0334:
1623             if (ins->rex & REX_R) {
1624                 *bytes = 0xF0;
1625                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1626                 offset += 1;
1627             }
1628             ins->rex &= ~(REX_L|REX_R);
1629             break;
1630
1631         case 0335:
1632             break;
1633
1634         case 0336:
1635         case 0337:
1636             break;
1637
1638         case 0340:
1639             if (ins->oprs[0].segment != NO_SEG)
1640                 errfunc(ERR_PANIC, "non-constant BSS size in pass two");
1641             else {
1642                 int64_t size = ins->oprs[0].offset;
1643                 if (size > 0)
1644                     out(offset, segment, NULL,
1645                         OUT_RESERVE, size, NO_SEG, NO_SEG);
1646                 offset += size;
1647             }
1648             break;
1649
1650         case 0344:
1651         case 0345:
1652             bytes[0] = c & 1;
1653             switch (ins->oprs[0].basereg) {
1654             case R_CS:
1655                 bytes[0] += 0x0E;
1656                 break;
1657             case R_DS:
1658                 bytes[0] += 0x1E;
1659                 break;
1660             case R_ES:
1661                 bytes[0] += 0x06;
1662                 break;
1663             case R_SS:
1664                 bytes[0] += 0x16;
1665                 break;
1666             default:
1667                 errfunc(ERR_PANIC,
1668                         "bizarre 8086 segment register received");
1669             }
1670             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1671             offset++;
1672             break;
1673
1674         case 0346:
1675         case 0347:
1676             bytes[0] = c & 1;
1677             switch (ins->oprs[0].basereg) {
1678             case R_FS:
1679                 bytes[0] += 0xA0;
1680                 break;
1681             case R_GS:
1682                 bytes[0] += 0xA8;
1683                 break;
1684             default:
1685                 errfunc(ERR_PANIC,
1686                         "bizarre 386 segment register received");
1687             }
1688             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1689             offset++;
1690             break;
1691
1692         case 0360:
1693             break;
1694
1695         case 0361:
1696             bytes[0] = 0x66;
1697             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1698             offset += 1;
1699             break;
1700
1701         case 0362:
1702         case 0363:
1703             bytes[0] = c - 0362 + 0xf2;
1704             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1705             offset += 1;
1706             break;
1707
1708         case 0364:
1709         case 0365:
1710             break;
1711
1712         case 0366:
1713         case 0367:
1714             *bytes = c - 0366 + 0x66;
1715             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1716             offset += 1;
1717             break;
1718
1719         case 0370:
1720         case 0371:
1721         case 0372:
1722             break;
1723
1724         case 0373:
1725             *bytes = bits == 16 ? 3 : 5;
1726             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1727             offset += 1;
1728             break;
1729
1730         case4(0100):
1731         case4(0110):
1732         case4(0120):
1733         case4(0130):
1734         case4(0200):
1735         case4(0204):
1736         case4(0210):
1737         case4(0214):
1738         case4(0220):
1739         case4(0224):
1740         case4(0230):
1741         case4(0234):
1742             {
1743                 ea ea_data;
1744                 int rfield;
1745                 int32_t rflags;
1746                 uint8_t *p;
1747                 int32_t s;
1748                 enum out_type type;
1749
1750                 if (c <= 0177) {
1751                     /* pick rfield from operand b */
1752                     rflags = regflag(&ins->oprs[c & 7]);
1753                     rfield = nasm_regvals[ins->oprs[c & 7].basereg];
1754                 } else {
1755                     /* rfield is constant */
1756                     rflags = 0;
1757                     rfield = c & 7;
1758                 }
1759
1760                 if (!process_ea
1761                     (&ins->oprs[(c >> 3) & 7], &ea_data, bits,
1762                      ins->addr_size, rfield, rflags)) {
1763                     errfunc(ERR_NONFATAL, "invalid effective address");
1764                 }
1765
1766
1767                 p = bytes;
1768                 *p++ = ea_data.modrm;
1769                 if (ea_data.sib_present)
1770                     *p++ = ea_data.sib;
1771
1772                 /* DREX suffixes come between the SIB and the displacement */
1773                 if (ins->rex & REX_D) {
1774                     *p++ =
1775                         (ins->drexdst << 4) |
1776                         (ins->rex & REX_OC ? 0x08 : 0) |
1777                         (ins->rex & (REX_R|REX_X|REX_B));
1778                     ins->rex = 0;
1779                 }
1780
1781                 s = p - bytes;
1782                 out(offset, segment, bytes, OUT_RAWDATA, s, NO_SEG, NO_SEG);
1783
1784                 /*
1785                  * Make sure the address gets the right offset in case
1786                  * the line breaks in the .lst file (BR 1197827)
1787                  */
1788                 offset += s;
1789                 s = 0;
1790
1791                 switch (ea_data.bytes) {
1792                 case 0:
1793                     break;
1794                 case 1:
1795                     if (ins->oprs[(c >> 3) & 7].segment != NO_SEG) {
1796                         data = ins->oprs[(c >> 3) & 7].offset;
1797                         out(offset, segment, &data, OUT_ADDRESS, 1,
1798                             ins->oprs[(c >> 3) & 7].segment,
1799                             ins->oprs[(c >> 3) & 7].wrt);
1800                     } else {
1801                         *bytes = ins->oprs[(c >> 3) & 7].offset;
1802                         out(offset, segment, bytes, OUT_RAWDATA, 1,
1803                             NO_SEG, NO_SEG);
1804                     }
1805                     s++;
1806                     break;
1807                 case 8:
1808                 case 2:
1809                 case 4:
1810                     data = ins->oprs[(c >> 3) & 7].offset;
1811                     warn_overflow(ea_data.bytes, &ins->oprs[(c >> 3) & 7]);
1812                     s += ea_data.bytes;
1813                     if (ea_data.rip) {
1814                         if (ins->oprs[(c >> 3) & 7].segment == segment) {
1815                             data -= insn_end;
1816                             out(offset, segment, &data,
1817                                 OUT_ADDRESS, ea_data.bytes,
1818                                 NO_SEG, NO_SEG);
1819                         } else {
1820                             out(offset, segment, &data,
1821                                 OUT_REL4ADR, insn_end - offset,
1822                                 ins->oprs[(c >> 3) & 7].segment,
1823                                 ins->oprs[(c >> 3) & 7].wrt);
1824                         }
1825                     } else {
1826                         type = OUT_ADDRESS;
1827                         out(offset, segment, &data,
1828                             OUT_ADDRESS, ea_data.bytes,
1829                             ins->oprs[(c >> 3) & 7].segment,
1830                             ins->oprs[(c >> 3) & 7].wrt);
1831                     }
1832                     break;
1833                 }
1834                 offset += s;
1835             }
1836             break;
1837
1838         default:
1839             errfunc(ERR_PANIC, "internal instruction table corrupt"
1840                     ": instruction code 0x%02X given", c);
1841             break;
1842         }
1843     }
1844 }
1845
1846 static int32_t regflag(const operand * o)
1847 {
1848     if (o->basereg < EXPR_REG_START || o->basereg >= REG_ENUM_LIMIT) {
1849         errfunc(ERR_PANIC, "invalid operand passed to regflag()");
1850     }
1851     return nasm_reg_flags[o->basereg];
1852 }
1853
1854 static int32_t regval(const operand * o)
1855 {
1856     if (o->basereg < EXPR_REG_START || o->basereg >= REG_ENUM_LIMIT) {
1857         errfunc(ERR_PANIC, "invalid operand passed to regval()");
1858     }
1859     return nasm_regvals[o->basereg];
1860 }
1861
1862 static int op_rexflags(const operand * o, int mask)
1863 {
1864     int32_t flags;
1865     int val;
1866
1867     if (o->basereg < EXPR_REG_START || o->basereg >= REG_ENUM_LIMIT) {
1868         errfunc(ERR_PANIC, "invalid operand passed to op_rexflags()");
1869     }
1870
1871     flags = nasm_reg_flags[o->basereg];
1872     val = nasm_regvals[o->basereg];
1873
1874     return rexflags(val, flags, mask);
1875 }
1876
1877 static int rexflags(int val, int32_t flags, int mask)
1878 {
1879     int rex = 0;
1880
1881     if (val >= 8)
1882         rex |= REX_B|REX_X|REX_R;
1883     if (flags & BITS64)
1884         rex |= REX_W;
1885     if (!(REG_HIGH & ~flags))   /* AH, CH, DH, BH */
1886         rex |= REX_H;
1887     else if (!(REG8 & ~flags) && val >= 4) /* SPL, BPL, SIL, DIL */
1888         rex |= REX_P;
1889
1890     return rex & mask;
1891 }
1892
1893 static int matches(const struct itemplate *itemp, insn * instruction, int bits)
1894 {
1895     int i, size[MAX_OPERANDS], asize, oprs, ret;
1896
1897     ret = 100;
1898
1899     /*
1900      * Check the opcode
1901      */
1902     if (itemp->opcode != instruction->opcode)
1903         return 0;
1904
1905     /*
1906      * Count the operands
1907      */
1908     if (itemp->operands != instruction->operands)
1909         return 0;
1910
1911     /*
1912      * Check that no spurious colons or TOs are present
1913      */
1914     for (i = 0; i < itemp->operands; i++)
1915         if (instruction->oprs[i].type & ~itemp->opd[i] & (COLON | TO))
1916             return 0;
1917
1918     /*
1919      * Process size flags
1920      */
1921     if (itemp->flags & IF_ARMASK) {
1922         memset(size, 0, sizeof size);
1923
1924         i = ((itemp->flags & IF_ARMASK) >> IF_ARSHFT) - 1;
1925
1926         switch (itemp->flags & IF_SMASK) {
1927         case IF_SB:
1928             size[i] = BITS8;
1929             break;
1930         case IF_SW:
1931             size[i] = BITS16;
1932             break;
1933         case IF_SD:
1934             size[i] = BITS32;
1935             break;
1936         case IF_SQ:
1937             size[i] = BITS64;
1938             break;
1939         case IF_SO:
1940             size[i] = BITS128;
1941             break;
1942         case IF_SY:
1943             size[i] = BITS256;
1944             break;
1945         case IF_SZ:
1946             switch (bits) {
1947             case 16:
1948                 size[i] = BITS16;
1949                 break;
1950             case 32:
1951                 size[i] = BITS32;
1952                 break;
1953             case 64:
1954                 size[i] = BITS64;
1955                 break;
1956             }
1957             break;
1958         default:
1959             break;
1960         }
1961     } else {
1962         asize = 0;
1963         switch (itemp->flags & IF_SMASK) {
1964         case IF_SB:
1965             asize = BITS8;
1966             break;
1967         case IF_SW:
1968             asize = BITS16;
1969             break;
1970         case IF_SD:
1971             asize = BITS32;
1972             break;
1973         case IF_SQ:
1974             asize = BITS64;
1975             break;
1976         case IF_SO:
1977             asize = BITS128;
1978             break;
1979         case IF_SY:
1980             asize = BITS256;
1981             break;
1982         case IF_SZ:
1983             switch (bits) {
1984             case 16:
1985                 asize = BITS16;
1986                 break;
1987             case 32:
1988                 asize = BITS32;
1989                 break;
1990             case 64:
1991                 asize = BITS64;
1992                 break;
1993             }
1994             break;
1995         default:
1996             break;
1997         }
1998         for (i = 0; i < MAX_OPERANDS; i++)
1999             size[i] = asize;
2000     }
2001
2002     /*
2003      * Check that the operand flags all match up
2004      */
2005     for (i = 0; i < itemp->operands; i++) {
2006         int32_t type = instruction->oprs[i].type;
2007         if (!(type & SIZE_MASK))
2008             type |= size[i];
2009
2010         if (itemp->opd[i] & SAME_AS) {
2011             int j = itemp->opd[i] & ~SAME_AS;
2012             if (type != instruction->oprs[j].type ||
2013                 instruction->oprs[i].basereg != instruction->oprs[j].basereg)
2014                 return 0;
2015         } else if (itemp->opd[i] & ~type ||
2016             ((itemp->opd[i] & SIZE_MASK) &&
2017              ((itemp->opd[i] ^ type) & SIZE_MASK))) {
2018             if ((itemp->opd[i] & ~type & ~SIZE_MASK) ||
2019                 (type & SIZE_MASK))
2020                 return 0;
2021             else
2022                 return 1;
2023         }
2024     }
2025
2026     /*
2027      * Check operand sizes
2028      */
2029     if (itemp->flags & (IF_SM | IF_SM2)) {
2030         oprs = (itemp->flags & IF_SM2 ? 2 : itemp->operands);
2031         asize = 0;
2032         for (i = 0; i < oprs; i++) {
2033             if ((asize = itemp->opd[i] & SIZE_MASK) != 0) {
2034                 int j;
2035                 for (j = 0; j < oprs; j++)
2036                     size[j] = asize;
2037                 break;
2038             }
2039         }
2040     } else {
2041         oprs = itemp->operands;
2042     }
2043
2044     for (i = 0; i < itemp->operands; i++) {
2045         if (!(itemp->opd[i] & SIZE_MASK) &&
2046             (instruction->oprs[i].type & SIZE_MASK & ~size[i]))
2047             return 2;
2048     }
2049
2050     /*
2051      * Check template is okay at the set cpu level
2052      */
2053     if (((itemp->flags & IF_PLEVEL) > cpu))
2054         return 3;
2055
2056     /*
2057      * Check if instruction is available in long mode
2058      */
2059     if ((itemp->flags & IF_NOLONG) && (bits == 64))
2060         return 4;
2061
2062     /*
2063      * Check if special handling needed for Jumps
2064      */
2065     if ((uint8_t)(itemp->code[0]) >= 0370)
2066         return 99;
2067
2068     return ret;
2069 }
2070
/*
 * process_ea: work out how to encode the operand `input' as an
 * effective address and store the result in `output'.
 *
 *   input    - the operand to encode (register direct or memory reference)
 *   output   - receives modrm, sib/sib_present, bytes (displacement
 *              length), rip and size; its rex field is only ever OR-ed
 *              into here, never cleared
 *              NOTE(review): callers presumably zero output->rex first --
 *              confirm at each call site
 *   bits     - assembly mode; only compared against 64 here
 *   addrbits - address size in effect for the instruction
 *   rfield   - value placed in bits 3..5 of the ModRM byte
 *   rflags   - register flags belonging to rfield, used for its REX bits
 *
 * Returns `output' on success, or NULL if the operand cannot be encoded
 * as an effective address.  On a NULL return `output' may already have
 * been partly modified (rip cleared, rex bits added).
 */
2071 static ea *process_ea(operand * input, ea * output, int bits,
2072                       int addrbits, int rfield, int32_t rflags)
2073 {
         /* a forward reference rules out the short (mod 0 / mod 1) forms below */
2074     bool forw_ref = !!(input->opflags & OPFLAG_FORWARD);
2075
2076     output->rip = false;
2077
2078     /* REX flags for the rfield operand */
2079     output->rex |= rexflags(rfield, rflags, REX_R|REX_P|REX_W|REX_H);
2080
2081     if (!(REGISTER & ~input->type)) {   /* register direct */
2082         int i;
2083         int32_t f;
2084
2085         if (input->basereg < EXPR_REG_START /* Verify as Register */
2086             || input->basereg >= REG_ENUM_LIMIT)
2087             return NULL;
2088         f = regflag(input);
2089         i = nasm_regvals[input->basereg];
2090
2091         if (REG_EA & ~f)
2092             return NULL;        /* Invalid EA register */
2093
2094         output->rex |= op_rexflags(input, REX_B|REX_P|REX_W|REX_H);
2095
2096         output->sib_present = false;             /* no SIB necessary */
2097         output->bytes = 0;  /* no offset necessary either */
2098         output->modrm = 0xC0 | ((rfield & 7) << 3) | (i & 7);
2099     } else {                    /* it's a memory reference */
2100         if (input->basereg == -1
2101             && (input->indexreg == -1 || input->scale == 0)) {
2102             /* it's a pure offset */
2103             if (bits == 64 && (~input->type & IP_REL)) {
                   /*
                    * 64-bit mode without IP_REL: emit a SIB byte with
                    * index = 4 and base = 5 plus a 4-byte displacement,
                    * and leave output->rip false.
                    */
2104               int scale, index, base;
2105               output->sib_present = true;
2106               scale = 0;
2107               index = 4;
2108               base = 5;
2109               output->sib = (scale << 6) | (index << 3) | base;
2110               output->bytes = 4;
2111               output->modrm = 4 | ((rfield & 7) << 3);
2112               output->rip = false;
2113             } else {
                   /*
                    * mod = 0 with rm = 5 (or rm = 6 for 16-bit addressing)
                    * and a full-size displacement; flagged as RIP-relative
                    * whenever bits == 64.
                    */
2114               output->sib_present = false;
2115               output->bytes = (addrbits != 16 ? 4 : 2);
2116               output->modrm = (addrbits != 16 ? 5 : 6) | ((rfield & 7) << 3);
2117               output->rip = bits == 64;
2118             }
2119         } else {                /* it's an indirection */
2120             int i = input->indexreg, b = input->basereg, s = input->scale;
2121             int32_t o = input->offset, seg = input->segment;
2122             int hb = input->hintbase, ht = input->hinttype;
2123             int t;
2124             int it, bt;
2125             int32_t ix, bx;     /* register flags */
2126
2127             if (s == 0)
2128                 i = -1;         /* make this easy, at least */
2129
                 /* it/bt: register values of index/base, -1 when absent */
2130             if (i >= EXPR_REG_START && i < REG_ENUM_LIMIT) {
2131                 it = nasm_regvals[i];
2132                 ix = nasm_reg_flags[i];
2133             } else {
2134                 it = -1;
2135                 ix = 0;
2136             }
2137
2138             if (b >= EXPR_REG_START && b < REG_ENUM_LIMIT) {
2139                 bt = nasm_regvals[b];
2140                 bx = nasm_reg_flags[b];
2141             } else {
2142                 bt = -1;
2143                 bx = 0;
2144             }
2145
2146             /* check for a 32/64-bit memory reference... */
2147             if ((ix|bx) & (BITS32|BITS64)) {
2148                 /* it must be a 32/64-bit memory reference. Firstly we have
2149                  * to check that all registers involved are type E/Rxx. */
                     /* sok: the address sizes still compatible with the registers */
2150                 int32_t sok = BITS32|BITS64;
2151
2152                 if (it != -1) {
2153                     if (!(REG64 & ~ix) || !(REG32 & ~ix))
2154                         sok &= ix;
2155                     else
2156                         return NULL;
2157                 }
2158
2159                 if (bt != -1) {
2160                     if (REG_GPR & ~bx)
2161                         return NULL; /* Invalid register */
2162                     if (~sok & bx & SIZE_MASK)
2163                         return NULL; /* Invalid size */
2164                     sok &= bx;
2165                 }
2166
2167                 /* While we're here, ensure the user didn't specify
2168                    WORD or QWORD. */
2169                 if (input->disp_size == 16 || input->disp_size == 64)
2170                     return NULL;
2171
2172                 if (addrbits == 16 ||
2173                     (addrbits == 32 && !(sok & BITS32)) ||
2174                     (addrbits == 64 && !(sok & BITS64)))
2175                     return NULL;
2176
                     /*
                      * NOTE: the steps below rewrite bt/it/s in place and each
                      * one sees the result of the previous one, so their order
                      * matters.
                      */
2177                 /* now reorganize base/index */
2178                 if (s == 1 && bt != it && bt != -1 && it != -1 &&
2179                     ((hb == b && ht == EAH_NOTBASE)
2180                      || (hb == i && ht == EAH_MAKEBASE))) {
2181                     /* swap if hints say so */
2182                     t = bt, bt = it, it = t;
2183                     t = bx, bx = ix, ix = t;
2184                 }
2185                 if (bt == it)     /* convert EAX+2*EAX to 3*EAX */
2186                     bt = -1, bx = 0, s++;
2187                 if (bt == -1 && s == 1 && !(hb == it && ht == EAH_NOTBASE)) {
2188                     /* make single reg base, unless hint */
2189                     bt = it, bx = ix, it = -1, ix = 0;
2190                 }
2191                 if (((s == 2 && it != REG_NUM_ESP
2192                       && !(input->eaflags & EAF_TIMESTWO)) || s == 3
2193                      || s == 5 || s == 9) && bt == -1)
2194                     bt = it, bx = ix, s--; /* convert 3*EAX to EAX+2*EAX */
2195                 if (it == -1 && (bt & 7) != REG_NUM_ESP
2196                     && (input->eaflags & EAF_TIMESTWO))
2197                     it = bt, ix = bx, bt = -1, bx = 0, s = 1;
2198                 /* convert [NOSPLIT EAX] to sib format with 0x0 displacement */
2199                 if (s == 1 && it == REG_NUM_ESP) {
2200                     /* swap ESP into base if scale is 1 */
2201                     t = it, it = bt, bt = t;
2202                     t = ix, ix = bx, bx = t;
2203                 }
2204                 if (it == REG_NUM_ESP
2205                     || (s != 1 && s != 2 && s != 4 && s != 8 && it != -1))
2206                     return NULL;        /* wrong, for various reasons */
2207
2208                 output->rex |= rexflags(it, ix, REX_X);
2209                 output->rex |= rexflags(bt, bx, REX_B);
2210
2211                 if (it == -1 && (bt & 7) != REG_NUM_ESP) {
2212                     /* no SIB needed */
2213                     int mod, rm;
2214
2215                     if (bt == -1) {
2216                         rm = 5;
2217                         mod = 0;
2218                     } else {
2219                         rm = (bt & 7);
                             /*
                              * mod 0: no displacement; mod 1: one byte;
                              * mod 2: four bytes (see output->bytes below).
                              * rm == REG_NUM_EBP never takes mod 0, since
                              * mod 0 with rm 5 is the pure-offset form above.
                              */
2220                         if (rm != REG_NUM_EBP && o == 0 &&
2221                                 seg == NO_SEG && !forw_ref &&
2222                                 !(input->eaflags &
2223                                   (EAF_BYTEOFFS | EAF_WORDOFFS)))
2224                             mod = 0;
2225                         else if (input->eaflags & EAF_BYTEOFFS ||
2226                                  (o >= -128 && o <= 127 && seg == NO_SEG
2227                                   && !forw_ref
2228                                   && !(input->eaflags & EAF_WORDOFFS)))
2229                             mod = 1;
2230                         else
2231                             mod = 2;
2232                     }
2233
2234                     output->sib_present = false;
2235                     output->bytes = (bt == -1 || mod == 2 ? 4 : mod);
2236                     output->modrm = (mod << 6) | ((rfield & 7) << 3) | rm;
2237                 } else {
2238                     /* we need a SIB */
2239                     int mod, scale, index, base;
2240
2241                     if (it == -1)
2242                         index = 4, s = 1;
2243                     else
2244                         index = (it & 7);
2245
                         /* translate the scale factor into the 2-bit SIB scale field */
2246                     switch (s) {
2247                     case 1:
2248                         scale = 0;
2249                         break;
2250                     case 2:
2251                         scale = 1;
2252                         break;
2253                     case 4:
2254                         scale = 2;
2255                         break;
2256                     case 8:
2257                         scale = 3;
2258                         break;
2259                     default:   /* then what the smeg is it? */
2260                         return NULL;    /* panic */
2261                     }
2262
2263                     if (bt == -1) {
2264                         base = 5;
2265                         mod = 0;
2266                     } else {
2267                         base = (bt & 7);
                             /* same mod selection as in the no-SIB case */
2268                         if (base != REG_NUM_EBP && o == 0 &&
2269                                     seg == NO_SEG && !forw_ref &&
2270                                     !(input->eaflags &
2271                                       (EAF_BYTEOFFS | EAF_WORDOFFS)))
2272                             mod = 0;
2273                         else if (input->eaflags & EAF_BYTEOFFS ||
2274                                  (o >= -128 && o <= 127 && seg == NO_SEG
2275                                   && !forw_ref
2276                                   && !(input->eaflags & EAF_WORDOFFS)))
2277                             mod = 1;
2278                         else
2279                             mod = 2;
2280                     }
2281
2282                     output->sib_present = true;
2283                     output->bytes =  (bt == -1 || mod == 2 ? 4 : mod);
2284                     output->modrm = (mod << 6) | ((rfield & 7) << 3) | 4;
2285                     output->sib = (scale << 6) | (index << 3) | base;
2286                 }
2287             } else {            /* it's 16-bit */
2288                 int mod, rm;
2289
2290                 /* check for 64-bit long mode */
2291                 if (addrbits == 64)
2292                     return NULL;
2293
2294                 /* check all registers are BX, BP, SI or DI */
2295                 if ((b != -1 && b != R_BP && b != R_BX && b != R_SI
2296                      && b != R_DI) || (i != -1 && i != R_BP && i != R_BX
2297                                        && i != R_SI && i != R_DI))
2298                     return NULL;
2299
2300                 /* ensure the user didn't specify DWORD/QWORD */
2301                 if (input->disp_size == 32 || input->disp_size == 64)
2302                     return NULL;
2303
2304                 if (s != 1 && i != -1)
2305                     return NULL;        /* no can do, in 16-bit EA */
2306                 if (b == -1 && i != -1) {
2307                     int tmp = b;
2308                     b = i;
2309                     i = tmp;
2310                 }               /* swap */
2311                 if ((b == R_SI || b == R_DI) && i != -1) {
2312                     int tmp = b;
2313                     b = i;
2314                     i = tmp;
2315                 }
2316                 /* have BX/BP as base, SI/DI index */
2317                 if (b == i)
2318                     return NULL;        /* shouldn't ever happen, in theory */
2319                 if (i != -1 && b != -1 &&
2320                     (i == R_BP || i == R_BX || b == R_SI || b == R_DI))
2321                     return NULL;        /* invalid combinations */
2322                 if (b == -1)    /* pure offset: handled above */
2323                     return NULL;        /* so if it gets to here, panic! */
2324
                     /* pick the r/m code for this base/index pair, -1 if none fits */
2325                 rm = -1;
2326                 if (i != -1)
2327                     switch (i * 256 + b) {
2328                     case R_SI * 256 + R_BX:
2329                         rm = 0;
2330                         break;
2331                     case R_DI * 256 + R_BX:
2332                         rm = 1;
2333                         break;
2334                     case R_SI * 256 + R_BP:
2335                         rm = 2;
2336                         break;
2337                     case R_DI * 256 + R_BP:
2338                         rm = 3;
2339                         break;
2340                 } else
2341                     switch (b) {
2342                     case R_SI:
2343                         rm = 4;
2344                         break;
2345                     case R_DI:
2346                         rm = 5;
2347                         break;
2348                     case R_BP:
2349                         rm = 6;
2350                         break;
2351                     case R_BX:
2352                         rm = 7;
2353                         break;
2354                     }
2355                 if (rm == -1)   /* can't happen, in theory */
2356                     return NULL;        /* so panic if it does */
2357
                     /*
                      * rm 6 (BP alone) never takes mod 0, since mod 0 with
                      * rm 6 is the 16-bit pure-offset form above.
                      */
2358                 if (o == 0 && seg == NO_SEG && !forw_ref && rm != 6 &&
2359                     !(input->eaflags & (EAF_BYTEOFFS | EAF_WORDOFFS)))
2360                     mod = 0;
2361                 else if (input->eaflags & EAF_BYTEOFFS ||
2362                          (o >= -128 && o <= 127 && seg == NO_SEG
2363                           && !forw_ref
2364                           && !(input->eaflags & EAF_WORDOFFS)))
2365                     mod = 1;
2366                 else
2367                     mod = 2;
2368
2369                 output->sib_present = false;    /* no SIB - it's 16-bit */
2370                 output->bytes = mod;    /* bytes of offset needed */
2371                 output->modrm = (mod << 6) | ((rfield & 7) << 3) | rm;
2372             }
2373         }
2374     }
2375
         /* total length: ModRM byte + optional SIB byte + displacement bytes */
2376     output->size = 1 + output->sib_present + output->bytes;
2377     return output;
2378 }
2379
2380 static void add_asp(insn *ins, int addrbits)
2381 {
2382     int j, valid;
2383     int defdisp;
2384
2385     valid = (addrbits == 64) ? 64|32 : 32|16;
2386
2387     switch (ins->prefixes[PPS_ASIZE]) {
2388     case P_A16:
2389         valid &= 16;
2390         break;
2391     case P_A32:
2392         valid &= 32;
2393         break;
2394     case P_A64:
2395         valid &= 64;
2396         break;
2397     case P_ASP:
2398         valid &= (addrbits == 32) ? 16 : 32;
2399         break;
2400     default:
2401         break;
2402     }
2403
2404     for (j = 0; j < ins->operands; j++) {
2405         if (!(MEMORY & ~ins->oprs[j].type)) {
2406             int32_t i, b;
2407
2408             /* Verify as Register */
2409             if (ins->oprs[j].indexreg < EXPR_REG_START
2410                 || ins->oprs[j].indexreg >= REG_ENUM_LIMIT)
2411                 i = 0;
2412             else
2413                 i = nasm_reg_flags[ins->oprs[j].indexreg];
2414
2415             /* Verify as Register */
2416             if (ins->oprs[j].basereg < EXPR_REG_START
2417                 || ins->oprs[j].basereg >= REG_ENUM_LIMIT)
2418                 b = 0;
2419             else
2420                 b = nasm_reg_flags[ins->oprs[j].basereg];
2421
2422             if (ins->oprs[j].scale == 0)
2423                 i = 0;
2424
2425             if (!i && !b) {
2426                 int ds = ins->oprs[j].disp_size;
2427                 if ((addrbits != 64 && ds > 8) ||
2428                     (addrbits == 64 && ds == 16))
2429                     valid &= ds;
2430             } else {
2431                 if (!(REG16 & ~b))
2432                     valid &= 16;
2433                 if (!(REG32 & ~b))
2434                     valid &= 32;
2435                 if (!(REG64 & ~b))
2436                     valid &= 64;
2437
2438                 if (!(REG16 & ~i))
2439                     valid &= 16;
2440                 if (!(REG32 & ~i))
2441                     valid &= 32;
2442                 if (!(REG64 & ~i))
2443                     valid &= 64;
2444             }
2445         }
2446     }
2447
2448     if (valid & addrbits) {
2449         ins->addr_size = addrbits;
2450     } else if (valid & ((addrbits == 32) ? 16 : 32)) {
2451         /* Add an address size prefix */
2452         enum prefixes pref = (addrbits == 32) ? P_A16 : P_A32;
2453         ins->prefixes[PPS_ASIZE] = pref;
2454         ins->addr_size = (addrbits == 32) ? 16 : 32;
2455     } else {
2456         /* Impossible... */
2457         errfunc(ERR_NONFATAL, "impossible combination of address sizes");
2458         ins->addr_size = addrbits; /* Error recovery */
2459     }
2460
2461     defdisp = ins->addr_size == 16 ? 16 : 32;
2462
2463     for (j = 0; j < ins->operands; j++) {
2464         if (!(MEM_OFFS & ~ins->oprs[j].type) &&
2465             (ins->oprs[j].disp_size ? ins->oprs[j].disp_size : defdisp)
2466             != ins->addr_size) {
2467             /* mem_offs sizes must match the address size; if not,
2468                strip the MEM_OFFS bit and match only EA instructions */
2469             ins->oprs[j].type &= ~(MEM_OFFS & ~MEMORY);
2470         }
2471     }
2472 }