assemble.c

   1 /* assemble.c   code generation for the Netwide Assembler
   2  *
   3  * The Netwide Assembler is copyright (C) 1996 Simon Tatham and
   4  * Julian Hall. All rights reserved. The software is
   5  * redistributable under the license given in the file "LICENSE"
   6  * distributed in the NASM archive.
   7  *
   8  * the actual codes (C syntax, i.e. octal):
   9  * \0            - terminates the code. (Unless it's a literal of course.)
  10  * \1..\4        - that many literal bytes follow in the code stream
  11  * \5            - add 4 to the primary operand number (b, low octdigit)
  12  * \6            - add 4 to the secondary operand number (a, middle octdigit)
  13  * \7            - add 4 to both the primary and the secondary operand number
  14  * \10..\13      - a literal byte follows in the code stream, to be added
  15  *                 to the register value of operand 0..3
  16  * \14..\17      - a signed byte immediate operand, from operand 0..3
  17  * \20..\23      - a byte immediate operand, from operand 0..3
  18  * \24..\27      - an unsigned byte immediate operand, from operand 0..3
  19  * \30..\33      - a word immediate operand, from operand 0..3
  20  * \34..\37      - select between \3[0-3] and \4[0-3] depending on 16/32 bit
  21  *                 assembly mode or the operand-size override on the operand
  22  * \40..\43      - a long immediate operand, from operand 0..3
  23  * \44..\47      - select between \3[0-3], \4[0-3] and \5[4-7]
  24  *                 depending on the address size of the instruction.
  25  * \50..\53      - a byte relative operand, from operand 0..3
  26  * \54..\57      - a qword immediate operand, from operand 0..3
  27  * \60..\63      - a word relative operand, from operand 0..3
  28  * \64..\67      - select between \6[0-3] and \7[0-3] depending on 16/32 bit
  29  *                 assembly mode or the operand-size override on the operand
  30  * \70..\73      - a long relative operand, from operand 0..3
  31  * \74..\77      - a word constant, from the _segment_ part of operand 0..3
  32  * \1ab          - a ModRM, calculated on EA in operand a, with the spare
  33  *                 field the register value of operand b.
  34  * \140..\143    - an immediate word or signed byte for operand 0..3
  35  * \144..\147    - or 2 (s-field) into opcode byte if operand 0..3
  36  *                  is a signed byte rather than a word.  Opcode byte follows.
  37  * \150..\153    - an immediate dword or signed byte for operand 0..3
  38  * \154..\157    - or 2 (s-field) into opcode byte if operand 0..3
  39  *                  is a signed byte rather than a dword.  Opcode byte follows.
  40  * \160..\163    - this instruction uses DREX rather than REX, with the
  41  *                 OC0 field set to 0, and the dest field taken from
  42  *                 operand 0..3.
  43  * \164..\167    - this instruction uses DREX rather than REX, with the
  44  *                 OC0 field set to 1, and the dest field taken from
  45  *                 operand 0..3.
  46  * \171          - placement of DREX suffix in the absence of an EA
  47  * \172\ab       - the register number from operand a in bits 7..4, with
  48  *                 the 4-bit immediate from operand b in bits 3..0.
  49  * \173\xab      - the register number from operand a in bits 7..4, with
  50  *                 the value b in bits 3..0.
  51  * \174\a        - the register number from operand a in bits 7..4, and
  52  *                 an arbitrary value in bits 3..0 (assembled as zero.)
  53  * \2ab          - a ModRM, calculated on EA in operand a, with the spare
  54  *                 field equal to digit b.
  55  * \250..\253    - same as \150..\153, except warn if the 64-bit operand
  56  *                 is not equal to the truncated and sign-extended 32-bit
  57  *                 operand; used for 32-bit immediates in 64-bit mode.
  58  * \254..\257    - a signed 32-bit operand to be extended to 64 bits.
  59  * \260..\263    - this instruction uses VEX rather than REX, with the
  60  *                 V field taken from operand 0..3.
  61  * \270          - this instruction uses VEX rather than REX, with the
  62  *                 V field set to 1111b.
  63  *
  64  * VEX prefixes are followed by the sequence:
  65  * \mm\wlp         where mm is the M field; and wlp is:
  66  *                 00 0ww lpp
  67  *                 [w0] ww = 0 for W = 0
  68  *                 [w1] ww = 1 for W = 1
  69  *                 [wx] ww = 2 for W don't care (always assembled as 0)
  70  *                 [ww] ww = 3 for W used as REX.W
  71  *
  72  *
  73  * \274..\277    - a signed byte immediate operand, from operand 0..3,
  74  *                 which is to be extended to the operand size.
  75  * \310          - indicates fixed 16-bit address size, i.e. optional 0x67.
  76  * \311          - indicates fixed 32-bit address size, i.e. optional 0x67.
  77  * \312          - (disassembler only) marker on LOOP, LOOPxx instructions.
  78  * \313          - indicates fixed 64-bit address size, 0x67 invalid.
  79  * \314          - (disassembler only) invalid with REX.B
  80  * \315          - (disassembler only) invalid with REX.X
  81  * \316          - (disassembler only) invalid with REX.R
  82  * \317          - (disassembler only) invalid with REX.W
  83  * \320          - indicates fixed 16-bit operand size, i.e. optional 0x66.
  84  * \321          - indicates fixed 32-bit operand size, i.e. optional 0x66.
  85  * \322          - indicates that this instruction is only valid when the
  86  *                 operand size is the default (instruction to disassembler,
  87  *                 generates no code in the assembler)
  88  * \323          - indicates fixed 64-bit operand size, REX on extensions only.
  89  * \324          - indicates 64-bit operand size requiring REX prefix.
  90  * \330          - a literal byte follows in the code stream, to be added
  91  *                 to the condition code value of the instruction.
  92  * \331          - instruction not valid with REP prefix.  Hint for
  93  *                 disassembler only; for SSE instructions.
  94  * \332          - REP prefix (0xF2 byte) used as opcode extension.
  95  * \333          - REP prefix (0xF3 byte) used as opcode extension.
  96  * \334          - LOCK prefix used instead of REX.R
  97  * \335          - disassemble a rep (0xF3 byte) prefix as repe not rep.
  98  * \336          - force a REP(E) prefix (0xF3) even if not specified.
  99  * \337          - force a REPNE prefix (0xF2) even if not specified.
 100  *                 \336-\337 are still listed as prefixes in the disassembler.
 101  * \340          - reserve <operand 0> bytes of uninitialized storage.
 102  *                 Operand 0 had better be a segmentless constant.
 103  * \344,\345     - the PUSH/POP (respectively) codes for CS, DS, ES, SS
 104  *                 (POP is never used for CS) depending on operand 0
 105  * \346,\347     - the second byte of PUSH/POP codes for FS, GS, depending
 106  *                 on operand 0
 107  * \360          - no SSE prefix (== \364\331)
 108  * \361          - 66 SSE prefix (== \366\331)
 109  * \362          - F2 SSE prefix (== \364\332)
 110  * \363          - F3 SSE prefix (== \364\333)
 111  * \364          - operand-size prefix (0x66) not permitted
 112  * \365          - address-size prefix (0x67) not permitted
 113  * \366          - operand-size prefix (0x66) used as opcode extension
 114  * \367          - address-size prefix (0x67) used as opcode extension
 115  * \370,\371,\372 - match only if operand 0 meets byte jump criteria.
 116  *                 370 is used for Jcc, 371 is used for JMP.
 117  * \373          - assemble 0x03 if bits==16, 0x05 if bits==32;
 118  *                 used for conditional jump over longer jump
 119  */
 120
 121 #include "compiler.h"
 122
 123 #include <stdio.h>
 124 #include <string.h>
 125 #include <inttypes.h>
 126
 127 #include "nasm.h"
 128 #include "nasmlib.h"
 129 #include "assemble.h"
 130 #include "insns.h"
 131 #include "tables.h"
 132
 133 /* Initialized to zero by the C standard */
 134 static const uint8_t const_zero_buf[256];
 135
 136 typedef struct {
 137     int sib_present;                 /* is a SIB byte necessary? */
 138     int bytes;                       /* # of bytes of offset needed */
 139     int size;                        /* lazy - this is sib+bytes+1 */
 140     uint8_t modrm, sib, rex, rip;    /* the bytes themselves */
 141 } ea;
 142
 143 static uint32_t cpu;            /* cpu level received from nasm.c */
 144 static efunc errfunc;
 145 static struct ofmt *outfmt;
 146 static ListGen *list;
 147
 148 static int64_t calcsize(int32_t, int64_t, int, insn *, const uint8_t *);
 149 static void gencode(int32_t segment, int64_t offset, int bits,
 150                     insn * ins, const struct itemplate *temp,
 151                     int64_t insn_end);
 152 static int matches(const struct itemplate *, insn *, int bits);
 153 static int32_t regflag(const operand *);
 154 static int32_t regval(const operand *);
 155 static int rexflags(int, int32_t, int);
 156 static int op_rexflags(const operand *, int);
 157 static ea *process_ea(operand *, ea *, int, int, int, int32_t);
 158 static void add_asp(insn *, int);
 159
 160 static int has_prefix(insn * ins, enum prefix_pos pos, enum prefixes prefix)
 161 {
 162     return ins->prefixes[pos] == prefix;
 163 }
 164
 165 static void assert_no_prefix(insn * ins, enum prefix_pos pos)
 166 {
 167     if (ins->prefixes[pos])
 168         errfunc(ERR_NONFATAL, "invalid %s prefix",
 169                 prefix_name(ins->prefixes[pos]));
 170 }
 171
 172 static const char *size_name(int size)
 173 {
 174     switch (size) {
 175     case 1:
 176         return "byte";
 177     case 2:
 178         return "word";
 179     case 4:
 180         return "dword";
 181     case 8:
 182         return "qword";
 183     case 10:
 184         return "tword";
 185     case 16:
 186         return "oword";
 187     case 32:
 188         return "yword";
 189     default:
 190         return "???";
 191     }
 192 }
 193
 194 static void warn_overflow(int size, const struct operand *o)
 195 {
 196     if (size < 8 && o->wrt == NO_SEG && o->segment == NO_SEG) {
 197         int64_t lim = ((int64_t)1 << (size*8))-1;
 198         int64_t data = o->offset;
 199
 200         if (data < ~lim || data > lim)
 201             errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
 202                     "%s data exceeds bounds", size_name(size));
 203     }
 204 }
 205 /*
 206  * This routine wrappers the real output format's output routine,
 207  * in order to pass a copy of the data off to the listing file
 208  * generator at the same time.
 209  */
 210 static void out(int64_t offset, int32_t segto, const void *data,
 211                 enum out_type type, uint64_t size,
 212                 int32_t segment, int32_t wrt)
 213 {
 214     static int32_t lineno = 0;     /* static!!! */
 215     static char *lnfname = NULL;
 216     uint8_t p[8];
 217
 218     if (type == OUT_ADDRESS && segment == NO_SEG && wrt == NO_SEG) {
 219         /*
 220          * This is a non-relocated address, and we're going to
 221          * convert it into RAWDATA format.
 222          */
 223         uint8_t *q = p;
 224
 225         if (size > 8) {
 226             errfunc(ERR_PANIC, "OUT_ADDRESS with size > 8");
 227             return;
 228         }
 229
 230         WRITEADDR(q, *(int64_t *)data, size);
 231         data = p;
 232         type = OUT_RAWDATA;
 233     }
 234
 235     list->output(offset, data, type, size);
 236
 237     /*
 238      * this call to src_get determines when we call the
 239      * debug-format-specific "linenum" function
 240      * it updates lineno and lnfname to the current values
 241      * returning 0 if "same as last time", -2 if lnfname
 242      * changed, and the amount by which lineno changed,
 243      * if it did. thus, these variables must be static
 244      */
 245
 246     if (src_get(&lineno, &lnfname)) {
 247         outfmt->current_dfmt->linenum(lnfname, lineno, segto);
 248     }
 249
 250     outfmt->output(segto, data, type, size, segment, wrt);
 251 }
 252
 253 static bool jmp_match(int32_t segment, int64_t offset, int bits,
 254                      insn * ins, const uint8_t *code)
 255 {
 256     int64_t isize;
 257     uint8_t c = code[0];
 258
 259     if ((c != 0370 && c != 0371) || (ins->oprs[0].type & STRICT))
 260         return false;
 261     if (!optimizing)
 262         return false;
 263     if (optimizing < 0 && c == 0371)
 264         return false;
 265
 266     isize = calcsize(segment, offset, bits, ins, code);
 267     if (ins->oprs[0].segment != segment)
 268         return false;
 269
 270     isize = ins->oprs[0].offset - offset - isize; /* isize is delta */
 271     return (isize >= -128 && isize <= 127); /* is it byte size? */
 272 }
 273
 274 int64_t assemble(int32_t segment, int64_t offset, int bits, uint32_t cp,
 275               insn * instruction, struct ofmt *output, efunc error,
 276               ListGen * listgen)
 277 {
 278     const struct itemplate *temp;
 279     int j;
 280     int size_prob;
 281     int64_t insn_end;
 282     int32_t itimes;
 283     int64_t start = offset;
 284     int64_t wsize = 0;             /* size for DB etc. */
 285
 286     errfunc = error;            /* to pass to other functions */
 287     cpu = cp;
 288     outfmt = output;            /* likewise */
 289     list = listgen;             /* and again */
 290
 291     switch (instruction->opcode) {
 292     case -1:
 293         return 0;
 294     case I_DB:
 295         wsize = 1;
 296         break;
 297     case I_DW:
 298         wsize = 2;
 299         break;
 300     case I_DD:
 301         wsize = 4;
 302         break;
 303     case I_DQ:
 304         wsize = 8;
 305         break;
 306     case I_DT:
 307         wsize = 10;
 308         break;
 309     case I_DO:
 310         wsize = 16;
 311         break;
 312     case I_DY:
 313         wsize = 32;
 314         break;
 315     default:
 316         break;
 317     }
 318
 319     if (wsize) {
 320         extop *e;
 321         int32_t t = instruction->times;
 322         if (t < 0)
 323             errfunc(ERR_PANIC,
 324                     "instruction->times < 0 (%ld) in assemble()", t);
 325
 326         while (t--) {           /* repeat TIMES times */
 327             for (e = instruction->eops; e; e = e->next) {
 328                 if (e->type == EOT_DB_NUMBER) {
 329                     if (wsize == 1) {
 330                         if (e->segment != NO_SEG)
 331                             errfunc(ERR_NONFATAL,
 332                                     "one-byte relocation attempted");
 333                         else {
 334                             uint8_t out_byte = e->offset;
 335                             out(offset, segment, &out_byte,
 336                                 OUT_RAWDATA, 1, NO_SEG, NO_SEG);
 337                         }
 338                     } else if (wsize > 8) {
 339                         errfunc(ERR_NONFATAL,
 340                                 "integer supplied to a DT, DO or DY"
 341                                 " instruction");
 342                     } else
 343                         out(offset, segment, &e->offset,
 344                             OUT_ADDRESS, wsize, e->segment, e->wrt);
 345                     offset += wsize;
 346                 } else if (e->type == EOT_DB_STRING ||
 347                            e->type == EOT_DB_STRING_FREE) {
 348                     int align;
 349
 350                     out(offset, segment, e->stringval,
 351                         OUT_RAWDATA, e->stringlen, NO_SEG, NO_SEG);
 352                     align = e->stringlen % wsize;
 353
 354                     if (align) {
 355                         align = wsize - align;
 356                         out(offset, segment, const_zero_buf,
 357                             OUT_RAWDATA, align, NO_SEG, NO_SEG);
 358                     }
 359                     offset += e->stringlen + align;
 360                 }
 361             }
 362             if (t > 0 && t == instruction->times - 1) {
 363                 /*
 364                  * Dummy call to list->output to give the offset to the
 365                  * listing module.
 366                  */
 367                 list->output(offset, NULL, OUT_RAWDATA, 0);
 368                 list->uplevel(LIST_TIMES);
 369             }
 370         }
 371         if (instruction->times > 1)
 372             list->downlevel(LIST_TIMES);
 373         return offset - start;
 374     }
 375
 376     if (instruction->opcode == I_INCBIN) {
 377         const char *fname = instruction->eops->stringval;
 378         FILE *fp;
 379
 380         fp = fopen(fname, "rb");
 381         if (!fp) {
 382             error(ERR_NONFATAL, "`incbin': unable to open file `%s'",
 383                   fname);
 384         } else if (fseek(fp, 0L, SEEK_END) < 0) {
 385             error(ERR_NONFATAL, "`incbin': unable to seek on file `%s'",
 386                   fname);
 387         } else {
 388             static char buf[4096];
 389             size_t t = instruction->times;
 390             size_t base = 0;
 391             size_t len;
 392
 393             len = ftell(fp);
 394             if (instruction->eops->next) {
 395                 base = instruction->eops->next->offset;
 396                 len -= base;
 397                 if (instruction->eops->next->next &&
 398                     len > (size_t)instruction->eops->next->next->offset)
 399                     len = (size_t)instruction->eops->next->next->offset;
 400             }
 401             /*
 402              * Dummy call to list->output to give the offset to the
 403              * listing module.
 404              */
 405             list->output(offset, NULL, OUT_RAWDATA, 0);
 406             list->uplevel(LIST_INCBIN);
 407             while (t--) {
 408                 size_t l;
 409
 410                 fseek(fp, base, SEEK_SET);
 411                 l = len;
 412                 while (l > 0) {
 413                     int32_t m =
 414                         fread(buf, 1, (l > (int32_t) sizeof(buf) ? (int32_t) sizeof(buf) : l),
 415                               fp);
 416                     if (!m) {
 417                         /*
 418                          * This shouldn't happen unless the file
 419                          * actually changes while we are reading
 420                          * it.
 421                          */
 422                         error(ERR_NONFATAL,
 423                               "`incbin': unexpected EOF while"
 424                               " reading file `%s'", fname);
 425                         t = 0;  /* Try to exit cleanly */
 426                         break;
 427                     }
 428                     out(offset, segment, buf, OUT_RAWDATA, m,
 429                         NO_SEG, NO_SEG);
 430                     l -= m;
 431                 }
 432             }
 433             list->downlevel(LIST_INCBIN);
 434             if (instruction->times > 1) {
 435                 /*
 436                  * Dummy call to list->output to give the offset to the
 437                  * listing module.
 438                  */
 439                 list->output(offset, NULL, OUT_RAWDATA, 0);
 440                 list->uplevel(LIST_TIMES);
 441                 list->downlevel(LIST_TIMES);
 442             }
 443             fclose(fp);
 444             return instruction->times * len;
 445         }
 446         return 0;               /* if we're here, there's an error */
 447     }
 448
 449     /* Check to see if we need an address-size prefix */
 450     add_asp(instruction, bits);
 451
 452     size_prob = false;
 453
 454     for (temp = nasm_instructions[instruction->opcode]; temp->opcode != -1; temp++){
 455         int m = matches(temp, instruction, bits);
 456         if (m == 100 ||
 457             (m == 99 && jmp_match(segment, offset, bits,
 458                                   instruction, temp->code))) {
 459             /* Matches! */
 460             int64_t insn_size = calcsize(segment, offset, bits,
 461                                       instruction, temp->code);
 462             itimes = instruction->times;
 463             if (insn_size < 0)  /* shouldn't be, on pass two */
 464                 error(ERR_PANIC, "errors made it through from pass one");
 465             else
 466                 while (itimes--) {
 467                     for (j = 0; j < MAXPREFIX; j++) {
 468                         uint8_t c = 0;
 469                         switch (instruction->prefixes[j]) {
 470                         case P_LOCK:
 471                             c = 0xF0;
 472                             break;
 473                         case P_REPNE:
 474                         case P_REPNZ:
 475                             c = 0xF2;
 476                             break;
 477                         case P_REPE:
 478                         case P_REPZ:
 479                         case P_REP:
 480                             c = 0xF3;
 481                             break;
 482                         case R_CS:
 483                             if (bits == 64) {
 484                                 error(ERR_WARNING | ERR_PASS2,
 485                                       "cs segment base generated, but will be ignored in 64-bit mode");
 486                             }
 487                             c = 0x2E;
 488                             break;
 489                         case R_DS:
 490                             if (bits == 64) {
 491                                 error(ERR_WARNING | ERR_PASS2,
 492                                       "ds segment base generated, but will be ignored in 64-bit mode");
 493                             }
 494                             c = 0x3E;
 495                             break;
 496                         case R_ES:
 497                            if (bits == 64) {
 498                                 error(ERR_WARNING | ERR_PASS2,
 499                                       "es segment base generated, but will be ignored in 64-bit mode");
 500                            }
 501                             c = 0x26;
 502                             break;
 503                         case R_FS:
 504                             c = 0x64;
 505                             break;
 506                         case R_GS:
 507                             c = 0x65;
 508                             break;
 509                         case R_SS:
 510                             if (bits == 64) {
 511                                 error(ERR_WARNING | ERR_PASS2,
 512                                       "ss segment base generated, but will be ignored in 64-bit mode");
 513                             }
 514                             c = 0x36;
 515                             break;
 516                         case R_SEGR6:
 517                         case R_SEGR7:
 518                             error(ERR_NONFATAL,
 519                                   "segr6 and segr7 cannot be used as prefixes");
 520                             break;
 521                         case P_A16:
 522                             if (bits == 64) {
 523                                 error(ERR_NONFATAL,
 524                                       "16-bit addressing is not supported "
 525                                       "in 64-bit mode");
 526                             } else if (bits != 16)
 527                                 c = 0x67;
 528                             break;
 529                         case P_A32:
 530                             if (bits != 32)
 531                                 c = 0x67;
 532                             break;
 533                         case P_A64:
 534                             if (bits != 64) {
 535                                 error(ERR_NONFATAL,
 536                                       "64-bit addressing is only supported "
 537                                       "in 64-bit mode");
 538                             }
 539                             break;
 540                         case P_ASP:
 541                             c = 0x67;
 542                             break;
 543                         case P_O16:
 544                             if (bits != 16)
 545                                 c = 0x66;
 546                             break;
 547                         case P_O32:
 548                             if (bits == 16)
 549                                 c = 0x66;
 550                             break;
 551                         case P_O64:
 552                             /* REX.W */
 553                             break;
 554                         case P_OSP:
 555                             c = 0x66;
 556                             break;
 557                         case P_none:
 558                             break;
 559                         default:
 560                             error(ERR_PANIC, "invalid instruction prefix");
 561                         }
 562                         if (c != 0) {
 563                             out(offset, segment, &c, OUT_RAWDATA, 1,
 564                                 NO_SEG, NO_SEG);
 565                             offset++;
 566                         }
 567                     }
 568                     insn_end = offset + insn_size;
 569                     gencode(segment, offset, bits, instruction,
 570                             temp, insn_end);
 571                     offset += insn_size;
 572                     if (itimes > 0 && itimes == instruction->times - 1) {
 573                         /*
 574                          * Dummy call to list->output to give the offset to the
 575                          * listing module.
 576                          */
 577                         list->output(offset, NULL, OUT_RAWDATA, 0);
 578                         list->uplevel(LIST_TIMES);
 579                     }
 580                 }
 581             if (instruction->times > 1)
 582                 list->downlevel(LIST_TIMES);
 583             return offset - start;
 584         } else if (m > 0 && m > size_prob) {
 585             size_prob = m;
 586         }
 587     }
 588
 589     if (temp->opcode == -1) {   /* didn't match any instruction */
 590         switch (size_prob) {
 591         case 1:
 592             error(ERR_NONFATAL, "operation size not specified");
 593             break;
 594         case 2:
 595             error(ERR_NONFATAL, "mismatch in operand sizes");
 596             break;
 597         case 3:
 598             error(ERR_NONFATAL, "no instruction for this cpu level");
 599             break;
 600         case 4:
 601             error(ERR_NONFATAL, "instruction not supported in 64-bit mode");
 602             break;
 603         default:
 604             error(ERR_NONFATAL,
 605                   "invalid combination of opcode and operands");
 606             break;
 607         }
 608     }
 609     return 0;
 610 }
 611
 612 int64_t insn_size(int32_t segment, int64_t offset, int bits, uint32_t cp,
 613                insn * instruction, efunc error)
 614 {
 615     const struct itemplate *temp;
 616
 617     errfunc = error;            /* to pass to other functions */
 618     cpu = cp;
 619
 620     if (instruction->opcode == -1)
 621         return 0;
 622
 623     if (instruction->opcode == I_DB || instruction->opcode == I_DW ||
 624         instruction->opcode == I_DD || instruction->opcode == I_DQ ||
 625         instruction->opcode == I_DT || instruction->opcode == I_DO ||
 626         instruction->opcode == I_DY) {
 627         extop *e;
 628         int32_t isize, osize, wsize = 0;   /* placate gcc */
 629
 630         isize = 0;
 631         switch (instruction->opcode) {
 632         case I_DB:
 633             wsize = 1;
 634             break;
 635         case I_DW:
 636             wsize = 2;
 637             break;
 638         case I_DD:
 639             wsize = 4;
 640             break;
 641         case I_DQ:
 642             wsize = 8;
 643             break;
 644         case I_DT:
 645             wsize = 10;
 646             break;
 647         case I_DO:
 648             wsize = 16;
 649             break;
 650         case I_DY:
 651             wsize = 32;
 652             break;
 653         default:
 654             break;
 655         }
 656
 657         for (e = instruction->eops; e; e = e->next) {
 658             int32_t align;
 659
 660             osize = 0;
 661             if (e->type == EOT_DB_NUMBER)
 662                 osize = 1;
 663             else if (e->type == EOT_DB_STRING ||
 664                      e->type == EOT_DB_STRING_FREE)
 665                 osize = e->stringlen;
 666
 667             align = (-osize) % wsize;
 668             if (align < 0)
 669                 align += wsize;
 670             isize += osize + align;
 671         }
 672         return isize * instruction->times;
 673     }
 674
 675     if (instruction->opcode == I_INCBIN) {
 676         const char *fname = instruction->eops->stringval;
 677         FILE *fp;
 678         size_t len;
 679
 680         fp = fopen(fname, "rb");
 681         if (!fp)
 682             error(ERR_NONFATAL, "`incbin': unable to open file `%s'",
 683                   fname);
 684         else if (fseek(fp, 0L, SEEK_END) < 0)
 685             error(ERR_NONFATAL, "`incbin': unable to seek on file `%s'",
 686                   fname);
 687         else {
 688             len = ftell(fp);
 689             fclose(fp);
 690             if (instruction->eops->next) {
 691                 len -= instruction->eops->next->offset;
 692                 if (instruction->eops->next->next &&
 693                     len > (size_t)instruction->eops->next->next->offset) {
 694                     len = (size_t)instruction->eops->next->next->offset;
 695                 }
 696             }
 697             return instruction->times * len;
 698         }
 699         return 0;               /* if we're here, there's an error */
 700     }
 701
 702     /* Check to see if we need an address-size prefix */
 703     add_asp(instruction, bits);
 704
 705     for (temp = nasm_instructions[instruction->opcode]; temp->opcode != -1; temp++) {
 706         int m = matches(temp, instruction, bits);
 707         if (m == 100 ||
 708             (m == 99 && jmp_match(segment, offset, bits,
 709                                   instruction, temp->code))) {
 710             /* we've matched an instruction. */
 711             int64_t isize;
 712             const uint8_t *codes = temp->code;
 713             int j;
 714
 715             isize = calcsize(segment, offset, bits, instruction, codes);
 716             if (isize < 0)
 717                 return -1;
 718             for (j = 0; j < MAXPREFIX; j++) {
 719                 switch (instruction->prefixes[j]) {
 720                 case P_A16:
 721                     if (bits != 16)
 722                         isize++;
 723                     break;
 724                 case P_A32:
 725                     if (bits != 32)
 726                         isize++;
 727                     break;
 728                 case P_O16:
 729                     if (bits != 16)
 730                         isize++;
 731                     break;
 732                 case P_O32:
 733                     if (bits == 16)
 734                         isize++;
 735                     break;
 736                 case P_A64:
 737                 case P_O64:
 738                 case P_none:
 739                     break;
 740                 default:
 741                     isize++;
 742                     break;
 743                 }
 744             }
 745             return isize * instruction->times;
 746         }
 747     }
 748     return -1;                  /* didn't match any instruction */
 749 }
 750
     /*
      * Common precondition for is_sbyte16() and is_sbyte32(): true only when
      * the operand has neither a segment nor a WRT part, is not flagged
      * OPFLAG_FORWARD, is not marked STRICT, and `optimizing` is >= 0.
      */
 751 static bool possible_sbyte(operand *o)
 752 {
 753     const bool is_plain   = o->segment == NO_SEG && o->wrt == NO_SEG;
 754     const bool is_settled = !(o->opflags & OPFLAG_FORWARD) && !(o->type & STRICT);
 755     return is_plain && is_settled && optimizing >= 0;
 756 }
 757
 758 /*
      * Return true when operand o passes possible_sbyte() and its offset,
      * once converted to int16_t, lies in the signed-byte range [-128, 127].
      */
 759 static bool is_sbyte16(operand *o)
 760 {
 761     int16_t low16;
 762
 763     if (possible_sbyte(o)) {
 764         low16 = o->offset;
 765         return low16 >= -128 && low16 <= 127;
 766     }
 767     return false;
 768 }
 769
     /*
      * Return true when operand o passes possible_sbyte() and its offset,
      * once converted to int32_t, lies in the signed-byte range [-128, 127].
      */
 770 static bool is_sbyte32(operand *o)
 771 {
 772     int32_t low32;
 773
 774     if (possible_sbyte(o)) {
 775         low32 = o->offset;
 776         return low32 >= -128 && low32 <= 127;
 777     }
 778     return false;
 779 }
 780
 781 /*
      * Common construct: expands to four consecutive case labels,
      * (x) through (x)+3.  The last label is left without its ':' so
      * that a use reads like an ordinary label, e.g. "case4(010):".
      */
 782 #define case4(x) case (x): case (x)+1: case (x)+2: case (x)+3
 783
     /*
      * calcsize: compute the encoded length, in bytes, of one instruction.
      *
      * Walks the template byte-code string `codes` up to its terminating
      * zero byte and adds up what each code contributes, then adds the
      * VEX (2 or 3 bytes), DREX (1 byte) or REX (1 byte) prefix implied by
      * the flags collected in ins->rex.  Explicit prefixes stored in
      * ins->prefixes[] are not counted here.
      *
      * Side effects on `ins`: ins->rex is reset and rebuilt; the DREX/VEX
      * codes store ins->drexdst, ins->vex_m and ins->vex_wlp; codes 0336
      * and 0337 fill in ins->prefixes[PPS_LREP] when it is still empty.
      *
      * `segment` and `offset` are accepted but unused.
      *
      * Returns the length, or -1 when the template cannot be encoded for
      * these operands in this mode (invalid effective address, a code that
      * is not valid for `bits`, a high register combined with REX/DREX/VEX,
      * or operands that need REX bits outside 64-bit mode).
      */
 784 static int64_t calcsize(int32_t segment, int64_t offset, int bits,
 785                         insn * ins, const uint8_t *codes)
 786 {
 787     int64_t length = 0;
 788     uint8_t c;
 789     int rex_mask = ~0;
 790     int op1, op2;
 791     struct operand *opx;
 792     uint8_t opex = 0;
 793
 794     ins->rex = 0;               /* Ensure REX is reset */
 795
 796     if (ins->prefixes[PPS_OSIZE] == P_O64)
 797         ins->rex |= REX_W;
 798
 799     (void)segment;              /* Don't warn that this parameter is unused */
 800     (void)offset;               /* Don't warn that this parameter is unused */
 801
 802     while (*codes) {
 803         c = *codes++;
             /* opex (set by codes 05..07) adds 4 to one or both operand numbers */
 804         op1 = (c & 3) + ((opex & 1) << 2);
 805         op2 = ((c >> 3) & 3) + ((opex & 2) << 1);
 806         opx = &ins->oprs[op1];
 807         opex = 0;               /* For the next iteration */
 808
 809         switch (c) {
             /* c literal bytes follow in the code stream: skip and count them */
 810         case 01:
 811         case 02:
 812         case 03:
 813         case 04:
 814             codes += c, length += c;
 815             break;
 816
             /* operand-number extension, applied to the next code only */
 817         case 05:
 818         case 06:
 819         case 07:
 820             opex = c;
 821             break;
 822
             /* one literal byte follows; the operand may contribute REX flags */
 823         case4(010):
 824             ins->rex |=
 825                 op_rexflags(opx, REX_B|REX_H|REX_P|REX_W);
 826             codes++, length++;
 827             break;
 828
 829         case4(014):
 830         case4(020):
 831         case4(024):
 832             length++;
 833             break;
 834
 835         case4(030):
 836             length += 2;
 837             break;
 838
 839         case4(034):
 840             if (opx->type & (BITS16 | BITS32 | BITS64))
 841                 length += (opx->type & BITS16) ? 2 : 4;
 842             else
 843                 length += (bits == 16) ? 2 : 4;
 844             break;
 845
 846         case4(040):
 847             length += 4;
 848             break;
 849
 850         case4(044):
 851             length += ins->addr_size >> 3;
 852             break;
 853
 854         case4(050):
 855             length++;
 856             break;
 857
 858         case4(054):
 859             length += 8; /* MOV reg64/imm */
 860             break;
 861
 862         case4(060):
 863             length += 2;
 864             break;
 865
 866         case4(064):
 867             if (opx->type & (BITS16 | BITS32 | BITS64))
 868                 length += (opx->type & BITS16) ? 2 : 4;
 869             else
 870                 length += (bits == 16) ? 2 : 4;
 871             break;
 872
 873         case4(070):
 874             length += 4;
 875             break;
 876
 877         case4(074):
 878             length += 2;
 879             break;
 880
 881         case4(0140):
 882             length += is_sbyte16(opx) ? 1 : 2;
 883             break;
 884
 885         case4(0144):
 886             codes++;
 887             length++;
 888             break;
 889
 890         case4(0150):
 891             length += is_sbyte32(opx) ? 1 : 4;
 892             break;
 893
 894         case4(0154):
 895             codes++;
 896             length++;
 897             break;
 898
 899         case4(0160):
 900             length++;
 901             ins->rex |= REX_D;
 902             ins->drexdst = regval(opx);
 903             break;
 904
 905         case4(0164):
 906             length++;
 907             ins->rex |= REX_D|REX_OC;
 908             ins->drexdst = regval(opx);
 909             break;
 910
 911         case 0171:
 912             break;
 913
 914         case 0172:
 915         case 0173:
 916         case 0174:
 917             codes++;
 918             length++;
 919             break;
 920
 921         case4(0250):
 922             length += is_sbyte32(opx) ? 1 : 4;
 923             break;
 924
 925         case4(0254):
 926             length += 4;
 927             break;
 928
             /* VEX: two parameter bytes follow; the prefix itself is sized after the loop */
 929         case4(0260):
 930             ins->rex |= REX_V;
 931             ins->drexdst = regval(opx);
 932             ins->vex_m = *codes++;
 933             ins->vex_wlp = *codes++;
 934             break;
 935
 936         case 0270:
 937             ins->rex |= REX_V;
 938             ins->drexdst = 0;
 939             ins->vex_m = *codes++;
 940             ins->vex_wlp = *codes++;
 941             break;
 942
 943         case4(0274):
 944             length++;
 945             break;
 946
 947         case4(0300):
 948             break;
 949
 950         case 0310:
 951             if (bits == 64)
 952                 return -1;
 953             length += (bits != 16) && !has_prefix(ins, PPS_ASIZE, P_A16);
 954             break;
 955
 956         case 0311:
 957             length += (bits != 32) && !has_prefix(ins, PPS_ASIZE, P_A32);
 958             break;
 959
 960         case 0312:
 961             break;
 962
 963         case 0313:
 964             if (bits != 64 || has_prefix(ins, PPS_ASIZE, P_A16) ||
 965                 has_prefix(ins, PPS_ASIZE, P_A32))
 966                 return -1;
 967             break;
 968
 969         case4(0314):
 970             break;
 971
 972         case 0320:
 973             length += (bits != 16);
 974             break;
 975
 976         case 0321:
 977             length += (bits == 16);
 978             break;
 979
 980         case 0322:
 981             break;
 982
             /* REX.W is stripped from ins->rex once the loop has finished */
 983         case 0323:
 984             rex_mask &= ~REX_W;
 985             break;
 986
 987         case 0324:
 988             ins->rex |= REX_W;
 989             break;
 990
 991         case 0330:
 992             codes++, length++;
 993             break;
 994
 995         case 0331:
 996             break;
 997
 998         case 0332:
 999         case 0333:
1000             length++;
1001             break;
1002
1003         case 0334:
1004             ins->rex |= REX_L;
1005             break;
1006
1007         case 0335:
1008             break;
1009
1010         case 0336:
1011             if (!ins->prefixes[PPS_LREP])
1012                 ins->prefixes[PPS_LREP] = P_REP;
1013             break;
1014
1015         case 0337:
1016             if (!ins->prefixes[PPS_LREP])
1017                 ins->prefixes[PPS_LREP] = P_REPNE;
1018             break;
1019
1020         case 0340:
1021             if (ins->oprs[0].segment != NO_SEG)
1022                 errfunc(ERR_NONFATAL, "attempt to reserve non-constant"
1023                         " quantity of BSS space");
1024             else
1025                 length += ins->oprs[0].offset;
1026             break;
1027
1028         case4(0344):
1029             length++;
1030             break;
1031
1032         case 0360:
1033             break;
1034
1035         case 0361:
1036         case 0362:
1037         case 0363:
1038             length++;
1039             break;
1040
1041         case 0364:
1042         case 0365:
1043             break;
1044
1045         case 0366:
1046         case 0367:
1047             length++;
1048             break;
1049
1050         case 0370:
1051         case 0371:
1052         case 0372:
1053             break;
1054
1055         case 0373:
1056             length++;
1057             break;
1058
             /*
              * Effective-address codes: process_ea() reports the size of the
              * encoded EA and any REX bits it requires.
              */
1059         case4(0100):
1060         case4(0110):
1061         case4(0120):
1062         case4(0130):
1063         case4(0200):
1064         case4(0204):
1065         case4(0210):
1066         case4(0214):
1067         case4(0220):
1068         case4(0224):
1069         case4(0230):
1070         case4(0234):
1071             {
1072                 ea ea_data;
1073                 int rfield;
1074                 int32_t rflags;
1075                 struct operand *opy = &ins->oprs[op2];
1076
1077                 ea_data.rex = 0;           /* Ensure ea.REX is initially 0 */
1078
1079                 if (c <= 0177) {
1080                     /* pick rfield from operand b (opx) */
1081                     rflags = regflag(opx);
1082                     rfield = nasm_regvals[opx->basereg];
1083                 } else {
                         /* codes 0200 and up carry the field value in their low bits */
1084                     rflags = 0;
1085                     rfield = c & 7;
1086                 }
1087                 if (!process_ea(opy, &ea_data, bits,
1088                                 ins->addr_size, rfield, rflags)) {
1089                     errfunc(ERR_NONFATAL, "invalid effective address");
1090                     return -1;
1091                 } else {
1092                     ins->rex |= ea_data.rex;
1093                     length += ea_data.size;
1094                 }
1095             }
1096             break;
1097
1098         default:
1099             errfunc(ERR_PANIC, "internal instruction table corrupt"
1100                     ": instruction code 0x%02X given", c);
1101             break;
1102         }
1103     }
1104
1105     ins->rex &= rex_mask;
1106
1107     if (ins->rex & REX_V) {
1108         int bad32 = REX_R|REX_W|REX_X|REX_B;
1109
1110         if (ins->rex & REX_H) {
1111             errfunc(ERR_NONFATAL, "cannot use high register in vex instruction");
1112             return -1;
1113         }
1114         switch (ins->vex_wlp & 030) {
1115         case 000:
1116         case 020:
1117             ins->rex &= ~REX_W;
1118             break;
1119         case 010:
1120             ins->rex |= REX_W;
1121             bad32 &= ~REX_W;
1122             break;
1123         case 030:
1124             /* Follow REX_W */
1125             break;
1126         }
1127
1128         if (bits != 64 && ((ins->rex & bad32) || ins->drexdst > 7)) {
1129             errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
1130             return -1;
1131         }
             /*
              * This test must mirror the one in gencode(), which emits the
              * three-byte 0xC4 form when vex_m != 1 or any of REX_W, REX_X,
              * REX_B is set, and the two-byte 0xC5 form otherwise.  The
              * two-byte form encodes R but has no room for W, X or B, so the
              * mask is W|X|B; testing REX_R here made the computed length
              * disagree with the bytes actually emitted.
              */
1132         if (ins->vex_m != 1 || (ins->rex & (REX_W|REX_X|REX_B)))
1133             length += 3;
1134         else
1135             length += 2;
1136     } else if (ins->rex & REX_D) {
1137         if (ins->rex & REX_H) {
1138             errfunc(ERR_NONFATAL, "cannot use high register in drex instruction");
1139             return -1;
1140         }
1141         if (bits != 64 && ((ins->rex & (REX_R|REX_W|REX_X|REX_B)) ||
1142                            ins->drexdst > 7)) {
1143             errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
1144             return -1;
1145         }
1146         length++;
1147     } else if (ins->rex & REX_REAL) {
1148         if (ins->rex & REX_H) {
1149             errfunc(ERR_NONFATAL, "cannot use high register in rex instruction");
1150             return -1;
1151         } else if (bits == 64) {
1152             length++;
1153         } else if ((ins->rex & REX_L) &&
1154                    !(ins->rex & (REX_P|REX_W|REX_X|REX_B)) &&
1155                    cpu >= IF_X86_64) {
1156             /* LOCK-as-REX.R */
1157             assert_no_prefix(ins, PPS_LREP);
1158             length++;
1159         } else {
1160             errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
1161             return -1;
1162         }
1163     }
1164
1165     return length;
1166 }
1167
1168 #define EMIT_REX()                                                      \
1169     if (!(ins->rex & (REX_D|REX_V)) && (ins->rex & REX_REAL) && (bits == 64)) { \
1170         ins->rex = (ins->rex & REX_REAL)|REX_P;                         \
1171         out(offset, segment, &ins->rex, OUT_RAWDATA, 1, NO_SEG, NO_SEG); \
1172         ins->rex = 0;                                                   \
1173         offset += 1; \
1174     }
1175
1176 static void gencode(int32_t segment, int64_t offset, int bits,
1177                     insn * ins, const struct itemplate *temp,
1178                     int64_t insn_end)
1179 {
1180     static char condval[] = {   /* conditional opcodes */
1181         0x7, 0x3, 0x2, 0x6, 0x2, 0x4, 0xF, 0xD, 0xC, 0xE, 0x6, 0x2,
1182         0x3, 0x7, 0x3, 0x5, 0xE, 0xC, 0xD, 0xF, 0x1, 0xB, 0x9, 0x5,
1183         0x0, 0xA, 0xA, 0xB, 0x8, 0x4
1184     };
1185     uint8_t c;
1186     uint8_t bytes[4];
1187     int64_t size;
1188     int64_t data;
1189     int op1, op2;
1190     struct operand *opx;
1191     const uint8_t *codes = temp->code;
1192     uint8_t opex = 0;
1193
1194     while (*codes) {
1195         c = *codes++;
1196         op1 = (c & 3) + ((opex & 1) << 2);
1197         op2 = ((c >> 3) & 3) + ((opex & 2) << 1);
1198         opx = &ins->oprs[op1];
1199         opex = 0;               /* For the next iteration */
1200
1201         switch (c) {
1202         case 01:
1203         case 02:
1204         case 03:
1205         case 04:
1206             EMIT_REX();
1207             out(offset, segment, codes, OUT_RAWDATA, c, NO_SEG, NO_SEG);
1208             codes += c;
1209             offset += c;
1210             break;
1211
1212         case 05:
1213         case 06:
1214         case 07:
1215             opex = c;
1216             break;
1217
1218         case4(010):
1219             EMIT_REX();
1220             bytes[0] = *codes++ + (regval(opx) & 7);
1221             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1222             offset += 1;
1223             break;
1224
1225         case4(014):
1226             /* The test for BITS8 and SBYTE here is intended to avoid
1227                warning on optimizer actions due to SBYTE, while still
1228                warn on explicit BYTE directives.  Also warn, obviously,
1229                if the optimizer isn't enabled. */
1230             if (((opx->type & BITS8) ||
1231                  !(opx->type & temp->opd[op1] & BYTENESS)) &&
1232                 (opx->offset < -128 || opx->offset > 127)) {
1233                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1234                         "signed byte value exceeds bounds");
1235             }
1236             if (opx->segment != NO_SEG) {
1237                 data = opx->offset;
1238                 out(offset, segment, &data, OUT_ADDRESS, 1,
1239                     opx->segment, opx->wrt);
1240             } else {
1241                 bytes[0] = opx->offset;
1242                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1243                     NO_SEG);
1244             }
1245             offset += 1;
1246             break;
1247
1248         case4(020):
1249             if (opx->offset < -256 || opx->offset > 255) {
1250                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1251                         "byte value exceeds bounds");
1252             }
1253             if (opx->segment != NO_SEG) {
1254                 data = opx->offset;
1255                 out(offset, segment, &data, OUT_ADDRESS, 1,
1256                     opx->segment, opx->wrt);
1257             } else {
1258                 bytes[0] = opx->offset;
1259                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1260                     NO_SEG);
1261             }
1262             offset += 1;
1263             break;
1264
1265         case4(024):
1266             if (opx->offset < 0 || opx->offset > 255)
1267                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1268                         "unsigned byte value exceeds bounds");
1269             if (opx->segment != NO_SEG) {
1270                 data = opx->offset;
1271                 out(offset, segment, &data, OUT_ADDRESS, 1,
1272                     opx->segment, opx->wrt);
1273             } else {
1274                 bytes[0] = opx->offset;
1275                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1276                     NO_SEG);
1277             }
1278             offset += 1;
1279             break;
1280
1281         case4(030):
1282             warn_overflow(2, opx);
1283             data = opx->offset;
1284             out(offset, segment, &data, OUT_ADDRESS, 2,
1285                 opx->segment, opx->wrt);
1286             offset += 2;
1287             break;
1288
1289         case4(034):
1290             if (opx->type & (BITS16 | BITS32))
1291                 size = (opx->type & BITS16) ? 2 : 4;
1292             else
1293                 size = (bits == 16) ? 2 : 4;
1294             warn_overflow(size, opx);
1295             data = opx->offset;
1296             out(offset, segment, &data, OUT_ADDRESS, size,
1297                 opx->segment, opx->wrt);
1298             offset += size;
1299             break;
1300
1301         case4(040):
1302             warn_overflow(4, opx);
1303             data = opx->offset;
1304             out(offset, segment, &data, OUT_ADDRESS, 4,
1305                 opx->segment, opx->wrt);
1306             offset += 4;
1307             break;
1308
1309         case4(044):
1310             data = opx->offset;
1311             size = ins->addr_size >> 3;
1312             warn_overflow(size, opx);
1313             out(offset, segment, &data, OUT_ADDRESS, size,
1314                 opx->segment, opx->wrt);
1315             offset += size;
1316             break;
1317
1318         case4(050):
1319             if (opx->segment != segment)
1320                 errfunc(ERR_NONFATAL,
1321                         "short relative jump outside segment");
1322             data = opx->offset - insn_end;
1323             if (data > 127 || data < -128)
1324                 errfunc(ERR_NONFATAL, "short jump is out of range");
1325             bytes[0] = data;
1326             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1327             offset += 1;
1328             break;
1329
1330         case4(054):
1331             data = (int64_t)opx->offset;
1332             out(offset, segment, &data, OUT_ADDRESS, 8,
1333                 opx->segment, opx->wrt);
1334             offset += 8;
1335             break;
1336
1337         case4(060):
1338             if (opx->segment != segment) {
1339                 data = opx->offset;
1340                 out(offset, segment, &data,
1341                     OUT_REL2ADR, insn_end - offset,
1342                     opx->segment, opx->wrt);
1343             } else {
1344                 data = opx->offset - insn_end;
1345                 out(offset, segment, &data,
1346                     OUT_ADDRESS, 2, NO_SEG, NO_SEG);
1347             }
1348             offset += 2;
1349             break;
1350
1351         case4(064):
1352             if (opx->type & (BITS16 | BITS32 | BITS64))
1353                 size = (opx->type & BITS16) ? 2 : 4;
1354             else
1355                 size = (bits == 16) ? 2 : 4;
1356             if (opx->segment != segment) {
1357                 data = opx->offset;
1358                 out(offset, segment, &data,
1359                     size == 2 ? OUT_REL2ADR : OUT_REL4ADR,
1360                     insn_end - offset, opx->segment, opx->wrt);
1361             } else {
1362                 data = opx->offset - insn_end;
1363                 out(offset, segment, &data,
1364                     OUT_ADDRESS, size, NO_SEG, NO_SEG);
1365             }
1366             offset += size;
1367             break;
1368
1369         case4(070):
1370             if (opx->segment != segment) {
1371                 data = opx->offset;
1372                 out(offset, segment, &data,
1373                     OUT_REL4ADR, insn_end - offset,
1374                     opx->segment, opx->wrt);
1375             } else {
1376                 data = opx->offset - insn_end;
1377                 out(offset, segment, &data,
1378                     OUT_ADDRESS, 4, NO_SEG, NO_SEG);
1379             }
1380             offset += 4;
1381             break;
1382
1383         case4(074):
1384             if (opx->segment == NO_SEG)
1385                 errfunc(ERR_NONFATAL, "value referenced by FAR is not"
1386                         " relocatable");
1387             data = 0;
1388             out(offset, segment, &data, OUT_ADDRESS, 2,
1389                 outfmt->segbase(1 + opx->segment),
1390                 opx->wrt);
1391             offset += 2;
1392             break;
1393
1394         case4(0140):
1395             data = opx->offset;
1396             warn_overflow(2, opx);
1397             if (is_sbyte16(opx)) {
1398                 bytes[0] = data;
1399                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1400                     NO_SEG);
1401                 offset++;
1402             } else {
1403                 out(offset, segment, &data, OUT_ADDRESS, 2,
1404                     opx->segment, opx->wrt);
1405                 offset += 2;
1406             }
1407             break;
1408
1409         case4(0144):
1410             EMIT_REX();
1411             bytes[0] = *codes++;
1412             if (is_sbyte16(opx))
1413                 bytes[0] |= 2;  /* s-bit */
1414             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1415             offset++;
1416             break;
1417
1418         case4(0150):
1419             data = opx->offset;
1420             warn_overflow(4, opx);
1421             if (is_sbyte32(opx)) {
1422                 bytes[0] = data;
1423                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1424                     NO_SEG);
1425                 offset++;
1426             } else {
1427                 out(offset, segment, &data, OUT_ADDRESS, 4,
1428                     opx->segment, opx->wrt);
1429                 offset += 4;
1430             }
1431             break;
1432
1433         case4(0154):
1434             EMIT_REX();
1435             bytes[0] = *codes++;
1436             if (is_sbyte32(opx))
1437                 bytes[0] |= 2;  /* s-bit */
1438             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1439             offset++;
1440             break;
1441
1442         case4(0160):
1443         case4(0164):
1444             break;
1445
1446         case 0171:
1447             bytes[0] =
1448                 (ins->drexdst << 4) |
1449                 (ins->rex & REX_OC ? 0x08 : 0) |
1450                 (ins->rex & (REX_R|REX_X|REX_B));
1451             ins->rex = 0;
1452             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1453             offset++;
1454             break;
1455
1456         case 0172:
1457             c = *codes++;
1458             opx = &ins->oprs[c >> 3];
1459             bytes[0] = nasm_regvals[opx->basereg] << 4;
1460             opx = &ins->oprs[c & 7];
1461             if (opx->segment != NO_SEG || opx->wrt != NO_SEG) {
1462                 errfunc(ERR_NONFATAL,
1463                         "non-absolute expression not permitted as argument %d",
1464                         c & 7);
1465             } else {
1466                 if (opx->offset & ~15) {
1467                     errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1468                             "four-bit argument exceeds bounds");
1469                 }
1470                 bytes[0] |= opx->offset & 15;
1471             }
1472             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1473             offset++;
1474             break;
1475
1476         case 0173:
1477             c = *codes++;
1478             opx = &ins->oprs[c >> 4];
1479             bytes[0] = nasm_regvals[opx->basereg] << 4;
1480             bytes[0] |= c & 15;
1481             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1482             offset++;
1483             break;
1484
1485         case 0174:
1486             c = *codes++;
1487             opx = &ins->oprs[c];
1488             bytes[0] = nasm_regvals[opx->basereg] << 4;
1489             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1490             offset++;
1491             break;
1492
1493         case4(0250):
1494             data = opx->offset;
1495             if (opx->wrt == NO_SEG && opx->segment == NO_SEG &&
1496                 (int32_t)data != (int64_t)data) {
1497                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1498                         "signed dword immediate exceeds bounds");
1499             }
1500             if (is_sbyte32(opx)) {
1501                 bytes[0] = data;
1502                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1503                     NO_SEG);
1504                 offset++;
1505             } else {
1506                 out(offset, segment, &data, OUT_ADDRESS, 4,
1507                     opx->segment, opx->wrt);
1508                 offset += 4;
1509             }
1510             break;
1511
1512         case4(0254):
1513             data = opx->offset;
1514             if (opx->wrt == NO_SEG && opx->segment == NO_SEG &&
1515                 (int32_t)data != (int64_t)data) {
1516                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1517                         "signed dword immediate exceeds bounds");
1518             }
1519             out(offset, segment, &data, OUT_ADDRESS, 4,
1520                 opx->segment, opx->wrt);
1521             offset += 4;
1522             break;
1523
1524         case4(0260):
1525         case 0270:
1526             codes += 2;
1527             if (ins->vex_m != 1 || (ins->rex & (REX_W|REX_X|REX_B))) {
1528                 bytes[0] = 0xc4;
1529                 bytes[1] = ins->vex_m | ((~ins->rex & 7) << 5);
1530                 bytes[2] = ((ins->rex & REX_W) << (7-3)) |
1531                     ((~ins->drexdst & 15)<< 3) | (ins->vex_wlp & 07);
1532                 out(offset, segment, &bytes, OUT_RAWDATA, 3, NO_SEG, NO_SEG);
1533                 offset += 3;
1534             } else {
1535                 bytes[0] = 0xc5;
1536                 bytes[1] = ((~ins->rex & REX_R) << (7-2)) |
1537                     ((~ins->drexdst & 15) << 3) | (ins->vex_wlp & 07);
1538                 out(offset, segment, &bytes, OUT_RAWDATA, 2, NO_SEG, NO_SEG);
1539                 offset += 2;
1540             }
1541             break;
1542
1543         case4(0274):
1544         {
1545             uint64_t uv, um;
1546             int s;
1547
1548             if (ins->rex & REX_W)
1549                 s = 64;
1550             else if (ins->prefixes[PPS_OSIZE] == P_O16)
1551                 s = 16;
1552             else if (ins->prefixes[PPS_OSIZE] == P_O32)
1553                 s = 32;
1554             else
1555                 s = bits;
1556
1557             um = (uint64_t)2 << (s-1);
1558             uv = opx->offset;
1559
1560             if (uv > 127 && uv < (uint64_t)-128 &&
1561                 (uv < um-128 || uv > um-1)) {
1562                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1563                         "signed byte value exceeds bounds");
1564             }
1565             if (opx->segment != NO_SEG) {
1566                 data = uv;
1567                 out(offset, segment, &data, OUT_ADDRESS, 1,
1568                     opx->segment, opx->wrt);
1569             } else {
1570                 bytes[0] = uv;
1571                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1572                     NO_SEG);
1573             }
1574             offset += 1;
1575             break;
1576         }
1577
1578         case4(0300):
1579             break;
1580
1581         case 0310:
1582             if (bits == 32 && !has_prefix(ins, PPS_ASIZE, P_A16)) {
1583                 *bytes = 0x67;
1584                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1585                 offset += 1;
1586             } else
1587                 offset += 0;
1588             break;
1589
1590         case 0311:
1591             if (bits != 32 && !has_prefix(ins, PPS_ASIZE, P_A32)) {
1592                 *bytes = 0x67;
1593                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1594                 offset += 1;
1595             } else
1596                 offset += 0;
1597             break;
1598
1599         case 0312:
1600             break;
1601
1602         case 0313:
1603             ins->rex = 0;
1604             break;
1605
1606         case4(0314):
1607             break;
1608
1609         case 0320:
1610             if (bits != 16) {
1611                 *bytes = 0x66;
1612                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1613                 offset += 1;
1614             } else
1615                 offset += 0;
1616             break;
1617
1618         case 0321:
1619             if (bits == 16) {
1620                 *bytes = 0x66;
1621                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1622                 offset += 1;
1623             } else
1624                 offset += 0;
1625             break;
1626
1627         case 0322:
1628         case 0323:
1629             break;
1630
1631         case 0324:
1632             ins->rex |= REX_W;
1633             break;
1634
1635         case 0330:
1636             *bytes = *codes++ ^ condval[ins->condition];
1637             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1638             offset += 1;
1639             break;
1640
1641         case 0331:
1642             break;
1643
1644         case 0332:
1645         case 0333:
1646             *bytes = c - 0332 + 0xF2;
1647             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1648             offset += 1;
1649             break;
1650
1651         case 0334:
1652             if (ins->rex & REX_R) {
1653                 *bytes = 0xF0;
1654                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1655                 offset += 1;
1656             }
1657             ins->rex &= ~(REX_L|REX_R);
1658             break;
1659
1660         case 0335:
1661             break;
1662
1663         case 0336:
1664         case 0337:
1665             break;
1666
1667         case 0340:
1668             if (ins->oprs[0].segment != NO_SEG)
1669                 errfunc(ERR_PANIC, "non-constant BSS size in pass two");
1670             else {
1671                 int64_t size = ins->oprs[0].offset;
1672                 if (size > 0)
1673                     out(offset, segment, NULL,
1674                         OUT_RESERVE, size, NO_SEG, NO_SEG);
1675                 offset += size;
1676             }
1677             break;
1678
1679         case 0344:
1680         case 0345:
1681             bytes[0] = c & 1;
1682             switch (ins->oprs[0].basereg) {
1683             case R_CS:
1684                 bytes[0] += 0x0E;
1685                 break;
1686             case R_DS:
1687                 bytes[0] += 0x1E;
1688                 break;
1689             case R_ES:
1690                 bytes[0] += 0x06;
1691                 break;
1692             case R_SS:
1693                 bytes[0] += 0x16;
1694                 break;
1695             default:
1696                 errfunc(ERR_PANIC,
1697                         "bizarre 8086 segment register received");
1698             }
1699             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1700             offset++;
1701             break;
1702
1703         case 0346:
1704         case 0347:
1705             bytes[0] = c & 1;
1706             switch (ins->oprs[0].basereg) {
1707             case R_FS:
1708                 bytes[0] += 0xA0;
1709                 break;
1710             case R_GS:
1711                 bytes[0] += 0xA8;
1712                 break;
1713             default:
1714                 errfunc(ERR_PANIC,
1715                         "bizarre 386 segment register received");
1716             }
1717             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1718             offset++;
1719             break;
1720
1721         case 0360:
1722             break;
1723
1724         case 0361:
1725             bytes[0] = 0x66;
1726             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1727             offset += 1;
1728             break;
1729
1730         case 0362:
1731         case 0363:
1732             bytes[0] = c - 0362 + 0xf2;
1733             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1734             offset += 1;
1735             break;
1736
1737         case 0364:
1738         case 0365:
1739             break;
1740
1741         case 0366:
1742         case 0367:
1743             *bytes = c - 0366 + 0x66;
1744             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1745             offset += 1;
1746             break;
1747
1748         case 0370:
1749         case 0371:
1750         case 0372:
1751             break;
1752
1753         case 0373:
1754             *bytes = bits == 16 ? 3 : 5;
1755             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1756             offset += 1;
1757             break;
1758
1759         case4(0100):
1760         case4(0110):
1761         case4(0120):
1762         case4(0130):
1763         case4(0200):
1764         case4(0204):
1765         case4(0210):
1766         case4(0214):
1767         case4(0220):
1768         case4(0224):
1769         case4(0230):
1770         case4(0234):
1771             {
1772                 ea ea_data;
1773                 int rfield;
1774                 int32_t rflags;
1775                 uint8_t *p;
1776                 int32_t s;
1777                 enum out_type type;
1778                 struct operand *opy = &ins->oprs[op2];
1779
1780                 if (c <= 0177) {
1781                     /* pick rfield from operand b (opx) */
1782                     rflags = regflag(opx);
1783                     rfield = nasm_regvals[opx->basereg];
1784                 } else {
1785                     /* rfield is constant */
1786                     rflags = 0;
1787                     rfield = c & 7;
1788                 }
1789
1790                 if (!process_ea(opy, &ea_data, bits, ins->addr_size,
1791                                 rfield, rflags)) {
1792                     errfunc(ERR_NONFATAL, "invalid effective address");
1793                 }
1794
1795
1796                 p = bytes;
1797                 *p++ = ea_data.modrm;
1798                 if (ea_data.sib_present)
1799                     *p++ = ea_data.sib;
1800
1801                 /* DREX suffixes come between the SIB and the displacement */
1802                 if (ins->rex & REX_D) {
1803                     *p++ = (ins->drexdst << 4) |
1804                            (ins->rex & REX_OC ? 0x08 : 0) |
1805                            (ins->rex & (REX_R|REX_X|REX_B));
1806                     ins->rex = 0;
1807                 }
1808
1809                 s = p - bytes;
1810                 out(offset, segment, bytes, OUT_RAWDATA, s, NO_SEG, NO_SEG);
1811
1812                 /*
1813                  * Make sure the address gets the right offset in case
1814                  * the line breaks in the .lst file (BR 1197827)
1815                  */
1816                 offset += s;
1817                 s = 0;
1818
1819                 switch (ea_data.bytes) {
1820                 case 0:
1821                     break;
1822                 case 1:
1823                 case 2:
1824                 case 4:
1825                 case 8:
1826                     data = opy->offset;
1827                     warn_overflow(ea_data.bytes, opy);
1828                     s += ea_data.bytes;
1829                     if (ea_data.rip) {
1830                         if (opy->segment == segment) {
1831                             data -= insn_end;
1832                             out(offset, segment, &data, OUT_ADDRESS,
1833                                 ea_data.bytes, NO_SEG, NO_SEG);
1834                         } else {
1835                             out(offset, segment, &data, OUT_REL4ADR,
1836                                 insn_end - offset, opy->segment, opy->wrt);
1837                         }
1838                     } else {
1839                         type = OUT_ADDRESS;
1840                         out(offset, segment, &data, OUT_ADDRESS,
1841                             ea_data.bytes, opy->segment, opy->wrt);
1842                     }
1843                     break;
1844                 default:
1845                     /* Impossible! */
1846                     errfunc(ERR_PANIC,
1847                             "Invalid amount of bytes (%d) for offset?!",
1848                             ea_data.bytes);
1849                     break;
1850                 }
1851                 offset += s;
1852             }
1853             break;
1854
1855         default:
1856             errfunc(ERR_PANIC, "internal instruction table corrupt"
1857                     ": instruction code 0x%02X given", c);
1858             break;
1859         }
1860     }
1861 }
1862
1863 static int32_t regflag(const operand * o)
1864 {
1865     if (o->basereg < EXPR_REG_START || o->basereg >= REG_ENUM_LIMIT) {
1866         errfunc(ERR_PANIC, "invalid operand passed to regflag()");
1867     }
1868     return nasm_reg_flags[o->basereg];
1869 }
1870
1871 static int32_t regval(const operand * o)
1872 {
1873     if (o->basereg < EXPR_REG_START || o->basereg >= REG_ENUM_LIMIT) {
1874         errfunc(ERR_PANIC, "invalid operand passed to regval()");
1875     }
1876     return nasm_regvals[o->basereg];
1877 }
1878
1879 static int op_rexflags(const operand * o, int mask)
1880 {
1881     int32_t flags;
1882     int val;
1883
1884     if (o->basereg < EXPR_REG_START || o->basereg >= REG_ENUM_LIMIT) {
1885         errfunc(ERR_PANIC, "invalid operand passed to op_rexflags()");
1886     }
1887
1888     flags = nasm_reg_flags[o->basereg];
1889     val = nasm_regvals[o->basereg];
1890
1891     return rexflags(val, flags, mask);
1892 }
1893
1894 static int rexflags(int val, int32_t flags, int mask)
1895 {
1896     int rex = 0;
1897
1898     if (val >= 8)
1899         rex |= REX_B|REX_X|REX_R;
1900     if (flags & BITS64)
1901         rex |= REX_W;
1902     if (!(REG_HIGH & ~flags))   /* AH, CH, DH, BH */
1903         rex |= REX_H;
1904     else if (!(REG8 & ~flags) && val >= 4) /* SPL, BPL, SIL, DIL */
1905         rex |= REX_P;
1906
1907     return rex & mask;
1908 }
1909
1910 static int matches(const struct itemplate *itemp, insn * instruction, int bits)
1911 {
1912     int i, size[MAX_OPERANDS], asize, oprs, ret;
1913
1914     ret = 100;
1915
1916     /*
1917      * Check the opcode
1918      */
1919     if (itemp->opcode != instruction->opcode)
1920         return 0;
1921
1922     /*
1923      * Count the operands
1924      */
1925     if (itemp->operands != instruction->operands)
1926         return 0;
1927
1928     /*
1929      * Check that no spurious colons or TOs are present
1930      */
1931     for (i = 0; i < itemp->operands; i++)
1932         if (instruction->oprs[i].type & ~itemp->opd[i] & (COLON | TO))
1933             return 0;
1934
1935     /*
1936      * Process size flags
1937      */
1938     if (itemp->flags & IF_ARMASK) {
1939         memset(size, 0, sizeof size);
1940
1941         i = ((itemp->flags & IF_ARMASK) >> IF_ARSHFT) - 1;
1942
1943         switch (itemp->flags & IF_SMASK) {
1944         case IF_SB:
1945             size[i] = BITS8;
1946             break;
1947         case IF_SW:
1948             size[i] = BITS16;
1949             break;
1950         case IF_SD:
1951             size[i] = BITS32;
1952             break;
1953         case IF_SQ:
1954             size[i] = BITS64;
1955             break;
1956         case IF_SO:
1957             size[i] = BITS128;
1958             break;
1959         case IF_SY:
1960             size[i] = BITS256;
1961             break;
1962         case IF_SZ:
1963             switch (bits) {
1964             case 16:
1965                 size[i] = BITS16;
1966                 break;
1967             case 32:
1968                 size[i] = BITS32;
1969                 break;
1970             case 64:
1971                 size[i] = BITS64;
1972                 break;
1973             }
1974             break;
1975         default:
1976             break;
1977         }
1978     } else {
1979         asize = 0;
1980         switch (itemp->flags & IF_SMASK) {
1981         case IF_SB:
1982             asize = BITS8;
1983             break;
1984         case IF_SW:
1985             asize = BITS16;
1986             break;
1987         case IF_SD:
1988             asize = BITS32;
1989             break;
1990         case IF_SQ:
1991             asize = BITS64;
1992             break;
1993         case IF_SO:
1994             asize = BITS128;
1995             break;
1996         case IF_SY:
1997             asize = BITS256;
1998             break;
1999         case IF_SZ:
2000             switch (bits) {
2001             case 16:
2002                 asize = BITS16;
2003                 break;
2004             case 32:
2005                 asize = BITS32;
2006                 break;
2007             case 64:
2008                 asize = BITS64;
2009                 break;
2010             }
2011             break;
2012         default:
2013             break;
2014         }
2015         for (i = 0; i < MAX_OPERANDS; i++)
2016             size[i] = asize;
2017     }
2018
2019     /*
2020      * Check that the operand flags all match up
2021      */
2022     for (i = 0; i < itemp->operands; i++) {
2023         int32_t type = instruction->oprs[i].type;
2024         if (!(type & SIZE_MASK))
2025             type |= size[i];
2026
2027         if (itemp->opd[i] & SAME_AS) {
2028             int j = itemp->opd[i] & ~SAME_AS;
2029             if (type != instruction->oprs[j].type ||
2030                 instruction->oprs[i].basereg != instruction->oprs[j].basereg)
2031                 return 0;
2032         } else if (itemp->opd[i] & ~type ||
2033             ((itemp->opd[i] & SIZE_MASK) &&
2034              ((itemp->opd[i] ^ type) & SIZE_MASK))) {
2035             if ((itemp->opd[i] & ~type & ~SIZE_MASK) ||
2036                 (type & SIZE_MASK))
2037                 return 0;
2038             else
2039                 return 1;
2040         }
2041     }
2042
2043     /*
2044      * Check operand sizes
2045      */
2046     if (itemp->flags & (IF_SM | IF_SM2)) {
2047         oprs = (itemp->flags & IF_SM2 ? 2 : itemp->operands);
2048         asize = 0;
2049         for (i = 0; i < oprs; i++) {
2050             if ((asize = itemp->opd[i] & SIZE_MASK) != 0) {
2051                 int j;
2052                 for (j = 0; j < oprs; j++)
2053                     size[j] = asize;
2054                 break;
2055             }
2056         }
2057     } else {
2058         oprs = itemp->operands;
2059     }
2060
2061     for (i = 0; i < itemp->operands; i++) {
2062         if (!(itemp->opd[i] & SIZE_MASK) &&
2063             (instruction->oprs[i].type & SIZE_MASK & ~size[i]))
2064             return 2;
2065     }
2066
2067     /*
2068      * Check template is okay at the set cpu level
2069      */
2070     if (((itemp->flags & IF_PLEVEL) > cpu))
2071         return 3;
2072
2073     /*
2074      * Check if instruction is available in long mode
2075      */
2076     if ((itemp->flags & IF_NOLONG) && (bits == 64))
2077         return 4;
2078
2079     /*
2080      * Check if special handling needed for Jumps
2081      */
2082     if ((uint8_t)(itemp->code[0]) >= 0370)
2083         return 99;
2084
2085     return ret;
2086 }
2087
2088 static ea *process_ea(operand * input, ea * output, int bits,
2089                       int addrbits, int rfield, int32_t rflags)
2090 {
2091     bool forw_ref = !!(input->opflags & OPFLAG_FORWARD);
2092
2093     output->rip = false;
2094
2095     /* REX flags for the rfield operand */
2096     output->rex |= rexflags(rfield, rflags, REX_R|REX_P|REX_W|REX_H);
2097
2098     if (!(REGISTER & ~input->type)) {   /* register direct */
2099         int i;
2100         int32_t f;
2101
2102         if (input->basereg < EXPR_REG_START /* Verify as Register */
2103             || input->basereg >= REG_ENUM_LIMIT)
2104             return NULL;
2105         f = regflag(input);
2106         i = nasm_regvals[input->basereg];
2107
2108         if (REG_EA & ~f)
2109             return NULL;        /* Invalid EA register */
2110
2111         output->rex |= op_rexflags(input, REX_B|REX_P|REX_W|REX_H);
2112
2113         output->sib_present = false;             /* no SIB necessary */
2114         output->bytes = 0;  /* no offset necessary either */
2115         output->modrm = 0xC0 | ((rfield & 7) << 3) | (i & 7);
2116     } else {                    /* it's a memory reference */
2117         if (input->basereg == -1
2118             && (input->indexreg == -1 || input->scale == 0)) {
2119             /* it's a pure offset */
2120             if (bits == 64 && (~input->type & IP_REL)) {
2121               int scale, index, base;
2122               output->sib_present = true;
2123               scale = 0;
2124               index = 4;
2125               base = 5;
2126               output->sib = (scale << 6) | (index << 3) | base;
2127               output->bytes = 4;
2128               output->modrm = 4 | ((rfield & 7) << 3);
2129               output->rip = false;
2130             } else {
2131               output->sib_present = false;
2132               output->bytes = (addrbits != 16 ? 4 : 2);
2133               output->modrm = (addrbits != 16 ? 5 : 6) | ((rfield & 7) << 3);
2134               output->rip = bits == 64;
2135             }
2136         } else {                /* it's an indirection */
2137             int i = input->indexreg, b = input->basereg, s = input->scale;
2138             int32_t o = input->offset, seg = input->segment;
2139             int hb = input->hintbase, ht = input->hinttype;
2140             int t;
2141             int it, bt;
2142             int32_t ix, bx;     /* register flags */
2143
2144             if (s == 0)
2145                 i = -1;         /* make this easy, at least */
2146
2147             if (i >= EXPR_REG_START && i < REG_ENUM_LIMIT) {
2148                 it = nasm_regvals[i];
2149                 ix = nasm_reg_flags[i];
2150             } else {
2151                 it = -1;
2152                 ix = 0;
2153             }
2154
2155             if (b >= EXPR_REG_START && b < REG_ENUM_LIMIT) {
2156                 bt = nasm_regvals[b];
2157                 bx = nasm_reg_flags[b];
2158             } else {
2159                 bt = -1;
2160                 bx = 0;
2161             }
2162
2163             /* check for a 32/64-bit memory reference... */
2164             if ((ix|bx) & (BITS32|BITS64)) {
2165                 /* it must be a 32/64-bit memory reference. Firstly we have
2166                  * to check that all registers involved are type E/Rxx. */
2167                 int32_t sok = BITS32|BITS64;
2168
2169                 if (it != -1) {
2170                     if (!(REG64 & ~ix) || !(REG32 & ~ix))
2171                         sok &= ix;
2172                     else
2173                         return NULL;
2174                 }
2175
2176                 if (bt != -1) {
2177                     if (REG_GPR & ~bx)
2178                         return NULL; /* Invalid register */
2179                     if (~sok & bx & SIZE_MASK)
2180                         return NULL; /* Invalid size */
2181                     sok &= bx;
2182                 }
2183
2184                 /* While we're here, ensure the user didn't specify
2185                    WORD or QWORD. */
2186                 if (input->disp_size == 16 || input->disp_size == 64)
2187                     return NULL;
2188
2189                 if (addrbits == 16 ||
2190                     (addrbits == 32 && !(sok & BITS32)) ||
2191                     (addrbits == 64 && !(sok & BITS64)))
2192                     return NULL;
2193
2194                 /* now reorganize base/index */
2195                 if (s == 1 && bt != it && bt != -1 && it != -1 &&
2196                     ((hb == b && ht == EAH_NOTBASE)
2197                      || (hb == i && ht == EAH_MAKEBASE))) {
2198                     /* swap if hints say so */
2199                     t = bt, bt = it, it = t;
2200                     t = bx, bx = ix, ix = t;
2201                 }
2202                 if (bt == it)     /* convert EAX+2*EAX to 3*EAX */
2203                     bt = -1, bx = 0, s++;
2204                 if (bt == -1 && s == 1 && !(hb == it && ht == EAH_NOTBASE)) {
2205                     /* make single reg base, unless hint */
2206                     bt = it, bx = ix, it = -1, ix = 0;
2207                 }
2208                 if (((s == 2 && it != REG_NUM_ESP
2209                       && !(input->eaflags & EAF_TIMESTWO)) || s == 3
2210                      || s == 5 || s == 9) && bt == -1)
2211                     bt = it, bx = ix, s--; /* convert 3*EAX to EAX+2*EAX */
2212                 if (it == -1 && (bt & 7) != REG_NUM_ESP
2213                     && (input->eaflags & EAF_TIMESTWO))
2214                     it = bt, ix = bx, bt = -1, bx = 0, s = 1;
2215                 /* convert [NOSPLIT EAX] to sib format with 0x0 displacement */
2216                 if (s == 1 && it == REG_NUM_ESP) {
2217                     /* swap ESP into base if scale is 1 */
2218                     t = it, it = bt, bt = t;
2219                     t = ix, ix = bx, bx = t;
2220                 }
2221                 if (it == REG_NUM_ESP
2222                     || (s != 1 && s != 2 && s != 4 && s != 8 && it != -1))
2223                     return NULL;        /* wrong, for various reasons */
2224
2225                 output->rex |= rexflags(it, ix, REX_X);
2226                 output->rex |= rexflags(bt, bx, REX_B);
2227
2228                 if (it == -1 && (bt & 7) != REG_NUM_ESP) {
2229                     /* no SIB needed */
2230                     int mod, rm;
2231
2232                     if (bt == -1) {
2233                         rm = 5;
2234                         mod = 0;
2235                     } else {
2236                         rm = (bt & 7);
2237                         if (rm != REG_NUM_EBP && o == 0 &&
2238                                 seg == NO_SEG && !forw_ref &&
2239                                 !(input->eaflags &
2240                                   (EAF_BYTEOFFS | EAF_WORDOFFS)))
2241                             mod = 0;
2242                         else if (input->eaflags & EAF_BYTEOFFS ||
2243                                  (o >= -128 && o <= 127 && seg == NO_SEG
2244                                   && !forw_ref
2245                                   && !(input->eaflags & EAF_WORDOFFS)))
2246                             mod = 1;
2247                         else
2248                             mod = 2;
2249                     }
2250
2251                     output->sib_present = false;
2252                     output->bytes = (bt == -1 || mod == 2 ? 4 : mod);
2253                     output->modrm = (mod << 6) | ((rfield & 7) << 3) | rm;
2254                 } else {
2255                     /* we need a SIB */
2256                     int mod, scale, index, base;
2257
2258                     if (it == -1)
2259                         index = 4, s = 1;
2260                     else
2261                         index = (it & 7);
2262
2263                     switch (s) {
2264                     case 1:
2265                         scale = 0;
2266                         break;
2267                     case 2:
2268                         scale = 1;
2269                         break;
2270                     case 4:
2271                         scale = 2;
2272                         break;
2273                     case 8:
2274                         scale = 3;
2275                         break;
2276                     default:   /* then what the smeg is it? */
2277                         return NULL;    /* panic */
2278                     }
2279
2280                     if (bt == -1) {
2281                         base = 5;
2282                         mod = 0;
2283                     } else {
2284                         base = (bt & 7);
2285                         if (base != REG_NUM_EBP && o == 0 &&
2286                                     seg == NO_SEG && !forw_ref &&
2287                                     !(input->eaflags &
2288                                       (EAF_BYTEOFFS | EAF_WORDOFFS)))
2289                             mod = 0;
2290                         else if (input->eaflags & EAF_BYTEOFFS ||
2291                                  (o >= -128 && o <= 127 && seg == NO_SEG
2292                                   && !forw_ref
2293                                   && !(input->eaflags & EAF_WORDOFFS)))
2294                             mod = 1;
2295                         else
2296                             mod = 2;
2297                     }
2298
2299                     output->sib_present = true;
2300                     output->bytes =  (bt == -1 || mod == 2 ? 4 : mod);
2301                     output->modrm = (mod << 6) | ((rfield & 7) << 3) | 4;
2302                     output->sib = (scale << 6) | (index << 3) | base;
2303                 }
2304             } else {            /* it's 16-bit */
2305                 int mod, rm;
2306
2307                 /* check for 64-bit long mode */
2308                 if (addrbits == 64)
2309                     return NULL;
2310
2311                 /* check all registers are BX, BP, SI or DI */
2312                 if ((b != -1 && b != R_BP && b != R_BX && b != R_SI
2313                      && b != R_DI) || (i != -1 && i != R_BP && i != R_BX
2314                                        && i != R_SI && i != R_DI))
2315                     return NULL;
2316
2317                 /* ensure the user didn't specify DWORD/QWORD */
2318                 if (input->disp_size == 32 || input->disp_size == 64)
2319                     return NULL;
2320
2321                 if (s != 1 && i != -1)
2322                     return NULL;        /* no can do, in 16-bit EA */
2323                 if (b == -1 && i != -1) {
2324                     int tmp = b;
2325                     b = i;
2326                     i = tmp;
2327                 }               /* swap */
2328                 if ((b == R_SI || b == R_DI) && i != -1) {
2329                     int tmp = b;
2330                     b = i;
2331                     i = tmp;
2332                 }
2333                 /* have BX/BP as base, SI/DI index */
2334                 if (b == i)
2335                     return NULL;        /* shouldn't ever happen, in theory */
2336                 if (i != -1 && b != -1 &&
2337                     (i == R_BP || i == R_BX || b == R_SI || b == R_DI))
2338                     return NULL;        /* invalid combinations */
2339                 if (b == -1)    /* pure offset: handled above */
2340                     return NULL;        /* so if it gets to here, panic! */
2341
2342                 rm = -1;
2343                 if (i != -1)
2344                     switch (i * 256 + b) {
2345                     case R_SI * 256 + R_BX:
2346                         rm = 0;
2347                         break;
2348                     case R_DI * 256 + R_BX:
2349                         rm = 1;
2350                         break;
2351                     case R_SI * 256 + R_BP:
2352                         rm = 2;
2353                         break;
2354                     case R_DI * 256 + R_BP:
2355                         rm = 3;
2356                         break;
2357                 } else
2358                     switch (b) {
2359                     case R_SI:
2360                         rm = 4;
2361                         break;
2362                     case R_DI:
2363                         rm = 5;
2364                         break;
2365                     case R_BP:
2366                         rm = 6;
2367                         break;
2368                     case R_BX:
2369                         rm = 7;
2370                         break;
2371                     }
2372                 if (rm == -1)   /* can't happen, in theory */
2373                     return NULL;        /* so panic if it does */
2374
2375                 if (o == 0 && seg == NO_SEG && !forw_ref && rm != 6 &&
2376                     !(input->eaflags & (EAF_BYTEOFFS | EAF_WORDOFFS)))
2377                     mod = 0;
2378                 else if (input->eaflags & EAF_BYTEOFFS ||
2379                          (o >= -128 && o <= 127 && seg == NO_SEG
2380                           && !forw_ref
2381                           && !(input->eaflags & EAF_WORDOFFS)))
2382                     mod = 1;
2383                 else
2384                     mod = 2;
2385
2386                 output->sib_present = false;    /* no SIB - it's 16-bit */
2387                 output->bytes = mod;    /* bytes of offset needed */
2388                 output->modrm = (mod << 6) | ((rfield & 7) << 3) | rm;
2389             }
2390         }
2391     }
2392
2393     output->size = 1 + output->sib_present + output->bytes;
2394     return output;
2395 }
2396
2397 static void add_asp(insn *ins, int addrbits)
2398 {
2399     int j, valid;
2400     int defdisp;
2401
2402     valid = (addrbits == 64) ? 64|32 : 32|16;
2403
2404     switch (ins->prefixes[PPS_ASIZE]) {
2405     case P_A16:
2406         valid &= 16;
2407         break;
2408     case P_A32:
2409         valid &= 32;
2410         break;
2411     case P_A64:
2412         valid &= 64;
2413         break;
2414     case P_ASP:
2415         valid &= (addrbits == 32) ? 16 : 32;
2416         break;
2417     default:
2418         break;
2419     }
2420
2421     for (j = 0; j < ins->operands; j++) {
2422         if (!(MEMORY & ~ins->oprs[j].type)) {
2423             int32_t i, b;
2424
2425             /* Verify as Register */
2426             if (ins->oprs[j].indexreg < EXPR_REG_START
2427                 || ins->oprs[j].indexreg >= REG_ENUM_LIMIT)
2428                 i = 0;
2429             else
2430                 i = nasm_reg_flags[ins->oprs[j].indexreg];
2431
2432             /* Verify as Register */
2433             if (ins->oprs[j].basereg < EXPR_REG_START
2434                 || ins->oprs[j].basereg >= REG_ENUM_LIMIT)
2435                 b = 0;
2436             else
2437                 b = nasm_reg_flags[ins->oprs[j].basereg];
2438
2439             if (ins->oprs[j].scale == 0)
2440                 i = 0;
2441
2442             if (!i && !b) {
2443                 int ds = ins->oprs[j].disp_size;
2444                 if ((addrbits != 64 && ds > 8) ||
2445                     (addrbits == 64 && ds == 16))
2446                     valid &= ds;
2447             } else {
2448                 if (!(REG16 & ~b))
2449                     valid &= 16;
2450                 if (!(REG32 & ~b))
2451                     valid &= 32;
2452                 if (!(REG64 & ~b))
2453                     valid &= 64;
2454
2455                 if (!(REG16 & ~i))
2456                     valid &= 16;
2457                 if (!(REG32 & ~i))
2458                     valid &= 32;
2459                 if (!(REG64 & ~i))
2460                     valid &= 64;
2461             }
2462         }
2463     }
2464
2465     if (valid & addrbits) {
2466         ins->addr_size = addrbits;
2467     } else if (valid & ((addrbits == 32) ? 16 : 32)) {
2468         /* Add an address size prefix */
2469         enum prefixes pref = (addrbits == 32) ? P_A16 : P_A32;
2470         ins->prefixes[PPS_ASIZE] = pref;
2471         ins->addr_size = (addrbits == 32) ? 16 : 32;
2472     } else {
2473         /* Impossible... */
2474         errfunc(ERR_NONFATAL, "impossible combination of address sizes");
2475         ins->addr_size = addrbits; /* Error recovery */
2476     }
2477
2478     defdisp = ins->addr_size == 16 ? 16 : 32;
2479
2480     for (j = 0; j < ins->operands; j++) {
2481         if (!(MEM_OFFS & ~ins->oprs[j].type) &&
2482             (ins->oprs[j].disp_size ? ins->oprs[j].disp_size : defdisp)
2483             != ins->addr_size) {
2484             /* mem_offs sizes must match the address size; if not,
2485                strip the MEM_OFFS bit and match only EA instructions */
2486             ins->oprs[j].type &= ~(MEM_OFFS & ~MEMORY);
2487         }
2488     }
2489 }